Diffstat (limited to 'fs')
355 files changed, 9370 insertions, 5370 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bb7991c7e5c7..53161ec058a7 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -451,7 +451,7 @@ void v9fs_evict_inode(struct inode *inode)
 {
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
-	truncate_inode_pages(inode->i_mapping, 0);
+	truncate_inode_pages_final(inode->i_mapping);
 	clear_inode(inode);
 	filemap_fdatawrite(inode->i_mapping);
 
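Many of the ->evict_inode() changes in this series are the same one-line conversion, so a standalone sketch of the pattern may help (the function name below is hypothetical, not from the diff): truncate_inode_pages_final() truncates everything just as truncate_inode_pages(mapping, 0) did, but additionally marks the address_space as dying so that late page-cache lookups cannot race with the inode being freed.

#include <linux/fs.h>
#include <linux/mm.h>

static void example_evict_inode(struct inode *inode)
{
	/* drop every cached page and mark the mapping as dying */
	truncate_inode_pages_final(&inode->i_data);
	/* detach the inode from the writeback and hash lists */
	clear_inode(inode);
	/* filesystem-specific teardown would follow here */
}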
diff --git a/fs/Kconfig b/fs/Kconfig
index 7385e54be4b9..312393f32948 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -96,6 +96,7 @@ endif # BLOCK
 menu "Pseudo filesystems"
 
 source "fs/proc/Kconfig"
+source "fs/kernfs/Kconfig"
 source "fs/sysfs/Kconfig"
 
 config TMPFS
diff --git a/fs/Makefile b/fs/Makefile
index 47ac07bb4acc..f9cb9876e466 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -52,7 +52,8 @@ obj-$(CONFIG_FHANDLE)		+= fhandle.o
 obj-y				+= quota/
 
 obj-$(CONFIG_PROC_FS)		+= proc/
-obj-$(CONFIG_SYSFS)		+= sysfs/ kernfs/
+obj-$(CONFIG_KERNFS)		+= kernfs/
+obj-$(CONFIG_SYSFS)		+= sysfs/
 obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
 obj-y				+= devpts/
 
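These two hunks decouple kernfs from sysfs by giving it its own CONFIG_KERNFS symbol. The new fs/kernfs/Kconfig is not part of this excerpt; presumably it is a hidden symbol that users such as sysfs select, along these lines (my assumption, not shown in the diff):

config KERNFS
	bool
	default n

with fs/sysfs/Kconfig gaining a matching "select KERNFS", so kernfs is built exactly when something needs it.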
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 7b3003cb6f1b..952aeb048349 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -212,6 +212,7 @@ static int parse_options(struct super_block *sb, char *options)
 
 static int adfs_remount(struct super_block *sb, int *flags, char *data)
 {
+	sync_filesystem(sb);
 	*flags |= MS_NODIRATIME;
 	return parse_options(sb, data);
 }
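adfs is the first of many filesystems in this series gaining the same opening line in ->remount_fs(): writeback of dirty data is being pushed out of the generic VFS remount path and into each filesystem, so the handler must now flush the superblock itself before applying new flags. A minimal sketch with a hypothetical filesystem:

static int example_remount(struct super_block *sb, int *flags, char *data)
{
	/* flush dirty inodes and data before the mount flags change */
	sync_filesystem(sb);

	/* then apply the new options exactly as before */
	*flags |= MS_NODIRATIME;
	return 0;
}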
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 0e092d08680e..96df91e8c334 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -259,7 +259,7 @@ affs_evict_inode(struct inode *inode)
 {
 	unsigned long cache_page;
 	pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-	truncate_inode_pages(&inode->i_data, 0);
+	truncate_inode_pages_final(&inode->i_data);
 
 	if (!inode->i_nlink) {
 		inode->i_size = 0;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d098731b82ff..307453086c3f 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -530,6 +530,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 
 	pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
 
+	sync_filesystem(sb);
 	*flags |= MS_NODIRATIME;
 
 	memcpy(volume, sbi->s_volume, 32);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index ce25d755b7aa..294671288449 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -422,7 +422,7 @@ void afs_evict_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
-	truncate_inode_pages(&inode->i_data, 0);
+	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
 
 	afs_give_up_callback(vnode);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6621f8008122..be75b500005d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -75,6 +75,7 @@ struct afs_call {
 	const struct afs_call_type *type;	/* type of call */
 	const struct afs_wait_mode *wait_mode;	/* completion wait mode */
 	wait_queue_head_t	waitq;		/* processes awaiting completion */
+	work_func_t		async_workfn;
 	struct work_struct	async_work;	/* asynchronous work processor */
 	struct work_struct	work;		/* actual work processor */
 	struct sk_buff_head	rx_queue;	/* received packets */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 8ad8c2a0703a..ef943df73b8c 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -644,7 +644,7 @@ static void afs_process_async_call(struct work_struct *work)
 
 	/* we can't just delete the call because the work item may be
 	 * queued */
-	PREPARE_WORK(&call->async_work, afs_delete_async_call);
+	call->async_workfn = afs_delete_async_call;
 	queue_work(afs_async_calls, &call->async_work);
 }
 
@@ -663,6 +663,13 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
 	call->reply_size += len;
 }
 
+static void afs_async_workfn(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+	call->async_workfn(work);
+}
+
 /*
  * accept the backlog of incoming calls
  */
@@ -685,7 +692,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
 		return;
 	}
 
-	INIT_WORK(&call->async_work, afs_process_async_call);
+	call->async_workfn = afs_process_async_call;
+	INIT_WORK(&call->async_work, afs_async_workfn);
 	call->wait_mode = &afs_async_incoming_call;
 	call->type = &afs_RXCMxxxx;
 	init_waitqueue_head(&call->waitq);
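PREPARE_WORK() was removed from the workqueue API, so afs can no longer re-point an already-initialised work item at a different handler. The replacement pattern, restated in standalone form (the example_* names are illustrative, not kernel API): the work item keeps one fixed handler for its whole lifetime, and that handler forwards to a per-object function pointer, which can be retargeted with a plain store before queueing.

#include <linux/workqueue.h>

struct example_call {
	work_func_t		workfn;	/* current behaviour */
	struct work_struct	work;	/* fixed dispatch point */
};

static void example_dispatch(struct work_struct *work)
{
	struct example_call *call =
		container_of(work, struct example_call, work);

	call->workfn(work);	/* forward to whatever is current */
}

static void example_retarget(struct example_call *call, work_func_t fn)
{
	/* INIT_WORK(&call->work, example_dispatch) was done once at
	 * setup; changing behaviour is now just a pointer store plus
	 * a re-queue, with no workqueue API involvement. */
	call->workfn = fn;
	schedule_work(&call->work);
}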
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 24084732b1d0..80ef38c73e5a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
-	struct dentry *root;
-	root = mount_pseudo(fs_type, "anon_inode:", NULL,
+	return mount_pseudo(fs_type, "anon_inode:", NULL,
 			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
-	if (!IS_ERR(root)) {
-		struct super_block *s = root->d_sb;
-		anon_inode_inode = alloc_anon_inode(s);
-		if (IS_ERR(anon_inode_inode)) {
-			dput(root);
-			deactivate_locked_super(s);
-			root = ERR_CAST(anon_inode_inode);
-		}
-	}
-	return root;
 }
 
 static struct file_system_type anon_inode_fs_type = {
@@ -175,22 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
 
 static int __init anon_inode_init(void)
 {
-	int error;
-
-	error = register_filesystem(&anon_inode_fs_type);
-	if (error)
-		goto err_exit;
 	anon_inode_mnt = kern_mount(&anon_inode_fs_type);
-	if (IS_ERR(anon_inode_mnt)) {
-		error = PTR_ERR(anon_inode_mnt);
-		goto err_unregister_filesystem;
-	}
-	return 0;
+	if (IS_ERR(anon_inode_mnt))
+		panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt));
 
-err_unregister_filesystem:
-	unregister_filesystem(&anon_inode_fs_type);
-err_exit:
-	panic(KERN_ERR "anon_inode_init() failed (%d)\n", error);
+	anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+	if (IS_ERR(anon_inode_inode))
+		panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+
+	return 0;
 }
 
 fs_initcall(anon_inode_init);
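With the allocation moved out of ->mount() and into the initcall, the single shared anon_inode_inode is set up once at boot, and a failure there is fatal rather than reported to a caller that cannot recover anyway. For context, a minimal sketch of how a driver typically consumes this facility (names are hypothetical): every handle created this way is a file backed by that one shared inode.

#include <linux/anon_inodes.h>
#include <linux/fs.h>

static const struct file_operations example_fops = {
	.owner = THIS_MODULE,
};

/* returns a new fd whose file is backed by the shared anon inode */
static int example_create_handle(void *priv)
{
	return anon_inode_getfd("[example]", &example_fops, priv,
				O_RDWR | O_CLOEXEC);
}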
diff --git a/fs/befs/Makefile b/fs/befs/Makefile
index 2f370bd7a50d..8b9f66642a83 100644
--- a/fs/befs/Makefile
+++ b/fs/befs/Makefile
@@ -3,5 +3,5 @@
 #
 
 obj-$(CONFIG_BEFS_FS) += befs.o
-
+ccflags-$(CONFIG_BEFS_DEBUG) += -DDEBUG
 befs-objs := datastream.o btree.o super.o inode.o debug.o io.o linuxvfs.o
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index b26642839156..3a7813ab8c95 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -88,8 +88,11 @@ enum befs_err {
 
 /****************************/
 /* debug.c */
+__printf(2, 3)
 void befs_error(const struct super_block *sb, const char *fmt, ...);
+__printf(2, 3)
 void befs_warning(const struct super_block *sb, const char *fmt, ...);
+__printf(2, 3)
 void befs_debug(const struct super_block *sb, const char *fmt, ...);
 
 void befs_dump_super_block(const struct super_block *sb, befs_super_block *);
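__printf(m, n) is the kernel's wrapper for __attribute__((format(printf, m, n))): argument m is the format string and the variadic arguments start at argument n, so the compiler can type-check every caller. That is what makes the many %Lu-to-%llu fixes below enforceable from now on. A sketch of what the annotation buys (the helper name is made up):

#include <linux/compiler.h>
#include <linux/fs.h>

__printf(2, 3)
void example_log(const struct super_block *sb, const char *fmt, ...);

static void caller(struct super_block *sb, u64 block)
{
	example_log(sb, "block %llu", (unsigned long long)block); /* ok */
	example_log(sb, "block %d", block);  /* now a compile-time warning */
}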
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 74e397db0b8b..a2cd305a993a 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -137,7 +137,7 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds,
 	struct buffer_head *bh = NULL;
 	befs_disk_btree_super *od_sup = NULL;
 
-	befs_debug(sb, "---> befs_btree_read_super()");
+	befs_debug(sb, "---> %s", __func__);
 
 	bh = befs_read_datastream(sb, ds, 0, NULL);
 
@@ -162,11 +162,11 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds,
 		goto error;
 	}
 
-	befs_debug(sb, "<--- befs_btree_read_super()");
+	befs_debug(sb, "<--- %s", __func__);
 	return BEFS_OK;
 
       error:
-	befs_debug(sb, "<--- befs_btree_read_super() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
@@ -195,16 +195,16 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds,
 {
 	uint off = 0;
 
-	befs_debug(sb, "---> befs_bt_read_node()");
+	befs_debug(sb, "---> %s", __func__);
 
 	if (node->bh)
 		brelse(node->bh);
 
 	node->bh = befs_read_datastream(sb, ds, node_off, &off);
 	if (!node->bh) {
-		befs_error(sb, "befs_bt_read_node() failed to read "
-			   "node at %Lu", node_off);
-		befs_debug(sb, "<--- befs_bt_read_node() ERROR");
+		befs_error(sb, "%s failed to read "
+			   "node at %llu", __func__, node_off);
+		befs_debug(sb, "<--- %s ERROR", __func__);
 
 		return BEFS_ERR;
 	}
@@ -221,7 +221,7 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds,
 	node->head.all_key_length =
 	    fs16_to_cpu(sb, node->od_node->all_key_length);
 
-	befs_debug(sb, "<--- befs_btree_read_node()");
+	befs_debug(sb, "<--- %s", __func__);
 	return BEFS_OK;
 }
 
@@ -252,7 +252,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 	befs_off_t node_off;
 	int res;
 
-	befs_debug(sb, "---> befs_btree_find() Key: %s", key);
+	befs_debug(sb, "---> %s Key: %s", __func__, key);
 
 	if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) {
 		befs_error(sb,
@@ -263,7 +263,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 	this_node = kmalloc(sizeof (befs_btree_node),
 			    GFP_NOFS);
 	if (!this_node) {
-		befs_error(sb, "befs_btree_find() failed to allocate %u "
+		befs_error(sb, "befs_btree_find() failed to allocate %zu "
			   "bytes of memory", sizeof (befs_btree_node));
 		goto error;
 	}
@@ -274,7 +274,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 	node_off = bt_super.root_node_ptr;
 	if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
 		befs_error(sb, "befs_btree_find() failed to read "
-			   "node at %Lu", node_off);
+			   "node at %llu", node_off);
 		goto error_alloc;
 	}
 
@@ -285,7 +285,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 		/* if no match, go to overflow node */
 		if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
 			befs_error(sb, "befs_btree_find() failed to read "
-				   "node at %Lu", node_off);
+				   "node at %llu", node_off);
 			goto error_alloc;
 		}
 	}
@@ -298,11 +298,11 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 	kfree(this_node);
 
 	if (res != BEFS_BT_MATCH) {
-		befs_debug(sb, "<--- befs_btree_find() Key %s not found", key);
+		befs_debug(sb, "<--- %s Key %s not found", __func__, key);
 		*value = 0;
 		return BEFS_BT_NOT_FOUND;
 	}
-	befs_debug(sb, "<--- befs_btree_find() Found key %s, value %Lu",
+	befs_debug(sb, "<--- %s Found key %s, value %llu", __func__,
 		   key, *value);
 	return BEFS_OK;
 
@@ -310,7 +310,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
 	kfree(this_node);
       error:
 	*value = 0;
-	befs_debug(sb, "<--- befs_btree_find() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
@@ -343,7 +343,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
 	char *thiskey;
 	fs64 *valarray;
 
-	befs_debug(sb, "---> befs_find_key() %s", findkey);
+	befs_debug(sb, "---> %s %s", __func__, findkey);
 
 	*value = 0;
 
@@ -355,7 +355,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
 
 	eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len);
 	if (eq < 0) {
-		befs_debug(sb, "<--- befs_find_key() %s not found", findkey);
+		befs_debug(sb, "<--- %s %s not found", __func__, findkey);
 		return BEFS_BT_NOT_FOUND;
 	}
 
@@ -373,8 +373,8 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
 					  findkey_len);
 
 		if (eq == 0) {
-			befs_debug(sb, "<--- befs_find_key() found %s at %d",
-				   thiskey, mid);
+			befs_debug(sb, "<--- %s found %s at %d",
+				   __func__, thiskey, mid);
 
 			*value = fs64_to_cpu(sb, valarray[mid]);
 			return BEFS_BT_MATCH;
@@ -388,7 +388,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
 		*value = fs64_to_cpu(sb, valarray[mid + 1]);
 	else
 		*value = fs64_to_cpu(sb, valarray[mid]);
-	befs_debug(sb, "<--- befs_find_key() found %s at %d", thiskey, mid);
+	befs_debug(sb, "<--- %s found %s at %d", __func__, thiskey, mid);
 	return BEFS_BT_PARMATCH;
 }
 
@@ -428,7 +428,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 
 	uint key_sum = 0;
 
-	befs_debug(sb, "---> befs_btree_read()");
+	befs_debug(sb, "---> %s", __func__);
 
 	if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) {
 		befs_error(sb,
@@ -437,7 +437,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 	}
 
 	if ((this_node = kmalloc(sizeof (befs_btree_node), GFP_NOFS)) == NULL) {
-		befs_error(sb, "befs_btree_read() failed to allocate %u "
+		befs_error(sb, "befs_btree_read() failed to allocate %zu "
			   "bytes of memory", sizeof (befs_btree_node));
 		goto error;
 	}
@@ -452,7 +452,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 		kfree(this_node);
 		*value = 0;
 		*keysize = 0;
-		befs_debug(sb, "<--- befs_btree_read() Tree is EMPTY");
+		befs_debug(sb, "<--- %s Tree is EMPTY", __func__);
 		return BEFS_BT_EMPTY;
 	} else if (res == BEFS_ERR) {
 		goto error_alloc;
@@ -467,7 +467,8 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 		*keysize = 0;
 		*value = 0;
 		befs_debug(sb,
-			   "<--- befs_btree_read() END of keys at %Lu",
+			   "<--- %s END of keys at %llu", __func__,
+			   (unsigned long long)
 			   key_sum + this_node->head.all_key_count);
 		brelse(this_node->bh);
 		kfree(this_node);
@@ -478,8 +479,8 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 		node_off = this_node->head.right;
 
 		if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
-			befs_error(sb, "befs_btree_read() failed to read "
-				   "node at %Lu", node_off);
+			befs_error(sb, "%s failed to read node at %llu",
+				   __func__, (unsigned long long)node_off);
 			goto error_alloc;
 		}
 	}
@@ -492,11 +493,13 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 
 	keystart = befs_bt_get_key(sb, this_node, cur_key, &keylen);
 
-	befs_debug(sb, "Read [%Lu,%d]: keysize %d", node_off, cur_key, keylen);
+	befs_debug(sb, "Read [%llu,%d]: keysize %d",
+		   (long long unsigned int)node_off, (int)cur_key,
+		   (int)keylen);
 
 	if (bufsize < keylen + 1) {
-		befs_error(sb, "befs_btree_read() keybuf too small (%u) "
-			   "for key of size %d", bufsize, keylen);
+		befs_error(sb, "%s keybuf too small (%zu) "
+			   "for key of size %d", __func__, bufsize, keylen);
 		brelse(this_node->bh);
 		goto error_alloc;
 	};
@@ -506,13 +509,13 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
 	*keysize = keylen;
 	keybuf[keylen] = '\0';
 
-	befs_debug(sb, "Read [%Lu,%d]: Key \"%.*s\", Value %Lu", node_off,
+	befs_debug(sb, "Read [%llu,%d]: Key \"%.*s\", Value %llu", node_off,
 		   cur_key, keylen, keybuf, *value);
 
 	brelse(this_node->bh);
 	kfree(this_node);
 
-	befs_debug(sb, "<--- befs_btree_read()");
+	befs_debug(sb, "<--- %s", __func__);
 
 	return BEFS_OK;
 
@@ -522,7 +525,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
       error:
 	*keysize = 0;
 	*value = 0;
-	befs_debug(sb, "<--- befs_btree_read() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
@@ -547,26 +550,26 @@ befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds,
 		    befs_off_t * node_off)
 {
 
-	befs_debug(sb, "---> befs_btree_seekleaf()");
+	befs_debug(sb, "---> %s", __func__);
 
 	if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) {
-		befs_error(sb, "befs_btree_seekleaf() failed to read "
-			   "node at %Lu", *node_off);
+		befs_error(sb, "%s failed to read "
+			   "node at %llu", __func__, *node_off);
 		goto error;
 	}
-	befs_debug(sb, "Seekleaf to root node %Lu", *node_off);
+	befs_debug(sb, "Seekleaf to root node %llu", *node_off);
 
 	if (this_node->head.all_key_count == 0 && befs_leafnode(this_node)) {
-		befs_debug(sb, "<--- befs_btree_seekleaf() Tree is EMPTY");
+		befs_debug(sb, "<--- %s Tree is EMPTY", __func__);
 		return BEFS_BT_EMPTY;
 	}
 
 	while (!befs_leafnode(this_node)) {
 
 		if (this_node->head.all_key_count == 0) {
-			befs_debug(sb, "befs_btree_seekleaf() encountered "
-				   "an empty interior node: %Lu. Using Overflow "
-				   "node: %Lu", *node_off,
+			befs_debug(sb, "%s encountered "
+				   "an empty interior node: %llu. Using Overflow "
+				   "node: %llu", __func__, *node_off,
 				   this_node->head.overflow);
 			*node_off = this_node->head.overflow;
 		} else {
@@ -574,19 +577,19 @@ befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds,
 			*node_off = fs64_to_cpu(sb, valarray[0]);
 		}
 		if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) {
-			befs_error(sb, "befs_btree_seekleaf() failed to read "
-				   "node at %Lu", *node_off);
+			befs_error(sb, "%s failed to read "
+				   "node at %llu", __func__, *node_off);
 			goto error;
 		}
 
-		befs_debug(sb, "Seekleaf to child node %Lu", *node_off);
+		befs_debug(sb, "Seekleaf to child node %llu", *node_off);
 	}
-	befs_debug(sb, "Node %Lu is a leaf node", *node_off);
+	befs_debug(sb, "Node %llu is a leaf node", *node_off);
 
 	return BEFS_OK;
 
       error:
-	befs_debug(sb, "<--- befs_btree_seekleaf() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
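The btree.c hunks combine two mechanical fixes that recur through the rest of the befs changes. First, %Lu is a non-standard length modifier, and befs's 64-bit on-disk types do not match %lu on 32-bit builds, so values are printed as %llu (or %lu) with an explicit cast. Second, function names hard-coded into log strings are replaced by __func__, which cannot go stale across renames. A condensed illustration (schematic, assuming befs_off_t is a 64-bit type as defined in befs.h):

static void example_trace(struct super_block *sb, befs_off_t node_off)
{
	/* before: befs_debug(sb, "---> befs_bt_read_node() at %Lu", node_off); */
	befs_debug(sb, "---> %s at %llu", __func__,
		   (unsigned long long)node_off);
}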
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 59096b5e0fc7..c467bebd50af 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -52,26 +52,25 @@ befs_read_datastream(struct super_block *sb, befs_data_stream * ds,
 	befs_block_run run;
 	befs_blocknr_t block;	/* block coresponding to pos */
 
-	befs_debug(sb, "---> befs_read_datastream() %Lu", pos);
+	befs_debug(sb, "---> %s %llu", __func__, pos);
 	block = pos >> BEFS_SB(sb)->block_shift;
 	if (off)
 		*off = pos - (block << BEFS_SB(sb)->block_shift);
 
 	if (befs_fblock2brun(sb, ds, block, &run) != BEFS_OK) {
 		befs_error(sb, "BeFS: Error finding disk addr of block %lu",
-			   block);
-		befs_debug(sb, "<--- befs_read_datastream() ERROR");
+			   (unsigned long)block);
+		befs_debug(sb, "<--- %s ERROR", __func__);
 		return NULL;
 	}
 	bh = befs_bread_iaddr(sb, run);
 	if (!bh) {
 		befs_error(sb, "BeFS: Error reading block %lu from datastream",
-			   block);
+			   (unsigned long)block);
 		return NULL;
 	}
 
-	befs_debug(sb, "<--- befs_read_datastream() read data, starting at %Lu",
-		   pos);
+	befs_debug(sb, "<--- %s read data, starting at %llu", __func__, pos);
 
 	return bh;
 }
@@ -106,7 +105,8 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data,
 	} else {
 		befs_error(sb,
 			   "befs_fblock2brun() was asked to find block %lu, "
-			   "which is not mapped by the datastream\n", fblock);
+			   "which is not mapped by the datastream\n",
+			   (unsigned long)fblock);
 		err = BEFS_ERR;
 	}
 	return err;
@@ -128,14 +128,14 @@ befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff,
 	befs_off_t bytes_read = 0;	/* bytes readed */
 	u16 plen;
 	struct buffer_head *bh = NULL;
-	befs_debug(sb, "---> befs_read_lsymlink() length: %Lu", len);
+	befs_debug(sb, "---> %s length: %llu", __func__, len);
 
 	while (bytes_read < len) {
 		bh = befs_read_datastream(sb, ds, bytes_read, NULL);
 		if (!bh) {
 			befs_error(sb, "BeFS: Error reading datastream block "
-				   "starting from %Lu", bytes_read);
-			befs_debug(sb, "<--- befs_read_lsymlink() ERROR");
+				   "starting from %llu", bytes_read);
+			befs_debug(sb, "<--- %s ERROR", __func__);
 			return bytes_read;
 
 		}
@@ -146,7 +146,8 @@ befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff,
 		bytes_read += plen;
 	}
 
-	befs_debug(sb, "<--- befs_read_lsymlink() read %u bytes", bytes_read);
+	befs_debug(sb, "<--- %s read %u bytes", __func__, (unsigned int)
+		   bytes_read);
 	return bytes_read;
 }
 
@@ -169,7 +170,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
 	befs_blocknr_t metablocks;	/* FS metadata blocks */
 	befs_sb_info *befs_sb = BEFS_SB(sb);
 
-	befs_debug(sb, "---> befs_count_blocks()");
+	befs_debug(sb, "---> %s", __func__);
 
 	datablocks = ds->size >> befs_sb->block_shift;
 	if (ds->size & (befs_sb->block_size - 1))
@@ -206,7 +207,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
 	}
 
 	blocks = datablocks + metablocks;
-	befs_debug(sb, "<--- befs_count_blocks() %u blocks", blocks);
+	befs_debug(sb, "<--- %s %u blocks", __func__, (unsigned int)blocks);
 
 	return blocks;
 }
@@ -251,11 +252,11 @@ befs_find_brun_direct(struct super_block *sb, befs_data_stream * data,
 	befs_blocknr_t max_block =
 	    data->max_direct_range >> BEFS_SB(sb)->block_shift;
 
-	befs_debug(sb, "---> befs_find_brun_direct(), find %lu", blockno);
+	befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno);
 
 	if (blockno > max_block) {
-		befs_error(sb, "befs_find_brun_direct() passed block outside of"
-			   "direct region");
+		befs_error(sb, "%s passed block outside of direct region",
			   __func__);
 		return BEFS_ERR;
 	}
 
@@ -267,13 +268,14 @@ befs_find_brun_direct(struct super_block *sb, befs_data_stream * data,
 			run->start = array[i].start + offset;
 			run->len = array[i].len - offset;
 
-			befs_debug(sb, "---> befs_find_brun_direct(), "
-				   "found %lu at direct[%d]", blockno, i);
+			befs_debug(sb, "---> %s, "
+				   "found %lu at direct[%d]", __func__,
+				   (unsigned long)blockno, i);
 			return BEFS_OK;
 		}
 	}
 
-	befs_debug(sb, "---> befs_find_brun_direct() ERROR");
+	befs_debug(sb, "---> %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
@@ -316,7 +318,7 @@ befs_find_brun_indirect(struct super_block *sb,
 	befs_blocknr_t indirblockno = iaddr2blockno(sb, &indirect);
 	int arraylen = befs_iaddrs_per_block(sb);
 
-	befs_debug(sb, "---> befs_find_brun_indirect(), find %lu", blockno);
+	befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno);
 
 	indir_start_blk = data->max_direct_range >> BEFS_SB(sb)->block_shift;
 	search_blk = blockno - indir_start_blk;
@@ -325,10 +327,9 @@ befs_find_brun_indirect(struct super_block *sb,
 	for (i = 0; i < indirect.len; i++) {
 		indirblock = befs_bread(sb, indirblockno + i);
 		if (indirblock == NULL) {
-			befs_debug(sb,
-				   "---> befs_find_brun_indirect() failed to "
-				   "read disk block %lu from the indirect brun",
-				   indirblockno + i);
+			befs_debug(sb, "---> %s failed to read "
+				   "disk block %lu from the indirect brun",
+				   __func__, (unsigned long)indirblockno + i);
 			return BEFS_ERR;
 		}
 
@@ -348,9 +349,10 @@ befs_find_brun_indirect(struct super_block *sb,
 
 				brelse(indirblock);
 				befs_debug(sb,
-					   "<--- befs_find_brun_indirect() found "
-					   "file block %lu at indirect[%d]",
-					   blockno, j + (i * arraylen));
+					   "<--- %s found file block "
+					   "%lu at indirect[%d]", __func__,
+					   (unsigned long)blockno,
+					   j + (i * arraylen));
 				return BEFS_OK;
 			}
 			sum += len;
@@ -360,10 +362,10 @@ befs_find_brun_indirect(struct super_block *sb,
 	}
 
 	/* Only fallthrough is an error */
-	befs_error(sb, "BeFS: befs_find_brun_indirect() failed to find "
-		   "file block %lu", blockno);
+	befs_error(sb, "BeFS: %s failed to find "
+		   "file block %lu", __func__, (unsigned long)blockno);
 
-	befs_debug(sb, "<--- befs_find_brun_indirect() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return BEFS_ERR;
 }
 
@@ -444,7 +446,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	size_t diblklen = iblklen * befs_iaddrs_per_block(sb)
 	    * BEFS_DBLINDIR_BRUN_LEN;
 
-	befs_debug(sb, "---> befs_find_brun_dblindirect() find %lu", blockno);
+	befs_debug(sb, "---> %s find %lu", __func__, (unsigned long)blockno);
 
 	/* First, discover which of the double_indir->indir blocks
	 * contains pos. Then figure out how much of pos that
@@ -460,8 +462,9 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	dbl_which_block = dblindir_indx / befs_iaddrs_per_block(sb);
 	if (dbl_which_block > data->double_indirect.len) {
 		befs_error(sb, "The double-indirect index calculated by "
-			   "befs_read_brun_dblindirect(), %d, is outside the range "
-			   "of the double-indirect block", dblindir_indx);
+			   "%s, %d, is outside the range "
+			   "of the double-indirect block", __func__,
+			   dblindir_indx);
 		return BEFS_ERR;
 	}
 
@@ -469,10 +472,10 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	    befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
 		       dbl_which_block);
 	if (dbl_indir_block == NULL) {
-		befs_error(sb, "befs_read_brun_dblindirect() couldn't read the "
-			   "double-indirect block at blockno %lu",
-			   iaddr2blockno(sb,
-					 &data->double_indirect) +
+		befs_error(sb, "%s couldn't read the "
+			   "double-indirect block at blockno %lu", __func__,
+			   (unsigned long)
+			   iaddr2blockno(sb, &data->double_indirect) +
 			   dbl_which_block);
 		brelse(dbl_indir_block);
 		return BEFS_ERR;
@@ -489,16 +492,16 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	which_block = indir_indx / befs_iaddrs_per_block(sb);
 	if (which_block > indir_run.len) {
 		befs_error(sb, "The indirect index calculated by "
-			   "befs_read_brun_dblindirect(), %d, is outside the range "
-			   "of the indirect block", indir_indx);
+			   "%s, %d, is outside the range "
+			   "of the indirect block", __func__, indir_indx);
 		return BEFS_ERR;
 	}
 
 	indir_block =
 	    befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
 	if (indir_block == NULL) {
-		befs_error(sb, "befs_read_brun_dblindirect() couldn't read the "
-			   "indirect block at blockno %lu",
-			   iaddr2blockno(sb, &indir_run) + which_block);
+		befs_error(sb, "%s couldn't read the indirect block "
+			   "at blockno %lu", __func__, (unsigned long)
+			   iaddr2blockno(sb, &indir_run) + which_block);
 		brelse(indir_block);
 		return BEFS_ERR;
@@ -519,7 +522,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
 	run->len -= offset;
 
 	befs_debug(sb, "Found file block %lu in double_indirect[%d][%d],"
-		   " double_indirect_leftover = %lu",
+		   " double_indirect_leftover = %lu", (unsigned long)
 		   blockno, dblindir_indx, indir_indx, dblindir_leftover);
 
 	return BEFS_OK;
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index 622e73775c83..4de7cffcd662 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -10,6 +10,7 @@
  * debug functions
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #ifdef __KERNEL__
 
 #include <stdarg.h>
@@ -23,43 +24,30 @@
 
 #include "befs.h"
 
-#define ERRBUFSIZE 1024
-
 void
 befs_error(const struct super_block *sb, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
-	char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
-	if (err_buf == NULL) {
-		printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE);
-		return;
-	}
 
 	va_start(args, fmt);
-	vsnprintf(err_buf, ERRBUFSIZE, fmt, args);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	pr_err("(%s): %pV\n", sb->s_id, &vaf);
 	va_end(args);
-
-	printk(KERN_ERR "BeFS(%s): %s\n", sb->s_id, err_buf);
-	kfree(err_buf);
 }
 
 void
 befs_warning(const struct super_block *sb, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
-	char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
-	if (err_buf == NULL) {
-		printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE);
-		return;
-	}
 
 	va_start(args, fmt);
-	vsnprintf(err_buf, ERRBUFSIZE, fmt, args);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	pr_warn("(%s): %pV\n", sb->s_id, &vaf);
 	va_end(args);
-
-	printk(KERN_WARNING "BeFS(%s): %s\n", sb->s_id, err_buf);
-
-	kfree(err_buf);
 }
 
 void
@@ -67,25 +55,13 @@ befs_debug(const struct super_block *sb, const char *fmt, ...)
 {
 #ifdef CONFIG_BEFS_DEBUG
 
+	struct va_format vaf;
 	va_list args;
-	char *err_buf = NULL;
-
-	if (BEFS_SB(sb)->mount_opts.debug) {
-		err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL);
-		if (err_buf == NULL) {
-			printk(KERN_ERR "could not allocate %d bytes\n",
-				ERRBUFSIZE);
-			return;
-		}
-
-		va_start(args, fmt);
-		vsnprintf(err_buf, ERRBUFSIZE, fmt, args);
-		va_end(args);
-
-		printk(KERN_DEBUG "BeFS(%s): %s\n", sb->s_id, err_buf);
-
-		kfree(err_buf);
-	}
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	pr_debug("(%s): %pV\n", sb->s_id, &vaf);
+	va_end(args);
 
 #endif				//CONFIG_BEFS_DEBUG
 }
@@ -109,9 +85,9 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode)
 	befs_debug(sb, "  gid %u", fs32_to_cpu(sb, inode->gid));
 	befs_debug(sb, "  mode %08x", fs32_to_cpu(sb, inode->mode));
 	befs_debug(sb, "  flags %08x", fs32_to_cpu(sb, inode->flags));
-	befs_debug(sb, "  create_time %Lu",
+	befs_debug(sb, "  create_time %llu",
 		   fs64_to_cpu(sb, inode->create_time));
-	befs_debug(sb, "  last_modified_time %Lu",
+	befs_debug(sb, "  last_modified_time %llu",
 		   fs64_to_cpu(sb, inode->last_modified_time));
 
 	tmp_run = fsrun_to_cpu(sb, inode->parent);
@@ -137,7 +113,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode)
 			   tmp_run.allocation_group, tmp_run.start,
 			   tmp_run.len);
 	}
-	befs_debug(sb, "  max_direct_range %Lu",
+	befs_debug(sb, "  max_direct_range %llu",
 		   fs64_to_cpu(sb,
 			       inode->data.datastream.
 			       max_direct_range));
@@ -147,7 +123,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode)
 		   tmp_run.allocation_group,
 		   tmp_run.start, tmp_run.len);
 
-	befs_debug(sb, "  max_indirect_range %Lu",
+	befs_debug(sb, "  max_indirect_range %llu",
 		   fs64_to_cpu(sb,
 			       inode->data.datastream.
 			       max_indirect_range));
@@ -158,12 +134,12 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode)
 			   tmp_run.allocation_group, tmp_run.start,
 			   tmp_run.len);
 
-	befs_debug(sb, "  max_double_indirect_range %Lu",
+	befs_debug(sb, "  max_double_indirect_range %llu",
 		   fs64_to_cpu(sb,
 			       inode->data.datastream.
 			       max_double_indirect_range));
 
-	befs_debug(sb, "  size %Lu",
+	befs_debug(sb, "  size %llu",
 		   fs64_to_cpu(sb, inode->data.datastream.size));
 }
 
@@ -191,8 +167,8 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
 	befs_debug(sb, "  block_size %u", fs32_to_cpu(sb, sup->block_size));
 	befs_debug(sb, "  block_shift %u", fs32_to_cpu(sb, sup->block_shift));
 
-	befs_debug(sb, "  num_blocks %Lu", fs64_to_cpu(sb, sup->num_blocks));
-	befs_debug(sb, "  used_blocks %Lu", fs64_to_cpu(sb, sup->used_blocks));
+	befs_debug(sb, "  num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks));
+	befs_debug(sb, "  used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks));
 
 	befs_debug(sb, "  magic2 %08x", fs32_to_cpu(sb, sup->magic2));
 	befs_debug(sb, "  blocks_per_ag %u",
@@ -206,8 +182,8 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
 	befs_debug(sb, "  log_blocks %u, %hu, %hu",
 		   tmp_run.allocation_group, tmp_run.start, tmp_run.len);
 
-	befs_debug(sb, "  log_start %Ld", fs64_to_cpu(sb, sup->log_start));
-	befs_debug(sb, "  log_end %Ld", fs64_to_cpu(sb, sup->log_end));
+	befs_debug(sb, "  log_start %lld", fs64_to_cpu(sb, sup->log_start));
+	befs_debug(sb, "  log_end %lld", fs64_to_cpu(sb, sup->log_end));
 
 	befs_debug(sb, "  magic3 %08x", fs32_to_cpu(sb, sup->magic3));
 
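The debug.c rewrite drops the kmalloc'd staging buffer entirely by using the printk extension %pV, which formats a struct va_format (a fmt/va_list pair) in place, while pr_fmt() supplies the module-name prefix the old code spelled out by hand. A self-contained sketch of the idiom (the wrapper name is invented):

/* pr_fmt() must be defined before printk.h is pulled in */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/fs.h>

static void example_error(const struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* one printk, no intermediate buffer, no 1024-byte truncation */
	pr_err("(%s): %pV\n", sb->s_id, &vaf);
	va_end(args);
}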
diff --git a/fs/befs/inode.c b/fs/befs/inode.c
index 94c17f9a9576..fa4b718de597 100644
--- a/fs/befs/inode.c
+++ b/fs/befs/inode.c
@@ -25,7 +25,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode,
 	/* check magic header. */
 	if (magic1 != BEFS_INODE_MAGIC1) {
 		befs_error(sb,
-			   "Inode has a bad magic header - inode = %lu", inode);
+			   "Inode has a bad magic header - inode = %lu",
+			   (unsigned long)inode);
 		return BEFS_BAD_INODE;
 	}
 
@@ -34,8 +35,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode,
 	 */
 	if (inode != iaddr2blockno(sb, &ino_num)) {
 		befs_error(sb, "inode blocknr field disagrees with vfs "
-			   "VFS: %lu, Inode %lu",
-			   inode, iaddr2blockno(sb, &ino_num));
+			   "VFS: %lu, Inode %lu", (unsigned long)
+			   inode, (unsigned long)iaddr2blockno(sb, &ino_num));
 		return BEFS_BAD_INODE;
 	}
 
@@ -44,7 +45,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode,
 	 */
 
 	if (!(flags & BEFS_INODE_IN_USE)) {
-		befs_error(sb, "inode is not used - inode = %lu", inode);
+		befs_error(sb, "inode is not used - inode = %lu",
+			   (unsigned long)inode);
 		return BEFS_BAD_INODE;
 	}
 
diff --git a/fs/befs/io.c b/fs/befs/io.c
index ddef98aa255d..0408a3d601d0 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -30,9 +30,9 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 	befs_blocknr_t block = 0;
 	befs_sb_info *befs_sb = BEFS_SB(sb);
 
-	befs_debug(sb, "---> Enter befs_read_iaddr() "
-		   "[%u, %hu, %hu]",
-		   iaddr.allocation_group, iaddr.start, iaddr.len);
+	befs_debug(sb, "---> Enter %s "
+		   "[%u, %hu, %hu]", __func__, iaddr.allocation_group,
+		   iaddr.start, iaddr.len);
 
 	if (iaddr.allocation_group > befs_sb->num_ags) {
 		befs_error(sb, "BEFS: Invalid allocation group %u, max is %u",
@@ -42,20 +42,21 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 
 	block = iaddr2blockno(sb, &iaddr);
 
-	befs_debug(sb, "befs_read_iaddr: offset = %lu", block);
+	befs_debug(sb, "%s: offset = %lu", __func__, (unsigned long)block);
 
 	bh = sb_bread(sb, block);
 
 	if (bh == NULL) {
-		befs_error(sb, "Failed to read block %lu", block);
+		befs_error(sb, "Failed to read block %lu",
+			   (unsigned long)block);
 		goto error;
 	}
 
-	befs_debug(sb, "<--- befs_read_iaddr()");
+	befs_debug(sb, "<--- %s", __func__);
 	return bh;
 
       error:
-	befs_debug(sb, "<--- befs_read_iaddr() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return NULL;
 }
 
@@ -64,20 +65,21 @@ befs_bread(struct super_block *sb, befs_blocknr_t block)
 {
 	struct buffer_head *bh = NULL;
 
-	befs_debug(sb, "---> Enter befs_read() %Lu", block);
+	befs_debug(sb, "---> Enter %s %lu", __func__, (unsigned long)block);
 
 	bh = sb_bread(sb, block);
 
 	if (bh == NULL) {
-		befs_error(sb, "Failed to read block %lu", block);
+		befs_error(sb, "Failed to read block %lu",
+			   (unsigned long)block);
 		goto error;
 	}
 
-	befs_debug(sb, "<--- befs_read()");
+	befs_debug(sb, "<--- %s", __func__);
 
 	return bh;
 
       error:
-	befs_debug(sb, "<--- befs_read() ERROR");
+	befs_debug(sb, "<--- %s ERROR", __func__);
 	return NULL;
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 845d2d690ce2..d626756ff721 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -5,6 +5,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
@@ -39,7 +41,6 @@ static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int)
 static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
-static int befs_init_inodecache(void);
 static void befs_destroy_inodecache(void);
 static void *befs_follow_link(struct dentry *, struct nameidata *);
 static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
@@ -131,26 +132,28 @@ befs_get_block(struct inode *inode, sector_t block,
 	ulong disk_off;
 
 	befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
-		   inode->i_ino, block);
+		   (unsigned long)inode->i_ino, (long)block);
 
 	if (block < 0) {
 		befs_error(sb, "befs_get_block() was asked for a block "
 			   "number less than zero: block %ld in inode %lu",
-			   block, inode->i_ino);
+			   (long)block, (unsigned long)inode->i_ino);
 		return -EIO;
 	}
 
 	if (create) {
 		befs_error(sb, "befs_get_block() was asked to write to "
-			   "block %ld in inode %lu", block, inode->i_ino);
+			   "block %ld in inode %lu", (long)block,
+			   (unsigned long)inode->i_ino);
 		return -EPERM;
 	}
 
 	res = befs_fblock2brun(sb, ds, block, &run);
 	if (res != BEFS_OK) {
 		befs_error(sb,
-			   "<--- befs_get_block() for inode %lu, block "
-			   "%ld ERROR", inode->i_ino, block);
+			   "<--- %s for inode %lu, block %ld ERROR",
+			   __func__, (unsigned long)inode->i_ino,
+			   (long)block);
 		return -EFBIG;
 	}
 
@@ -158,8 +161,9 @@ befs_get_block(struct inode *inode, sector_t block,
 
 	map_bh(bh_result, inode->i_sb, disk_off);
 
-	befs_debug(sb, "<--- befs_get_block() for inode %lu, block %ld, "
-		   "disk address %lu", inode->i_ino, block, disk_off);
+	befs_debug(sb, "<--- %s for inode %lu, block %ld, disk address %lu",
+		   __func__, (unsigned long)inode->i_ino, (long)block,
+		   (unsigned long)disk_off);
 
 	return 0;
 }
@@ -176,15 +180,15 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	char *utfname;
 	const char *name = dentry->d_name.name;
 
-	befs_debug(sb, "---> befs_lookup() "
-		   "name %s inode %ld", dentry->d_name.name, dir->i_ino);
+	befs_debug(sb, "---> %s name %s inode %ld", __func__,
+		   dentry->d_name.name, dir->i_ino);
 
 	/* Convert to UTF-8 */
 	if (BEFS_SB(sb)->nls) {
 		ret =
 		    befs_nls2utf(sb, name, strlen(name), &utfname, &utfnamelen);
 		if (ret < 0) {
-			befs_debug(sb, "<--- befs_lookup() ERROR");
+			befs_debug(sb, "<--- %s ERROR", __func__);
 			return ERR_PTR(ret);
 		}
 		ret = befs_btree_find(sb, ds, utfname, &offset);
@@ -195,12 +199,12 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	}
 
 	if (ret == BEFS_BT_NOT_FOUND) {
-		befs_debug(sb, "<--- befs_lookup() %s not found",
+		befs_debug(sb, "<--- %s %s not found", __func__,
| 199 | dentry->d_name.name); | 203 | dentry->d_name.name); |
| 200 | return ERR_PTR(-ENOENT); | 204 | return ERR_PTR(-ENOENT); |
| 201 | 205 | ||
| 202 | } else if (ret != BEFS_OK || offset == 0) { | 206 | } else if (ret != BEFS_OK || offset == 0) { |
| 203 | befs_warning(sb, "<--- befs_lookup() Error"); | 207 | befs_warning(sb, "<--- %s Error", __func__); |
| 204 | return ERR_PTR(-ENODATA); | 208 | return ERR_PTR(-ENODATA); |
| 205 | } | 209 | } |
| 206 | 210 | ||
| @@ -210,7 +214,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
| 210 | 214 | ||
| 211 | d_add(dentry, inode); | 215 | d_add(dentry, inode); |
| 212 | 216 | ||
| 213 | befs_debug(sb, "<--- befs_lookup()"); | 217 | befs_debug(sb, "<--- %s", __func__); |
| 214 | 218 | ||
| 215 | return NULL; | 219 | return NULL; |
| 216 | } | 220 | } |
| @@ -228,26 +232,25 @@ befs_readdir(struct file *file, struct dir_context *ctx) | |||
| 228 | char keybuf[BEFS_NAME_LEN + 1]; | 232 | char keybuf[BEFS_NAME_LEN + 1]; |
| 229 | const char *dirname = file->f_path.dentry->d_name.name; | 233 | const char *dirname = file->f_path.dentry->d_name.name; |
| 230 | 234 | ||
| 231 | befs_debug(sb, "---> befs_readdir() " | 235 | befs_debug(sb, "---> %s name %s, inode %ld, ctx->pos %lld", |
| 232 | "name %s, inode %ld, ctx->pos %Ld", | 236 | __func__, dirname, inode->i_ino, ctx->pos); |
| 233 | dirname, inode->i_ino, ctx->pos); | ||
| 234 | 237 | ||
| 235 | more: | 238 | more: |
| 236 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, | 239 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, |
| 237 | keybuf, &keysize, &value); | 240 | keybuf, &keysize, &value); |
| 238 | 241 | ||
| 239 | if (result == BEFS_ERR) { | 242 | if (result == BEFS_ERR) { |
| 240 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 243 | befs_debug(sb, "<--- %s ERROR", __func__); |
| 241 | befs_error(sb, "IO error reading %s (inode %lu)", | 244 | befs_error(sb, "IO error reading %s (inode %lu)", |
| 242 | dirname, inode->i_ino); | 245 | dirname, inode->i_ino); |
| 243 | return -EIO; | 246 | return -EIO; |
| 244 | 247 | ||
| 245 | } else if (result == BEFS_BT_END) { | 248 | } else if (result == BEFS_BT_END) { |
| 246 | befs_debug(sb, "<--- befs_readdir() END"); | 249 | befs_debug(sb, "<--- %s END", __func__); |
| 247 | return 0; | 250 | return 0; |
| 248 | 251 | ||
| 249 | } else if (result == BEFS_BT_EMPTY) { | 252 | } else if (result == BEFS_BT_EMPTY) { |
| 250 | befs_debug(sb, "<--- befs_readdir() Empty directory"); | 253 | befs_debug(sb, "<--- %s Empty directory", __func__); |
| 251 | return 0; | 254 | return 0; |
| 252 | } | 255 | } |
| 253 | 256 | ||
| @@ -260,7 +263,7 @@ more: | |||
| 260 | result = | 263 | result = |
| 261 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); | 264 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); |
| 262 | if (result < 0) { | 265 | if (result < 0) { |
| 263 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 266 | befs_debug(sb, "<--- %s ERROR", __func__); |
| 264 | return result; | 267 | return result; |
| 265 | } | 268 | } |
| 266 | if (!dir_emit(ctx, nlsname, nlsnamelen, | 269 | if (!dir_emit(ctx, nlsname, nlsnamelen, |
| @@ -277,7 +280,7 @@ more: | |||
| 277 | ctx->pos++; | 280 | ctx->pos++; |
| 278 | goto more; | 281 | goto more; |
| 279 | 282 | ||
| 280 | befs_debug(sb, "<--- befs_readdir() pos %Ld", ctx->pos); | 283 | befs_debug(sb, "<--- %s pos %lld", __func__, ctx->pos); |
| 281 | 284 | ||
| 282 | return 0; | 285 | return 0; |
| 283 | } | 286 | } |
| @@ -321,7 +324,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
| 321 | struct inode *inode; | 324 | struct inode *inode; |
| 322 | long ret = -EIO; | 325 | long ret = -EIO; |
| 323 | 326 | ||
| 324 | befs_debug(sb, "---> befs_read_inode() " "inode = %lu", ino); | 327 | befs_debug(sb, "---> %s inode = %lu", __func__, ino); |
| 325 | 328 | ||
| 326 | inode = iget_locked(sb, ino); | 329 | inode = iget_locked(sb, ino); |
| 327 | if (!inode) | 330 | if (!inode) |
| @@ -428,7 +431,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
| 428 | } | 431 | } |
| 429 | 432 | ||
| 430 | brelse(bh); | 433 | brelse(bh); |
| 431 | befs_debug(sb, "<--- befs_read_inode()"); | 434 | befs_debug(sb, "<--- %s", __func__); |
| 432 | unlock_new_inode(inode); | 435 | unlock_new_inode(inode); |
| 433 | return inode; | 436 | return inode; |
| 434 | 437 | ||
| @@ -437,7 +440,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
| 437 | 440 | ||
| 438 | unacquire_none: | 441 | unacquire_none: |
| 439 | iget_failed(inode); | 442 | iget_failed(inode); |
| 440 | befs_debug(sb, "<--- befs_read_inode() - Bad inode"); | 443 | befs_debug(sb, "<--- %s - Bad inode", __func__); |
| 441 | return ERR_PTR(ret); | 444 | return ERR_PTR(ret); |
| 442 | } | 445 | } |
| 443 | 446 | ||
| @@ -445,7 +448,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
| 445 | * | 448 | * |
| 446 | * Taken from NFS implementation by Al Viro. | 449 | * Taken from NFS implementation by Al Viro. |
| 447 | */ | 450 | */ |
| 448 | static int | 451 | static int __init |
| 449 | befs_init_inodecache(void) | 452 | befs_init_inodecache(void) |
| 450 | { | 453 | { |
| 451 | befs_inode_cachep = kmem_cache_create("befs_inode_cache", | 454 | befs_inode_cachep = kmem_cache_create("befs_inode_cache", |
| @@ -454,11 +457,9 @@ befs_init_inodecache(void) | |||
| 454 | SLAB_MEM_SPREAD), | 457 | SLAB_MEM_SPREAD), |
| 455 | init_once); | 458 | init_once); |
| 456 | if (befs_inode_cachep == NULL) { | 459 | if (befs_inode_cachep == NULL) { |
| 457 | printk(KERN_ERR "befs_init_inodecache: " | 460 | pr_err("%s: Couldn't initialize inode slabcache\n", __func__); |
| 458 | "Couldn't initialize inode slabcache\n"); | ||
| 459 | return -ENOMEM; | 461 | return -ENOMEM; |
| 460 | } | 462 | } |
| 461 | |||
| 462 | return 0; | 463 | return 0; |
| 463 | } | 464 | } |
| 464 | 465 | ||
| @@ -544,16 +545,16 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
| 544 | */ | 545 | */ |
| 545 | int maxlen = in_len + 1; | 546 | int maxlen = in_len + 1; |
| 546 | 547 | ||
| 547 | befs_debug(sb, "---> utf2nls()"); | 548 | befs_debug(sb, "---> %s", __func__); |
| 548 | 549 | ||
| 549 | if (!nls) { | 550 | if (!nls) { |
| 550 | befs_error(sb, "befs_utf2nls called with no NLS table loaded"); | 551 | befs_error(sb, "%s called with no NLS table loaded", __func__); |
| 551 | return -EINVAL; | 552 | return -EINVAL; |
| 552 | } | 553 | } |
| 553 | 554 | ||
| 554 | *out = result = kmalloc(maxlen, GFP_NOFS); | 555 | *out = result = kmalloc(maxlen, GFP_NOFS); |
| 555 | if (!*out) { | 556 | if (!*out) { |
| 556 | befs_error(sb, "befs_utf2nls() cannot allocate memory"); | 557 | befs_error(sb, "%s cannot allocate memory", __func__); |
| 557 | *out_len = 0; | 558 | *out_len = 0; |
| 558 | return -ENOMEM; | 559 | return -ENOMEM; |
| 559 | } | 560 | } |
| @@ -575,14 +576,14 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
| 575 | result[o] = '\0'; | 576 | result[o] = '\0'; |
| 576 | *out_len = o; | 577 | *out_len = o; |
| 577 | 578 | ||
| 578 | befs_debug(sb, "<--- utf2nls()"); | 579 | befs_debug(sb, "<--- %s", __func__); |
| 579 | 580 | ||
| 580 | return o; | 581 | return o; |
| 581 | 582 | ||
| 582 | conv_err: | 583 | conv_err: |
| 583 | befs_error(sb, "Name using character set %s contains a character that " | 584 | befs_error(sb, "Name using character set %s contains a character that " |
| 584 | "cannot be converted to unicode.", nls->charset); | 585 | "cannot be converted to unicode.", nls->charset); |
| 585 | befs_debug(sb, "<--- utf2nls()"); | 586 | befs_debug(sb, "<--- %s", __func__); |
| 586 | kfree(result); | 587 | kfree(result); |
| 587 | return -EILSEQ; | 588 | return -EILSEQ; |
| 588 | } | 589 | } |
| @@ -623,16 +624,17 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
| 623 | * in special cases */ | 624 | * in special cases */ |
| 624 | int maxlen = (3 * in_len) + 1; | 625 | int maxlen = (3 * in_len) + 1; |
| 625 | 626 | ||
| 626 | befs_debug(sb, "---> nls2utf()\n"); | 627 | befs_debug(sb, "---> %s\n", __func__); |
| 627 | 628 | ||
| 628 | if (!nls) { | 629 | if (!nls) { |
| 629 | befs_error(sb, "befs_nls2utf called with no NLS table loaded."); | 630 | befs_error(sb, "%s called with no NLS table loaded.", |
| 631 | __func__); | ||
| 630 | return -EINVAL; | 632 | return -EINVAL; |
| 631 | } | 633 | } |
| 632 | 634 | ||
| 633 | *out = result = kmalloc(maxlen, GFP_NOFS); | 635 | *out = result = kmalloc(maxlen, GFP_NOFS); |
| 634 | if (!*out) { | 636 | if (!*out) { |
| 635 | befs_error(sb, "befs_nls2utf() cannot allocate memory"); | 637 | befs_error(sb, "%s cannot allocate memory", __func__); |
| 636 | *out_len = 0; | 638 | *out_len = 0; |
| 637 | return -ENOMEM; | 639 | return -ENOMEM; |
| 638 | } | 640 | } |
| @@ -653,14 +655,14 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
| 653 | result[o] = '\0'; | 655 | result[o] = '\0'; |
| 654 | *out_len = o; | 656 | *out_len = o; |
| 655 | 657 | ||
| 656 | befs_debug(sb, "<--- nls2utf()"); | 658 | befs_debug(sb, "<--- %s", __func__); |
| 657 | 659 | ||
| 658 | return i; | 660 | return i; |
| 659 | 661 | ||
| 660 | conv_err: | 662 | conv_err: |
| 661 | befs_error(sb, "Name using charecter set %s contains a charecter that " | 663 | befs_error(sb, "Name using charecter set %s contains a charecter that " |
| 662 | "cannot be converted to unicode.", nls->charset); | 664 | "cannot be converted to unicode.", nls->charset); |
| 663 | befs_debug(sb, "<--- nls2utf()"); | 665 | befs_debug(sb, "<--- %s", __func__); |
| 664 | kfree(result); | 666 | kfree(result); |
| 665 | return -EILSEQ; | 667 | return -EILSEQ; |
| 666 | } | 668 | } |
| @@ -715,8 +717,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
| 715 | if (option >= 0) | 717 | if (option >= 0) |
| 716 | uid = make_kuid(current_user_ns(), option); | 718 | uid = make_kuid(current_user_ns(), option); |
| 717 | if (!uid_valid(uid)) { | 719 | if (!uid_valid(uid)) { |
| 718 | printk(KERN_ERR "BeFS: Invalid uid %d, " | 720 | pr_err("Invalid uid %d, " |
| 719 | "using default\n", option); | 721 | "using default\n", option); |
| 720 | break; | 722 | break; |
| 721 | } | 723 | } |
| 722 | opts->uid = uid; | 724 | opts->uid = uid; |
| @@ -729,8 +731,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
| 729 | if (option >= 0) | 731 | if (option >= 0) |
| 730 | gid = make_kgid(current_user_ns(), option); | 732 | gid = make_kgid(current_user_ns(), option); |
| 731 | if (!gid_valid(gid)) { | 733 | if (!gid_valid(gid)) { |
| 732 | printk(KERN_ERR "BeFS: Invalid gid %d, " | 734 | pr_err("Invalid gid %d, " |
| 733 | "using default\n", option); | 735 | "using default\n", option); |
| 734 | break; | 736 | break; |
| 735 | } | 737 | } |
| 736 | opts->gid = gid; | 738 | opts->gid = gid; |
| @@ -740,8 +742,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
| 740 | kfree(opts->iocharset); | 742 | kfree(opts->iocharset); |
| 741 | opts->iocharset = match_strdup(&args[0]); | 743 | opts->iocharset = match_strdup(&args[0]); |
| 742 | if (!opts->iocharset) { | 744 | if (!opts->iocharset) { |
| 743 | printk(KERN_ERR "BeFS: allocation failure for " | 745 | pr_err("allocation failure for " |
| 744 | "iocharset string\n"); | 746 | "iocharset string\n"); |
| 745 | return 0; | 747 | return 0; |
| 746 | } | 748 | } |
| 747 | break; | 749 | break; |
| @@ -749,8 +751,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
| 749 | opts->debug = 1; | 751 | opts->debug = 1; |
| 750 | break; | 752 | break; |
| 751 | default: | 753 | default: |
| 752 | printk(KERN_ERR "BeFS: Unrecognized mount option \"%s\" " | 754 | pr_err("Unrecognized mount option \"%s\" " |
| 753 | "or missing value\n", p); | 755 | "or missing value\n", p); |
| 754 | return 0; | 756 | return 0; |
| 755 | } | 757 | } |
| 756 | } | 758 | } |
| @@ -791,22 +793,20 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 791 | 793 | ||
| 792 | save_mount_options(sb, data); | 794 | save_mount_options(sb, data); |
| 793 | 795 | ||
| 794 | sb->s_fs_info = kmalloc(sizeof (*befs_sb), GFP_KERNEL); | 796 | sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL); |
| 795 | if (sb->s_fs_info == NULL) { | 797 | if (sb->s_fs_info == NULL) { |
| 796 | printk(KERN_ERR | 798 | pr_err("(%s): Unable to allocate memory for private " |
| 797 | "BeFS(%s): Unable to allocate memory for private " | ||
| 798 | "portion of superblock. Bailing.\n", sb->s_id); | 799 | "portion of superblock. Bailing.\n", sb->s_id); |
| 799 | goto unacquire_none; | 800 | goto unacquire_none; |
| 800 | } | 801 | } |
| 801 | befs_sb = BEFS_SB(sb); | 802 | befs_sb = BEFS_SB(sb); |
| 802 | memset(befs_sb, 0, sizeof(befs_sb_info)); | ||
| 803 | 803 | ||
| 804 | if (!parse_options((char *) data, &befs_sb->mount_opts)) { | 804 | if (!parse_options((char *) data, &befs_sb->mount_opts)) { |
| 805 | befs_error(sb, "cannot parse mount options"); | 805 | befs_error(sb, "cannot parse mount options"); |
| 806 | goto unacquire_priv_sbp; | 806 | goto unacquire_priv_sbp; |
| 807 | } | 807 | } |
| 808 | 808 | ||
| 809 | befs_debug(sb, "---> befs_fill_super()"); | 809 | befs_debug(sb, "---> %s", __func__); |
| 810 | 810 | ||
| 811 | #ifndef CONFIG_BEFS_RW | 811 | #ifndef CONFIG_BEFS_RW |
| 812 | if (!(sb->s_flags & MS_RDONLY)) { | 812 | if (!(sb->s_flags & MS_RDONLY)) { |
| @@ -854,7 +854,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 854 | goto unacquire_priv_sbp; | 854 | goto unacquire_priv_sbp; |
| 855 | 855 | ||
| 856 | if( befs_sb->num_blocks > ~((sector_t)0) ) { | 856 | if( befs_sb->num_blocks > ~((sector_t)0) ) { |
| 857 | befs_error(sb, "blocks count: %Lu " | 857 | befs_error(sb, "blocks count: %llu " |
| 858 | "is larger than the host can use", | 858 | "is larger than the host can use", |
| 859 | befs_sb->num_blocks); | 859 | befs_sb->num_blocks); |
| 860 | goto unacquire_priv_sbp; | 860 | goto unacquire_priv_sbp; |
| @@ -913,6 +913,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 913 | static int | 913 | static int |
| 914 | befs_remount(struct super_block *sb, int *flags, char *data) | 914 | befs_remount(struct super_block *sb, int *flags, char *data) |
| 915 | { | 915 | { |
| 916 | sync_filesystem(sb); | ||
| 916 | if (!(*flags & MS_RDONLY)) | 917 | if (!(*flags & MS_RDONLY)) |
| 917 | return -EINVAL; | 918 | return -EINVAL; |
| 918 | return 0; | 919 | return 0; |
| @@ -924,7 +925,7 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 924 | struct super_block *sb = dentry->d_sb; | 925 | struct super_block *sb = dentry->d_sb; |
| 925 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 926 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
| 926 | 927 | ||
| 927 | befs_debug(sb, "---> befs_statfs()"); | 928 | befs_debug(sb, "---> %s", __func__); |
| 928 | 929 | ||
| 929 | buf->f_type = BEFS_SUPER_MAGIC; | 930 | buf->f_type = BEFS_SUPER_MAGIC; |
| 930 | buf->f_bsize = sb->s_blocksize; | 931 | buf->f_bsize = sb->s_blocksize; |
| @@ -937,7 +938,7 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 937 | buf->f_fsid.val[1] = (u32)(id >> 32); | 938 | buf->f_fsid.val[1] = (u32)(id >> 32); |
| 938 | buf->f_namelen = BEFS_NAME_LEN; | 939 | buf->f_namelen = BEFS_NAME_LEN; |
| 939 | 940 | ||
| 940 | befs_debug(sb, "<--- befs_statfs()"); | 941 | befs_debug(sb, "<--- %s", __func__); |
| 941 | 942 | ||
| 942 | return 0; | 943 | return 0; |
| 943 | } | 944 | } |
| @@ -963,7 +964,7 @@ init_befs_fs(void) | |||
| 963 | { | 964 | { |
| 964 | int err; | 965 | int err; |
| 965 | 966 | ||
| 966 | printk(KERN_INFO "BeFS version: %s\n", BEFS_VERSION); | 967 | pr_info("version: %s\n", BEFS_VERSION); |
| 967 | 968 | ||
| 968 | err = befs_init_inodecache(); | 969 | err = befs_init_inodecache(); |
| 969 | if (err) | 970 | if (err) |
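Besides the __func__ conversions, the linuxvfs.c diff above carries two recurring cleanups: a pr_fmt definition so every pr_err()/pr_info() is automatically prefixed with the module name (letting the hand-written "BeFS: " prefixes go away), and kzalloc() replacing the kmalloc()-then-memset() pair for the private superblock info. A user-space analogue of the allocation change, with an invented struct for illustration:

```c
#include <stdlib.h>

struct sb_info {			/* illustrative stand-in for befs_sb_info */
	unsigned long num_blocks;
	char *iocharset;
};

static struct sb_info *alloc_sb_info(void)
{
	/* Before: malloc() followed by memset(p, 0, sizeof(*p)).
	 * After: one call that allocates and zeroes, just as kzalloc()
	 * folds kmalloc()+memset() into a single step in the kernel. */
	return calloc(1, sizeof(struct sb_info));
}

int main(void)
{
	struct sb_info *info = alloc_sb_info();

	if (!info)
		return 1;
	/* every field is guaranteed zero at this point */
	free(info);
	return 0;
}
```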
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8defc6b3f9a2..29aa5cf6639b 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
| @@ -172,7 +172,7 @@ static void bfs_evict_inode(struct inode *inode) | |||
| 172 | 172 | ||
| 173 | dprintf("ino=%08lx\n", ino); | 173 | dprintf("ino=%08lx\n", ino); |
| 174 | 174 | ||
| 175 | truncate_inode_pages(&inode->i_data, 0); | 175 | truncate_inode_pages_final(&inode->i_data); |
| 176 | invalidate_inode_buffers(inode); | 176 | invalidate_inode_buffers(inode); |
| 177 | clear_inode(inode); | 177 | clear_inode(inode); |
| 178 | 178 | ||
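The one-line conversion in bfs_evict_inode() above recurs across nearly every filesystem in this series: truncate_inode_pages(mapping, 0) becomes truncate_inode_pages_final(mapping) in ->evict_inode(), marking the terminal truncate of a dying inode (which is also what lets the page cache keep shadow entries, per the nrshadows check added to fs/block_dev.c further down). A kernel-style sketch of the resulting eviction shape; it compiles only in-kernel and the function name is illustrative:

```c
/* Kernel-style sketch, not a standalone program. */
static void example_evict_inode(struct inode *inode)
{
	/* was: truncate_inode_pages(&inode->i_data, 0); */
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
}
```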
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 67be2951b98a..0f59799fa105 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -46,10 +46,15 @@ | |||
| 46 | #endif | 46 | #endif |
| 47 | 47 | ||
| 48 | static int load_elf_binary(struct linux_binprm *bprm); | 48 | static int load_elf_binary(struct linux_binprm *bprm); |
| 49 | static int load_elf_library(struct file *); | ||
| 50 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, | 49 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, |
| 51 | int, int, unsigned long); | 50 | int, int, unsigned long); |
| 52 | 51 | ||
| 52 | #ifdef CONFIG_USELIB | ||
| 53 | static int load_elf_library(struct file *); | ||
| 54 | #else | ||
| 55 | #define load_elf_library NULL | ||
| 56 | #endif | ||
| 57 | |||
| 53 | /* | 58 | /* |
| 54 | * If we don't support core dumping, then supply a NULL so we | 59 | * If we don't support core dumping, then supply a NULL so we |
| 55 | * don't even try. | 60 | * don't even try. |
| @@ -1005,6 +1010,7 @@ out_free_ph: | |||
| 1005 | goto out; | 1010 | goto out; |
| 1006 | } | 1011 | } |
| 1007 | 1012 | ||
| 1013 | #ifdef CONFIG_USELIB | ||
| 1008 | /* This is really simpleminded and specialized - we are loading an | 1014 | /* This is really simpleminded and specialized - we are loading an |
| 1009 | a.out library that is given an ELF header. */ | 1015 | a.out library that is given an ELF header. */ |
| 1010 | static int load_elf_library(struct file *file) | 1016 | static int load_elf_library(struct file *file) |
| @@ -1083,6 +1089,7 @@ out_free_ph: | |||
| 1083 | out: | 1089 | out: |
| 1084 | return error; | 1090 | return error; |
| 1085 | } | 1091 | } |
| 1092 | #endif /* #ifdef CONFIG_USELIB */ | ||
| 1086 | 1093 | ||
| 1087 | #ifdef CONFIG_ELF_CORE | 1094 | #ifdef CONFIG_ELF_CORE |
| 1088 | /* | 1095 | /* |
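The binfmt_elf.c hunks gate load_elf_library() behind CONFIG_USELIB: when the uselib(2) syscall is configured out, the function body is not compiled at all and the name is defined to NULL so the binfmt table still has something to store. The same gate-or-NULL idiom as a standalone program (build with -DCONFIG_USELIB to enable the feature; all names are illustrative):

```c
#include <stdio.h>

#ifdef CONFIG_USELIB
static int load_library(const char *name)
{
	printf("loading %s\n", name);
	return 0;
}
#else
#define load_library NULL	/* feature compiled out entirely */
#endif

int main(void)
{
	int (*loader)(const char *) = load_library;

	if (loader)
		return loader("libdemo.so");
	fprintf(stderr, "uselib support not built in\n");
	return 0;
}
```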
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 1c740e152f38..b60500300dd7 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
| @@ -656,6 +656,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, | |||
| 656 | 656 | ||
| 657 | mutex_unlock(&root->d_inode->i_mutex); | 657 | mutex_unlock(&root->d_inode->i_mutex); |
| 658 | dput(root); | 658 | dput(root); |
| 659 | break; | ||
| 659 | default: return res; | 660 | default: return res; |
| 660 | } | 661 | } |
| 661 | return count; | 662 | return count; |
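The binfmt_misc.c change is a one-line bug fix: the successful switch case ended without a break, so control fell through into "default: return res;" and the function returned the wrong value instead of reaching "return count;". A minimal reproduction of the fall-through, with invented values:

```c
#include <stdio.h>

static int handle(int cmd, int res, int count)
{
	switch (cmd) {
	case 1:
		/* ... do the work ... */
		break;		/* without this, control falls into default */
	default:
		return res;
	}
	return count;
}

int main(void)
{
	/* prints 10; before the fix the fall-through returned -1 */
	printf("%d\n", handle(1, -1, 10));
	return 0;
}
```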
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78a6908..29696b78d1f4 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
| @@ -301,25 +301,25 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) | |||
| 301 | EXPORT_SYMBOL(bio_integrity_get_tag); | 301 | EXPORT_SYMBOL(bio_integrity_get_tag); |
| 302 | 302 | ||
| 303 | /** | 303 | /** |
| 304 | * bio_integrity_generate - Generate integrity metadata for a bio | 304 | * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio |
| 305 | * @bio: bio to generate integrity metadata for | 305 | * @bio: bio to generate/verify integrity metadata for |
| 306 | * | 306 | * @operate: operation to perform, 1 for generate, 0 for verify |
| 307 | * Description: Generates integrity metadata for a bio by calling the | ||
| 308 | * block device's generation callback function. The bio must have a | ||
| 309 | * bip attached with enough room to accommodate the generated | ||
| 310 | * integrity metadata. | ||
| 311 | */ | 307 | */ |
| 312 | static void bio_integrity_generate(struct bio *bio) | 308 | static int bio_integrity_generate_verify(struct bio *bio, int operate) |
| 313 | { | 309 | { |
| 314 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | 310 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 315 | struct blk_integrity_exchg bix; | 311 | struct blk_integrity_exchg bix; |
| 316 | struct bio_vec bv; | 312 | struct bio_vec bv; |
| 317 | struct bvec_iter iter; | 313 | struct bvec_iter iter; |
| 318 | sector_t sector = bio->bi_iter.bi_sector; | 314 | sector_t sector; |
| 319 | unsigned int sectors, total; | 315 | unsigned int sectors, ret = 0; |
| 320 | void *prot_buf = bio->bi_integrity->bip_buf; | 316 | void *prot_buf = bio->bi_integrity->bip_buf; |
| 321 | 317 | ||
| 322 | total = 0; | 318 | if (operate) |
| 319 | sector = bio->bi_iter.bi_sector; | ||
| 320 | else | ||
| 321 | sector = bio->bi_integrity->bip_iter.bi_sector; | ||
| 322 | |||
| 323 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | 323 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; |
| 324 | bix.sector_size = bi->sector_size; | 324 | bix.sector_size = bi->sector_size; |
| 325 | 325 | ||
| @@ -330,16 +330,37 @@ static void bio_integrity_generate(struct bio *bio) | |||
| 330 | bix.prot_buf = prot_buf; | 330 | bix.prot_buf = prot_buf; |
| 331 | bix.sector = sector; | 331 | bix.sector = sector; |
| 332 | 332 | ||
| 333 | bi->generate_fn(&bix); | 333 | if (operate) { |
| 334 | bi->generate_fn(&bix); | ||
| 335 | } else { | ||
| 336 | ret = bi->verify_fn(&bix); | ||
| 337 | if (ret) { | ||
| 338 | kunmap_atomic(kaddr); | ||
| 339 | return ret; | ||
| 340 | } | ||
| 341 | } | ||
| 334 | 342 | ||
| 335 | sectors = bv.bv_len / bi->sector_size; | 343 | sectors = bv.bv_len / bi->sector_size; |
| 336 | sector += sectors; | 344 | sector += sectors; |
| 337 | prot_buf += sectors * bi->tuple_size; | 345 | prot_buf += sectors * bi->tuple_size; |
| 338 | total += sectors * bi->tuple_size; | ||
| 339 | BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); | ||
| 340 | 346 | ||
| 341 | kunmap_atomic(kaddr); | 347 | kunmap_atomic(kaddr); |
| 342 | } | 348 | } |
| 349 | return ret; | ||
| 350 | } | ||
| 351 | |||
| 352 | /** | ||
| 353 | * bio_integrity_generate - Generate integrity metadata for a bio | ||
| 354 | * @bio: bio to generate integrity metadata for | ||
| 355 | * | ||
| 356 | * Description: Generates integrity metadata for a bio by calling the | ||
| 357 | * block device's generation callback function. The bio must have a | ||
| 358 | * bip attached with enough room to accommodate the generated | ||
| 359 | * integrity metadata. | ||
| 360 | */ | ||
| 361 | static void bio_integrity_generate(struct bio *bio) | ||
| 362 | { | ||
| 363 | bio_integrity_generate_verify(bio, 1); | ||
| 343 | } | 364 | } |
| 344 | 365 | ||
| 345 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | 366 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) |
| @@ -454,43 +475,7 @@ EXPORT_SYMBOL(bio_integrity_prep); | |||
| 454 | */ | 475 | */ |
| 455 | static int bio_integrity_verify(struct bio *bio) | 476 | static int bio_integrity_verify(struct bio *bio) |
| 456 | { | 477 | { |
| 457 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | 478 | return bio_integrity_generate_verify(bio, 0); |
| 458 | struct blk_integrity_exchg bix; | ||
| 459 | struct bio_vec *bv; | ||
| 460 | sector_t sector = bio->bi_integrity->bip_iter.bi_sector; | ||
| 461 | unsigned int sectors, total, ret; | ||
| 462 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
| 463 | int i; | ||
| 464 | |||
| 465 | ret = total = 0; | ||
| 466 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
| 467 | bix.sector_size = bi->sector_size; | ||
| 468 | |||
| 469 | bio_for_each_segment_all(bv, bio, i) { | ||
| 470 | void *kaddr = kmap_atomic(bv->bv_page); | ||
| 471 | |||
| 472 | bix.data_buf = kaddr + bv->bv_offset; | ||
| 473 | bix.data_size = bv->bv_len; | ||
| 474 | bix.prot_buf = prot_buf; | ||
| 475 | bix.sector = sector; | ||
| 476 | |||
| 477 | ret = bi->verify_fn(&bix); | ||
| 478 | |||
| 479 | if (ret) { | ||
| 480 | kunmap_atomic(kaddr); | ||
| 481 | return ret; | ||
| 482 | } | ||
| 483 | |||
| 484 | sectors = bv->bv_len / bi->sector_size; | ||
| 485 | sector += sectors; | ||
| 486 | prot_buf += sectors * bi->tuple_size; | ||
| 487 | total += sectors * bi->tuple_size; | ||
| 488 | BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); | ||
| 489 | |||
| 490 | kunmap_atomic(kaddr); | ||
| 491 | } | ||
| 492 | |||
| 493 | return ret; | ||
| 494 | } | 479 | } |
| 495 | 480 | ||
| 496 | /** | 481 | /** |
| @@ -116,7 +116,6 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) | |||
| 116 | if (!slab) | 116 | if (!slab) |
| 117 | goto out_unlock; | 117 | goto out_unlock; |
| 118 | 118 | ||
| 119 | printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry); | ||
| 120 | bslab->slab = slab; | 119 | bslab->slab = slab; |
| 121 | bslab->slab_ref = 1; | 120 | bslab->slab_ref = 1; |
| 122 | bslab->slab_size = sz; | 121 | bslab->slab_size = sz; |
| @@ -1970,7 +1969,7 @@ int bio_associate_current(struct bio *bio) | |||
| 1970 | 1969 | ||
| 1971 | /* associate blkcg if exists */ | 1970 | /* associate blkcg if exists */ |
| 1972 | rcu_read_lock(); | 1971 | rcu_read_lock(); |
| 1973 | css = task_css(current, blkio_subsys_id); | 1972 | css = task_css(current, blkio_cgrp_id); |
| 1974 | if (css && css_tryget(css)) | 1973 | if (css && css_tryget(css)) |
| 1975 | bio->bi_css = css; | 1974 | bio->bi_css = css; |
| 1976 | rcu_read_unlock(); | 1975 | rcu_read_unlock(); |
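The bio-integrity hunks above deduplicate two near-identical per-segment loops into a single bio_integrity_generate_verify() helper selected by an operate flag, with the verify path bailing out on the first mismatch (the flag also picks the starting sector). A user-space sketch of the same refactor; the toy checksum and every name here are invented for illustration:

```c
#include <stdio.h>

static void generate_csum(const char *buf, unsigned char *out)
{
	*out = (unsigned char)buf[0] ^ 0x5a;	/* toy checksum */
}

static int verify_csum(const char *buf, unsigned char expect)
{
	return ((unsigned char)buf[0] ^ 0x5a) == expect ? 0 : -1;
}

/* One loop serves both paths, selected by @operate (1 = generate). */
static int process(const char *bufs[], unsigned char *csums, int n, int operate)
{
	for (int i = 0; i < n; i++) {
		if (operate)
			generate_csum(bufs[i], &csums[i]);
		else if (verify_csum(bufs[i], csums[i]))
			return -1;	/* verify bails on first mismatch */
	}
	return 0;
}

int main(void)
{
	const char *bufs[] = { "abc", "xyz" };
	unsigned char csums[2];

	process(bufs, csums, 2, 1);				/* generate */
	printf("verify: %d\n", process(bufs, csums, 2, 0));	/* 0 == ok */
	return 0;
}
```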
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e86823a9cbd..ba0d2b05bb78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -83,7 +83,7 @@ void kill_bdev(struct block_device *bdev) | |||
| 83 | { | 83 | { |
| 84 | struct address_space *mapping = bdev->bd_inode->i_mapping; | 84 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
| 85 | 85 | ||
| 86 | if (mapping->nrpages == 0) | 86 | if (mapping->nrpages == 0 && mapping->nrshadows == 0) |
| 87 | return; | 87 | return; |
| 88 | 88 | ||
| 89 | invalidate_bh_lrus(); | 89 | invalidate_bh_lrus(); |
| @@ -419,7 +419,7 @@ static void bdev_evict_inode(struct inode *inode) | |||
| 419 | { | 419 | { |
| 420 | struct block_device *bdev = &BDEV_I(inode)->bdev; | 420 | struct block_device *bdev = &BDEV_I(inode)->bdev; |
| 421 | struct list_head *p; | 421 | struct list_head *p; |
| 422 | truncate_inode_pages(&inode->i_data, 0); | 422 | truncate_inode_pages_final(&inode->i_data); |
| 423 | invalidate_inode_buffers(inode); /* is it needed here? */ | 423 | invalidate_inode_buffers(inode); /* is it needed here? */ |
| 424 | clear_inode(inode); | 424 | clear_inode(inode); |
| 425 | spin_lock(&bdev_lock); | 425 | spin_lock(&bdev_lock); |
| @@ -1523,7 +1523,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1523 | ssize_t err; | 1523 | ssize_t err; |
| 1524 | 1524 | ||
| 1525 | err = generic_write_sync(file, pos, ret); | 1525 | err = generic_write_sync(file, pos, ret); |
| 1526 | if (err < 0 && ret > 0) | 1526 | if (err < 0) |
| 1527 | ret = err; | 1527 | ret = err; |
| 1528 | } | 1528 | } |
| 1529 | blk_finish_plug(&plug); | 1529 | blk_finish_plug(&plug); |
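The blkdev_aio_write() change above drops the `ret > 0` guard: a generic_write_sync() failure now replaces the return value unconditionally, so the caller sees the sync error rather than a stale byte count. The semantics in miniature (finish_write() and the values are illustrative):

```c
#include <stdio.h>

static long finish_write(long ret, long sync_err)
{
	if (sync_err < 0)	/* was: if (sync_err < 0 && ret > 0) */
		ret = sync_err;
	return ret;
}

int main(void)
{
	/* now reports -5; the old guard would have returned 0 */
	printf("%ld\n", finish_write(0, -5));
	return 0;
}
```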
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c1e0b0caf9cc..ecb5832c0967 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
| 3 | * Copyright (C) 2014 Fujitsu. All rights reserved. | ||
| 3 | * | 4 | * |
| 4 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public | 6 | * modify it under the terms of the GNU General Public |
| @@ -21,708 +22,313 @@ | |||
| 21 | #include <linux/list.h> | 22 | #include <linux/list.h> |
| 22 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
| 23 | #include <linux/freezer.h> | 24 | #include <linux/freezer.h> |
| 25 | #include <linux/workqueue.h> | ||
| 24 | #include "async-thread.h" | 26 | #include "async-thread.h" |
| 27 | #include "ctree.h" | ||
| 28 | |||
| 29 | #define WORK_DONE_BIT 0 | ||
| 30 | #define WORK_ORDER_DONE_BIT 1 | ||
| 31 | #define WORK_HIGH_PRIO_BIT 2 | ||
| 32 | |||
| 33 | #define NO_THRESHOLD (-1) | ||
| 34 | #define DFT_THRESHOLD (32) | ||
| 35 | |||
| 36 | struct __btrfs_workqueue { | ||
| 37 | struct workqueue_struct *normal_wq; | ||
| 38 | /* List head pointing to ordered work list */ | ||
| 39 | struct list_head ordered_list; | ||
| 40 | |||
| 41 | /* Spinlock for ordered_list */ | ||
| 42 | spinlock_t list_lock; | ||
| 43 | |||
| 44 | /* Thresholding related variants */ | ||
| 45 | atomic_t pending; | ||
| 46 | int max_active; | ||
| 47 | int current_max; | ||
| 48 | int thresh; | ||
| 49 | unsigned int count; | ||
| 50 | spinlock_t thres_lock; | ||
| 51 | }; | ||
| 25 | 52 | ||
| 26 | #define WORK_QUEUED_BIT 0 | 53 | struct btrfs_workqueue { |
| 27 | #define WORK_DONE_BIT 1 | 54 | struct __btrfs_workqueue *normal; |
| 28 | #define WORK_ORDER_DONE_BIT 2 | 55 | struct __btrfs_workqueue *high; |
| 29 | #define WORK_HIGH_PRIO_BIT 3 | 56 | }; |
| 30 | |||
| 31 | /* | ||
| 32 | * container for the kthread task pointer and the list of pending work | ||
| 33 | * One of these is allocated per thread. | ||
| 34 | */ | ||
| 35 | struct btrfs_worker_thread { | ||
| 36 | /* pool we belong to */ | ||
| 37 | struct btrfs_workers *workers; | ||
| 38 | |||
| 39 | /* list of struct btrfs_work that are waiting for service */ | ||
| 40 | struct list_head pending; | ||
| 41 | struct list_head prio_pending; | ||
| 42 | |||
| 43 | /* list of worker threads from struct btrfs_workers */ | ||
| 44 | struct list_head worker_list; | ||
| 45 | |||
| 46 | /* kthread */ | ||
| 47 | struct task_struct *task; | ||
| 48 | 57 | ||
| 49 | /* number of things on the pending list */ | 58 | static inline struct __btrfs_workqueue |
| 50 | atomic_t num_pending; | 59 | *__btrfs_alloc_workqueue(const char *name, int flags, int max_active, |
| 60 | int thresh) | ||
| 61 | { | ||
| 62 | struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
| 51 | 63 | ||
| 52 | /* reference counter for this struct */ | 64 | if (unlikely(!ret)) |
| 53 | atomic_t refs; | 65 | return NULL; |
| 54 | 66 | ||
| 55 | unsigned long sequence; | 67 | ret->max_active = max_active; |
| 68 | atomic_set(&ret->pending, 0); | ||
| 69 | if (thresh == 0) | ||
| 70 | thresh = DFT_THRESHOLD; | ||
| 71 | /* For low threshold, disabling threshold is a better choice */ | ||
| 72 | if (thresh < DFT_THRESHOLD) { | ||
| 73 | ret->current_max = max_active; | ||
| 74 | ret->thresh = NO_THRESHOLD; | ||
| 75 | } else { | ||
| 76 | ret->current_max = 1; | ||
| 77 | ret->thresh = thresh; | ||
| 78 | } | ||
| 56 | 79 | ||
| 57 | /* protects the pending list. */ | 80 | if (flags & WQ_HIGHPRI) |
| 58 | spinlock_t lock; | 81 | ret->normal_wq = alloc_workqueue("%s-%s-high", flags, |
| 82 | ret->max_active, | ||
| 83 | "btrfs", name); | ||
| 84 | else | ||
| 85 | ret->normal_wq = alloc_workqueue("%s-%s", flags, | ||
| 86 | ret->max_active, "btrfs", | ||
| 87 | name); | ||
| 88 | if (unlikely(!ret->normal_wq)) { | ||
| 89 | kfree(ret); | ||
| 90 | return NULL; | ||
| 91 | } | ||
| 59 | 92 | ||
| 60 | /* set to non-zero when this thread is already awake and kicking */ | 93 | INIT_LIST_HEAD(&ret->ordered_list); |
| 61 | int working; | 94 | spin_lock_init(&ret->list_lock); |
| 95 | spin_lock_init(&ret->thres_lock); | ||
| 96 | trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI); | ||
| 97 | return ret; | ||
| 98 | } | ||
| 62 | 99 | ||
| 63 | /* are we currently idle */ | 100 | static inline void |
| 64 | int idle; | 101 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); |
| 65 | }; | ||
| 66 | 102 | ||
| 67 | static int __btrfs_start_workers(struct btrfs_workers *workers); | 103 | struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, |
| 104 | int flags, | ||
| 105 | int max_active, | ||
| 106 | int thresh) | ||
| 107 | { | ||
| 108 | struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
| 68 | 109 | ||
| 69 | /* | 110 | if (unlikely(!ret)) |
| 70 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | 111 | return NULL; |
| 71 | * for a very long time. It will actually throttle on page writeback, | ||
| 72 | * and so it may not make progress until after our btrfs worker threads | ||
| 73 | * process all of the pending work structs in their queue | ||
| 74 | * | ||
| 75 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
| 76 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
| 77 | * involves all of the worker threads. | ||
| 78 | * | ||
| 79 | * Instead we have a helper queue who never has more than one thread | ||
| 80 | * where we scheduler thread start operations. This worker_start struct | ||
| 81 | * is used to contain the work and hold a pointer to the queue that needs | ||
| 82 | * another worker. | ||
| 83 | */ | ||
| 84 | struct worker_start { | ||
| 85 | struct btrfs_work work; | ||
| 86 | struct btrfs_workers *queue; | ||
| 87 | }; | ||
| 88 | 112 | ||
| 89 | static void start_new_worker_func(struct btrfs_work *work) | 113 | ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, |
| 90 | { | 114 | max_active, thresh); |
| 91 | struct worker_start *start; | 115 | if (unlikely(!ret->normal)) { |
| 92 | start = container_of(work, struct worker_start, work); | 116 | kfree(ret); |
| 93 | __btrfs_start_workers(start->queue); | 117 | return NULL; |
| 94 | kfree(start); | 118 | } |
| 95 | } | ||
| 96 | 119 | ||
| 97 | /* | 120 | if (flags & WQ_HIGHPRI) { |
| 98 | * helper function to move a thread onto the idle list after it | 121 | ret->high = __btrfs_alloc_workqueue(name, flags, max_active, |
| 99 | * has finished some requests. | 122 | thresh); |
| 100 | */ | 123 | if (unlikely(!ret->high)) { |
| 101 | static void check_idle_worker(struct btrfs_worker_thread *worker) | 124 | __btrfs_destroy_workqueue(ret->normal); |
| 102 | { | 125 | kfree(ret); |
| 103 | if (!worker->idle && atomic_read(&worker->num_pending) < | 126 | return NULL; |
| 104 | worker->workers->idle_thresh / 2) { | ||
| 105 | unsigned long flags; | ||
| 106 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
| 107 | worker->idle = 1; | ||
| 108 | |||
| 109 | /* the list may be empty if the worker is just starting */ | ||
| 110 | if (!list_empty(&worker->worker_list) && | ||
| 111 | !worker->workers->stopping) { | ||
| 112 | list_move(&worker->worker_list, | ||
| 113 | &worker->workers->idle_list); | ||
| 114 | } | 127 | } |
| 115 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
| 116 | } | 128 | } |
| 129 | return ret; | ||
| 117 | } | 130 | } |
| 118 | 131 | ||
| 119 | /* | 132 | /* |
| 120 | * helper function to move a thread off the idle list after new | 133 | * Hook for threshold which will be called in btrfs_queue_work. |
| 121 | * pending work is added. | 134 | * This hook WILL be called in IRQ handler context, |
| 135 | * so workqueue_set_max_active MUST NOT be called in this hook | ||
| 122 | */ | 136 | */ |
| 123 | static void check_busy_worker(struct btrfs_worker_thread *worker) | 137 | static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) |
| 124 | { | 138 | { |
| 125 | if (worker->idle && atomic_read(&worker->num_pending) >= | 139 | if (wq->thresh == NO_THRESHOLD) |
| 126 | worker->workers->idle_thresh) { | 140 | return; |
| 127 | unsigned long flags; | 141 | atomic_inc(&wq->pending); |
| 128 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
| 129 | worker->idle = 0; | ||
| 130 | |||
| 131 | if (!list_empty(&worker->worker_list) && | ||
| 132 | !worker->workers->stopping) { | ||
| 133 | list_move_tail(&worker->worker_list, | ||
| 134 | &worker->workers->worker_list); | ||
| 135 | } | ||
| 136 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
| 137 | } | ||
| 138 | } | 142 | } |
| 139 | 143 | ||
| 140 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) | 144 | /* |
| 145 | * Hook for threshold which will be called before executing the work. | ||
| 146 | * This hook is called in kthread context, | ||
| 147 | * so workqueue_set_max_active is called here. | ||
| 148 | */ | ||
| 149 | static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) | ||
| 141 | { | 150 | { |
| 142 | struct btrfs_workers *workers = worker->workers; | 151 | int new_max_active; |
| 143 | struct worker_start *start; | 152 | long pending; |
| 144 | unsigned long flags; | 153 | int need_change = 0; |
| 145 | 154 | ||
| 146 | rmb(); | 155 | if (wq->thresh == NO_THRESHOLD) |
| 147 | if (!workers->atomic_start_pending) | ||
| 148 | return; | 156 | return; |
| 149 | 157 | ||
| 150 | start = kzalloc(sizeof(*start), GFP_NOFS); | 158 | atomic_dec(&wq->pending); |
| 151 | if (!start) | 159 | spin_lock(&wq->thres_lock); |
| 152 | return; | 160 | /* |
| 153 | 161 | * Use wq->count to limit the calling frequency of | |
| 154 | start->work.func = start_new_worker_func; | 162 | * workqueue_set_max_active. |
| 155 | start->queue = workers; | 163 | */ |
| 156 | 164 | wq->count++; | |
| 157 | spin_lock_irqsave(&workers->lock, flags); | 165 | wq->count %= (wq->thresh / 4); |
| 158 | if (!workers->atomic_start_pending) | 166 | if (!wq->count) |
| 159 | goto out; | 167 | goto out; |
| 160 | 168 | new_max_active = wq->current_max; | |
| 161 | workers->atomic_start_pending = 0; | ||
| 162 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 163 | workers->max_workers) | ||
| 164 | goto out; | ||
| 165 | |||
| 166 | workers->num_workers_starting += 1; | ||
| 167 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 168 | btrfs_queue_worker(workers->atomic_worker_start, &start->work); | ||
| 169 | return; | ||
| 170 | 169 | ||
| 170 | /* | ||
| 171 | * pending may be changed later, but it's OK since we really | ||
| 172 | * don't need it to be that accurate to calculate new_max_active. | ||
| 173 | */ | ||
| 174 | pending = atomic_read(&wq->pending); | ||
| 175 | if (pending > wq->thresh) | ||
| 176 | new_max_active++; | ||
| 177 | if (pending < wq->thresh / 2) | ||
| 178 | new_max_active--; | ||
| 179 | new_max_active = clamp_val(new_max_active, 1, wq->max_active); | ||
| 180 | if (new_max_active != wq->current_max) { | ||
| 181 | need_change = 1; | ||
| 182 | wq->current_max = new_max_active; | ||
| 183 | } | ||
| 171 | out: | 184 | out: |
| 172 | kfree(start); | 185 | spin_unlock(&wq->thres_lock); |
| 173 | spin_unlock_irqrestore(&workers->lock, flags); | 186 | |
| 187 | if (need_change) { | ||
| 188 | workqueue_set_max_active(wq->normal_wq, wq->current_max); | ||
| 189 | } | ||
| 174 | } | 190 | } |
| 175 | 191 | ||
| 176 | static noinline void run_ordered_completions(struct btrfs_workers *workers, | 192 | static void run_ordered_work(struct __btrfs_workqueue *wq) |
| 177 | struct btrfs_work *work) | ||
| 178 | { | 193 | { |
| 179 | if (!workers->ordered) | 194 | struct list_head *list = &wq->ordered_list; |
| 180 | return; | 195 | struct btrfs_work *work; |
| 181 | 196 | spinlock_t *lock = &wq->list_lock; | |
| 182 | set_bit(WORK_DONE_BIT, &work->flags); | 197 | unsigned long flags; |
| 183 | |||
| 184 | spin_lock(&workers->order_lock); | ||
| 185 | 198 | ||
| 186 | while (1) { | 199 | while (1) { |
| 187 | if (!list_empty(&workers->prio_order_list)) { | 200 | spin_lock_irqsave(lock, flags); |
| 188 | work = list_entry(workers->prio_order_list.next, | 201 | if (list_empty(list)) |
| 189 | struct btrfs_work, order_list); | ||
| 190 | } else if (!list_empty(&workers->order_list)) { | ||
| 191 | work = list_entry(workers->order_list.next, | ||
| 192 | struct btrfs_work, order_list); | ||
| 193 | } else { | ||
| 194 | break; | 202 | break; |
| 195 | } | 203 | work = list_entry(list->next, struct btrfs_work, |
| 204 | ordered_list); | ||
| 196 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 205 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
| 197 | break; | 206 | break; |
| 198 | 207 | ||
| 199 | /* we are going to call the ordered done function, but | 208 | /* |
| 209 | * we are going to call the ordered done function, but | ||
| 200 | * we leave the work item on the list as a barrier so | 210 | * we leave the work item on the list as a barrier so |
| 201 | * that later work items that are done don't have their | 211 | * that later work items that are done don't have their |
| 202 | * functions called before this one returns | 212 | * functions called before this one returns |
| 203 | */ | 213 | */ |
| 204 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 214 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| 205 | break; | 215 | break; |
| 206 | 216 | trace_btrfs_ordered_sched(work); | |
| 207 | spin_unlock(&workers->order_lock); | 217 | spin_unlock_irqrestore(lock, flags); |
| 208 | |||
| 209 | work->ordered_func(work); | 218 | work->ordered_func(work); |
| 210 | 219 | ||
| 211 | /* now take the lock again and drop our item from the list */ | 220 | /* now take the lock again and drop our item from the list */ |
| 212 | spin_lock(&workers->order_lock); | 221 | spin_lock_irqsave(lock, flags); |
| 213 | list_del(&work->order_list); | 222 | list_del(&work->ordered_list); |
| 214 | spin_unlock(&workers->order_lock); | 223 | spin_unlock_irqrestore(lock, flags); |
| 215 | 224 | ||
| 216 | /* | 225 | /* |
| 217 | * we don't want to call the ordered free functions | 226 | * we don't want to call the ordered free functions |
| 218 | * with the lock held though | 227 | * with the lock held though |
| 219 | */ | 228 | */ |
| 220 | work->ordered_free(work); | 229 | work->ordered_free(work); |
| 221 | spin_lock(&workers->order_lock); | 230 | trace_btrfs_all_work_done(work); |
| 222 | } | ||
| 223 | |||
| 224 | spin_unlock(&workers->order_lock); | ||
| 225 | } | ||
| 226 | |||
| 227 | static void put_worker(struct btrfs_worker_thread *worker) | ||
| 228 | { | ||
| 229 | if (atomic_dec_and_test(&worker->refs)) | ||
| 230 | kfree(worker); | ||
| 231 | } | ||
| 232 | |||
| 233 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
| 234 | { | ||
| 235 | int freeit = 0; | ||
| 236 | |||
| 237 | spin_lock_irq(&worker->lock); | ||
| 238 | spin_lock(&worker->workers->lock); | ||
| 239 | if (worker->workers->num_workers > 1 && | ||
| 240 | worker->idle && | ||
| 241 | !worker->working && | ||
| 242 | !list_empty(&worker->worker_list) && | ||
| 243 | list_empty(&worker->prio_pending) && | ||
| 244 | list_empty(&worker->pending) && | ||
| 245 | atomic_read(&worker->num_pending) == 0) { | ||
| 246 | freeit = 1; | ||
| 247 | list_del_init(&worker->worker_list); | ||
| 248 | worker->workers->num_workers--; | ||
| 249 | } | 231 | } |
| 250 | spin_unlock(&worker->workers->lock); | 232 | spin_unlock_irqrestore(lock, flags); |
| 251 | spin_unlock_irq(&worker->lock); | ||
| 252 | |||
| 253 | if (freeit) | ||
| 254 | put_worker(worker); | ||
| 255 | return freeit; | ||
| 256 | } | 233 | } |
| 257 | 234 | ||
| 258 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | 235 | static void normal_work_helper(struct work_struct *arg) |
| 259 | struct list_head *prio_head, | ||
| 260 | struct list_head *head) | ||
| 261 | { | ||
| 262 | struct btrfs_work *work = NULL; | ||
| 263 | struct list_head *cur = NULL; | ||
| 264 | |||
| 265 | if (!list_empty(prio_head)) | ||
| 266 | cur = prio_head->next; | ||
| 267 | |||
| 268 | smp_mb(); | ||
| 269 | if (!list_empty(&worker->prio_pending)) | ||
| 270 | goto refill; | ||
| 271 | |||
| 272 | if (!list_empty(head)) | ||
| 273 | cur = head->next; | ||
| 274 | |||
| 275 | if (cur) | ||
| 276 | goto out; | ||
| 277 | |||
| 278 | refill: | ||
| 279 | spin_lock_irq(&worker->lock); | ||
| 280 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
| 281 | list_splice_tail_init(&worker->pending, head); | ||
| 282 | |||
| 283 | if (!list_empty(prio_head)) | ||
| 284 | cur = prio_head->next; | ||
| 285 | else if (!list_empty(head)) | ||
| 286 | cur = head->next; | ||
| 287 | spin_unlock_irq(&worker->lock); | ||
| 288 | |||
| 289 | if (!cur) | ||
| 290 | goto out_fail; | ||
| 291 | |||
| 292 | out: | ||
| 293 | work = list_entry(cur, struct btrfs_work, list); | ||
| 294 | |||
| 295 | out_fail: | ||
| 296 | return work; | ||
| 297 | } | ||
| 298 | |||
| 299 | /* | ||
| 300 | * main loop for servicing work items | ||
| 301 | */ | ||
| 302 | static int worker_loop(void *arg) | ||
| 303 | { | 236 | { |
| 304 | struct btrfs_worker_thread *worker = arg; | ||
| 305 | struct list_head head; | ||
| 306 | struct list_head prio_head; | ||
| 307 | struct btrfs_work *work; | 237 | struct btrfs_work *work; |
| 238 | struct __btrfs_workqueue *wq; | ||
| 239 | int need_order = 0; | ||
| 308 | 240 | ||
| 309 | INIT_LIST_HEAD(&head); | 241 | work = container_of(arg, struct btrfs_work, normal_work); |
| 310 | INIT_LIST_HEAD(&prio_head); | 242 | /* |
| 311 | 243 | * We should not touch things inside work in the following cases: | |
| 312 | do { | 244 | * 1) after work->func() if it has no ordered_free |
| 313 | again: | 245 | * Since the struct is freed in work->func(). |
| 314 | while (1) { | 246 | * 2) after setting WORK_DONE_BIT |
| 315 | 247 | * The work may be freed in other threads almost instantly. | |
| 316 | 248 | * So we save the needed things here. | |
| 317 | work = get_next_work(worker, &prio_head, &head); | 249 | */ |
| 318 | if (!work) | 250 | if (work->ordered_func) |
| 319 | break; | 251 | need_order = 1; |
| 320 | 252 | wq = work->wq; | |
| 321 | list_del(&work->list); | 253 | |
| 322 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 254 | trace_btrfs_work_sched(work); |
| 323 | 255 | thresh_exec_hook(wq); | |
| 324 | work->worker = worker; | 256 | work->func(work); |
| 325 | 257 | if (need_order) { | |
| 326 | work->func(work); | 258 | set_bit(WORK_DONE_BIT, &work->flags); |
| 327 | 259 | run_ordered_work(wq); | |
| 328 | atomic_dec(&worker->num_pending); | ||
| 329 | /* | ||
| 330 | * unless this is an ordered work queue, | ||
| 331 | * 'work' was probably freed by func above. | ||
| 332 | */ | ||
| 333 | run_ordered_completions(worker->workers, work); | ||
| 334 | |||
| 335 | check_pending_worker_creates(worker); | ||
| 336 | cond_resched(); | ||
| 337 | } | ||
| 338 | |||
| 339 | spin_lock_irq(&worker->lock); | ||
| 340 | check_idle_worker(worker); | ||
| 341 | |||
| 342 | if (freezing(current)) { | ||
| 343 | worker->working = 0; | ||
| 344 | spin_unlock_irq(&worker->lock); | ||
| 345 | try_to_freeze(); | ||
| 346 | } else { | ||
| 347 | spin_unlock_irq(&worker->lock); | ||
| 348 | if (!kthread_should_stop()) { | ||
| 349 | cpu_relax(); | ||
| 350 | /* | ||
| 351 | * we've dropped the lock, did someone else | ||
| 352 | * jump_in? | ||
| 353 | */ | ||
| 354 | smp_mb(); | ||
| 355 | if (!list_empty(&worker->pending) || | ||
| 356 | !list_empty(&worker->prio_pending)) | ||
| 357 | continue; | ||
| 358 | |||
| 359 | /* | ||
| 360 | * this short schedule allows more work to | ||
| 361 | * come in without the queue functions | ||
| 362 | * needing to go through wake_up_process() | ||
| 363 | * | ||
| 364 | * worker->working is still 1, so nobody | ||
| 365 | * is going to try and wake us up | ||
| 366 | */ | ||
| 367 | schedule_timeout(1); | ||
| 368 | smp_mb(); | ||
| 369 | if (!list_empty(&worker->pending) || | ||
| 370 | !list_empty(&worker->prio_pending)) | ||
| 371 | continue; | ||
| 372 | |||
| 373 | if (kthread_should_stop()) | ||
| 374 | break; | ||
| 375 | |||
| 376 | /* still no more work?, sleep for real */ | ||
| 377 | spin_lock_irq(&worker->lock); | ||
| 378 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 379 | if (!list_empty(&worker->pending) || | ||
| 380 | !list_empty(&worker->prio_pending)) { | ||
| 381 | spin_unlock_irq(&worker->lock); | ||
| 382 | set_current_state(TASK_RUNNING); | ||
| 383 | goto again; | ||
| 384 | } | ||
| 385 | |||
| 386 | /* | ||
| 387 | * this makes sure we get a wakeup when someone | ||
| 388 | * adds something new to the queue | ||
| 389 | */ | ||
| 390 | worker->working = 0; | ||
| 391 | spin_unlock_irq(&worker->lock); | ||
| 392 | |||
| 393 | if (!kthread_should_stop()) { | ||
| 394 | schedule_timeout(HZ * 120); | ||
| 395 | if (!worker->working && | ||
| 396 | try_worker_shutdown(worker)) { | ||
| 397 | return 0; | ||
| 398 | } | ||
| 399 | } | ||
| 400 | } | ||
| 401 | __set_current_state(TASK_RUNNING); | ||
| 402 | } | ||
| 403 | } while (!kthread_should_stop()); | ||
| 404 | return 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | /* | ||
| 408 | * this will wait for all the worker threads to shutdown | ||
| 409 | */ | ||
| 410 | void btrfs_stop_workers(struct btrfs_workers *workers) | ||
| 411 | { | ||
| 412 | struct list_head *cur; | ||
| 413 | struct btrfs_worker_thread *worker; | ||
| 414 | int can_stop; | ||
| 415 | |||
| 416 | spin_lock_irq(&workers->lock); | ||
| 417 | workers->stopping = 1; | ||
| 418 | list_splice_init(&workers->idle_list, &workers->worker_list); | ||
| 419 | while (!list_empty(&workers->worker_list)) { | ||
| 420 | cur = workers->worker_list.next; | ||
| 421 | worker = list_entry(cur, struct btrfs_worker_thread, | ||
| 422 | worker_list); | ||
| 423 | |||
| 424 | atomic_inc(&worker->refs); | ||
| 425 | workers->num_workers -= 1; | ||
| 426 | if (!list_empty(&worker->worker_list)) { | ||
| 427 | list_del_init(&worker->worker_list); | ||
| 428 | put_worker(worker); | ||
| 429 | can_stop = 1; | ||
| 430 | } else | ||
| 431 | can_stop = 0; | ||
| 432 | spin_unlock_irq(&workers->lock); | ||
| 433 | if (can_stop) | ||
| 434 | kthread_stop(worker->task); | ||
| 435 | spin_lock_irq(&workers->lock); | ||
| 436 | put_worker(worker); | ||
| 437 | } | 260 | } |
| 438 | spin_unlock_irq(&workers->lock); | 261 | if (!need_order) |
| 262 | trace_btrfs_all_work_done(work); | ||
| 439 | } | 263 | } |
| 440 | 264 | ||
| 441 | /* | 265 | void btrfs_init_work(struct btrfs_work *work, |
| 442 | * simple init on struct btrfs_workers | 266 | btrfs_func_t func, |
| 443 | */ | 267 | btrfs_func_t ordered_func, |
| 444 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, | 268 | btrfs_func_t ordered_free) |
| 445 | struct btrfs_workers *async_helper) | ||
| 446 | { | 269 | { |
| 447 | workers->num_workers = 0; | 270 | work->func = func; |
| 448 | workers->num_workers_starting = 0; | 271 | work->ordered_func = ordered_func; |
| 449 | INIT_LIST_HEAD(&workers->worker_list); | 272 | work->ordered_free = ordered_free; |
| 450 | INIT_LIST_HEAD(&workers->idle_list); | 273 | INIT_WORK(&work->normal_work, normal_work_helper); |
| 451 | INIT_LIST_HEAD(&workers->order_list); | 274 | INIT_LIST_HEAD(&work->ordered_list); |
| 452 | INIT_LIST_HEAD(&workers->prio_order_list); | 275 | work->flags = 0; |
| 453 | spin_lock_init(&workers->lock); | ||
| 454 | spin_lock_init(&workers->order_lock); | ||
| 455 | workers->max_workers = max; | ||
| 456 | workers->idle_thresh = 32; | ||
| 457 | workers->name = name; | ||
| 458 | workers->ordered = 0; | ||
| 459 | workers->atomic_start_pending = 0; | ||
| 460 | workers->atomic_worker_start = async_helper; | ||
| 461 | workers->stopping = 0; | ||
| 462 | } | 276 | } |
| 463 | 277 | ||
| 464 | /* | 278 | static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, |
| 465 | * starts new worker threads. This does not enforce the max worker | 279 | struct btrfs_work *work) |
| 466 | * count in case you need to temporarily go past it. | ||
| 467 | */ | ||
| 468 | static int __btrfs_start_workers(struct btrfs_workers *workers) | ||
| 469 | { | 280 | { |
| 470 | struct btrfs_worker_thread *worker; | 281 | unsigned long flags; |
| 471 | int ret = 0; | ||
| 472 | |||
| 473 | worker = kzalloc(sizeof(*worker), GFP_NOFS); | ||
| 474 | if (!worker) { | ||
| 475 | ret = -ENOMEM; | ||
| 476 | goto fail; | ||
| 477 | } | ||
| 478 | |||
| 479 | INIT_LIST_HEAD(&worker->pending); | ||
| 480 | INIT_LIST_HEAD(&worker->prio_pending); | ||
| 481 | INIT_LIST_HEAD(&worker->worker_list); | ||
| 482 | spin_lock_init(&worker->lock); | ||
| 483 | |||
| 484 | atomic_set(&worker->num_pending, 0); | ||
| 485 | atomic_set(&worker->refs, 1); | ||
| 486 | worker->workers = workers; | ||
| 487 | worker->task = kthread_create(worker_loop, worker, | ||
| 488 | "btrfs-%s-%d", workers->name, | ||
| 489 | workers->num_workers + 1); | ||
| 490 | if (IS_ERR(worker->task)) { | ||
| 491 | ret = PTR_ERR(worker->task); | ||
| 492 | goto fail; | ||
| 493 | } | ||
| 494 | 282 | ||
| 495 | spin_lock_irq(&workers->lock); | 283 | work->wq = wq; |
| 496 | if (workers->stopping) { | 284 | thresh_queue_hook(wq); |
| 497 | spin_unlock_irq(&workers->lock); | 285 | if (work->ordered_func) { |
| 498 | ret = -EINVAL; | 286 | spin_lock_irqsave(&wq->list_lock, flags); |
| 499 | goto fail_kthread; | 287 | list_add_tail(&work->ordered_list, &wq->ordered_list); |
| 288 | spin_unlock_irqrestore(&wq->list_lock, flags); | ||
| 500 | } | 289 | } |
| 501 | list_add_tail(&worker->worker_list, &workers->idle_list); | 290 | queue_work(wq->normal_wq, &work->normal_work); |
| 502 | worker->idle = 1; | 291 | trace_btrfs_work_queued(work); |
| 503 | workers->num_workers++; | ||
| 504 | workers->num_workers_starting--; | ||
| 505 | WARN_ON(workers->num_workers_starting < 0); | ||
| 506 | spin_unlock_irq(&workers->lock); | ||
| 507 | |||
| 508 | wake_up_process(worker->task); | ||
| 509 | return 0; | ||
| 510 | |||
| 511 | fail_kthread: | ||
| 512 | kthread_stop(worker->task); | ||
| 513 | fail: | ||
| 514 | kfree(worker); | ||
| 515 | spin_lock_irq(&workers->lock); | ||
| 516 | workers->num_workers_starting--; | ||
| 517 | spin_unlock_irq(&workers->lock); | ||
| 518 | return ret; | ||
| 519 | } | 292 | } |
| 520 | 293 | ||
| 521 | int btrfs_start_workers(struct btrfs_workers *workers) | 294 | void btrfs_queue_work(struct btrfs_workqueue *wq, |
| 295 | struct btrfs_work *work) | ||
| 522 | { | 296 | { |
| 523 | spin_lock_irq(&workers->lock); | 297 | struct __btrfs_workqueue *dest_wq; |
| 524 | workers->num_workers_starting++; | ||
| 525 | spin_unlock_irq(&workers->lock); | ||
| 526 | return __btrfs_start_workers(workers); | ||
| 527 | } | ||
| 528 | |||
| 529 | /* | ||
| 530 | * run through the list and find a worker thread that doesn't have a lot | ||
| 531 | * to do right now. This can return null if we aren't yet at the thread | ||
| 532 | * count limit and all of the threads are busy. | ||
| 533 | */ | ||
| 534 | static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | ||
| 535 | { | ||
| 536 | struct btrfs_worker_thread *worker; | ||
| 537 | struct list_head *next; | ||
| 538 | int enforce_min; | ||
| 539 | |||
| 540 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
| 541 | workers->max_workers; | ||
| 542 | |||
| 543 | /* | ||
| 544 | * if we find an idle thread, don't move it to the end of the | ||
| 545 | * idle list. This improves the chance that the next submission | ||
| 546 | * will reuse the same thread, and maybe catch it while it is still | ||
| 547 | * working | ||
| 548 | */ | ||
| 549 | if (!list_empty(&workers->idle_list)) { | ||
| 550 | next = workers->idle_list.next; | ||
| 551 | worker = list_entry(next, struct btrfs_worker_thread, | ||
| 552 | worker_list); | ||
| 553 | return worker; | ||
| 554 | } | ||
| 555 | if (enforce_min || list_empty(&workers->worker_list)) | ||
| 556 | return NULL; | ||
| 557 | |||
| 558 | /* | ||
| 559 | * if we pick a busy task, move the task to the end of the list. | ||
| 560 | * hopefully this will keep things somewhat evenly balanced. | ||
| 561 | * Do the move in batches based on the sequence number. This groups | ||
| 562 | * requests submitted at roughly the same time onto the same worker. | ||
| 563 | */ | ||
| 564 | next = workers->worker_list.next; | ||
| 565 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | ||
| 566 | worker->sequence++; | ||
| 567 | 298 | ||
| 568 | if (worker->sequence % workers->idle_thresh == 0) | 299 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high) |
| 569 | list_move_tail(next, &workers->worker_list); | 300 | dest_wq = wq->high; |
| 570 | return worker; | 301 | else |
| 302 | dest_wq = wq->normal; | ||
| 303 | __btrfs_queue_work(dest_wq, work); | ||
| 571 | } | 304 | } |
| 572 | 305 | ||
| 573 | /* | 306 | static inline void |
| 574 | * selects a worker thread to take the next job. This will either find | 307 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq) |
| 575 | * an idle worker, start a new worker up to the max count, or just return | ||
| 576 | * one of the existing busy workers. | ||
| 577 | */ | ||
| 578 | static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | ||
| 579 | { | 308 | { |
| 580 | struct btrfs_worker_thread *worker; | 309 | destroy_workqueue(wq->normal_wq); |
| 581 | unsigned long flags; | 310 | trace_btrfs_workqueue_destroy(wq); |
| 582 | struct list_head *fallback; | 311 | kfree(wq); |
| 583 | int ret; | ||
| 584 | |||
| 585 | spin_lock_irqsave(&workers->lock, flags); | ||
| 586 | again: | ||
| 587 | worker = next_worker(workers); | ||
| 588 | |||
| 589 | if (!worker) { | ||
| 590 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 591 | workers->max_workers) { | ||
| 592 | goto fallback; | ||
| 593 | } else if (workers->atomic_worker_start) { | ||
| 594 | workers->atomic_start_pending = 1; | ||
| 595 | goto fallback; | ||
| 596 | } else { | ||
| 597 | workers->num_workers_starting++; | ||
| 598 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 599 | /* we're below the limit, start another worker */ | ||
| 600 | ret = __btrfs_start_workers(workers); | ||
| 601 | spin_lock_irqsave(&workers->lock, flags); | ||
| 602 | if (ret) | ||
| 603 | goto fallback; | ||
| 604 | goto again; | ||
| 605 | } | ||
| 606 | } | ||
| 607 | goto found; | ||
| 608 | |||
| 609 | fallback: | ||
| 610 | fallback = NULL; | ||
| 611 | /* | ||
| 612 | * we have failed to find any workers, just | ||
| 613 | * return the first one we can find. | ||
| 614 | */ | ||
| 615 | if (!list_empty(&workers->worker_list)) | ||
| 616 | fallback = workers->worker_list.next; | ||
| 617 | if (!list_empty(&workers->idle_list)) | ||
| 618 | fallback = workers->idle_list.next; | ||
| 619 | BUG_ON(!fallback); | ||
| 620 | worker = list_entry(fallback, | ||
| 621 | struct btrfs_worker_thread, worker_list); | ||
| 622 | found: | ||
| 623 | /* | ||
| 624 | * this makes sure the worker doesn't exit before it is placed | ||
| 625 | * onto a busy/idle list | ||
| 626 | */ | ||
| 627 | atomic_inc(&worker->num_pending); | ||
| 628 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 629 | return worker; | ||
| 630 | } | 312 | } |
| 631 | 313 | ||
| 632 | /* | 314 | void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) |
| 633 | * btrfs_requeue_work just puts the work item back on the tail of the list | ||
| 634 | * it was taken from. It is intended for use with long running work functions | ||
| 635 | * that make some progress and want to give the cpu up for others. | ||
| 636 | */ | ||
| 637 | void btrfs_requeue_work(struct btrfs_work *work) | ||
| 638 | { | 315 | { |
| 639 | struct btrfs_worker_thread *worker = work->worker; | 316 | if (!wq) |
| 640 | unsigned long flags; | ||
| 641 | int wake = 0; | ||
| 642 | |||
| 643 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
| 644 | return; | 317 | return; |
| 645 | 318 | if (wq->high) | |
| 646 | spin_lock_irqsave(&worker->lock, flags); | 319 | __btrfs_destroy_workqueue(wq->high); |
| 647 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | 320 | __btrfs_destroy_workqueue(wq->normal); |
| 648 | list_add_tail(&work->list, &worker->prio_pending); | 321 | kfree(wq); |
| 649 | else | ||
| 650 | list_add_tail(&work->list, &worker->pending); | ||
| 651 | atomic_inc(&worker->num_pending); | ||
| 652 | |||
| 653 | /* by definition we're busy, take ourselves off the idle | ||
| 654 | * list | ||
| 655 | */ | ||
| 656 | if (worker->idle) { | ||
| 657 | spin_lock(&worker->workers->lock); | ||
| 658 | worker->idle = 0; | ||
| 659 | list_move_tail(&worker->worker_list, | ||
| 660 | &worker->workers->worker_list); | ||
| 661 | spin_unlock(&worker->workers->lock); | ||
| 662 | } | ||
| 663 | if (!worker->working) { | ||
| 664 | wake = 1; | ||
| 665 | worker->working = 1; | ||
| 666 | } | ||
| 667 | |||
| 668 | if (wake) | ||
| 669 | wake_up_process(worker->task); | ||
| 670 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 671 | } | 322 | } |
| 672 | 323 | ||
| 673 | void btrfs_set_work_high_prio(struct btrfs_work *work) | 324 | void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) |
| 674 | { | 325 | { |
| 675 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | 326 | wq->normal->max_active = max; |
| 327 | if (wq->high) | ||
| 328 | wq->high->max_active = max; | ||
| 676 | } | 329 | } |
| 677 | 330 | ||
| 678 | /* | 331 | void btrfs_set_work_high_priority(struct btrfs_work *work) |
| 679 | * places a struct btrfs_work into the pending queue of one of the kthreads | ||
| 680 | */ | ||
| 681 | void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | ||
| 682 | { | 332 | { |
| 683 | struct btrfs_worker_thread *worker; | 333 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); |
| 684 | unsigned long flags; | ||
| 685 | int wake = 0; | ||
| 686 | |||
| 687 | /* don't requeue something already on a list */ | ||
| 688 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
| 689 | return; | ||
| 690 | |||
| 691 | worker = find_worker(workers); | ||
| 692 | if (workers->ordered) { | ||
| 693 | /* | ||
| 694 | * you're not allowed to do ordered queues from an | ||
| 695 | * interrupt handler | ||
| 696 | */ | ||
| 697 | spin_lock(&workers->order_lock); | ||
| 698 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | ||
| 699 | list_add_tail(&work->order_list, | ||
| 700 | &workers->prio_order_list); | ||
| 701 | } else { | ||
| 702 | list_add_tail(&work->order_list, &workers->order_list); | ||
| 703 | } | ||
| 704 | spin_unlock(&workers->order_lock); | ||
| 705 | } else { | ||
| 706 | INIT_LIST_HEAD(&work->order_list); | ||
| 707 | } | ||
| 708 | |||
| 709 | spin_lock_irqsave(&worker->lock, flags); | ||
| 710 | |||
| 711 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | ||
| 712 | list_add_tail(&work->list, &worker->prio_pending); | ||
| 713 | else | ||
| 714 | list_add_tail(&work->list, &worker->pending); | ||
| 715 | check_busy_worker(worker); | ||
| 716 | |||
| 717 | /* | ||
| 718 | * avoid calling into wake_up_process if this thread has already | ||
| 719 | * been kicked | ||
| 720 | */ | ||
| 721 | if (!worker->working) | ||
| 722 | wake = 1; | ||
| 723 | worker->working = 1; | ||
| 724 | |||
| 725 | if (wake) | ||
| 726 | wake_up_process(worker->task); | ||
| 727 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 728 | } | 334 | } |
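The hunks above replace the hand-rolled kthread pool with thin wrappers around the kernel's generic workqueue. Ordered completion survives the rewrite: __btrfs_queue_work() appends any item that has an ordered_func to wq->ordered_list under wq->list_lock before calling queue_work(). The helper that drains that list is outside these hunks, so the sketch below only illustrates the technique, under the assumption of a WORK_DONE_BIT flag that the normal work handler sets once ->func finishes.

/*
 * Illustrative sketch, not patch code: drain the ordered list strictly
 * from the head so ->ordered_func fires in queueing order no matter
 * how many workqueue threads ran the normal functions concurrently.
 */
static void run_ordered_work_sketch(struct __btrfs_workqueue *wq)
{
	struct btrfs_work *work;
	unsigned long flags;

	while (1) {
		spin_lock_irqsave(&wq->list_lock, flags);
		if (list_empty(&wq->ordered_list)) {
			spin_unlock_irqrestore(&wq->list_lock, flags);
			break;
		}
		work = list_first_entry(&wq->ordered_list,
					struct btrfs_work, ordered_list);
		/* stop at the first item whose ->func has not finished */
		if (!test_bit(WORK_DONE_BIT, &work->flags)) {
			spin_unlock_irqrestore(&wq->list_lock, flags);
			break;
		}
		list_del_init(&work->ordered_list);
		spin_unlock_irqrestore(&wq->list_lock, flags);

		work->ordered_func(work);
		if (work->ordered_free)
			work->ordered_free(work);
	}
}

Because items leave the list only from the head, and only once their normal function is done, completion order equals queueing order.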
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1f26792683ed..9c6b66d15fb0 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
| 3 | * Copyright (C) 2014 Fujitsu. All rights reserved. | ||
| 3 | * | 4 | * |
| 4 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public | 6 | * modify it under the terms of the GNU General Public |
| @@ -19,103 +20,35 @@ | |||
| 19 | #ifndef __BTRFS_ASYNC_THREAD_ | 20 | #ifndef __BTRFS_ASYNC_THREAD_ |
| 20 | #define __BTRFS_ASYNC_THREAD_ | 21 | #define __BTRFS_ASYNC_THREAD_ |
| 21 | 22 | ||
| 22 | struct btrfs_worker_thread; | 23 | struct btrfs_workqueue; |
| 24 | /* Internal use only */ | ||
| 25 | struct __btrfs_workqueue; | ||
| 26 | struct btrfs_work; | ||
| 27 | typedef void (*btrfs_func_t)(struct btrfs_work *arg); | ||
| 23 | 28 | ||
| 24 | /* | ||
| 25 | * This is similar to a workqueue, but it is meant to spread the operations | ||
| 26 | * across all available cpus instead of just the CPU that was used to | ||
| 27 | * queue the work. There is also some batching introduced to try and | ||
| 28 | * cut down on context switches. | ||
| 29 | * | ||
| 30 | * By default threads are added on demand up to 2 * the number of cpus. | ||
| 31 | * Changing struct btrfs_workers->max_workers is one way to prevent | ||
| 32 | * demand creation of kthreads. | ||
| 33 | * | ||
| 34 | * the basic model of these worker threads is to embed a btrfs_work | ||
| 35 | * structure in your own data struct, and use container_of in a | ||
| 36 | * work function to get back to your data struct. | ||
| 37 | */ | ||
| 38 | struct btrfs_work { | 29 | struct btrfs_work { |
| 39 | /* | 30 | btrfs_func_t func; |
| 40 | * func should be set to the function you want called | 31 | btrfs_func_t ordered_func; |
| 41 | * your work struct is passed as the only arg | 32 | btrfs_func_t ordered_free; |
| 42 | * | 33 | |
| 43 | * ordered_func must be set for work sent to an ordered work queue, | 34 | /* Don't touch things below */ |
| 44 | * and it is called to complete a given work item in the same | 35 | struct work_struct normal_work; |
| 45 | * order they were sent to the queue. | 36 | struct list_head ordered_list; |
| 46 | */ | 37 | struct __btrfs_workqueue *wq; |
| 47 | void (*func)(struct btrfs_work *work); | ||
| 48 | void (*ordered_func)(struct btrfs_work *work); | ||
| 49 | void (*ordered_free)(struct btrfs_work *work); | ||
| 50 | |||
| 51 | /* | ||
| 52 | * flags should be set to zero. It is used to make sure the | ||
| 53 | * struct is only inserted once into the list. | ||
| 54 | */ | ||
| 55 | unsigned long flags; | 38 | unsigned long flags; |
| 56 | |||
| 57 | /* don't touch these */ | ||
| 58 | struct btrfs_worker_thread *worker; | ||
| 59 | struct list_head list; | ||
| 60 | struct list_head order_list; | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct btrfs_workers { | ||
| 64 | /* current number of running workers */ | ||
| 65 | int num_workers; | ||
| 66 | |||
| 67 | int num_workers_starting; | ||
| 68 | |||
| 69 | /* max number of workers allowed. changed by btrfs_start_workers */ | ||
| 70 | int max_workers; | ||
| 71 | |||
| 72 | /* once a worker has this many requests or fewer, it is idle */ | ||
| 73 | int idle_thresh; | ||
| 74 | |||
| 75 | /* force completions in the order they were queued */ | ||
| 76 | int ordered; | ||
| 77 | |||
| 78 | /* more workers required, but in an interrupt handler */ | ||
| 79 | int atomic_start_pending; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * are we allowed to sleep while starting workers or are we required | ||
| 83 | * to start them at a later time? If we can't sleep, this indicates | ||
| 84 | * which queue we need to use to schedule thread creation. | ||
| 85 | */ | ||
| 86 | struct btrfs_workers *atomic_worker_start; | ||
| 87 | |||
| 88 | /* list with all the work threads. The workers on the idle thread | ||
| 89 | * may be actively servicing jobs, but they haven't yet hit the | ||
| 90 | * idle thresh limit above. | ||
| 91 | */ | ||
| 92 | struct list_head worker_list; | ||
| 93 | struct list_head idle_list; | ||
| 94 | |||
| 95 | /* | ||
| 96 | * when operating in ordered mode, this maintains the list | ||
| 97 | * of work items waiting for completion | ||
| 98 | */ | ||
| 99 | struct list_head order_list; | ||
| 100 | struct list_head prio_order_list; | ||
| 101 | |||
| 102 | /* lock for finding the next worker thread to queue on */ | ||
| 103 | spinlock_t lock; | ||
| 104 | |||
| 105 | /* lock for the ordered lists */ | ||
| 106 | spinlock_t order_lock; | ||
| 107 | |||
| 108 | /* extra name for this worker, used for current->name */ | ||
| 109 | char *name; | ||
| 110 | |||
| 111 | int stopping; | ||
| 112 | }; | 39 | }; |
| 113 | 40 | ||
| 114 | void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | 41 | struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, |
| 115 | int btrfs_start_workers(struct btrfs_workers *workers); | 42 | int flags, |
| 116 | void btrfs_stop_workers(struct btrfs_workers *workers); | 43 | int max_active, |
| 117 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, | 44 | int thresh); |
| 118 | struct btrfs_workers *async_starter); | 45 | void btrfs_init_work(struct btrfs_work *work, |
| 119 | void btrfs_requeue_work(struct btrfs_work *work); | 46 | btrfs_func_t func, |
| 120 | void btrfs_set_work_high_prio(struct btrfs_work *work); | 47 | btrfs_func_t ordered_func, |
| 48 | btrfs_func_t ordered_free); | ||
| 49 | void btrfs_queue_work(struct btrfs_workqueue *wq, | ||
| 50 | struct btrfs_work *work); | ||
| 51 | void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); | ||
| 52 | void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); | ||
| 53 | void btrfs_set_work_high_priority(struct btrfs_work *work); | ||
| 121 | #endif | 54 | #endif |
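The new header keeps the embedding pattern the removed comment block described: a btrfs_work lives inside the caller's own struct, and container_of() recovers the container in the callback. A minimal usage sketch, where the my_* names are hypothetical:

struct my_async_ctx {
	struct btrfs_fs_info *fs_info;
	struct btrfs_work work;
};

static void my_func(struct btrfs_work *work)
{
	struct my_async_ctx *ctx =
		container_of(work, struct my_async_ctx, work);

	/* ... do the actual job using ctx->fs_info ... */
}

static void my_queue(struct btrfs_fs_info *fs_info,
		     struct my_async_ctx *ctx)
{
	/* no ordering needed here, so both ordered hooks are NULL */
	btrfs_init_work(&ctx->work, my_func, NULL, NULL);
	btrfs_queue_work(fs_info->workers, &ctx->work);
}

Passing an ordered_func (and optionally ordered_free) as the second and third hooks is what opts a user into ordered completion.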
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index aded3ef3d3d4..aad7201ad11b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
| @@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
| 220 | 220 | ||
| 221 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | 221 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, |
| 222 | struct ulist *parents, struct __prelim_ref *ref, | 222 | struct ulist *parents, struct __prelim_ref *ref, |
| 223 | int level, u64 time_seq, const u64 *extent_item_pos) | 223 | int level, u64 time_seq, const u64 *extent_item_pos, |
| 224 | u64 total_refs) | ||
| 224 | { | 225 | { |
| 225 | int ret = 0; | 226 | int ret = 0; |
| 226 | int slot; | 227 | int slot; |
| @@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
| 249 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) | 250 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) |
| 250 | ret = btrfs_next_old_leaf(root, path, time_seq); | 251 | ret = btrfs_next_old_leaf(root, path, time_seq); |
| 251 | 252 | ||
| 252 | while (!ret && count < ref->count) { | 253 | while (!ret && count < total_refs) { |
| 253 | eb = path->nodes[0]; | 254 | eb = path->nodes[0]; |
| 254 | slot = path->slots[0]; | 255 | slot = path->slots[0]; |
| 255 | 256 | ||
| @@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
| 306 | struct btrfs_path *path, u64 time_seq, | 307 | struct btrfs_path *path, u64 time_seq, |
| 307 | struct __prelim_ref *ref, | 308 | struct __prelim_ref *ref, |
| 308 | struct ulist *parents, | 309 | struct ulist *parents, |
| 309 | const u64 *extent_item_pos) | 310 | const u64 *extent_item_pos, u64 total_refs) |
| 310 | { | 311 | { |
| 311 | struct btrfs_root *root; | 312 | struct btrfs_root *root; |
| 312 | struct btrfs_key root_key; | 313 | struct btrfs_key root_key; |
| @@ -361,7 +362,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
| 361 | } | 362 | } |
| 362 | 363 | ||
| 363 | ret = add_all_parents(root, path, parents, ref, level, time_seq, | 364 | ret = add_all_parents(root, path, parents, ref, level, time_seq, |
| 364 | extent_item_pos); | 365 | extent_item_pos, total_refs); |
| 365 | out: | 366 | out: |
| 366 | path->lowest_level = 0; | 367 | path->lowest_level = 0; |
| 367 | btrfs_release_path(path); | 368 | btrfs_release_path(path); |
| @@ -374,7 +375,7 @@ out: | |||
| 374 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 375 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
| 375 | struct btrfs_path *path, u64 time_seq, | 376 | struct btrfs_path *path, u64 time_seq, |
| 376 | struct list_head *head, | 377 | struct list_head *head, |
| 377 | const u64 *extent_item_pos) | 378 | const u64 *extent_item_pos, u64 total_refs) |
| 378 | { | 379 | { |
| 379 | int err; | 380 | int err; |
| 380 | int ret = 0; | 381 | int ret = 0; |
| @@ -400,7 +401,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 400 | if (ref->count == 0) | 401 | if (ref->count == 0) |
| 401 | continue; | 402 | continue; |
| 402 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, | 403 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, |
| 403 | parents, extent_item_pos); | 404 | parents, extent_item_pos, |
| 405 | total_refs); | ||
| 404 | /* | 406 | /* |
| 405 | * we can only tolerate ENOENT, otherwise we should catch the error | 407 | * we can only tolerate ENOENT, otherwise we should catch the error |
| 406 | * and return directly. | 408 | * and return directly. |
| @@ -557,7 +559,7 @@ static void __merge_refs(struct list_head *head, int mode) | |||
| 557 | * smaller or equal that seq to the list | 559 | * smaller or equal that seq to the list |
| 558 | */ | 560 | */ |
| 559 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 561 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
| 560 | struct list_head *prefs) | 562 | struct list_head *prefs, u64 *total_refs) |
| 561 | { | 563 | { |
| 562 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 564 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
| 563 | struct rb_node *n = &head->node.rb_node; | 565 | struct rb_node *n = &head->node.rb_node; |
| @@ -593,6 +595,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 593 | default: | 595 | default: |
| 594 | BUG_ON(1); | 596 | BUG_ON(1); |
| 595 | } | 597 | } |
| 598 | *total_refs += (node->ref_mod * sgn); | ||
| 596 | switch (node->type) { | 599 | switch (node->type) { |
| 597 | case BTRFS_TREE_BLOCK_REF_KEY: { | 600 | case BTRFS_TREE_BLOCK_REF_KEY: { |
| 598 | struct btrfs_delayed_tree_ref *ref; | 601 | struct btrfs_delayed_tree_ref *ref; |
| @@ -653,7 +656,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 653 | */ | 656 | */ |
| 654 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 657 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
| 655 | struct btrfs_path *path, u64 bytenr, | 658 | struct btrfs_path *path, u64 bytenr, |
| 656 | int *info_level, struct list_head *prefs) | 659 | int *info_level, struct list_head *prefs, |
| 660 | u64 *total_refs) | ||
| 657 | { | 661 | { |
| 658 | int ret = 0; | 662 | int ret = 0; |
| 659 | int slot; | 663 | int slot; |
| @@ -677,6 +681,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 677 | 681 | ||
| 678 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | 682 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); |
| 679 | flags = btrfs_extent_flags(leaf, ei); | 683 | flags = btrfs_extent_flags(leaf, ei); |
| 684 | *total_refs += btrfs_extent_refs(leaf, ei); | ||
| 680 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 685 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
| 681 | 686 | ||
| 682 | ptr = (unsigned long)(ei + 1); | 687 | ptr = (unsigned long)(ei + 1); |
| @@ -859,6 +864,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
| 859 | struct list_head prefs; | 864 | struct list_head prefs; |
| 860 | struct __prelim_ref *ref; | 865 | struct __prelim_ref *ref; |
| 861 | struct extent_inode_elem *eie = NULL; | 866 | struct extent_inode_elem *eie = NULL; |
| 867 | u64 total_refs = 0; | ||
| 862 | 868 | ||
| 863 | INIT_LIST_HEAD(&prefs); | 869 | INIT_LIST_HEAD(&prefs); |
| 864 | INIT_LIST_HEAD(&prefs_delayed); | 870 | INIT_LIST_HEAD(&prefs_delayed); |
| @@ -873,8 +879,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
| 873 | path = btrfs_alloc_path(); | 879 | path = btrfs_alloc_path(); |
| 874 | if (!path) | 880 | if (!path) |
| 875 | return -ENOMEM; | 881 | return -ENOMEM; |
| 876 | if (!trans) | 882 | if (!trans) { |
| 877 | path->search_commit_root = 1; | 883 | path->search_commit_root = 1; |
| 884 | path->skip_locking = 1; | ||
| 885 | } | ||
| 878 | 886 | ||
| 879 | /* | 887 | /* |
| 880 | * grab both a lock on the path and a lock on the delayed ref head. | 888 | * grab both a lock on the path and a lock on the delayed ref head. |
| @@ -915,7 +923,7 @@ again: | |||
| 915 | } | 923 | } |
| 916 | spin_unlock(&delayed_refs->lock); | 924 | spin_unlock(&delayed_refs->lock); |
| 917 | ret = __add_delayed_refs(head, time_seq, | 925 | ret = __add_delayed_refs(head, time_seq, |
| 918 | &prefs_delayed); | 926 | &prefs_delayed, &total_refs); |
| 919 | mutex_unlock(&head->mutex); | 927 | mutex_unlock(&head->mutex); |
| 920 | if (ret) | 928 | if (ret) |
| 921 | goto out; | 929 | goto out; |
| @@ -936,7 +944,8 @@ again: | |||
| 936 | (key.type == BTRFS_EXTENT_ITEM_KEY || | 944 | (key.type == BTRFS_EXTENT_ITEM_KEY || |
| 937 | key.type == BTRFS_METADATA_ITEM_KEY)) { | 945 | key.type == BTRFS_METADATA_ITEM_KEY)) { |
| 938 | ret = __add_inline_refs(fs_info, path, bytenr, | 946 | ret = __add_inline_refs(fs_info, path, bytenr, |
| 939 | &info_level, &prefs); | 947 | &info_level, &prefs, |
| 948 | &total_refs); | ||
| 940 | if (ret) | 949 | if (ret) |
| 941 | goto out; | 950 | goto out; |
| 942 | ret = __add_keyed_refs(fs_info, path, bytenr, | 951 | ret = __add_keyed_refs(fs_info, path, bytenr, |
| @@ -956,7 +965,7 @@ again: | |||
| 956 | __merge_refs(&prefs, 1); | 965 | __merge_refs(&prefs, 1); |
| 957 | 966 | ||
| 958 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, | 967 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, |
| 959 | extent_item_pos); | 968 | extent_item_pos, total_refs); |
| 960 | if (ret) | 969 | if (ret) |
| 961 | goto out; | 970 | goto out; |
| 962 | 971 | ||
| @@ -965,7 +974,7 @@ again: | |||
| 965 | while (!list_empty(&prefs)) { | 974 | while (!list_empty(&prefs)) { |
| 966 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | 975 | ref = list_first_entry(&prefs, struct __prelim_ref, list); |
| 967 | WARN_ON(ref->count < 0); | 976 | WARN_ON(ref->count < 0); |
| 968 | if (ref->count && ref->root_id && ref->parent == 0) { | 977 | if (roots && ref->count && ref->root_id && ref->parent == 0) { |
| 969 | /* no parent == root of tree */ | 978 | /* no parent == root of tree */ |
| 970 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | 979 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); |
| 971 | if (ret < 0) | 980 | if (ret < 0) |
| @@ -1061,22 +1070,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
| 1061 | u64 time_seq, struct ulist **leafs, | 1070 | u64 time_seq, struct ulist **leafs, |
| 1062 | const u64 *extent_item_pos) | 1071 | const u64 *extent_item_pos) |
| 1063 | { | 1072 | { |
| 1064 | struct ulist *tmp; | ||
| 1065 | int ret; | 1073 | int ret; |
| 1066 | 1074 | ||
| 1067 | tmp = ulist_alloc(GFP_NOFS); | ||
| 1068 | if (!tmp) | ||
| 1069 | return -ENOMEM; | ||
| 1070 | *leafs = ulist_alloc(GFP_NOFS); | 1075 | *leafs = ulist_alloc(GFP_NOFS); |
| 1071 | if (!*leafs) { | 1076 | if (!*leafs) |
| 1072 | ulist_free(tmp); | ||
| 1073 | return -ENOMEM; | 1077 | return -ENOMEM; |
| 1074 | } | ||
| 1075 | 1078 | ||
| 1076 | ret = find_parent_nodes(trans, fs_info, bytenr, | 1079 | ret = find_parent_nodes(trans, fs_info, bytenr, |
| 1077 | time_seq, *leafs, tmp, extent_item_pos); | 1080 | time_seq, *leafs, NULL, extent_item_pos); |
| 1078 | ulist_free(tmp); | ||
| 1079 | |||
| 1080 | if (ret < 0 && ret != -ENOENT) { | 1081 | if (ret < 0 && ret != -ENOENT) { |
| 1081 | free_leaf_list(*leafs); | 1082 | free_leaf_list(*leafs); |
| 1082 | return ret; | 1083 | return ret; |
| @@ -1333,38 +1334,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
| 1333 | if (ret < 0) | 1334 | if (ret < 0) |
| 1334 | return ret; | 1335 | return ret; |
| 1335 | 1336 | ||
| 1336 | while (1) { | 1337 | ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0); |
| 1337 | u32 nritems; | 1338 | if (ret) { |
| 1338 | if (path->slots[0] == 0) { | 1339 | if (ret > 0) |
| 1339 | btrfs_set_path_blocking(path); | 1340 | ret = -ENOENT; |
| 1340 | ret = btrfs_prev_leaf(fs_info->extent_root, path); | 1341 | return ret; |
| 1341 | if (ret != 0) { | ||
| 1342 | if (ret > 0) { | ||
| 1343 | pr_debug("logical %llu is not within " | ||
| 1344 | "any extent\n", logical); | ||
| 1345 | ret = -ENOENT; | ||
| 1346 | } | ||
| 1347 | return ret; | ||
| 1348 | } | ||
| 1349 | } else { | ||
| 1350 | path->slots[0]--; | ||
| 1351 | } | ||
| 1352 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 1353 | if (nritems == 0) { | ||
| 1354 | pr_debug("logical %llu is not within any extent\n", | ||
| 1355 | logical); | ||
| 1356 | return -ENOENT; | ||
| 1357 | } | ||
| 1358 | if (path->slots[0] == nritems) | ||
| 1359 | path->slots[0]--; | ||
| 1360 | |||
| 1361 | btrfs_item_key_to_cpu(path->nodes[0], found_key, | ||
| 1362 | path->slots[0]); | ||
| 1363 | if (found_key->type == BTRFS_EXTENT_ITEM_KEY || | ||
| 1364 | found_key->type == BTRFS_METADATA_ITEM_KEY) | ||
| 1365 | break; | ||
| 1366 | } | 1342 | } |
| 1367 | 1343 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | |
| 1368 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) | 1344 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) |
| 1369 | size = fs_info->extent_root->leafsize; | 1345 | size = fs_info->extent_root->leafsize; |
| 1370 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) | 1346 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) |
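The backref changes above thread a new total_refs count through the walk: add_all_parents() used to stop after ref->count hits, which can undercount for a shared extent. A sketch of where the total is accumulated and where it is consumed, using only pieces visible in these hunks:

	u64 total_refs = 0;

	/* on-disk side: the extent item's own reference count */
	__add_inline_refs(fs_info, path, bytenr, &info_level, &prefs,
			  &total_refs);	/* += btrfs_extent_refs(leaf, ei) */

	/* in-memory side: pending delayed-ref modifications */
	__add_delayed_refs(head, time_seq, &prefs_delayed,
			   &total_refs);	/* += node->ref_mod * sgn */

	/* consumer: add_all_parents() now loops while count < total_refs
	 * rather than stopping at a single ref's count */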
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8fed2125689e..c9a24444ec9a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -109,14 +109,17 @@ struct btrfs_inode { | |||
| 109 | u64 last_trans; | 109 | u64 last_trans; |
| 110 | 110 | ||
| 111 | /* | 111 | /* |
| 112 | * log transid when this inode was last modified | 112 | * transid that last logged this inode |
| 113 | */ | 113 | */ |
| 114 | u64 last_sub_trans; | 114 | u64 logged_trans; |
| 115 | 115 | ||
| 116 | /* | 116 | /* |
| 117 | * transid that last logged this inode | 117 | * log transid when this inode was last modified |
| 118 | */ | 118 | */ |
| 119 | u64 logged_trans; | 119 | int last_sub_trans; |
| 120 | |||
| 121 | /* a local copy of root's last_log_commit */ | ||
| 122 | int last_log_commit; | ||
| 120 | 123 | ||
| 121 | /* total number of bytes pending delalloc, used by stat to calc the | 124 | /* total number of bytes pending delalloc, used by stat to calc the |
| 122 | * real block usage of the file | 125 | * real block usage of the file |
| @@ -155,9 +158,6 @@ struct btrfs_inode { | |||
| 155 | /* flags field from the on disk inode */ | 158 | /* flags field from the on disk inode */ |
| 156 | u32 flags; | 159 | u32 flags; |
| 157 | 160 | ||
| 158 | /* a local copy of root's last_log_commit */ | ||
| 159 | unsigned long last_log_commit; | ||
| 160 | |||
| 161 | /* | 161 | /* |
| 162 | * Counters to keep track of the number of extent item's we may use due | 162 | * Counters to keep track of the number of extent item's we may use due |
| 163 | * to delalloc and such. outstanding_extents is the number of extent | 163 | * to delalloc and such. outstanding_extents is the number of extent |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b01fb6c527e3..d43c544d3b68 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 472 | rcu_read_lock(); | 472 | rcu_read_lock(); |
| 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); | 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); |
| 474 | rcu_read_unlock(); | 474 | rcu_read_unlock(); |
| 475 | if (page) { | 475 | if (page && !radix_tree_exceptional_entry(page)) { |
| 476 | misses++; | 476 | misses++; |
| 477 | if (misses > 4) | 477 | if (misses > 4) |
| 478 | break; | 478 | break; |
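The compression readahead fix guards against shadow entries: the page cache radix tree can now hold exceptional (non-page) entries for evicted pages, so a bare radix_tree_lookup() no longer implies a usable struct page. The pattern in isolation, as a sketch:

	rcu_read_lock();
	page = radix_tree_lookup(&mapping->page_tree, pg_index);
	rcu_read_unlock();
	if (page && !radix_tree_exceptional_entry(page)) {
		/* a real page is cached; count it as a readahead miss */
	} else {
		/* empty slot or shadow entry: safe to read the page in */
	}

Dereferencing an exceptional entry as a struct page would be a bug, so every lookup on a tree that may contain them needs this filter.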
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cbd3a7d6fa68..88d1b1eedc9c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -5376,6 +5376,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5376 | int advance_right; | 5376 | int advance_right; |
| 5377 | u64 left_blockptr; | 5377 | u64 left_blockptr; |
| 5378 | u64 right_blockptr; | 5378 | u64 right_blockptr; |
| 5379 | u64 left_gen; | ||
| 5380 | u64 right_gen; | ||
| 5379 | u64 left_start_ctransid; | 5381 | u64 left_start_ctransid; |
| 5380 | u64 right_start_ctransid; | 5382 | u64 right_start_ctransid; |
| 5381 | u64 ctransid; | 5383 | u64 ctransid; |
| @@ -5640,7 +5642,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5640 | right_blockptr = btrfs_node_blockptr( | 5642 | right_blockptr = btrfs_node_blockptr( |
| 5641 | right_path->nodes[right_level], | 5643 | right_path->nodes[right_level], |
| 5642 | right_path->slots[right_level]); | 5644 | right_path->slots[right_level]); |
| 5643 | if (left_blockptr == right_blockptr) { | 5645 | left_gen = btrfs_node_ptr_generation( |
| 5646 | left_path->nodes[left_level], | ||
| 5647 | left_path->slots[left_level]); | ||
| 5648 | right_gen = btrfs_node_ptr_generation( | ||
| 5649 | right_path->nodes[right_level], | ||
| 5650 | right_path->slots[right_level]); | ||
| 5651 | if (left_blockptr == right_blockptr && | ||
| 5652 | left_gen == right_gen) { | ||
| 5644 | /* | 5653 | /* |
| 5645 | * As we're on a shared block, don't | 5654 | * As we're on a shared block, don't |
| 5646 | * allow going deeper. | 5655 | * allow going deeper. |
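A worked example of why the generation check is needed beside the blockptr comparison (bytenr and generation values hypothetical): a tree block can be freed and its bytenr reallocated to a brand-new block in a later transaction, so two snapshots may both point at the same bytenr while referring to different contents.

	/* left tree:  blockptr == X, ptr generation == 100 */
	/* right tree: blockptr == X, ptr generation == 103 */
	if (left_blockptr == right_blockptr &&
	    left_gen == right_gen) {
		/* genuinely shared: safe to skip the whole subtree */
	} else {
		/* same bytenr, different generation: must descend */
	}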
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c1a42ca519f..bc96c03dd259 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -351,6 +351,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
| 351 | #define BTRFS_FS_STATE_ERROR 0 | 351 | #define BTRFS_FS_STATE_ERROR 0 |
| 352 | #define BTRFS_FS_STATE_REMOUNTING 1 | 352 | #define BTRFS_FS_STATE_REMOUNTING 1 |
| 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 | 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 |
| 354 | #define BTRFS_FS_STATE_DEV_REPLACING 3 | ||
| 354 | 355 | ||
| 355 | /* Super block flags */ | 356 | /* Super block flags */ |
| 356 | /* Errors detected */ | 357 | /* Errors detected */ |
| @@ -1489,6 +1490,7 @@ struct btrfs_fs_info { | |||
| 1489 | */ | 1490 | */ |
| 1490 | struct list_head ordered_roots; | 1491 | struct list_head ordered_roots; |
| 1491 | 1492 | ||
| 1493 | struct mutex delalloc_root_mutex; | ||
| 1492 | spinlock_t delalloc_root_lock; | 1494 | spinlock_t delalloc_root_lock; |
| 1493 | /* all fs/file tree roots that have delalloc inodes. */ | 1495 | /* all fs/file tree roots that have delalloc inodes. */ |
| 1494 | struct list_head delalloc_roots; | 1496 | struct list_head delalloc_roots; |
| @@ -1503,28 +1505,27 @@ struct btrfs_fs_info { | |||
| 1503 | * A third pool does submit_bio to avoid deadlocking with the other | 1505 | * A third pool does submit_bio to avoid deadlocking with the other |
| 1504 | * two | 1506 | * two |
| 1505 | */ | 1507 | */ |
| 1506 | struct btrfs_workers generic_worker; | 1508 | struct btrfs_workqueue *workers; |
| 1507 | struct btrfs_workers workers; | 1509 | struct btrfs_workqueue *delalloc_workers; |
| 1508 | struct btrfs_workers delalloc_workers; | 1510 | struct btrfs_workqueue *flush_workers; |
| 1509 | struct btrfs_workers flush_workers; | 1511 | struct btrfs_workqueue *endio_workers; |
| 1510 | struct btrfs_workers endio_workers; | 1512 | struct btrfs_workqueue *endio_meta_workers; |
| 1511 | struct btrfs_workers endio_meta_workers; | 1513 | struct btrfs_workqueue *endio_raid56_workers; |
| 1512 | struct btrfs_workers endio_raid56_workers; | 1514 | struct btrfs_workqueue *rmw_workers; |
| 1513 | struct btrfs_workers rmw_workers; | 1515 | struct btrfs_workqueue *endio_meta_write_workers; |
| 1514 | struct btrfs_workers endio_meta_write_workers; | 1516 | struct btrfs_workqueue *endio_write_workers; |
| 1515 | struct btrfs_workers endio_write_workers; | 1517 | struct btrfs_workqueue *endio_freespace_worker; |
| 1516 | struct btrfs_workers endio_freespace_worker; | 1518 | struct btrfs_workqueue *submit_workers; |
| 1517 | struct btrfs_workers submit_workers; | 1519 | struct btrfs_workqueue *caching_workers; |
| 1518 | struct btrfs_workers caching_workers; | 1520 | struct btrfs_workqueue *readahead_workers; |
| 1519 | struct btrfs_workers readahead_workers; | ||
| 1520 | 1521 | ||
| 1521 | /* | 1522 | /* |
| 1522 | * fixup workers take dirty pages that didn't properly go through | 1523 | * fixup workers take dirty pages that didn't properly go through |
| 1523 | * the cow mechanism and make them safe to write. It happens | 1524 | * the cow mechanism and make them safe to write. It happens |
| 1524 | * for the sys_munmap function call path | 1525 | * for the sys_munmap function call path |
| 1525 | */ | 1526 | */ |
| 1526 | struct btrfs_workers fixup_workers; | 1527 | struct btrfs_workqueue *fixup_workers; |
| 1527 | struct btrfs_workers delayed_workers; | 1528 | struct btrfs_workqueue *delayed_workers; |
| 1528 | struct task_struct *transaction_kthread; | 1529 | struct task_struct *transaction_kthread; |
| 1529 | struct task_struct *cleaner_kthread; | 1530 | struct task_struct *cleaner_kthread; |
| 1530 | int thread_pool_size; | 1531 | int thread_pool_size; |
| @@ -1604,9 +1605,9 @@ struct btrfs_fs_info { | |||
| 1604 | atomic_t scrub_cancel_req; | 1605 | atomic_t scrub_cancel_req; |
| 1605 | wait_queue_head_t scrub_pause_wait; | 1606 | wait_queue_head_t scrub_pause_wait; |
| 1606 | int scrub_workers_refcnt; | 1607 | int scrub_workers_refcnt; |
| 1607 | struct btrfs_workers scrub_workers; | 1608 | struct btrfs_workqueue *scrub_workers; |
| 1608 | struct btrfs_workers scrub_wr_completion_workers; | 1609 | struct btrfs_workqueue *scrub_wr_completion_workers; |
| 1609 | struct btrfs_workers scrub_nocow_workers; | 1610 | struct btrfs_workqueue *scrub_nocow_workers; |
| 1610 | 1611 | ||
| 1611 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1612 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 1612 | u32 check_integrity_print_mask; | 1613 | u32 check_integrity_print_mask; |
| @@ -1647,7 +1648,7 @@ struct btrfs_fs_info { | |||
| 1647 | /* qgroup rescan items */ | 1648 | /* qgroup rescan items */ |
| 1648 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1649 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
| 1649 | struct btrfs_key qgroup_rescan_progress; | 1650 | struct btrfs_key qgroup_rescan_progress; |
| 1650 | struct btrfs_workers qgroup_rescan_workers; | 1651 | struct btrfs_workqueue *qgroup_rescan_workers; |
| 1651 | struct completion qgroup_rescan_completion; | 1652 | struct completion qgroup_rescan_completion; |
| 1652 | struct btrfs_work qgroup_rescan_work; | 1653 | struct btrfs_work qgroup_rescan_work; |
| 1653 | 1654 | ||
| @@ -1674,10 +1675,18 @@ struct btrfs_fs_info { | |||
| 1674 | 1675 | ||
| 1675 | atomic_t mutually_exclusive_operation_running; | 1676 | atomic_t mutually_exclusive_operation_running; |
| 1676 | 1677 | ||
| 1678 | struct percpu_counter bio_counter; | ||
| 1679 | wait_queue_head_t replace_wait; | ||
| 1680 | |||
| 1677 | struct semaphore uuid_tree_rescan_sem; | 1681 | struct semaphore uuid_tree_rescan_sem; |
| 1678 | unsigned int update_uuid_tree_gen:1; | 1682 | unsigned int update_uuid_tree_gen:1; |
| 1679 | }; | 1683 | }; |
| 1680 | 1684 | ||
| 1685 | struct btrfs_subvolume_writers { | ||
| 1686 | struct percpu_counter counter; | ||
| 1687 | wait_queue_head_t wait; | ||
| 1688 | }; | ||
| 1689 | |||
| 1681 | /* | 1690 | /* |
| 1682 | * in ram representation of the tree. extent_root is used for all allocations | 1691 | * in ram representation of the tree. extent_root is used for all allocations |
| 1683 | * and for the extent tree extent_root root. | 1692 | * and for the extent tree extent_root root. |
| @@ -1714,11 +1723,15 @@ struct btrfs_root { | |||
| 1714 | struct mutex log_mutex; | 1723 | struct mutex log_mutex; |
| 1715 | wait_queue_head_t log_writer_wait; | 1724 | wait_queue_head_t log_writer_wait; |
| 1716 | wait_queue_head_t log_commit_wait[2]; | 1725 | wait_queue_head_t log_commit_wait[2]; |
| 1726 | struct list_head log_ctxs[2]; | ||
| 1717 | atomic_t log_writers; | 1727 | atomic_t log_writers; |
| 1718 | atomic_t log_commit[2]; | 1728 | atomic_t log_commit[2]; |
| 1719 | atomic_t log_batch; | 1729 | atomic_t log_batch; |
| 1720 | unsigned long log_transid; | 1730 | int log_transid; |
| 1721 | unsigned long last_log_commit; | 1731 | /* Updated whether or not the commit succeeds. */ |
| 1732 | int log_transid_committed; | ||
| 1733 | /* Only updated when the commit succeeds. */ | ||
| 1734 | int last_log_commit; | ||
| 1722 | pid_t log_start_pid; | 1735 | pid_t log_start_pid; |
| 1723 | bool log_multiple_pids; | 1736 | bool log_multiple_pids; |
| 1724 | 1737 | ||
| @@ -1793,6 +1806,7 @@ struct btrfs_root { | |||
| 1793 | spinlock_t root_item_lock; | 1806 | spinlock_t root_item_lock; |
| 1794 | atomic_t refs; | 1807 | atomic_t refs; |
| 1795 | 1808 | ||
| 1809 | struct mutex delalloc_mutex; | ||
| 1796 | spinlock_t delalloc_lock; | 1810 | spinlock_t delalloc_lock; |
| 1797 | /* | 1811 | /* |
| 1798 | * all of the inodes that have delalloc bytes. It is possible for | 1812 | * all of the inodes that have delalloc bytes. It is possible for |
| @@ -1802,6 +1816,8 @@ struct btrfs_root { | |||
| 1802 | struct list_head delalloc_inodes; | 1816 | struct list_head delalloc_inodes; |
| 1803 | struct list_head delalloc_root; | 1817 | struct list_head delalloc_root; |
| 1804 | u64 nr_delalloc_inodes; | 1818 | u64 nr_delalloc_inodes; |
| 1819 | |||
| 1820 | struct mutex ordered_extent_mutex; | ||
| 1805 | /* | 1821 | /* |
| 1806 | * this is used by the balancing code to wait for all the pending | 1822 | * this is used by the balancing code to wait for all the pending |
| 1807 | * ordered extents | 1823 | * ordered extents |
| @@ -1822,6 +1838,8 @@ struct btrfs_root { | |||
| 1822 | * manipulation with the read-only status via SUBVOL_SETFLAGS | 1838 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
| 1823 | */ | 1839 | */ |
| 1824 | int send_in_progress; | 1840 | int send_in_progress; |
| 1841 | struct btrfs_subvolume_writers *subv_writers; | ||
| 1842 | atomic_t will_be_snapshoted; | ||
| 1825 | }; | 1843 | }; |
| 1826 | 1844 | ||
| 1827 | struct btrfs_ioctl_defrag_range_args { | 1845 | struct btrfs_ioctl_defrag_range_args { |
| @@ -3346,6 +3364,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
| 3346 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3364 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
| 3347 | struct btrfs_fs_info *fs_info); | 3365 | struct btrfs_fs_info *fs_info); |
| 3348 | int __get_raid_index(u64 flags); | 3366 | int __get_raid_index(u64 flags); |
| 3367 | |||
| 3368 | int btrfs_start_nocow_write(struct btrfs_root *root); | ||
| 3369 | void btrfs_end_nocow_write(struct btrfs_root *root); | ||
| 3349 | /* ctree.c */ | 3370 | /* ctree.c */ |
| 3350 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3371 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 3351 | int level, int *slot); | 3372 | int level, int *slot); |
| @@ -3723,7 +3744,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3723 | u32 min_type); | 3744 | u32 min_type); |
| 3724 | 3745 | ||
| 3725 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3746 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 3726 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); | 3747 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 3748 | int nr); | ||
| 3727 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3749 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 3728 | struct extent_state **cached_state); | 3750 | struct extent_state **cached_state); |
| 3729 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3751 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| @@ -4005,6 +4027,11 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, | |||
| 4005 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 4027 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 4006 | struct btrfs_scrub_progress *progress); | 4028 | struct btrfs_scrub_progress *progress); |
| 4007 | 4029 | ||
| 4030 | /* dev-replace.c */ | ||
| 4031 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||
| 4032 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | ||
| 4033 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | ||
| 4034 | |||
| 4008 | /* reada.c */ | 4035 | /* reada.c */ |
| 4009 | struct reada_control { | 4036 | struct reada_control { |
| 4010 | struct btrfs_root *root; /* tree to prefetch */ | 4037 | struct btrfs_root *root; /* tree to prefetch */ |
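ctree.h also gains a nocow-write gate: the btrfs_start_nocow_write()/btrfs_end_nocow_write() pair declared above, backed by the new btrfs_subvolume_writers percpu counter and the root's will_be_snapshoted flag. The implementation is outside these hunks, so the caller sketch below is an assumption (including the nonzero-on-success return and the do_cow_write() helper name):

	/*
	 * sketch: a nocow writer briefly holds the gate; the snapshot
	 * side sets will_be_snapshoted, then waits for subv_writers
	 * to drain so it never races an in-flight nocow write.
	 */
	if (!btrfs_start_nocow_write(root)) {
		/* snapshot pending: fall back to an ordinary COW write */
		return do_cow_write(inode, pos, len);
	}
	/* ... perform the in-place (nocow) write ... */
	btrfs_end_nocow_write(root);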
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 451b00c86f6c..33e561a84013 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -1392,11 +1392,11 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
| 1392 | return -ENOMEM; | 1392 | return -ENOMEM; |
| 1393 | 1393 | ||
| 1394 | async_work->delayed_root = delayed_root; | 1394 | async_work->delayed_root = delayed_root; |
| 1395 | async_work->work.func = btrfs_async_run_delayed_root; | 1395 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, |
| 1396 | async_work->work.flags = 0; | 1396 | NULL, NULL); |
| 1397 | async_work->nr = nr; | 1397 | async_work->nr = nr; |
| 1398 | 1398 | ||
| 1399 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); | 1399 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
| 1400 | return 0; | 1400 | return 0; |
| 1401 | } | 1401 | } |
| 1402 | 1402 | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f3bff89eecf0..31299646024d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -199,44 +199,31 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
| 199 | */ | 199 | */ |
| 200 | static struct btrfs_delayed_ref_head * | 200 | static struct btrfs_delayed_ref_head * |
| 201 | find_ref_head(struct rb_root *root, u64 bytenr, | 201 | find_ref_head(struct rb_root *root, u64 bytenr, |
| 202 | struct btrfs_delayed_ref_head **last, int return_bigger) | 202 | int return_bigger) |
| 203 | { | 203 | { |
| 204 | struct rb_node *n; | 204 | struct rb_node *n; |
| 205 | struct btrfs_delayed_ref_head *entry; | 205 | struct btrfs_delayed_ref_head *entry; |
| 206 | int cmp = 0; | ||
| 207 | 206 | ||
| 208 | again: | ||
| 209 | n = root->rb_node; | 207 | n = root->rb_node; |
| 210 | entry = NULL; | 208 | entry = NULL; |
| 211 | while (n) { | 209 | while (n) { |
| 212 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 210 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
| 213 | if (last) | ||
| 214 | *last = entry; | ||
| 215 | 211 | ||
| 216 | if (bytenr < entry->node.bytenr) | 212 | if (bytenr < entry->node.bytenr) |
| 217 | cmp = -1; | ||
| 218 | else if (bytenr > entry->node.bytenr) | ||
| 219 | cmp = 1; | ||
| 220 | else | ||
| 221 | cmp = 0; | ||
| 222 | |||
| 223 | if (cmp < 0) | ||
| 224 | n = n->rb_left; | 213 | n = n->rb_left; |
| 225 | else if (cmp > 0) | 214 | else if (bytenr > entry->node.bytenr) |
| 226 | n = n->rb_right; | 215 | n = n->rb_right; |
| 227 | else | 216 | else |
| 228 | return entry; | 217 | return entry; |
| 229 | } | 218 | } |
| 230 | if (entry && return_bigger) { | 219 | if (entry && return_bigger) { |
| 231 | if (cmp > 0) { | 220 | if (bytenr > entry->node.bytenr) { |
| 232 | n = rb_next(&entry->href_node); | 221 | n = rb_next(&entry->href_node); |
| 233 | if (!n) | 222 | if (!n) |
| 234 | n = rb_first(root); | 223 | n = rb_first(root); |
| 235 | entry = rb_entry(n, struct btrfs_delayed_ref_head, | 224 | entry = rb_entry(n, struct btrfs_delayed_ref_head, |
| 236 | href_node); | 225 | href_node); |
| 237 | bytenr = entry->node.bytenr; | 226 | return entry; |
| 238 | return_bigger = 0; | ||
| 239 | goto again; | ||
| 240 | } | 227 | } |
| 241 | return entry; | 228 | return entry; |
| 242 | } | 229 | } |
| @@ -415,12 +402,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans) | |||
| 415 | 402 | ||
| 416 | again: | 403 | again: |
| 417 | start = delayed_refs->run_delayed_start; | 404 | start = delayed_refs->run_delayed_start; |
| 418 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 405 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 419 | if (!head && !loop) { | 406 | if (!head && !loop) { |
| 420 | delayed_refs->run_delayed_start = 0; | 407 | delayed_refs->run_delayed_start = 0; |
| 421 | start = 0; | 408 | start = 0; |
| 422 | loop = true; | 409 | loop = true; |
| 423 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 410 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 424 | if (!head) | 411 | if (!head) |
| 425 | return NULL; | 412 | return NULL; |
| 426 | } else if (!head && loop) { | 413 | } else if (!head && loop) { |
| @@ -508,6 +495,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 508 | ref = btrfs_delayed_node_to_head(update); | 495 | ref = btrfs_delayed_node_to_head(update); |
| 509 | BUG_ON(existing_ref->is_data != ref->is_data); | 496 | BUG_ON(existing_ref->is_data != ref->is_data); |
| 510 | 497 | ||
| 498 | spin_lock(&existing_ref->lock); | ||
| 511 | if (ref->must_insert_reserved) { | 499 | if (ref->must_insert_reserved) { |
| 512 | /* if the extent was freed and then | 500 | /* if the extent was freed and then |
| 513 | * reallocated before the delayed ref | 501 | * reallocated before the delayed ref |
| @@ -549,7 +537,6 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 549 | * only need the lock for this case cause we could be processing it | 537 | * only need the lock for this case cause we could be processing it |
| 550 | * currently, for refs we just added we know we're a-ok. | 538 | * currently, for refs we just added we know we're a-ok. |
| 551 | */ | 539 | */ |
| 552 | spin_lock(&existing_ref->lock); | ||
| 553 | existing->ref_mod += update->ref_mod; | 540 | existing->ref_mod += update->ref_mod; |
| 554 | spin_unlock(&existing_ref->lock); | 541 | spin_unlock(&existing_ref->lock); |
| 555 | } | 542 | } |
| @@ -898,7 +885,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
| 898 | struct btrfs_delayed_ref_root *delayed_refs; | 885 | struct btrfs_delayed_ref_root *delayed_refs; |
| 899 | 886 | ||
| 900 | delayed_refs = &trans->transaction->delayed_refs; | 887 | delayed_refs = &trans->transaction->delayed_refs; |
| 901 | return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0); | 888 | return find_ref_head(&delayed_refs->href_root, bytenr, 0); |
| 902 | } | 889 | } |
| 903 | 890 | ||
| 904 | void btrfs_delayed_ref_exit(void) | 891 | void btrfs_delayed_ref_exit(void) |
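With the `again` loop gone, find_ref_head() resolves the return_bigger case in a single pass. A worked example of the new semantics (bytenr values hypothetical), which is what lets btrfs_select_ref_head() resume from run_delayed_start and still cover the whole tree:

	/* heads exist at bytenrs {4096, 8192, 12288}, return_bigger == 1 */
	find_ref_head(root,  8192, 1);	/* ->  8192 (exact match) */
	find_ref_head(root, 10000, 1);	/* -> 12288 (next bigger) */
	find_ref_head(root, 20000, 1);	/* ->  4096 (wraps via rb_first) */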
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564c92638b20..9f2290509aca 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -431,6 +431,35 @@ leave_no_lock: | |||
| 431 | return ret; | 431 | return ret; |
| 432 | } | 432 | } |
| 433 | 433 | ||
| 434 | /* | ||
| 435 | * block until all in-flight bios are finished. | ||
| 436 | */ | ||
| 437 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | ||
| 438 | { | ||
| 439 | s64 writers; | ||
| 440 | DEFINE_WAIT(wait); | ||
| 441 | |||
| 442 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 443 | do { | ||
| 444 | prepare_to_wait(&fs_info->replace_wait, &wait, | ||
| 445 | TASK_UNINTERRUPTIBLE); | ||
| 446 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
| 447 | if (writers) | ||
| 448 | schedule(); | ||
| 449 | finish_wait(&fs_info->replace_wait, &wait); | ||
| 450 | } while (writers); | ||
| 451 | } | ||
| 452 | |||
| 453 | /* | ||
| 454 | * we have removed the target device, so it is safe to allow new bio requests. | ||
| 455 | */ | ||
| 456 | static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) | ||
| 457 | { | ||
| 458 | clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 459 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 460 | wake_up(&fs_info->replace_wait); | ||
| 461 | } | ||
| 462 | |||
| 434 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 463 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
| 435 | int scrub_ret) | 464 | int scrub_ret) |
| 436 | { | 465 | { |
| @@ -458,17 +487,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 458 | src_device = dev_replace->srcdev; | 487 | src_device = dev_replace->srcdev; |
| 459 | btrfs_dev_replace_unlock(dev_replace); | 488 | btrfs_dev_replace_unlock(dev_replace); |
| 460 | 489 | ||
| 461 | /* replace old device with new one in mapping tree */ | ||
| 462 | if (!scrub_ret) | ||
| 463 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 464 | src_device, | ||
| 465 | tgt_device); | ||
| 466 | |||
| 467 | /* | 490 | /* |
| 468 | * flush all outstanding I/O and inode extent mappings before the | 491 | * flush all outstanding I/O and inode extent mappings before the |
| 469 | * copy operation is declared as being finished | 492 | * copy operation is declared as being finished |
| 470 | */ | 493 | */ |
| 471 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 494 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 472 | if (ret) { | 495 | if (ret) { |
| 473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 496 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| 474 | return ret; | 497 | return ret; |
| @@ -484,6 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 484 | WARN_ON(ret); | 507 | WARN_ON(ret); |
| 485 | 508 | ||
| 486 | /* keep away write_all_supers() during the finishing procedure */ | 509 | /* keep away write_all_supers() during the finishing procedure */ |
| 510 | mutex_lock(&root->fs_info->chunk_mutex); | ||
| 487 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 511 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 488 | btrfs_dev_replace_lock(dev_replace); | 512 | btrfs_dev_replace_lock(dev_replace); |
| 489 | dev_replace->replace_state = | 513 | dev_replace->replace_state = |
| @@ -494,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 494 | dev_replace->time_stopped = get_seconds(); | 518 | dev_replace->time_stopped = get_seconds(); |
| 495 | dev_replace->item_needs_writeback = 1; | 519 | dev_replace->item_needs_writeback = 1; |
| 496 | 520 | ||
| 497 | if (scrub_ret) { | 521 | /* replace old device with new one in mapping tree */ |
| 522 | if (!scrub_ret) { | ||
| 523 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 524 | src_device, | ||
| 525 | tgt_device); | ||
| 526 | } else { | ||
| 498 | printk_in_rcu(KERN_ERR | 527 | printk_in_rcu(KERN_ERR |
| 499 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | 528 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", |
| 500 | src_device->missing ? "<missing disk>" : | 529 | src_device->missing ? "<missing disk>" : |
| @@ -503,6 +532,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 503 | rcu_str_deref(tgt_device->name), scrub_ret); | 532 | rcu_str_deref(tgt_device->name), scrub_ret); |
| 504 | btrfs_dev_replace_unlock(dev_replace); | 533 | btrfs_dev_replace_unlock(dev_replace); |
| 505 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 534 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 535 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 506 | if (tgt_device) | 536 | if (tgt_device) |
| 507 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 537 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
| 508 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 538 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| @@ -532,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 532 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
| 533 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
| 534 | 564 | ||
| 565 | btrfs_rm_dev_replace_blocked(fs_info); | ||
| 566 | |||
| 535 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
| 536 | 568 | ||
| 569 | btrfs_rm_dev_replace_unblocked(fs_info); | ||
| 570 | |||
| 537 | /* | 571 | /* |
| 538 | * this is again a consistent state where no dev_replace procedure | 572 | * this is again a consistent state where no dev_replace procedure |
| 539 | * is running, the target device is part of the filesystem, the | 573 | * is running, the target device is part of the filesystem, the |
| @@ -543,6 +577,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 543 | */ | 577 | */ |
| 544 | btrfs_dev_replace_unlock(dev_replace); | 578 | btrfs_dev_replace_unlock(dev_replace); |
| 545 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 579 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 580 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 546 | 581 | ||
| 547 | /* write back the superblocks */ | 582 | /* write back the superblocks */ |
| 548 | trans = btrfs_start_transaction(root, 0); | 583 | trans = btrfs_start_transaction(root, 0); |
| @@ -862,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | |||
| 862 | mutex_unlock(&dev_replace->lock_management_lock); | 897 | mutex_unlock(&dev_replace->lock_management_lock); |
| 863 | } | 898 | } |
| 864 | } | 899 | } |
| 900 | |||
| 901 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | ||
| 902 | { | ||
| 903 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 904 | } | ||
| 905 | |||
| 906 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
| 907 | { | ||
| 908 | percpu_counter_dec(&fs_info->bio_counter); | ||
| 909 | |||
| 910 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 911 | wake_up(&fs_info->replace_wait); | ||
| 912 | } | ||
| 913 | |||
| 914 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | ||
| 915 | { | ||
| 916 | DEFINE_WAIT(wait); | ||
| 917 | again: | ||
| 918 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 919 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | ||
| 920 | btrfs_bio_counter_dec(fs_info); | ||
| 921 | wait_event(fs_info->replace_wait, | ||
| 922 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | ||
| 923 | &fs_info->fs_state)); | ||
| 924 | goto again; | ||
| 925 | } | ||
| 926 | |||
| 927 | } | ||
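The three helpers above are the whole mechanism: a regular bio submitter brackets its I/O with btrfs_bio_counter_inc_blocked()/btrfs_bio_counter_dec(), while the finishing path, between btrfs_rm_dev_replace_blocked() and btrfs_rm_dev_replace_unblocked() in the hunk above, sets BTRFS_FS_STATE_DEV_REPLACING and (in code not shown in this file) waits on replace_wait for in-flight bios to drain before swapping the source device out. A minimal sketch of the submitter side, assuming only the helpers added here; submit_one_bio_gated() is a hypothetical wrapper, not a function from this commit:

static int submit_one_bio_gated(struct btrfs_fs_info *fs_info,
				struct bio *bio, int rw)
{
	int ret;

	/* blocks (and retries) while BTRFS_FS_STATE_DEV_REPLACING is set */
	btrfs_bio_counter_inc_blocked(fs_info);
	ret = submit_bio_wait(rw, bio);
	/* wakes replace_wait so a pending replace can finish once we drain */
	btrfs_bio_counter_dec(fs_info);
	return ret;
}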
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81ea55314b1f..bd0f752b797b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
| 678 | 678 | ||
| 679 | fs_info = end_io_wq->info; | 679 | fs_info = end_io_wq->info; |
| 680 | end_io_wq->error = err; | 680 | end_io_wq->error = err; |
| 681 | end_io_wq->work.func = end_workqueue_fn; | 681 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); |
| 682 | end_io_wq->work.flags = 0; | ||
| 683 | 682 | ||
| 684 | if (bio->bi_rw & REQ_WRITE) { | 683 | if (bio->bi_rw & REQ_WRITE) { |
| 685 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 684 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
| 686 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 685 | btrfs_queue_work(fs_info->endio_meta_write_workers, |
| 687 | &end_io_wq->work); | 686 | &end_io_wq->work); |
| 688 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 687 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
| 689 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 688 | btrfs_queue_work(fs_info->endio_freespace_worker, |
| 690 | &end_io_wq->work); | 689 | &end_io_wq->work); |
| 691 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 690 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 692 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 691 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 693 | &end_io_wq->work); | 692 | &end_io_wq->work); |
| 694 | else | 693 | else |
| 695 | btrfs_queue_worker(&fs_info->endio_write_workers, | 694 | btrfs_queue_work(fs_info->endio_write_workers, |
| 696 | &end_io_wq->work); | 695 | &end_io_wq->work); |
| 697 | } else { | 696 | } else { |
| 698 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 697 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 699 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 698 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 700 | &end_io_wq->work); | 699 | &end_io_wq->work); |
| 701 | else if (end_io_wq->metadata) | 700 | else if (end_io_wq->metadata) |
| 702 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 701 | btrfs_queue_work(fs_info->endio_meta_workers, |
| 703 | &end_io_wq->work); | 702 | &end_io_wq->work); |
| 704 | else | 703 | else |
| 705 | btrfs_queue_worker(&fs_info->endio_workers, | 704 | btrfs_queue_work(fs_info->endio_workers, |
| 706 | &end_io_wq->work); | 705 | &end_io_wq->work); |
| 707 | } | 706 | } |
| 708 | } | 707 | } |
| 709 | 708 | ||
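The hunk above is the template for the rest of the conversion: instead of assigning work.func and work.flags by hand and queueing on an embedded btrfs_workers pool, a caller now initializes the item with btrfs_init_work() (main function plus optional ordered-completion and ordered-free callbacks, either of which may be NULL) and hands it to btrfs_queue_work() on one of the fs_info workqueue pointers. A hedged sketch of the per-item pattern; struct my_async and my_async_fn() are illustrative names, not from this commit:

struct my_async {
	struct btrfs_fs_info *fs_info;
	struct btrfs_work work;		/* embedded, so container_of() works */
};

static void my_async_fn(struct btrfs_work *work)
{
	struct my_async *a = container_of(work, struct my_async, work);

	/* ... the deferred completion work runs here ... */
	kfree(a);
}

static int queue_my_async(struct btrfs_fs_info *fs_info)
{
	struct my_async *a = kmalloc(sizeof(*a), GFP_NOFS);

	if (!a)
		return -ENOMEM;
	a->fs_info = fs_info;
	/* no ordered_func/ordered_free needed for a plain item */
	btrfs_init_work(&a->work, my_async_fn, NULL, NULL);
	btrfs_queue_work(fs_info->endio_workers, &a->work);
	return 0;
}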
| @@ -738,7 +737,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 738 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | 737 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) |
| 739 | { | 738 | { |
| 740 | unsigned long limit = min_t(unsigned long, | 739 | unsigned long limit = min_t(unsigned long, |
| 741 | info->workers.max_workers, | 740 | info->thread_pool_size, |
| 742 | info->fs_devices->open_devices); | 741 | info->fs_devices->open_devices); |
| 743 | return 256 * limit; | 742 | return 256 * limit; |
| 744 | } | 743 | } |
| @@ -811,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 811 | async->submit_bio_start = submit_bio_start; | 810 | async->submit_bio_start = submit_bio_start; |
| 812 | async->submit_bio_done = submit_bio_done; | 811 | async->submit_bio_done = submit_bio_done; |
| 813 | 812 | ||
| 814 | async->work.func = run_one_async_start; | 813 | btrfs_init_work(&async->work, run_one_async_start, |
| 815 | async->work.ordered_func = run_one_async_done; | 814 | run_one_async_done, run_one_async_free); |
| 816 | async->work.ordered_free = run_one_async_free; | ||
| 817 | 815 | ||
| 818 | async->work.flags = 0; | ||
| 819 | async->bio_flags = bio_flags; | 816 | async->bio_flags = bio_flags; |
| 820 | async->bio_offset = bio_offset; | 817 | async->bio_offset = bio_offset; |
| 821 | 818 | ||
| @@ -824,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 824 | atomic_inc(&fs_info->nr_async_submits); | 821 | atomic_inc(&fs_info->nr_async_submits); |
| 825 | 822 | ||
| 826 | if (rw & REQ_SYNC) | 823 | if (rw & REQ_SYNC) |
| 827 | btrfs_set_work_high_prio(&async->work); | 824 | btrfs_set_work_high_priority(&async->work); |
| 828 | 825 | ||
| 829 | btrfs_queue_worker(&fs_info->workers, &async->work); | 826 | btrfs_queue_work(fs_info->workers, &async->work); |
| 830 | 827 | ||
| 831 | while (atomic_read(&fs_info->async_submit_draining) && | 828 | while (atomic_read(&fs_info->async_submit_draining) && |
| 832 | atomic_read(&fs_info->nr_async_submits)) { | 829 | atomic_read(&fs_info->nr_async_submits)) { |
| @@ -1149,6 +1146,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1149 | } | 1146 | } |
| 1150 | } | 1147 | } |
| 1151 | 1148 | ||
| 1149 | static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | ||
| 1150 | { | ||
| 1151 | struct btrfs_subvolume_writers *writers; | ||
| 1152 | int ret; | ||
| 1153 | |||
| 1154 | writers = kmalloc(sizeof(*writers), GFP_NOFS); | ||
| 1155 | if (!writers) | ||
| 1156 | return ERR_PTR(-ENOMEM); | ||
| 1157 | |||
| 1158 | ret = percpu_counter_init(&writers->counter, 0); | ||
| 1159 | if (ret < 0) { | ||
| 1160 | kfree(writers); | ||
| 1161 | return ERR_PTR(ret); | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | init_waitqueue_head(&writers->wait); | ||
| 1165 | return writers; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | static void | ||
| 1169 | btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | ||
| 1170 | { | ||
| 1171 | percpu_counter_destroy(&writers->counter); | ||
| 1172 | kfree(writers); | ||
| 1173 | } | ||
| 1174 | |||
| 1152 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1175 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
| 1153 | u32 stripesize, struct btrfs_root *root, | 1176 | u32 stripesize, struct btrfs_root *root, |
| 1154 | struct btrfs_fs_info *fs_info, | 1177 | struct btrfs_fs_info *fs_info, |
| @@ -1194,16 +1217,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1194 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
| 1195 | mutex_init(&root->objectid_mutex); | 1218 | mutex_init(&root->objectid_mutex); |
| 1196 | mutex_init(&root->log_mutex); | 1219 | mutex_init(&root->log_mutex); |
| 1220 | mutex_init(&root->ordered_extent_mutex); | ||
| 1221 | mutex_init(&root->delalloc_mutex); | ||
| 1197 | init_waitqueue_head(&root->log_writer_wait); | 1222 | init_waitqueue_head(&root->log_writer_wait); |
| 1198 | init_waitqueue_head(&root->log_commit_wait[0]); | 1223 | init_waitqueue_head(&root->log_commit_wait[0]); |
| 1199 | init_waitqueue_head(&root->log_commit_wait[1]); | 1224 | init_waitqueue_head(&root->log_commit_wait[1]); |
| 1225 | INIT_LIST_HEAD(&root->log_ctxs[0]); | ||
| 1226 | INIT_LIST_HEAD(&root->log_ctxs[1]); | ||
| 1200 | atomic_set(&root->log_commit[0], 0); | 1227 | atomic_set(&root->log_commit[0], 0); |
| 1201 | atomic_set(&root->log_commit[1], 0); | 1228 | atomic_set(&root->log_commit[1], 0); |
| 1202 | atomic_set(&root->log_writers, 0); | 1229 | atomic_set(&root->log_writers, 0); |
| 1203 | atomic_set(&root->log_batch, 0); | 1230 | atomic_set(&root->log_batch, 0); |
| 1204 | atomic_set(&root->orphan_inodes, 0); | 1231 | atomic_set(&root->orphan_inodes, 0); |
| 1205 | atomic_set(&root->refs, 1); | 1232 | atomic_set(&root->refs, 1); |
| 1233 | atomic_set(&root->will_be_snapshoted, 0); | ||
| 1206 | root->log_transid = 0; | 1234 | root->log_transid = 0; |
| 1235 | root->log_transid_committed = -1; | ||
| 1207 | root->last_log_commit = 0; | 1236 | root->last_log_commit = 0; |
| 1208 | if (fs_info) | 1237 | if (fs_info) |
| 1209 | extent_io_tree_init(&root->dirty_log_pages, | 1238 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1417,6 +1446,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 1417 | WARN_ON(root->log_root); | 1446 | WARN_ON(root->log_root); |
| 1418 | root->log_root = log_root; | 1447 | root->log_root = log_root; |
| 1419 | root->log_transid = 0; | 1448 | root->log_transid = 0; |
| 1449 | root->log_transid_committed = -1; | ||
| 1420 | root->last_log_commit = 0; | 1450 | root->last_log_commit = 0; |
| 1421 | return 0; | 1451 | return 0; |
| 1422 | } | 1452 | } |
| @@ -1498,6 +1528,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
| 1498 | int btrfs_init_fs_root(struct btrfs_root *root) | 1528 | int btrfs_init_fs_root(struct btrfs_root *root) |
| 1499 | { | 1529 | { |
| 1500 | int ret; | 1530 | int ret; |
| 1531 | struct btrfs_subvolume_writers *writers; | ||
| 1501 | 1532 | ||
| 1502 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1533 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
| 1503 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1534 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
| @@ -1507,6 +1538,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
| 1507 | goto fail; | 1538 | goto fail; |
| 1508 | } | 1539 | } |
| 1509 | 1540 | ||
| 1541 | writers = btrfs_alloc_subvolume_writers(); | ||
| 1542 | if (IS_ERR(writers)) { | ||
| 1543 | ret = PTR_ERR(writers); | ||
| 1544 | goto fail; | ||
| 1545 | } | ||
| 1546 | root->subv_writers = writers; | ||
| 1547 | |||
| 1510 | btrfs_init_free_ino_ctl(root); | 1548 | btrfs_init_free_ino_ctl(root); |
| 1511 | mutex_init(&root->fs_commit_mutex); | 1549 | mutex_init(&root->fs_commit_mutex); |
| 1512 | spin_lock_init(&root->cache_lock); | 1550 | spin_lock_init(&root->cache_lock); |
| @@ -1514,8 +1552,11 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
| 1514 | 1552 | ||
| 1515 | ret = get_anon_bdev(&root->anon_dev); | 1553 | ret = get_anon_bdev(&root->anon_dev); |
| 1516 | if (ret) | 1554 | if (ret) |
| 1517 | goto fail; | 1555 | goto free_writers; |
| 1518 | return 0; | 1556 | return 0; |
| 1557 | |||
| 1558 | free_writers: | ||
| 1559 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 1519 | fail: | 1560 | fail: |
| 1520 | kfree(root->free_ino_ctl); | 1561 | kfree(root->free_ino_ctl); |
| 1521 | kfree(root->free_ino_pinned); | 1562 | kfree(root->free_ino_pinned); |
| @@ -1990,23 +2031,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, | |||
| 1990 | /* helper to cleanup workers */ | 2031 | /* helper to cleanup workers */ |
| 1991 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | 2032 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) |
| 1992 | { | 2033 | { |
| 1993 | btrfs_stop_workers(&fs_info->generic_worker); | 2034 | btrfs_destroy_workqueue(fs_info->fixup_workers); |
| 1994 | btrfs_stop_workers(&fs_info->fixup_workers); | 2035 | btrfs_destroy_workqueue(fs_info->delalloc_workers); |
| 1995 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2036 | btrfs_destroy_workqueue(fs_info->workers); |
| 1996 | btrfs_stop_workers(&fs_info->workers); | 2037 | btrfs_destroy_workqueue(fs_info->endio_workers); |
| 1997 | btrfs_stop_workers(&fs_info->endio_workers); | 2038 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
| 1998 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2039 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
| 1999 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | 2040 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
| 2000 | btrfs_stop_workers(&fs_info->rmw_workers); | 2041 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
| 2001 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2042 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
| 2002 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2043 | btrfs_destroy_workqueue(fs_info->endio_freespace_worker); |
| 2003 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2044 | btrfs_destroy_workqueue(fs_info->submit_workers); |
| 2004 | btrfs_stop_workers(&fs_info->submit_workers); | 2045 | btrfs_destroy_workqueue(fs_info->delayed_workers); |
| 2005 | btrfs_stop_workers(&fs_info->delayed_workers); | 2046 | btrfs_destroy_workqueue(fs_info->caching_workers); |
| 2006 | btrfs_stop_workers(&fs_info->caching_workers); | 2047 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
| 2007 | btrfs_stop_workers(&fs_info->readahead_workers); | 2048 | btrfs_destroy_workqueue(fs_info->flush_workers); |
| 2008 | btrfs_stop_workers(&fs_info->flush_workers); | 2049 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
| 2009 | btrfs_stop_workers(&fs_info->qgroup_rescan_workers); | ||
| 2010 | } | 2050 | } |
| 2011 | 2051 | ||
| 2012 | static void free_root_extent_buffers(struct btrfs_root *root) | 2052 | static void free_root_extent_buffers(struct btrfs_root *root) |
| @@ -2097,6 +2137,8 @@ int open_ctree(struct super_block *sb, | |||
| 2097 | int err = -EINVAL; | 2137 | int err = -EINVAL; |
| 2098 | int num_backups_tried = 0; | 2138 | int num_backups_tried = 0; |
| 2099 | int backup_index = 0; | 2139 | int backup_index = 0; |
| 2140 | int max_active; | ||
| 2141 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2100 | bool create_uuid_tree; | 2142 | bool create_uuid_tree; |
| 2101 | bool check_uuid_tree; | 2143 | bool check_uuid_tree; |
| 2102 | 2144 | ||
| @@ -2133,10 +2175,16 @@ int open_ctree(struct super_block *sb, | |||
| 2133 | goto fail_dirty_metadata_bytes; | 2175 | goto fail_dirty_metadata_bytes; |
| 2134 | } | 2176 | } |
| 2135 | 2177 | ||
| 2178 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | ||
| 2179 | if (ret) { | ||
| 2180 | err = ret; | ||
| 2181 | goto fail_delalloc_bytes; | ||
| 2182 | } | ||
| 2183 | |||
| 2136 | fs_info->btree_inode = new_inode(sb); | 2184 | fs_info->btree_inode = new_inode(sb); |
| 2137 | if (!fs_info->btree_inode) { | 2185 | if (!fs_info->btree_inode) { |
| 2138 | err = -ENOMEM; | 2186 | err = -ENOMEM; |
| 2139 | goto fail_delalloc_bytes; | 2187 | goto fail_bio_counter; |
| 2140 | } | 2188 | } |
| 2141 | 2189 | ||
| 2142 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2190 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
| @@ -2159,6 +2207,7 @@ int open_ctree(struct super_block *sb, | |||
| 2159 | spin_lock_init(&fs_info->buffer_lock); | 2207 | spin_lock_init(&fs_info->buffer_lock); |
| 2160 | rwlock_init(&fs_info->tree_mod_log_lock); | 2208 | rwlock_init(&fs_info->tree_mod_log_lock); |
| 2161 | mutex_init(&fs_info->reloc_mutex); | 2209 | mutex_init(&fs_info->reloc_mutex); |
| 2210 | mutex_init(&fs_info->delalloc_root_mutex); | ||
| 2162 | seqlock_init(&fs_info->profiles_lock); | 2211 | seqlock_init(&fs_info->profiles_lock); |
| 2163 | 2212 | ||
| 2164 | init_completion(&fs_info->kobj_unregister); | 2213 | init_completion(&fs_info->kobj_unregister); |
| @@ -2211,6 +2260,7 @@ int open_ctree(struct super_block *sb, | |||
| 2211 | atomic_set(&fs_info->scrub_pause_req, 0); | 2260 | atomic_set(&fs_info->scrub_pause_req, 0); |
| 2212 | atomic_set(&fs_info->scrubs_paused, 0); | 2261 | atomic_set(&fs_info->scrubs_paused, 0); |
| 2213 | atomic_set(&fs_info->scrub_cancel_req, 0); | 2262 | atomic_set(&fs_info->scrub_cancel_req, 0); |
| 2263 | init_waitqueue_head(&fs_info->replace_wait); | ||
| 2214 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2264 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
| 2215 | fs_info->scrub_workers_refcnt = 0; | 2265 | fs_info->scrub_workers_refcnt = 0; |
| 2216 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2266 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| @@ -2458,104 +2508,68 @@ int open_ctree(struct super_block *sb, | |||
| 2458 | goto fail_alloc; | 2508 | goto fail_alloc; |
| 2459 | } | 2509 | } |
| 2460 | 2510 | ||
| 2461 | btrfs_init_workers(&fs_info->generic_worker, | 2511 | max_active = fs_info->thread_pool_size; |
| 2462 | "genwork", 1, NULL); | ||
| 2463 | |||
| 2464 | btrfs_init_workers(&fs_info->workers, "worker", | ||
| 2465 | fs_info->thread_pool_size, | ||
| 2466 | &fs_info->generic_worker); | ||
| 2467 | 2512 | ||
| 2468 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 2513 | fs_info->workers = |
| 2469 | fs_info->thread_pool_size, NULL); | 2514 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, |
| 2515 | max_active, 16); | ||
| 2470 | 2516 | ||
| 2471 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | 2517 | fs_info->delalloc_workers = |
| 2472 | fs_info->thread_pool_size, NULL); | 2518 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); |
| 2473 | 2519 | ||
| 2474 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2520 | fs_info->flush_workers = |
| 2475 | min_t(u64, fs_devices->num_devices, | 2521 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); |
| 2476 | fs_info->thread_pool_size), NULL); | ||
| 2477 | 2522 | ||
| 2478 | btrfs_init_workers(&fs_info->caching_workers, "cache", | 2523 | fs_info->caching_workers = |
| 2479 | fs_info->thread_pool_size, NULL); | 2524 | btrfs_alloc_workqueue("cache", flags, max_active, 0); |
| 2480 | 2525 | ||
| 2481 | /* a higher idle thresh on the submit workers makes it much more | 2526 | /* |
| 2527 | * a higher idle thresh on the submit workers makes it much more | ||
| 2482 | * likely that bios will be sent down in a sane order to the | 2528 | * likely that bios will be sent down in a sane order to the |
| 2483 | * devices | 2529 | * devices |
| 2484 | */ | 2530 | */ |
| 2485 | fs_info->submit_workers.idle_thresh = 64; | 2531 | fs_info->submit_workers = |
| 2486 | 2532 | btrfs_alloc_workqueue("submit", flags, | |
| 2487 | fs_info->workers.idle_thresh = 16; | 2533 | min_t(u64, fs_devices->num_devices, |
| 2488 | fs_info->workers.ordered = 1; | 2534 | max_active), 64); |
| 2489 | 2535 | ||
| 2490 | fs_info->delalloc_workers.idle_thresh = 2; | 2536 | fs_info->fixup_workers = |
| 2491 | fs_info->delalloc_workers.ordered = 1; | 2537 | btrfs_alloc_workqueue("fixup", flags, 1, 0); |
| 2492 | |||
| 2493 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, | ||
| 2494 | &fs_info->generic_worker); | ||
| 2495 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
| 2496 | fs_info->thread_pool_size, | ||
| 2497 | &fs_info->generic_worker); | ||
| 2498 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
| 2499 | fs_info->thread_pool_size, | ||
| 2500 | &fs_info->generic_worker); | ||
| 2501 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
| 2502 | "endio-meta-write", fs_info->thread_pool_size, | ||
| 2503 | &fs_info->generic_worker); | ||
| 2504 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
| 2505 | "endio-raid56", fs_info->thread_pool_size, | ||
| 2506 | &fs_info->generic_worker); | ||
| 2507 | btrfs_init_workers(&fs_info->rmw_workers, | ||
| 2508 | "rmw", fs_info->thread_pool_size, | ||
| 2509 | &fs_info->generic_worker); | ||
| 2510 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
| 2511 | fs_info->thread_pool_size, | ||
| 2512 | &fs_info->generic_worker); | ||
| 2513 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
| 2514 | 1, &fs_info->generic_worker); | ||
| 2515 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
| 2516 | fs_info->thread_pool_size, | ||
| 2517 | &fs_info->generic_worker); | ||
| 2518 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
| 2519 | fs_info->thread_pool_size, | ||
| 2520 | &fs_info->generic_worker); | ||
| 2521 | btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, | ||
| 2522 | &fs_info->generic_worker); | ||
| 2523 | 2538 | ||
| 2524 | /* | 2539 | /* |
| 2525 | * endios are largely parallel and should have a very | 2540 | * endios are largely parallel and should have a very |
| 2526 | * low idle thresh | 2541 | * low idle thresh |
| 2527 | */ | 2542 | */ |
| 2528 | fs_info->endio_workers.idle_thresh = 4; | 2543 | fs_info->endio_workers = |
| 2529 | fs_info->endio_meta_workers.idle_thresh = 4; | 2544 | btrfs_alloc_workqueue("endio", flags, max_active, 4); |
| 2530 | fs_info->endio_raid56_workers.idle_thresh = 4; | 2545 | fs_info->endio_meta_workers = |
| 2531 | fs_info->rmw_workers.idle_thresh = 2; | 2546 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); |
| 2532 | 2547 | fs_info->endio_meta_write_workers = | |
| 2533 | fs_info->endio_write_workers.idle_thresh = 2; | 2548 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
| 2534 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2549 | fs_info->endio_raid56_workers = |
| 2535 | fs_info->readahead_workers.idle_thresh = 2; | 2550 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
| 2536 | 2551 | fs_info->rmw_workers = | |
| 2537 | /* | 2552 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
| 2538 | * btrfs_start_workers can really only fail because of ENOMEM so just | 2553 | fs_info->endio_write_workers = |
| 2539 | * return -ENOMEM if any of these fail. | 2554 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); |
| 2540 | */ | 2555 | fs_info->endio_freespace_worker = |
| 2541 | ret = btrfs_start_workers(&fs_info->workers); | 2556 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); |
| 2542 | ret |= btrfs_start_workers(&fs_info->generic_worker); | 2557 | fs_info->delayed_workers = |
| 2543 | ret |= btrfs_start_workers(&fs_info->submit_workers); | 2558 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); |
| 2544 | ret |= btrfs_start_workers(&fs_info->delalloc_workers); | 2559 | fs_info->readahead_workers = |
| 2545 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2560 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
| 2546 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2561 | fs_info->qgroup_rescan_workers = |
| 2547 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2562 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
| 2548 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | 2563 | |
| 2549 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | 2564 | if (!(fs_info->workers && fs_info->delalloc_workers && |
| 2550 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2565 | fs_info->submit_workers && fs_info->flush_workers && |
| 2551 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2566 | fs_info->endio_workers && fs_info->endio_meta_workers && |
| 2552 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2567 | fs_info->endio_meta_write_workers && |
| 2553 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2568 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
| 2554 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2569 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
| 2555 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2570 | fs_info->caching_workers && fs_info->readahead_workers && |
| 2556 | ret |= btrfs_start_workers(&fs_info->flush_workers); | 2571 | fs_info->fixup_workers && fs_info->delayed_workers && |
| 2557 | ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); | 2572 | fs_info->qgroup_rescan_workers)) { |
| 2558 | if (ret) { | ||
| 2559 | err = -ENOMEM; | 2573 | err = -ENOMEM; |
| 2560 | goto fail_sb_buffer; | 2574 | goto fail_sb_buffer; |
| 2561 | } | 2575 | } |
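Worth noting how the tuning knobs map across this rewrite: each old idle_thresh assignment becomes the last argument of btrfs_alloc_workqueue(), the per-pool thread count becomes max_active, and failure becomes a single NULL check over all the pools instead of OR-ing btrfs_start_workers() return codes. A minimal sketch of one allocation under those assumptions ("example" is a made-up queue name):

struct btrfs_workqueue *wq;

wq = btrfs_alloc_workqueue("example",
			   WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND,
			   fs_info->thread_pool_size,
			   4 /* was wq.idle_thresh = 4 */);
if (!wq)
	return -ENOMEM;		/* NULL, not ERR_PTR, signals failure */

/* items go in via btrfs_queue_work(wq, &work) ... */

btrfs_destroy_workqueue(wq);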
| @@ -2963,6 +2977,8 @@ fail_iput: | |||
| 2963 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 2964 | 2978 | ||
| 2965 | iput(fs_info->btree_inode); | 2979 | iput(fs_info->btree_inode); |
| 2980 | fail_bio_counter: | ||
| 2981 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 2966 | fail_delalloc_bytes: | 2982 | fail_delalloc_bytes: |
| 2967 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 2983 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 2968 | fail_dirty_metadata_bytes: | 2984 | fail_dirty_metadata_bytes: |
| @@ -3244,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3244 | /* send down all the barriers */ | 3260 | /* send down all the barriers */ |
| 3245 | head = &info->fs_devices->devices; | 3261 | head = &info->fs_devices->devices; |
| 3246 | list_for_each_entry_rcu(dev, head, dev_list) { | 3262 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3263 | if (dev->missing) | ||
| 3264 | continue; | ||
| 3247 | if (!dev->bdev) { | 3265 | if (!dev->bdev) { |
| 3248 | errors_send++; | 3266 | errors_send++; |
| 3249 | continue; | 3267 | continue; |
| @@ -3258,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3258 | 3276 | ||
| 3259 | /* wait for all the barriers */ | 3277 | /* wait for all the barriers */ |
| 3260 | list_for_each_entry_rcu(dev, head, dev_list) { | 3278 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3279 | if (dev->missing) | ||
| 3280 | continue; | ||
| 3261 | if (!dev->bdev) { | 3281 | if (!dev->bdev) { |
| 3262 | errors_wait++; | 3282 | errors_wait++; |
| 3263 | continue; | 3283 | continue; |
| @@ -3477,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
| 3477 | root->orphan_block_rsv = NULL; | 3497 | root->orphan_block_rsv = NULL; |
| 3478 | if (root->anon_dev) | 3498 | if (root->anon_dev) |
| 3479 | free_anon_bdev(root->anon_dev); | 3499 | free_anon_bdev(root->anon_dev); |
| 3500 | if (root->subv_writers) | ||
| 3501 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 3480 | free_extent_buffer(root->node); | 3502 | free_extent_buffer(root->node); |
| 3481 | free_extent_buffer(root->commit_root); | 3503 | free_extent_buffer(root->commit_root); |
| 3482 | kfree(root->free_ino_ctl); | 3504 | kfree(root->free_ino_ctl); |
| @@ -3610,6 +3632,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3610 | 3632 | ||
| 3611 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | 3633 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); |
| 3612 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3634 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 3635 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 3613 | bdi_destroy(&fs_info->bdi); | 3636 | bdi_destroy(&fs_info->bdi); |
| 3614 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3637 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
| 3615 | 3638 | ||
| @@ -3791,9 +3814,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
| 3791 | list_move_tail(&root->ordered_root, | 3814 | list_move_tail(&root->ordered_root, |
| 3792 | &fs_info->ordered_roots); | 3815 | &fs_info->ordered_roots); |
| 3793 | 3816 | ||
| 3817 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 3794 | btrfs_destroy_ordered_extents(root); | 3818 | btrfs_destroy_ordered_extents(root); |
| 3795 | 3819 | ||
| 3796 | cond_resched_lock(&fs_info->ordered_root_lock); | 3820 | cond_resched(); |
| 3821 | spin_lock(&fs_info->ordered_root_lock); | ||
| 3797 | } | 3822 | } |
| 3798 | spin_unlock(&fs_info->ordered_root_lock); | 3823 | spin_unlock(&fs_info->ordered_root_lock); |
| 3799 | } | 3824 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32312e09f0f5..c6b6a6e3e735 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -549,7 +549,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 549 | caching_ctl->block_group = cache; | 549 | caching_ctl->block_group = cache; |
| 550 | caching_ctl->progress = cache->key.objectid; | 550 | caching_ctl->progress = cache->key.objectid; |
| 551 | atomic_set(&caching_ctl->count, 1); | 551 | atomic_set(&caching_ctl->count, 1); |
| 552 | caching_ctl->work.func = caching_thread; | 552 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); |
| 553 | 553 | ||
| 554 | spin_lock(&cache->lock); | 554 | spin_lock(&cache->lock); |
| 555 | /* | 555 | /* |
| @@ -640,7 +640,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 640 | 640 | ||
| 641 | btrfs_get_block_group(cache); | 641 | btrfs_get_block_group(cache); |
| 642 | 642 | ||
| 643 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); | 643 | btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
| 644 | 644 | ||
| 645 | return ret; | 645 | return ret; |
| 646 | } | 646 | } |
| @@ -3971,7 +3971,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
| 3971 | } | 3971 | } |
| 3972 | 3972 | ||
| 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, |
| 3974 | unsigned long nr_pages) | 3974 | unsigned long nr_pages, int nr_items) |
| 3975 | { | 3975 | { |
| 3976 | struct super_block *sb = root->fs_info->sb; | 3976 | struct super_block *sb = root->fs_info->sb; |
| 3977 | 3977 | ||
| @@ -3986,9 +3986,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
| 3986 | * the filesystem is readonly (all dirty pages are written to | 3986 | * the filesystem is readonly (all dirty pages are written to |
| 3987 | * the disk). | 3987 | * the disk). |
| 3988 | */ | 3988 | */ |
| 3989 | btrfs_start_delalloc_roots(root->fs_info, 0); | 3989 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
| 3990 | if (!current->journal_info) | 3990 | if (!current->journal_info) |
| 3991 | btrfs_wait_ordered_roots(root->fs_info, -1); | 3991 | btrfs_wait_ordered_roots(root->fs_info, nr_items); |
| 3992 | } | 3992 | } |
| 3993 | } | 3993 | } |
| 3994 | 3994 | ||
| @@ -4045,7 +4045,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 4045 | while (delalloc_bytes && loops < 3) { | 4045 | while (delalloc_bytes && loops < 3) { |
| 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); | 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); |
| 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
| 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages); | 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages, items); |
| 4049 | /* | 4049 | /* |
| 4050 | * We need to wait for the async pages to actually start before | 4050 | * We need to wait for the async pages to actually start before |
| 4051 | * we do anything. | 4051 | * we do anything. |
| @@ -4112,13 +4112,9 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4112 | goto commit; | 4112 | goto commit; |
| 4113 | 4113 | ||
| 4114 | /* See if there is enough pinned space to make this reservation */ | 4114 | /* See if there is enough pinned space to make this reservation */ |
| 4115 | spin_lock(&space_info->lock); | ||
| 4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4115 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4117 | bytes) >= 0) { | 4116 | bytes) >= 0) |
| 4118 | spin_unlock(&space_info->lock); | ||
| 4119 | goto commit; | 4117 | goto commit; |
| 4120 | } | ||
| 4121 | spin_unlock(&space_info->lock); | ||
| 4122 | 4118 | ||
| 4123 | /* | 4119 | /* |
| 4124 | * See if there is some space in the delayed insertion reservation for | 4120 | * See if there is some space in the delayed insertion reservation for |
| @@ -4127,16 +4123,13 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4127 | if (space_info != delayed_rsv->space_info) | 4123 | if (space_info != delayed_rsv->space_info) |
| 4128 | return -ENOSPC; | 4124 | return -ENOSPC; |
| 4129 | 4125 | ||
| 4130 | spin_lock(&space_info->lock); | ||
| 4131 | spin_lock(&delayed_rsv->lock); | 4126 | spin_lock(&delayed_rsv->lock); |
| 4132 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4127 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4133 | bytes - delayed_rsv->size) >= 0) { | 4128 | bytes - delayed_rsv->size) >= 0) { |
| 4134 | spin_unlock(&delayed_rsv->lock); | 4129 | spin_unlock(&delayed_rsv->lock); |
| 4135 | spin_unlock(&space_info->lock); | ||
| 4136 | return -ENOSPC; | 4130 | return -ENOSPC; |
| 4137 | } | 4131 | } |
| 4138 | spin_unlock(&delayed_rsv->lock); | 4132 | spin_unlock(&delayed_rsv->lock); |
| 4139 | spin_unlock(&space_info->lock); | ||
| 4140 | 4133 | ||
| 4141 | commit: | 4134 | commit: |
| 4142 | trans = btrfs_join_transaction(root); | 4135 | trans = btrfs_join_transaction(root); |
| @@ -4181,7 +4174,7 @@ static int flush_space(struct btrfs_root *root, | |||
| 4181 | break; | 4174 | break; |
| 4182 | case FLUSH_DELALLOC: | 4175 | case FLUSH_DELALLOC: |
| 4183 | case FLUSH_DELALLOC_WAIT: | 4176 | case FLUSH_DELALLOC_WAIT: |
| 4184 | shrink_delalloc(root, num_bytes, orig_bytes, | 4177 | shrink_delalloc(root, num_bytes * 2, orig_bytes, |
| 4185 | state == FLUSH_DELALLOC_WAIT); | 4178 | state == FLUSH_DELALLOC_WAIT); |
| 4186 | break; | 4179 | break; |
| 4187 | case ALLOC_CHUNK: | 4180 | case ALLOC_CHUNK: |
| @@ -8938,3 +8931,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 8938 | range->len = trimmed; | 8931 | range->len = trimmed; |
| 8939 | return ret; | 8932 | return ret; |
| 8940 | } | 8933 | } |
| 8934 | |||
| 8935 | /* | ||
| 8936 | * btrfs_{start,end}_nocow_write() is similar to mnt_{want,drop}_write(): |||
| 8937 | * they are used to prevent some tasks from writing data into the page cache |||
| 8938 | * via nocow before the subvolume is snapshotted, and to flush the data to |||
| 8939 | * the disk after the snapshot creation. |||
| 8940 | */ | ||
| 8941 | void btrfs_end_nocow_write(struct btrfs_root *root) | ||
| 8942 | { | ||
| 8943 | percpu_counter_dec(&root->subv_writers->counter); | ||
| 8944 | /* | ||
| 8945 | * Make sure counter is updated before we wake up | ||
| 8946 | * waiters. | ||
| 8947 | */ | ||
| 8948 | smp_mb(); | ||
| 8949 | if (waitqueue_active(&root->subv_writers->wait)) | ||
| 8950 | wake_up(&root->subv_writers->wait); | ||
| 8951 | } | ||
| 8952 | |||
| 8953 | int btrfs_start_nocow_write(struct btrfs_root *root) | ||
| 8954 | { | ||
| 8955 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | ||
| 8956 | return 0; | ||
| 8957 | |||
| 8958 | percpu_counter_inc(&root->subv_writers->counter); | ||
| 8959 | /* | ||
| 8960 | * Make sure counter is updated before we check for snapshot creation. | ||
| 8961 | */ | ||
| 8962 | smp_mb(); | ||
| 8963 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | ||
| 8964 | btrfs_end_nocow_write(root); | ||
| 8965 | return 0; | ||
| 8966 | } | ||
| 8967 | return 1; | ||
| 8968 | } | ||
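btrfs_start_nocow_write()/btrfs_end_nocow_write() act as the reader side of a per-subvolume gate: nocow writers bump subv_writers->counter (set up in btrfs_init_fs_root() earlier in this commit) unless will_be_snapshoted is already set, and snapshot creation sets that flag and then waits on subv_writers->wait for the counter to drain. A sketch of the writer side, mirroring how check_can_nocow() in file.c uses it later in this commit; try_nocow_write() itself is hypothetical:

static int try_nocow_write(struct btrfs_root *root)
{
	if (!btrfs_start_nocow_write(root))
		return -ENOSPC;	/* snapshot pending: fall back to COW */

	/* ... verify the target extents and write in place (nocow) ... */

	btrfs_end_nocow_write(root);	/* wakes subv_writers->wait */
	return 0;
}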
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85bbd01f1271..ae69a00387e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -229,12 +229,14 @@ void free_extent_state(struct extent_state *state) | |||
| 229 | } | 229 | } |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 232 | static struct rb_node *tree_insert(struct rb_root *root, |
| 233 | struct rb_node *search_start, | ||
| 234 | u64 offset, | ||
| 233 | struct rb_node *node, | 235 | struct rb_node *node, |
| 234 | struct rb_node ***p_in, | 236 | struct rb_node ***p_in, |
| 235 | struct rb_node **parent_in) | 237 | struct rb_node **parent_in) |
| 236 | { | 238 | { |
| 237 | struct rb_node **p = &root->rb_node; | 239 | struct rb_node **p; |
| 238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
| 239 | struct tree_entry *entry; | 241 | struct tree_entry *entry; |
| 240 | 242 | ||
| @@ -244,6 +246,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
| 244 | goto do_insert; | 246 | goto do_insert; |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 249 | p = search_start ? &search_start : &root->rb_node; | ||
| 247 | while (*p) { | 250 | while (*p) { |
| 248 | parent = *p; | 251 | parent = *p; |
| 249 | entry = rb_entry(parent, struct tree_entry, rb_node); | 252 | entry = rb_entry(parent, struct tree_entry, rb_node); |
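tree_insert() gains an optional starting node: split_state() passes the extent_state being split, so the descent for the new low half begins at that node rather than at the tree root. The same hint idea in generic form, assuming the caller can guarantee the new key belongs somewhere in the hint's subtree:

#include <linux/rbtree.h>

static void insert_with_hint(struct rb_root *root, struct rb_node *hint,
			     struct rb_node *node, u64 key,
			     u64 (*key_of)(struct rb_node *n))
{
	struct rb_node **p = hint ? &hint : &root->rb_node;
	struct rb_node *parent = NULL;

	while (*p) {
		parent = *p;
		if (key < key_of(parent))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(node, parent, p);	/* p points into parent's child slot */
	rb_insert_color(node, root);	/* rebalances up to the real root */
}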
| @@ -430,7 +433,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 430 | 433 | ||
| 431 | set_state_bits(tree, state, bits); | 434 | set_state_bits(tree, state, bits); |
| 432 | 435 | ||
| 433 | node = tree_insert(&tree->state, end, &state->rb_node, p, parent); | 436 | node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent); |
| 434 | if (node) { | 437 | if (node) { |
| 435 | struct extent_state *found; | 438 | struct extent_state *found; |
| 436 | found = rb_entry(node, struct extent_state, rb_node); | 439 | found = rb_entry(node, struct extent_state, rb_node); |
| @@ -477,8 +480,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 477 | prealloc->state = orig->state; | 480 | prealloc->state = orig->state; |
| 478 | orig->start = split; | 481 | orig->start = split; |
| 479 | 482 | ||
| 480 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, | 483 | node = tree_insert(&tree->state, &orig->rb_node, prealloc->end, |
| 481 | NULL, NULL); | 484 | &prealloc->rb_node, NULL, NULL); |
| 482 | if (node) { | 485 | if (node) { |
| 483 | free_extent_state(prealloc); | 486 | free_extent_state(prealloc); |
| 484 | return -EEXIST; | 487 | return -EEXIST; |
| @@ -2757,7 +2760,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, | |||
| 2757 | 2760 | ||
| 2758 | if (em_cached && *em_cached) { | 2761 | if (em_cached && *em_cached) { |
| 2759 | em = *em_cached; | 2762 | em = *em_cached; |
| 2760 | if (em->in_tree && start >= em->start && | 2763 | if (extent_map_in_tree(em) && start >= em->start && |
| 2761 | start < extent_map_end(em)) { | 2764 | start < extent_map_end(em)) { |
| 2762 | atomic_inc(&em->refs); | 2765 | atomic_inc(&em->refs); |
| 2763 | return em; | 2766 | return em; |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 996ad56b57db..1874aee69c86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -51,7 +51,7 @@ struct extent_map *alloc_extent_map(void) | |||
| 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); | 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
| 52 | if (!em) | 52 | if (!em) |
| 53 | return NULL; | 53 | return NULL; |
| 54 | em->in_tree = 0; | 54 | RB_CLEAR_NODE(&em->rb_node); |
| 55 | em->flags = 0; | 55 | em->flags = 0; |
| 56 | em->compress_type = BTRFS_COMPRESS_NONE; | 56 | em->compress_type = BTRFS_COMPRESS_NONE; |
| 57 | em->generation = 0; | 57 | em->generation = 0; |
| @@ -73,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
| 73 | return; | 73 | return; |
| 74 | WARN_ON(atomic_read(&em->refs) == 0); | 74 | WARN_ON(atomic_read(&em->refs) == 0); |
| 75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
| 76 | WARN_ON(em->in_tree); | 76 | WARN_ON(extent_map_in_tree(em)); |
| 77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
| 78 | kmem_cache_free(extent_map_cache, em); | 78 | kmem_cache_free(extent_map_cache, em); |
| 79 | } | 79 | } |
| @@ -99,8 +99,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 99 | parent = *p; | 99 | parent = *p; |
| 100 | entry = rb_entry(parent, struct extent_map, rb_node); | 100 | entry = rb_entry(parent, struct extent_map, rb_node); |
| 101 | 101 | ||
| 102 | WARN_ON(!entry->in_tree); | ||
| 103 | |||
| 104 | if (em->start < entry->start) | 102 | if (em->start < entry->start) |
| 105 | p = &(*p)->rb_left; | 103 | p = &(*p)->rb_left; |
| 106 | else if (em->start >= extent_map_end(entry)) | 104 | else if (em->start >= extent_map_end(entry)) |
| @@ -128,7 +126,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 128 | if (end > entry->start && em->start < extent_map_end(entry)) | 126 | if (end > entry->start && em->start < extent_map_end(entry)) |
| 129 | return -EEXIST; | 127 | return -EEXIST; |
| 130 | 128 | ||
| 131 | em->in_tree = 1; | ||
| 132 | rb_link_node(&em->rb_node, orig_parent, p); | 129 | rb_link_node(&em->rb_node, orig_parent, p); |
| 133 | rb_insert_color(&em->rb_node, root); | 130 | rb_insert_color(&em->rb_node, root); |
| 134 | return 0; | 131 | return 0; |
| @@ -153,8 +150,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
| 153 | prev = n; | 150 | prev = n; |
| 154 | prev_entry = entry; | 151 | prev_entry = entry; |
| 155 | 152 | ||
| 156 | WARN_ON(!entry->in_tree); | ||
| 157 | |||
| 158 | if (offset < entry->start) | 153 | if (offset < entry->start) |
| 159 | n = n->rb_left; | 154 | n = n->rb_left; |
| 160 | else if (offset >= extent_map_end(entry)) | 155 | else if (offset >= extent_map_end(entry)) |
| @@ -240,12 +235,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 240 | em->len += merge->len; | 235 | em->len += merge->len; |
| 241 | em->block_len += merge->block_len; | 236 | em->block_len += merge->block_len; |
| 242 | em->block_start = merge->block_start; | 237 | em->block_start = merge->block_start; |
| 243 | merge->in_tree = 0; | ||
| 244 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | 238 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
| 245 | em->mod_start = merge->mod_start; | 239 | em->mod_start = merge->mod_start; |
| 246 | em->generation = max(em->generation, merge->generation); | 240 | em->generation = max(em->generation, merge->generation); |
| 247 | 241 | ||
| 248 | rb_erase(&merge->rb_node, &tree->map); | 242 | rb_erase(&merge->rb_node, &tree->map); |
| 243 | RB_CLEAR_NODE(&merge->rb_node); | ||
| 249 | free_extent_map(merge); | 244 | free_extent_map(merge); |
| 250 | } | 245 | } |
| 251 | } | 246 | } |
| @@ -257,7 +252,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 257 | em->len += merge->len; | 252 | em->len += merge->len; |
| 258 | em->block_len += merge->block_len; | 253 | em->block_len += merge->block_len; |
| 259 | rb_erase(&merge->rb_node, &tree->map); | 254 | rb_erase(&merge->rb_node, &tree->map); |
| 260 | merge->in_tree = 0; | 255 | RB_CLEAR_NODE(&merge->rb_node); |
| 261 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | 256 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
| 262 | em->generation = max(em->generation, merge->generation); | 257 | em->generation = max(em->generation, merge->generation); |
| 263 | free_extent_map(merge); | 258 | free_extent_map(merge); |
| @@ -319,7 +314,21 @@ out: | |||
| 319 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | 314 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) |
| 320 | { | 315 | { |
| 321 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 316 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
| 322 | if (em->in_tree) | 317 | if (extent_map_in_tree(em)) |
| 318 | try_merge_map(tree, em); | ||
| 319 | } | ||
| 320 | |||
| 321 | static inline void setup_extent_mapping(struct extent_map_tree *tree, | ||
| 322 | struct extent_map *em, | ||
| 323 | int modified) | ||
| 324 | { | ||
| 325 | atomic_inc(&em->refs); | ||
| 326 | em->mod_start = em->start; | ||
| 327 | em->mod_len = em->len; | ||
| 328 | |||
| 329 | if (modified) | ||
| 330 | list_move(&em->list, &tree->modified_extents); | ||
| 331 | else | ||
| 323 | try_merge_map(tree, em); | 332 | try_merge_map(tree, em); |
| 324 | } | 333 | } |
| 325 | 334 | ||
| @@ -342,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 342 | if (ret) | 351 | if (ret) |
| 343 | goto out; | 352 | goto out; |
| 344 | 353 | ||
| 345 | atomic_inc(&em->refs); | 354 | setup_extent_mapping(tree, em, modified); |
| 346 | |||
| 347 | em->mod_start = em->start; | ||
| 348 | em->mod_len = em->len; | ||
| 349 | |||
| 350 | if (modified) | ||
| 351 | list_move(&em->list, &tree->modified_extents); | ||
| 352 | else | ||
| 353 | try_merge_map(tree, em); | ||
| 354 | out: | 355 | out: |
| 355 | return ret; | 356 | return ret; |
| 356 | } | 357 | } |
| @@ -434,6 +435,21 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 434 | rb_erase(&em->rb_node, &tree->map); | 435 | rb_erase(&em->rb_node, &tree->map); |
| 435 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | 436 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
| 436 | list_del_init(&em->list); | 437 | list_del_init(&em->list); |
| 437 | em->in_tree = 0; | 438 | RB_CLEAR_NODE(&em->rb_node); |
| 438 | return ret; | 439 | return ret; |
| 439 | } | 440 | } |
| 441 | |||
| 442 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 443 | struct extent_map *cur, | ||
| 444 | struct extent_map *new, | ||
| 445 | int modified) | ||
| 446 | { | ||
| 447 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); | ||
| 448 | ASSERT(extent_map_in_tree(cur)); | ||
| 449 | if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) | ||
| 450 | list_del_init(&cur->list); | ||
| 451 | rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map); | ||
| 452 | RB_CLEAR_NODE(&cur->rb_node); | ||
| 453 | |||
| 454 | setup_extent_mapping(tree, new, modified); | ||
| 455 | } | ||
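With the in_tree field gone, tree membership lives in the rb_node itself: RB_CLEAR_NODE() on allocation and after every rb_erase(), RB_EMPTY_NODE() to test, which is exactly what the new extent_map_in_tree() helper in the next file wraps. The idiom in isolation, as a generic sketch:

#include <linux/rbtree.h>

struct item {
	struct rb_node rb_node;
};

static void item_init(struct item *it)
{
	RB_CLEAR_NODE(&it->rb_node);		/* marks "not in any tree" */
}

static bool item_in_tree(const struct item *it)
{
	return !RB_EMPTY_NODE(&it->rb_node);	/* same test as extent_map_in_tree() */
}

static void item_erase(struct rb_root *root, struct item *it)
{
	rb_erase(&it->rb_node, root);
	RB_CLEAR_NODE(&it->rb_node);	/* rb_erase() alone leaves the node stale */
}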
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba716d7f8..e7fd8a56a140 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -33,7 +33,6 @@ struct extent_map { | |||
| 33 | unsigned long flags; | 33 | unsigned long flags; |
| 34 | struct block_device *bdev; | 34 | struct block_device *bdev; |
| 35 | atomic_t refs; | 35 | atomic_t refs; |
| 36 | unsigned int in_tree; | ||
| 37 | unsigned int compress_type; | 36 | unsigned int compress_type; |
| 38 | struct list_head list; | 37 | struct list_head list; |
| 39 | }; | 38 | }; |
| @@ -44,6 +43,11 @@ struct extent_map_tree { | |||
| 44 | rwlock_t lock; | 43 | rwlock_t lock; |
| 45 | }; | 44 | }; |
| 46 | 45 | ||
| 46 | static inline int extent_map_in_tree(const struct extent_map *em) | ||
| 47 | { | ||
| 48 | return !RB_EMPTY_NODE(&em->rb_node); | ||
| 49 | } | ||
| 50 | |||
| 47 | static inline u64 extent_map_end(struct extent_map *em) | 51 | static inline u64 extent_map_end(struct extent_map *em) |
| 48 | { | 52 | { |
| 49 | if (em->start + em->len < em->start) | 53 | if (em->start + em->len < em->start) |
| @@ -64,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 64 | int add_extent_mapping(struct extent_map_tree *tree, | 68 | int add_extent_mapping(struct extent_map_tree *tree, |
| 65 | struct extent_map *em, int modified); | 69 | struct extent_map *em, int modified); |
| 66 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 70 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
| 71 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 72 | struct extent_map *cur, | ||
| 73 | struct extent_map *new, | ||
| 74 | int modified); | ||
| 67 | 75 | ||
| 68 | struct extent_map *alloc_extent_map(void); | 76 | struct extent_map *alloc_extent_map(void); |
| 69 | void free_extent_map(struct extent_map *em); | 77 | void free_extent_map(struct extent_map *em); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0165b8672f09..e1ffb1e22898 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); | 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); |
| 593 | modified = !list_empty(&em->list); | 593 | modified = !list_empty(&em->list); |
| 594 | remove_extent_mapping(em_tree, em); | ||
| 595 | if (no_splits) | 594 | if (no_splits) |
| 596 | goto next; | 595 | goto next; |
| 597 | 596 | ||
| @@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 622 | split->bdev = em->bdev; | 621 | split->bdev = em->bdev; |
| 623 | split->flags = flags; | 622 | split->flags = flags; |
| 624 | split->compress_type = em->compress_type; | 623 | split->compress_type = em->compress_type; |
| 625 | ret = add_extent_mapping(em_tree, split, modified); | 624 | replace_extent_mapping(em_tree, em, split, modified); |
| 626 | BUG_ON(ret); /* Logic error */ | ||
| 627 | free_extent_map(split); | 625 | free_extent_map(split); |
| 628 | split = split2; | 626 | split = split2; |
| 629 | split2 = NULL; | 627 | split2 = NULL; |
| @@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 661 | split->orig_block_len = 0; | 659 | split->orig_block_len = 0; |
| 662 | } | 660 | } |
| 663 | 661 | ||
| 664 | ret = add_extent_mapping(em_tree, split, modified); | 662 | if (extent_map_in_tree(em)) { |
| 665 | BUG_ON(ret); /* Logic error */ | 663 | replace_extent_mapping(em_tree, em, split, |
| 664 | modified); | ||
| 665 | } else { | ||
| 666 | ret = add_extent_mapping(em_tree, split, | ||
| 667 | modified); | ||
| 668 | ASSERT(ret == 0); /* Logic error */ | ||
| 669 | } | ||
| 666 | free_extent_map(split); | 670 | free_extent_map(split); |
| 667 | split = NULL; | 671 | split = NULL; |
| 668 | } | 672 | } |
| 669 | next: | 673 | next: |
| 674 | if (extent_map_in_tree(em)) | ||
| 675 | remove_extent_mapping(em_tree, em); | ||
| 670 | write_unlock(&em_tree->lock); | 676 | write_unlock(&em_tree->lock); |
| 671 | 677 | ||
| 672 | /* once for us */ | 678 | /* once for us */ |
| @@ -720,7 +726,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 720 | if (drop_cache) | 726 | if (drop_cache) |
| 721 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 727 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 722 | 728 | ||
| 723 | if (start >= BTRFS_I(inode)->disk_i_size) | 729 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
| 724 | modify_tree = 0; | 730 | modify_tree = 0; |
| 725 | 731 | ||
| 726 | while (1) { | 732 | while (1) { |
| @@ -798,7 +804,10 @@ next_slot: | |||
| 798 | */ | 804 | */ |
| 799 | if (start > key.offset && end < extent_end) { | 805 | if (start > key.offset && end < extent_end) { |
| 800 | BUG_ON(del_nr > 0); | 806 | BUG_ON(del_nr > 0); |
| 801 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 807 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 808 | ret = -EINVAL; | ||
| 809 | break; | ||
| 810 | } | ||
| 802 | 811 | ||
| 803 | memcpy(&new_key, &key, sizeof(new_key)); | 812 | memcpy(&new_key, &key, sizeof(new_key)); |
| 804 | new_key.offset = start; | 813 | new_key.offset = start; |
| @@ -841,7 +850,10 @@ next_slot: | |||
| 841 | * | -------- extent -------- | | 850 | * | -------- extent -------- | |
| 842 | */ | 851 | */ |
| 843 | if (start <= key.offset && end < extent_end) { | 852 | if (start <= key.offset && end < extent_end) { |
| 844 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 853 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 854 | ret = -EINVAL; | ||
| 855 | break; | ||
| 856 | } | ||
| 845 | 857 | ||
| 846 | memcpy(&new_key, &key, sizeof(new_key)); | 858 | memcpy(&new_key, &key, sizeof(new_key)); |
| 847 | new_key.offset = end; | 859 | new_key.offset = end; |
| @@ -864,7 +876,10 @@ next_slot: | |||
| 864 | */ | 876 | */ |
| 865 | if (start > key.offset && end >= extent_end) { | 877 | if (start > key.offset && end >= extent_end) { |
| 866 | BUG_ON(del_nr > 0); | 878 | BUG_ON(del_nr > 0); |
| 867 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 879 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 880 | ret = -EINVAL; | ||
| 881 | break; | ||
| 882 | } | ||
| 868 | 883 | ||
| 869 | btrfs_set_file_extent_num_bytes(leaf, fi, | 884 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 870 | start - key.offset); | 885 | start - key.offset); |
| @@ -938,34 +953,42 @@ next_slot: | |||
| 938 | * Set path->slots[0] to first slot, so that after the delete | 953 | * Set path->slots[0] to first slot, so that after the delete |
| 939 | * if items are moved off from our leaf to its immediate left or | 954 | * if items are moved off from our leaf to its immediate left or |
| 940 | * right neighbor leafs, we end up with a correct and adjusted | 955 | * right neighbor leafs, we end up with a correct and adjusted |
| 941 | * path->slots[0] for our insertion. | 956 | * path->slots[0] for our insertion (if replace_extent != 0). |
| 942 | */ | 957 | */ |
| 943 | path->slots[0] = del_slot; | 958 | path->slots[0] = del_slot; |
| 944 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 959 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
| 945 | if (ret) | 960 | if (ret) |
| 946 | btrfs_abort_transaction(trans, root, ret); | 961 | btrfs_abort_transaction(trans, root, ret); |
| 962 | } | ||
| 947 | 963 | ||
| 948 | leaf = path->nodes[0]; | 964 | leaf = path->nodes[0]; |
| 949 | /* | 965 | /* |
| 950 | * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that | 966 | * If btrfs_del_items() was called, it might have deleted a leaf, in |
| 951 | * is, its contents got pushed to its neighbors), in which case | 967 | * which case it unlocked our path, so check path->locks[0] matches a |
| 952 | * it means path->locks[0] == 0 | 968 | * write lock. |
| 953 | */ | 969 | */ |
| 954 | if (!ret && replace_extent && leafs_visited == 1 && | 970 | if (!ret && replace_extent && leafs_visited == 1 && |
| 955 | path->locks[0] && | 971 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
| 956 | btrfs_leaf_free_space(root, leaf) >= | 972 | path->locks[0] == BTRFS_WRITE_LOCK) && |
| 957 | sizeof(struct btrfs_item) + extent_item_size) { | 973 | btrfs_leaf_free_space(root, leaf) >= |
| 958 | 974 | sizeof(struct btrfs_item) + extent_item_size) { | |
| 959 | key.objectid = ino; | 975 | |
| 960 | key.type = BTRFS_EXTENT_DATA_KEY; | 976 | key.objectid = ino; |
| 961 | key.offset = start; | 977 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 962 | setup_items_for_insert(root, path, &key, | 978 | key.offset = start; |
| 963 | &extent_item_size, | 979 | if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { |
| 964 | extent_item_size, | 980 | struct btrfs_key slot_key; |
| 965 | sizeof(struct btrfs_item) + | 981 | |
| 966 | extent_item_size, 1); | 982 | btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); |
| 967 | *key_inserted = 1; | 983 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
| 984 | path->slots[0]++; | ||
| 968 | } | 985 | } |
| 986 | setup_items_for_insert(root, path, &key, | ||
| 987 | &extent_item_size, | ||
| 988 | extent_item_size, | ||
| 989 | sizeof(struct btrfs_item) + | ||
| 990 | extent_item_size, 1); | ||
| 991 | *key_inserted = 1; | ||
| 969 | } | 992 | } |
| 970 | 993 | ||
| 971 | if (!replace_extent || !(*key_inserted)) | 994 | if (!replace_extent || !(*key_inserted)) |
| @@ -1346,11 +1369,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1346 | struct btrfs_ordered_extent *ordered; | 1369 | struct btrfs_ordered_extent *ordered; |
| 1347 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1370 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
| 1348 | start_pos, last_pos, 0, cached_state); | 1371 | start_pos, last_pos, 0, cached_state); |
| 1349 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos); | 1372 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
| 1373 | last_pos - start_pos + 1); | ||
| 1350 | if (ordered && | 1374 | if (ordered && |
| 1351 | ordered->file_offset + ordered->len > start_pos && | 1375 | ordered->file_offset + ordered->len > start_pos && |
| 1352 | ordered->file_offset <= last_pos) { | 1376 | ordered->file_offset <= last_pos) { |
| 1353 | btrfs_put_ordered_extent(ordered); | ||
| 1354 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1377 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1355 | start_pos, last_pos, | 1378 | start_pos, last_pos, |
| 1356 | cached_state, GFP_NOFS); | 1379 | cached_state, GFP_NOFS); |
| @@ -1358,12 +1381,9 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1358 | unlock_page(pages[i]); | 1381 | unlock_page(pages[i]); |
| 1359 | page_cache_release(pages[i]); | 1382 | page_cache_release(pages[i]); |
| 1360 | } | 1383 | } |
| 1361 | ret = btrfs_wait_ordered_range(inode, start_pos, | 1384 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 1362 | last_pos - start_pos + 1); | 1385 | btrfs_put_ordered_extent(ordered); |
| 1363 | if (ret) | 1386 | return -EAGAIN; |
| 1364 | return ret; | ||
| 1365 | else | ||
| 1366 | return -EAGAIN; | ||
| 1367 | } | 1387 | } |
| 1368 | if (ordered) | 1388 | if (ordered) |
| 1369 | btrfs_put_ordered_extent(ordered); | 1389 | btrfs_put_ordered_extent(ordered); |
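Instead of waiting for every ordered extent up to last_pos and then retrying, the rewritten branch looks up only ordered extents overlapping the locked range, kicks the colliding one with btrfs_start_ordered_extent(), and returns -EAGAIN. A sketch of the retry contract this relies on; the caller is outside this hunk and the argument list past the first two parameters is assumed:

again:
        /* ...prepare and lock the target pages... */
        ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, pos,
                                              &lockstart, &lockend,
                                              &cached_state);
        if (ret == -EAGAIN)
                goto again;     /* colliding ordered extent already started */
        else if (ret < 0)
                goto out_fail;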
| @@ -1396,8 +1416,12 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1396 | u64 num_bytes; | 1416 | u64 num_bytes; |
| 1397 | int ret; | 1417 | int ret; |
| 1398 | 1418 | ||
| 1419 | ret = btrfs_start_nocow_write(root); | ||
| 1420 | if (!ret) | ||
| 1421 | return -ENOSPC; | ||
| 1422 | |||
| 1399 | lockstart = round_down(pos, root->sectorsize); | 1423 | lockstart = round_down(pos, root->sectorsize); |
| 1400 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | 1424 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
| 1401 | 1425 | ||
| 1402 | while (1) { | 1426 | while (1) { |
| 1403 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1427 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
| @@ -1415,12 +1439,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1415 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1439 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
| 1416 | if (ret <= 0) { | 1440 | if (ret <= 0) { |
| 1417 | ret = 0; | 1441 | ret = 0; |
| 1442 | btrfs_end_nocow_write(root); | ||
| 1418 | } else { | 1443 | } else { |
| 1419 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 1444 | *write_bytes = min_t(size_t, *write_bytes, |
| 1420 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1445 | num_bytes - pos + lockstart); |
| 1421 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
| 1422 | NULL, GFP_NOFS); | ||
| 1423 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
| 1424 | } | 1446 | } |
| 1425 | 1447 | ||
| 1426 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1448 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
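The lockend change above is a range-size fix, easiest to see with concrete, illustrative numbers:

/* sectorsize == 4096, pos == 5096, *write_bytes == 3500;
 * the write covers bytes [5096, 8596), i.e. sectors [4096, 12288).
 *
 *   lockstart = round_down(5096, 4096)            =  4096
 *   old:  lockstart + round_up(3500, 4096) - 1    =  8191
 *   new:  round_up(5096 + 3500, 4096) - 1         = 12287
 *
 * The old formula dropped the final sector whenever pos was not
 * sector-aligned, leaving part of the write range unlocked. */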
| @@ -1510,6 +1532,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1510 | if (!only_release_metadata) | 1532 | if (!only_release_metadata) |
| 1511 | btrfs_free_reserved_data_space(inode, | 1533 | btrfs_free_reserved_data_space(inode, |
| 1512 | reserve_bytes); | 1534 | reserve_bytes); |
| 1535 | else | ||
| 1536 | btrfs_end_nocow_write(root); | ||
| 1513 | break; | 1537 | break; |
| 1514 | } | 1538 | } |
| 1515 | 1539 | ||
| @@ -1598,6 +1622,9 @@ again: | |||
| 1598 | } | 1622 | } |
| 1599 | 1623 | ||
| 1600 | release_bytes = 0; | 1624 | release_bytes = 0; |
| 1625 | if (only_release_metadata) | ||
| 1626 | btrfs_end_nocow_write(root); | ||
| 1627 | |||
| 1601 | if (only_release_metadata && copied > 0) { | 1628 | if (only_release_metadata && copied > 0) { |
| 1602 | u64 lockstart = round_down(pos, root->sectorsize); | 1629 | u64 lockstart = round_down(pos, root->sectorsize); |
| 1603 | u64 lockend = lockstart + | 1630 | u64 lockend = lockstart + |
| @@ -1624,10 +1651,12 @@ again: | |||
| 1624 | kfree(pages); | 1651 | kfree(pages); |
| 1625 | 1652 | ||
| 1626 | if (release_bytes) { | 1653 | if (release_bytes) { |
| 1627 | if (only_release_metadata) | 1654 | if (only_release_metadata) { |
| 1655 | btrfs_end_nocow_write(root); | ||
| 1628 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1656 | btrfs_delalloc_release_metadata(inode, release_bytes); |
| 1629 | else | 1657 | } else { |
| 1630 | btrfs_delalloc_release_space(inode, release_bytes); | 1658 | btrfs_delalloc_release_space(inode, release_bytes); |
| 1659 | } | ||
| 1631 | } | 1660 | } |
| 1632 | 1661 | ||
| 1633 | return num_written ? num_written : ret; | 1662 | return num_written ? num_written : ret; |
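Taken together, the three __btrfs_buffered_write() hunks above complete the bracket opened by btrfs_start_nocow_write() in check_can_nocow(). The release discipline, summarized rather than quoted:

/* Every exit from the NOCOW (only_release_metadata) path must call
 * btrfs_end_nocow_write() exactly once:
 *   1. reservation failure      -> end_nocow_write(), break
 *   2. iteration completed      -> end_nocow_write() before looping
 *   3. final release_bytes path -> end_nocow_write(), then release
 *                                  the metadata reservation
 * A missed release would leave root->subv_writers elevated and wedge
 * a later snapshot inside btrfs_wait_nocow_write() (see the ioctl.c
 * hunks further down). */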
| @@ -1797,7 +1826,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1797 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1826 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
| 1798 | if (num_written > 0) { | 1827 | if (num_written > 0) { |
| 1799 | err = generic_write_sync(file, pos, num_written); | 1828 | err = generic_write_sync(file, pos, num_written); |
| 1800 | if (err < 0 && num_written > 0) | 1829 | if (err < 0) |
| 1801 | num_written = err; | 1830 | num_written = err; |
| 1802 | } | 1831 | } |
| 1803 | 1832 | ||
| @@ -1856,8 +1885,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1856 | struct dentry *dentry = file->f_path.dentry; | 1885 | struct dentry *dentry = file->f_path.dentry; |
| 1857 | struct inode *inode = dentry->d_inode; | 1886 | struct inode *inode = dentry->d_inode; |
| 1858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1887 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1859 | int ret = 0; | ||
| 1860 | struct btrfs_trans_handle *trans; | 1888 | struct btrfs_trans_handle *trans; |
| 1889 | struct btrfs_log_ctx ctx; | ||
| 1890 | int ret = 0; | ||
| 1861 | bool full_sync = 0; | 1891 | bool full_sync = 0; |
| 1862 | 1892 | ||
| 1863 | trace_btrfs_sync_file(file, datasync); | 1893 | trace_btrfs_sync_file(file, datasync); |
| @@ -1951,7 +1981,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1951 | } | 1981 | } |
| 1952 | trans->sync = true; | 1982 | trans->sync = true; |
| 1953 | 1983 | ||
| 1954 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1984 | btrfs_init_log_ctx(&ctx); |
| 1985 | |||
| 1986 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | ||
| 1955 | if (ret < 0) { | 1987 | if (ret < 0) { |
| 1956 | /* Fallthrough and commit/free transaction. */ | 1988 | /* Fallthrough and commit/free transaction. */ |
| 1957 | ret = 1; | 1989 | ret = 1; |
| @@ -1971,7 +2003,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1971 | 2003 | ||
| 1972 | if (ret != BTRFS_NO_LOG_SYNC) { | 2004 | if (ret != BTRFS_NO_LOG_SYNC) { |
| 1973 | if (!ret) { | 2005 | if (!ret) { |
| 1974 | ret = btrfs_sync_log(trans, root); | 2006 | ret = btrfs_sync_log(trans, root, &ctx); |
| 1975 | if (!ret) { | 2007 | if (!ret) { |
| 1976 | ret = btrfs_end_transaction(trans, root); | 2008 | ret = btrfs_end_transaction(trans, root); |
| 1977 | goto out; | 2009 | goto out; |
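fsync now threads a log context from the dentry-logging call through to the log commit. Only the calls visible in these hunks, collected into one sketch; the ctx internals are defined elsewhere in the series:

struct btrfs_log_ctx ctx;

btrfs_init_log_ctx(&ctx);
ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
/* ... */
if (ret != BTRFS_NO_LOG_SYNC && !ret)
        ret = btrfs_sync_log(trans, root, &ctx);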
| @@ -2157,6 +2189,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2157 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2189 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
| 2158 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2190 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
| 2159 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2191 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
| 2192 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
| 2160 | 2193 | ||
| 2161 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2194 | ret = btrfs_wait_ordered_range(inode, offset, len); |
| 2162 | if (ret) | 2195 | if (ret) |
| @@ -2172,14 +2205,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2172 | * entire page. | 2205 | * entire page. |
| 2173 | */ | 2206 | */ |
| 2174 | if (same_page && len < PAGE_CACHE_SIZE) { | 2207 | if (same_page && len < PAGE_CACHE_SIZE) { |
| 2175 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | 2208 | if (offset < ino_size) |
| 2176 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2209 | ret = btrfs_truncate_page(inode, offset, len, 0); |
| 2177 | mutex_unlock(&inode->i_mutex); | 2210 | mutex_unlock(&inode->i_mutex); |
| 2178 | return ret; | 2211 | return ret; |
| 2179 | } | 2212 | } |
| 2180 | 2213 | ||
| 2181 | /* zero back part of the first page */ | 2214 | /* zero back part of the first page */ |
| 2182 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2215 | if (offset < ino_size) { |
| 2183 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2216 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
| 2184 | if (ret) { | 2217 | if (ret) { |
| 2185 | mutex_unlock(&inode->i_mutex); | 2218 | mutex_unlock(&inode->i_mutex); |
| @@ -2188,7 +2221,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2188 | } | 2221 | } |
| 2189 | 2222 | ||
| 2190 | /* zero the front end of the last page */ | 2223 | /* zero the front end of the last page */ |
| 2191 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2224 | if (offset + len < ino_size) { |
| 2192 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2225 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
| 2193 | if (ret) { | 2226 | if (ret) { |
| 2194 | mutex_unlock(&inode->i_mutex); | 2227 | mutex_unlock(&inode->i_mutex); |
| @@ -2277,10 +2310,13 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2277 | 2310 | ||
| 2278 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2311 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2279 | 2312 | ||
| 2280 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2313 | if (cur_offset < ino_size) { |
| 2281 | if (ret) { | 2314 | ret = fill_holes(trans, inode, path, cur_offset, |
| 2282 | err = ret; | 2315 | drop_end); |
| 2283 | break; | 2316 | if (ret) { |
| 2317 | err = ret; | ||
| 2318 | break; | ||
| 2319 | } | ||
| 2284 | } | 2320 | } |
| 2285 | 2321 | ||
| 2286 | cur_offset = drop_end; | 2322 | cur_offset = drop_end; |
| @@ -2313,10 +2349,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2313 | } | 2349 | } |
| 2314 | 2350 | ||
| 2315 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2351 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2316 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2352 | if (cur_offset < ino_size) { |
| 2317 | if (ret) { | 2353 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
| 2318 | err = ret; | 2354 | if (ret) { |
| 2319 | goto out_trans; | 2355 | err = ret; |
| 2356 | goto out_trans; | ||
| 2357 | } | ||
| 2320 | } | 2358 | } |
| 2321 | 2359 | ||
| 2322 | out_trans: | 2360 | out_trans: |
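Both fill_holes() call sites are now guarded by cur_offset < ino_size, with ino_size being i_size rounded up to a full page and hoisted into a local at the top of btrfs_punch_hole(). The reasoning, as a comment:

/* Punching at or past EOF has no file range left to describe: there
 * is nothing beyond i_size that a hole extent item could cover, so
 * fill_holes() is skipped.  Rounding i_size up to PAGE_CACHE_SIZE
 * keeps the comparison at the same granularity as the page-zeroing
 * (btrfs_truncate_page) checks earlier in the function. */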
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d44486290b..06e9a4152b14 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -864,7 +864,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 864 | 864 | ||
| 865 | if (btrfs_is_free_space_inode(inode)) { | 865 | if (btrfs_is_free_space_inode(inode)) { |
| 866 | WARN_ON_ONCE(1); | 866 | WARN_ON_ONCE(1); |
| 867 | return -EINVAL; | 867 | ret = -EINVAL; |
| 868 | goto out_unlock; | ||
| 868 | } | 869 | } |
| 869 | 870 | ||
| 870 | num_bytes = ALIGN(end - start + 1, blocksize); | 871 | num_bytes = ALIGN(end - start + 1, blocksize); |
| @@ -1075,17 +1076,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 1075 | async_cow->end = cur_end; | 1076 | async_cow->end = cur_end; |
| 1076 | INIT_LIST_HEAD(&async_cow->extents); | 1077 | INIT_LIST_HEAD(&async_cow->extents); |
| 1077 | 1078 | ||
| 1078 | async_cow->work.func = async_cow_start; | 1079 | btrfs_init_work(&async_cow->work, async_cow_start, |
| 1079 | async_cow->work.ordered_func = async_cow_submit; | 1080 | async_cow_submit, async_cow_free); |
| 1080 | async_cow->work.ordered_free = async_cow_free; | ||
| 1081 | async_cow->work.flags = 0; | ||
| 1082 | 1081 | ||
| 1083 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1082 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
| 1084 | PAGE_CACHE_SHIFT; | 1083 | PAGE_CACHE_SHIFT; |
| 1085 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 1084 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |
| 1086 | 1085 | ||
| 1087 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | 1086 | btrfs_queue_work(root->fs_info->delalloc_workers, |
| 1088 | &async_cow->work); | 1087 | &async_cow->work); |
| 1089 | 1088 | ||
| 1090 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | 1089 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { |
| 1091 | wait_event(root->fs_info->async_submit_wait, | 1090 | wait_event(root->fs_info->async_submit_wait, |
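This is the first of many conversions in this diff from the old btrfs_workers open-coded setup to a new workqueue API. The pattern, side by side; both halves appear in the hunks and no fields beyond those shown are assumed:

/* before:                          after:
 *   work.func         = start;       btrfs_init_work(&work, start,
 *   work.ordered_func = ordered;                     ordered, free_fn);
 *   work.ordered_free = free_fn;
 *   work.flags        = 0;
 *   btrfs_queue_worker(               btrfs_queue_work(
 *       &fs_info->delalloc_workers,       fs_info->delalloc_workers,
 *       &work);                           &work);
 *
 * Note the queue argument loses its '&': the btrfs_workqueue members
 * of fs_info are now pointers rather than embedded structs. */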
| @@ -1843,9 +1842,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1843 | 1842 | ||
| 1844 | SetPageChecked(page); | 1843 | SetPageChecked(page); |
| 1845 | page_cache_get(page); | 1844 | page_cache_get(page); |
| 1846 | fixup->work.func = btrfs_writepage_fixup_worker; | 1845 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
| 1847 | fixup->page = page; | 1846 | fixup->page = page; |
| 1848 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | 1847 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
| 1849 | return -EBUSY; | 1848 | return -EBUSY; |
| 1850 | } | 1849 | } |
| 1851 | 1850 | ||
| @@ -2239,6 +2238,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
| 2239 | return PTR_ERR(root); | 2238 | return PTR_ERR(root); |
| 2240 | } | 2239 | } |
| 2241 | 2240 | ||
| 2241 | if (btrfs_root_readonly(root)) { | ||
| 2242 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 2243 | return 0; | ||
| 2244 | } | ||
| 2245 | |||
| 2242 | /* step 2: get inode */ | 2246 | /* step 2: get inode */ |
| 2243 | key.objectid = backref->inum; | 2247 | key.objectid = backref->inum; |
| 2244 | key.type = BTRFS_INODE_ITEM_KEY; | 2248 | key.type = BTRFS_INODE_ITEM_KEY; |
| @@ -2759,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2759 | struct inode *inode = page->mapping->host; | 2763 | struct inode *inode = page->mapping->host; |
| 2760 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2764 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2761 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2765 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 2762 | struct btrfs_workers *workers; | 2766 | struct btrfs_workqueue *workers; |
| 2763 | 2767 | ||
| 2764 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2768 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
| 2765 | 2769 | ||
| @@ -2768,14 +2772,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2768 | end - start + 1, uptodate)) | 2772 | end - start + 1, uptodate)) |
| 2769 | return 0; | 2773 | return 0; |
| 2770 | 2774 | ||
| 2771 | ordered_extent->work.func = finish_ordered_fn; | 2775 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
| 2772 | ordered_extent->work.flags = 0; | ||
| 2773 | 2776 | ||
| 2774 | if (btrfs_is_free_space_inode(inode)) | 2777 | if (btrfs_is_free_space_inode(inode)) |
| 2775 | workers = &root->fs_info->endio_freespace_worker; | 2778 | workers = root->fs_info->endio_freespace_worker; |
| 2776 | else | 2779 | else |
| 2777 | workers = &root->fs_info->endio_write_workers; | 2780 | workers = root->fs_info->endio_write_workers; |
| 2778 | btrfs_queue_worker(workers, &ordered_extent->work); | 2781 | btrfs_queue_work(workers, &ordered_extent->work); |
| 2779 | 2782 | ||
| 2780 | return 0; | 2783 | return 0; |
| 2781 | } | 2784 | } |
| @@ -4593,7 +4596,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
| 4593 | struct rb_node *node; | 4596 | struct rb_node *node; |
| 4594 | 4597 | ||
| 4595 | ASSERT(inode->i_state & I_FREEING); | 4598 | ASSERT(inode->i_state & I_FREEING); |
| 4596 | truncate_inode_pages(&inode->i_data, 0); | 4599 | truncate_inode_pages_final(&inode->i_data); |
| 4597 | 4600 | ||
| 4598 | write_lock(&map_tree->lock); | 4601 | write_lock(&map_tree->lock); |
| 4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { | 4602 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
| @@ -4924,7 +4927,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) | |||
| 4924 | struct inode *inode; | 4927 | struct inode *inode; |
| 4925 | u64 objectid = 0; | 4928 | u64 objectid = 0; |
| 4926 | 4929 | ||
| 4927 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | 4930 | if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 4931 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 4928 | 4932 | ||
| 4929 | spin_lock(&root->inode_lock); | 4933 | spin_lock(&root->inode_lock); |
| 4930 | again: | 4934 | again: |
| @@ -5799,6 +5803,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 5799 | } | 5803 | } |
| 5800 | out_unlock: | 5804 | out_unlock: |
| 5801 | btrfs_end_transaction(trans, root); | 5805 | btrfs_end_transaction(trans, root); |
| 5806 | btrfs_balance_delayed_items(root); | ||
| 5802 | btrfs_btree_balance_dirty(root); | 5807 | btrfs_btree_balance_dirty(root); |
| 5803 | if (drop_inode) { | 5808 | if (drop_inode) { |
| 5804 | inode_dec_link_count(inode); | 5809 | inode_dec_link_count(inode); |
| @@ -5872,6 +5877,7 @@ out_unlock: | |||
| 5872 | inode_dec_link_count(inode); | 5877 | inode_dec_link_count(inode); |
| 5873 | iput(inode); | 5878 | iput(inode); |
| 5874 | } | 5879 | } |
| 5880 | btrfs_balance_delayed_items(root); | ||
| 5875 | btrfs_btree_balance_dirty(root); | 5881 | btrfs_btree_balance_dirty(root); |
| 5876 | return err; | 5882 | return err; |
| 5877 | } | 5883 | } |
| @@ -5930,6 +5936,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5930 | } | 5936 | } |
| 5931 | 5937 | ||
| 5932 | btrfs_end_transaction(trans, root); | 5938 | btrfs_end_transaction(trans, root); |
| 5939 | btrfs_balance_delayed_items(root); | ||
| 5933 | fail: | 5940 | fail: |
| 5934 | if (drop_inode) { | 5941 | if (drop_inode) { |
| 5935 | inode_dec_link_count(inode); | 5942 | inode_dec_link_count(inode); |
| @@ -5996,6 +6003,7 @@ out_fail: | |||
| 5996 | btrfs_end_transaction(trans, root); | 6003 | btrfs_end_transaction(trans, root); |
| 5997 | if (drop_on_err) | 6004 | if (drop_on_err) |
| 5998 | iput(inode); | 6005 | iput(inode); |
| 6006 | btrfs_balance_delayed_items(root); | ||
| 5999 | btrfs_btree_balance_dirty(root); | 6007 | btrfs_btree_balance_dirty(root); |
| 6000 | return err; | 6008 | return err; |
| 6001 | } | 6009 | } |
| @@ -6550,6 +6558,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6550 | int ret; | 6558 | int ret; |
| 6551 | struct extent_buffer *leaf; | 6559 | struct extent_buffer *leaf; |
| 6552 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6560 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6561 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 6553 | struct btrfs_file_extent_item *fi; | 6562 | struct btrfs_file_extent_item *fi; |
| 6554 | struct btrfs_key key; | 6563 | struct btrfs_key key; |
| 6555 | u64 disk_bytenr; | 6564 | u64 disk_bytenr; |
| @@ -6626,6 +6635,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6626 | 6635 | ||
| 6627 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6636 | if (btrfs_extent_readonly(root, disk_bytenr)) |
| 6628 | goto out; | 6637 | goto out; |
| 6638 | |||
| 6639 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6640 | if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 6641 | u64 range_end; | ||
| 6642 | |||
| 6643 | range_end = round_up(offset + num_bytes, root->sectorsize) - 1; | ||
| 6644 | ret = test_range_bit(io_tree, offset, range_end, | ||
| 6645 | EXTENT_DELALLOC, 0, NULL); | ||
| 6646 | if (ret) { | ||
| 6647 | ret = -EAGAIN; | ||
| 6648 | goto out; | ||
| 6649 | } | ||
| 6650 | } | ||
| 6651 | |||
| 6629 | btrfs_release_path(path); | 6652 | btrfs_release_path(path); |
| 6630 | 6653 | ||
| 6631 | /* | 6654 | /* |
| @@ -6654,7 +6677,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6654 | */ | 6677 | */ |
| 6655 | disk_bytenr += backref_offset; | 6678 | disk_bytenr += backref_offset; |
| 6656 | disk_bytenr += offset - key.offset; | 6679 | disk_bytenr += offset - key.offset; |
| 6657 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6658 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 6680 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
| 6659 | goto out; | 6681 | goto out; |
| 6660 | /* | 6682 | /* |
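can_nocow_extent() now also refuses a NOCOW write over a preallocated extent that still has delalloc pending, since flushing that delalloc may convert the prealloc extent first. A condensed restatement of the new check with the intent spelled out:

num_bytes = min(offset + *len, extent_end) - offset;
if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
        u64 range_end = round_up(offset + num_bytes,
                                 root->sectorsize) - 1;

        /* dirty-but-unflushed pages over the prealloc range? */
        if (test_range_bit(io_tree, offset, range_end,
                           EXTENT_DELALLOC, 0, NULL)) {
                ret = -EAGAIN;  /* caller retries after the flush */
                goto out;
        }
}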
| @@ -7024,10 +7046,9 @@ again: | |||
| 7024 | if (!ret) | 7046 | if (!ret) |
| 7025 | goto out_test; | 7047 | goto out_test; |
| 7026 | 7048 | ||
| 7027 | ordered->work.func = finish_ordered_fn; | 7049 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); |
| 7028 | ordered->work.flags = 0; | 7050 | btrfs_queue_work(root->fs_info->endio_write_workers, |
| 7029 | btrfs_queue_worker(&root->fs_info->endio_write_workers, | 7051 | &ordered->work); |
| 7030 | &ordered->work); | ||
| 7031 | out_test: | 7052 | out_test: |
| 7032 | /* | 7053 | /* |
| 7033 | * our bio might span multiple ordered extents. If we haven't | 7054 | * our bio might span multiple ordered extents. If we haven't |
| @@ -7404,15 +7425,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7404 | smp_mb__after_atomic_inc(); | 7425 | smp_mb__after_atomic_inc(); |
| 7405 | 7426 | ||
| 7406 | /* | 7427 | /* |
| 7407 | * The generic stuff only does filemap_write_and_wait_range, which isn't | 7428 | * The generic stuff only does filemap_write_and_wait_range, which |
| 7408 | * enough if we've written compressed pages to this area, so we need to | 7429 | * isn't enough if we've written compressed pages to this area, so |
| 7409 | * call btrfs_wait_ordered_range to make absolutely sure that any | 7430 | * we need to flush the dirty pages again to make absolutely sure |
| 7410 | * outstanding dirty pages are on disk. | 7431 | * that any outstanding dirty pages are on disk. |
| 7411 | */ | 7432 | */ |
| 7412 | count = iov_length(iov, nr_segs); | 7433 | count = iov_length(iov, nr_segs); |
| 7413 | ret = btrfs_wait_ordered_range(inode, offset, count); | 7434 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
| 7414 | if (ret) | 7435 | &BTRFS_I(inode)->runtime_flags)) |
| 7415 | return ret; | 7436 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
| 7416 | 7437 | ||
| 7417 | if (rw & WRITE) { | 7438 | if (rw & WRITE) { |
| 7418 | /* | 7439 | /* |
| @@ -8404,7 +8425,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
| 8404 | work->inode = inode; | 8425 | work->inode = inode; |
| 8405 | work->wait = wait; | 8426 | work->wait = wait; |
| 8406 | work->delay_iput = delay_iput; | 8427 | work->delay_iput = delay_iput; |
| 8407 | work->work.func = btrfs_run_delalloc_work; | 8428 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
| 8408 | 8429 | ||
| 8409 | return work; | 8430 | return work; |
| 8410 | } | 8431 | } |
| @@ -8419,7 +8440,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
| 8419 | * some fairly slow code that needs optimization. This walks the list | 8440 | * some fairly slow code that needs optimization. This walks the list |
| 8420 | * of all the inodes with pending delalloc and forces them to disk. | 8441 | * of all the inodes with pending delalloc and forces them to disk. |
| 8421 | */ | 8442 | */ |
| 8422 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8443 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, |
| 8444 | int nr) | ||
| 8423 | { | 8445 | { |
| 8424 | struct btrfs_inode *binode; | 8446 | struct btrfs_inode *binode; |
| 8425 | struct inode *inode; | 8447 | struct inode *inode; |
| @@ -8431,6 +8453,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8431 | INIT_LIST_HEAD(&works); | 8453 | INIT_LIST_HEAD(&works); |
| 8432 | INIT_LIST_HEAD(&splice); | 8454 | INIT_LIST_HEAD(&splice); |
| 8433 | 8455 | ||
| 8456 | mutex_lock(&root->delalloc_mutex); | ||
| 8434 | spin_lock(&root->delalloc_lock); | 8457 | spin_lock(&root->delalloc_lock); |
| 8435 | list_splice_init(&root->delalloc_inodes, &splice); | 8458 | list_splice_init(&root->delalloc_inodes, &splice); |
| 8436 | while (!list_empty(&splice)) { | 8459 | while (!list_empty(&splice)) { |
| @@ -8453,12 +8476,14 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8453 | else | 8476 | else |
| 8454 | iput(inode); | 8477 | iput(inode); |
| 8455 | ret = -ENOMEM; | 8478 | ret = -ENOMEM; |
| 8456 | goto out; | 8479 | break; |
| 8457 | } | 8480 | } |
| 8458 | list_add_tail(&work->list, &works); | 8481 | list_add_tail(&work->list, &works); |
| 8459 | btrfs_queue_worker(&root->fs_info->flush_workers, | 8482 | btrfs_queue_work(root->fs_info->flush_workers, |
| 8460 | &work->work); | 8483 | &work->work); |
| 8461 | 8484 | ret++; | |
| 8485 | if (nr != -1 && ret >= nr) | ||
| 8486 | break; | ||
| 8462 | cond_resched(); | 8487 | cond_resched(); |
| 8463 | spin_lock(&root->delalloc_lock); | 8488 | spin_lock(&root->delalloc_lock); |
| 8464 | } | 8489 | } |
| @@ -8468,18 +8493,13 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8468 | list_del_init(&work->list); | 8493 | list_del_init(&work->list); |
| 8469 | btrfs_wait_and_free_delalloc_work(work); | 8494 | btrfs_wait_and_free_delalloc_work(work); |
| 8470 | } | 8495 | } |
| 8471 | return 0; | ||
| 8472 | out: | ||
| 8473 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 8474 | list_del_init(&work->list); | ||
| 8475 | btrfs_wait_and_free_delalloc_work(work); | ||
| 8476 | } | ||
| 8477 | 8496 | ||
| 8478 | if (!list_empty_careful(&splice)) { | 8497 | if (!list_empty_careful(&splice)) { |
| 8479 | spin_lock(&root->delalloc_lock); | 8498 | spin_lock(&root->delalloc_lock); |
| 8480 | list_splice_tail(&splice, &root->delalloc_inodes); | 8499 | list_splice_tail(&splice, &root->delalloc_inodes); |
| 8481 | spin_unlock(&root->delalloc_lock); | 8500 | spin_unlock(&root->delalloc_lock); |
| 8482 | } | 8501 | } |
| 8502 | mutex_unlock(&root->delalloc_mutex); | ||
| 8483 | return ret; | 8503 | return ret; |
| 8484 | } | 8504 | } |
| 8485 | 8505 | ||
| @@ -8490,7 +8510,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8490 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 8510 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 8491 | return -EROFS; | 8511 | return -EROFS; |
| 8492 | 8512 | ||
| 8493 | ret = __start_delalloc_inodes(root, delay_iput); | 8513 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
| 8514 | if (ret > 0) | ||
| 8515 | ret = 0; | ||
| 8494 | /* | 8516 | /* |
| 8495 | * the filemap_flush will queue IO into the worker threads, but | 8517 | * the filemap_flush will queue IO into the worker threads, but |
| 8496 | * we have to make sure the IO is actually started and that | 8518 | * we have to make sure the IO is actually started and that |
| @@ -8507,7 +8529,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8507 | return ret; | 8529 | return ret; |
| 8508 | } | 8530 | } |
| 8509 | 8531 | ||
| 8510 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | 8532 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 8533 | int nr) | ||
| 8511 | { | 8534 | { |
| 8512 | struct btrfs_root *root; | 8535 | struct btrfs_root *root; |
| 8513 | struct list_head splice; | 8536 | struct list_head splice; |
| @@ -8518,9 +8541,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8518 | 8541 | ||
| 8519 | INIT_LIST_HEAD(&splice); | 8542 | INIT_LIST_HEAD(&splice); |
| 8520 | 8543 | ||
| 8544 | mutex_lock(&fs_info->delalloc_root_mutex); | ||
| 8521 | spin_lock(&fs_info->delalloc_root_lock); | 8545 | spin_lock(&fs_info->delalloc_root_lock); |
| 8522 | list_splice_init(&fs_info->delalloc_roots, &splice); | 8546 | list_splice_init(&fs_info->delalloc_roots, &splice); |
| 8523 | while (!list_empty(&splice)) { | 8547 | while (!list_empty(&splice) && nr) { |
| 8524 | root = list_first_entry(&splice, struct btrfs_root, | 8548 | root = list_first_entry(&splice, struct btrfs_root, |
| 8525 | delalloc_root); | 8549 | delalloc_root); |
| 8526 | root = btrfs_grab_fs_root(root); | 8550 | root = btrfs_grab_fs_root(root); |
| @@ -8529,15 +8553,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8529 | &fs_info->delalloc_roots); | 8553 | &fs_info->delalloc_roots); |
| 8530 | spin_unlock(&fs_info->delalloc_root_lock); | 8554 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8531 | 8555 | ||
| 8532 | ret = __start_delalloc_inodes(root, delay_iput); | 8556 | ret = __start_delalloc_inodes(root, delay_iput, nr); |
| 8533 | btrfs_put_fs_root(root); | 8557 | btrfs_put_fs_root(root); |
| 8534 | if (ret) | 8558 | if (ret < 0) |
| 8535 | goto out; | 8559 | goto out; |
| 8536 | 8560 | ||
| 8561 | if (nr != -1) { | ||
| 8562 | nr -= ret; | ||
| 8563 | WARN_ON(nr < 0); | ||
| 8564 | } | ||
| 8537 | spin_lock(&fs_info->delalloc_root_lock); | 8565 | spin_lock(&fs_info->delalloc_root_lock); |
| 8538 | } | 8566 | } |
| 8539 | spin_unlock(&fs_info->delalloc_root_lock); | 8567 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8540 | 8568 | ||
| 8569 | ret = 0; | ||
| 8541 | atomic_inc(&fs_info->async_submit_draining); | 8570 | atomic_inc(&fs_info->async_submit_draining); |
| 8542 | while (atomic_read(&fs_info->nr_async_submits) || | 8571 | while (atomic_read(&fs_info->nr_async_submits) || |
| 8543 | atomic_read(&fs_info->async_delalloc_pages)) { | 8572 | atomic_read(&fs_info->async_delalloc_pages)) { |
| @@ -8546,13 +8575,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8546 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | 8575 | atomic_read(&fs_info->async_delalloc_pages) == 0)); |
| 8547 | } | 8576 | } |
| 8548 | atomic_dec(&fs_info->async_submit_draining); | 8577 | atomic_dec(&fs_info->async_submit_draining); |
| 8549 | return 0; | ||
| 8550 | out: | 8578 | out: |
| 8551 | if (!list_empty_careful(&splice)) { | 8579 | if (!list_empty_careful(&splice)) { |
| 8552 | spin_lock(&fs_info->delalloc_root_lock); | 8580 | spin_lock(&fs_info->delalloc_root_lock); |
| 8553 | list_splice_tail(&splice, &fs_info->delalloc_roots); | 8581 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
| 8554 | spin_unlock(&fs_info->delalloc_root_lock); | 8582 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8555 | } | 8583 | } |
| 8584 | mutex_unlock(&fs_info->delalloc_root_mutex); | ||
| 8556 | return ret; | 8585 | return ret; |
| 8557 | } | 8586 | } |
| 8558 | 8587 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bfe..0401397b5c92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -59,6 +59,32 @@ | |||
| 59 | #include "props.h" | 59 | #include "props.h" |
| 60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
| 61 | 61 | ||
| 62 | #ifdef CONFIG_64BIT | ||
| 63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | ||
| 64 | * structures are incorrect, as the timespec structure from userspace | ||
| 65 | * is 4 bytes too small. We define these alternatives here to teach | ||
| 66 | * the kernel about the 32-bit struct packing. | ||
| 67 | */ | ||
| 68 | struct btrfs_ioctl_timespec_32 { | ||
| 69 | __u64 sec; | ||
| 70 | __u32 nsec; | ||
| 71 | } __attribute__ ((__packed__)); | ||
| 72 | |||
| 73 | struct btrfs_ioctl_received_subvol_args_32 { | ||
| 74 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
| 75 | __u64 stransid; /* in */ | ||
| 76 | __u64 rtransid; /* out */ | ||
| 77 | struct btrfs_ioctl_timespec_32 stime; /* in */ | ||
| 78 | struct btrfs_ioctl_timespec_32 rtime; /* out */ | ||
| 79 | __u64 flags; /* in */ | ||
| 80 | __u64 reserved[16]; /* in */ | ||
| 81 | } __attribute__ ((__packed__)); | ||
| 82 | |||
| 83 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
| 84 | struct btrfs_ioctl_received_subvol_args_32) | ||
| 85 | #endif | ||
| 86 | |||
| 87 | |||
| 62 | static int btrfs_clone(struct inode *src, struct inode *inode, | 88 | static int btrfs_clone(struct inode *src, struct inode *inode, |
| 63 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 89 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); |
| 64 | 90 | ||
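The packed _32 structures fix a real ABI mismatch. Why the sizes differ, using standard i386 vs x86_64 alignment rules; the 16/12 figures are derived from those rules, not from this diff:

/* struct btrfs_ioctl_timespec { __u64 sec; __u32 nsec; };
 *
 *   x86_64: __u64 aligns to 8 -> 4 bytes tail padding -> sizeof == 16
 *   i386:   __u64 aligns to 4 -> no tail padding      -> sizeof == 12
 *
 * _IOWR() encodes sizeof(arg) into the ioctl number, so 32-bit
 * userspace computes a different BTRFS_IOC_SET_RECEIVED_SUBVOL than
 * the 64-bit kernel sees, and the ioctl was never matched.  The
 * __packed copies above reproduce the 32-bit layout exactly, giving
 * the kernel a matching BTRFS_IOC_SET_RECEIVED_SUBVOL_32 to
 * dispatch on. */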
| @@ -585,6 +611,23 @@ fail: | |||
| 585 | return ret; | 611 | return ret; |
| 586 | } | 612 | } |
| 587 | 613 | ||
| 614 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | ||
| 615 | { | ||
| 616 | s64 writers; | ||
| 617 | DEFINE_WAIT(wait); | ||
| 618 | |||
| 619 | do { | ||
| 620 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
| 621 | TASK_UNINTERRUPTIBLE); | ||
| 622 | |||
| 623 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
| 624 | if (writers) | ||
| 625 | schedule(); | ||
| 626 | |||
| 627 | finish_wait(&root->subv_writers->wait, &wait); | ||
| 628 | } while (writers); | ||
| 629 | } | ||
| 630 | |||
| 588 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 631 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
| 589 | struct dentry *dentry, char *name, int namelen, | 632 | struct dentry *dentry, char *name, int namelen, |
| 590 | u64 *async_transid, bool readonly, | 633 | u64 *async_transid, bool readonly, |
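btrfs_wait_nocow_write(), added in this hunk, is a lost-wakeup-safe drain loop over a per-cpu counter; a note on why it is ordered the way it is:

/* prepare_to_wait() puts the task on subv_writers->wait *before*
 * percpu_counter_sum() samples the counter, so a writer that drops
 * the count to zero and calls wake_up() (presumably from
 * btrfs_end_nocow_write(), whose body is not in this excerpt)
 * between our sample and our schedule() still finds us queued --
 * no missed wakeup.  The do/while re-sums because the per-cpu sum
 * is only a snapshot and new decrements may race with the wakeup. */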
| @@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 598 | if (!root->ref_cows) | 641 | if (!root->ref_cows) |
| 599 | return -EINVAL; | 642 | return -EINVAL; |
| 600 | 643 | ||
| 644 | atomic_inc(&root->will_be_snapshoted); | ||
| 645 | smp_mb__after_atomic_inc(); | ||
| 646 | btrfs_wait_nocow_write(root); | ||
| 647 | |||
| 601 | ret = btrfs_start_delalloc_inodes(root, 0); | 648 | ret = btrfs_start_delalloc_inodes(root, 0); |
| 602 | if (ret) | 649 | if (ret) |
| 603 | return ret; | 650 | goto out; |
| 604 | 651 | ||
| 605 | btrfs_wait_ordered_extents(root, -1); | 652 | btrfs_wait_ordered_extents(root, -1); |
| 606 | 653 | ||
| 607 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 654 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 608 | if (!pending_snapshot) | 655 | if (!pending_snapshot) { |
| 609 | return -ENOMEM; | 656 | ret = -ENOMEM; |
| 657 | goto out; | ||
| 658 | } | ||
| 610 | 659 | ||
| 611 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 660 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
| 612 | BTRFS_BLOCK_RSV_TEMP); | 661 | BTRFS_BLOCK_RSV_TEMP); |
| @@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 623 | &pending_snapshot->qgroup_reserved, | 672 | &pending_snapshot->qgroup_reserved, |
| 624 | false); | 673 | false); |
| 625 | if (ret) | 674 | if (ret) |
| 626 | goto out; | 675 | goto free; |
| 627 | 676 | ||
| 628 | pending_snapshot->dentry = dentry; | 677 | pending_snapshot->dentry = dentry; |
| 629 | pending_snapshot->root = root; | 678 | pending_snapshot->root = root; |
| @@ -674,8 +723,10 @@ fail: | |||
| 674 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, | 723 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, |
| 675 | &pending_snapshot->block_rsv, | 724 | &pending_snapshot->block_rsv, |
| 676 | pending_snapshot->qgroup_reserved); | 725 | pending_snapshot->qgroup_reserved); |
| 677 | out: | 726 | free: |
| 678 | kfree(pending_snapshot); | 727 | kfree(pending_snapshot); |
| 728 | out: | ||
| 729 | atomic_dec(&root->will_be_snapshoted); | ||
| 679 | return ret; | 730 | return ret; |
| 680 | } | 731 | } |
| 681 | 732 | ||
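create_snapshot() now quiesces NOCOW writers before flushing. The full ordering, with each step taken from the hunks above; step 1's consumer, btrfs_start_nocow_write(), is presumed to test will_be_snapshoted, its body being outside this excerpt:

atomic_inc(&root->will_be_snapshoted);  /* 1. turn away new NOCOW writers */
smp_mb__after_atomic_inc();             /* 2. publish before draining */
btrfs_wait_nocow_write(root);           /* 3. drain in-flight writers */
ret = btrfs_start_delalloc_inodes(root, 0);  /* 4. flush dirty data */
/* ... */
btrfs_wait_ordered_extents(root, -1);   /* 5. wait for the IO to land */
/* ...create the pending snapshot... */
atomic_dec(&root->will_be_snapshoted);  /* 6. re-admit NOCOW writers */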
| @@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 884 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 935 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
| 885 | min_key.offset = *off; | 936 | min_key.offset = *off; |
| 886 | 937 | ||
| 887 | path->keep_locks = 1; | ||
| 888 | |||
| 889 | while (1) { | 938 | while (1) { |
| 939 | path->keep_locks = 1; | ||
| 890 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
| 891 | if (ret != 0) | 941 | if (ret != 0) |
| 892 | goto none; | 942 | goto none; |
| 943 | path->keep_locks = 0; | ||
| 944 | btrfs_unlock_up_safe(path, 1); | ||
| 945 | process_slot: | ||
| 893 | if (min_key.objectid != ino) | 946 | if (min_key.objectid != ino) |
| 894 | goto none; | 947 | goto none; |
| 895 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 948 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
| @@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 908 | return 0; | 961 | return 0; |
| 909 | } | 962 | } |
| 910 | 963 | ||
| 964 | path->slots[0]++; | ||
| 965 | if (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
| 966 | btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); | ||
| 967 | goto process_slot; | ||
| 968 | } | ||
| 969 | |||
| 911 | if (min_key.offset == (u64)-1) | 970 | if (min_key.offset == (u64)-1) |
| 912 | goto none; | 971 | goto none; |
| 913 | 972 | ||
| @@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) | |||
| 935 | read_unlock(&em_tree->lock); | 994 | read_unlock(&em_tree->lock); |
| 936 | 995 | ||
| 937 | if (!em) { | 996 | if (!em) { |
| 997 | struct extent_state *cached = NULL; | ||
| 998 | u64 end = start + len - 1; | ||
| 999 | |||
| 938 | /* get the big lock and read metadata off disk */ | 1000 | /* get the big lock and read metadata off disk */ |
| 939 | lock_extent(io_tree, start, start + len - 1); | 1001 | lock_extent_bits(io_tree, start, end, 0, &cached); |
| 940 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 1002 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
| 941 | unlock_extent(io_tree, start, start + len - 1); | 1003 | unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); |
| 942 | 1004 | ||
| 943 | if (IS_ERR(em)) | 1005 | if (IS_ERR(em)) |
| 944 | return NULL; | 1006 | return NULL; |
| @@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
| 957 | return false; | 1019 | return false; |
| 958 | 1020 | ||
| 959 | next = defrag_lookup_extent(inode, em->start + em->len); | 1021 | next = defrag_lookup_extent(inode, em->start + em->len); |
| 960 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) | 1022 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || |
| 1023 | (em->block_start + em->block_len == next->block_start)) | ||
| 961 | ret = false; | 1024 | ret = false; |
| 962 | 1025 | ||
| 963 | free_extent_map(next); | 1026 | free_extent_map(next); |
| @@ -1076,10 +1139,12 @@ again: | |||
| 1076 | page_start = page_offset(page); | 1139 | page_start = page_offset(page); |
| 1077 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1140 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 1078 | while (1) { | 1141 | while (1) { |
| 1079 | lock_extent(tree, page_start, page_end); | 1142 | lock_extent_bits(tree, page_start, page_end, |
| 1143 | 0, &cached_state); | ||
| 1080 | ordered = btrfs_lookup_ordered_extent(inode, | 1144 | ordered = btrfs_lookup_ordered_extent(inode, |
| 1081 | page_start); | 1145 | page_start); |
| 1082 | unlock_extent(tree, page_start, page_end); | 1146 | unlock_extent_cached(tree, page_start, page_end, |
| 1147 | &cached_state, GFP_NOFS); | ||
| 1083 | if (!ordered) | 1148 | if (!ordered) |
| 1084 | break; | 1149 | break; |
| 1085 | 1150 | ||
| @@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1356 | } | 1421 | } |
| 1357 | } | 1422 | } |
| 1358 | 1423 | ||
| 1359 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1424 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { |
| 1360 | filemap_flush(inode->i_mapping); | 1425 | filemap_flush(inode->i_mapping); |
| 1426 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
| 1427 | &BTRFS_I(inode)->runtime_flags)) | ||
| 1428 | filemap_flush(inode->i_mapping); | ||
| 1429 | } | ||
| 1361 | 1430 | ||
| 1362 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1431 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
| 1363 | /* the filemap_flush will queue IO into the worker threads, but | 1432 | /* the filemap_flush will queue IO into the worker threads, but |
| @@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 1573 | if (src_inode->i_sb != file_inode(file)->i_sb) { | 1642 | if (src_inode->i_sb != file_inode(file)->i_sb) { |
| 1574 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, | 1643 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, |
| 1575 | "Snapshot src from another FS"); | 1644 | "Snapshot src from another FS"); |
| 1576 | ret = -EINVAL; | 1645 | ret = -EXDEV; |
| 1577 | } else if (!inode_owner_or_capable(src_inode)) { | 1646 | } else if (!inode_owner_or_capable(src_inode)) { |
| 1578 | /* | 1647 | /* |
| 1579 | * Subvolume creation is not restricted, but snapshots | 1648 | * Subvolume creation is not restricted, but snapshots |
| @@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) | |||
| 1797 | if (di && !IS_ERR(di)) { | 1866 | if (di && !IS_ERR(di)) { |
| 1798 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1867 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
| 1799 | if (key.objectid == root->root_key.objectid) { | 1868 | if (key.objectid == root->root_key.objectid) { |
| 1800 | ret = -ENOTEMPTY; | 1869 | ret = -EPERM; |
| 1870 | btrfs_err(root->fs_info, "deleting default subvolume " | ||
| 1871 | "%llu is not allowed", key.objectid); | ||
| 1801 | goto out; | 1872 | goto out; |
| 1802 | } | 1873 | } |
| 1803 | btrfs_release_path(path); | 1874 | btrfs_release_path(path); |
| @@ -2994,8 +3065,9 @@ process_slot: | |||
| 2994 | new_key.offset + datal, | 3065 | new_key.offset + datal, |
| 2995 | 1); | 3066 | 1); |
| 2996 | if (ret) { | 3067 | if (ret) { |
| 2997 | btrfs_abort_transaction(trans, root, | 3068 | if (ret != -EINVAL) |
| 2998 | ret); | 3069 | btrfs_abort_transaction(trans, |
| 3070 | root, ret); | ||
| 2999 | btrfs_end_transaction(trans, root); | 3071 | btrfs_end_transaction(trans, root); |
| 3000 | goto out; | 3072 | goto out; |
| 3001 | } | 3073 | } |
| @@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 3153 | * decompress into destination's address_space (the file offset | 3225 | * decompress into destination's address_space (the file offset |
| 3154 | * may change, so source mapping won't do), then recompress (or | 3226 | * may change, so source mapping won't do), then recompress (or |
| 3155 | * otherwise reinsert) a subrange. | 3227 | * otherwise reinsert) a subrange. |
| 3156 | * - allow ranges within the same file to be cloned (provided | 3228 | * |
| 3157 | * they don't overlap)? | 3229 | * - split destination inode's inline extents. The inline extents can |
| 3230 | * be either compressed or non-compressed. | ||
| 3158 | */ | 3231 | */ |
| 3159 | 3232 | ||
| 3160 | /* the destination must be opened for writing */ | 3233 | /* the destination must be opened for writing */ |
| @@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
| 4353 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 4426 | return btrfs_qgroup_wait_for_completion(root->fs_info); |
| 4354 | } | 4427 | } |
| 4355 | 4428 | ||
| 4356 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 4429 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
| 4357 | void __user *arg) | 4430 | struct btrfs_ioctl_received_subvol_args *sa) |
| 4358 | { | 4431 | { |
| 4359 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4360 | struct inode *inode = file_inode(file); | 4432 | struct inode *inode = file_inode(file); |
| 4361 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4433 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4362 | struct btrfs_root_item *root_item = &root->root_item; | 4434 | struct btrfs_root_item *root_item = &root->root_item; |
| @@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4384 | goto out; | 4456 | goto out; |
| 4385 | } | 4457 | } |
| 4386 | 4458 | ||
| 4387 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4388 | if (IS_ERR(sa)) { | ||
| 4389 | ret = PTR_ERR(sa); | ||
| 4390 | sa = NULL; | ||
| 4391 | goto out; | ||
| 4392 | } | ||
| 4393 | |||
| 4394 | /* | 4459 | /* |
| 4395 | * 1 - root item | 4460 | * 1 - root item |
| 4396 | * 2 - uuid items (received uuid + subvol uuid) | 4461 | * 2 - uuid items (received uuid + subvol uuid) |
| @@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4444 | goto out; | 4509 | goto out; |
| 4445 | } | 4510 | } |
| 4446 | 4511 | ||
| 4512 | out: | ||
| 4513 | up_write(&root->fs_info->subvol_sem); | ||
| 4514 | mnt_drop_write_file(file); | ||
| 4515 | return ret; | ||
| 4516 | } | ||
| 4517 | |||
| 4518 | #ifdef CONFIG_64BIT | ||
| 4519 | static long btrfs_ioctl_set_received_subvol_32(struct file *file, | ||
| 4520 | void __user *arg) | ||
| 4521 | { | ||
| 4522 | struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; | ||
| 4523 | struct btrfs_ioctl_received_subvol_args *args64 = NULL; | ||
| 4524 | int ret = 0; | ||
| 4525 | |||
| 4526 | args32 = memdup_user(arg, sizeof(*args32)); | ||
| 4527 | if (IS_ERR(args32)) { | ||
| 4528 | ret = PTR_ERR(args32); | ||
| 4529 | args32 = NULL; | ||
| 4530 | goto out; | ||
| 4531 | } | ||
| 4532 | |||
| 4533 | args64 = kmalloc(sizeof(*args64), GFP_NOFS); | ||
| 4534 | if (!args64) { | ||
| 4535 | /* kmalloc() returns NULL on failure, never ERR_PTR */ | ||
| 4536 | ret = -ENOMEM; | ||
| 4537 | goto out; | ||
| 4538 | } | ||
| 4539 | |||
| 4540 | memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); | ||
| 4541 | args64->stransid = args32->stransid; | ||
| 4542 | args64->rtransid = args32->rtransid; | ||
| 4543 | args64->stime.sec = args32->stime.sec; | ||
| 4544 | args64->stime.nsec = args32->stime.nsec; | ||
| 4545 | args64->rtime.sec = args32->rtime.sec; | ||
| 4546 | args64->rtime.nsec = args32->rtime.nsec; | ||
| 4547 | args64->flags = args32->flags; | ||
| 4548 | |||
| 4549 | ret = _btrfs_ioctl_set_received_subvol(file, args64); | ||
| 4550 | if (ret) | ||
| 4551 | goto out; | ||
| 4552 | |||
| 4553 | memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); | ||
| 4554 | args32->stransid = args64->stransid; | ||
| 4555 | args32->rtransid = args64->rtransid; | ||
| 4556 | args32->stime.sec = args64->stime.sec; | ||
| 4557 | args32->stime.nsec = args64->stime.nsec; | ||
| 4558 | args32->rtime.sec = args64->rtime.sec; | ||
| 4559 | args32->rtime.nsec = args64->rtime.nsec; | ||
| 4560 | args32->flags = args64->flags; | ||
| 4561 | |||
| 4562 | ret = copy_to_user(arg, args32, sizeof(*args32)); | ||
| 4563 | if (ret) | ||
| 4564 | ret = -EFAULT; | ||
| 4565 | |||
| 4566 | out: | ||
| 4567 | kfree(args32); | ||
| 4568 | kfree(args64); | ||
| 4569 | return ret; | ||
| 4570 | } | ||
| 4571 | #endif | ||
| 4572 | |||
| 4573 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
| 4574 | void __user *arg) | ||
| 4575 | { | ||
| 4576 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4577 | int ret = 0; | ||
| 4578 | |||
| 4579 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4580 | if (IS_ERR(sa)) { | ||
| 4581 | ret = PTR_ERR(sa); | ||
| 4582 | sa = NULL; | ||
| 4583 | goto out; | ||
| 4584 | } | ||
| 4585 | |||
| 4586 | ret = _btrfs_ioctl_set_received_subvol(file, sa); | ||
| 4587 | |||
| 4588 | if (ret) | ||
| 4589 | goto out; | ||
| 4590 | |||
| 4447 | ret = copy_to_user(arg, sa, sizeof(*sa)); | 4591 | ret = copy_to_user(arg, sa, sizeof(*sa)); |
| 4448 | if (ret) | 4592 | if (ret) |
| 4449 | ret = -EFAULT; | 4593 | ret = -EFAULT; |
| 4450 | 4594 | ||
| 4451 | out: | 4595 | out: |
| 4452 | kfree(sa); | 4596 | kfree(sa); |
| 4453 | up_write(&root->fs_info->subvol_sem); | ||
| 4454 | mnt_drop_write_file(file); | ||
| 4455 | return ret; | 4597 | return ret; |
| 4456 | } | 4598 | } |
| 4457 | 4599 | ||
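The split into _btrfs_ioctl_set_received_subvol() plus two thin wrappers is the usual compat-ioctl shape; summarizing the thunk implemented above:

/* Thunk shape:
 *   memdup_user(32-bit layout)          copy in
 *   widen field by field -> args64      timespec padding differs
 *   _btrfs_ioctl_set_received_subvol()  shared logic
 *   narrow rtransid/rtime -> args32     out-parameters
 *   copy_to_user(32-bit layout)         copy out
 * A memcpy() of the whole struct would not work: everything after
 * the first timespec sits at different offsets in the two layouts,
 * hence the field-by-field copies in both directions. */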
| @@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4746 | case BTRFS_IOC_SYNC: { | 4888 | case BTRFS_IOC_SYNC: { |
| 4747 | int ret; | 4889 | int ret; |
| 4748 | 4890 | ||
| 4749 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 4891 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 4750 | if (ret) | 4892 | if (ret) |
| 4751 | return ret; | 4893 | return ret; |
| 4752 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 4894 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
| @@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4770 | return btrfs_ioctl_balance_progress(root, argp); | 4912 | return btrfs_ioctl_balance_progress(root, argp); |
| 4771 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | 4913 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: |
| 4772 | return btrfs_ioctl_set_received_subvol(file, argp); | 4914 | return btrfs_ioctl_set_received_subvol(file, argp); |
| 4915 | #ifdef CONFIG_64BIT | ||
| 4916 | case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: | ||
| 4917 | return btrfs_ioctl_set_received_subvol_32(file, argp); | ||
| 4918 | #endif | ||
| 4773 | case BTRFS_IOC_SEND: | 4919 | case BTRFS_IOC_SEND: |
| 4774 | return btrfs_ioctl_send(file, argp); | 4920 | return btrfs_ioctl_send(file, argp); |
| 4775 | case BTRFS_IOC_GET_DEV_STATS: | 4921 | case BTRFS_IOC_GET_DEV_STATS: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b16450b840e7..a94b05f72869 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -349,10 +349,13 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
| 349 | if (!uptodate) | 349 | if (!uptodate) |
| 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 351 | 351 | ||
| 352 | if (entry->bytes_left == 0) | 352 | if (entry->bytes_left == 0) { |
| 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 354 | else | 354 | if (waitqueue_active(&entry->wait)) |
| 355 | wake_up(&entry->wait); | ||
| 356 | } else { | ||
| 355 | ret = 1; | 357 | ret = 1; |
| 358 | } | ||
| 356 | out: | 359 | out: |
| 357 | if (!ret && cached && entry) { | 360 | if (!ret && cached && entry) { |
| 358 | *cached = entry; | 361 | *cached = entry; |
| @@ -410,10 +413,13 @@ have_entry: | |||
| 410 | if (!uptodate) | 413 | if (!uptodate) |
| 411 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 414 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 412 | 415 | ||
| 413 | if (entry->bytes_left == 0) | 416 | if (entry->bytes_left == 0) { |
| 414 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 417 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 415 | else | 418 | if (waitqueue_active(&entry->wait)) |
| 419 | wake_up(&entry->wait); | ||
| 420 | } else { | ||
| 416 | ret = 1; | 421 | ret = 1; |
| 422 | } | ||
| 417 | out: | 423 | out: |
| 418 | if (!ret && cached && entry) { | 424 | if (!ret && cached && entry) { |
| 419 | *cached = entry; | 425 | *cached = entry; |
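Both completion paths above gain the waitqueue_active() fast-path check before wake_up(). Why this is safe here without an extra barrier, reasoning from the visible code:

/* waitqueue_active() is an unlocked peek, so it normally needs an
 * smp_mb() between "make the condition true" and the peek.  Here the
 * condition is published by test_and_set_bit(BTRFS_ORDERED_IO_DONE),
 * which is a full barrier on its own, so a waiter that checked the
 * flag before sleeping either sees IO_DONE already set or is already
 * queued on entry->wait when we peek -- the wakeup cannot be lost. */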
| @@ -424,27 +430,48 @@ out: | |||
| 424 | } | 430 | } |
| 425 | 431 | ||
| 426 | /* Needs to either be called under a log transaction or the log_mutex */ | 432 | /* Needs to either be called under a log transaction or the log_mutex */ |
| 427 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | 433 | void btrfs_get_logged_extents(struct inode *inode, |
| 434 | struct list_head *logged_list) | ||
| 428 | { | 435 | { |
| 429 | struct btrfs_ordered_inode_tree *tree; | 436 | struct btrfs_ordered_inode_tree *tree; |
| 430 | struct btrfs_ordered_extent *ordered; | 437 | struct btrfs_ordered_extent *ordered; |
| 431 | struct rb_node *n; | 438 | struct rb_node *n; |
| 432 | int index = log->log_transid % 2; | ||
| 433 | 439 | ||
| 434 | tree = &BTRFS_I(inode)->ordered_tree; | 440 | tree = &BTRFS_I(inode)->ordered_tree; |
| 435 | spin_lock_irq(&tree->lock); | 441 | spin_lock_irq(&tree->lock); |
| 436 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { |
| 437 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
| 438 | spin_lock(&log->log_extents_lock[index]); | 444 | if (!list_empty(&ordered->log_list)) |
| 439 | if (list_empty(&ordered->log_list)) { | 445 | continue; |
| 440 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | 446 | list_add_tail(&ordered->log_list, logged_list); |
| 441 | atomic_inc(&ordered->refs); | 447 | atomic_inc(&ordered->refs); |
| 442 | } | ||
| 443 | spin_unlock(&log->log_extents_lock[index]); | ||
| 444 | } | 448 | } |
| 445 | spin_unlock_irq(&tree->lock); | 449 | spin_unlock_irq(&tree->lock); |
| 446 | } | 450 | } |
| 447 | 451 | ||
| 452 | void btrfs_put_logged_extents(struct list_head *logged_list) | ||
| 453 | { | ||
| 454 | struct btrfs_ordered_extent *ordered; | ||
| 455 | |||
| 456 | while (!list_empty(logged_list)) { | ||
| 457 | ordered = list_first_entry(logged_list, | ||
| 458 | struct btrfs_ordered_extent, | ||
| 459 | log_list); | ||
| 460 | list_del_init(&ordered->log_list); | ||
| 461 | btrfs_put_ordered_extent(ordered); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 466 | struct btrfs_root *log) | ||
| 467 | { | ||
| 468 | int index = log->log_transid % 2; | ||
| 469 | |||
| 470 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 471 | list_splice_tail(logged_list, &log->logged_list[index]); | ||
| 472 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 473 | } | ||
| 474 | |||
| 448 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) |
| 449 | { | 476 | { |
| 450 | struct btrfs_ordered_extent *ordered; | 477 | struct btrfs_ordered_extent *ordered; |
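Logged ordered extents move from a list on the log root, previously taken under log_extents_lock inside the rbtree walk, to a caller-owned list. The expected flow in the logging code, reconstructed only from the three functions above; the actual caller lives in tree-log.c, outside this excerpt, and log_the_extents() is a hypothetical stand-in for it:

LIST_HEAD(logged_list);
int err;

btrfs_get_logged_extents(inode, &logged_list);  /* refs under tree->lock only */
err = log_the_extents(inode, &logged_list);     /* hypothetical logging step */
if (err)
        btrfs_put_logged_extents(&logged_list); /* failure: just drop refs */
else
        btrfs_submit_logged_extents(&logged_list, log); /* one locked splice */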
| @@ -577,7 +604,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 577 | INIT_LIST_HEAD(&splice); | 604 | INIT_LIST_HEAD(&splice); |
| 578 | INIT_LIST_HEAD(&works); | 605 | INIT_LIST_HEAD(&works); |
| 579 | 606 | ||
| 580 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 607 | mutex_lock(&root->ordered_extent_mutex); |
| 581 | spin_lock(&root->ordered_extent_lock); | 608 | spin_lock(&root->ordered_extent_lock); |
| 582 | list_splice_init(&root->ordered_extents, &splice); | 609 | list_splice_init(&root->ordered_extents, &splice); |
| 583 | while (!list_empty(&splice) && nr) { | 610 | while (!list_empty(&splice) && nr) { |
| @@ -588,10 +615,11 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 588 | atomic_inc(&ordered->refs); | 615 | atomic_inc(&ordered->refs); |
| 589 | spin_unlock(&root->ordered_extent_lock); | 616 | spin_unlock(&root->ordered_extent_lock); |
| 590 | 617 | ||
| 591 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 618 | btrfs_init_work(&ordered->flush_work, |
| 619 | btrfs_run_ordered_extent_work, NULL, NULL); | ||
| 592 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
| 593 | btrfs_queue_worker(&root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
| 594 | &ordered->flush_work); | 622 | &ordered->flush_work); |
| 595 | 623 | ||
| 596 | cond_resched(); | 624 | cond_resched(); |
| 597 | spin_lock(&root->ordered_extent_lock); | 625 | spin_lock(&root->ordered_extent_lock); |
| @@ -608,7 +636,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 608 | btrfs_put_ordered_extent(ordered); | 636 | btrfs_put_ordered_extent(ordered); |
| 609 | cond_resched(); | 637 | cond_resched(); |
| 610 | } | 638 | } |
| 611 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 639 | mutex_unlock(&root->ordered_extent_mutex); |
| 612 | 640 | ||
| 613 | return count; | 641 | return count; |
| 614 | } | 642 | } |
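The same mechanical conversion recurs throughout the rest of this diff: the old btrfs_workers pool was an embedded structure taken by address, with the work function assigned by hand, while the new btrfs_workqueue is a pointer whose work items are set up via btrfs_init_work(). A side-by-side sketch, with example_worker standing in for any of the real callbacks:

```c
/* Old pattern (being removed): */
work->func = example_worker;
btrfs_queue_worker(&fs_info->flush_workers, work);

/* New pattern: the ordered_func/ordered_free hooks are unused here (NULL). */
btrfs_init_work(work, example_worker, NULL, NULL);
btrfs_queue_work(fs_info->flush_workers, work);
```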
| @@ -621,6 +649,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 621 | 649 | ||
| 622 | INIT_LIST_HEAD(&splice); | 650 | INIT_LIST_HEAD(&splice); |
| 623 | 651 | ||
| 652 | mutex_lock(&fs_info->ordered_operations_mutex); | ||
| 624 | spin_lock(&fs_info->ordered_root_lock); | 653 | spin_lock(&fs_info->ordered_root_lock); |
| 625 | list_splice_init(&fs_info->ordered_roots, &splice); | 654 | list_splice_init(&fs_info->ordered_roots, &splice); |
| 626 | while (!list_empty(&splice) && nr) { | 655 | while (!list_empty(&splice) && nr) { |
| @@ -643,6 +672,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 643 | } | 672 | } |
| 644 | list_splice_tail(&splice, &fs_info->ordered_roots); | 673 | list_splice_tail(&splice, &fs_info->ordered_roots); |
| 645 | spin_unlock(&fs_info->ordered_root_lock); | 674 | spin_unlock(&fs_info->ordered_root_lock); |
| 675 | mutex_unlock(&fs_info->ordered_operations_mutex); | ||
| 646 | } | 676 | } |
| 647 | 677 | ||
| 648 | /* | 678 | /* |
| @@ -704,8 +734,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 704 | goto out; | 734 | goto out; |
| 705 | } | 735 | } |
| 706 | list_add_tail(&work->list, &works); | 736 | list_add_tail(&work->list, &works); |
| 707 | btrfs_queue_worker(&root->fs_info->flush_workers, | 737 | btrfs_queue_work(root->fs_info->flush_workers, |
| 708 | &work->work); | 738 | &work->work); |
| 709 | 739 | ||
| 710 | cond_resched(); | 740 | cond_resched(); |
| 711 | spin_lock(&root->fs_info->ordered_root_lock); | 741 | spin_lock(&root->fs_info->ordered_root_lock); |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f7ac20..246897058efb 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -197,7 +197,11 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 197 | struct inode *inode); | 197 | struct inode *inode); |
| 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
| 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
| 200 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 200 | void btrfs_get_logged_extents(struct inode *inode, |
| 201 | struct list_head *logged_list); | ||
| 202 | void btrfs_put_logged_extents(struct list_head *logged_list); | ||
| 203 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 204 | struct btrfs_root *log); | ||
| 201 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 205 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
| 202 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 206 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
| 203 | int __init ordered_data_init(void); | 207 | int __init ordered_data_init(void); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 472302a2d745..2cf905877aaf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1509,8 +1509,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
| 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); | 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); |
| 1510 | if (!ret) { | 1510 | if (!ret) { |
| 1511 | qgroup_rescan_zero_tracking(fs_info); | 1511 | qgroup_rescan_zero_tracking(fs_info); |
| 1512 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 1512 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 1513 | &fs_info->qgroup_rescan_work); | 1513 | &fs_info->qgroup_rescan_work); |
| 1514 | } | 1514 | } |
| 1515 | ret = 0; | 1515 | ret = 0; |
| 1516 | } | 1516 | } |
| @@ -2095,7 +2095,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
| 2095 | 2095 | ||
| 2096 | memset(&fs_info->qgroup_rescan_work, 0, | 2096 | memset(&fs_info->qgroup_rescan_work, 0, |
| 2097 | sizeof(fs_info->qgroup_rescan_work)); | 2097 | sizeof(fs_info->qgroup_rescan_work)); |
| 2098 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | 2098 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
| 2099 | btrfs_qgroup_rescan_worker, NULL, NULL); | ||
| 2099 | 2100 | ||
| 2100 | if (ret) { | 2101 | if (ret) { |
| 2101 | err: | 2102 | err: |
| @@ -2158,8 +2159,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
| 2158 | 2159 | ||
| 2159 | qgroup_rescan_zero_tracking(fs_info); | 2160 | qgroup_rescan_zero_tracking(fs_info); |
| 2160 | 2161 | ||
| 2161 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2162 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2162 | &fs_info->qgroup_rescan_work); | 2163 | &fs_info->qgroup_rescan_work); |
| 2163 | 2164 | ||
| 2164 | return 0; | 2165 | return 0; |
| 2165 | } | 2166 | } |
| @@ -2190,6 +2191,6 @@ void | |||
| 2190 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | 2191 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) |
| 2191 | { | 2192 | { |
| 2192 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2193 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
| 2193 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2194 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2194 | &fs_info->qgroup_rescan_work); | 2195 | &fs_info->qgroup_rescan_work); |
| 2195 | } | 2196 | } |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9af0b25d991a..4055291a523e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
| @@ -1416,20 +1416,18 @@ cleanup: | |||
| 1416 | 1416 | ||
| 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
| 1418 | { | 1418 | { |
| 1419 | rbio->work.flags = 0; | 1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); |
| 1420 | rbio->work.func = rmw_work; | ||
| 1421 | 1420 | ||
| 1422 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1423 | &rbio->work); | 1422 | &rbio->work); |
| 1424 | } | 1423 | } |
| 1425 | 1424 | ||
| 1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
| 1427 | { | 1426 | { |
| 1428 | rbio->work.flags = 0; | 1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); |
| 1429 | rbio->work.func = read_rebuild_work; | ||
| 1430 | 1428 | ||
| 1431 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1432 | &rbio->work); | 1430 | &rbio->work); |
| 1433 | } | 1431 | } |
| 1434 | 1432 | ||
| 1435 | /* | 1433 | /* |
| @@ -1667,10 +1665,9 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
| 1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
| 1668 | 1666 | ||
| 1669 | if (from_schedule) { | 1667 | if (from_schedule) { |
| 1670 | plug->work.flags = 0; | 1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); |
| 1671 | plug->work.func = unplug_work; | 1669 | btrfs_queue_work(plug->info->rmw_workers, |
| 1672 | btrfs_queue_worker(&plug->info->rmw_workers, | 1670 | &plug->work); |
| 1673 | &plug->work); | ||
| 1674 | return; | 1671 | return; |
| 1675 | } | 1672 | } |
| 1676 | run_plug(plug); | 1673 | run_plug(plug); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 31c797c48c3e..30947f923620 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -793,10 +793,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
| 793 | /* FIXME we cannot handle this properly right now */ | 793 | /* FIXME we cannot handle this properly right now */ |
| 794 | BUG(); | 794 | BUG(); |
| 795 | } | 795 | } |
| 796 | rmw->work.func = reada_start_machine_worker; | 796 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); |
| 797 | rmw->fs_info = fs_info; | 797 | rmw->fs_info = fs_info; |
| 798 | 798 | ||
| 799 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | 799 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
| 800 | } | 800 | } |
| 801 | 801 | ||
| 802 | #ifdef DEBUG | 802 | #ifdef DEBUG |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 07b3b36f40ee..def428a25b2a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -4248,7 +4248,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
| 4249 | rc->block_group->key.objectid, rc->block_group->flags); | 4249 | rc->block_group->key.objectid, rc->block_group->flags); |
| 4250 | 4250 | ||
| 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0); | 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); |
| 4252 | if (ret < 0) { | 4252 | if (ret < 0) { |
| 4253 | err = ret; | 4253 | err = ret; |
| 4254 | goto out; | 4254 | goto out; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1389b69059de..38bb47e7d6b1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/err.h> | ||
| 19 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| @@ -271,7 +272,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 271 | key.offset++; | 272 | key.offset++; |
| 272 | 273 | ||
| 273 | root = btrfs_read_fs_root(tree_root, &root_key); | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
| 274 | err = PTR_RET(root); | 275 | err = PTR_ERR_OR_ZERO(root); |
| 275 | if (err && err != -ENOENT) { | 276 | if (err && err != -ENOENT) { |
| 276 | break; | 277 | break; |
| 277 | } else if (err == -ENOENT) { | 278 | } else if (err == -ENOENT) { |
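PTR_RET() is simply the old name of this helper; PTR_ERR_OR_ZERO() behaves exactly as its name says. Its equivalent open-coded form:

```c
/* What PTR_ERR_OR_ZERO(ptr) does, per include/linux/err.h. */
static inline int ptr_err_or_zero(const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	return 0;
}
```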
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efba5d1282ee..93e6d7172844 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
| 315 | atomic_inc(&fs_info->scrubs_running); | 315 | atomic_inc(&fs_info->scrubs_running); |
| 316 | atomic_inc(&fs_info->scrubs_paused); | 316 | atomic_inc(&fs_info->scrubs_paused); |
| 317 | mutex_unlock(&fs_info->scrub_lock); | 317 | mutex_unlock(&fs_info->scrub_lock); |
| 318 | |||
| 319 | /* | ||
| 320 | * The @scrubs_running == @scrubs_paused condition checked | ||
| 321 | * inside wait_event() is not an atomic operation, which | ||
| 322 | * means @scrubs_running/@scrubs_paused may change at any | ||
| 323 | * time. Wake up @scrub_pause_wait as often as we can so | ||
| 324 | * that the transaction commit is blocked as briefly as possible. | ||
| 325 | */ | ||
| 326 | wake_up(&fs_info->scrub_pause_wait); | ||
| 327 | |||
| 318 | atomic_inc(&sctx->workers_pending); | 328 | atomic_inc(&sctx->workers_pending); |
| 319 | } | 329 | } |
| 320 | 330 | ||
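The added wake_up() pairs with the transaction-commit side, which sleeps on @scrub_pause_wait until every running scrub has checked in as paused. A sketch of the waiting side, with the condition as used by the scrub pause path:

```c
/* Sketch of the waiter the extra wake_up() is aimed at. */
wait_event(fs_info->scrub_pause_wait,
	   atomic_read(&fs_info->scrubs_running) ==
	   atomic_read(&fs_info->scrubs_paused));
```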
| @@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
| 418 | sbio->index = i; | 428 | sbio->index = i; |
| 419 | sbio->sctx = sctx; | 429 | sbio->sctx = sctx; |
| 420 | sbio->page_count = 0; | 430 | sbio->page_count = 0; |
| 421 | sbio->work.func = scrub_bio_end_io_worker; | 431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, |
| 432 | NULL, NULL); | ||
| 422 | 433 | ||
| 423 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 434 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
| 424 | sctx->bios[i]->next_free = i + 1; | 435 | sctx->bios[i]->next_free = i + 1; |
| @@ -987,9 +998,10 @@ nodatasum_case: | |||
| 987 | fixup_nodatasum->root = fs_info->extent_root; | 998 | fixup_nodatasum->root = fs_info->extent_root; |
| 988 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 999 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
| 989 | scrub_pending_trans_workers_inc(sctx); | 1000 | scrub_pending_trans_workers_inc(sctx); |
| 990 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 1001 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, |
| 991 | btrfs_queue_worker(&fs_info->scrub_workers, | 1002 | NULL, NULL); |
| 992 | &fixup_nodatasum->work); | 1003 | btrfs_queue_work(fs_info->scrub_workers, |
| 1004 | &fixup_nodatasum->work); | ||
| 993 | goto out; | 1005 | goto out; |
| 994 | } | 1006 | } |
| 995 | 1007 | ||
| @@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
| 1603 | sbio->err = err; | 1615 | sbio->err = err; |
| 1604 | sbio->bio = bio; | 1616 | sbio->bio = bio; |
| 1605 | 1617 | ||
| 1606 | sbio->work.func = scrub_wr_bio_end_io_worker; | 1618 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
| 1607 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | 1619 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
| 1608 | } | 1620 | } |
| 1609 | 1621 | ||
| 1610 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | 1622 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
| 2072 | sbio->err = err; | 2084 | sbio->err = err; |
| 2073 | sbio->bio = bio; | 2085 | sbio->bio = bio; |
| 2074 | 2086 | ||
| 2075 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 2087 | btrfs_queue_work(fs_info->scrub_workers, &sbio->work); |
| 2076 | } | 2088 | } |
| 2077 | 2089 | ||
| 2078 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2090 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2686,10 +2698,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2686 | 2698 | ||
| 2687 | wait_event(sctx->list_wait, | 2699 | wait_event(sctx->list_wait, |
| 2688 | atomic_read(&sctx->bios_in_flight) == 0); | 2700 | atomic_read(&sctx->bios_in_flight) == 0); |
| 2689 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 2701 | atomic_inc(&fs_info->scrubs_paused); |
| 2702 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2703 | |||
| 2704 | /* | ||
| 2705 | * Must be called before we decrease @scrubs_paused. | ||
| 2706 | * Make sure we don't block the transaction commit while | ||
| 2707 | * we are waiting for pending workers to finish. | ||
| 2708 | */ | ||
| 2690 | wait_event(sctx->list_wait, | 2709 | wait_event(sctx->list_wait, |
| 2691 | atomic_read(&sctx->workers_pending) == 0); | 2710 | atomic_read(&sctx->workers_pending) == 0); |
| 2692 | scrub_blocked_if_needed(fs_info); | 2711 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); |
| 2712 | |||
| 2713 | mutex_lock(&fs_info->scrub_lock); | ||
| 2714 | __scrub_blocked_if_needed(fs_info); | ||
| 2715 | atomic_dec(&fs_info->scrubs_paused); | ||
| 2716 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2717 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2693 | 2718 | ||
| 2694 | btrfs_put_block_group(cache); | 2719 | btrfs_put_block_group(cache); |
| 2695 | if (ret) | 2720 | if (ret) |
| @@ -2757,33 +2782,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
| 2757 | int is_dev_replace) | 2782 | int is_dev_replace) |
| 2758 | { | 2783 | { |
| 2759 | int ret = 0; | 2784 | int ret = 0; |
| 2785 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2786 | int max_active = fs_info->thread_pool_size; | ||
| 2760 | 2787 | ||
| 2761 | if (fs_info->scrub_workers_refcnt == 0) { | 2788 | if (fs_info->scrub_workers_refcnt == 0) { |
| 2762 | if (is_dev_replace) | 2789 | if (is_dev_replace) |
| 2763 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, | 2790 | fs_info->scrub_workers = |
| 2764 | &fs_info->generic_worker); | 2791 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2792 | 1, 4); | ||
| 2765 | else | 2793 | else |
| 2766 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2794 | fs_info->scrub_workers = |
| 2767 | fs_info->thread_pool_size, | 2795 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2768 | &fs_info->generic_worker); | 2796 | max_active, 4); |
| 2769 | fs_info->scrub_workers.idle_thresh = 4; | 2797 | if (!fs_info->scrub_workers) { |
| 2770 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2798 | ret = -ENOMEM; |
| 2771 | if (ret) | ||
| 2772 | goto out; | 2799 | goto out; |
| 2773 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | 2800 | } |
| 2774 | "scrubwrc", | 2801 | fs_info->scrub_wr_completion_workers = |
| 2775 | fs_info->thread_pool_size, | 2802 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
| 2776 | &fs_info->generic_worker); | 2803 | max_active, 2); |
| 2777 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | 2804 | if (!fs_info->scrub_wr_completion_workers) { |
| 2778 | ret = btrfs_start_workers( | 2805 | ret = -ENOMEM; |
| 2779 | &fs_info->scrub_wr_completion_workers); | ||
| 2780 | if (ret) | ||
| 2781 | goto out; | 2806 | goto out; |
| 2782 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | 2807 | } |
| 2783 | &fs_info->generic_worker); | 2808 | fs_info->scrub_nocow_workers = |
| 2784 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | 2809 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
| 2785 | if (ret) | 2810 | if (!fs_info->scrub_nocow_workers) { |
| 2811 | ret = -ENOMEM; | ||
| 2786 | goto out; | 2812 | goto out; |
| 2813 | } | ||
| 2787 | } | 2814 | } |
| 2788 | ++fs_info->scrub_workers_refcnt; | 2815 | ++fs_info->scrub_workers_refcnt; |
| 2789 | out: | 2816 | out: |
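The replacement helpers bundle allocation, concurrency limits, and teardown. A minimal lifecycle sketch under the same flags as above; the queue name "btrfs-example" is illustrative:

```c
struct btrfs_workqueue_struct *wq;

wq = btrfs_alloc_workqueue("btrfs-example", WQ_FREEZABLE | WQ_UNBOUND,
			   max_active,	/* cap on concurrent workers */
			   4);		/* idle thresh, as for scrub_workers */
if (!wq)
	return -ENOMEM;
/* ... hand work items to btrfs_queue_work(wq, ...) ... */
btrfs_destroy_workqueue(wq);
```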
| @@ -2793,9 +2820,9 @@ out: | |||
| 2793 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2820 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
| 2794 | { | 2821 | { |
| 2795 | if (--fs_info->scrub_workers_refcnt == 0) { | 2822 | if (--fs_info->scrub_workers_refcnt == 0) { |
| 2796 | btrfs_stop_workers(&fs_info->scrub_workers); | 2823 | btrfs_destroy_workqueue(fs_info->scrub_workers); |
| 2797 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | 2824 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
| 2798 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | 2825 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); |
| 2799 | } | 2826 | } |
| 2800 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2827 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
| 2801 | } | 2828 | } |
| @@ -3106,10 +3133,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
| 3106 | nocow_ctx->len = len; | 3133 | nocow_ctx->len = len; |
| 3107 | nocow_ctx->mirror_num = mirror_num; | 3134 | nocow_ctx->mirror_num = mirror_num; |
| 3108 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3135 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
| 3109 | nocow_ctx->work.func = copy_nocow_pages_worker; | 3136 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); |
| 3110 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3137 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
| 3111 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | 3138 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
| 3112 | &nocow_ctx->work); | 3139 | &nocow_ctx->work); |
| 3113 | 3140 | ||
| 3114 | return 0; | 3141 | return 0; |
| 3115 | } | 3142 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde9717c1b9..9b6da9d55f9a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -51,15 +51,18 @@ struct fs_path { | |||
| 51 | struct { | 51 | struct { |
| 52 | char *start; | 52 | char *start; |
| 53 | char *end; | 53 | char *end; |
| 54 | char *prepared; | ||
| 55 | 54 | ||
| 56 | char *buf; | 55 | char *buf; |
| 57 | int buf_len; | 56 | unsigned short buf_len:15; |
| 58 | unsigned int reversed:1; | 57 | unsigned short reversed:1; |
| 59 | unsigned int virtual_mem:1; | ||
| 60 | char inline_buf[]; | 58 | char inline_buf[]; |
| 61 | }; | 59 | }; |
| 62 | char pad[PAGE_SIZE]; | 60 | /* |
| 61 | * The average path length does not exceed 200 bytes; we get | ||
| 62 | * better packing in the slab and a higher chance of satisfying | ||
| 63 | * an allocation later during send. | ||
| 64 | */ | ||
| 65 | char pad[256]; | ||
| 63 | }; | 66 | }; |
| 64 | }; | 67 | }; |
| 65 | #define FS_PATH_INLINE_SIZE \ | 68 | #define FS_PATH_INLINE_SIZE \ |
| @@ -109,6 +112,7 @@ struct send_ctx { | |||
| 109 | int cur_inode_deleted; | 112 | int cur_inode_deleted; |
| 110 | u64 cur_inode_size; | 113 | u64 cur_inode_size; |
| 111 | u64 cur_inode_mode; | 114 | u64 cur_inode_mode; |
| 115 | u64 cur_inode_rdev; | ||
| 112 | u64 cur_inode_last_extent; | 116 | u64 cur_inode_last_extent; |
| 113 | 117 | ||
| 114 | u64 send_progress; | 118 | u64 send_progress; |
| @@ -120,6 +124,8 @@ struct send_ctx { | |||
| 120 | struct list_head name_cache_list; | 124 | struct list_head name_cache_list; |
| 121 | int name_cache_size; | 125 | int name_cache_size; |
| 122 | 126 | ||
| 127 | struct file_ra_state ra; | ||
| 128 | |||
| 123 | char *read_buf; | 129 | char *read_buf; |
| 124 | 130 | ||
| 125 | /* | 131 | /* |
| @@ -175,6 +181,47 @@ struct send_ctx { | |||
| 175 | * own move/rename can be performed. | 181 | * own move/rename can be performed. |
| 176 | */ | 182 | */ |
| 177 | struct rb_root waiting_dir_moves; | 183 | struct rb_root waiting_dir_moves; |
| 184 | |||
| 185 | /* | ||
| 186 | * A directory that is going to be rm'ed might have a child directory | ||
| 187 | * which is in the pending directory moves index above. In this case, | ||
| 188 | * the directory can only be removed after the move/rename of its child | ||
| 189 | * is performed. Example: | ||
| 190 | * | ||
| 191 | * Parent snapshot: | ||
| 192 | * | ||
| 193 | * . (ino 256) | ||
| 194 | * |-- a/ (ino 257) | ||
| 195 | * |-- b/ (ino 258) | ||
| 196 | * |-- c/ (ino 259) | ||
| 197 | * | |-- x/ (ino 260) | ||
| 198 | * | | ||
| 199 | * |-- y/ (ino 261) | ||
| 200 | * | ||
| 201 | * Send snapshot: | ||
| 202 | * | ||
| 203 | * . (ino 256) | ||
| 204 | * |-- a/ (ino 257) | ||
| 205 | * |-- b/ (ino 258) | ||
| 206 | * |-- YY/ (ino 261) | ||
| 207 | * |-- x/ (ino 260) | ||
| 208 | * | ||
| 209 | * Sequence of steps that lead to the send snapshot: | ||
| 210 | * rm -f /a/b/c/foo.txt | ||
| 211 | * mv /a/b/y /a/b/YY | ||
| 212 | * mv /a/b/c/x /a/b/YY | ||
| 213 | * rmdir /a/b/c | ||
| 214 | * | ||
| 215 | * When the child is processed, its move/rename is delayed until its | ||
| 216 | * parent is processed (as explained above), but all other operations | ||
| 217 | * like updating utimes, chown, chgrp, etc., are performed, and the paths | ||
| 218 | * that it uses for those operations must use the orphanized name of | ||
| 219 | * its parent (the directory we're going to rm later), so we need to | ||
| 220 | * memorize that name. | ||
| 221 | * | ||
| 222 | * Indexed by the inode number of the directory to be deleted. | ||
| 223 | */ | ||
| 224 | struct rb_root orphan_dirs; | ||
| 178 | }; | 225 | }; |
| 179 | 226 | ||
| 180 | struct pending_dir_move { | 227 | struct pending_dir_move { |
| @@ -189,6 +236,18 @@ struct pending_dir_move { | |||
| 189 | struct waiting_dir_move { | 236 | struct waiting_dir_move { |
| 190 | struct rb_node node; | 237 | struct rb_node node; |
| 191 | u64 ino; | 238 | u64 ino; |
| 239 | /* | ||
| 240 | * There might be some directory that could not be removed because it | ||
| 241 | * was waiting for this directory inode to be moved first. Therefore | ||
| 242 | * after this directory is moved, we can try to rmdir the inode rmdir_ino. | ||
| 243 | */ | ||
| 244 | u64 rmdir_ino; | ||
| 245 | }; | ||
| 246 | |||
| 247 | struct orphan_dir_info { | ||
| 248 | struct rb_node node; | ||
| 249 | u64 ino; | ||
| 250 | u64 gen; | ||
| 192 | }; | 251 | }; |
| 193 | 252 | ||
| 194 | struct name_cache_entry { | 253 | struct name_cache_entry { |
| @@ -214,6 +273,11 @@ struct name_cache_entry { | |||
| 214 | 273 | ||
| 215 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 274 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
| 216 | 275 | ||
| 276 | static struct waiting_dir_move * | ||
| 277 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino); | ||
| 278 | |||
| 279 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); | ||
| 280 | |||
| 217 | static int need_send_hole(struct send_ctx *sctx) | 281 | static int need_send_hole(struct send_ctx *sctx) |
| 218 | { | 282 | { |
| 219 | return (sctx->parent_root && !sctx->cur_inode_new && | 283 | return (sctx->parent_root && !sctx->cur_inode_new && |
| @@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) | |||
| 242 | if (!p) | 306 | if (!p) |
| 243 | return NULL; | 307 | return NULL; |
| 244 | p->reversed = 0; | 308 | p->reversed = 0; |
| 245 | p->virtual_mem = 0; | ||
| 246 | p->buf = p->inline_buf; | 309 | p->buf = p->inline_buf; |
| 247 | p->buf_len = FS_PATH_INLINE_SIZE; | 310 | p->buf_len = FS_PATH_INLINE_SIZE; |
| 248 | fs_path_reset(p); | 311 | fs_path_reset(p); |
| @@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) | |||
| 265 | { | 328 | { |
| 266 | if (!p) | 329 | if (!p) |
| 267 | return; | 330 | return; |
| 268 | if (p->buf != p->inline_buf) { | 331 | if (p->buf != p->inline_buf) |
| 269 | if (p->virtual_mem) | 332 | kfree(p->buf); |
| 270 | vfree(p->buf); | ||
| 271 | else | ||
| 272 | kfree(p->buf); | ||
| 273 | } | ||
| 274 | kfree(p); | 333 | kfree(p); |
| 275 | } | 334 | } |
| 276 | 335 | ||
| @@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 292 | 351 | ||
| 293 | path_len = p->end - p->start; | 352 | path_len = p->end - p->start; |
| 294 | old_buf_len = p->buf_len; | 353 | old_buf_len = p->buf_len; |
| 295 | len = PAGE_ALIGN(len); | 354 | |
| 296 | 355 | /* | |
| 297 | if (p->buf == p->inline_buf) { | 356 | * The first time inline_buf does not suffice |
| 298 | tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); | 357 | */ |
| 299 | if (!tmp_buf) { | 358 | if (p->buf == p->inline_buf) |
| 300 | tmp_buf = vmalloc(len); | 359 | tmp_buf = kmalloc(len, GFP_NOFS); |
| 301 | if (!tmp_buf) | 360 | else |
| 302 | return -ENOMEM; | 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
| 303 | p->virtual_mem = 1; | 362 | if (!tmp_buf) |
| 304 | } | 363 | return -ENOMEM; |
| 305 | memcpy(tmp_buf, p->buf, p->buf_len); | 364 | p->buf = tmp_buf; |
| 306 | p->buf = tmp_buf; | 365 | /* |
| 307 | p->buf_len = len; | 366 | * The real size of the buffer is bigger; this lets the fast path |
| 308 | } else { | 367 | * be taken most of the time |
| 309 | if (p->virtual_mem) { | 368 | */ |
| 310 | tmp_buf = vmalloc(len); | 369 | p->buf_len = ksize(p->buf); |
| 311 | if (!tmp_buf) | 370 | |
| 312 | return -ENOMEM; | ||
| 313 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 314 | vfree(p->buf); | ||
| 315 | } else { | ||
| 316 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
| 317 | if (!tmp_buf) { | ||
| 318 | tmp_buf = vmalloc(len); | ||
| 319 | if (!tmp_buf) | ||
| 320 | return -ENOMEM; | ||
| 321 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 322 | kfree(p->buf); | ||
| 323 | p->virtual_mem = 1; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | p->buf = tmp_buf; | ||
| 327 | p->buf_len = len; | ||
| 328 | } | ||
| 329 | if (p->reversed) { | 371 | if (p->reversed) { |
| 330 | tmp_buf = p->buf + old_buf_len - path_len - 1; | 372 | tmp_buf = p->buf + old_buf_len - path_len - 1; |
| 331 | p->end = p->buf + p->buf_len - 1; | 373 | p->end = p->buf + p->buf_len - 1; |
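The rewritten grow path relies on a slab property: krealloc() may hand back a block larger than requested, and ksize() reports the object's true usable size, so recording that value lets later fs_path_ensure_buf() calls take the fast path. The idiom in isolation:

```c
/* Sketch: grow buf to at least len bytes, record real capacity. */
char *tmp = krealloc(buf, len, GFP_NOFS);

if (!tmp)
	return -ENOMEM;
buf = tmp;
buf_len = ksize(buf);	/* often larger than len */
```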
| @@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 338 | return 0; | 380 | return 0; |
| 339 | } | 381 | } |
| 340 | 382 | ||
| 341 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | 383 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len, |
| 384 | char **prepared) | ||
| 342 | { | 385 | { |
| 343 | int ret; | 386 | int ret; |
| 344 | int new_len; | 387 | int new_len; |
| @@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | |||
| 354 | if (p->start != p->end) | 397 | if (p->start != p->end) |
| 355 | *--p->start = '/'; | 398 | *--p->start = '/'; |
| 356 | p->start -= name_len; | 399 | p->start -= name_len; |
| 357 | p->prepared = p->start; | 400 | *prepared = p->start; |
| 358 | } else { | 401 | } else { |
| 359 | if (p->start != p->end) | 402 | if (p->start != p->end) |
| 360 | *p->end++ = '/'; | 403 | *p->end++ = '/'; |
| 361 | p->prepared = p->end; | 404 | *prepared = p->end; |
| 362 | p->end += name_len; | 405 | p->end += name_len; |
| 363 | *p->end = 0; | 406 | *p->end = 0; |
| 364 | } | 407 | } |
| @@ -370,12 +413,12 @@ out: | |||
| 370 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | 413 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) |
| 371 | { | 414 | { |
| 372 | int ret; | 415 | int ret; |
| 416 | char *prepared; | ||
| 373 | 417 | ||
| 374 | ret = fs_path_prepare_for_add(p, name_len); | 418 | ret = fs_path_prepare_for_add(p, name_len, &prepared); |
| 375 | if (ret < 0) | 419 | if (ret < 0) |
| 376 | goto out; | 420 | goto out; |
| 377 | memcpy(p->prepared, name, name_len); | 421 | memcpy(prepared, name, name_len); |
| 378 | p->prepared = NULL; | ||
| 379 | 422 | ||
| 380 | out: | 423 | out: |
| 381 | return ret; | 424 | return ret; |
| @@ -384,12 +427,12 @@ out: | |||
| 384 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | 427 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) |
| 385 | { | 428 | { |
| 386 | int ret; | 429 | int ret; |
| 430 | char *prepared; | ||
| 387 | 431 | ||
| 388 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | 432 | ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); |
| 389 | if (ret < 0) | 433 | if (ret < 0) |
| 390 | goto out; | 434 | goto out; |
| 391 | memcpy(p->prepared, p2->start, p2->end - p2->start); | 435 | memcpy(prepared, p2->start, p2->end - p2->start); |
| 392 | p->prepared = NULL; | ||
| 393 | 436 | ||
| 394 | out: | 437 | out: |
| 395 | return ret; | 438 | return ret; |
| @@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, | |||
| 400 | unsigned long off, int len) | 443 | unsigned long off, int len) |
| 401 | { | 444 | { |
| 402 | int ret; | 445 | int ret; |
| 446 | char *prepared; | ||
| 403 | 447 | ||
| 404 | ret = fs_path_prepare_for_add(p, len); | 448 | ret = fs_path_prepare_for_add(p, len, &prepared); |
| 405 | if (ret < 0) | 449 | if (ret < 0) |
| 406 | goto out; | 450 | goto out; |
| 407 | 451 | ||
| 408 | read_extent_buffer(eb, p->prepared, off, len); | 452 | read_extent_buffer(eb, prepared, off, len); |
| 409 | p->prepared = NULL; | ||
| 410 | 453 | ||
| 411 | out: | 454 | out: |
| 412 | return ret; | 455 | return ret; |
| @@ -915,9 +958,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 915 | struct btrfs_dir_item *di; | 958 | struct btrfs_dir_item *di; |
| 916 | struct btrfs_key di_key; | 959 | struct btrfs_key di_key; |
| 917 | char *buf = NULL; | 960 | char *buf = NULL; |
| 918 | char *buf2 = NULL; | 961 | const int buf_len = PATH_MAX; |
| 919 | int buf_len; | ||
| 920 | int buf_virtual = 0; | ||
| 921 | u32 name_len; | 962 | u32 name_len; |
| 922 | u32 data_len; | 963 | u32 data_len; |
| 923 | u32 cur; | 964 | u32 cur; |
| @@ -927,7 +968,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 927 | int num; | 968 | int num; |
| 928 | u8 type; | 969 | u8 type; |
| 929 | 970 | ||
| 930 | buf_len = PAGE_SIZE; | ||
| 931 | buf = kmalloc(buf_len, GFP_NOFS); | 971 | buf = kmalloc(buf_len, GFP_NOFS); |
| 932 | if (!buf) { | 972 | if (!buf) { |
| 933 | ret = -ENOMEM; | 973 | ret = -ENOMEM; |
| @@ -949,30 +989,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 949 | type = btrfs_dir_type(eb, di); | 989 | type = btrfs_dir_type(eb, di); |
| 950 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 990 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
| 951 | 991 | ||
| 992 | /* | ||
| 993 | * Path too long | ||
| 994 | */ | ||
| 952 | if (name_len + data_len > buf_len) { | 995 | if (name_len + data_len > buf_len) { |
| 953 | buf_len = PAGE_ALIGN(name_len + data_len); | 996 | ret = -ENAMETOOLONG; |
| 954 | if (buf_virtual) { | 997 | goto out; |
| 955 | buf2 = vmalloc(buf_len); | ||
| 956 | if (!buf2) { | ||
| 957 | ret = -ENOMEM; | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | vfree(buf); | ||
| 961 | } else { | ||
| 962 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
| 963 | if (!buf2) { | ||
| 964 | buf2 = vmalloc(buf_len); | ||
| 965 | if (!buf2) { | ||
| 966 | ret = -ENOMEM; | ||
| 967 | goto out; | ||
| 968 | } | ||
| 969 | kfree(buf); | ||
| 970 | buf_virtual = 1; | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | buf = buf2; | ||
| 975 | buf2 = NULL; | ||
| 976 | } | 998 | } |
| 977 | 999 | ||
| 978 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1000 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
| @@ -995,10 +1017,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 995 | } | 1017 | } |
| 996 | 1018 | ||
| 997 | out: | 1019 | out: |
| 998 | if (buf_virtual) | 1020 | kfree(buf); |
| 999 | vfree(buf); | ||
| 1000 | else | ||
| 1001 | kfree(buf); | ||
| 1002 | return ret; | 1021 | return ret; |
| 1003 | } | 1022 | } |
| 1004 | 1023 | ||
| @@ -1292,8 +1311,6 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1292 | extent_item_pos = logical - found_key.objectid; | 1311 | extent_item_pos = logical - found_key.objectid; |
| 1293 | else | 1312 | else |
| 1294 | extent_item_pos = 0; | 1313 | extent_item_pos = 0; |
| 1295 | |||
| 1296 | extent_item_pos = logical - found_key.objectid; | ||
| 1297 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1314 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
| 1298 | found_key.objectid, extent_item_pos, 1, | 1315 | found_key.objectid, extent_item_pos, 1, |
| 1299 | __iterate_backrefs, backref_ctx); | 1316 | __iterate_backrefs, backref_ctx); |
| @@ -1418,11 +1435,7 @@ static int gen_unique_name(struct send_ctx *sctx, | |||
| 1418 | while (1) { | 1435 | while (1) { |
| 1419 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", | 1436 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", |
| 1420 | ino, gen, idx); | 1437 | ino, gen, idx); |
| 1421 | if (len >= sizeof(tmp)) { | 1438 | ASSERT(len < sizeof(tmp)); |
| 1422 | /* should really not happen */ | ||
| 1423 | ret = -EOVERFLOW; | ||
| 1424 | goto out; | ||
| 1425 | } | ||
| 1426 | 1439 | ||
| 1427 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | 1440 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, |
| 1428 | path, BTRFS_FIRST_FREE_OBJECTID, | 1441 | path, BTRFS_FIRST_FREE_OBJECTID, |
| @@ -1898,13 +1911,20 @@ static void name_cache_delete(struct send_ctx *sctx, | |||
| 1898 | 1911 | ||
| 1899 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1912 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1900 | (unsigned long)nce->ino); | 1913 | (unsigned long)nce->ino); |
| 1901 | BUG_ON(!nce_head); | 1914 | if (!nce_head) { |
| 1915 | btrfs_err(sctx->send_root->fs_info, | ||
| 1916 | "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", | ||
| 1917 | nce->ino, sctx->name_cache_size); | ||
| 1918 | } | ||
| 1902 | 1919 | ||
| 1903 | list_del(&nce->radix_list); | 1920 | list_del(&nce->radix_list); |
| 1904 | list_del(&nce->list); | 1921 | list_del(&nce->list); |
| 1905 | sctx->name_cache_size--; | 1922 | sctx->name_cache_size--; |
| 1906 | 1923 | ||
| 1907 | if (list_empty(nce_head)) { | 1924 | /* |
| 1925 | * We may not get to the final release of nce_head if the lookup fails | ||
| 1926 | */ | ||
| 1927 | if (nce_head && list_empty(nce_head)) { | ||
| 1908 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | 1928 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); |
| 1909 | kfree(nce_head); | 1929 | kfree(nce_head); |
| 1910 | } | 1930 | } |
| @@ -1977,7 +1997,6 @@ static void name_cache_free(struct send_ctx *sctx) | |||
| 1977 | */ | 1997 | */ |
| 1978 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1998 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
| 1979 | u64 ino, u64 gen, | 1999 | u64 ino, u64 gen, |
| 1980 | int skip_name_cache, | ||
| 1981 | u64 *parent_ino, | 2000 | u64 *parent_ino, |
| 1982 | u64 *parent_gen, | 2001 | u64 *parent_gen, |
| 1983 | struct fs_path *dest) | 2002 | struct fs_path *dest) |
| @@ -1987,8 +2006,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1987 | struct btrfs_path *path = NULL; | 2006 | struct btrfs_path *path = NULL; |
| 1988 | struct name_cache_entry *nce = NULL; | 2007 | struct name_cache_entry *nce = NULL; |
| 1989 | 2008 | ||
| 1990 | if (skip_name_cache) | ||
| 1991 | goto get_ref; | ||
| 1992 | /* | 2009 | /* |
| 1993 | * First check if we already did a call to this function with the same | 2010 | * First check if we already did a call to this function with the same |
| 1994 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | 2011 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes |
| @@ -2033,12 +2050,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 2033 | goto out_cache; | 2050 | goto out_cache; |
| 2034 | } | 2051 | } |
| 2035 | 2052 | ||
| 2036 | get_ref: | ||
| 2037 | /* | 2053 | /* |
| 2038 | * Depending on whether the inode was already processed or not, use | 2054 | * Depending on whether the inode was already processed or not, use |
| 2039 | * send_root or parent_root for ref lookup. | 2055 | * send_root or parent_root for ref lookup. |
| 2040 | */ | 2056 | */ |
| 2041 | if (ino < sctx->send_progress && !skip_name_cache) | 2057 | if (ino < sctx->send_progress) |
| 2042 | ret = get_first_ref(sctx->send_root, ino, | 2058 | ret = get_first_ref(sctx->send_root, ino, |
| 2043 | parent_ino, parent_gen, dest); | 2059 | parent_ino, parent_gen, dest); |
| 2044 | else | 2060 | else |
| @@ -2062,8 +2078,6 @@ get_ref: | |||
| 2062 | goto out; | 2078 | goto out; |
| 2063 | ret = 1; | 2079 | ret = 1; |
| 2064 | } | 2080 | } |
| 2065 | if (skip_name_cache) | ||
| 2066 | goto out; | ||
| 2067 | 2081 | ||
| 2068 | out_cache: | 2082 | out_cache: |
| 2069 | /* | 2083 | /* |
| @@ -2131,9 +2145,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2131 | u64 parent_inode = 0; | 2145 | u64 parent_inode = 0; |
| 2132 | u64 parent_gen = 0; | 2146 | u64 parent_gen = 0; |
| 2133 | int stop = 0; | 2147 | int stop = 0; |
| 2134 | u64 start_ino = ino; | ||
| 2135 | u64 start_gen = gen; | ||
| 2136 | int skip_name_cache = 0; | ||
| 2137 | 2148 | ||
| 2138 | name = fs_path_alloc(); | 2149 | name = fs_path_alloc(); |
| 2139 | if (!name) { | 2150 | if (!name) { |
| @@ -2141,31 +2152,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2141 | goto out; | 2152 | goto out; |
| 2142 | } | 2153 | } |
| 2143 | 2154 | ||
| 2144 | if (is_waiting_for_move(sctx, ino)) | ||
| 2145 | skip_name_cache = 1; | ||
| 2146 | |||
| 2147 | again: | ||
| 2148 | dest->reversed = 1; | 2155 | dest->reversed = 1; |
| 2149 | fs_path_reset(dest); | 2156 | fs_path_reset(dest); |
| 2150 | 2157 | ||
| 2151 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | 2158 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { |
| 2152 | fs_path_reset(name); | 2159 | fs_path_reset(name); |
| 2153 | 2160 | ||
| 2154 | ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, | 2161 | if (is_waiting_for_rm(sctx, ino)) { |
| 2155 | &parent_inode, &parent_gen, name); | 2162 | ret = gen_unique_name(sctx, ino, gen, name); |
| 2163 | if (ret < 0) | ||
| 2164 | goto out; | ||
| 2165 | ret = fs_path_add_path(dest, name); | ||
| 2166 | break; | ||
| 2167 | } | ||
| 2168 | |||
| 2169 | if (is_waiting_for_move(sctx, ino)) { | ||
| 2170 | ret = get_first_ref(sctx->parent_root, ino, | ||
| 2171 | &parent_inode, &parent_gen, name); | ||
| 2172 | } else { | ||
| 2173 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
| 2174 | &parent_inode, | ||
| 2175 | &parent_gen, name); | ||
| 2176 | if (ret) | ||
| 2177 | stop = 1; | ||
| 2178 | } | ||
| 2179 | |||
| 2156 | if (ret < 0) | 2180 | if (ret < 0) |
| 2157 | goto out; | 2181 | goto out; |
| 2158 | if (ret) | ||
| 2159 | stop = 1; | ||
| 2160 | |||
| 2161 | if (!skip_name_cache && | ||
| 2162 | is_waiting_for_move(sctx, parent_inode)) { | ||
| 2163 | ino = start_ino; | ||
| 2164 | gen = start_gen; | ||
| 2165 | stop = 0; | ||
| 2166 | skip_name_cache = 1; | ||
| 2167 | goto again; | ||
| 2168 | } | ||
| 2169 | 2182 | ||
| 2170 | ret = fs_path_add_path(dest, name); | 2183 | ret = fs_path_add_path(dest, name); |
| 2171 | if (ret < 0) | 2184 | if (ret < 0) |
| @@ -2429,10 +2442,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
| 2429 | if (!p) | 2442 | if (!p) |
| 2430 | return -ENOMEM; | 2443 | return -ENOMEM; |
| 2431 | 2444 | ||
| 2432 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, | 2445 | if (ino != sctx->cur_ino) { |
| 2433 | NULL, &rdev); | 2446 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, |
| 2434 | if (ret < 0) | 2447 | NULL, NULL, &rdev); |
| 2435 | goto out; | 2448 | if (ret < 0) |
| 2449 | goto out; | ||
| 2450 | } else { | ||
| 2451 | gen = sctx->cur_inode_gen; | ||
| 2452 | mode = sctx->cur_inode_mode; | ||
| 2453 | rdev = sctx->cur_inode_rdev; | ||
| 2454 | } | ||
| 2436 | 2455 | ||
| 2437 | if (S_ISREG(mode)) { | 2456 | if (S_ISREG(mode)) { |
| 2438 | cmd = BTRFS_SEND_C_MKFILE; | 2457 | cmd = BTRFS_SEND_C_MKFILE; |
| @@ -2512,17 +2531,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2512 | key.objectid = dir; | 2531 | key.objectid = dir; |
| 2513 | key.type = BTRFS_DIR_INDEX_KEY; | 2532 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2514 | key.offset = 0; | 2533 | key.offset = 0; |
| 2534 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
| 2535 | if (ret < 0) | ||
| 2536 | goto out; | ||
| 2537 | |||
| 2515 | while (1) { | 2538 | while (1) { |
| 2516 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | 2539 | eb = path->nodes[0]; |
| 2517 | 1, 0); | 2540 | slot = path->slots[0]; |
| 2518 | if (ret < 0) | 2541 | if (slot >= btrfs_header_nritems(eb)) { |
| 2519 | goto out; | 2542 | ret = btrfs_next_leaf(sctx->send_root, path); |
| 2520 | if (!ret) { | 2543 | if (ret < 0) { |
| 2521 | eb = path->nodes[0]; | 2544 | goto out; |
| 2522 | slot = path->slots[0]; | 2545 | } else if (ret > 0) { |
| 2523 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 2546 | ret = 0; |
| 2547 | break; | ||
| 2548 | } | ||
| 2549 | continue; | ||
| 2524 | } | 2550 | } |
| 2525 | if (ret || found_key.objectid != key.objectid || | 2551 | |
| 2552 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2553 | if (found_key.objectid != key.objectid || | ||
| 2526 | found_key.type != key.type) { | 2554 | found_key.type != key.type) { |
| 2527 | ret = 0; | 2555 | ret = 0; |
| 2528 | goto out; | 2556 | goto out; |
| @@ -2537,8 +2565,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2537 | goto out; | 2565 | goto out; |
| 2538 | } | 2566 | } |
| 2539 | 2567 | ||
| 2540 | key.offset = found_key.offset + 1; | 2568 | path->slots[0]++; |
| 2541 | btrfs_release_path(path); | ||
| 2542 | } | 2569 | } |
| 2543 | 2570 | ||
| 2544 | out: | 2571 | out: |
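did_create_dir() (and can_rmdir() further down) now use the standard single-search leaf walk instead of re-issuing a search per item. The skeleton of the idiom, assuming key, found_key, and path are declared as in the function above:

```c
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
	goto out;
while (1) {
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;	/* no more leaves */
		continue;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
	if (found_key.objectid != key.objectid || found_key.type != key.type)
		break;
	/* ... examine the item ... */
	path->slots[0]++;
}
```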
| @@ -2590,7 +2617,7 @@ struct recorded_ref { | |||
| 2590 | * everything mixed. So we first record all refs and later process them. | 2617 | * everything mixed. So we first record all refs and later process them. |
| 2591 | * This function is a helper to record one ref. | 2618 | * This function is a helper to record one ref. |
| 2592 | */ | 2619 | */ |
| 2593 | static int record_ref(struct list_head *head, u64 dir, | 2620 | static int __record_ref(struct list_head *head, u64 dir, |
| 2594 | u64 dir_gen, struct fs_path *path) | 2621 | u64 dir_gen, struct fs_path *path) |
| 2595 | { | 2622 | { |
| 2596 | struct recorded_ref *ref; | 2623 | struct recorded_ref *ref; |
| @@ -2676,12 +2703,78 @@ out: | |||
| 2676 | return ret; | 2703 | return ret; |
| 2677 | } | 2704 | } |
| 2678 | 2705 | ||
| 2706 | static struct orphan_dir_info * | ||
| 2707 | add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2708 | { | ||
| 2709 | struct rb_node **p = &sctx->orphan_dirs.rb_node; | ||
| 2710 | struct rb_node *parent = NULL; | ||
| 2711 | struct orphan_dir_info *entry, *odi; | ||
| 2712 | |||
| 2713 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | ||
| 2714 | if (!odi) | ||
| 2715 | return ERR_PTR(-ENOMEM); | ||
| 2716 | odi->ino = dir_ino; | ||
| 2717 | odi->gen = 0; | ||
| 2718 | |||
| 2719 | while (*p) { | ||
| 2720 | parent = *p; | ||
| 2721 | entry = rb_entry(parent, struct orphan_dir_info, node); | ||
| 2722 | if (dir_ino < entry->ino) { | ||
| 2723 | p = &(*p)->rb_left; | ||
| 2724 | } else if (dir_ino > entry->ino) { | ||
| 2725 | p = &(*p)->rb_right; | ||
| 2726 | } else { | ||
| 2727 | kfree(odi); | ||
| 2728 | return entry; | ||
| 2729 | } | ||
| 2730 | } | ||
| 2731 | |||
| 2732 | rb_link_node(&odi->node, parent, p); | ||
| 2733 | rb_insert_color(&odi->node, &sctx->orphan_dirs); | ||
| 2734 | return odi; | ||
| 2735 | } | ||
| 2736 | |||
| 2737 | static struct orphan_dir_info * | ||
| 2738 | get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2739 | { | ||
| 2740 | struct rb_node *n = sctx->orphan_dirs.rb_node; | ||
| 2741 | struct orphan_dir_info *entry; | ||
| 2742 | |||
| 2743 | while (n) { | ||
| 2744 | entry = rb_entry(n, struct orphan_dir_info, node); | ||
| 2745 | if (dir_ino < entry->ino) | ||
| 2746 | n = n->rb_left; | ||
| 2747 | else if (dir_ino > entry->ino) | ||
| 2748 | n = n->rb_right; | ||
| 2749 | else | ||
| 2750 | return entry; | ||
| 2751 | } | ||
| 2752 | return NULL; | ||
| 2753 | } | ||
| 2754 | |||
| 2755 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) | ||
| 2756 | { | ||
| 2757 | struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); | ||
| 2758 | |||
| 2759 | return odi != NULL; | ||
| 2760 | } | ||
| 2761 | |||
| 2762 | static void free_orphan_dir_info(struct send_ctx *sctx, | ||
| 2763 | struct orphan_dir_info *odi) | ||
| 2764 | { | ||
| 2765 | if (!odi) | ||
| 2766 | return; | ||
| 2767 | rb_erase(&odi->node, &sctx->orphan_dirs); | ||
| 2768 | kfree(odi); | ||
| 2769 | } | ||
| 2770 | |||
| 2679 | /* | 2771 | /* |
| 2680 | * Returns 1 if a directory can be removed at this point in time. | 2772 | * Returns 1 if a directory can be removed at this point in time. |
| 2681 | * We check this by iterating all dir items and checking if the inode behind | 2773 | * We check this by iterating all dir items and checking if the inode behind |
| 2682 | * the dir item was already processed. | 2774 | * the dir item was already processed. |
| 2683 | */ | 2775 | */ |
| 2684 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | 2776 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
| 2777 | u64 send_progress) | ||
| 2685 | { | 2778 | { |
| 2686 | int ret = 0; | 2779 | int ret = 0; |
| 2687 | struct btrfs_root *root = sctx->parent_root; | 2780 | struct btrfs_root *root = sctx->parent_root; |
| @@ -2704,31 +2797,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
| 2704 | key.objectid = dir; | 2797 | key.objectid = dir; |
| 2705 | key.type = BTRFS_DIR_INDEX_KEY; | 2798 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2706 | key.offset = 0; | 2799 | key.offset = 0; |
| 2800 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2801 | if (ret < 0) | ||
| 2802 | goto out; | ||
| 2707 | 2803 | ||
| 2708 | while (1) { | 2804 | while (1) { |
| 2709 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 2805 | struct waiting_dir_move *dm; |
| 2710 | if (ret < 0) | 2806 | |
| 2711 | goto out; | 2807 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
| 2712 | if (!ret) { | 2808 | ret = btrfs_next_leaf(root, path); |
| 2713 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 2809 | if (ret < 0) |
| 2714 | path->slots[0]); | 2810 | goto out; |
| 2811 | else if (ret > 0) | ||
| 2812 | break; | ||
| 2813 | continue; | ||
| 2715 | } | 2814 | } |
| 2716 | if (ret || found_key.objectid != key.objectid || | 2815 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
| 2717 | found_key.type != key.type) { | 2816 | path->slots[0]); |
| 2817 | if (found_key.objectid != key.objectid || | ||
| 2818 | found_key.type != key.type) | ||
| 2718 | break; | 2819 | break; |
| 2719 | } | ||
| 2720 | 2820 | ||
| 2721 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2821 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 2722 | struct btrfs_dir_item); | 2822 | struct btrfs_dir_item); |
| 2723 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | 2823 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); |
| 2724 | 2824 | ||
| 2825 | dm = get_waiting_dir_move(sctx, loc.objectid); | ||
| 2826 | if (dm) { | ||
| 2827 | struct orphan_dir_info *odi; | ||
| 2828 | |||
| 2829 | odi = add_orphan_dir_info(sctx, dir); | ||
| 2830 | if (IS_ERR(odi)) { | ||
| 2831 | ret = PTR_ERR(odi); | ||
| 2832 | goto out; | ||
| 2833 | } | ||
| 2834 | odi->gen = dir_gen; | ||
| 2835 | dm->rmdir_ino = dir; | ||
| 2836 | ret = 0; | ||
| 2837 | goto out; | ||
| 2838 | } | ||
| 2839 | |||
| 2725 | if (loc.objectid > send_progress) { | 2840 | if (loc.objectid > send_progress) { |
| 2726 | ret = 0; | 2841 | ret = 0; |
| 2727 | goto out; | 2842 | goto out; |
| 2728 | } | 2843 | } |
| 2729 | 2844 | ||
| 2730 | btrfs_release_path(path); | 2845 | path->slots[0]++; |
| 2731 | key.offset = found_key.offset + 1; | ||
| 2732 | } | 2846 | } |
| 2733 | 2847 | ||
| 2734 | ret = 1; | 2848 | ret = 1; |
| @@ -2740,19 +2854,9 @@ out: | |||
| 2740 | 2854 | ||
| 2741 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) | 2855 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) |
| 2742 | { | 2856 | { |
| 2743 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2857 | struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); |
| 2744 | struct waiting_dir_move *entry; | ||
| 2745 | 2858 | ||
| 2746 | while (n) { | 2859 | return entry != NULL; |
| 2747 | entry = rb_entry(n, struct waiting_dir_move, node); | ||
| 2748 | if (ino < entry->ino) | ||
| 2749 | n = n->rb_left; | ||
| 2750 | else if (ino > entry->ino) | ||
| 2751 | n = n->rb_right; | ||
| 2752 | else | ||
| 2753 | return 1; | ||
| 2754 | } | ||
| 2755 | return 0; | ||
| 2756 | } | 2860 | } |
| 2757 | 2861 | ||
| 2758 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2862 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) |
| @@ -2765,6 +2869,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2765 | if (!dm) | 2869 | if (!dm) |
| 2766 | return -ENOMEM; | 2870 | return -ENOMEM; |
| 2767 | dm->ino = ino; | 2871 | dm->ino = ino; |
| 2872 | dm->rmdir_ino = 0; | ||
| 2768 | 2873 | ||
| 2769 | while (*p) { | 2874 | while (*p) { |
| 2770 | parent = *p; | 2875 | parent = *p; |
| @@ -2784,31 +2889,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2784 | return 0; | 2889 | return 0; |
| 2785 | } | 2890 | } |
| 2786 | 2891 | ||
| 2787 | static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2892 | static struct waiting_dir_move * |
| 2893 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino) | ||
| 2788 | { | 2894 | { |
| 2789 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2895 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; |
| 2790 | struct waiting_dir_move *entry; | 2896 | struct waiting_dir_move *entry; |
| 2791 | 2897 | ||
| 2792 | while (n) { | 2898 | while (n) { |
| 2793 | entry = rb_entry(n, struct waiting_dir_move, node); | 2899 | entry = rb_entry(n, struct waiting_dir_move, node); |
| 2794 | if (ino < entry->ino) { | 2900 | if (ino < entry->ino) |
| 2795 | n = n->rb_left; | 2901 | n = n->rb_left; |
| 2796 | } else if (ino > entry->ino) { | 2902 | else if (ino > entry->ino) |
| 2797 | n = n->rb_right; | 2903 | n = n->rb_right; |
| 2798 | } else { | 2904 | else |
| 2799 | rb_erase(&entry->node, &sctx->waiting_dir_moves); | 2905 | return entry; |
| 2800 | kfree(entry); | ||
| 2801 | return 0; | ||
| 2802 | } | ||
| 2803 | } | 2906 | } |
| 2804 | return -ENOENT; | 2907 | return NULL; |
| 2908 | } | ||
| 2909 | |||
| 2910 | static void free_waiting_dir_move(struct send_ctx *sctx, | ||
| 2911 | struct waiting_dir_move *dm) | ||
| 2912 | { | ||
| 2913 | if (!dm) | ||
| 2914 | return; | ||
| 2915 | rb_erase(&dm->node, &sctx->waiting_dir_moves); | ||
| 2916 | kfree(dm); | ||
| 2805 | } | 2917 | } |
| 2806 | 2918 | ||
| 2807 | static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | 2919 | static int add_pending_dir_move(struct send_ctx *sctx, |
| 2920 | u64 ino, | ||
| 2921 | u64 ino_gen, | ||
| 2922 | u64 parent_ino) | ||
| 2808 | { | 2923 | { |
| 2809 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2924 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
| 2810 | struct rb_node *parent = NULL; | 2925 | struct rb_node *parent = NULL; |
| 2811 | struct pending_dir_move *entry, *pm; | 2926 | struct pending_dir_move *entry = NULL, *pm; |
| 2812 | struct recorded_ref *cur; | 2927 | struct recorded_ref *cur; |
| 2813 | int exists = 0; | 2928 | int exists = 0; |
| 2814 | int ret; | 2929 | int ret; |
| @@ -2817,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | |||
| 2817 | if (!pm) | 2932 | if (!pm) |
| 2818 | return -ENOMEM; | 2933 | return -ENOMEM; |
| 2819 | pm->parent_ino = parent_ino; | 2934 | pm->parent_ino = parent_ino; |
| 2820 | pm->ino = sctx->cur_ino; | 2935 | pm->ino = ino; |
| 2821 | pm->gen = sctx->cur_inode_gen; | 2936 | pm->gen = ino_gen; |
| 2822 | INIT_LIST_HEAD(&pm->list); | 2937 | INIT_LIST_HEAD(&pm->list); |
| 2823 | INIT_LIST_HEAD(&pm->update_refs); | 2938 | INIT_LIST_HEAD(&pm->update_refs); |
| 2824 | RB_CLEAR_NODE(&pm->node); | 2939 | RB_CLEAR_NODE(&pm->node); |
| @@ -2888,19 +3003,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2888 | { | 3003 | { |
| 2889 | struct fs_path *from_path = NULL; | 3004 | struct fs_path *from_path = NULL; |
| 2890 | struct fs_path *to_path = NULL; | 3005 | struct fs_path *to_path = NULL; |
| 3006 | struct fs_path *name = NULL; | ||
| 2891 | u64 orig_progress = sctx->send_progress; | 3007 | u64 orig_progress = sctx->send_progress; |
| 2892 | struct recorded_ref *cur; | 3008 | struct recorded_ref *cur; |
| 3009 | u64 parent_ino, parent_gen; | ||
| 3010 | struct waiting_dir_move *dm = NULL; | ||
| 3011 | u64 rmdir_ino = 0; | ||
| 2893 | int ret; | 3012 | int ret; |
| 2894 | 3013 | ||
| 3014 | name = fs_path_alloc(); | ||
| 2895 | from_path = fs_path_alloc(); | 3015 | from_path = fs_path_alloc(); |
| 2896 | if (!from_path) | 3016 | if (!name || !from_path) { |
| 2897 | return -ENOMEM; | 3017 | ret = -ENOMEM; |
| 3018 | goto out; | ||
| 3019 | } | ||
| 2898 | 3020 | ||
| 2899 | sctx->send_progress = pm->ino; | 3021 | dm = get_waiting_dir_move(sctx, pm->ino); |
| 2900 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3022 | ASSERT(dm); |
| 3023 | rmdir_ino = dm->rmdir_ino; | ||
| 3024 | free_waiting_dir_move(sctx, dm); | ||
| 3025 | |||
| 3026 | ret = get_first_ref(sctx->parent_root, pm->ino, | ||
| 3027 | &parent_ino, &parent_gen, name); | ||
| 2901 | if (ret < 0) | 3028 | if (ret < 0) |
| 2902 | goto out; | 3029 | goto out; |
| 2903 | 3030 | ||
| 3031 | if (parent_ino == sctx->cur_ino) { | ||
| 3032 | /* child only renamed, not moved */ | ||
| 3033 | ASSERT(parent_gen == sctx->cur_inode_gen); | ||
| 3034 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
| 3035 | from_path); | ||
| 3036 | if (ret < 0) | ||
| 3037 | goto out; | ||
| 3038 | ret = fs_path_add_path(from_path, name); | ||
| 3039 | if (ret < 0) | ||
| 3040 | goto out; | ||
| 3041 | } else { | ||
| 3042 | /* child moved and maybe renamed too */ | ||
| 3043 | sctx->send_progress = pm->ino; | ||
| 3044 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | ||
| 3045 | if (ret < 0) | ||
| 3046 | goto out; | ||
| 3047 | } | ||
| 3048 | |||
| 3049 | fs_path_free(name); | ||
| 3050 | name = NULL; | ||
| 3051 | |||
| 2904 | to_path = fs_path_alloc(); | 3052 | to_path = fs_path_alloc(); |
| 2905 | if (!to_path) { | 3053 | if (!to_path) { |
| 2906 | ret = -ENOMEM; | 3054 | ret = -ENOMEM; |
| @@ -2908,9 +3056,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2908 | } | 3056 | } |
| 2909 | 3057 | ||
| 2910 | sctx->send_progress = sctx->cur_ino + 1; | 3058 | sctx->send_progress = sctx->cur_ino + 1; |
| 2911 | ret = del_waiting_dir_move(sctx, pm->ino); | ||
| 2912 | ASSERT(ret == 0); | ||
| 2913 | |||
| 2914 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3059 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
| 2915 | if (ret < 0) | 3060 | if (ret < 0) |
| 2916 | goto out; | 3061 | goto out; |
| @@ -2919,6 +3064,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2919 | if (ret < 0) | 3064 | if (ret < 0) |
| 2920 | goto out; | 3065 | goto out; |
| 2921 | 3066 | ||
| 3067 | if (rmdir_ino) { | ||
| 3068 | struct orphan_dir_info *odi; | ||
| 3069 | |||
| 3070 | odi = get_orphan_dir_info(sctx, rmdir_ino); | ||
| 3071 | if (!odi) { | ||
| 3072 | /* already deleted */ | ||
| 3073 | goto finish; | ||
| 3074 | } | ||
| 3075 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | ||
| 3076 | if (ret < 0) | ||
| 3077 | goto out; | ||
| 3078 | if (!ret) | ||
| 3079 | goto finish; | ||
| 3080 | |||
| 3081 | name = fs_path_alloc(); | ||
| 3082 | if (!name) { | ||
| 3083 | ret = -ENOMEM; | ||
| 3084 | goto out; | ||
| 3085 | } | ||
| 3086 | ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); | ||
| 3087 | if (ret < 0) | ||
| 3088 | goto out; | ||
| 3089 | ret = send_rmdir(sctx, name); | ||
| 3090 | if (ret < 0) | ||
| 3091 | goto out; | ||
| 3092 | free_orphan_dir_info(sctx, odi); | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | finish: | ||
| 2922 | ret = send_utimes(sctx, pm->ino, pm->gen); | 3096 | ret = send_utimes(sctx, pm->ino, pm->gen); |
| 2923 | if (ret < 0) | 3097 | if (ret < 0) |
| 2924 | goto out; | 3098 | goto out; |
| @@ -2928,12 +3102,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2928 | * and old parent(s). | 3102 | * and old parent(s). |
| 2929 | */ | 3103 | */ |
| 2930 | list_for_each_entry(cur, &pm->update_refs, list) { | 3104 | list_for_each_entry(cur, &pm->update_refs, list) { |
| 3105 | if (cur->dir == rmdir_ino) | ||
| 3106 | continue; | ||
| 2931 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3107 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 2932 | if (ret < 0) | 3108 | if (ret < 0) |
| 2933 | goto out; | 3109 | goto out; |
| 2934 | } | 3110 | } |
| 2935 | 3111 | ||
| 2936 | out: | 3112 | out: |
| 3113 | fs_path_free(name); | ||
| 2937 | fs_path_free(from_path); | 3114 | fs_path_free(from_path); |
| 2938 | fs_path_free(to_path); | 3115 | fs_path_free(to_path); |
| 2939 | sctx->send_progress = orig_progress; | 3116 | sctx->send_progress = orig_progress; |
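The rmdir_ino branch above forms one unit: once the directory has been renamed into place, re-check whether the orphan parent it was pinning can finally be removed. A hedged restatement of that flow as a helper, as it would sit inside send.c (the helper name is invented; the callees are the ones this patch introduces):

/* After a deferred dir move, retry the rmdir that was blocked on it. */
static int finish_deferred_rmdir(struct send_ctx *sctx, u64 rmdir_ino)
{
	struct orphan_dir_info *odi;
	struct fs_path *name;
	int ret;

	odi = get_orphan_dir_info(sctx, rmdir_ino);
	if (!odi)
		return 0;		/* already deleted */
	ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
	if (ret <= 0)
		return ret;		/* error, or still not empty */

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;
	ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
	if (!ret)
		ret = send_rmdir(sctx, name);
	if (!ret)
		free_orphan_dir_info(sctx, odi);
	fs_path_free(name);
	return ret;
}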
| @@ -3005,17 +3182,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3005 | int ret; | 3182 | int ret; |
| 3006 | u64 ino = parent_ref->dir; | 3183 | u64 ino = parent_ref->dir; |
| 3007 | u64 parent_ino_before, parent_ino_after; | 3184 | u64 parent_ino_before, parent_ino_after; |
| 3008 | u64 new_gen, old_gen; | 3185 | u64 old_gen; |
| 3009 | struct fs_path *path_before = NULL; | 3186 | struct fs_path *path_before = NULL; |
| 3010 | struct fs_path *path_after = NULL; | 3187 | struct fs_path *path_after = NULL; |
| 3011 | int len1, len2; | 3188 | int len1, len2; |
| 3012 | 3189 | int register_upper_dirs; | |
| 3013 | if (parent_ref->dir <= sctx->cur_ino) | 3190 | u64 gen; |
| 3014 | return 0; | ||
| 3015 | 3191 | ||
| 3016 | if (is_waiting_for_move(sctx, ino)) | 3192 | if (is_waiting_for_move(sctx, ino)) |
| 3017 | return 1; | 3193 | return 1; |
| 3018 | 3194 | ||
| 3195 | if (parent_ref->dir <= sctx->cur_ino) | ||
| 3196 | return 0; | ||
| 3197 | |||
| 3019 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | 3198 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, |
| 3020 | NULL, NULL, NULL, NULL); | 3199 | NULL, NULL, NULL, NULL); |
| 3021 | if (ret == -ENOENT) | 3200 | if (ret == -ENOENT) |
| @@ -3023,12 +3202,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3023 | else if (ret < 0) | 3202 | else if (ret < 0) |
| 3024 | return ret; | 3203 | return ret; |
| 3025 | 3204 | ||
| 3026 | ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, | 3205 | if (parent_ref->dir_gen != old_gen) |
| 3027 | NULL, NULL, NULL, NULL); | ||
| 3028 | if (ret < 0) | ||
| 3029 | return ret; | ||
| 3030 | |||
| 3031 | if (new_gen != old_gen) | ||
| 3032 | return 0; | 3206 | return 0; |
| 3033 | 3207 | ||
| 3034 | path_before = fs_path_alloc(); | 3208 | path_before = fs_path_alloc(); |
| @@ -3051,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3051 | } | 3225 | } |
| 3052 | 3226 | ||
| 3053 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3227 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
| 3054 | NULL, path_after); | 3228 | &gen, path_after); |
| 3055 | if (ret == -ENOENT) { | 3229 | if (ret == -ENOENT) { |
| 3056 | ret = 0; | 3230 | ret = 0; |
| 3057 | goto out; | 3231 | goto out; |
| @@ -3061,13 +3235,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3061 | 3235 | ||
| 3062 | len1 = fs_path_len(path_before); | 3236 | len1 = fs_path_len(path_before); |
| 3063 | len2 = fs_path_len(path_after); | 3237 | len2 = fs_path_len(path_after); |
| 3064 | if ((parent_ino_before != parent_ino_after) && (len1 != len2 || | 3238 | if (parent_ino_before != parent_ino_after || len1 != len2 || |
| 3065 | memcmp(path_before->start, path_after->start, len1))) { | 3239 | memcmp(path_before->start, path_after->start, len1)) { |
| 3066 | ret = 1; | 3240 | ret = 1; |
| 3067 | goto out; | 3241 | goto out; |
| 3068 | } | 3242 | } |
| 3069 | ret = 0; | 3243 | ret = 0; |
| 3070 | 3244 | ||
| 3245 | /* | ||
| 3246 | * Ok, our new most direct ancestor has a higher inode number but | ||
| 3247 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | ||
| 3248 | * the hierarchy have a higher inode number too *and* were renamed | ||
| 3249 | * or moved - in this case we need to wait for the ancestor's rename | ||
| 3250 | * or move operation before we can do the move/rename for the current | ||
| 3251 | * inode. | ||
| 3252 | */ | ||
| 3253 | register_upper_dirs = 0; | ||
| 3254 | ino = parent_ino_after; | ||
| 3255 | again: | ||
| 3256 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | ||
| 3257 | u64 parent_gen; | ||
| 3258 | |||
| 3259 | fs_path_reset(path_before); | ||
| 3260 | fs_path_reset(path_after); | ||
| 3261 | |||
| 3262 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
| 3263 | &parent_gen, path_after); | ||
| 3264 | if (ret < 0) | ||
| 3265 | goto out; | ||
| 3266 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
| 3267 | NULL, path_before); | ||
| 3268 | if (ret == -ENOENT) { | ||
| 3269 | ret = 0; | ||
| 3270 | break; | ||
| 3271 | } else if (ret < 0) { | ||
| 3272 | goto out; | ||
| 3273 | } | ||
| 3274 | |||
| 3275 | len1 = fs_path_len(path_before); | ||
| 3276 | len2 = fs_path_len(path_after); | ||
| 3277 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
| 3278 | memcmp(path_before->start, path_after->start, len1)) { | ||
| 3279 | ret = 1; | ||
| 3280 | if (register_upper_dirs) { | ||
| 3281 | break; | ||
| 3282 | } else { | ||
| 3283 | register_upper_dirs = 1; | ||
| 3284 | ino = parent_ref->dir; | ||
| 3285 | gen = parent_ref->dir_gen; | ||
| 3286 | goto again; | ||
| 3287 | } | ||
| 3288 | } else if (register_upper_dirs) { | ||
| 3289 | ret = add_pending_dir_move(sctx, ino, gen, | ||
| 3290 | parent_ino_after); | ||
| 3291 | if (ret < 0 && ret != -EEXIST) | ||
| 3292 | goto out; | ||
| 3293 | } | ||
| 3294 | |||
| 3295 | ino = parent_ino_after; | ||
| 3296 | gen = parent_gen; | ||
| 3297 | } | ||
| 3298 | |||
| 3071 | out: | 3299 | out: |
| 3072 | fs_path_free(path_before); | 3300 | fs_path_free(path_before); |
| 3073 | fs_path_free(path_after); | 3301 | fs_path_free(path_after); |
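The move/rename test used in this function compares an inode's first reference between the two snapshots: same parent inode and byte-identical path means nothing moved. As a standalone predicate (the function name is invented; it assumes send.c's fs_path layout with its start pointer and fs_path_len()):

#include <linux/string.h>

/* True iff the first ref differs between the parent and send snapshots,
 * i.e. the inode was moved and/or renamed between them. */
static int first_ref_changed(struct fs_path *before, struct fs_path *after,
			     u64 parent_before, u64 parent_after)
{
	int len1 = fs_path_len(before);
	int len2 = fs_path_len(after);

	return parent_before != parent_after || len1 != len2 ||
	       memcmp(before->start, after->start, len1);
}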
| @@ -3089,6 +3317,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
| 3089 | u64 ow_gen; | 3317 | u64 ow_gen; |
| 3090 | int did_overwrite = 0; | 3318 | int did_overwrite = 0; |
| 3091 | int is_orphan = 0; | 3319 | int is_orphan = 0; |
| 3320 | u64 last_dir_ino_rm = 0; | ||
| 3092 | 3321 | ||
| 3093 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3322 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
| 3094 | 3323 | ||
| @@ -3227,9 +3456,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3227 | * dirs, we always have one new and one deleted | 3456 | * dirs, we always have one new and one deleted |
| 3228 | * ref. The deleted ref is ignored later. | 3457 | * ref. The deleted ref is ignored later. |
| 3229 | */ | 3458 | */ |
| 3230 | if (wait_for_parent_move(sctx, cur)) { | 3459 | ret = wait_for_parent_move(sctx, cur); |
| 3460 | if (ret < 0) | ||
| 3461 | goto out; | ||
| 3462 | if (ret) { | ||
| 3231 | ret = add_pending_dir_move(sctx, | 3463 | ret = add_pending_dir_move(sctx, |
| 3232 | cur->dir); | 3464 | sctx->cur_ino, |
| 3465 | sctx->cur_inode_gen, | ||
| 3466 | cur->dir); | ||
| 3233 | *pending_move = 1; | 3467 | *pending_move = 1; |
| 3234 | } else { | 3468 | } else { |
| 3235 | ret = send_rename(sctx, valid_path, | 3469 | ret = send_rename(sctx, valid_path, |
| @@ -3259,7 +3493,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3259 | * later, we do this check again and rmdir it then if possible. | 3493 | * later, we do this check again and rmdir it then if possible. |
| 3260 | * See the use of check_dirs for more details. | 3494 | * See the use of check_dirs for more details. |
| 3261 | */ | 3495 | */ |
| 3262 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); | 3496 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, |
| 3497 | sctx->cur_ino); | ||
| 3263 | if (ret < 0) | 3498 | if (ret < 0) |
| 3264 | goto out; | 3499 | goto out; |
| 3265 | if (ret) { | 3500 | if (ret) { |
| @@ -3350,8 +3585,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3350 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3585 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 3351 | if (ret < 0) | 3586 | if (ret < 0) |
| 3352 | goto out; | 3587 | goto out; |
| 3353 | } else if (ret == inode_state_did_delete) { | 3588 | } else if (ret == inode_state_did_delete && |
| 3354 | ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); | 3589 | cur->dir != last_dir_ino_rm) { |
| 3590 | ret = can_rmdir(sctx, cur->dir, cur->dir_gen, | ||
| 3591 | sctx->cur_ino); | ||
| 3355 | if (ret < 0) | 3592 | if (ret < 0) |
| 3356 | goto out; | 3593 | goto out; |
| 3357 | if (ret) { | 3594 | if (ret) { |
| @@ -3362,6 +3599,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3362 | ret = send_rmdir(sctx, valid_path); | 3599 | ret = send_rmdir(sctx, valid_path); |
| 3363 | if (ret < 0) | 3600 | if (ret < 0) |
| 3364 | goto out; | 3601 | goto out; |
| 3602 | last_dir_ino_rm = cur->dir; | ||
| 3365 | } | 3603 | } |
| 3366 | } | 3604 | } |
| 3367 | } | 3605 | } |
| @@ -3375,9 +3613,8 @@ out: | |||
| 3375 | return ret; | 3613 | return ret; |
| 3376 | } | 3614 | } |
| 3377 | 3615 | ||
| 3378 | static int __record_new_ref(int num, u64 dir, int index, | 3616 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, |
| 3379 | struct fs_path *name, | 3617 | struct fs_path *name, void *ctx, struct list_head *refs) |
| 3380 | void *ctx) | ||
| 3381 | { | 3618 | { |
| 3382 | int ret = 0; | 3619 | int ret = 0; |
| 3383 | struct send_ctx *sctx = ctx; | 3620 | struct send_ctx *sctx = ctx; |
| @@ -3388,7 +3625,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3388 | if (!p) | 3625 | if (!p) |
| 3389 | return -ENOMEM; | 3626 | return -ENOMEM; |
| 3390 | 3627 | ||
| 3391 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3628 | ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, |
| 3392 | NULL, NULL); | 3629 | NULL, NULL); |
| 3393 | if (ret < 0) | 3630 | if (ret < 0) |
| 3394 | goto out; | 3631 | goto out; |
| @@ -3400,7 +3637,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3400 | if (ret < 0) | 3637 | if (ret < 0) |
| 3401 | goto out; | 3638 | goto out; |
| 3402 | 3639 | ||
| 3403 | ret = record_ref(&sctx->new_refs, dir, gen, p); | 3640 | ret = __record_ref(refs, dir, gen, p); |
| 3404 | 3641 | ||
| 3405 | out: | 3642 | out: |
| 3406 | if (ret) | 3643 | if (ret) |
| @@ -3408,37 +3645,23 @@ out: | |||
| 3408 | return ret; | 3645 | return ret; |
| 3409 | } | 3646 | } |
| 3410 | 3647 | ||
| 3648 | static int __record_new_ref(int num, u64 dir, int index, | ||
| 3649 | struct fs_path *name, | ||
| 3650 | void *ctx) | ||
| 3651 | { | ||
| 3652 | struct send_ctx *sctx = ctx; | ||
| 3653 | return record_ref(sctx->send_root, num, dir, index, name, | ||
| 3654 | ctx, &sctx->new_refs); | ||
| 3655 | } | ||
| 3656 | |||
| 3657 | |||
| 3411 | static int __record_deleted_ref(int num, u64 dir, int index, | 3658 | static int __record_deleted_ref(int num, u64 dir, int index, |
| 3412 | struct fs_path *name, | 3659 | struct fs_path *name, |
| 3413 | void *ctx) | 3660 | void *ctx) |
| 3414 | { | 3661 | { |
| 3415 | int ret = 0; | ||
| 3416 | struct send_ctx *sctx = ctx; | 3662 | struct send_ctx *sctx = ctx; |
| 3417 | struct fs_path *p; | 3663 | return record_ref(sctx->parent_root, num, dir, index, name, |
| 3418 | u64 gen; | 3664 | ctx, &sctx->deleted_refs); |
| 3419 | |||
| 3420 | p = fs_path_alloc(); | ||
| 3421 | if (!p) | ||
| 3422 | return -ENOMEM; | ||
| 3423 | |||
| 3424 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
| 3425 | NULL, NULL); | ||
| 3426 | if (ret < 0) | ||
| 3427 | goto out; | ||
| 3428 | |||
| 3429 | ret = get_cur_path(sctx, dir, gen, p); | ||
| 3430 | if (ret < 0) | ||
| 3431 | goto out; | ||
| 3432 | ret = fs_path_add_path(p, name); | ||
| 3433 | if (ret < 0) | ||
| 3434 | goto out; | ||
| 3435 | |||
| 3436 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
| 3437 | |||
| 3438 | out: | ||
| 3439 | if (ret) | ||
| 3440 | fs_path_free(p); | ||
| 3441 | return ret; | ||
| 3442 | } | 3665 | } |
| 3443 | 3666 | ||
| 3444 | static int record_new_ref(struct send_ctx *sctx) | 3667 | static int record_new_ref(struct send_ctx *sctx) |
| @@ -3619,21 +3842,31 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3619 | root = sctx->parent_root; | 3842 | root = sctx->parent_root; |
| 3620 | cb = __record_deleted_ref; | 3843 | cb = __record_deleted_ref; |
| 3621 | } else { | 3844 | } else { |
| 3622 | BUG(); | 3845 | btrfs_err(sctx->send_root->fs_info, |
| 3846 | "Wrong command %d in process_all_refs", cmd); | ||
| 3847 | ret = -EINVAL; | ||
| 3848 | goto out; | ||
| 3623 | } | 3849 | } |
| 3624 | 3850 | ||
| 3625 | key.objectid = sctx->cmp_key->objectid; | 3851 | key.objectid = sctx->cmp_key->objectid; |
| 3626 | key.type = BTRFS_INODE_REF_KEY; | 3852 | key.type = BTRFS_INODE_REF_KEY; |
| 3627 | key.offset = 0; | 3853 | key.offset = 0; |
| 3628 | while (1) { | 3854 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3629 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3855 | if (ret < 0) |
| 3630 | if (ret < 0) | 3856 | goto out; |
| 3631 | goto out; | ||
| 3632 | if (ret) | ||
| 3633 | break; | ||
| 3634 | 3857 | ||
| 3858 | while (1) { | ||
| 3635 | eb = path->nodes[0]; | 3859 | eb = path->nodes[0]; |
| 3636 | slot = path->slots[0]; | 3860 | slot = path->slots[0]; |
| 3861 | if (slot >= btrfs_header_nritems(eb)) { | ||
| 3862 | ret = btrfs_next_leaf(root, path); | ||
| 3863 | if (ret < 0) | ||
| 3864 | goto out; | ||
| 3865 | else if (ret > 0) | ||
| 3866 | break; | ||
| 3867 | continue; | ||
| 3868 | } | ||
| 3869 | |||
| 3637 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3870 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| 3638 | 3871 | ||
| 3639 | if (found_key.objectid != key.objectid || | 3872 | if (found_key.objectid != key.objectid || |
| @@ -3642,11 +3875,10 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3642 | break; | 3875 | break; |
| 3643 | 3876 | ||
| 3644 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); | 3877 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
| 3645 | btrfs_release_path(path); | ||
| 3646 | if (ret < 0) | 3878 | if (ret < 0) |
| 3647 | goto out; | 3879 | goto out; |
| 3648 | 3880 | ||
| 3649 | key.offset = found_key.offset + 1; | 3881 | path->slots[0]++; |
| 3650 | } | 3882 | } |
| 3651 | btrfs_release_path(path); | 3883 | btrfs_release_path(path); |
| 3652 | 3884 | ||
| @@ -3927,19 +4159,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3927 | key.objectid = sctx->cmp_key->objectid; | 4159 | key.objectid = sctx->cmp_key->objectid; |
| 3928 | key.type = BTRFS_XATTR_ITEM_KEY; | 4160 | key.type = BTRFS_XATTR_ITEM_KEY; |
| 3929 | key.offset = 0; | 4161 | key.offset = 0; |
| 3930 | while (1) { | 4162 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3931 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 4163 | if (ret < 0) |
| 3932 | if (ret < 0) | 4164 | goto out; |
| 3933 | goto out; | ||
| 3934 | if (ret) { | ||
| 3935 | ret = 0; | ||
| 3936 | goto out; | ||
| 3937 | } | ||
| 3938 | 4165 | ||
| 4166 | while (1) { | ||
| 3939 | eb = path->nodes[0]; | 4167 | eb = path->nodes[0]; |
| 3940 | slot = path->slots[0]; | 4168 | slot = path->slots[0]; |
| 3941 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 4169 | if (slot >= btrfs_header_nritems(eb)) { |
| 4170 | ret = btrfs_next_leaf(root, path); | ||
| 4171 | if (ret < 0) { | ||
| 4172 | goto out; | ||
| 4173 | } else if (ret > 0) { | ||
| 4174 | ret = 0; | ||
| 4175 | break; | ||
| 4176 | } | ||
| 4177 | continue; | ||
| 4178 | } | ||
| 3942 | 4179 | ||
| 4180 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 3943 | if (found_key.objectid != key.objectid || | 4181 | if (found_key.objectid != key.objectid || |
| 3944 | found_key.type != key.type) { | 4182 | found_key.type != key.type) { |
| 3945 | ret = 0; | 4183 | ret = 0; |
| @@ -3951,8 +4189,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3951 | if (ret < 0) | 4189 | if (ret < 0) |
| 3952 | goto out; | 4190 | goto out; |
| 3953 | 4191 | ||
| 3954 | btrfs_release_path(path); | 4192 | path->slots[0]++; |
| 3955 | key.offset = found_key.offset + 1; | ||
| 3956 | } | 4193 | } |
| 3957 | 4194 | ||
| 3958 | out: | 4195 | out: |
| @@ -3991,6 +4228,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3991 | goto out; | 4228 | goto out; |
| 3992 | 4229 | ||
| 3993 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; | 4230 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; |
| 4231 | |||
| 4232 | /* initial readahead */ | ||
| 4233 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | ||
| 4234 | file_ra_state_init(&sctx->ra, inode->i_mapping); | ||
| 4235 | btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, | ||
| 4236 | last_index - index + 1); | ||
| 4237 | |||
| 3994 | while (index <= last_index) { | 4238 | while (index <= last_index) { |
| 3995 | unsigned cur_len = min_t(unsigned, len, | 4239 | unsigned cur_len = min_t(unsigned, len, |
| 3996 | PAGE_CACHE_SIZE - pg_offset); | 4240 | PAGE_CACHE_SIZE - pg_offset); |
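btrfs_force_ra() above is a btrfs wrapper around the generic readahead machinery; with the plain pagecache API, warming the cache for a sequential copy loop looks roughly like this (a sketch under that assumption, not the wrapper's actual body):

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/string.h>

/* Kick off readahead for pages [index, last_index] of @file so that
 * the copy loop which follows mostly hits the page cache. */
static void prime_readahead(struct file *file, pgoff_t index,
			    pgoff_t last_index)
{
	struct file_ra_state ra;

	memset(&ra, 0, sizeof(ra));
	file_ra_state_init(&ra, file->f_mapping);
	page_cache_sync_readahead(file->f_mapping, &ra, file,
				  index, last_index - index + 1);
}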
| @@ -4763,18 +5007,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4763 | ret = apply_children_dir_moves(sctx); | 5007 | ret = apply_children_dir_moves(sctx); |
| 4764 | if (ret) | 5008 | if (ret) |
| 4765 | goto out; | 5009 | goto out; |
| 5010 | /* | ||
| 5011 | * Need to send this every time, regardless of whether it actually | ||
| 5012 | * changed between the two trees, since we have made changes to | ||
| 5013 | * the inode before. If our inode is a directory and it's | ||
| 5014 | * waiting to be moved/renamed, we will send its utimes when | ||
| 5015 | * it's moved/renamed, therefore we don't need to do it here. | ||
| 5016 | */ | ||
| 5017 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 5018 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 5019 | if (ret < 0) | ||
| 5020 | goto out; | ||
| 4766 | } | 5021 | } |
| 4767 | 5022 | ||
| 4768 | /* | ||
| 4769 | * Need to send that every time, no matter if it actually | ||
| 4770 | * changed between the two trees as we have done changes to | ||
| 4771 | * the inode before. | ||
| 4772 | */ | ||
| 4773 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 4774 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 4775 | if (ret < 0) | ||
| 4776 | goto out; | ||
| 4777 | |||
| 4778 | out: | 5023 | out: |
| 4779 | return ret; | 5024 | return ret; |
| 4780 | } | 5025 | } |
| @@ -4840,6 +5085,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4840 | sctx->left_path->nodes[0], left_ii); | 5085 | sctx->left_path->nodes[0], left_ii); |
| 4841 | sctx->cur_inode_mode = btrfs_inode_mode( | 5086 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4842 | sctx->left_path->nodes[0], left_ii); | 5087 | sctx->left_path->nodes[0], left_ii); |
| 5088 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5089 | sctx->left_path->nodes[0], left_ii); | ||
| 4843 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 5090 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
| 4844 | ret = send_create_inode_if_needed(sctx); | 5091 | ret = send_create_inode_if_needed(sctx); |
| 4845 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 5092 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
| @@ -4884,6 +5131,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4884 | sctx->left_path->nodes[0], left_ii); | 5131 | sctx->left_path->nodes[0], left_ii); |
| 4885 | sctx->cur_inode_mode = btrfs_inode_mode( | 5132 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4886 | sctx->left_path->nodes[0], left_ii); | 5133 | sctx->left_path->nodes[0], left_ii); |
| 5134 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5135 | sctx->left_path->nodes[0], left_ii); | ||
| 4887 | ret = send_create_inode_if_needed(sctx); | 5136 | ret = send_create_inode_if_needed(sctx); |
| 4888 | if (ret < 0) | 5137 | if (ret < 0) |
| 4889 | goto out; | 5138 | goto out; |
| @@ -5118,6 +5367,7 @@ out: | |||
| 5118 | static int full_send_tree(struct send_ctx *sctx) | 5367 | static int full_send_tree(struct send_ctx *sctx) |
| 5119 | { | 5368 | { |
| 5120 | int ret; | 5369 | int ret; |
| 5370 | struct btrfs_trans_handle *trans = NULL; | ||
| 5121 | struct btrfs_root *send_root = sctx->send_root; | 5371 | struct btrfs_root *send_root = sctx->send_root; |
| 5122 | struct btrfs_key key; | 5372 | struct btrfs_key key; |
| 5123 | struct btrfs_key found_key; | 5373 | struct btrfs_key found_key; |
| @@ -5139,6 +5389,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 5139 | key.type = BTRFS_INODE_ITEM_KEY; | 5389 | key.type = BTRFS_INODE_ITEM_KEY; |
| 5140 | key.offset = 0; | 5390 | key.offset = 0; |
| 5141 | 5391 | ||
| 5392 | join_trans: | ||
| 5393 | /* | ||
| 5394 | * We need to make sure the transaction does not get committed | ||
| 5395 | * while we do anything on commit roots. Join a transaction to prevent | ||
| 5396 | * this. | ||
| 5397 | */ | ||
| 5398 | trans = btrfs_join_transaction(send_root); | ||
| 5399 | if (IS_ERR(trans)) { | ||
| 5400 | ret = PTR_ERR(trans); | ||
| 5401 | trans = NULL; | ||
| 5402 | goto out; | ||
| 5403 | } | ||
| 5404 | |||
| 5142 | /* | 5405 | /* |
| 5143 | * Make sure the tree has not changed after re-joining. We detect this | 5406 | * Make sure the tree has not changed after re-joining. We detect this |
| 5144 | * by comparing start_ctransid and ctransid. They should always match. | 5407 | * by comparing start_ctransid and ctransid. They should always match. |
| @@ -5162,6 +5425,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 5162 | goto out_finish; | 5425 | goto out_finish; |
| 5163 | 5426 | ||
| 5164 | while (1) { | 5427 | while (1) { |
| 5428 | /* | ||
| 5429 | * When someone wants to commit while we iterate, end the | ||
| 5430 | * joined transaction and rejoin. | ||
| 5431 | */ | ||
| 5432 | if (btrfs_should_end_transaction(trans, send_root)) { | ||
| 5433 | ret = btrfs_end_transaction(trans, send_root); | ||
| 5434 | trans = NULL; | ||
| 5435 | if (ret < 0) | ||
| 5436 | goto out; | ||
| 5437 | btrfs_release_path(path); | ||
| 5438 | goto join_trans; | ||
| 5439 | } | ||
| 5440 | |||
| 5165 | eb = path->nodes[0]; | 5441 | eb = path->nodes[0]; |
| 5166 | slot = path->slots[0]; | 5442 | slot = path->slots[0]; |
| 5167 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 5443 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| @@ -5189,6 +5465,12 @@ out_finish: | |||
| 5189 | 5465 | ||
| 5190 | out: | 5466 | out: |
| 5191 | btrfs_free_path(path); | 5467 | btrfs_free_path(path); |
| 5468 | if (trans) { | ||
| 5469 | if (!ret) | ||
| 5470 | ret = btrfs_end_transaction(trans, send_root); | ||
| 5471 | else | ||
| 5472 | btrfs_end_transaction(trans, send_root); | ||
| 5473 | } | ||
| 5192 | return ret; | 5474 | return ret; |
| 5193 | } | 5475 | } |
| 5194 | 5476 | ||
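Taken together, the join_trans label, the should-end check inside the loop, and the cleanup at out implement a cooperative pattern: hold a joined transaction while walking commit roots, but yield and rejoin whenever a commit is waiting. Stripped of the send-specific work, the shape is (more_work() is a stand-in for the real iteration condition):

#include <linux/err.h>

/* Stand-in for "are there more tree items to visit?" */
static bool more_work(void) { return false; }

/* Iterate under a joined transaction, yielding to pending commits. */
static int iterate_with_join(struct btrfs_root *root,
			     struct btrfs_path *path)
{
	struct btrfs_trans_handle *trans;
	int ret;

join_trans:
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	while (more_work()) {
		if (btrfs_should_end_transaction(trans, root)) {
			ret = btrfs_end_transaction(trans, root);
			if (ret < 0)
				return ret;
			btrfs_release_path(path);
			goto join_trans;	/* re-join, then re-seek */
		}
		/* ... process one tree item, advance the path ... */
	}
	return btrfs_end_transaction(trans, root);
}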
| @@ -5340,6 +5622,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5340 | 5622 | ||
| 5341 | sctx->pending_dir_moves = RB_ROOT; | 5623 | sctx->pending_dir_moves = RB_ROOT; |
| 5342 | sctx->waiting_dir_moves = RB_ROOT; | 5624 | sctx->waiting_dir_moves = RB_ROOT; |
| 5625 | sctx->orphan_dirs = RB_ROOT; | ||
| 5343 | 5626 | ||
| 5344 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 5627 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * |
| 5345 | (arg->clone_sources_count + 1)); | 5628 | (arg->clone_sources_count + 1)); |
| @@ -5477,6 +5760,16 @@ out: | |||
| 5477 | kfree(dm); | 5760 | kfree(dm); |
| 5478 | } | 5761 | } |
| 5479 | 5762 | ||
| 5763 | WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); | ||
| 5764 | while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { | ||
| 5765 | struct rb_node *n; | ||
| 5766 | struct orphan_dir_info *odi; | ||
| 5767 | |||
| 5768 | n = rb_first(&sctx->orphan_dirs); | ||
| 5769 | odi = rb_entry(n, struct orphan_dir_info, node); | ||
| 5770 | free_orphan_dir_info(sctx, odi); | ||
| 5771 | } | ||
| 5772 | |||
| 5480 | if (sort_clone_roots) { | 5773 | if (sort_clone_roots) { |
| 5481 | for (i = 0; i < sctx->clone_roots_cnt; i++) | 5774 | for (i = 0; i < sctx->clone_roots_cnt; i++) |
| 5482 | btrfs_root_dec_send_in_progress( | 5775 | btrfs_root_dec_send_in_progress( |
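The teardown hunk above drains sctx->orphan_dirs by repeatedly taking rb_first() and freeing the entry; free_orphan_dir_info() must erase the node too, or the loop would never terminate. The generic shape of such a drain:

#include <linux/rbtree.h>
#include <linux/slab.h>

struct item {
	struct rb_node node;
	/* payload ... */
};

/* Free every entry of @root; the tree shrinks each iteration. */
static void drain_tree(struct rb_root *root)
{
	struct rb_node *n;

	while ((n = rb_first(root)) != NULL) {
		struct item *it = rb_entry(n, struct item, node);

		rb_erase(n, root);
		kfree(it);
	}
}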
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d04db817be5c..9dbf42395153 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -1305,13 +1305,6 @@ error_fs_info: | |||
| 1305 | return ERR_PTR(error); | 1305 | return ERR_PTR(error); |
| 1306 | } | 1306 | } |
| 1307 | 1307 | ||
| 1308 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
| 1309 | { | ||
| 1310 | spin_lock_irq(&workers->lock); | ||
| 1311 | workers->max_workers = new_limit; | ||
| 1312 | spin_unlock_irq(&workers->lock); | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | 1308 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, |
| 1316 | int new_pool_size, int old_pool_size) | 1309 | int new_pool_size, int old_pool_size) |
| 1317 | { | 1310 | { |
| @@ -1323,21 +1316,20 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
| 1323 | btrfs_info(fs_info, "resize thread pool %d -> %d", | 1316 | btrfs_info(fs_info, "resize thread pool %d -> %d", |
| 1324 | old_pool_size, new_pool_size); | 1317 | old_pool_size, new_pool_size); |
| 1325 | 1318 | ||
| 1326 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | 1319 | btrfs_workqueue_set_max(fs_info->workers, new_pool_size); |
| 1327 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | 1320 | btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); |
| 1328 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | 1321 | btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); |
| 1329 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | 1322 | btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); |
| 1330 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | 1323 | btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); |
| 1331 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | 1324 | btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); |
| 1332 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | 1325 | btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, |
| 1333 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | 1326 | new_pool_size); |
| 1334 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | 1327 | btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); |
| 1335 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | 1328 | btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); |
| 1336 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1329 | btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); |
| 1337 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1330 | btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); |
| 1338 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1331 | btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, |
| 1339 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, | 1332 | new_pool_size); |
| 1340 | new_pool_size); | ||
| 1341 | } | 1333 | } |
| 1342 | 1334 | ||
| 1343 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) | 1335 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) |
| @@ -1388,6 +1380,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1388 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1380 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
| 1389 | int ret; | 1381 | int ret; |
| 1390 | 1382 | ||
| 1383 | sync_filesystem(sb); | ||
| 1391 | btrfs_remount_prepare(fs_info); | 1384 | btrfs_remount_prepare(fs_info); |
| 1392 | 1385 | ||
| 1393 | ret = btrfs_parse_options(root, data); | 1386 | ret = btrfs_parse_options(root, data); |
| @@ -1479,6 +1472,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1479 | sb->s_flags &= ~MS_RDONLY; | 1472 | sb->s_flags &= ~MS_RDONLY; |
| 1480 | } | 1473 | } |
| 1481 | out: | 1474 | out: |
| 1475 | wake_up_process(fs_info->transaction_kthread); | ||
| 1482 | btrfs_remount_cleanup(fs_info, old_opts); | 1476 | btrfs_remount_cleanup(fs_info, old_opts); |
| 1483 | return 0; | 1477 | return 0; |
| 1484 | 1478 | ||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 865f4cf9a769..c5eb2143dc66 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/kobject.h> | 24 | #include <linux/kobject.h> |
| 25 | #include <linux/bug.h> | 25 | #include <linux/bug.h> |
| 26 | #include <linux/genhd.h> | 26 | #include <linux/genhd.h> |
| 27 | #include <linux/debugfs.h> | ||
| 27 | 28 | ||
| 28 | #include "ctree.h" | 29 | #include "ctree.h" |
| 29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
| @@ -599,6 +600,12 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
| 599 | /* /sys/fs/btrfs/ entry */ | 600 | /* /sys/fs/btrfs/ entry */ |
| 600 | static struct kset *btrfs_kset; | 601 | static struct kset *btrfs_kset; |
| 601 | 602 | ||
| 603 | /* /sys/kernel/debug/btrfs */ | ||
| 604 | static struct dentry *btrfs_debugfs_root_dentry; | ||
| 605 | |||
| 606 | /* Debugging tunables and exported data */ | ||
| 607 | u64 btrfs_debugfs_test; | ||
| 608 | |||
| 602 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | 609 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) |
| 603 | { | 610 | { |
| 604 | int error; | 611 | int error; |
| @@ -642,27 +649,41 @@ failure: | |||
| 642 | return error; | 649 | return error; |
| 643 | } | 650 | } |
| 644 | 651 | ||
| 652 | static int btrfs_init_debugfs(void) | ||
| 653 | { | ||
| 654 | #ifdef CONFIG_DEBUG_FS | ||
| 655 | btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); | ||
| 656 | if (!btrfs_debugfs_root_dentry) | ||
| 657 | return -ENOMEM; | ||
| 658 | |||
| 659 | debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, | ||
| 660 | &btrfs_debugfs_test); | ||
| 661 | #endif | ||
| 662 | return 0; | ||
| 663 | } | ||
| 664 | |||
| 645 | int btrfs_init_sysfs(void) | 665 | int btrfs_init_sysfs(void) |
| 646 | { | 666 | { |
| 647 | int ret; | 667 | int ret; |
| 668 | |||
| 648 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 669 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
| 649 | if (!btrfs_kset) | 670 | if (!btrfs_kset) |
| 650 | return -ENOMEM; | 671 | return -ENOMEM; |
| 651 | 672 | ||
| 652 | init_feature_attrs(); | 673 | ret = btrfs_init_debugfs(); |
| 674 | if (ret) | ||
| 675 | return ret; | ||
| 653 | 676 | ||
| 677 | init_feature_attrs(); | ||
| 654 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 678 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 655 | if (ret) { | ||
| 656 | kset_unregister(btrfs_kset); | ||
| 657 | return ret; | ||
| 658 | } | ||
| 659 | 679 | ||
| 660 | return 0; | 680 | return ret; |
| 661 | } | 681 | } |
| 662 | 682 | ||
| 663 | void btrfs_exit_sysfs(void) | 683 | void btrfs_exit_sysfs(void) |
| 664 | { | 684 | { |
| 665 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 685 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 666 | kset_unregister(btrfs_kset); | 686 | kset_unregister(btrfs_kset); |
| 687 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
| 667 | } | 688 | } |
| 668 | 689 | ||
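For reference, the debugfs lifecycle being wired into btrfs init/exit here — create a directory, hang typed files off it, remove everything recursively on teardown — fits in a tiny self-contained module (a generic sketch, not btrfs code; the patch's own test knob uses the world-writable S_IRUGO | S_IWUGO):

#include <linux/debugfs.h>
#include <linux/module.h>

static struct dentry *demo_dir;
static u64 demo_val;

static int __init demo_init(void)
{
	/* NULL parent places the dir at the debugfs root. */
	demo_dir = debugfs_create_dir("demo", NULL);
	if (!demo_dir)
		return -ENOMEM;
	/* Reads/writes of this file access demo_val directly. */
	debugfs_create_u64("value", S_IRUGO | S_IWUSR, demo_dir, &demo_val);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Removes the directory and every file created under it. */
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");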
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f3cea3710d44..9ab576318a84 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
| @@ -1,6 +1,11 @@ | |||
| 1 | #ifndef _BTRFS_SYSFS_H_ | 1 | #ifndef _BTRFS_SYSFS_H_ |
| 2 | #define _BTRFS_SYSFS_H_ | 2 | #define _BTRFS_SYSFS_H_ |
| 3 | 3 | ||
| 4 | /* | ||
| 5 | * Data exported through sysfs and debugfs | ||
| 6 | */ | ||
| 7 | extern u64 btrfs_debugfs_test; | ||
| 8 | |||
| 4 | enum btrfs_feature_set { | 9 | enum btrfs_feature_set { |
| 5 | FEAT_COMPAT, | 10 | FEAT_COMPAT, |
| 6 | FEAT_COMPAT_RO, | 11 | FEAT_COMPAT_RO, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34cd83184c4a..a04707f740d6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
| 684 | int err = 0; | 684 | int err = 0; |
| 685 | 685 | ||
| 686 | if (--trans->use_count) { | 686 | if (trans->use_count > 1) { |
| 687 | trans->use_count--; | ||
| 687 | trans->block_rsv = trans->orig_rsv; | 688 | trans->block_rsv = trans->orig_rsv; |
| 688 | return 0; | 689 | return 0; |
| 689 | } | 690 | } |
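The subtle point of this hunk, with the removal further down, is reference-count discipline: the old if (--trans->use_count) consumed a reference even on the final put, so the throttled path had to re-increment before handing the handle to btrfs_commit_transaction(), which ends the handle itself. Decrementing only when nested leaves the last reference for whichever teardown path runs. In miniature:

struct handle {
	int use_count;
};

/* Drop a nested reference; report whether the caller holds the last
 * one and must run the real teardown. No re-increment is needed
 * before delegating to a path that ends the handle itself. */
static int handle_put(struct handle *h)
{
	if (h->use_count > 1) {
		h->use_count--;
		return 0;	/* still referenced elsewhere */
	}
	return 1;		/* last reference: tear down */
}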
| @@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 731 | } | 732 | } |
| 732 | 733 | ||
| 733 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 734 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 734 | if (throttle) { | 735 | if (throttle) |
| 735 | /* | ||
| 736 | * We may race with somebody else here so end up having | ||
| 737 | * to call end_transaction on ourselves again, so inc | ||
| 738 | * our use_count. | ||
| 739 | */ | ||
| 740 | trans->use_count++; | ||
| 741 | return btrfs_commit_transaction(trans, root); | 736 | return btrfs_commit_transaction(trans, root); |
| 742 | } else { | 737 | else |
| 743 | wake_up_process(info->transaction_kthread); | 738 | wake_up_process(info->transaction_kthread); |
| 744 | } | ||
| 745 | } | 739 | } |
| 746 | 740 | ||
| 747 | if (trans->type & __TRANS_FREEZABLE) | 741 | if (trans->type & __TRANS_FREEZABLE) |
| @@ -1578,10 +1572,9 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1578 | 1572 | ||
| 1579 | trace_btrfs_transaction_commit(root); | 1573 | trace_btrfs_transaction_commit(root); |
| 1580 | 1574 | ||
| 1581 | btrfs_scrub_continue(root); | ||
| 1582 | |||
| 1583 | if (current->journal_info == trans) | 1575 | if (current->journal_info == trans) |
| 1584 | current->journal_info = NULL; | 1576 | current->journal_info = NULL; |
| 1577 | btrfs_scrub_cancel(root->fs_info); | ||
| 1585 | 1578 | ||
| 1586 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1579 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1587 | } | 1580 | } |
| @@ -1621,7 +1614,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1621 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1614 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1622 | { | 1615 | { |
| 1623 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1616 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
| 1624 | return btrfs_start_delalloc_roots(fs_info, 1); | 1617 | return btrfs_start_delalloc_roots(fs_info, 1, -1); |
| 1625 | return 0; | 1618 | return 0; |
| 1626 | } | 1619 | } |
| 1627 | 1620 | ||
| @@ -1754,7 +1747,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1754 | /* ->aborted might be set after the previous check, so check it */ | 1747 | /* ->aborted might be set after the previous check, so check it */ |
| 1755 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1748 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
| 1756 | ret = cur_trans->aborted; | 1749 | ret = cur_trans->aborted; |
| 1757 | goto cleanup_transaction; | 1750 | goto scrub_continue; |
| 1758 | } | 1751 | } |
| 1759 | /* | 1752 | /* |
| 1760 | * the reloc mutex makes sure that we stop | 1753 | * the reloc mutex makes sure that we stop |
| @@ -1771,7 +1764,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1771 | ret = create_pending_snapshots(trans, root->fs_info); | 1764 | ret = create_pending_snapshots(trans, root->fs_info); |
| 1772 | if (ret) { | 1765 | if (ret) { |
| 1773 | mutex_unlock(&root->fs_info->reloc_mutex); | 1766 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1774 | goto cleanup_transaction; | 1767 | goto scrub_continue; |
| 1775 | } | 1768 | } |
| 1776 | 1769 | ||
| 1777 | /* | 1770 | /* |
| @@ -1787,13 +1780,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1787 | ret = btrfs_run_delayed_items(trans, root); | 1780 | ret = btrfs_run_delayed_items(trans, root); |
| 1788 | if (ret) { | 1781 | if (ret) { |
| 1789 | mutex_unlock(&root->fs_info->reloc_mutex); | 1782 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1790 | goto cleanup_transaction; | 1783 | goto scrub_continue; |
| 1791 | } | 1784 | } |
| 1792 | 1785 | ||
| 1793 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1786 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
| 1794 | if (ret) { | 1787 | if (ret) { |
| 1795 | mutex_unlock(&root->fs_info->reloc_mutex); | 1788 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1796 | goto cleanup_transaction; | 1789 | goto scrub_continue; |
| 1797 | } | 1790 | } |
| 1798 | 1791 | ||
| 1799 | /* | 1792 | /* |
| @@ -1823,7 +1816,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1823 | if (ret) { | 1816 | if (ret) { |
| 1824 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1817 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1825 | mutex_unlock(&root->fs_info->reloc_mutex); | 1818 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1826 | goto cleanup_transaction; | 1819 | goto scrub_continue; |
| 1827 | } | 1820 | } |
| 1828 | 1821 | ||
| 1829 | /* | 1822 | /* |
| @@ -1844,7 +1837,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1844 | if (ret) { | 1837 | if (ret) { |
| 1845 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1838 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1846 | mutex_unlock(&root->fs_info->reloc_mutex); | 1839 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1847 | goto cleanup_transaction; | 1840 | goto scrub_continue; |
| 1848 | } | 1841 | } |
| 1849 | 1842 | ||
| 1850 | /* | 1843 | /* |
| @@ -1855,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1855 | ret = cur_trans->aborted; | 1848 | ret = cur_trans->aborted; |
| 1856 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1849 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1857 | mutex_unlock(&root->fs_info->reloc_mutex); | 1850 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1858 | goto cleanup_transaction; | 1851 | goto scrub_continue; |
| 1859 | } | 1852 | } |
| 1860 | 1853 | ||
| 1861 | btrfs_prepare_extent_commit(trans, root); | 1854 | btrfs_prepare_extent_commit(trans, root); |
| @@ -1891,13 +1884,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1891 | btrfs_error(root->fs_info, ret, | 1884 | btrfs_error(root->fs_info, ret, |
| 1892 | "Error while writing out transaction"); | 1885 | "Error while writing out transaction"); |
| 1893 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1886 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1894 | goto cleanup_transaction; | 1887 | goto scrub_continue; |
| 1895 | } | 1888 | } |
| 1896 | 1889 | ||
| 1897 | ret = write_ctree_super(trans, root, 0); | 1890 | ret = write_ctree_super(trans, root, 0); |
| 1898 | if (ret) { | 1891 | if (ret) { |
| 1899 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1892 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1900 | goto cleanup_transaction; | 1893 | goto scrub_continue; |
| 1901 | } | 1894 | } |
| 1902 | 1895 | ||
| 1903 | /* | 1896 | /* |
| @@ -1940,6 +1933,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1940 | 1933 | ||
| 1941 | return ret; | 1934 | return ret; |
| 1942 | 1935 | ||
| 1936 | scrub_continue: | ||
| 1937 | btrfs_scrub_continue(root); | ||
| 1943 | cleanup_transaction: | 1938 | cleanup_transaction: |
| 1944 | btrfs_trans_release_metadata(trans, root); | 1939 | btrfs_trans_release_metadata(trans, root); |
| 1945 | trans->block_rsv = NULL; | 1940 | trans->block_rsv = NULL; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 39d83da03e03..e2f45fc02610 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -136,13 +136,20 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
| 136 | * syncing the tree wait for us to finish | 136 | * syncing the tree wait for us to finish |
| 137 | */ | 137 | */ |
| 138 | static int start_log_trans(struct btrfs_trans_handle *trans, | 138 | static int start_log_trans(struct btrfs_trans_handle *trans, |
| 139 | struct btrfs_root *root) | 139 | struct btrfs_root *root, |
| 140 | struct btrfs_log_ctx *ctx) | ||
| 140 | { | 141 | { |
| 142 | int index; | ||
| 141 | int ret; | 143 | int ret; |
| 142 | int err = 0; | ||
| 143 | 144 | ||
| 144 | mutex_lock(&root->log_mutex); | 145 | mutex_lock(&root->log_mutex); |
| 145 | if (root->log_root) { | 146 | if (root->log_root) { |
| 147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | ||
| 148 | trans->transid) { | ||
| 149 | ret = -EAGAIN; | ||
| 150 | goto out; | ||
| 151 | } | ||
| 152 | |||
| 146 | if (!root->log_start_pid) { | 153 | if (!root->log_start_pid) { |
| 147 | root->log_start_pid = current->pid; | 154 | root->log_start_pid = current->pid; |
| 148 | root->log_multiple_pids = false; | 155 | root->log_multiple_pids = false; |
| @@ -152,27 +159,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 152 | 159 | ||
| 153 | atomic_inc(&root->log_batch); | 160 | atomic_inc(&root->log_batch); |
| 154 | atomic_inc(&root->log_writers); | 161 | atomic_inc(&root->log_writers); |
| 162 | if (ctx) { | ||
| 163 | index = root->log_transid % 2; | ||
| 164 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 165 | ctx->log_transid = root->log_transid; | ||
| 166 | } | ||
| 155 | mutex_unlock(&root->log_mutex); | 167 | mutex_unlock(&root->log_mutex); |
| 156 | return 0; | 168 | return 0; |
| 157 | } | 169 | } |
| 158 | root->log_multiple_pids = false; | 170 | |
| 159 | root->log_start_pid = current->pid; | 171 | ret = 0; |
| 160 | mutex_lock(&root->fs_info->tree_log_mutex); | 172 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 161 | if (!root->fs_info->log_root_tree) { | 173 | if (!root->fs_info->log_root_tree) |
| 162 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 174 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| 163 | if (ret) | 175 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 164 | err = ret; | 176 | if (ret) |
| 165 | } | 177 | goto out; |
| 166 | if (err == 0 && !root->log_root) { | 178 | |
| 179 | if (!root->log_root) { | ||
| 167 | ret = btrfs_add_log_tree(trans, root); | 180 | ret = btrfs_add_log_tree(trans, root); |
| 168 | if (ret) | 181 | if (ret) |
| 169 | err = ret; | 182 | goto out; |
| 170 | } | 183 | } |
| 171 | mutex_unlock(&root->fs_info->tree_log_mutex); | 184 | root->log_multiple_pids = false; |
| 185 | root->log_start_pid = current->pid; | ||
| 172 | atomic_inc(&root->log_batch); | 186 | atomic_inc(&root->log_batch); |
| 173 | atomic_inc(&root->log_writers); | 187 | atomic_inc(&root->log_writers); |
| 188 | if (ctx) { | ||
| 189 | index = root->log_transid % 2; | ||
| 190 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 191 | ctx->log_transid = root->log_transid; | ||
| 192 | } | ||
| 193 | out: | ||
| 174 | mutex_unlock(&root->log_mutex); | 194 | mutex_unlock(&root->log_mutex); |
| 175 | return err; | 195 | return ret; |
| 176 | } | 196 | } |
| 177 | 197 | ||
| 178 | /* | 198 | /* |
| @@ -2359,8 +2379,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
| 2359 | return ret; | 2379 | return ret; |
| 2360 | } | 2380 | } |
| 2361 | 2381 | ||
| 2362 | static int wait_log_commit(struct btrfs_trans_handle *trans, | 2382 | static void wait_log_commit(struct btrfs_trans_handle *trans, |
| 2363 | struct btrfs_root *root, unsigned long transid) | 2383 | struct btrfs_root *root, int transid) |
| 2364 | { | 2384 | { |
| 2365 | DEFINE_WAIT(wait); | 2385 | DEFINE_WAIT(wait); |
| 2366 | int index = transid % 2; | 2386 | int index = transid % 2; |
| @@ -2375,36 +2395,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
| 2375 | &wait, TASK_UNINTERRUPTIBLE); | 2395 | &wait, TASK_UNINTERRUPTIBLE); |
| 2376 | mutex_unlock(&root->log_mutex); | 2396 | mutex_unlock(&root->log_mutex); |
| 2377 | 2397 | ||
| 2378 | if (root->fs_info->last_trans_log_full_commit != | 2398 | if (root->log_transid_committed < transid && |
| 2379 | trans->transid && root->log_transid < transid + 2 && | ||
| 2380 | atomic_read(&root->log_commit[index])) | 2399 | atomic_read(&root->log_commit[index])) |
| 2381 | schedule(); | 2400 | schedule(); |
| 2382 | 2401 | ||
| 2383 | finish_wait(&root->log_commit_wait[index], &wait); | 2402 | finish_wait(&root->log_commit_wait[index], &wait); |
| 2384 | mutex_lock(&root->log_mutex); | 2403 | mutex_lock(&root->log_mutex); |
| 2385 | } while (root->fs_info->last_trans_log_full_commit != | 2404 | } while (root->log_transid_committed < transid && |
| 2386 | trans->transid && root->log_transid < transid + 2 && | ||
| 2387 | atomic_read(&root->log_commit[index])); | 2405 | atomic_read(&root->log_commit[index])); |
| 2388 | return 0; | ||
| 2389 | } | 2406 | } |
| 2390 | 2407 | ||
| 2391 | static void wait_for_writer(struct btrfs_trans_handle *trans, | 2408 | static void wait_for_writer(struct btrfs_trans_handle *trans, |
| 2392 | struct btrfs_root *root) | 2409 | struct btrfs_root *root) |
| 2393 | { | 2410 | { |
| 2394 | DEFINE_WAIT(wait); | 2411 | DEFINE_WAIT(wait); |
| 2395 | while (root->fs_info->last_trans_log_full_commit != | 2412 | |
| 2396 | trans->transid && atomic_read(&root->log_writers)) { | 2413 | while (atomic_read(&root->log_writers)) { |
| 2397 | prepare_to_wait(&root->log_writer_wait, | 2414 | prepare_to_wait(&root->log_writer_wait, |
| 2398 | &wait, TASK_UNINTERRUPTIBLE); | 2415 | &wait, TASK_UNINTERRUPTIBLE); |
| 2399 | mutex_unlock(&root->log_mutex); | 2416 | mutex_unlock(&root->log_mutex); |
| 2400 | if (root->fs_info->last_trans_log_full_commit != | 2417 | if (atomic_read(&root->log_writers)) |
| 2401 | trans->transid && atomic_read(&root->log_writers)) | ||
| 2402 | schedule(); | 2418 | schedule(); |
| 2403 | mutex_lock(&root->log_mutex); | 2419 | mutex_lock(&root->log_mutex); |
| 2404 | finish_wait(&root->log_writer_wait, &wait); | 2420 | finish_wait(&root->log_writer_wait, &wait); |
| 2405 | } | 2421 | } |
| 2406 | } | 2422 | } |
| 2407 | 2423 | ||
| 2424 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | ||
| 2425 | struct btrfs_log_ctx *ctx) | ||
| 2426 | { | ||
| 2427 | if (!ctx) | ||
| 2428 | return; | ||
| 2429 | |||
| 2430 | mutex_lock(&root->log_mutex); | ||
| 2431 | list_del_init(&ctx->list); | ||
| 2432 | mutex_unlock(&root->log_mutex); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | /* | ||
| 2436 | * Invoked in log mutex context, or when it is certain that no other | ||
| 2437 | * task can access the list. | ||
| 2438 | */ | ||
| 2439 | static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, | ||
| 2440 | int index, int error) | ||
| 2441 | { | ||
| 2442 | struct btrfs_log_ctx *ctx; | ||
| 2443 | |||
| 2444 | if (!error) { | ||
| 2445 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2446 | return; | ||
| 2447 | } | ||
| 2448 | |||
| 2449 | list_for_each_entry(ctx, &root->log_ctxs[index], list) | ||
| 2450 | ctx->log_ret = error; | ||
| 2451 | |||
| 2452 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2453 | } | ||
| 2454 | |||
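btrfs_remove_all_log_ctxs() above is the committer's half of the new waiter/committer contract: each fsync registers a btrfs_log_ctx on the list for its log transid, and whoever commits that transid publishes the shared outcome to every registered ctx before emptying the list. The propagation half, slightly simplified (the patch skips the walk when there is no error, which is equivalent as long as every ctx starts with log_ret == 0):

#include <linux/list.h>

struct log_ctx_sketch {
	int log_ret;
	struct list_head list;
};

/* Called under log_mutex: hand @error to every waiter, then detach
 * them all by reinitialising the list head. */
static void propagate_log_result(struct list_head *ctxs, int error)
{
	struct log_ctx_sketch *ctx;

	list_for_each_entry(ctx, ctxs, list)
		ctx->log_ret = error;
	INIT_LIST_HEAD(ctxs);
}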
| 2408 | /* | 2455 | /* |
| 2409 | * btrfs_sync_log does sends a given tree log down to the disk and | 2456 | * btrfs_sync_log does sends a given tree log down to the disk and |
| 2410 | * updates the super blocks to record it. When this call is done, | 2457 | * updates the super blocks to record it. When this call is done, |
| @@ -2418,7 +2465,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
| 2418 | * that has happened. | 2465 | * that has happened. |
| 2419 | */ | 2466 | */ |
| 2420 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 2467 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 2421 | struct btrfs_root *root) | 2468 | struct btrfs_root *root, struct btrfs_log_ctx *ctx) |
| 2422 | { | 2469 | { |
| 2423 | int index1; | 2470 | int index1; |
| 2424 | int index2; | 2471 | int index2; |
| @@ -2426,22 +2473,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2426 | int ret; | 2473 | int ret; |
| 2427 | struct btrfs_root *log = root->log_root; | 2474 | struct btrfs_root *log = root->log_root; |
| 2428 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2475 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
| 2429 | unsigned long log_transid = 0; | 2476 | int log_transid = 0; |
| 2477 | struct btrfs_log_ctx root_log_ctx; | ||
| 2430 | struct blk_plug plug; | 2478 | struct blk_plug plug; |
| 2431 | 2479 | ||
| 2432 | mutex_lock(&root->log_mutex); | 2480 | mutex_lock(&root->log_mutex); |
| 2433 | log_transid = root->log_transid; | 2481 | log_transid = ctx->log_transid; |
| 2434 | index1 = root->log_transid % 2; | 2482 | if (root->log_transid_committed >= log_transid) { |
| 2483 | mutex_unlock(&root->log_mutex); | ||
| 2484 | return ctx->log_ret; | ||
| 2485 | } | ||
| 2486 | |||
| 2487 | index1 = log_transid % 2; | ||
| 2435 | if (atomic_read(&root->log_commit[index1])) { | 2488 | if (atomic_read(&root->log_commit[index1])) { |
| 2436 | wait_log_commit(trans, root, root->log_transid); | 2489 | wait_log_commit(trans, root, log_transid); |
| 2437 | mutex_unlock(&root->log_mutex); | 2490 | mutex_unlock(&root->log_mutex); |
| 2438 | return 0; | 2491 | return ctx->log_ret; |
| 2439 | } | 2492 | } |
| 2493 | ASSERT(log_transid == root->log_transid); | ||
| 2440 | atomic_set(&root->log_commit[index1], 1); | 2494 | atomic_set(&root->log_commit[index1], 1); |
| 2441 | 2495 | ||
| 2442 | /* wait for previous tree log sync to complete */ | 2496 | /* wait for previous tree log sync to complete */ |
| 2443 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2497 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2444 | wait_log_commit(trans, root, root->log_transid - 1); | 2498 | wait_log_commit(trans, root, log_transid - 1); |
| 2499 | |||
| 2445 | while (1) { | 2500 | while (1) { |
| 2446 | int batch = atomic_read(&root->log_batch); | 2501 | int batch = atomic_read(&root->log_batch); |
| 2447 | /* when we're on an ssd, just kick the log commit out */ | 2502 | /* when we're on an ssd, just kick the log commit out */ |
| @@ -2456,7 +2511,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2456 | } | 2511 | } |
| 2457 | 2512 | ||
| 2458 | /* bail out if we need to do a full commit */ | 2513 | /* bail out if we need to do a full commit */ |
| 2459 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2515 | trans->transid) { | ||
| 2460 | ret = -EAGAIN; | 2516 | ret = -EAGAIN; |
| 2461 | btrfs_free_logged_extents(log, log_transid); | 2517 | btrfs_free_logged_extents(log, log_transid); |
| 2462 | mutex_unlock(&root->log_mutex); | 2518 | mutex_unlock(&root->log_mutex); |
| @@ -2477,6 +2533,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2477 | blk_finish_plug(&plug); | 2533 | blk_finish_plug(&plug); |
| 2478 | btrfs_abort_transaction(trans, root, ret); | 2534 | btrfs_abort_transaction(trans, root, ret); |
| 2479 | btrfs_free_logged_extents(log, log_transid); | 2535 | btrfs_free_logged_extents(log, log_transid); |
| 2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2537 | trans->transid; | ||
| 2480 | mutex_unlock(&root->log_mutex); | 2538 | mutex_unlock(&root->log_mutex); |
| 2481 | goto out; | 2539 | goto out; |
| 2482 | } | 2540 | } |
| @@ -2486,7 +2544,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2486 | root->log_transid++; | 2544 | root->log_transid++; |
| 2487 | log->log_transid = root->log_transid; | 2545 | log->log_transid = root->log_transid; |
| 2488 | root->log_start_pid = 0; | 2546 | root->log_start_pid = 0; |
| 2489 | smp_mb(); | ||
| 2490 | /* | 2547 | /* |
| 2491 | * IO has been started, blocks of the log tree have WRITTEN flag set | 2548 | * IO has been started, blocks of the log tree have WRITTEN flag set |
| 2492 | * in their headers. New modifications of the log will be written to | 2549 | * in their headers. New modifications of the log will be written to
| @@ -2494,9 +2551,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2494 | */ | 2551 | */ |
| 2495 | mutex_unlock(&root->log_mutex); | 2552 | mutex_unlock(&root->log_mutex); |
| 2496 | 2553 | ||
| 2554 | btrfs_init_log_ctx(&root_log_ctx); | ||
| 2555 | |||
| 2497 | mutex_lock(&log_root_tree->log_mutex); | 2556 | mutex_lock(&log_root_tree->log_mutex); |
| 2498 | atomic_inc(&log_root_tree->log_batch); | 2557 | atomic_inc(&log_root_tree->log_batch); |
| 2499 | atomic_inc(&log_root_tree->log_writers); | 2558 | atomic_inc(&log_root_tree->log_writers); |
| 2559 | |||
| 2560 | index2 = log_root_tree->log_transid % 2; | ||
| 2561 | list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); | ||
| 2562 | root_log_ctx.log_transid = log_root_tree->log_transid; | ||
| 2563 | |||
| 2500 | mutex_unlock(&log_root_tree->log_mutex); | 2564 | mutex_unlock(&log_root_tree->log_mutex); |
| 2501 | 2565 | ||
| 2502 | ret = update_log_root(trans, log); | 2566 | ret = update_log_root(trans, log); |
| @@ -2509,13 +2573,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2509 | } | 2573 | } |
| 2510 | 2574 | ||
| 2511 | if (ret) { | 2575 | if (ret) { |
| 2576 | if (!list_empty(&root_log_ctx.list)) | ||
| 2577 | list_del_init(&root_log_ctx.list); | ||
| 2578 | |||
| 2512 | blk_finish_plug(&plug); | 2579 | blk_finish_plug(&plug); |
| 2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2581 | trans->transid; | ||
| 2513 | if (ret != -ENOSPC) { | 2582 | if (ret != -ENOSPC) { |
| 2514 | btrfs_abort_transaction(trans, root, ret); | 2583 | btrfs_abort_transaction(trans, root, ret); |
| 2515 | mutex_unlock(&log_root_tree->log_mutex); | 2584 | mutex_unlock(&log_root_tree->log_mutex); |
| 2516 | goto out; | 2585 | goto out; |
| 2517 | } | 2586 | } |
| 2518 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2519 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2587 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2520 | btrfs_free_logged_extents(log, log_transid); | 2588 | btrfs_free_logged_extents(log, log_transid); |
| 2521 | mutex_unlock(&log_root_tree->log_mutex); | 2589 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2523,22 +2591,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2523 | goto out; | 2591 | goto out; |
| 2524 | } | 2592 | } |
| 2525 | 2593 | ||
| 2526 | index2 = log_root_tree->log_transid % 2; | 2594 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
| 2595 | mutex_unlock(&log_root_tree->log_mutex); | ||
| 2596 | ret = root_log_ctx.log_ret; | ||
| 2597 | goto out; | ||
| 2598 | } | ||
| 2599 | |||
| 2600 | index2 = root_log_ctx.log_transid % 2; | ||
| 2527 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2601 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2528 | blk_finish_plug(&plug); | 2602 | blk_finish_plug(&plug); |
| 2529 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2603 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2530 | wait_log_commit(trans, log_root_tree, | 2604 | wait_log_commit(trans, log_root_tree, |
| 2531 | log_root_tree->log_transid); | 2605 | root_log_ctx.log_transid); |
| 2532 | btrfs_free_logged_extents(log, log_transid); | 2606 | btrfs_free_logged_extents(log, log_transid); |
| 2533 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
| 2534 | ret = 0; | 2608 | ret = root_log_ctx.log_ret; |
| 2535 | goto out; | 2609 | goto out; |
| 2536 | } | 2610 | } |
| 2611 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | ||
| 2537 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2612 | atomic_set(&log_root_tree->log_commit[index2], 1); |
| 2538 | 2613 | ||
| 2539 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { | 2614 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
| 2540 | wait_log_commit(trans, log_root_tree, | 2615 | wait_log_commit(trans, log_root_tree, |
| 2541 | log_root_tree->log_transid - 1); | 2616 | root_log_ctx.log_transid - 1); |
| 2542 | } | 2617 | } |
| 2543 | 2618 | ||
| 2544 | wait_for_writer(trans, log_root_tree); | 2619 | wait_for_writer(trans, log_root_tree); |
| @@ -2547,7 +2622,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2547 | * now that we've moved on to the tree of log tree roots, | 2622 | * now that we've moved on to the tree of log tree roots, |
| 2548 | * check the full commit flag again | 2623 | * check the full commit flag again |
| 2549 | */ | 2624 | */ |
| 2550 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2626 | trans->transid) { | ||
| 2551 | blk_finish_plug(&plug); | 2627 | blk_finish_plug(&plug); |
| 2552 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2553 | btrfs_free_logged_extents(log, log_transid); | 2629 | btrfs_free_logged_extents(log, log_transid); |
| @@ -2561,6 +2637,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2561 | EXTENT_DIRTY | EXTENT_NEW); | 2637 | EXTENT_DIRTY | EXTENT_NEW); |
| 2562 | blk_finish_plug(&plug); | 2638 | blk_finish_plug(&plug); |
| 2563 | if (ret) { | 2639 | if (ret) { |
| 2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2641 | trans->transid; | ||
| 2564 | btrfs_abort_transaction(trans, root, ret); | 2642 | btrfs_abort_transaction(trans, root, ret); |
| 2565 | btrfs_free_logged_extents(log, log_transid); | 2643 | btrfs_free_logged_extents(log, log_transid); |
| 2566 | mutex_unlock(&log_root_tree->log_mutex); | 2644 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2578,8 +2656,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2578 | btrfs_header_level(log_root_tree->node)); | 2656 | btrfs_header_level(log_root_tree->node)); |
| 2579 | 2657 | ||
| 2580 | log_root_tree->log_transid++; | 2658 | log_root_tree->log_transid++; |
| 2581 | smp_mb(); | ||
| 2582 | |||
| 2583 | mutex_unlock(&log_root_tree->log_mutex); | 2659 | mutex_unlock(&log_root_tree->log_mutex); |
| 2584 | 2660 | ||
| 2585 | /* | 2661 | /* |
| @@ -2591,6 +2667,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2591 | */ | 2667 | */ |
| 2592 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2593 | if (ret) { | 2669 | if (ret) { |
| 2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2671 | trans->transid; | ||
| 2594 | btrfs_abort_transaction(trans, root, ret); | 2672 | btrfs_abort_transaction(trans, root, ret); |
| 2595 | goto out_wake_log_root; | 2673 | goto out_wake_log_root; |
| 2596 | } | 2674 | } |
| @@ -2601,13 +2679,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2601 | mutex_unlock(&root->log_mutex); | 2679 | mutex_unlock(&root->log_mutex); |
| 2602 | 2680 | ||
| 2603 | out_wake_log_root: | 2681 | out_wake_log_root: |
| 2682 | /* | ||
| 2683 | * We needn't get log_mutex here because we are sure all | ||
| 2684 | * the other tasks are blocked. | ||
| 2685 | */ | ||
| 2686 | btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); | ||
| 2687 | |||
| 2688 | mutex_lock(&log_root_tree->log_mutex); | ||
| 2689 | log_root_tree->log_transid_committed++; | ||
| 2604 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2690 | atomic_set(&log_root_tree->log_commit[index2], 0); |
| 2605 | smp_mb(); | 2691 | mutex_unlock(&log_root_tree->log_mutex); |
| 2692 | |||
| 2606 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2693 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
| 2607 | wake_up(&log_root_tree->log_commit_wait[index2]); | 2694 | wake_up(&log_root_tree->log_commit_wait[index2]); |
| 2608 | out: | 2695 | out: |
| 2696 | /* See above. */ | ||
| 2697 | btrfs_remove_all_log_ctxs(root, index1, ret); | ||
| 2698 | |||
| 2699 | mutex_lock(&root->log_mutex); | ||
| 2700 | root->log_transid_committed++; | ||
| 2609 | atomic_set(&root->log_commit[index1], 0); | 2701 | atomic_set(&root->log_commit[index1], 0); |
| 2610 | smp_mb(); | 2702 | mutex_unlock(&root->log_mutex); |
| 2703 | |||
| 2611 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2704 | if (waitqueue_active(&root->log_commit_wait[index1])) |
| 2612 | wake_up(&root->log_commit_wait[index1]); | 2705 | wake_up(&root->log_commit_wait[index1]); |
| 2613 | return ret; | 2706 | return ret; |
| @@ -3479,7 +3572,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 3479 | 3572 | ||
| 3480 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3573 | static int log_one_extent(struct btrfs_trans_handle *trans, |
| 3481 | struct inode *inode, struct btrfs_root *root, | 3574 | struct inode *inode, struct btrfs_root *root, |
| 3482 | struct extent_map *em, struct btrfs_path *path) | 3575 | struct extent_map *em, struct btrfs_path *path, |
| 3576 | struct list_head *logged_list) | ||
| 3483 | { | 3577 | { |
| 3484 | struct btrfs_root *log = root->log_root; | 3578 | struct btrfs_root *log = root->log_root; |
| 3485 | struct btrfs_file_extent_item *fi; | 3579 | struct btrfs_file_extent_item *fi; |
| @@ -3495,7 +3589,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3495 | u64 extent_offset = em->start - em->orig_start; | 3589 | u64 extent_offset = em->start - em->orig_start; |
| 3496 | u64 block_len; | 3590 | u64 block_len; |
| 3497 | int ret; | 3591 | int ret; |
| 3498 | int index = log->log_transid % 2; | ||
| 3499 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3592 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
| 3500 | int extent_inserted = 0; | 3593 | int extent_inserted = 0; |
| 3501 | 3594 | ||
| @@ -3579,17 +3672,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3579 | * First check and see if our csums are on our outstanding ordered | 3672 | * First check and see if our csums are on our outstanding ordered |
| 3580 | * extents. | 3673 | * extents. |
| 3581 | */ | 3674 | */ |
| 3582 | again: | 3675 | list_for_each_entry(ordered, logged_list, log_list) { |
| 3583 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 3584 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
| 3585 | struct btrfs_ordered_sum *sum; | 3676 | struct btrfs_ordered_sum *sum; |
| 3586 | 3677 | ||
| 3587 | if (!mod_len) | 3678 | if (!mod_len) |
| 3588 | break; | 3679 | break; |
| 3589 | 3680 | ||
| 3590 | if (ordered->inode != inode) | ||
| 3591 | continue; | ||
| 3592 | |||
| 3593 | if (ordered->file_offset + ordered->len <= mod_start || | 3681 | if (ordered->file_offset + ordered->len <= mod_start || |
| 3594 | mod_start + mod_len <= ordered->file_offset) | 3682 | mod_start + mod_len <= ordered->file_offset) |
| 3595 | continue; | 3683 | continue; |
| @@ -3632,12 +3720,6 @@ again: | |||
| 3632 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | 3720 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, |
| 3633 | &ordered->flags)) | 3721 | &ordered->flags)) |
| 3634 | continue; | 3722 | continue; |
| 3635 | atomic_inc(&ordered->refs); | ||
| 3636 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3637 | /* | ||
| 3638 | * we've dropped the lock, we must either break or | ||
| 3639 | * start over after this. | ||
| 3640 | */ | ||
| 3641 | 3723 | ||
| 3642 | if (ordered->csum_bytes_left) { | 3724 | if (ordered->csum_bytes_left) { |
| 3643 | btrfs_start_ordered_extent(inode, ordered, 0); | 3725 | btrfs_start_ordered_extent(inode, ordered, 0); |
| @@ -3647,16 +3729,11 @@ again: | |||
| 3647 | 3729 | ||
| 3648 | list_for_each_entry(sum, &ordered->list, list) { | 3730 | list_for_each_entry(sum, &ordered->list, list) { |
| 3649 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3731 | ret = btrfs_csum_file_blocks(trans, log, sum); |
| 3650 | if (ret) { | 3732 | if (ret) |
| 3651 | btrfs_put_ordered_extent(ordered); | ||
| 3652 | goto unlocked; | 3733 | goto unlocked; |
| 3653 | } | ||
| 3654 | } | 3734 | } |
| 3655 | btrfs_put_ordered_extent(ordered); | ||
| 3656 | goto again; | ||
| 3657 | 3735 | ||
| 3658 | } | 3736 | } |
| 3659 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3660 | unlocked: | 3737 | unlocked: |
| 3661 | 3738 | ||
| 3662 | if (!mod_len || ret) | 3739 | if (!mod_len || ret) |
| @@ -3694,7 +3771,8 @@ unlocked: | |||
| 3694 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3771 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
| 3695 | struct btrfs_root *root, | 3772 | struct btrfs_root *root, |
| 3696 | struct inode *inode, | 3773 | struct inode *inode, |
| 3697 | struct btrfs_path *path) | 3774 | struct btrfs_path *path, |
| 3775 | struct list_head *logged_list) | ||
| 3698 | { | 3776 | { |
| 3699 | struct extent_map *em, *n; | 3777 | struct extent_map *em, *n; |
| 3700 | struct list_head extents; | 3778 | struct list_head extents; |
| @@ -3752,7 +3830,7 @@ process: | |||
| 3752 | 3830 | ||
| 3753 | write_unlock(&tree->lock); | 3831 | write_unlock(&tree->lock); |
| 3754 | 3832 | ||
| 3755 | ret = log_one_extent(trans, inode, root, em, path); | 3833 | ret = log_one_extent(trans, inode, root, em, path, logged_list); |
| 3756 | write_lock(&tree->lock); | 3834 | write_lock(&tree->lock); |
| 3757 | clear_em_logging(tree, em); | 3835 | clear_em_logging(tree, em); |
| 3758 | free_extent_map(em); | 3836 | free_extent_map(em); |
| @@ -3788,6 +3866,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3788 | struct btrfs_key max_key; | 3866 | struct btrfs_key max_key; |
| 3789 | struct btrfs_root *log = root->log_root; | 3867 | struct btrfs_root *log = root->log_root; |
| 3790 | struct extent_buffer *src = NULL; | 3868 | struct extent_buffer *src = NULL; |
| 3869 | LIST_HEAD(logged_list); | ||
| 3791 | u64 last_extent = 0; | 3870 | u64 last_extent = 0; |
| 3792 | int err = 0; | 3871 | int err = 0; |
| 3793 | int ret; | 3872 | int ret; |
| @@ -3836,7 +3915,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3836 | 3915 | ||
| 3837 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3916 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| 3838 | 3917 | ||
| 3839 | btrfs_get_logged_extents(log, inode); | 3918 | btrfs_get_logged_extents(inode, &logged_list); |
| 3840 | 3919 | ||
| 3841 | /* | 3920 | /* |
| 3842 | * a brute force approach to making sure we get the most uptodate | 3921 | * a brute force approach to making sure we get the most uptodate |
| @@ -3962,7 +4041,8 @@ log_extents: | |||
| 3962 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
| 3963 | btrfs_release_path(dst_path); | 4042 | btrfs_release_path(dst_path); |
| 3964 | if (fast_search) { | 4043 | if (fast_search) { |
| 3965 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 4044 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
| 4045 | &logged_list); | ||
| 3966 | if (ret) { | 4046 | if (ret) { |
| 3967 | err = ret; | 4047 | err = ret; |
| 3968 | goto out_unlock; | 4048 | goto out_unlock; |
| @@ -3987,8 +4067,10 @@ log_extents: | |||
| 3987 | BTRFS_I(inode)->logged_trans = trans->transid; | 4067 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3988 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4068 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
| 3989 | out_unlock: | 4069 | out_unlock: |
| 3990 | if (err) | 4070 | if (unlikely(err)) |
| 3991 | btrfs_free_logged_extents(log, log->log_transid); | 4071 | btrfs_put_logged_extents(&logged_list); |
| 4072 | else | ||
| 4073 | btrfs_submit_logged_extents(&logged_list, log); | ||
| 3992 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 4074 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 3993 | 4075 | ||
| 3994 | btrfs_free_path(path); | 4076 | btrfs_free_path(path); |
| @@ -4079,7 +4161,8 @@ out: | |||
| 4079 | */ | 4161 | */ |
| 4080 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4162 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
| 4081 | struct btrfs_root *root, struct inode *inode, | 4163 | struct btrfs_root *root, struct inode *inode, |
| 4082 | struct dentry *parent, int exists_only) | 4164 | struct dentry *parent, int exists_only, |
| 4165 | struct btrfs_log_ctx *ctx) | ||
| 4083 | { | 4166 | { |
| 4084 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4167 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
| 4085 | struct super_block *sb; | 4168 | struct super_block *sb; |
| @@ -4116,9 +4199,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 4116 | goto end_no_trans; | 4199 | goto end_no_trans; |
| 4117 | } | 4200 | } |
| 4118 | 4201 | ||
| 4119 | ret = start_log_trans(trans, root); | 4202 | ret = start_log_trans(trans, root, ctx); |
| 4120 | if (ret) | 4203 | if (ret) |
| 4121 | goto end_trans; | 4204 | goto end_no_trans; |
| 4122 | 4205 | ||
| 4123 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4206 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 4124 | if (ret) | 4207 | if (ret) |
| @@ -4166,6 +4249,9 @@ end_trans: | |||
| 4166 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 4167 | ret = 1; | 4250 | ret = 1; |
| 4168 | } | 4251 | } |
| 4252 | |||
| 4253 | if (ret) | ||
| 4254 | btrfs_remove_log_ctx(root, ctx); | ||
| 4169 | btrfs_end_log_trans(root); | 4255 | btrfs_end_log_trans(root); |
| 4170 | end_no_trans: | 4256 | end_no_trans: |
| 4171 | return ret; | 4257 | return ret; |
| @@ -4178,12 +4264,14 @@ end_no_trans: | |||
| 4178 | * data on disk. | 4264 | * data on disk. |
| 4179 | */ | 4265 | */ |
| 4180 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4266 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 4181 | struct btrfs_root *root, struct dentry *dentry) | 4267 | struct btrfs_root *root, struct dentry *dentry, |
| 4268 | struct btrfs_log_ctx *ctx) | ||
| 4182 | { | 4269 | { |
| 4183 | struct dentry *parent = dget_parent(dentry); | 4270 | struct dentry *parent = dget_parent(dentry); |
| 4184 | int ret; | 4271 | int ret; |
| 4185 | 4272 | ||
| 4186 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | 4273 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
| 4274 | 0, ctx); | ||
| 4187 | dput(parent); | 4275 | dput(parent); |
| 4188 | 4276 | ||
| 4189 | return ret; | 4277 | return ret; |
| @@ -4420,6 +4508,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
| 4420 | root->fs_info->last_trans_committed)) | 4508 | root->fs_info->last_trans_committed)) |
| 4421 | return 0; | 4509 | return 0; |
| 4422 | 4510 | ||
| 4423 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | 4511 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); |
| 4424 | } | 4512 | } |
| 4425 | 4513 | ||
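
The tree-log.c hunks above replace plain loads and stores of last_trans_log_full_commit with ACCESS_ONCE() and drop two bare smp_mb() calls. A minimal sketch of the resulting handshake; the helper names are illustrative and not part of the patch:

    /* Error paths publish the transid; committers test against it.
     * ACCESS_ONCE() forces a single untorn, uncached access on both
     * sides, which is presumably why the smp_mb() pairs could go. */
    static void mark_log_full_commit(struct btrfs_fs_info *fs_info,
                                     struct btrfs_trans_handle *trans)
    {
            ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
    }

    static bool need_log_full_commit(struct btrfs_fs_info *fs_info,
                                     struct btrfs_trans_handle *trans)
    {
            return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
                   trans->transid;
    }
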
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..91b145fce333 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -22,14 +22,28 @@ | |||
| 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
| 23 | #define BTRFS_NO_LOG_SYNC 256 | 23 | #define BTRFS_NO_LOG_SYNC 256 |
| 24 | 24 | ||
| 25 | struct btrfs_log_ctx { | ||
| 26 | int log_ret; | ||
| 27 | int log_transid; | ||
| 28 | struct list_head list; | ||
| 29 | }; | ||
| 30 | |||
| 31 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | ||
| 32 | { | ||
| 33 | ctx->log_ret = 0; | ||
| 34 | ctx->log_transid = 0; | ||
| 35 | INIT_LIST_HEAD(&ctx->list); | ||
| 36 | } | ||
| 37 | |||
| 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 26 | struct btrfs_root *root); | 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
| 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
| 28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | 41 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
| 29 | struct btrfs_fs_info *fs_info); | 42 | struct btrfs_fs_info *fs_info); |
| 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 43 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
| 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 44 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 32 | struct btrfs_root *root, struct dentry *dentry); | 45 | struct btrfs_root *root, struct dentry *dentry, |
| 46 | struct btrfs_log_ctx *ctx); | ||
| 33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 47 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
| 34 | struct btrfs_root *root, | 48 | struct btrfs_root *root, |
| 35 | const char *name, int name_len, | 49 | const char *name, int name_len, |
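
The new btrfs_log_ctx is a small on-stack handle: btrfs_init_log_ctx() clears the result and transid and readies the list node, the context is threaded through btrfs_log_dentry_safe() onto the per-root ctx list, and whichever task ends up performing the commit fans the outcome back out through every queued context. A hedged sketch of the expected caller; log_dentry_and_sync() is a hypothetical wrapper and error handling (including the BTRFS_NO_LOG_SYNC short-circuit) is trimmed:

    int log_dentry_and_sync(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, struct dentry *dentry)
    {
            struct btrfs_log_ctx ctx;
            int ret;

            btrfs_init_log_ctx(&ctx);       /* log_ret = 0, empty list node */

            ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
            if (ret)
                    return ret;

            /* Even if another task ends up doing the commit, its result
             * reaches us via ctx.log_ret / the btrfs_sync_log() return. */
            return btrfs_sync_log(trans, root, &ctx);
    }
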
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bab0b84d8f80..d241130a32fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -415,7 +415,8 @@ loop_lock: | |||
| 415 | device->running_pending = 1; | 415 | device->running_pending = 1; |
| 416 | 416 | ||
| 417 | spin_unlock(&device->io_lock); | 417 | spin_unlock(&device->io_lock); |
| 418 | btrfs_requeue_work(&device->work); | 418 | btrfs_queue_work(fs_info->submit_workers, |
| 419 | &device->work); | ||
| 419 | goto done; | 420 | goto done; |
| 420 | } | 421 | } |
| 421 | /* unplug every 64 requests just for good measure */ | 422 | /* unplug every 64 requests just for good measure */ |
| @@ -5263,6 +5264,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 5263 | static void btrfs_end_bio(struct bio *bio, int err) | 5264 | static void btrfs_end_bio(struct bio *bio, int err) |
| 5264 | { | 5265 | { |
| 5265 | struct btrfs_bio *bbio = bio->bi_private; | 5266 | struct btrfs_bio *bbio = bio->bi_private; |
| 5267 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
| 5266 | int is_orig_bio = 0; | 5268 | int is_orig_bio = 0; |
| 5267 | 5269 | ||
| 5268 | if (err) { | 5270 | if (err) { |
| @@ -5270,7 +5272,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5270 | if (err == -EIO || err == -EREMOTEIO) { | 5272 | if (err == -EIO || err == -EREMOTEIO) { |
| 5271 | unsigned int stripe_index = | 5273 | unsigned int stripe_index = |
| 5272 | btrfs_io_bio(bio)->stripe_index; | 5274 | btrfs_io_bio(bio)->stripe_index; |
| 5273 | struct btrfs_device *dev; | ||
| 5274 | 5275 | ||
| 5275 | BUG_ON(stripe_index >= bbio->num_stripes); | 5276 | BUG_ON(stripe_index >= bbio->num_stripes); |
| 5276 | dev = bbio->stripes[stripe_index].dev; | 5277 | dev = bbio->stripes[stripe_index].dev; |
| @@ -5292,6 +5293,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5292 | if (bio == bbio->orig_bio) | 5293 | if (bio == bbio->orig_bio) |
| 5293 | is_orig_bio = 1; | 5294 | is_orig_bio = 1; |
| 5294 | 5295 | ||
| 5296 | btrfs_bio_counter_dec(bbio->fs_info); | ||
| 5297 | |||
| 5295 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5298 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
| 5296 | if (!is_orig_bio) { | 5299 | if (!is_orig_bio) { |
| 5297 | bio_put(bio); | 5300 | bio_put(bio); |
| @@ -5328,13 +5331,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5328 | } | 5331 | } |
| 5329 | } | 5332 | } |
| 5330 | 5333 | ||
| 5331 | struct async_sched { | ||
| 5332 | struct bio *bio; | ||
| 5333 | int rw; | ||
| 5334 | struct btrfs_fs_info *info; | ||
| 5335 | struct btrfs_work work; | ||
| 5336 | }; | ||
| 5337 | |||
| 5338 | /* | 5334 | /* |
| 5339 | * see run_scheduled_bios for a description of why bios are collected for | 5335 | * see run_scheduled_bios for a description of why bios are collected for |
| 5340 | * async submit. | 5336 | * async submit. |
| @@ -5391,8 +5387,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, | |||
| 5391 | spin_unlock(&device->io_lock); | 5387 | spin_unlock(&device->io_lock); |
| 5392 | 5388 | ||
| 5393 | if (should_queue) | 5389 | if (should_queue) |
| 5394 | btrfs_queue_worker(&root->fs_info->submit_workers, | 5390 | btrfs_queue_work(root->fs_info->submit_workers, |
| 5395 | &device->work); | 5391 | &device->work); |
| 5396 | } | 5392 | } |
| 5397 | 5393 | ||
| 5398 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | 5394 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, |
| @@ -5447,6 +5443,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
| 5447 | } | 5443 | } |
| 5448 | #endif | 5444 | #endif |
| 5449 | bio->bi_bdev = dev->bdev; | 5445 | bio->bi_bdev = dev->bdev; |
| 5446 | |||
| 5447 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
| 5448 | |||
| 5450 | if (async) | 5449 | if (async) |
| 5451 | btrfs_schedule_bio(root, dev, rw, bio); | 5450 | btrfs_schedule_bio(root, dev, rw, bio); |
| 5452 | else | 5451 | else |
| @@ -5515,28 +5514,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5515 | length = bio->bi_iter.bi_size; | 5514 | length = bio->bi_iter.bi_size; |
| 5516 | map_length = length; | 5515 | map_length = length; |
| 5517 | 5516 | ||
| 5517 | btrfs_bio_counter_inc_blocked(root->fs_info); | ||
| 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
| 5519 | mirror_num, &raid_map); | 5519 | mirror_num, &raid_map); |
| 5520 | if (ret) /* -ENOMEM */ | 5520 | if (ret) { |
| 5521 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5521 | return ret; | 5522 | return ret; |
| 5523 | } | ||
| 5522 | 5524 | ||
| 5523 | total_devs = bbio->num_stripes; | 5525 | total_devs = bbio->num_stripes; |
| 5524 | bbio->orig_bio = first_bio; | 5526 | bbio->orig_bio = first_bio; |
| 5525 | bbio->private = first_bio->bi_private; | 5527 | bbio->private = first_bio->bi_private; |
| 5526 | bbio->end_io = first_bio->bi_end_io; | 5528 | bbio->end_io = first_bio->bi_end_io; |
| 5529 | bbio->fs_info = root->fs_info; | ||
| 5527 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5530 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
| 5528 | 5531 | ||
| 5529 | if (raid_map) { | 5532 | if (raid_map) { |
| 5530 | /* In this case, map_length has been set to the length of | 5533 | /* In this case, map_length has been set to the length of |
| 5531 | a single stripe; not the whole write */ | 5534 | a single stripe; not the whole write */ |
| 5532 | if (rw & WRITE) { | 5535 | if (rw & WRITE) { |
| 5533 | return raid56_parity_write(root, bio, bbio, | 5536 | ret = raid56_parity_write(root, bio, bbio, |
| 5534 | raid_map, map_length); | 5537 | raid_map, map_length); |
| 5535 | } else { | 5538 | } else { |
| 5536 | return raid56_parity_recover(root, bio, bbio, | 5539 | ret = raid56_parity_recover(root, bio, bbio, |
| 5537 | raid_map, map_length, | 5540 | raid_map, map_length, |
| 5538 | mirror_num); | 5541 | mirror_num); |
| 5539 | } | 5542 | } |
| 5543 | /* | ||
| 5544 | * FIXME, replace doesn't support raid56 yet, please fix | ||
| 5545 | * it in the future. | ||
| 5546 | */ | ||
| 5547 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5548 | return ret; | ||
| 5540 | } | 5549 | } |
| 5541 | 5550 | ||
| 5542 | if (map_length < length) { | 5551 | if (map_length < length) { |
| @@ -5578,6 +5587,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5578 | async_submit); | 5587 | async_submit); |
| 5579 | dev_nr++; | 5588 | dev_nr++; |
| 5580 | } | 5589 | } |
| 5590 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5581 | return 0; | 5591 | return 0; |
| 5582 | } | 5592 | } |
| 5583 | 5593 | ||
| @@ -5666,7 +5676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
| 5666 | else | 5676 | else |
| 5667 | generate_random_uuid(dev->uuid); | 5677 | generate_random_uuid(dev->uuid); |
| 5668 | 5678 | ||
| 5669 | dev->work.func = pending_bios_fn; | 5679 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); |
| 5670 | 5680 | ||
| 5671 | return dev; | 5681 | return dev; |
| 5672 | } | 5682 | } |
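
The volumes.c hunks migrate the device work item from the old btrfs_workers interface (a bare work.func assignment, btrfs_requeue_work(), and btrfs_queue_worker() taking a pointer to the embedded pool) to the new btrfs_workqueue API. The pattern, using only the calls visible above; the two NULL arguments appear to be optional ordered-completion callbacks:

    /* One-time setup, as in btrfs_alloc_device(): bind the handler. */
    btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);

    /* Submission, as in btrfs_schedule_bio() and run_scheduled_bios();
     * note the queue argument is now the pointer itself, not &pool. */
    btrfs_queue_work(root->fs_info->submit_workers, &dev->work);
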
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd142b373..80754f9dd3df 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -192,6 +192,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
| 192 | 192 | ||
| 193 | struct btrfs_bio { | 193 | struct btrfs_bio { |
| 194 | atomic_t stripes_pending; | 194 | atomic_t stripes_pending; |
| 195 | struct btrfs_fs_info *fs_info; | ||
| 195 | bio_end_io_t *end_io; | 196 | bio_end_io_t *end_io; |
| 196 | struct bio *orig_bio; | 197 | struct bio *orig_bio; |
| 197 | void *private; | 198 | void *private; |
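
The fs_info back-pointer added to struct btrfs_bio is what lets btrfs_end_bio() drop the per-filesystem bio counter that btrfs_map_bio() now takes. The pairing visible in the volumes.c hunks, sketched as a call graph; the stated purpose (letting a dev-replace style operation block new bios and wait for in-flight ones to drain) is an assumption:

    /*
     * btrfs_map_bio()
     *     btrfs_bio_counter_inc_blocked(fs_info);      // once per map,
     *                                                  // may block
     *     submit_stripe_bio()                          // per stripe
     *         btrfs_bio_counter_inc_noblocked(fs_info);
     *     btrfs_bio_counter_dec(fs_info);              // drop the map ref
     *                                                  // (also on errors
     *                                                  // and raid56 paths)
     *
     * btrfs_end_bio()                                  // per stripe done
     *     btrfs_bio_counter_dec(bbio->fs_info);        // why fs_info now
     *                                                  // lives in the bbio
     */
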
diff --git a/fs/buffer.c b/fs/buffer.c index 27265a8b43c1..8c53a2b15ecb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -3088,7 +3088,7 @@ EXPORT_SYMBOL(submit_bh); | |||
| 3088 | * until the buffer gets unlocked). | 3088 | * until the buffer gets unlocked). |
| 3089 | * | 3089 | * |
| 3090 | * ll_rw_block sets b_end_io to simple completion handler that marks | 3090 | * ll_rw_block sets b_end_io to simple completion handler that marks |
| 3091 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes | 3091 | * the buffer up-to-date (if appropriate), unlocks the buffer and wakes |
| 3092 | * any waiters. | 3092 | * any waiters. |
| 3093 | * | 3093 | * |
| 3094 | * All of the buffers must be for the same device, and must also be a | 3094 | * All of the buffers must be for the same device, and must also be a |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ca65f39dc8dc..6494d9f673aa 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
| @@ -391,12 +391,12 @@ try_again: | |||
| 391 | path.dentry = dir; | 391 | path.dentry = dir; |
| 392 | path_to_graveyard.mnt = cache->mnt; | 392 | path_to_graveyard.mnt = cache->mnt; |
| 393 | path_to_graveyard.dentry = cache->graveyard; | 393 | path_to_graveyard.dentry = cache->graveyard; |
| 394 | ret = security_path_rename(&path, rep, &path_to_graveyard, grave); | 394 | ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); |
| 395 | if (ret < 0) { | 395 | if (ret < 0) { |
| 396 | cachefiles_io_error(cache, "Rename security error %d", ret); | 396 | cachefiles_io_error(cache, "Rename security error %d", ret); |
| 397 | } else { | 397 | } else { |
| 398 | ret = vfs_rename(dir->d_inode, rep, | 398 | ret = vfs_rename(dir->d_inode, rep, |
| 399 | cache->graveyard->d_inode, grave, NULL); | 399 | cache->graveyard->d_inode, grave, NULL, 0); |
| 400 | if (ret != 0 && ret != -ENOMEM) | 400 | if (ret != 0 && ret != -ENOMEM) |
| 401 | cachefiles_io_error(cache, | 401 | cachefiles_io_error(cache, |
| 402 | "Rename failed with error %d", ret); | 402 | "Rename failed with error %d", ret); |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index ebaff368120d..4b1fb5ca65b8 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
| @@ -265,24 +265,22 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | |||
| 265 | goto nomem_monitor; | 265 | goto nomem_monitor; |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | ret = add_to_page_cache(newpage, bmapping, | 268 | ret = add_to_page_cache_lru(newpage, bmapping, |
| 269 | netpage->index, cachefiles_gfp); | 269 | netpage->index, cachefiles_gfp); |
| 270 | if (ret == 0) | 270 | if (ret == 0) |
| 271 | goto installed_new_backing_page; | 271 | goto installed_new_backing_page; |
| 272 | if (ret != -EEXIST) | 272 | if (ret != -EEXIST) |
| 273 | goto nomem_page; | 273 | goto nomem_page; |
| 274 | } | 274 | } |
| 275 | 275 | ||
| 276 | /* we've installed a new backing page, so now we need to add it | 276 | /* we've installed a new backing page, so now we need to start |
| 277 | * to the LRU list and start it reading */ | 277 | * it reading */ |
| 278 | installed_new_backing_page: | 278 | installed_new_backing_page: |
| 279 | _debug("- new %p", newpage); | 279 | _debug("- new %p", newpage); |
| 280 | 280 | ||
| 281 | backpage = newpage; | 281 | backpage = newpage; |
| 282 | newpage = NULL; | 282 | newpage = NULL; |
| 283 | 283 | ||
| 284 | lru_cache_add_file(backpage); | ||
| 285 | |||
| 286 | read_backing_page: | 284 | read_backing_page: |
| 287 | ret = bmapping->a_ops->readpage(NULL, backpage); | 285 | ret = bmapping->a_ops->readpage(NULL, backpage); |
| 288 | if (ret < 0) | 286 | if (ret < 0) |
| @@ -510,24 +508,23 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 510 | goto nomem; | 508 | goto nomem; |
| 511 | } | 509 | } |
| 512 | 510 | ||
| 513 | ret = add_to_page_cache(newpage, bmapping, | 511 | ret = add_to_page_cache_lru(newpage, bmapping, |
| 514 | netpage->index, cachefiles_gfp); | 512 | netpage->index, |
| 513 | cachefiles_gfp); | ||
| 515 | if (ret == 0) | 514 | if (ret == 0) |
| 516 | goto installed_new_backing_page; | 515 | goto installed_new_backing_page; |
| 517 | if (ret != -EEXIST) | 516 | if (ret != -EEXIST) |
| 518 | goto nomem; | 517 | goto nomem; |
| 519 | } | 518 | } |
| 520 | 519 | ||
| 521 | /* we've installed a new backing page, so now we need to add it | 520 | /* we've installed a new backing page, so now we need |
| 522 | * to the LRU list and start it reading */ | 521 | * to start it reading */ |
| 523 | installed_new_backing_page: | 522 | installed_new_backing_page: |
| 524 | _debug("- new %p", newpage); | 523 | _debug("- new %p", newpage); |
| 525 | 524 | ||
| 526 | backpage = newpage; | 525 | backpage = newpage; |
| 527 | newpage = NULL; | 526 | newpage = NULL; |
| 528 | 527 | ||
| 529 | lru_cache_add_file(backpage); | ||
| 530 | |||
| 531 | reread_backing_page: | 528 | reread_backing_page: |
| 532 | ret = bmapping->a_ops->readpage(NULL, backpage); | 529 | ret = bmapping->a_ops->readpage(NULL, backpage); |
| 533 | if (ret < 0) | 530 | if (ret < 0) |
| @@ -538,8 +535,8 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 538 | monitor_backing_page: | 535 | monitor_backing_page: |
| 539 | _debug("- monitor add"); | 536 | _debug("- monitor add"); |
| 540 | 537 | ||
| 541 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | 538 | ret = add_to_page_cache_lru(netpage, op->mapping, |
| 542 | cachefiles_gfp); | 539 | netpage->index, cachefiles_gfp); |
| 543 | if (ret < 0) { | 540 | if (ret < 0) { |
| 544 | if (ret == -EEXIST) { | 541 | if (ret == -EEXIST) { |
| 545 | page_cache_release(netpage); | 542 | page_cache_release(netpage); |
| @@ -549,8 +546,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 549 | goto nomem; | 546 | goto nomem; |
| 550 | } | 547 | } |
| 551 | 548 | ||
| 552 | lru_cache_add_file(netpage); | ||
| 553 | |||
| 554 | /* install a monitor */ | 549 | /* install a monitor */ |
| 555 | page_cache_get(netpage); | 550 | page_cache_get(netpage); |
| 556 | monitor->netfs_page = netpage; | 551 | monitor->netfs_page = netpage; |
| @@ -613,8 +608,8 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 613 | backing_page_already_uptodate: | 608 | backing_page_already_uptodate: |
| 614 | _debug("- uptodate"); | 609 | _debug("- uptodate"); |
| 615 | 610 | ||
| 616 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | 611 | ret = add_to_page_cache_lru(netpage, op->mapping, |
| 617 | cachefiles_gfp); | 612 | netpage->index, cachefiles_gfp); |
| 618 | if (ret < 0) { | 613 | if (ret < 0) { |
| 619 | if (ret == -EEXIST) { | 614 | if (ret == -EEXIST) { |
| 620 | page_cache_release(netpage); | 615 | page_cache_release(netpage); |
| @@ -631,8 +626,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
| 631 | 626 | ||
| 632 | fscache_mark_page_cached(op, netpage); | 627 | fscache_mark_page_cached(op, netpage); |
| 633 | 628 | ||
| 634 | lru_cache_add_file(netpage); | ||
| 635 | |||
| 636 | /* the netpage is unlocked and marked up to date here */ | 629 | /* the netpage is unlocked and marked up to date here */ |
| 637 | fscache_end_io(op, netpage, 0); | 630 | fscache_end_io(op, netpage, 0); |
| 638 | page_cache_release(netpage); | 631 | page_cache_release(netpage); |
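
Each add_to_page_cache() plus later lru_cache_add_file() pair in this file collapses into a single add_to_page_cache_lru() call, so a freshly inserted page reaches the LRU in the same step instead of sitting in the page cache invisible to reclaim in between. Schematically, with names from the hunks above:

    /* Old: insert first, add to the file LRU as a separate step. */
    ret = add_to_page_cache(newpage, bmapping, netpage->index,
                            cachefiles_gfp);
    /* ... */
    lru_cache_add_file(backpage);

    /* New: one call inserts into the page cache and the LRU together. */
    ret = add_to_page_cache_lru(newpage, bmapping, netpage->index,
                                cachefiles_gfp);
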
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 849f6132b327..2c70cbe35d39 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -286,7 +286,7 @@ cifs_destroy_inode(struct inode *inode) | |||
| 286 | static void | 286 | static void |
| 287 | cifs_evict_inode(struct inode *inode) | 287 | cifs_evict_inode(struct inode *inode) |
| 288 | { | 288 | { |
| 289 | truncate_inode_pages(&inode->i_data, 0); | 289 | truncate_inode_pages_final(&inode->i_data); |
| 290 | clear_inode(inode); | 290 | clear_inode(inode); |
| 291 | cifs_fscache_release_inode_cookie(inode); | 291 | cifs_fscache_release_inode_cookie(inode); |
| 292 | } | 292 | } |
| @@ -541,6 +541,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) | |||
| 541 | 541 | ||
| 542 | static int cifs_remount(struct super_block *sb, int *flags, char *data) | 542 | static int cifs_remount(struct super_block *sb, int *flags, char *data) |
| 543 | { | 543 | { |
| 544 | sync_filesystem(sb); | ||
| 544 | *flags |= MS_NODIRATIME; | 545 | *flags |= MS_NODIRATIME; |
| 545 | return 0; | 546 | return 0; |
| 546 | } | 547 | } |
| @@ -1005,7 +1006,7 @@ cifs_init_once(void *inode) | |||
| 1005 | init_rwsem(&cifsi->lock_sem); | 1006 | init_rwsem(&cifsi->lock_sem); |
| 1006 | } | 1007 | } |
| 1007 | 1008 | ||
| 1008 | static int | 1009 | static int __init |
| 1009 | cifs_init_inodecache(void) | 1010 | cifs_init_inodecache(void) |
| 1010 | { | 1011 | { |
| 1011 | cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", | 1012 | cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cf32f0393369..c0f3718b77a8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -513,7 +513,7 @@ struct cifs_mnt_data { | |||
| 513 | static inline unsigned int | 513 | static inline unsigned int |
| 514 | get_rfc1002_length(void *buf) | 514 | get_rfc1002_length(void *buf) |
| 515 | { | 515 | { |
| 516 | return be32_to_cpu(*((__be32 *)buf)); | 516 | return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; |
| 517 | } | 517 | } |
| 518 | 518 | ||
| 519 | static inline void | 519 | static inline void |
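
The new & 0xffffff in get_rfc1002_length() reflects the layout of the 4-byte RFC 1002 style transport header: byte 0 is the message type and only the low three bytes carry the length, so loading all four bytes big-endian and masking keeps the 24-bit length while a non-zero type byte can no longer inflate it. The helper with that layout spelled out:

    static inline unsigned int
    get_rfc1002_length(void *buf)
    {
            /* byte 0: message type; bytes 1-3: big-endian length */
            return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
    }
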
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 53c15074bb36..834fce759d80 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2579 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 2579 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
| 2580 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; | 2580 | struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; |
| 2581 | ssize_t rc = -EACCES; | 2581 | ssize_t rc = -EACCES; |
| 2582 | loff_t lock_pos = pos; | ||
| 2582 | 2583 | ||
| 2583 | BUG_ON(iocb->ki_pos != pos); | 2584 | if (file->f_flags & O_APPEND) |
| 2584 | 2585 | lock_pos = i_size_read(inode); | |
| 2585 | /* | 2586 | /* |
| 2586 | * We need to hold the sem to be sure nobody modifies lock list | 2587 | * We need to hold the sem to be sure nobody modifies lock list |
| 2587 | * with a brlock that prevents writing. | 2588 | * with a brlock that prevents writing. |
| 2588 | */ | 2589 | */ |
| 2589 | down_read(&cinode->lock_sem); | 2590 | down_read(&cinode->lock_sem); |
| 2590 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | 2591 | if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), |
| 2591 | server->vals->exclusive_lock_type, NULL, | 2592 | server->vals->exclusive_lock_type, NULL, |
| 2592 | CIFS_WRITE_OP)) { | 2593 | CIFS_WRITE_OP)) |
| 2593 | mutex_lock(&inode->i_mutex); | 2594 | rc = generic_file_aio_write(iocb, iov, nr_segs, pos); |
| 2594 | rc = __generic_file_aio_write(iocb, iov, nr_segs, | ||
| 2595 | &iocb->ki_pos); | ||
| 2596 | mutex_unlock(&inode->i_mutex); | ||
| 2597 | } | ||
| 2598 | |||
| 2599 | if (rc > 0) { | ||
| 2600 | ssize_t err; | ||
| 2601 | |||
| 2602 | err = generic_write_sync(file, iocb->ki_pos - rc, rc); | ||
| 2603 | if (err < 0) | ||
| 2604 | rc = err; | ||
| 2605 | } | ||
| 2606 | |||
| 2607 | up_read(&cinode->lock_sem); | 2595 | up_read(&cinode->lock_sem); |
| 2608 | return rc; | 2596 | return rc; |
| 2609 | } | 2597 | } |
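
Two things change in cifs_writev(): the brlock conflict check now runs against the offset the data will actually land at (EOF for O_APPEND rather than iocb->ki_pos), and the i_mutex handling plus generic_write_sync() are delegated to generic_file_aio_write(). The resulting shape, condensed from the hunk:

    loff_t lock_pos = pos;

    if (file->f_flags & O_APPEND)
            lock_pos = i_size_read(inode);  /* appends land at EOF */

    down_read(&cinode->lock_sem);
    if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
                                 server->vals->exclusive_lock_type, NULL,
                                 CIFS_WRITE_OP))
            rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
    up_read(&cinode->lock_sem);
    return rc;
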
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b37570952846..18cd5650a5fc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
| @@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, | |||
| 270 | iov->iov_len = rqst->rq_pagesz; | 270 | iov->iov_len = rqst->rq_pagesz; |
| 271 | } | 271 | } |
| 272 | 272 | ||
| 273 | static unsigned long | ||
| 274 | rqst_len(struct smb_rqst *rqst) | ||
| 275 | { | ||
| 276 | unsigned int i; | ||
| 277 | struct kvec *iov = rqst->rq_iov; | ||
| 278 | unsigned long buflen = 0; | ||
| 279 | |||
| 280 | /* total up iov array first */ | ||
| 281 | for (i = 0; i < rqst->rq_nvec; i++) | ||
| 282 | buflen += iov[i].iov_len; | ||
| 283 | |||
| 284 | /* add in the page array if there is one */ | ||
| 285 | if (rqst->rq_npages) { | ||
| 286 | buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); | ||
| 287 | buflen += rqst->rq_tailsz; | ||
| 288 | } | ||
| 289 | |||
| 290 | return buflen; | ||
| 291 | } | ||
| 292 | |||
| 273 | static int | 293 | static int |
| 274 | smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) | 294 | smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) |
| 275 | { | 295 | { |
| @@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) | |||
| 277 | struct kvec *iov = rqst->rq_iov; | 297 | struct kvec *iov = rqst->rq_iov; |
| 278 | int n_vec = rqst->rq_nvec; | 298 | int n_vec = rqst->rq_nvec; |
| 279 | unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); | 299 | unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); |
| 300 | unsigned long send_length; | ||
| 280 | unsigned int i; | 301 | unsigned int i; |
| 281 | size_t total_len = 0, sent; | 302 | size_t total_len = 0, sent; |
| 282 | struct socket *ssocket = server->ssocket; | 303 | struct socket *ssocket = server->ssocket; |
| @@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) | |||
| 285 | if (ssocket == NULL) | 306 | if (ssocket == NULL) |
| 286 | return -ENOTSOCK; | 307 | return -ENOTSOCK; |
| 287 | 308 | ||
| 309 | /* sanity check send length */ | ||
| 310 | send_length = rqst_len(rqst); | ||
| 311 | if (send_length != smb_buf_length + 4) { | ||
| 312 | WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", | ||
| 313 | send_length, smb_buf_length); | ||
| 314 | return -EIO; | ||
| 315 | } | ||
| 316 | |||
| 288 | cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); | 317 | cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); |
| 289 | dump_smb(iov[0].iov_base, iov[0].iov_len); | 318 | dump_smb(iov[0].iov_base, iov[0].iov_len); |
| 290 | 319 | ||
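
rqst_len() totals the kvec array plus the page array, counting every page but the last at rq_pagesz and the last at rq_tailsz, and smb_send_rqst() now refuses any request whose total disagrees with the advertised header length. A worked instance of the check, with invented numbers:

    /* Hypothetical request:
     *   rq_iov:    { .iov_len = 68 }, { .iov_len = 32 }   (rq_nvec = 2)
     *   rq_npages = 3, rq_pagesz = 4096, rq_tailsz = 500
     *
     * rqst_len() = 68 + 32 + (3 - 1) * 4096 + 500 = 8792
     *
     * The send proceeds iff smb_buf_length + 4 == 8792, i.e. the header
     * must advertise 8788: the RFC1002 length excludes the 4-byte
     * transport header that the first kvec already contains.
     */
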
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index b7143cf783ac..381c993b1427 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
| @@ -10,7 +10,7 @@ extern int coda_hard; | |||
| 10 | extern int coda_fake_statfs; | 10 | extern int coda_fake_statfs; |
| 11 | 11 | ||
| 12 | void coda_destroy_inodecache(void); | 12 | void coda_destroy_inodecache(void); |
| 13 | int coda_init_inodecache(void); | 13 | int __init coda_init_inodecache(void); |
| 14 | int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync); | 14 | int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync); |
| 15 | void coda_sysctl_init(void); | 15 | void coda_sysctl_init(void); |
| 16 | void coda_sysctl_clean(void); | 16 | void coda_sysctl_clean(void); |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 506de34a4ef3..d9c7751f10ac 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
| @@ -73,7 +73,7 @@ static void init_once(void *foo) | |||
| 73 | inode_init_once(&ei->vfs_inode); | 73 | inode_init_once(&ei->vfs_inode); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | int coda_init_inodecache(void) | 76 | int __init coda_init_inodecache(void) |
| 77 | { | 77 | { |
| 78 | coda_inode_cachep = kmem_cache_create("coda_inode_cache", | 78 | coda_inode_cachep = kmem_cache_create("coda_inode_cache", |
| 79 | sizeof(struct coda_inode_info), | 79 | sizeof(struct coda_inode_info), |
| @@ -96,6 +96,7 @@ void coda_destroy_inodecache(void) | |||
| 96 | 96 | ||
| 97 | static int coda_remount(struct super_block *sb, int *flags, char *data) | 97 | static int coda_remount(struct super_block *sb, int *flags, char *data) |
| 98 | { | 98 | { |
| 99 | sync_filesystem(sb); | ||
| 99 | *flags |= MS_NOATIME; | 100 | *flags |= MS_NOATIME; |
| 100 | return 0; | 101 | return 0; |
| 101 | } | 102 | } |
| @@ -250,7 +251,7 @@ static void coda_put_super(struct super_block *sb) | |||
| 250 | 251 | ||
| 251 | static void coda_evict_inode(struct inode *inode) | 252 | static void coda_evict_inode(struct inode *inode) |
| 252 | { | 253 | { |
| 253 | truncate_inode_pages(&inode->i_data, 0); | 254 | truncate_inode_pages_final(&inode->i_data); |
| 254 | clear_inode(inode); | 255 | clear_inode(inode); |
| 255 | coda_cache_clear_inode(inode); | 256 | coda_cache_clear_inode(inode); |
| 256 | } | 257 | } |
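
coda_remount() now opens with sync_filesystem(sb), the same pattern this series applies to other filesystems: flush dirty state while the old mount options still govern it, presumably because the VFS-level sync on remount has been pushed down into each ->remount_fs(). An illustrative shape (example_remount is hypothetical, mirroring coda_remount above):

    static int example_remount(struct super_block *sb, int *flags, char *data)
    {
            /* Write back under the old options before changing flags. */
            sync_filesystem(sb);
            *flags |= MS_NOATIME;
            return 0;
    }

The __init tag added to coda_init_inodecache() marks it as boot/load-time only, letting its text be discarded once initialization is done.
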
diff --git a/fs/compat.c b/fs/compat.c index 6af20de2c1a3..ca926ad0430c 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
| @@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...) | |||
| 72 | * Not all architectures have sys_utime, so implement this in terms | 72 | * Not all architectures have sys_utime, so implement this in terms |
| 73 | * of sys_utimes. | 73 | * of sys_utimes. |
| 74 | */ | 74 | */ |
| 75 | asmlinkage long compat_sys_utime(const char __user *filename, | 75 | COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, |
| 76 | struct compat_utimbuf __user *t) | 76 | struct compat_utimbuf __user *, t) |
| 77 | { | 77 | { |
| 78 | struct timespec tv[2]; | 78 | struct timespec tv[2]; |
| 79 | 79 | ||
| @@ -87,13 +87,13 @@ asmlinkage long compat_sys_utime(const char __user *filename, | |||
| 87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); | 87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) | 90 | COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) |
| 91 | { | 91 | { |
| 92 | struct timespec tv[2]; | 92 | struct timespec tv[2]; |
| 93 | 93 | ||
| 94 | if (t) { | 94 | if (t) { |
| 95 | if (get_compat_timespec(&tv[0], &t[0]) || | 95 | if (compat_get_timespec(&tv[0], &t[0]) || |
| 96 | get_compat_timespec(&tv[1], &t[1])) | 96 | compat_get_timespec(&tv[1], &t[1])) |
| 97 | return -EFAULT; | 97 | return -EFAULT; |
| 98 | 98 | ||
| 99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) | 99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) |
| @@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena | |||
| 102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); | 102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) | 105 | COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) |
| 106 | { | 106 | { |
| 107 | struct timespec tv[2]; | 107 | struct timespec tv[2]; |
| 108 | 108 | ||
| @@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena | |||
| 121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); | 121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) | 124 | COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) |
| 125 | { | 125 | { |
| 126 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 126 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
| 127 | } | 127 | } |
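
The compat.c conversions swap hand-rolled asmlinkage prototypes for COMPAT_SYSCALL_DEFINEn(), which takes the syscall name followed by alternating type and name arguments and generates wrappers that sign- and zero-extend 32-bit arguments correctly on 64-bit kernels (note, for instance, arg becoming compat_ulong_t in the fcntl paths below). The shape of one conversion, with the body as shown in the hunk above and the conversion step elided:

    COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
                           struct compat_utimbuf __user *, t)
    {
            struct timespec tv[2];

            /* ... convert t into tv as above, then: */
            return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
    }
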
| @@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | |||
| 159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; | 159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | asmlinkage long compat_sys_newstat(const char __user * filename, | 162 | COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, |
| 163 | struct compat_stat __user *statbuf) | 163 | struct compat_stat __user *, statbuf) |
| 164 | { | 164 | { |
| 165 | struct kstat stat; | 165 | struct kstat stat; |
| 166 | int error; | 166 | int error; |
| @@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename, | |||
| 171 | return cp_compat_stat(&stat, statbuf); | 171 | return cp_compat_stat(&stat, statbuf); |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | asmlinkage long compat_sys_newlstat(const char __user * filename, | 174 | COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, |
| 175 | struct compat_stat __user *statbuf) | 175 | struct compat_stat __user *, statbuf) |
| 176 | { | 176 | { |
| 177 | struct kstat stat; | 177 | struct kstat stat; |
| 178 | int error; | 178 | int error; |
| @@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename, | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | #ifndef __ARCH_WANT_STAT64 | 186 | #ifndef __ARCH_WANT_STAT64 |
| 187 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, | 187 | COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, |
| 188 | const char __user *filename, | 188 | const char __user *, filename, |
| 189 | struct compat_stat __user *statbuf, int flag) | 189 | struct compat_stat __user *, statbuf, int, flag) |
| 190 | { | 190 | { |
| 191 | struct kstat stat; | 191 | struct kstat stat; |
| 192 | int error; | 192 | int error; |
| @@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, | |||
| 198 | } | 198 | } |
| 199 | #endif | 199 | #endif |
| 200 | 200 | ||
| 201 | asmlinkage long compat_sys_newfstat(unsigned int fd, | 201 | COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, |
| 202 | struct compat_stat __user * statbuf) | 202 | struct compat_stat __user *, statbuf) |
| 203 | { | 203 | { |
| 204 | struct kstat stat; | 204 | struct kstat stat; |
| 205 | int error = vfs_fstat(fd, &stat); | 205 | int error = vfs_fstat(fd, &stat); |
| @@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * | |||
| 247 | * The following statfs calls are copies of code from fs/statfs.c and | 247 | * The following statfs calls are copies of code from fs/statfs.c and |
| 248 | * should be checked against those from time to time | 248 | * should be checked against those from time to time |
| 249 | */ | 249 | */ |
| 250 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) | 250 | COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) |
| 251 | { | 251 | { |
| 252 | struct kstatfs tmp; | 252 | struct kstatfs tmp; |
| 253 | int error = user_statfs(pathname, &tmp); | 253 | int error = user_statfs(pathname, &tmp); |
| @@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta | |||
| 256 | return error; | 256 | return error; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) | 259 | COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) |
| 260 | { | 260 | { |
| 261 | struct kstatfs tmp; | 261 | struct kstatfs tmp; |
| 262 | int error = fd_statfs(fd, &tmp); | 262 | int error = fd_statfs(fd, &tmp); |
| @@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat | |||
| 298 | return 0; | 298 | return 0; |
| 299 | } | 299 | } |
| 300 | 300 | ||
| 301 | asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) | 301 | COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
| 302 | { | 302 | { |
| 303 | struct kstatfs tmp; | 303 | struct kstatfs tmp; |
| 304 | int error; | 304 | int error; |
| @@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s | |||
| 312 | return error; | 312 | return error; |
| 313 | } | 313 | } |
| 314 | 314 | ||
| 315 | asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) | 315 | COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
| 316 | { | 316 | { |
| 317 | struct kstatfs tmp; | 317 | struct kstatfs tmp; |
| 318 | int error; | 318 | int error; |
| @@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c | |||
| 331 | * Given how simple this syscall is that approach is more maintainable | 331 | * Given how simple this syscall is that approach is more maintainable |
| 332 | * than the various conversion hacks. | 332 | * than the various conversion hacks. |
| 333 | */ | 333 | */ |
| 334 | asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) | 334 | COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) |
| 335 | { | 335 | { |
| 336 | struct compat_ustat tmp; | 336 | struct compat_ustat tmp; |
| 337 | struct kstatfs sbuf; | 337 | struct kstatfs sbuf; |
| @@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u | |||
| 399 | } | 399 | } |
| 400 | #endif | 400 | #endif |
| 401 | 401 | ||
| 402 | asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | 402 | static unsigned int |
| 403 | unsigned long arg) | 403 | convert_fcntl_cmd(unsigned int cmd) |
| 404 | { | ||
| 405 | switch (cmd) { | ||
| 406 | case F_GETLK64: | ||
| 407 | return F_GETLK; | ||
| 408 | case F_SETLK64: | ||
| 409 | return F_SETLK; | ||
| 410 | case F_SETLKW64: | ||
| 411 | return F_SETLKW; | ||
| 412 | } | ||
| 413 | |||
| 414 | return cmd; | ||
| 415 | } | ||
| 416 | |||
| 417 | COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | ||
| 418 | compat_ulong_t, arg) | ||
| 404 | { | 419 | { |
| 405 | mm_segment_t old_fs; | 420 | mm_segment_t old_fs; |
| 406 | struct flock f; | 421 | struct flock f; |
| 407 | long ret; | 422 | long ret; |
| 423 | unsigned int conv_cmd; | ||
| 408 | 424 | ||
| 409 | switch (cmd) { | 425 | switch (cmd) { |
| 410 | case F_GETLK: | 426 | case F_GETLK: |
| @@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | |||
| 441 | case F_GETLK64: | 457 | case F_GETLK64: |
| 442 | case F_SETLK64: | 458 | case F_SETLK64: |
| 443 | case F_SETLKW64: | 459 | case F_SETLKW64: |
| 460 | case F_GETLKP: | ||
| 461 | case F_SETLKP: | ||
| 462 | case F_SETLKPW: | ||
| 444 | ret = get_compat_flock64(&f, compat_ptr(arg)); | 463 | ret = get_compat_flock64(&f, compat_ptr(arg)); |
| 445 | if (ret != 0) | 464 | if (ret != 0) |
| 446 | break; | 465 | break; |
| 447 | old_fs = get_fs(); | 466 | old_fs = get_fs(); |
| 448 | set_fs(KERNEL_DS); | 467 | set_fs(KERNEL_DS); |
| 449 | ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK : | 468 | conv_cmd = convert_fcntl_cmd(cmd); |
| 450 | ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW), | 469 | ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); |
| 451 | (unsigned long)&f); | ||
| 452 | set_fs(old_fs); | 470 | set_fs(old_fs); |
| 453 | if (cmd == F_GETLK64 && ret == 0) { | 471 | if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { |
| 454 | /* need to return lock information - see above for commentary */ | 472 | /* need to return lock information - see above for commentary */ |
| 455 | if (f.l_start > COMPAT_LOFF_T_MAX) | 473 | if (f.l_start > COMPAT_LOFF_T_MAX) |
| 456 | ret = -EOVERFLOW; | 474 | ret = -EOVERFLOW; |
| @@ -468,16 +486,22 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | |||
| 468 | return ret; | 486 | return ret; |
| 469 | } | 487 | } |
| 470 | 488 | ||
| 471 | asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, | 489 | COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, |
| 472 | unsigned long arg) | 490 | compat_ulong_t, arg) |
| 473 | { | 491 | { |
| 474 | if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) | 492 | switch (cmd) { |
| 493 | case F_GETLK64: | ||
| 494 | case F_SETLK64: | ||
| 495 | case F_SETLKW64: | ||
| 496 | case F_GETLKP: | ||
| 497 | case F_SETLKP: | ||
| 498 | case F_SETLKPW: | ||
| 475 | return -EINVAL; | 499 | return -EINVAL; |
| 500 | } | ||
| 476 | return compat_sys_fcntl64(fd, cmd, arg); | 501 | return compat_sys_fcntl64(fd, cmd, arg); |
| 477 | } | 502 | } |
| 478 | 503 | ||
| 479 | asmlinkage long | 504 | COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) |
| 480 | compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) | ||
| 481 | { | 505 | { |
| 482 | long ret; | 506 | long ret; |
| 483 | aio_context_t ctx64; | 507 | aio_context_t ctx64; |
| @@ -496,32 +520,24 @@ compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) | |||
| 496 | return ret; | 520 | return ret; |
| 497 | } | 521 | } |
| 498 | 522 | ||
| 499 | asmlinkage long | 523 | COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, |
| 500 | compat_sys_io_getevents(aio_context_t ctx_id, | 524 | compat_long_t, min_nr, |
| 501 | unsigned long min_nr, | 525 | compat_long_t, nr, |
| 502 | unsigned long nr, | 526 | struct io_event __user *, events, |
| 503 | struct io_event __user *events, | 527 | struct compat_timespec __user *, timeout) |
| 504 | struct compat_timespec __user *timeout) | ||
| 505 | { | 528 | { |
| 506 | long ret; | ||
| 507 | struct timespec t; | 529 | struct timespec t; |
| 508 | struct timespec __user *ut = NULL; | 530 | struct timespec __user *ut = NULL; |
| 509 | 531 | ||
| 510 | ret = -EFAULT; | ||
| 511 | if (unlikely(!access_ok(VERIFY_WRITE, events, | ||
| 512 | nr * sizeof(struct io_event)))) | ||
| 513 | goto out; | ||
| 514 | if (timeout) { | 532 | if (timeout) { |
| 515 | if (get_compat_timespec(&t, timeout)) | 533 | if (compat_get_timespec(&t, timeout)) |
| 516 | goto out; | 534 | return -EFAULT; |
| 517 | 535 | ||
| 518 | ut = compat_alloc_user_space(sizeof(*ut)); | 536 | ut = compat_alloc_user_space(sizeof(*ut)); |
| 519 | if (copy_to_user(ut, &t, sizeof(t)) ) | 537 | if (copy_to_user(ut, &t, sizeof(t)) ) |
| 520 | goto out; | 538 | return -EFAULT; |
| 521 | } | 539 | } |
| 522 | ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut); | 540 | return sys_io_getevents(ctx_id, min_nr, nr, events, ut); |
| 523 | out: | ||
| 524 | return ret; | ||
| 525 | } | 541 | } |
| 526 | 542 | ||
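A recurring theme in this file: every asmlinkage long compat_sys_*() definition becomes a COMPAT_SYSCALL_DEFINEn() wrapper. The macro exists because a 32-bit task can leave stale bits in the upper half of a 64-bit argument register; a simplified sketch of roughly what the expansion produces (the real macro lives in <linux/compat.h> and is more involved):

/* The body the programmer writes, with properly typed parameters: */
static inline long C_SYSC_fcntl(unsigned int fd, unsigned int cmd,
				compat_ulong_t arg)
{
	/* ... handler body ... */
	return 0;
}

/* Generated entry point: takes raw register-sized longs and narrows
 * each argument, so stale upper bits from a compat task never reach
 * the body. */
asmlinkage long compat_SyS_fcntl(long fd, long cmd, long arg)
{
	return C_SYSC_fcntl((unsigned int)fd, (unsigned int)cmd,
			    (compat_ulong_t)arg);
}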
| 527 | /* A write operation does a read from user space and vice versa */ | 543 | /* A write operation does a read from user space and vice versa */ |
| @@ -617,8 +633,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) | |||
| 617 | 633 | ||
| 618 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) | 634 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) |
| 619 | 635 | ||
| 620 | asmlinkage long | 636 | COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, |
| 621 | compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) | 637 | int, nr, u32 __user *, iocb) |
| 622 | { | 638 | { |
| 623 | struct iocb __user * __user *iocb64; | 639 | struct iocb __user * __user *iocb64; |
| 624 | long ret; | 640 | long ret; |
| @@ -770,10 +786,10 @@ static int do_nfs4_super_data_conv(void *raw_data) | |||
| 770 | #define NCPFS_NAME "ncpfs" | 786 | #define NCPFS_NAME "ncpfs" |
| 771 | #define NFS4_NAME "nfs4" | 787 | #define NFS4_NAME "nfs4" |
| 772 | 788 | ||
| 773 | asmlinkage long compat_sys_mount(const char __user * dev_name, | 789 | COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, |
| 774 | const char __user * dir_name, | 790 | const char __user *, dir_name, |
| 775 | const char __user * type, unsigned long flags, | 791 | const char __user *, type, compat_ulong_t, flags, |
| 776 | const void __user * data) | 792 | const void __user *, data) |
| 777 | { | 793 | { |
| 778 | char *kernel_type; | 794 | char *kernel_type; |
| 779 | unsigned long data_page; | 795 | unsigned long data_page; |
| @@ -869,8 +885,8 @@ efault: | |||
| 869 | return -EFAULT; | 885 | return -EFAULT; |
| 870 | } | 886 | } |
| 871 | 887 | ||
| 872 | asmlinkage long compat_sys_old_readdir(unsigned int fd, | 888 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
| 873 | struct compat_old_linux_dirent __user *dirent, unsigned int count) | 889 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) |
| 874 | { | 890 | { |
| 875 | int error; | 891 | int error; |
| 876 | struct fd f = fdget(fd); | 892 | struct fd f = fdget(fd); |
| @@ -948,8 +964,8 @@ efault: | |||
| 948 | return -EFAULT; | 964 | return -EFAULT; |
| 949 | } | 965 | } |
| 950 | 966 | ||
| 951 | asmlinkage long compat_sys_getdents(unsigned int fd, | 967 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, |
| 952 | struct compat_linux_dirent __user *dirent, unsigned int count) | 968 | struct compat_linux_dirent __user *, dirent, unsigned int, count) |
| 953 | { | 969 | { |
| 954 | struct fd f; | 970 | struct fd f; |
| 955 | struct compat_linux_dirent __user * lastdirent; | 971 | struct compat_linux_dirent __user * lastdirent; |
| @@ -981,7 +997,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
| 981 | return error; | 997 | return error; |
| 982 | } | 998 | } |
| 983 | 999 | ||
| 984 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 | 1000 | #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 |
| 985 | 1001 | ||
| 986 | struct compat_getdents_callback64 { | 1002 | struct compat_getdents_callback64 { |
| 987 | struct dir_context ctx; | 1003 | struct dir_context ctx; |
| @@ -1033,8 +1049,8 @@ efault: | |||
| 1033 | return -EFAULT; | 1049 | return -EFAULT; |
| 1034 | } | 1050 | } |
| 1035 | 1051 | ||
| 1036 | asmlinkage long compat_sys_getdents64(unsigned int fd, | 1052 | COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
| 1037 | struct linux_dirent64 __user * dirent, unsigned int count) | 1053 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
| 1038 | { | 1054 | { |
| 1039 | struct fd f; | 1055 | struct fd f; |
| 1040 | struct linux_dirent64 __user * lastdirent; | 1056 | struct linux_dirent64 __user * lastdirent; |
| @@ -1066,7 +1082,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
| 1066 | fdput(f); | 1082 | fdput(f); |
| 1067 | return error; | 1083 | return error; |
| 1068 | } | 1084 | } |
| 1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1085 | #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ |
| 1070 | 1086 | ||
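The #ifndef __ARCH_OMIT_... to #ifdef __ARCH_WANT_... flip inverts the default: compat_sys_getdents64() is now built only for architectures that request it, instead of for everyone that forgot to opt out. An architecture opts in from its unistd header, roughly:

/* arch/<arch>/include/asm/unistd.h (illustrative) */
#define __ARCH_WANT_COMPAT_SYS_GETDENTS64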
| 1071 | /* | 1087 | /* |
| 1072 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | 1088 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the |
| @@ -1287,9 +1303,9 @@ out_nofds: | |||
| 1287 | return ret; | 1303 | return ret; |
| 1288 | } | 1304 | } |
| 1289 | 1305 | ||
| 1290 | asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, | 1306 | COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, |
| 1291 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1307 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
| 1292 | struct compat_timeval __user *tvp) | 1308 | struct compat_timeval __user *, tvp) |
| 1293 | { | 1309 | { |
| 1294 | struct timespec end_time, *to = NULL; | 1310 | struct timespec end_time, *to = NULL; |
| 1295 | struct compat_timeval tv; | 1311 | struct compat_timeval tv; |
| @@ -1320,7 +1336,7 @@ struct compat_sel_arg_struct { | |||
| 1320 | compat_uptr_t tvp; | 1336 | compat_uptr_t tvp; |
| 1321 | }; | 1337 | }; |
| 1322 | 1338 | ||
| 1323 | asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) | 1339 | COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) |
| 1324 | { | 1340 | { |
| 1325 | struct compat_sel_arg_struct a; | 1341 | struct compat_sel_arg_struct a; |
| 1326 | 1342 | ||
| @@ -1381,9 +1397,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, | |||
| 1381 | return ret; | 1397 | return ret; |
| 1382 | } | 1398 | } |
| 1383 | 1399 | ||
| 1384 | asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, | 1400 | COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, |
| 1385 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1401 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
| 1386 | struct compat_timespec __user *tsp, void __user *sig) | 1402 | struct compat_timespec __user *, tsp, void __user *, sig) |
| 1387 | { | 1403 | { |
| 1388 | compat_size_t sigsetsize = 0; | 1404 | compat_size_t sigsetsize = 0; |
| 1389 | compat_uptr_t up = 0; | 1405 | compat_uptr_t up = 0; |
| @@ -1400,9 +1416,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, | |||
| 1400 | sigsetsize); | 1416 | sigsetsize); |
| 1401 | } | 1417 | } |
| 1402 | 1418 | ||
| 1403 | asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | 1419 | COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, |
| 1404 | unsigned int nfds, struct compat_timespec __user *tsp, | 1420 | unsigned int, nfds, struct compat_timespec __user *, tsp, |
| 1405 | const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) | 1421 | const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) |
| 1406 | { | 1422 | { |
| 1407 | compat_sigset_t ss32; | 1423 | compat_sigset_t ss32; |
| 1408 | sigset_t ksigmask, sigsaved; | 1424 | sigset_t ksigmask, sigsaved; |
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c | |||
| @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, | |||
| 88 | #define ELF_HWCAP COMPAT_ELF_HWCAP | 88 | #define ELF_HWCAP COMPAT_ELF_HWCAP |
| 89 | #endif | 89 | #endif |
| 90 | 90 | ||
| 91 | #ifdef COMPAT_ELF_HWCAP2 | ||
| 92 | #undef ELF_HWCAP2 | ||
| 93 | #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 | ||
| 94 | #endif | ||
| 95 | |||
| 91 | #ifdef COMPAT_ARCH_DLINFO | 96 | #ifdef COMPAT_ARCH_DLINFO |
| 92 | #undef ARCH_DLINFO | 97 | #undef ARCH_DLINFO |
| 93 | #define ARCH_DLINFO COMPAT_ARCH_DLINFO | 98 | #define ARCH_DLINFO COMPAT_ARCH_DLINFO |
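The ELF_HWCAP2 override follows the established pattern of this file: compat_binfmt_elf.c re-includes binfmt_elf.c with selected macros redefined, so a 64-bit kernel can advertise 32-bit hardware capabilities in a compat task's auxiliary vector. An architecture wires it up with something like the following (illustrative, not from this patch):

/* arch compat header: hwcaps to report to 32-bit processes */
#define COMPAT_ELF_HWCAP2	(compat_elf_hwcap2)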
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3881610b6438..e82289047272 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
| @@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd) | |||
| 1538 | return ioctl_pointer[i] == xcmd; | 1538 | return ioctl_pointer[i] == xcmd; |
| 1539 | } | 1539 | } |
| 1540 | 1540 | ||
| 1541 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | 1541 | COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, |
| 1542 | unsigned long arg) | 1542 | compat_ulong_t, arg32) |
| 1543 | { | 1543 | { |
| 1544 | unsigned long arg = arg32; | ||
| 1544 | struct fd f = fdget(fd); | 1545 | struct fd f = fdget(fd); |
| 1545 | int error = -EBADF; | 1546 | int error = -EBADF; |
| 1546 | if (!f.file) | 1547 | if (!f.file) |
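Note the explicit widening at the top of the converted ioctl handler: with the parameter typed compat_ulong_t, the wrapper guarantees a clean 32-bit value, and assigning it once to a local unsigned long keeps the rest of the function unchanged. The conversion direction matters; a small standalone illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t arg32 = 0x80000000u;		/* high bit set */
	unsigned long a = arg32;		/* zero-extends */
	long b = (int32_t)arg32;		/* sign-extends */

	printf("%#lx vs %#lx\n", a, (unsigned long)b);
	return 0;
}

On a 64-bit build this prints 0x80000000 vs 0xffffffff80000000; zero extension is the behaviour an ioctl argument packed into an unsigned long needs.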
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 06610cf94d57..ddcfe590b8a8 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
| @@ -195,8 +195,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i | |||
| 195 | struct page *page = NULL; | 195 | struct page *page = NULL; |
| 196 | 196 | ||
| 197 | if (blocknr + i < devsize) { | 197 | if (blocknr + i < devsize) { |
| 198 | page = read_mapping_page_async(mapping, blocknr + i, | 198 | page = read_mapping_page(mapping, blocknr + i, NULL); |
| 199 | NULL); | ||
| 200 | /* synchronous error? */ | 199 | /* synchronous error? */ |
| 201 | if (IS_ERR(page)) | 200 | if (IS_ERR(page)) |
| 202 | page = NULL; | 201 | page = NULL; |
| @@ -244,6 +243,7 @@ static void cramfs_kill_sb(struct super_block *sb) | |||
| 244 | 243 | ||
| 245 | static int cramfs_remount(struct super_block *sb, int *flags, char *data) | 244 | static int cramfs_remount(struct super_block *sb, int *flags, char *data) |
| 246 | { | 245 | { |
| 246 | sync_filesystem(sb); | ||
| 247 | *flags |= MS_RDONLY; | 247 | *flags |= MS_RDONLY; |
| 248 | return 0; | 248 | return 0; |
| 249 | } | 249 | } |
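The sync_filesystem(sb) line added here recurs all through this diff (adfs, debugfs, devpts, ext2, ext3, efs, ...): the sync the VFS previously performed unconditionally on every remount is pushed down into each filesystem's handler, so filesystems that need different behaviour can skip it. The resulting boilerplate is the same everywhere; a sketch of the common shape:

/* Typical .remount_fs after this series (sketch): */
static int foo_remount(struct super_block *sb, int *flags, char *data)
{
	sync_filesystem(sb);	/* flush dirty state before flags change */
	*flags |= MS_RDONLY;	/* or: reparse mount options */
	return 0;
}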
diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce9769c..66cba5a8a346 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -2483,12 +2483,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) | |||
| 2483 | dentry->d_name.name = dentry->d_iname; | 2483 | dentry->d_name.name = dentry->d_iname; |
| 2484 | } else { | 2484 | } else { |
| 2485 | /* | 2485 | /* |
| 2486 | * Both are internal. Just copy target to dentry | 2486 | * Both are internal. |
| 2487 | */ | 2487 | */ |
| 2488 | memcpy(dentry->d_iname, target->d_name.name, | 2488 | unsigned int i; |
| 2489 | target->d_name.len + 1); | 2489 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); |
| 2490 | dentry->d_name.len = target->d_name.len; | 2490 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { |
| 2491 | return; | 2491 | swap(((long *) &dentry->d_iname)[i], |
| 2492 | ((long *) &target->d_iname)[i]); | ||
| 2493 | } | ||
| 2492 | } | 2494 | } |
| 2493 | } | 2495 | } |
| 2494 | swap(dentry->d_name.len, target->d_name.len); | 2496 | swap(dentry->d_name.len, target->d_name.len); |
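The old both-internal branch copied target's name over dentry's and returned early, which is fine when target is about to be dropped but wrong for a true exchange. The new loop swaps the two inline buffers one long at a time; a standalone sketch of the same trick (the kernel builds with -fno-strict-aliasing, which makes the type pun safe there):

#include <stddef.h>

#define NAME_BUF_LEN 40		/* illustrative stand-in for DNAME_INLINE_LEN */

static void swap_inline_names(char a[NAME_BUF_LEN], char b[NAME_BUF_LEN])
{
	size_t i;

	/* assumes NAME_BUF_LEN is a multiple of sizeof(long), as the
	 * BUILD_BUG_ON in the patch enforces for the real constant */
	for (i = 0; i < NAME_BUF_LEN / sizeof(long); i++) {
		long tmp = ((long *)a)[i];

		((long *)a)[i] = ((long *)b)[i];
		((long *)b)[i] = tmp;
	}
}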
| @@ -2545,13 +2547,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, | |||
| 2545 | * __d_move - move a dentry | 2547 | * __d_move - move a dentry |
| 2546 | * @dentry: entry to move | 2548 | * @dentry: entry to move |
| 2547 | * @target: new dentry | 2549 | * @target: new dentry |
| 2550 | * @exchange: exchange the two dentries | ||
| 2548 | * | 2551 | * |
| 2549 | * Update the dcache to reflect the move of a file name. Negative | 2552 | * Update the dcache to reflect the move of a file name. Negative |
| 2550 | * dcache entries should not be moved in this way. Caller must hold | 2553 | * dcache entries should not be moved in this way. Caller must hold |
| 2551 | * rename_lock, the i_mutex of the source and target directories, | 2554 | * rename_lock, the i_mutex of the source and target directories, |
| 2552 | * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). | 2555 | * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). |
| 2553 | */ | 2556 | */ |
| 2554 | static void __d_move(struct dentry * dentry, struct dentry * target) | 2557 | static void __d_move(struct dentry *dentry, struct dentry *target, |
| 2558 | bool exchange) | ||
| 2555 | { | 2559 | { |
| 2556 | if (!dentry->d_inode) | 2560 | if (!dentry->d_inode) |
| 2557 | printk(KERN_WARNING "VFS: moving negative dcache entry\n"); | 2561 | printk(KERN_WARNING "VFS: moving negative dcache entry\n"); |
| @@ -2573,8 +2577,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
| 2573 | __d_drop(dentry); | 2577 | __d_drop(dentry); |
| 2574 | __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); | 2578 | __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); |
| 2575 | 2579 | ||
| 2576 | /* Unhash the target: dput() will then get rid of it */ | 2580 | /* |
| 2581 | * Unhash the target (d_delete() is not usable here). If exchanging | ||
| 2582 | * the two dentries, then rehash onto the other's hash queue. | ||
| 2583 | */ | ||
| 2577 | __d_drop(target); | 2584 | __d_drop(target); |
| 2585 | if (exchange) { | ||
| 2586 | __d_rehash(target, | ||
| 2587 | d_hash(dentry->d_parent, dentry->d_name.hash)); | ||
| 2588 | } | ||
| 2578 | 2589 | ||
| 2579 | list_del(&dentry->d_u.d_child); | 2590 | list_del(&dentry->d_u.d_child); |
| 2580 | list_del(&target->d_u.d_child); | 2591 | list_del(&target->d_u.d_child); |
| @@ -2601,6 +2612,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
| 2601 | write_seqcount_end(&dentry->d_seq); | 2612 | write_seqcount_end(&dentry->d_seq); |
| 2602 | 2613 | ||
| 2603 | dentry_unlock_parents_for_move(dentry, target); | 2614 | dentry_unlock_parents_for_move(dentry, target); |
| 2615 | if (exchange) | ||
| 2616 | fsnotify_d_move(target); | ||
| 2604 | spin_unlock(&target->d_lock); | 2617 | spin_unlock(&target->d_lock); |
| 2605 | fsnotify_d_move(dentry); | 2618 | fsnotify_d_move(dentry); |
| 2606 | spin_unlock(&dentry->d_lock); | 2619 | spin_unlock(&dentry->d_lock); |
| @@ -2618,11 +2631,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
| 2618 | void d_move(struct dentry *dentry, struct dentry *target) | 2631 | void d_move(struct dentry *dentry, struct dentry *target) |
| 2619 | { | 2632 | { |
| 2620 | write_seqlock(&rename_lock); | 2633 | write_seqlock(&rename_lock); |
| 2621 | __d_move(dentry, target); | 2634 | __d_move(dentry, target, false); |
| 2622 | write_sequnlock(&rename_lock); | 2635 | write_sequnlock(&rename_lock); |
| 2623 | } | 2636 | } |
| 2624 | EXPORT_SYMBOL(d_move); | 2637 | EXPORT_SYMBOL(d_move); |
| 2625 | 2638 | ||
| 2639 | /* | ||
| 2640 | * d_exchange - exchange two dentries | ||
| 2641 | * @dentry1: first dentry | ||
| 2642 | * @dentry2: second dentry | ||
| 2643 | */ | ||
| 2644 | void d_exchange(struct dentry *dentry1, struct dentry *dentry2) | ||
| 2645 | { | ||
| 2646 | write_seqlock(&rename_lock); | ||
| 2647 | |||
| 2648 | WARN_ON(!dentry1->d_inode); | ||
| 2649 | WARN_ON(!dentry2->d_inode); | ||
| 2650 | WARN_ON(IS_ROOT(dentry1)); | ||
| 2651 | WARN_ON(IS_ROOT(dentry2)); | ||
| 2652 | |||
| 2653 | __d_move(dentry1, dentry2, true); | ||
| 2654 | |||
| 2655 | write_sequnlock(&rename_lock); | ||
| 2656 | } | ||
| 2657 | |||
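d_exchange() is the dcache half of the cross-rename work in this series: instead of leaving the target unhashed for dput() to reap, both dentries are rehashed onto each other's chains. The intended caller is the RENAME_EXCHANGE path of vfs_rename(), which elsewhere in this diff grows a flags argument (see the ecryptfs hunk below passing a trailing 0); a hedged sketch of that call site:

/* Sketch (assumption) of the vfs_rename() tail: */
if (flags & RENAME_EXCHANGE)
	d_exchange(old_dentry, new_dentry);
else
	d_move(old_dentry, new_dentry);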
| 2626 | /** | 2658 | /** |
| 2627 | * d_ancestor - search for an ancestor | 2659 | * d_ancestor - search for an ancestor |
| 2628 | * @p1: ancestor dentry | 2660 | * @p1: ancestor dentry |
| @@ -2670,7 +2702,7 @@ static struct dentry *__d_unalias(struct inode *inode, | |||
| 2670 | m2 = &alias->d_parent->d_inode->i_mutex; | 2702 | m2 = &alias->d_parent->d_inode->i_mutex; |
| 2671 | out_unalias: | 2703 | out_unalias: |
| 2672 | if (likely(!d_mountpoint(alias))) { | 2704 | if (likely(!d_mountpoint(alias))) { |
| 2673 | __d_move(alias, dentry); | 2705 | __d_move(alias, dentry, false); |
| 2674 | ret = alias; | 2706 | ret = alias; |
| 2675 | } | 2707 | } |
| 2676 | out_err: | 2708 | out_err: |
| @@ -2833,9 +2865,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) | |||
| 2833 | u32 dlen = ACCESS_ONCE(name->len); | 2865 | u32 dlen = ACCESS_ONCE(name->len); |
| 2834 | char *p; | 2866 | char *p; |
| 2835 | 2867 | ||
| 2836 | if (*buflen < dlen + 1) | ||
| 2837 | return -ENAMETOOLONG; | ||
| 2838 | *buflen -= dlen + 1; | 2868 | *buflen -= dlen + 1; |
| 2869 | if (*buflen < 0) | ||
| 2870 | return -ENAMETOOLONG; | ||
| 2839 | p = *buffer -= dlen + 1; | 2871 | p = *buffer -= dlen + 1; |
| 2840 | *p++ = '/'; | 2872 | *p++ = '/'; |
| 2841 | while (dlen--) { | 2873 | while (dlen--) { |
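The reordering in prepend_name() is a signedness fix, not a cleanup: *buflen is a signed int while dlen is u32, so in the old test a negative *buflen (possible once earlier prepend passes have exhausted the buffer) was promoted to a huge unsigned value, the bounds check passed, and the store went out of range. Subtracting first and testing *buflen < 0 keeps the comparison signed. A minimal demonstration of the promotion pitfall:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int buflen = -1;	/* buffer already exhausted */
	uint32_t dlen = 4;

	/* usual arithmetic conversions turn buflen into 0xffffffff */
	if (buflen < dlen + 1)
		printf("caught\n");
	else
		printf("missed: negative length slipped through\n");
	return 0;
}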
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 9c0444cccbe1..8c41b52da358 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
| @@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 218 | int err; | 218 | int err; |
| 219 | struct debugfs_fs_info *fsi = sb->s_fs_info; | 219 | struct debugfs_fs_info *fsi = sb->s_fs_info; |
| 220 | 220 | ||
| 221 | sync_filesystem(sb); | ||
| 221 | err = debugfs_parse_options(data, &fsi->mount_opts); | 222 | err = debugfs_parse_options(data, &fsi->mount_opts); |
| 222 | if (err) | 223 | if (err) |
| 223 | goto fail; | 224 | goto fail; |
| @@ -358,7 +359,7 @@ exit: | |||
| 358 | * @name: a pointer to a string containing the name of the file to create. | 359 | * @name: a pointer to a string containing the name of the file to create. |
| 359 | * @mode: the permission that the file should have. | 360 | * @mode: the permission that the file should have. |
| 360 | * @parent: a pointer to the parent dentry for this file. This should be a | 361 | * @parent: a pointer to the parent dentry for this file. This should be a |
| 361 | * directory dentry if set. If this paramater is NULL, then the | 362 | * directory dentry if set. If this parameter is NULL, then the |
| 362 | * file will be created in the root of the debugfs filesystem. | 363 | * file will be created in the root of the debugfs filesystem. |
| 363 | * @data: a pointer to something that the caller will want to get to later | 364 | * @data: a pointer to something that the caller will want to get to later |
| 364 | * on. The inode.i_private pointer will point to this value on | 365 | * on. The inode.i_private pointer will point to this value on |
| @@ -400,7 +401,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); | |||
| 400 | * @name: a pointer to a string containing the name of the directory to | 401 | * @name: a pointer to a string containing the name of the directory to |
| 401 | * create. | 402 | * create. |
| 402 | * @parent: a pointer to the parent dentry for this file. This should be a | 403 | * @parent: a pointer to the parent dentry for this file. This should be a |
| 403 | * directory dentry if set. If this paramater is NULL, then the | 404 | * directory dentry if set. If this parameter is NULL, then the |
| 404 | * directory will be created in the root of the debugfs filesystem. | 405 | * directory will be created in the root of the debugfs filesystem. |
| 405 | * | 406 | * |
| 406 | * This function creates a directory in debugfs with the given name. | 407 | * This function creates a directory in debugfs with the given name. |
| @@ -425,7 +426,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); | |||
| 425 | * @name: a pointer to a string containing the name of the symbolic link to | 426 | * @name: a pointer to a string containing the name of the symbolic link to |
| 426 | * create. | 427 | * create. |
| 427 | * @parent: a pointer to the parent dentry for this symbolic link. This | 428 | * @parent: a pointer to the parent dentry for this symbolic link. This |
| 428 | * should be a directory dentry if set. If this paramater is NULL, | 429 | * should be a directory dentry if set. If this parameter is NULL, |
| 429 | * then the symbolic link will be created in the root of the debugfs | 430 | * then the symbolic link will be created in the root of the debugfs |
| 430 | * filesystem. | 431 | * filesystem. |
| 431 | * @target: a pointer to a string containing the path to the target of the | 432 | * @target: a pointer to a string containing the path to the target of the |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index a726b9f29cb7..c71038079b47 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
| @@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) | |||
| 313 | struct pts_fs_info *fsi = DEVPTS_SB(sb); | 313 | struct pts_fs_info *fsi = DEVPTS_SB(sb); |
| 314 | struct pts_mount_opts *opts = &fsi->mount_opts; | 314 | struct pts_mount_opts *opts = &fsi->mount_opts; |
| 315 | 315 | ||
| 316 | sync_filesystem(sb); | ||
| 316 | err = parse_mount_options(data, PARSE_REMOUNT, opts); | 317 | err = parse_mount_options(data, PARSE_REMOUNT, opts); |
| 317 | 318 | ||
| 318 | /* | 319 | /* |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 160a5489a939..31ba0935e32e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -664,7 +664,6 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, | |||
| 664 | goto out; | 664 | goto out; |
| 665 | sector = start_sector << (sdio->blkbits - 9); | 665 | sector = start_sector << (sdio->blkbits - 9); |
| 666 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); | 666 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); |
| 667 | nr_pages = min(nr_pages, BIO_MAX_PAGES); | ||
| 668 | BUG_ON(nr_pages <= 0); | 667 | BUG_ON(nr_pages <= 0); |
| 669 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); | 668 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); |
| 670 | sdio->boundary = 0; | 669 | sdio->boundary = 0; |
| @@ -1194,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1194 | } | 1193 | } |
| 1195 | 1194 | ||
| 1196 | /* | 1195 | /* |
| 1197 | * For file extending writes updating i_size before data | 1196 | * For file extending writes updating i_size before data writeouts |
| 1198 | * writeouts complete can expose uninitialized blocks. So | 1197 | * complete can expose uninitialized blocks in dumb filesystems. |
| 1199 | * even for AIO, we need to wait for i/o to complete before | 1198 | * In that case we need to wait for I/O completion even if asked |
| 1200 | * returning in this case. | 1199 | * for an asynchronous write. |
| 1201 | */ | 1200 | */ |
| 1202 | dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && | 1201 | if (is_sync_kiocb(iocb)) |
| 1203 | (end > i_size_read(inode))); | 1202 | dio->is_async = false; |
| 1203 | else if (!(dio->flags & DIO_ASYNC_EXTEND) && | ||
| 1204 | (rw & WRITE) && end > i_size_read(inode)) | ||
| 1205 | dio->is_async = false; | ||
| 1206 | else | ||
| 1207 | dio->is_async = true; | ||
| 1208 | |||
| 1204 | dio->inode = inode; | 1209 | dio->inode = inode; |
| 1205 | dio->rw = rw; | 1210 | dio->rw = rw; |
| 1206 | 1211 | ||
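The rewritten is_async test makes the three cases explicit: a synchronous kiocb is never async; a size-extending write is forced synchronous unless the filesystem sets DIO_ASYNC_EXTEND, promising it updates i_size safely at I/O completion; everything else stays async. The same logic extracted into a predicate, purely for clarity (hypothetical helper, not part of the patch):

static bool dio_may_be_async(struct kiocb *iocb, int rw, loff_t end,
			     struct inode *inode, unsigned int flags)
{
	if (is_sync_kiocb(iocb))
		return false;
	if (!(flags & DIO_ASYNC_EXTEND) &&
	    (rw & WRITE) && end > i_size_read(inode))
		return false;	/* extending write: wait for completion */
	return true;
}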
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 0e90f0c91b93..dcea1e37a1b7 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "dlm_internal.h" | 14 | #include "dlm_internal.h" |
| 15 | #include "lock.h" | 15 | #include "lock.h" |
| 16 | #include "user.h" | 16 | #include "user.h" |
| 17 | #include "ast.h" | ||
| 17 | 18 | ||
| 18 | static uint64_t dlm_cb_seq; | 19 | static uint64_t dlm_cb_seq; |
| 19 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); | 20 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); |
| @@ -308,6 +309,6 @@ void dlm_callback_resume(struct dlm_ls *ls) | |||
| 308 | mutex_unlock(&ls->ls_cb_mutex); | 309 | mutex_unlock(&ls->ls_cb_mutex); |
| 309 | 310 | ||
| 310 | if (count) | 311 | if (count) |
| 311 | log_debug(ls, "dlm_callback_resume %d", count); | 312 | log_rinfo(ls, "dlm_callback_resume %d", count); |
| 312 | } | 313 | } |
| 313 | 314 | ||
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 278a75cda446..d975851a7e1e 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
| @@ -68,7 +68,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
| 68 | uint16_t namelen; | 68 | uint16_t namelen; |
| 69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; | 69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
| 70 | 70 | ||
| 71 | log_debug(ls, "dlm_recover_directory"); | 71 | log_rinfo(ls, "dlm_recover_directory"); |
| 72 | 72 | ||
| 73 | if (dlm_no_directory(ls)) | 73 | if (dlm_no_directory(ls)) |
| 74 | goto out_status; | 74 | goto out_status; |
| @@ -189,7 +189,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
| 189 | error = 0; | 189 | error = 0; |
| 190 | dlm_set_recover_status(ls, DLM_RS_DIR); | 190 | dlm_set_recover_status(ls, DLM_RS_DIR); |
| 191 | 191 | ||
| 192 | log_debug(ls, "dlm_recover_directory %u in %u new", | 192 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
| 193 | count, count_add); | 193 | count, count_add); |
| 194 | out_free: | 194 | out_free: |
| 195 | kfree(last_name); | 195 | kfree(last_name); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index e7665c31f7b1..5eff6ea3e27f 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
| @@ -65,6 +65,8 @@ struct dlm_mhandle; | |||
| 65 | printk(KERN_ERR "dlm: "fmt"\n" , ##args) | 65 | printk(KERN_ERR "dlm: "fmt"\n" , ##args) |
| 66 | #define log_error(ls, fmt, args...) \ | 66 | #define log_error(ls, fmt, args...) \ |
| 67 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) | 67 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) |
| 68 | #define log_rinfo(ls, fmt, args...) \ | ||
| 69 | printk(KERN_INFO "dlm: %s: " fmt "\n", (ls)->ls_name , ##args); | ||
| 68 | 70 | ||
| 69 | #define log_debug(ls, fmt, args...) \ | 71 | #define log_debug(ls, fmt, args...) \ |
| 70 | do { \ | 72 | do { \ |
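log_rinfo() prints at KERN_INFO unconditionally, whereas log_debug() just below it is gated on dlm_config.ci_log_debug; the bulk of the dlm hunks that follow are the mechanical conversion of recovery-progress messages from the latter to the former. One wrinkle in the definition as shown: the trailing semicolon inside the macro means an unbraced if/else caller expands into a stray null statement and fails to compile. The conventional safe shape would be:

/* Safer variant (sketch), wrapping the body do/while(0) style: */
#define log_rinfo(ls, fmt, args...) \
do { \
	printk(KERN_INFO "dlm: %s: " fmt "\n", (ls)->ls_name, ##args); \
} while (0)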
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e223a911a834..83f3d5520307 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
| @@ -687,6 +687,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, | |||
| 687 | log_error(ls, "find_rsb new from_other %d dir %d our %d %s", | 687 | log_error(ls, "find_rsb new from_other %d dir %d our %d %s", |
| 688 | from_nodeid, dir_nodeid, our_nodeid, r->res_name); | 688 | from_nodeid, dir_nodeid, our_nodeid, r->res_name); |
| 689 | dlm_free_rsb(r); | 689 | dlm_free_rsb(r); |
| 690 | r = NULL; | ||
| 690 | error = -ENOTBLK; | 691 | error = -ENOTBLK; |
| 691 | goto out_unlock; | 692 | goto out_unlock; |
| 692 | } | 693 | } |
| @@ -5462,7 +5463,7 @@ void dlm_recover_purge(struct dlm_ls *ls) | |||
| 5462 | up_write(&ls->ls_root_sem); | 5463 | up_write(&ls->ls_root_sem); |
| 5463 | 5464 | ||
| 5464 | if (lkb_count) | 5465 | if (lkb_count) |
| 5465 | log_debug(ls, "dlm_recover_purge %u locks for %u nodes", | 5466 | log_rinfo(ls, "dlm_recover_purge %u locks for %u nodes", |
| 5466 | lkb_count, nodes_count); | 5467 | lkb_count, nodes_count); |
| 5467 | } | 5468 | } |
| 5468 | 5469 | ||
| @@ -5536,7 +5537,7 @@ void dlm_recover_grant(struct dlm_ls *ls) | |||
| 5536 | } | 5537 | } |
| 5537 | 5538 | ||
| 5538 | if (lkb_count) | 5539 | if (lkb_count) |
| 5539 | log_debug(ls, "dlm_recover_grant %u locks on %u resources", | 5540 | log_rinfo(ls, "dlm_recover_grant %u locks on %u resources", |
| 5540 | lkb_count, rsb_count); | 5541 | lkb_count, rsb_count); |
| 5541 | } | 5542 | } |
| 5542 | 5543 | ||
| @@ -5695,7 +5696,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
| 5695 | put_rsb(r); | 5696 | put_rsb(r); |
| 5696 | out: | 5697 | out: |
| 5697 | if (error && error != -EEXIST) | 5698 | if (error && error != -EEXIST) |
| 5698 | log_debug(ls, "dlm_recover_master_copy remote %d %x error %d", | 5699 | log_rinfo(ls, "dlm_recover_master_copy remote %d %x error %d", |
| 5699 | from_nodeid, remid, error); | 5700 | from_nodeid, remid, error); |
| 5700 | rl->rl_result = cpu_to_le32(error); | 5701 | rl->rl_result = cpu_to_le32(error); |
| 5701 | return error; | 5702 | return error; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index d5abafd56a6d..04d6398c1f1c 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -190,7 +190,7 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
| 190 | else | 190 | else |
| 191 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); | 191 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); |
| 192 | 192 | ||
| 193 | log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving"); | 193 | log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving"); |
| 194 | 194 | ||
| 195 | /* dlm_controld will see the uevent, do the necessary group management | 195 | /* dlm_controld will see the uevent, do the necessary group management |
| 196 | and then write to sysfs to wake us */ | 196 | and then write to sysfs to wake us */ |
| @@ -198,7 +198,7 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
| 198 | error = wait_event_interruptible(ls->ls_uevent_wait, | 198 | error = wait_event_interruptible(ls->ls_uevent_wait, |
| 199 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); | 199 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); |
| 200 | 200 | ||
| 201 | log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result); | 201 | log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result); |
| 202 | 202 | ||
| 203 | if (error) | 203 | if (error) |
| 204 | goto out; | 204 | goto out; |
| @@ -640,7 +640,7 @@ static int new_lockspace(const char *name, const char *cluster, | |||
| 640 | 640 | ||
| 641 | dlm_create_debug_file(ls); | 641 | dlm_create_debug_file(ls); |
| 642 | 642 | ||
| 643 | log_debug(ls, "join complete"); | 643 | log_rinfo(ls, "join complete"); |
| 644 | *lockspace = ls; | 644 | *lockspace = ls; |
| 645 | return 0; | 645 | return 0; |
| 646 | 646 | ||
| @@ -835,7 +835,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
| 835 | dlm_clear_members(ls); | 835 | dlm_clear_members(ls); |
| 836 | dlm_clear_members_gone(ls); | 836 | dlm_clear_members_gone(ls); |
| 837 | kfree(ls->ls_node_array); | 837 | kfree(ls->ls_node_array); |
| 838 | log_debug(ls, "release_lockspace final free"); | 838 | log_rinfo(ls, "release_lockspace final free"); |
| 839 | kobject_put(&ls->ls_kobj); | 839 | kobject_put(&ls->ls_kobj); |
| 840 | /* The ls structure will be freed when the kobject is done with */ | 840 | /* The ls structure will be freed when the kobject is done with */ |
| 841 | 841 | ||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 476557b54921..9c47f1c14a8b 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
| @@ -60,18 +60,15 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
| 60 | 60 | ||
| 61 | #define SLOT_DEBUG_LINE 128 | 61 | #define SLOT_DEBUG_LINE 128 |
| 62 | 62 | ||
| 63 | static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | 63 | static void log_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, |
| 64 | struct rcom_slot *ro0, struct dlm_slot *array, | 64 | struct rcom_slot *ro0, struct dlm_slot *array, |
| 65 | int array_size) | 65 | int array_size) |
| 66 | { | 66 | { |
| 67 | char line[SLOT_DEBUG_LINE]; | 67 | char line[SLOT_DEBUG_LINE]; |
| 68 | int len = SLOT_DEBUG_LINE - 1; | 68 | int len = SLOT_DEBUG_LINE - 1; |
| 69 | int pos = 0; | 69 | int pos = 0; |
| 70 | int ret, i; | 70 | int ret, i; |
| 71 | 71 | ||
| 72 | if (!dlm_config.ci_log_debug) | ||
| 73 | return; | ||
| 74 | |||
| 75 | memset(line, 0, sizeof(line)); | 72 | memset(line, 0, sizeof(line)); |
| 76 | 73 | ||
| 77 | if (array) { | 74 | if (array) { |
| @@ -95,7 +92,7 @@ static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | |||
| 95 | } | 92 | } |
| 96 | } | 93 | } |
| 97 | 94 | ||
| 98 | log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); | 95 | log_rinfo(ls, "generation %u slots %d%s", gen, num_slots, line); |
| 99 | } | 96 | } |
| 100 | 97 | ||
| 101 | int dlm_slots_copy_in(struct dlm_ls *ls) | 98 | int dlm_slots_copy_in(struct dlm_ls *ls) |
| @@ -129,7 +126,7 @@ int dlm_slots_copy_in(struct dlm_ls *ls) | |||
| 129 | ro->ro_slot = le16_to_cpu(ro->ro_slot); | 126 | ro->ro_slot = le16_to_cpu(ro->ro_slot); |
| 130 | } | 127 | } |
| 131 | 128 | ||
| 132 | log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); | 129 | log_slots(ls, gen, num_slots, ro0, NULL, 0); |
| 133 | 130 | ||
| 134 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 131 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
| 135 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | 132 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { |
| @@ -274,7 +271,7 @@ int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | |||
| 274 | 271 | ||
| 275 | gen++; | 272 | gen++; |
| 276 | 273 | ||
| 277 | log_debug_slots(ls, gen, num, NULL, array, array_size); | 274 | log_slots(ls, gen, num, NULL, array, array_size); |
| 278 | 275 | ||
| 279 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - | 276 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - |
| 280 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); | 277 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); |
| @@ -447,7 +444,7 @@ static int ping_members(struct dlm_ls *ls) | |||
| 447 | break; | 444 | break; |
| 448 | } | 445 | } |
| 449 | if (error) | 446 | if (error) |
| 450 | log_debug(ls, "ping_members aborted %d last nodeid %d", | 447 | log_rinfo(ls, "ping_members aborted %d last nodeid %d", |
| 451 | error, ls->ls_recover_nodeid); | 448 | error, ls->ls_recover_nodeid); |
| 452 | return error; | 449 | return error; |
| 453 | } | 450 | } |
| @@ -539,7 +536,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
| 539 | count as a negative change so the "neg" recovery steps will happen */ | 536 | count as a negative change so the "neg" recovery steps will happen */ |
| 540 | 537 | ||
| 541 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | 538 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { |
| 542 | log_debug(ls, "prev removed member %d", memb->nodeid); | 539 | log_rinfo(ls, "prev removed member %d", memb->nodeid); |
| 543 | neg++; | 540 | neg++; |
| 544 | } | 541 | } |
| 545 | 542 | ||
| @@ -551,10 +548,10 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
| 551 | continue; | 548 | continue; |
| 552 | 549 | ||
| 553 | if (!node) { | 550 | if (!node) { |
| 554 | log_debug(ls, "remove member %d", memb->nodeid); | 551 | log_rinfo(ls, "remove member %d", memb->nodeid); |
| 555 | } else { | 552 | } else { |
| 556 | /* removed and re-added */ | 553 | /* removed and re-added */ |
| 557 | log_debug(ls, "remove member %d comm_seq %u %u", | 554 | log_rinfo(ls, "remove member %d comm_seq %u %u", |
| 558 | memb->nodeid, memb->comm_seq, node->comm_seq); | 555 | memb->nodeid, memb->comm_seq, node->comm_seq); |
| 559 | } | 556 | } |
| 560 | 557 | ||
| @@ -571,7 +568,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
| 571 | if (dlm_is_member(ls, node->nodeid)) | 568 | if (dlm_is_member(ls, node->nodeid)) |
| 572 | continue; | 569 | continue; |
| 573 | dlm_add_member(ls, node); | 570 | dlm_add_member(ls, node); |
| 574 | log_debug(ls, "add member %d", node->nodeid); | 571 | log_rinfo(ls, "add member %d", node->nodeid); |
| 575 | } | 572 | } |
| 576 | 573 | ||
| 577 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 574 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
| @@ -591,7 +588,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
| 591 | complete(&ls->ls_members_done); | 588 | complete(&ls->ls_members_done); |
| 592 | } | 589 | } |
| 593 | 590 | ||
| 594 | log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); | 591 | log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); |
| 595 | return error; | 592 | return error; |
| 596 | } | 593 | } |
| 597 | 594 | ||
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index a6bc63f6e31b..eaea789bf97d 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
| @@ -526,7 +526,7 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
| 526 | int nodir = dlm_no_directory(ls); | 526 | int nodir = dlm_no_directory(ls); |
| 527 | int error; | 527 | int error; |
| 528 | 528 | ||
| 529 | log_debug(ls, "dlm_recover_masters"); | 529 | log_rinfo(ls, "dlm_recover_masters"); |
| 530 | 530 | ||
| 531 | down_read(&ls->ls_root_sem); | 531 | down_read(&ls->ls_root_sem); |
| 532 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 532 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| @@ -552,7 +552,7 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
| 552 | } | 552 | } |
| 553 | up_read(&ls->ls_root_sem); | 553 | up_read(&ls->ls_root_sem); |
| 554 | 554 | ||
| 555 | log_debug(ls, "dlm_recover_masters %u of %u", count, total); | 555 | log_rinfo(ls, "dlm_recover_masters %u of %u", count, total); |
| 556 | 556 | ||
| 557 | error = dlm_wait_function(ls, &recover_idr_empty); | 557 | error = dlm_wait_function(ls, &recover_idr_empty); |
| 558 | out: | 558 | out: |
| @@ -685,7 +685,7 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
| 685 | } | 685 | } |
| 686 | up_read(&ls->ls_root_sem); | 686 | up_read(&ls->ls_root_sem); |
| 687 | 687 | ||
| 688 | log_debug(ls, "dlm_recover_locks %d out", count); | 688 | log_rinfo(ls, "dlm_recover_locks %d out", count); |
| 689 | 689 | ||
| 690 | error = dlm_wait_function(ls, &recover_list_empty); | 690 | error = dlm_wait_function(ls, &recover_list_empty); |
| 691 | out: | 691 | out: |
| @@ -883,7 +883,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
| 883 | up_read(&ls->ls_root_sem); | 883 | up_read(&ls->ls_root_sem); |
| 884 | 884 | ||
| 885 | if (count) | 885 | if (count) |
| 886 | log_debug(ls, "dlm_recover_rsbs %d done", count); | 886 | log_rinfo(ls, "dlm_recover_rsbs %d done", count); |
| 887 | } | 887 | } |
| 888 | 888 | ||
| 889 | /* Create a single list of all root rsb's to be used during recovery */ | 889 | /* Create a single list of all root rsb's to be used during recovery */ |
| @@ -950,6 +950,6 @@ void dlm_clear_toss(struct dlm_ls *ls) | |||
| 950 | } | 950 | } |
| 951 | 951 | ||
| 952 | if (count) | 952 | if (count) |
| 953 | log_debug(ls, "dlm_clear_toss %u done", count); | 953 | log_rinfo(ls, "dlm_clear_toss %u done", count); |
| 954 | } | 954 | } |
| 955 | 955 | ||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 32f9f8926ec3..6859b4bf971e 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
| @@ -55,7 +55,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 55 | unsigned long start; | 55 | unsigned long start; |
| 56 | int error, neg = 0; | 56 | int error, neg = 0; |
| 57 | 57 | ||
| 58 | log_debug(ls, "dlm_recover %llu", (unsigned long long)rv->seq); | 58 | log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq); |
| 59 | 59 | ||
| 60 | mutex_lock(&ls->ls_recoverd_active); | 60 | mutex_lock(&ls->ls_recoverd_active); |
| 61 | 61 | ||
| @@ -76,7 +76,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 76 | 76 | ||
| 77 | error = dlm_recover_members(ls, rv, &neg); | 77 | error = dlm_recover_members(ls, rv, &neg); |
| 78 | if (error) { | 78 | if (error) { |
| 79 | log_debug(ls, "dlm_recover_members error %d", error); | 79 | log_rinfo(ls, "dlm_recover_members error %d", error); |
| 80 | goto fail; | 80 | goto fail; |
| 81 | } | 81 | } |
| 82 | 82 | ||
| @@ -90,7 +90,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 90 | 90 | ||
| 91 | error = dlm_recover_members_wait(ls); | 91 | error = dlm_recover_members_wait(ls); |
| 92 | if (error) { | 92 | if (error) { |
| 93 | log_debug(ls, "dlm_recover_members_wait error %d", error); | 93 | log_rinfo(ls, "dlm_recover_members_wait error %d", error); |
| 94 | goto fail; | 94 | goto fail; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| @@ -103,7 +103,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 103 | 103 | ||
| 104 | error = dlm_recover_directory(ls); | 104 | error = dlm_recover_directory(ls); |
| 105 | if (error) { | 105 | if (error) { |
| 106 | log_debug(ls, "dlm_recover_directory error %d", error); | 106 | log_rinfo(ls, "dlm_recover_directory error %d", error); |
| 107 | goto fail; | 107 | goto fail; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| @@ -111,11 +111,11 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 111 | 111 | ||
| 112 | error = dlm_recover_directory_wait(ls); | 112 | error = dlm_recover_directory_wait(ls); |
| 113 | if (error) { | 113 | if (error) { |
| 114 | log_debug(ls, "dlm_recover_directory_wait error %d", error); | 114 | log_rinfo(ls, "dlm_recover_directory_wait error %d", error); |
| 115 | goto fail; | 115 | goto fail; |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | log_debug(ls, "dlm_recover_directory %u out %u messages", | 118 | log_rinfo(ls, "dlm_recover_directory %u out %u messages", |
| 119 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); | 119 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); |
| 120 | 120 | ||
| 121 | /* | 121 | /* |
| @@ -144,7 +144,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 144 | 144 | ||
| 145 | error = dlm_recover_masters(ls); | 145 | error = dlm_recover_masters(ls); |
| 146 | if (error) { | 146 | if (error) { |
| 147 | log_debug(ls, "dlm_recover_masters error %d", error); | 147 | log_rinfo(ls, "dlm_recover_masters error %d", error); |
| 148 | goto fail; | 148 | goto fail; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| @@ -154,7 +154,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 154 | 154 | ||
| 155 | error = dlm_recover_locks(ls); | 155 | error = dlm_recover_locks(ls); |
| 156 | if (error) { | 156 | if (error) { |
| 157 | log_debug(ls, "dlm_recover_locks error %d", error); | 157 | log_rinfo(ls, "dlm_recover_locks error %d", error); |
| 158 | goto fail; | 158 | goto fail; |
| 159 | } | 159 | } |
| 160 | 160 | ||
| @@ -162,11 +162,11 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 162 | 162 | ||
| 163 | error = dlm_recover_locks_wait(ls); | 163 | error = dlm_recover_locks_wait(ls); |
| 164 | if (error) { | 164 | if (error) { |
| 165 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 165 | log_rinfo(ls, "dlm_recover_locks_wait error %d", error); |
| 166 | goto fail; | 166 | goto fail; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | log_debug(ls, "dlm_recover_locks %u in", | 169 | log_rinfo(ls, "dlm_recover_locks %u in", |
| 170 | ls->ls_recover_locks_in); | 170 | ls->ls_recover_locks_in); |
| 171 | 171 | ||
| 172 | /* | 172 | /* |
| @@ -186,7 +186,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 186 | 186 | ||
| 187 | error = dlm_recover_locks_wait(ls); | 187 | error = dlm_recover_locks_wait(ls); |
| 188 | if (error) { | 188 | if (error) { |
| 189 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 189 | log_rinfo(ls, "dlm_recover_locks_wait error %d", error); |
| 190 | goto fail; | 190 | goto fail; |
| 191 | } | 191 | } |
| 192 | } | 192 | } |
| @@ -205,7 +205,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 205 | 205 | ||
| 206 | error = dlm_recover_done_wait(ls); | 206 | error = dlm_recover_done_wait(ls); |
| 207 | if (error) { | 207 | if (error) { |
| 208 | log_debug(ls, "dlm_recover_done_wait error %d", error); | 208 | log_rinfo(ls, "dlm_recover_done_wait error %d", error); |
| 209 | goto fail; | 209 | goto fail; |
| 210 | } | 210 | } |
| 211 | 211 | ||
| @@ -217,25 +217,25 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 217 | 217 | ||
| 218 | error = enable_locking(ls, rv->seq); | 218 | error = enable_locking(ls, rv->seq); |
| 219 | if (error) { | 219 | if (error) { |
| 220 | log_debug(ls, "enable_locking error %d", error); | 220 | log_rinfo(ls, "enable_locking error %d", error); |
| 221 | goto fail; | 221 | goto fail; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | error = dlm_process_requestqueue(ls); | 224 | error = dlm_process_requestqueue(ls); |
| 225 | if (error) { | 225 | if (error) { |
| 226 | log_debug(ls, "dlm_process_requestqueue error %d", error); | 226 | log_rinfo(ls, "dlm_process_requestqueue error %d", error); |
| 227 | goto fail; | 227 | goto fail; |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | error = dlm_recover_waiters_post(ls); | 230 | error = dlm_recover_waiters_post(ls); |
| 231 | if (error) { | 231 | if (error) { |
| 232 | log_debug(ls, "dlm_recover_waiters_post error %d", error); | 232 | log_rinfo(ls, "dlm_recover_waiters_post error %d", error); |
| 233 | goto fail; | 233 | goto fail; |
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | dlm_recover_grant(ls); | 236 | dlm_recover_grant(ls); |
| 237 | 237 | ||
| 238 | log_debug(ls, "dlm_recover %llu generation %u done: %u ms", | 238 | log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms", |
| 239 | (unsigned long long)rv->seq, ls->ls_generation, | 239 | (unsigned long long)rv->seq, ls->ls_generation, |
| 240 | jiffies_to_msecs(jiffies - start)); | 240 | jiffies_to_msecs(jiffies - start)); |
| 241 | mutex_unlock(&ls->ls_recoverd_active); | 241 | mutex_unlock(&ls->ls_recoverd_active); |
| @@ -245,7 +245,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
| 245 | 245 | ||
| 246 | fail: | 246 | fail: |
| 247 | dlm_release_root_list(ls); | 247 | dlm_release_root_list(ls); |
| 248 | log_debug(ls, "dlm_recover %llu error %d", | 248 | log_rinfo(ls, "dlm_recover %llu error %d", |
| 249 | (unsigned long long)rv->seq, error); | 249 | (unsigned long long)rv->seq, error); |
| 250 | mutex_unlock(&ls->ls_recoverd_active); | 250 | mutex_unlock(&ls->ls_recoverd_active); |
| 251 | return error; | 251 | return error; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 9fd702f5bfb2..9280202e488c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
| @@ -59,10 +59,22 @@ int drop_caches_sysctl_handler(ctl_table *table, int write, | |||
| 59 | if (ret) | 59 | if (ret) |
| 60 | return ret; | 60 | return ret; |
| 61 | if (write) { | 61 | if (write) { |
| 62 | if (sysctl_drop_caches & 1) | 62 | static int stfu; |
| 63 | |||
| 64 | if (sysctl_drop_caches & 1) { | ||
| 63 | iterate_supers(drop_pagecache_sb, NULL); | 65 | iterate_supers(drop_pagecache_sb, NULL); |
| 64 | if (sysctl_drop_caches & 2) | 66 | count_vm_event(DROP_PAGECACHE); |
| 67 | } | ||
| 68 | if (sysctl_drop_caches & 2) { | ||
| 65 | drop_slab(); | 69 | drop_slab(); |
| 70 | count_vm_event(DROP_SLAB); | ||
| 71 | } | ||
| 72 | if (!stfu) { | ||
| 73 | pr_info("%s (%d): drop_caches: %d\n", | ||
| 74 | current->comm, task_pid_nr(current), | ||
| 75 | sysctl_drop_caches); | ||
| 76 | } | ||
| 77 | stfu |= sysctl_drop_caches & 4; | ||
| 66 | } | 78 | } |
| 67 | return 0; | 79 | return 0; |
| 68 | } | 80 | } |
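After this change each effective drop_caches write bumps a vmstat event (DROP_PAGECACHE or DROP_SLAB) and is logged with the writer's comm and pid; once a value with bit 4 set has been written, the static stfu latch suppresses the message for all later writes. The interface itself is unchanged; illustrative userspace use:

#include <stdio.h>

int main(void)
{
	/* 1 = page cache, 2 = slab, 3 = both; OR in 4 to silence the
	 * kernel log message introduced by this patch (needs root) */
	FILE *f = fopen("/proc/sys/vm/drop_caches", "w");

	if (!f)
		return 1;
	fputs("3", f);
	fclose(f);
	return 0;
}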
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index b167ca48b8ee..d4a9431ec73c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
| @@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 641 | } | 641 | } |
| 642 | rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, | 642 | rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, |
| 643 | lower_new_dir_dentry->d_inode, lower_new_dentry, | 643 | lower_new_dir_dentry->d_inode, lower_new_dentry, |
| 644 | NULL); | 644 | NULL, 0); |
| 645 | if (rc) | 645 | if (rc) |
| 646 | goto out_lock; | 646 | goto out_lock; |
| 647 | if (target_inode) | 647 | if (target_inode) |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index e879cf8ff0b1..afa1b81c3418 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
| @@ -132,7 +132,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 132 | */ | 132 | */ |
| 133 | static void ecryptfs_evict_inode(struct inode *inode) | 133 | static void ecryptfs_evict_inode(struct inode *inode) |
| 134 | { | 134 | { |
| 135 | truncate_inode_pages(&inode->i_data, 0); | 135 | truncate_inode_pages_final(&inode->i_data); |
| 136 | clear_inode(inode); | 136 | clear_inode(inode); |
| 137 | iput(ecryptfs_inode_to_lower(inode)); | 137 | iput(ecryptfs_inode_to_lower(inode)); |
| 138 | } | 138 | } |
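truncate_inode_pages(mapping, 0) to truncate_inode_pages_final(mapping) is another substitution repeated across this diff (9p, affs, ecryptfs, exofs, ext2, ext3, ...). The _final variant is specific to ->evict_inode(), where no new pages can be added to the mapping, and lets the page-cache core make end-of-life ordering assumptions that a plain truncate cannot. The resulting eviction boilerplate:

/* Typical ->evict_inode() shape after this series (sketch): */
static void foo_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	/* ...filesystem-specific teardown... */
}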
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8dd524f32284..cdb2971192a5 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c | |||
| @@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file, | |||
| 21 | u32 attributes; | 21 | u32 attributes; |
| 22 | struct inode *inode = file->f_mapping->host; | 22 | struct inode *inode = file->f_mapping->host; |
| 23 | unsigned long datasize = count - sizeof(attributes); | 23 | unsigned long datasize = count - sizeof(attributes); |
| 24 | ssize_t bytes = 0; | 24 | ssize_t bytes; |
| 25 | bool set = false; | 25 | bool set = false; |
| 26 | 26 | ||
| 27 | if (count < sizeof(attributes)) | 27 | if (count < sizeof(attributes)) |
| @@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file, | |||
| 33 | if (attributes & ~(EFI_VARIABLE_MASK)) | 33 | if (attributes & ~(EFI_VARIABLE_MASK)) |
| 34 | return -EINVAL; | 34 | return -EINVAL; |
| 35 | 35 | ||
| 36 | data = kmalloc(datasize, GFP_KERNEL); | 36 | data = memdup_user(userbuf + sizeof(attributes), datasize); |
| 37 | if (!data) | 37 | if (IS_ERR(data)) |
| 38 | return -ENOMEM; | 38 | return PTR_ERR(data); |
| 39 | |||
| 40 | if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { | ||
| 41 | bytes = -EFAULT; | ||
| 42 | goto out; | ||
| 43 | } | ||
| 44 | 39 | ||
| 45 | bytes = efivar_entry_set_get_size(var, attributes, &datasize, | 40 | bytes = efivar_entry_set_get_size(var, attributes, &datasize, |
| 46 | data, &set); | 41 | data, &set); |
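The efivarfs hunk collapses the kmalloc + copy_from_user + error-unwind dance into memdup_user(), which hands back either the fresh copy or an ERR_PTR. Roughly what it does internally, as a sketch rather than the verbatim mm/util.c source:

static void *memdup_user_sketch(const void __user *src, size_t len)
{
	void *p = kmalloc(len, GFP_KERNEL);

	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	return p;
}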
diff --git a/fs/efs/super.c b/fs/efs/super.c index 50215bbd6463..3befcc9f5d63 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
| @@ -91,7 +91,7 @@ static void init_once(void *foo) | |||
| 91 | inode_init_once(&ei->vfs_inode); | 91 | inode_init_once(&ei->vfs_inode); |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | static int init_inodecache(void) | 94 | static int __init init_inodecache(void) |
| 95 | { | 95 | { |
| 96 | efs_inode_cachep = kmem_cache_create("efs_inode_cache", | 96 | efs_inode_cachep = kmem_cache_create("efs_inode_cache", |
| 97 | sizeof(struct efs_inode_info), | 97 | sizeof(struct efs_inode_info), |
| @@ -114,6 +114,7 @@ static void destroy_inodecache(void) | |||
| 114 | 114 | ||
| 115 | static int efs_remount(struct super_block *sb, int *flags, char *data) | 115 | static int efs_remount(struct super_block *sb, int *flags, char *data) |
| 116 | { | 116 | { |
| 117 | sync_filesystem(sb); | ||
| 117 | *flags |= MS_RDONLY; | 118 | *flags |= MS_RDONLY; |
| 118 | return 0; | 119 | return 0; |
| 119 | } | 120 | } |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -97,6 +97,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) | |||
| 97 | module_put(fmt->module); | 97 | module_put(fmt->module); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | #ifdef CONFIG_USELIB | ||
| 100 | /* | 101 | /* |
| 101 | * Note that a shared library must be both readable and executable due to | 102 | * Note that a shared library must be both readable and executable due to |
| 102 | * security reasons. | 103 | * security reasons. |
| @@ -156,6 +157,7 @@ exit: | |||
| 156 | out: | 157 | out: |
| 157 | return error; | 158 | return error; |
| 158 | } | 159 | } |
| 160 | #endif /* #ifdef CONFIG_USELIB */ | ||
| 159 | 161 | ||
| 160 | #ifdef CONFIG_MMU | 162 | #ifdef CONFIG_MMU |
| 161 | /* | 163 | /* |
| @@ -1619,9 +1621,9 @@ SYSCALL_DEFINE3(execve, | |||
| 1619 | return do_execve(getname(filename), argv, envp); | 1621 | return do_execve(getname(filename), argv, envp); |
| 1620 | } | 1622 | } |
| 1621 | #ifdef CONFIG_COMPAT | 1623 | #ifdef CONFIG_COMPAT |
| 1622 | asmlinkage long compat_sys_execve(const char __user * filename, | 1624 | COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, |
| 1623 | const compat_uptr_t __user * argv, | 1625 | const compat_uptr_t __user *, argv, |
| 1624 | const compat_uptr_t __user * envp) | 1626 | const compat_uptr_t __user *, envp) |
| 1625 | { | 1627 | { |
| 1626 | return compat_do_execve(getname(filename), argv, envp); | 1628 | return compat_do_execve(getname(filename), argv, envp); |
| 1627 | } | 1629 | } |
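The new CONFIG_USELIB guard compiles out the legacy uselib(2) loader used by libc5-era shared libraries. Kernels built without it presumably fall back to the standard stub for optional syscalls, which returns -ENOSYS; the usual mechanism looks like:

/* kernel/sys_ni.c style (assumption for this option): */
cond_syscall(sys_uselib);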
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ee4317faccb1..d1c244d67667 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -1486,7 +1486,7 @@ void exofs_evict_inode(struct inode *inode) | |||
| 1486 | struct ore_io_state *ios; | 1486 | struct ore_io_state *ios; |
| 1487 | int ret; | 1487 | int ret; |
| 1488 | 1488 | ||
| 1489 | truncate_inode_pages(&inode->i_data, 0); | 1489 | truncate_inode_pages_final(&inode->i_data); |
| 1490 | 1490 | ||
| 1491 | /* TODO: should do better here */ | 1491 | /* TODO: should do better here */ |
| 1492 | if (inode->i_nlink || is_bad_inode(inode)) | 1492 | if (inode->i_nlink || is_bad_inode(inode)) |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 94ed36849b71..b1d2a4675d42 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
| @@ -78,7 +78,7 @@ void ext2_evict_inode(struct inode * inode) | |||
| 78 | dquot_drop(inode); | 78 | dquot_drop(inode); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | truncate_inode_pages(&inode->i_data, 0); | 81 | truncate_inode_pages_final(&inode->i_data); |
| 82 | 82 | ||
| 83 | if (want_delete) { | 83 | if (want_delete) { |
| 84 | sb_start_intwrite(inode->i_sb); | 84 | sb_start_intwrite(inode->i_sb); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 20d6697bd638..d260115c0350 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
| 1254 | unsigned long old_sb_flags; | 1254 | unsigned long old_sb_flags; |
| 1255 | int err; | 1255 | int err; |
| 1256 | 1256 | ||
| 1257 | sync_filesystem(sb); | ||
| 1257 | spin_lock(&sbi->s_lock); | 1258 | spin_lock(&sbi->s_lock); |
| 1258 | 1259 | ||
| 1259 | /* Store the old options */ | 1260 | /* Store the old options */ |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 384b6ebb655f..efce2bbfb5e5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -228,7 +228,7 @@ void ext3_evict_inode (struct inode *inode) | |||
| 228 | log_wait_commit(journal, commit_tid); | 228 | log_wait_commit(journal, commit_tid); |
| 229 | filemap_write_and_wait(&inode->i_data); | 229 | filemap_write_and_wait(&inode->i_data); |
| 230 | } | 230 | } |
| 231 | truncate_inode_pages(&inode->i_data, 0); | 231 | truncate_inode_pages_final(&inode->i_data); |
| 232 | 232 | ||
| 233 | ext3_discard_reservation(inode); | 233 | ext3_discard_reservation(inode); |
| 234 | rsv = ei->i_block_alloc_info; | 234 | rsv = ei->i_block_alloc_info; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 37fd31ed16e7..95c6c5a6d0c5 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -2649,6 +2649,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
| 2649 | int i; | 2649 | int i; |
| 2650 | #endif | 2650 | #endif |
| 2651 | 2651 | ||
| 2652 | sync_filesystem(sb); | ||
| 2653 | |||
| 2652 | /* Store the original options */ | 2654 | /* Store the original options */ |
| 2653 | old_sb_flags = sb->s_flags; | 2655 | old_sb_flags = sb->s_flags; |
| 2654 | old_opts.s_mount_opt = sbi->s_mount_opt; | 2656 | old_opts.s_mount_opt = sbi->s_mount_opt; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d3a534fdc5ff..f1c65dc7cc0a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
| 32 | #include <linux/ratelimit.h> | 32 | #include <linux/ratelimit.h> |
| 33 | #include <crypto/hash.h> | 33 | #include <crypto/hash.h> |
| 34 | #include <linux/falloc.h> | ||
| 34 | #ifdef __KERNEL__ | 35 | #ifdef __KERNEL__ |
| 35 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
| 36 | #endif | 37 | #endif |
| @@ -567,6 +568,8 @@ enum { | |||
| 567 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | 568 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 |
| 568 | /* Do not put hole in extent cache */ | 569 | /* Do not put hole in extent cache */ |
| 569 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 | 570 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 |
| 571 | /* Convert written extents to unwritten */ | ||
| 572 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 | ||
| 570 | 573 | ||
| 571 | /* | 574 | /* |
| 572 | * The bit position of these flags must not overlap with any of the | 575 | * The bit position of these flags must not overlap with any of the |
| @@ -998,6 +1001,8 @@ struct ext4_inode_info { | |||
| 998 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group | 1001 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group |
| 999 | size of blocksize * 8 | 1002 | size of blocksize * 8 |
| 1000 | blocks */ | 1003 | blocks */ |
| 1004 | #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated | ||
| 1005 | file systems */ | ||
| 1001 | 1006 | ||
| 1002 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ | 1007 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ |
| 1003 | ~EXT4_MOUNT_##opt | 1008 | ~EXT4_MOUNT_##opt |
| @@ -1326,6 +1331,7 @@ struct ext4_sb_info { | |||
| 1326 | struct list_head s_es_lru; | 1331 | struct list_head s_es_lru; |
| 1327 | unsigned long s_es_last_sorted; | 1332 | unsigned long s_es_last_sorted; |
| 1328 | struct percpu_counter s_extent_cache_cnt; | 1333 | struct percpu_counter s_extent_cache_cnt; |
| 1334 | struct mb_cache *s_mb_cache; | ||
| 1329 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1335 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
| 1330 | 1336 | ||
| 1331 | /* Ratelimit ext4 messages. */ | 1337 | /* Ratelimit ext4 messages. */ |
| @@ -2133,8 +2139,6 @@ extern int ext4_writepage_trans_blocks(struct inode *); | |||
| 2133 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 2139 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
| 2134 | extern int ext4_block_truncate_page(handle_t *handle, | 2140 | extern int ext4_block_truncate_page(handle_t *handle, |
| 2135 | struct address_space *mapping, loff_t from); | 2141 | struct address_space *mapping, loff_t from); |
| 2136 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
| 2137 | struct address_space *mapping, loff_t from, loff_t length); | ||
| 2138 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 2142 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
| 2139 | loff_t lstart, loff_t lend); | 2143 | loff_t lstart, loff_t lend); |
| 2140 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2144 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
| @@ -2757,6 +2761,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | |||
| 2757 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2761 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 2758 | __u64 start, __u64 len); | 2762 | __u64 start, __u64 len); |
| 2759 | extern int ext4_ext_precache(struct inode *inode); | 2763 | extern int ext4_ext_precache(struct inode *inode); |
| 2764 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | ||
| 2760 | 2765 | ||
| 2761 | /* move_extent.c */ | 2766 | /* move_extent.c */ |
| 2762 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2767 | extern void ext4_double_down_write_data_sem(struct inode *first, |
| @@ -2766,6 +2771,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
| 2766 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2771 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
| 2767 | __u64 start_orig, __u64 start_donor, | 2772 | __u64 start_orig, __u64 start_donor, |
| 2768 | __u64 len, __u64 *moved_len); | 2773 | __u64 len, __u64 *moved_len); |
| 2774 | extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
| 2775 | struct ext4_extent **extent); | ||
| 2769 | 2776 | ||
| 2770 | /* page-io.c */ | 2777 | /* page-io.c */ |
| 2771 | extern int __init ext4_init_pageio(void); | 2778 | extern int __init ext4_init_pageio(void); |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 3fe29de832c8..c3fb607413ed 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
| @@ -259,6 +259,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
| 259 | if (WARN_ON_ONCE(err)) { | 259 | if (WARN_ON_ONCE(err)) { |
| 260 | ext4_journal_abort_handle(where, line, __func__, bh, | 260 | ext4_journal_abort_handle(where, line, __func__, bh, |
| 261 | handle, err); | 261 | handle, err); |
| 262 | if (inode == NULL) { | ||
| 263 | pr_err("EXT4: jbd2_journal_dirty_metadata " | ||
| 264 | "failed: handle type %u started at " | ||
| 265 | "line %u, credits %u/%u, errcode %d", | ||
| 266 | handle->h_type, | ||
| 267 | handle->h_line_no, | ||
| 268 | handle->h_requested_credits, | ||
| 269 | handle->h_buffer_credits, err); | ||
| 270 | return err; | ||
| 271 | } | ||
| 262 | ext4_error_inode(inode, where, line, | 272 | ext4_error_inode(inode, where, line, |
| 263 | bh->b_blocknr, | 273 | bh->b_blocknr, |
| 264 | "journal_dirty_metadata failed: " | 274 | "journal_dirty_metadata failed: " |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74bc2d549c58..82df3ce9874a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
| 38 | #include <linux/string.h> | 38 | #include <linux/string.h> |
| 39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
| 40 | #include <linux/falloc.h> | ||
| 41 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
| 42 | #include <linux/fiemap.h> | 41 | #include <linux/fiemap.h> |
| 43 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
| @@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
| 1691 | * the extent that was written properly split out and conversion to | 1690 | * the extent that was written properly split out and conversion to |
| 1692 | * initialized is trivial. | 1691 | * initialized is trivial. |
| 1693 | */ | 1692 | */ |
| 1694 | if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) | 1693 | if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) |
| 1695 | return 0; | 1694 | return 0; |
| 1696 | 1695 | ||
| 1697 | ext1_ee_len = ext4_ext_get_actual_len(ex1); | 1696 | ext1_ee_len = ext4_ext_get_actual_len(ex1); |
| @@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
| 1708 | */ | 1707 | */ |
| 1709 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) | 1708 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) |
| 1710 | return 0; | 1709 | return 0; |
| 1710 | if (ext4_ext_is_uninitialized(ex1) && | ||
| 1711 | (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || | ||
| 1712 | atomic_read(&EXT4_I(inode)->i_unwritten) || | ||
| 1713 | (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) | ||
| 1714 | return 0; | ||
| 1711 | #ifdef AGGRESSIVE_TEST | 1715 | #ifdef AGGRESSIVE_TEST |
| 1712 | if (ext1_ee_len >= 4) | 1716 | if (ext1_ee_len >= 4) |
| 1713 | return 0; | 1717 | return 0; |
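Editor's note: the two hunks above change the merge rule. The old test refused to merge whenever either extent was uninitialized; the new test refuses only when the two extents' written/unwritten states differ, adding a cap so a merged unwritten extent cannot exceed EXT_UNINIT_MAX_LEN (the real code also bails while DIO unwritten conversion is in flight, which this sketch omits). A toy, self-contained model of the predicate; the length constants are stand-ins for EXT_INIT_MAX_LEN/EXT_UNINIT_MAX_LEN:

#include <stdbool.h>
#include <stdio.h>

#define INIT_MAX_LEN   32768
#define UNINIT_MAX_LEN 32767

struct ext { unsigned block, len; bool uninit; };

static bool can_merge(const struct ext *a, const struct ext *b)
{
	if (a->uninit != b->uninit)		/* was: a->uninit || b->uninit */
		return false;
	if (a->block + a->len != b->block)	/* must be logically contiguous */
		return false;
	if (a->len + b->len > INIT_MAX_LEN)
		return false;
	if (a->uninit && a->len + b->len > UNINIT_MAX_LEN)
		return false;
	return true;
}

int main(void)
{
	struct ext a = { 0, 100, true }, b = { 100, 50, true };
	printf("uninit+uninit: %d\n", can_merge(&a, &b));	/* now mergeable */
	b.uninit = false;
	printf("uninit+init:   %d\n", can_merge(&a, &b));	/* still refused */
	return 0;
}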
| @@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
| 1731 | { | 1735 | { |
| 1732 | struct ext4_extent_header *eh; | 1736 | struct ext4_extent_header *eh; |
| 1733 | unsigned int depth, len; | 1737 | unsigned int depth, len; |
| 1734 | int merge_done = 0; | 1738 | int merge_done = 0, uninit; |
| 1735 | 1739 | ||
| 1736 | depth = ext_depth(inode); | 1740 | depth = ext_depth(inode); |
| 1737 | BUG_ON(path[depth].p_hdr == NULL); | 1741 | BUG_ON(path[depth].p_hdr == NULL); |
| @@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
| 1741 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) | 1745 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) |
| 1742 | break; | 1746 | break; |
| 1743 | /* merge with next extent! */ | 1747 | /* merge with next extent! */ |
| 1748 | uninit = ext4_ext_is_uninitialized(ex); | ||
| 1744 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1749 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
| 1745 | + ext4_ext_get_actual_len(ex + 1)); | 1750 | + ext4_ext_get_actual_len(ex + 1)); |
| 1751 | if (uninit) | ||
| 1752 | ext4_ext_mark_uninitialized(ex); | ||
| 1746 | 1753 | ||
| 1747 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { | 1754 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { |
| 1748 | len = (EXT_LAST_EXTENT(eh) - ex - 1) | 1755 | len = (EXT_LAST_EXTENT(eh) - ex - 1) |
| @@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1896 | struct ext4_ext_path *npath = NULL; | 1903 | struct ext4_ext_path *npath = NULL; |
| 1897 | int depth, len, err; | 1904 | int depth, len, err; |
| 1898 | ext4_lblk_t next; | 1905 | ext4_lblk_t next; |
| 1899 | int mb_flags = 0; | 1906 | int mb_flags = 0, uninit; |
| 1900 | 1907 | ||
| 1901 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1908 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
| 1902 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1909 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
| @@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1946 | path + depth); | 1953 | path + depth); |
| 1947 | if (err) | 1954 | if (err) |
| 1948 | return err; | 1955 | return err; |
| 1949 | 1956 | uninit = ext4_ext_is_uninitialized(ex); | |
| 1950 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1957 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
| 1951 | + ext4_ext_get_actual_len(newext)); | 1958 | + ext4_ext_get_actual_len(newext)); |
| 1959 | if (uninit) | ||
| 1960 | ext4_ext_mark_uninitialized(ex); | ||
| 1952 | eh = path[depth].p_hdr; | 1961 | eh = path[depth].p_hdr; |
| 1953 | nearex = ex; | 1962 | nearex = ex; |
| 1954 | goto merge; | 1963 | goto merge; |
| @@ -1971,10 +1980,13 @@ prepend: | |||
| 1971 | if (err) | 1980 | if (err) |
| 1972 | return err; | 1981 | return err; |
| 1973 | 1982 | ||
| 1983 | uninit = ext4_ext_is_uninitialized(ex); | ||
| 1974 | ex->ee_block = newext->ee_block; | 1984 | ex->ee_block = newext->ee_block; |
| 1975 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); | 1985 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); |
| 1976 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1986 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
| 1977 | + ext4_ext_get_actual_len(newext)); | 1987 | + ext4_ext_get_actual_len(newext)); |
| 1988 | if (uninit) | ||
| 1989 | ext4_ext_mark_uninitialized(ex); | ||
| 1978 | eh = path[depth].p_hdr; | 1990 | eh = path[depth].p_hdr; |
| 1979 | nearex = ex; | 1991 | nearex = ex; |
| 1980 | goto merge; | 1992 | goto merge; |
| @@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2585 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2597 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| 2586 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2598 | ex_ee_len = ext4_ext_get_actual_len(ex); |
| 2587 | 2599 | ||
| 2600 | /* | ||
| 2601 | * If we're starting with an extent other than the last one in the | ||
| 2602 | * node, we need to see if it shares a cluster with the extent to | ||
| 2603 | * the right (towards the end of the file). If its leftmost cluster | ||
| 2604 | * is this extent's rightmost cluster and it is not cluster aligned, | ||
| 2605 | * we'll mark it as a partial that is not to be deallocated. | ||
| 2606 | */ | ||
| 2607 | |||
| 2608 | if (ex != EXT_LAST_EXTENT(eh)) { | ||
| 2609 | ext4_fsblk_t current_pblk, right_pblk; | ||
| 2610 | long long current_cluster, right_cluster; | ||
| 2611 | |||
| 2612 | current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; | ||
| 2613 | current_cluster = (long long)EXT4_B2C(sbi, current_pblk); | ||
| 2614 | right_pblk = ext4_ext_pblock(ex + 1); | ||
| 2615 | right_cluster = (long long)EXT4_B2C(sbi, right_pblk); | ||
| 2616 | if (current_cluster == right_cluster && | ||
| 2617 | EXT4_PBLK_COFF(sbi, right_pblk)) | ||
| 2618 | *partial_cluster = -right_cluster; | ||
| 2619 | } | ||
| 2620 | |||
| 2588 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | 2621 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); |
| 2589 | 2622 | ||
| 2590 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2623 | while (ex >= EXT_FIRST_EXTENT(eh) && |
| @@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2710 | err = ext4_ext_correct_indexes(handle, inode, path); | 2743 | err = ext4_ext_correct_indexes(handle, inode, path); |
| 2711 | 2744 | ||
| 2712 | /* | 2745 | /* |
| 2713 | * Free the partial cluster only if the current extent does not | 2746 | * If there's a partial cluster and at least one extent remains in |
| 2714 | * reference it. Otherwise we might free used cluster. | 2747 | * the leaf, free the partial cluster if it isn't shared with the |
| 2748 | * current extent. If there's a partial cluster and no extents | ||
| 2749 | * remain in the leaf, it can't be freed here. It can only be | ||
| 2750 | * freed when it's possible to determine if it's not shared with | ||
| 2751 | * any other extent - when the next leaf is processed or when space | ||
| 2752 | * removal is complete. | ||
| 2715 | */ | 2753 | */ |
| 2716 | if (*partial_cluster > 0 && | 2754 | if (*partial_cluster > 0 && eh->eh_entries && |
| 2717 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2755 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
| 2718 | *partial_cluster)) { | 2756 | *partial_cluster)) { |
| 2719 | int flags = get_default_free_blocks_flags(inode); | 2757 | int flags = get_default_free_blocks_flags(inode); |
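Editor's note: both hunks above deal with bigalloc partial clusters — an extent's last block may live in the same allocation cluster as the next extent's first block, in which case the cluster must not be freed while either extent still references it. A self-contained sketch of the shared-cluster test, with the EXT4_B2C()/EXT4_PBLK_COFF() arithmetic open-coded for an assumed 16-blocks-per-cluster layout:

#include <stdbool.h>
#include <stdio.h>

#define CLUSTER_BITS 4	/* 16 blocks per cluster; stand-in value */

static unsigned long long b2c(unsigned long long pblk)
{
	return pblk >> CLUSTER_BITS;			/* EXT4_B2C() analogue */
}

static unsigned cluster_off(unsigned long long pblk)
{
	return pblk & ((1u << CLUSTER_BITS) - 1);	/* EXT4_PBLK_COFF() analogue */
}

/* Does the extent ending at last_pblk share a cluster with the extent
 * starting at right_pblk, so that freeing the cluster would be unsafe? */
static bool shares_partial_cluster(unsigned long long last_pblk,
				   unsigned long long right_pblk)
{
	return b2c(last_pblk) == b2c(right_pblk) && cluster_off(right_pblk) != 0;
}

int main(void)
{
	/* extent A ends at block 35, extent B starts at block 36:
	 * both live in cluster 2 and 36 is not cluster-aligned */
	printf("%d\n", shares_partial_cluster(35, 36));	/* 1: shared */
	printf("%d\n", shares_partial_cluster(31, 32));	/* 0: aligned start */
	return 0;
}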
| @@ -3569,6 +3607,8 @@ out: | |||
| 3569 | * b> Splits in two extents: Write is happening at either end of the extent | 3607 | * b> Splits in two extents: Write is happening at either end of the extent |
| 3570 | * c> Splits in three extents: Someone is writing in the middle of the extent | 3608 | * c> Splits in three extents: Someone is writing in the middle of the extent |
| 3571 | * | 3609 | * |
| 3610 | * This works the same way in the case of initialized -> unwritten conversion. | ||
| 3611 | * | ||
| 3572 | * One or more index blocks may be needed if the extent tree grows after | 3612 | * One or more index blocks may be needed if the extent tree grows after |
| 3573 | * the uninitialized extent split. To prevent ENOSPC occurring when the IO | 3613 | * the uninitialized extent split. To prevent ENOSPC occurring when the IO |
| 3574 | * completes, we need to split the uninitialized extent before DIO submit | 3614 | * completes, we need to split the uninitialized extent before DIO submit |
| @@ -3579,7 +3619,7 @@ out: | |||
| 3579 | * | 3619 | * |
| 3580 | * Returns the size of uninitialized extent to be written on success. | 3620 | * Returns the size of uninitialized extent to be written on success. |
| 3581 | */ | 3621 | */ |
| 3582 | static int ext4_split_unwritten_extents(handle_t *handle, | 3622 | static int ext4_split_convert_extents(handle_t *handle, |
| 3583 | struct inode *inode, | 3623 | struct inode *inode, |
| 3584 | struct ext4_map_blocks *map, | 3624 | struct ext4_map_blocks *map, |
| 3585 | struct ext4_ext_path *path, | 3625 | struct ext4_ext_path *path, |
| @@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
| 3591 | unsigned int ee_len; | 3631 | unsigned int ee_len; |
| 3592 | int split_flag = 0, depth; | 3632 | int split_flag = 0, depth; |
| 3593 | 3633 | ||
| 3594 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | 3634 | ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", |
| 3595 | "block %llu, max_blocks %u\n", inode->i_ino, | 3635 | __func__, inode->i_ino, |
| 3596 | (unsigned long long)map->m_lblk, map->m_len); | 3636 | (unsigned long long)map->m_lblk, map->m_len); |
| 3597 | 3637 | ||
| 3598 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3638 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
| 3599 | inode->i_sb->s_blocksize_bits; | 3639 | inode->i_sb->s_blocksize_bits; |
| @@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
| 3608 | ee_block = le32_to_cpu(ex->ee_block); | 3648 | ee_block = le32_to_cpu(ex->ee_block); |
| 3609 | ee_len = ext4_ext_get_actual_len(ex); | 3649 | ee_len = ext4_ext_get_actual_len(ex); |
| 3610 | 3650 | ||
| 3611 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3651 | /* Convert to unwritten */ |
| 3612 | split_flag |= EXT4_EXT_MARK_UNINIT2; | 3652 | if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { |
| 3613 | if (flags & EXT4_GET_BLOCKS_CONVERT) | 3653 | split_flag |= EXT4_EXT_DATA_VALID1; |
| 3614 | split_flag |= EXT4_EXT_DATA_VALID2; | 3654 | /* Convert to initialized */ |
| 3655 | } else if (flags & EXT4_GET_BLOCKS_CONVERT) { | ||
| 3656 | split_flag |= ee_block + ee_len <= eof_block ? | ||
| 3657 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
| 3658 | split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); | ||
| 3659 | } | ||
| 3615 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3660 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
| 3616 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3661 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
| 3617 | } | 3662 | } |
| 3618 | 3663 | ||
| 3664 | static int ext4_convert_initialized_extents(handle_t *handle, | ||
| 3665 | struct inode *inode, | ||
| 3666 | struct ext4_map_blocks *map, | ||
| 3667 | struct ext4_ext_path *path) | ||
| 3668 | { | ||
| 3669 | struct ext4_extent *ex; | ||
| 3670 | ext4_lblk_t ee_block; | ||
| 3671 | unsigned int ee_len; | ||
| 3672 | int depth; | ||
| 3673 | int err = 0; | ||
| 3674 | |||
| 3675 | depth = ext_depth(inode); | ||
| 3676 | ex = path[depth].p_ext; | ||
| 3677 | ee_block = le32_to_cpu(ex->ee_block); | ||
| 3678 | ee_len = ext4_ext_get_actual_len(ex); | ||
| 3679 | |||
| 3680 | ext_debug("%s: inode %lu, logical " | ||
| 3681 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, | ||
| 3682 | (unsigned long long)ee_block, ee_len); | ||
| 3683 | |||
| 3684 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
| 3685 | err = ext4_split_convert_extents(handle, inode, map, path, | ||
| 3686 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
| 3687 | if (err < 0) | ||
| 3688 | goto out; | ||
| 3689 | ext4_ext_drop_refs(path); | ||
| 3690 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
| 3691 | if (IS_ERR(path)) { | ||
| 3692 | err = PTR_ERR(path); | ||
| 3693 | goto out; | ||
| 3694 | } | ||
| 3695 | depth = ext_depth(inode); | ||
| 3696 | ex = path[depth].p_ext; | ||
| 3697 | } | ||
| 3698 | |||
| 3699 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
| 3700 | if (err) | ||
| 3701 | goto out; | ||
| 3702 | /* first mark the extent as uninitialized */ | ||
| 3703 | ext4_ext_mark_uninitialized(ex); | ||
| 3704 | |||
| 3705 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
| 3706 | * borders are not changed | ||
| 3707 | */ | ||
| 3708 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
| 3709 | |||
| 3710 | /* Mark modified extent as dirty */ | ||
| 3711 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
| 3712 | out: | ||
| 3713 | ext4_ext_show_leaf(inode, path); | ||
| 3714 | return err; | ||
| 3715 | } | ||
| 3716 | |||
| 3717 | |||
| 3619 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3718 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
| 3620 | struct inode *inode, | 3719 | struct inode *inode, |
| 3621 | struct ext4_map_blocks *map, | 3720 | struct ext4_map_blocks *map, |
| @@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
| 3649 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3748 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
| 3650 | (unsigned long long)map->m_lblk, map->m_len); | 3749 | (unsigned long long)map->m_lblk, map->m_len); |
| 3651 | #endif | 3750 | #endif |
| 3652 | err = ext4_split_unwritten_extents(handle, inode, map, path, | 3751 | err = ext4_split_convert_extents(handle, inode, map, path, |
| 3653 | EXT4_GET_BLOCKS_CONVERT); | 3752 | EXT4_GET_BLOCKS_CONVERT); |
| 3654 | if (err < 0) | 3753 | if (err < 0) |
| 3655 | goto out; | 3754 | goto out; |
| 3656 | ext4_ext_drop_refs(path); | 3755 | ext4_ext_drop_refs(path); |
| @@ -3851,6 +3950,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
| 3851 | } | 3950 | } |
| 3852 | 3951 | ||
| 3853 | static int | 3952 | static int |
| 3953 | ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | ||
| 3954 | struct ext4_map_blocks *map, | ||
| 3955 | struct ext4_ext_path *path, int flags, | ||
| 3956 | unsigned int allocated, ext4_fsblk_t newblock) | ||
| 3957 | { | ||
| 3958 | int ret = 0; | ||
| 3959 | int err = 0; | ||
| 3960 | |||
| 3961 | /* | ||
| 3962 | * Make sure that the extent is no bigger than what we support with | ||
| 3963 | * an uninitialized extent | ||
| 3964 | */ | ||
| 3965 | if (map->m_len > EXT_UNINIT_MAX_LEN) | ||
| 3966 | map->m_len = EXT_UNINIT_MAX_LEN / 2; | ||
| 3967 | |||
| 3968 | ret = ext4_convert_initialized_extents(handle, inode, map, | ||
| 3969 | path); | ||
| 3970 | if (ret >= 0) { | ||
| 3971 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
| 3972 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
| 3973 | path, map->m_len); | ||
| 3974 | } else | ||
| 3975 | err = ret; | ||
| 3976 | map->m_flags |= EXT4_MAP_UNWRITTEN; | ||
| 3977 | if (allocated > map->m_len) | ||
| 3978 | allocated = map->m_len; | ||
| 3979 | map->m_len = allocated; | ||
| 3980 | |||
| 3981 | return err ? err : allocated; | ||
| 3982 | } | ||
| 3983 | |||
| 3984 | static int | ||
| 3854 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3985 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
| 3855 | struct ext4_map_blocks *map, | 3986 | struct ext4_map_blocks *map, |
| 3856 | struct ext4_ext_path *path, int flags, | 3987 | struct ext4_ext_path *path, int flags, |
| @@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
| 3877 | 4008 | ||
| 3878 | /* get_block() before submit the IO, split the extent */ | 4009 | /* get_block() before submit the IO, split the extent */ |
| 3879 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4010 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
| 3880 | ret = ext4_split_unwritten_extents(handle, inode, map, | 4011 | ret = ext4_split_convert_extents(handle, inode, map, |
| 3881 | path, flags); | 4012 | path, flags | EXT4_GET_BLOCKS_CONVERT); |
| 3882 | if (ret <= 0) | 4013 | if (ret <= 0) |
| 3883 | goto out; | 4014 | goto out; |
| 3884 | /* | 4015 | /* |
| @@ -3993,10 +4124,6 @@ out1: | |||
| 3993 | map->m_pblk = newblock; | 4124 | map->m_pblk = newblock; |
| 3994 | map->m_len = allocated; | 4125 | map->m_len = allocated; |
| 3995 | out2: | 4126 | out2: |
| 3996 | if (path) { | ||
| 3997 | ext4_ext_drop_refs(path); | ||
| 3998 | kfree(path); | ||
| 3999 | } | ||
| 4000 | return err ? err : allocated; | 4127 | return err ? err : allocated; |
| 4001 | } | 4128 | } |
| 4002 | 4129 | ||
| @@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4128 | struct ext4_extent newex, *ex, *ex2; | 4255 | struct ext4_extent newex, *ex, *ex2; |
| 4129 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4256 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 4130 | ext4_fsblk_t newblock = 0; | 4257 | ext4_fsblk_t newblock = 0; |
| 4131 | int free_on_err = 0, err = 0, depth; | 4258 | int free_on_err = 0, err = 0, depth, ret; |
| 4132 | unsigned int allocated = 0, offset = 0; | 4259 | unsigned int allocated = 0, offset = 0; |
| 4133 | unsigned int allocated_clusters = 0; | 4260 | unsigned int allocated_clusters = 0; |
| 4134 | struct ext4_allocation_request ar; | 4261 | struct ext4_allocation_request ar; |
| @@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4170 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | 4297 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
| 4171 | unsigned short ee_len; | 4298 | unsigned short ee_len; |
| 4172 | 4299 | ||
| 4300 | |||
| 4173 | /* | 4301 | /* |
| 4174 | * Uninitialized extents are treated as holes, except that | 4302 | * Uninitialized extents are treated as holes, except that |
| 4175 | * we split out initialized portions during a write. | 4303 | * we split out initialized portions during a write. |
| @@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4186 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 4314 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
| 4187 | ee_block, ee_len, newblock); | 4315 | ee_block, ee_len, newblock); |
| 4188 | 4316 | ||
| 4189 | if (!ext4_ext_is_uninitialized(ex)) | 4317 | /* |
| 4318 | * If the extent is initialized check whether the | ||
| 4319 | * caller wants to convert it to unwritten. | ||
| 4320 | */ | ||
| 4321 | if ((!ext4_ext_is_uninitialized(ex)) && | ||
| 4322 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | ||
| 4323 | allocated = ext4_ext_convert_initialized_extent( | ||
| 4324 | handle, inode, map, path, flags, | ||
| 4325 | allocated, newblock); | ||
| 4326 | goto out2; | ||
| 4327 | } else if (!ext4_ext_is_uninitialized(ex)) | ||
| 4190 | goto out; | 4328 | goto out; |
| 4191 | 4329 | ||
| 4192 | allocated = ext4_ext_handle_uninitialized_extents( | 4330 | ret = ext4_ext_handle_uninitialized_extents( |
| 4193 | handle, inode, map, path, flags, | 4331 | handle, inode, map, path, flags, |
| 4194 | allocated, newblock); | 4332 | allocated, newblock); |
| 4195 | goto out3; | 4333 | if (ret < 0) |
| 4334 | err = ret; | ||
| 4335 | else | ||
| 4336 | allocated = ret; | ||
| 4337 | goto out2; | ||
| 4196 | } | 4338 | } |
| 4197 | } | 4339 | } |
| 4198 | 4340 | ||
| @@ -4473,7 +4615,6 @@ out2: | |||
| 4473 | kfree(path); | 4615 | kfree(path); |
| 4474 | } | 4616 | } |
| 4475 | 4617 | ||
| 4476 | out3: | ||
| 4477 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4618 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
| 4478 | err ? err : allocated); | 4619 | err ? err : allocated); |
| 4479 | ext4_es_lru_add(inode); | 4620 | ext4_es_lru_add(inode); |
| @@ -4514,34 +4655,200 @@ retry: | |||
| 4514 | ext4_std_error(inode->i_sb, err); | 4655 | ext4_std_error(inode->i_sb, err); |
| 4515 | } | 4656 | } |
| 4516 | 4657 | ||
| 4517 | static void ext4_falloc_update_inode(struct inode *inode, | 4658 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
| 4518 | int mode, loff_t new_size, int update_ctime) | 4659 | ext4_lblk_t len, int flags, int mode) |
| 4519 | { | 4660 | { |
| 4520 | struct timespec now; | 4661 | struct inode *inode = file_inode(file); |
| 4662 | handle_t *handle; | ||
| 4663 | int ret = 0; | ||
| 4664 | int ret2 = 0; | ||
| 4665 | int retries = 0; | ||
| 4666 | struct ext4_map_blocks map; | ||
| 4667 | unsigned int credits; | ||
| 4521 | 4668 | ||
| 4522 | if (update_ctime) { | 4669 | map.m_lblk = offset; |
| 4523 | now = current_fs_time(inode->i_sb); | 4670 | /* |
| 4524 | if (!timespec_equal(&inode->i_ctime, &now)) | 4671 | * Don't normalize the request if it can fit in one extent so |
| 4525 | inode->i_ctime = now; | 4672 | * that it doesn't get unnecessarily split into multiple |
| 4673 | * extents. | ||
| 4674 | */ | ||
| 4675 | if (len <= EXT_UNINIT_MAX_LEN) | ||
| 4676 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
| 4677 | |||
| 4678 | /* | ||
| 4679 | * credits to insert 1 extent into extent tree | ||
| 4680 | */ | ||
| 4681 | credits = ext4_chunk_trans_blocks(inode, len); | ||
| 4682 | |||
| 4683 | retry: | ||
| 4684 | while (ret >= 0 && ret < len) { | ||
| 4685 | map.m_lblk = map.m_lblk + ret; | ||
| 4686 | map.m_len = len = len - ret; | ||
| 4687 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
| 4688 | credits); | ||
| 4689 | if (IS_ERR(handle)) { | ||
| 4690 | ret = PTR_ERR(handle); | ||
| 4691 | break; | ||
| 4692 | } | ||
| 4693 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
| 4694 | if (ret <= 0) { | ||
| 4695 | ext4_debug("inode #%lu: block %u: len %u: " | ||
| 4696 | "ext4_ext_map_blocks returned %d", | ||
| 4697 | inode->i_ino, map.m_lblk, | ||
| 4698 | map.m_len, ret); | ||
| 4699 | ext4_mark_inode_dirty(handle, inode); | ||
| 4700 | ret2 = ext4_journal_stop(handle); | ||
| 4701 | break; | ||
| 4702 | } | ||
| 4703 | ret2 = ext4_journal_stop(handle); | ||
| 4704 | if (ret2) | ||
| 4705 | break; | ||
| 4706 | } | ||
| 4707 | if (ret == -ENOSPC && | ||
| 4708 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
| 4709 | ret = 0; | ||
| 4710 | goto retry; | ||
| 4526 | } | 4711 | } |
| 4712 | |||
| 4713 | return ret > 0 ? ret2 : ret; | ||
| 4714 | } | ||
| 4715 | |||
| 4716 | static long ext4_zero_range(struct file *file, loff_t offset, | ||
| 4717 | loff_t len, int mode) | ||
| 4718 | { | ||
| 4719 | struct inode *inode = file_inode(file); | ||
| 4720 | handle_t *handle = NULL; | ||
| 4721 | unsigned int max_blocks; | ||
| 4722 | loff_t new_size = 0; | ||
| 4723 | int ret = 0; | ||
| 4724 | int flags; | ||
| 4725 | int partial; | ||
| 4726 | loff_t start, end; | ||
| 4727 | ext4_lblk_t lblk; | ||
| 4728 | struct address_space *mapping = inode->i_mapping; | ||
| 4729 | unsigned int blkbits = inode->i_blkbits; | ||
| 4730 | |||
| 4731 | trace_ext4_zero_range(inode, offset, len, mode); | ||
| 4732 | |||
| 4733 | /* | ||
| 4734 | * Write out all dirty pages to avoid race conditions, | ||
| 4735 | * then release them. | ||
| 4736 | */ | ||
| 4737 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
| 4738 | ret = filemap_write_and_wait_range(mapping, offset, | ||
| 4739 | offset + len - 1); | ||
| 4740 | if (ret) | ||
| 4741 | return ret; | ||
| 4742 | } | ||
| 4743 | |||
| 4527 | /* | 4744 | /* |
| 4528 | * Update only when preallocation was requested beyond | 4745 | * Round up offset. This is not fallocate, we need to zero out |
| 4529 | * the file size. | 4746 | * blocks, so convert the interior block-aligned part of the range to |
| 4747 | * unwritten and possibly manually zero out unaligned parts of the | ||
| 4748 | * range. | ||
| 4530 | */ | 4749 | */ |
| 4531 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { | 4750 | start = round_up(offset, 1 << blkbits); |
| 4751 | end = round_down((offset + len), 1 << blkbits); | ||
| 4752 | |||
| 4753 | if (start < offset || end > offset + len) | ||
| 4754 | return -EINVAL; | ||
| 4755 | partial = (offset + len) & ((1 << blkbits) - 1); | ||
| 4756 | |||
| 4757 | lblk = start >> blkbits; | ||
| 4758 | max_blocks = (end >> blkbits); | ||
| 4759 | if (max_blocks < lblk) | ||
| 4760 | max_blocks = 0; | ||
| 4761 | else | ||
| 4762 | max_blocks -= lblk; | ||
| 4763 | |||
| 4764 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | | ||
| 4765 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; | ||
| 4766 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
| 4767 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
| 4768 | |||
| 4769 | mutex_lock(&inode->i_mutex); | ||
| 4770 | |||
| 4771 | /* | ||
| 4772 | * Indirect files do not support unwritten extents | ||
| 4773 | */ | ||
| 4774 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | ||
| 4775 | ret = -EOPNOTSUPP; | ||
| 4776 | goto out_mutex; | ||
| 4777 | } | ||
| 4778 | |||
| 4779 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
| 4780 | offset + len > i_size_read(inode)) { | ||
| 4781 | new_size = offset + len; | ||
| 4782 | ret = inode_newsize_ok(inode, new_size); | ||
| 4783 | if (ret) | ||
| 4784 | goto out_mutex; | ||
| 4785 | /* | ||
| 4786 | * If we have a partial block after EOF we have to allocate | ||
| 4787 | * the entire block. | ||
| 4788 | */ | ||
| 4789 | if (partial) | ||
| 4790 | max_blocks += 1; | ||
| 4791 | } | ||
| 4792 | |||
| 4793 | if (max_blocks > 0) { | ||
| 4794 | |||
| 4795 | /* Now release the pages and zero the block-aligned part of the pages */ | ||
| 4796 | truncate_pagecache_range(inode, start, end - 1); | ||
| 4797 | |||
| 4798 | /* Wait for all existing dio workers; newcomers will block on i_mutex */ | ||
| 4799 | ext4_inode_block_unlocked_dio(inode); | ||
| 4800 | inode_dio_wait(inode); | ||
| 4801 | |||
| 4802 | /* | ||
| 4803 | * Remove entire range from the extent status tree. | ||
| 4804 | */ | ||
| 4805 | ret = ext4_es_remove_extent(inode, lblk, max_blocks); | ||
| 4806 | if (ret) | ||
| 4807 | goto out_dio; | ||
| 4808 | |||
| 4809 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | ||
| 4810 | mode); | ||
| 4811 | if (ret) | ||
| 4812 | goto out_dio; | ||
| 4813 | } | ||
| 4814 | |||
| 4815 | handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); | ||
| 4816 | if (IS_ERR(handle)) { | ||
| 4817 | ret = PTR_ERR(handle); | ||
| 4818 | ext4_std_error(inode->i_sb, ret); | ||
| 4819 | goto out_dio; | ||
| 4820 | } | ||
| 4821 | |||
| 4822 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
| 4823 | |||
| 4824 | if (new_size) { | ||
| 4532 | if (new_size > i_size_read(inode)) | 4825 | if (new_size > i_size_read(inode)) |
| 4533 | i_size_write(inode, new_size); | 4826 | i_size_write(inode, new_size); |
| 4534 | if (new_size > EXT4_I(inode)->i_disksize) | 4827 | if (new_size > EXT4_I(inode)->i_disksize) |
| 4535 | ext4_update_i_disksize(inode, new_size); | 4828 | ext4_update_i_disksize(inode, new_size); |
| 4536 | } else { | 4829 | } else { |
| 4537 | /* | 4830 | /* |
| 4538 | * Mark that we allocate beyond EOF so the subsequent truncate | 4831 | * Mark that we allocate beyond EOF so the subsequent truncate |
| 4539 | * can proceed even if the new size is the same as i_size. | 4832 | * can proceed even if the new size is the same as i_size. |
| 4540 | */ | 4833 | */ |
| 4541 | if (new_size > i_size_read(inode)) | 4834 | if ((offset + len) > i_size_read(inode)) |
| 4542 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 4835 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
| 4543 | } | 4836 | } |
| 4544 | 4837 | ||
| 4838 | ext4_mark_inode_dirty(handle, inode); | ||
| 4839 | |||
| 4840 | /* Zero out partial block at the edges of the range */ | ||
| 4841 | ret = ext4_zero_partial_blocks(handle, inode, offset, len); | ||
| 4842 | |||
| 4843 | if (file->f_flags & O_SYNC) | ||
| 4844 | ext4_handle_sync(handle); | ||
| 4845 | |||
| 4846 | ext4_journal_stop(handle); | ||
| 4847 | out_dio: | ||
| 4848 | ext4_inode_resume_unlocked_dio(inode); | ||
| 4849 | out_mutex: | ||
| 4850 | mutex_unlock(&inode->i_mutex); | ||
| 4851 | return ret; | ||
| 4545 | } | 4852 | } |
| 4546 | 4853 | ||
| 4547 | /* | 4854 | /* |
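Editor's note: the heart of the new ext4_zero_range() is the alignment split — the interior of the byte range is rounded to block boundaries and converted to unwritten extents, while any unaligned head/tail is zeroed in place by ext4_zero_partial_blocks(); `partial` records whether the range ends mid-block so an extra block is allocated when extending past EOF. The arithmetic, modeled in plain C for an assumed 4 KiB block size:

#include <stdio.h>

#define BLKBITS 12u			/* 4 KiB blocks, stand-in */
#define BLKSIZE (1ull << BLKBITS)

int main(void)
{
	unsigned long long offset = 3000, len = 10000;

	unsigned long long start = (offset + BLKSIZE - 1) & ~(BLKSIZE - 1); /* round_up */
	unsigned long long end = (offset + len) & ~(BLKSIZE - 1);           /* round_down */
	unsigned long long partial = (offset + len) & (BLKSIZE - 1);

	unsigned long long lblk = start >> BLKBITS;
	unsigned long long max_blocks = end > start ? (end >> BLKBITS) - lblk : 0;

	/* prints: convert blocks [1, 3), zero edges, partial tail: 712 */
	printf("convert blocks [%llu, %llu), zero edges, partial tail: %llu\n",
	       lblk, lblk + max_blocks, partial);
	return 0;
}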
| @@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 4555 | { | 4862 | { |
| 4556 | struct inode *inode = file_inode(file); | 4863 | struct inode *inode = file_inode(file); |
| 4557 | handle_t *handle; | 4864 | handle_t *handle; |
| 4558 | loff_t new_size; | 4865 | loff_t new_size = 0; |
| 4559 | unsigned int max_blocks; | 4866 | unsigned int max_blocks; |
| 4560 | int ret = 0; | 4867 | int ret = 0; |
| 4561 | int ret2 = 0; | ||
| 4562 | int retries = 0; | ||
| 4563 | int flags; | 4868 | int flags; |
| 4564 | struct ext4_map_blocks map; | 4869 | ext4_lblk_t lblk; |
| 4565 | unsigned int credits, blkbits = inode->i_blkbits; | 4870 | struct timespec tv; |
| 4871 | unsigned int blkbits = inode->i_blkbits; | ||
| 4566 | 4872 | ||
| 4567 | /* Return error if mode is not supported */ | 4873 | /* Return error if mode is not supported */ |
| 4568 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 4874 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
| 4875 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
| 4569 | return -EOPNOTSUPP; | 4876 | return -EOPNOTSUPP; |
| 4570 | 4877 | ||
| 4571 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4878 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 4572 | return ext4_punch_hole(inode, offset, len); | 4879 | return ext4_punch_hole(inode, offset, len); |
| 4573 | 4880 | ||
| 4881 | if (mode & FALLOC_FL_COLLAPSE_RANGE) | ||
| 4882 | return ext4_collapse_range(inode, offset, len); | ||
| 4883 | |||
| 4574 | ret = ext4_convert_inline_data(inode); | 4884 | ret = ext4_convert_inline_data(inode); |
| 4575 | if (ret) | 4885 | if (ret) |
| 4576 | return ret; | 4886 | return ret; |
| @@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 4582 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4892 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
| 4583 | return -EOPNOTSUPP; | 4893 | return -EOPNOTSUPP; |
| 4584 | 4894 | ||
| 4895 | if (mode & FALLOC_FL_ZERO_RANGE) | ||
| 4896 | return ext4_zero_range(file, offset, len, mode); | ||
| 4897 | |||
| 4585 | trace_ext4_fallocate_enter(inode, offset, len, mode); | 4898 | trace_ext4_fallocate_enter(inode, offset, len, mode); |
| 4586 | map.m_lblk = offset >> blkbits; | 4899 | lblk = offset >> blkbits; |
| 4587 | /* | 4900 | /* |
| 4588 | * We can't just convert len to max_blocks because | 4901 | * We can't just convert len to max_blocks because |
| 4589 | * If blocksize = 4096 offset = 3072 and len = 2048 | 4902 | * If blocksize = 4096 offset = 3072 and len = 2048 |
| 4590 | */ | 4903 | */ |
| 4591 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 4904 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
| 4592 | - map.m_lblk; | 4905 | - lblk; |
| 4593 | /* | 4906 | |
| 4594 | * credits to insert 1 extent into extent tree | ||
| 4595 | */ | ||
| 4596 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
| 4597 | mutex_lock(&inode->i_mutex); | ||
| 4598 | ret = inode_newsize_ok(inode, (len + offset)); | ||
| 4599 | if (ret) { | ||
| 4600 | mutex_unlock(&inode->i_mutex); | ||
| 4601 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | ||
| 4602 | return ret; | ||
| 4603 | } | ||
| 4604 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; | 4907 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; |
| 4605 | if (mode & FALLOC_FL_KEEP_SIZE) | 4908 | if (mode & FALLOC_FL_KEEP_SIZE) |
| 4606 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4909 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
| 4607 | /* | ||
| 4608 | * Don't normalize the request if it can fit in one extent so | ||
| 4609 | * that it doesn't get unnecessarily split into multiple | ||
| 4610 | * extents. | ||
| 4611 | */ | ||
| 4612 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | ||
| 4613 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
| 4614 | 4910 | ||
| 4615 | retry: | 4911 | mutex_lock(&inode->i_mutex); |
| 4616 | while (ret >= 0 && ret < max_blocks) { | ||
| 4617 | map.m_lblk = map.m_lblk + ret; | ||
| 4618 | map.m_len = max_blocks = max_blocks - ret; | ||
| 4619 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
| 4620 | credits); | ||
| 4621 | if (IS_ERR(handle)) { | ||
| 4622 | ret = PTR_ERR(handle); | ||
| 4623 | break; | ||
| 4624 | } | ||
| 4625 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
| 4626 | if (ret <= 0) { | ||
| 4627 | #ifdef EXT4FS_DEBUG | ||
| 4628 | ext4_warning(inode->i_sb, | ||
| 4629 | "inode #%lu: block %u: len %u: " | ||
| 4630 | "ext4_ext_map_blocks returned %d", | ||
| 4631 | inode->i_ino, map.m_lblk, | ||
| 4632 | map.m_len, ret); | ||
| 4633 | #endif | ||
| 4634 | ext4_mark_inode_dirty(handle, inode); | ||
| 4635 | ret2 = ext4_journal_stop(handle); | ||
| 4636 | break; | ||
| 4637 | } | ||
| 4638 | if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, | ||
| 4639 | blkbits) >> blkbits)) | ||
| 4640 | new_size = offset + len; | ||
| 4641 | else | ||
| 4642 | new_size = ((loff_t) map.m_lblk + ret) << blkbits; | ||
| 4643 | 4912 | ||
| 4644 | ext4_falloc_update_inode(inode, mode, new_size, | 4913 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
| 4645 | (map.m_flags & EXT4_MAP_NEW)); | 4914 | offset + len > i_size_read(inode)) { |
| 4646 | ext4_mark_inode_dirty(handle, inode); | 4915 | new_size = offset + len; |
| 4647 | if ((file->f_flags & O_SYNC) && ret >= max_blocks) | 4916 | ret = inode_newsize_ok(inode, new_size); |
| 4648 | ext4_handle_sync(handle); | 4917 | if (ret) |
| 4649 | ret2 = ext4_journal_stop(handle); | 4918 | goto out; |
| 4650 | if (ret2) | ||
| 4651 | break; | ||
| 4652 | } | 4919 | } |
| 4653 | if (ret == -ENOSPC && | 4920 | |
| 4654 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | 4921 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); |
| 4655 | ret = 0; | 4922 | if (ret) |
| 4656 | goto retry; | 4923 | goto out; |
| 4924 | |||
| 4925 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
| 4926 | if (IS_ERR(handle)) | ||
| 4927 | goto out; | ||
| 4928 | |||
| 4929 | tv = inode->i_ctime = ext4_current_time(inode); | ||
| 4930 | |||
| 4931 | if (new_size) { | ||
| 4932 | if (new_size > i_size_read(inode)) { | ||
| 4933 | i_size_write(inode, new_size); | ||
| 4934 | inode->i_mtime = tv; | ||
| 4935 | } | ||
| 4936 | if (new_size > EXT4_I(inode)->i_disksize) | ||
| 4937 | ext4_update_i_disksize(inode, new_size); | ||
| 4938 | } else { | ||
| 4939 | /* | ||
| 4940 | * Mark that we allocate beyond EOF so the subsequent truncate | ||
| 4941 | * can proceed even if the new size is the same as i_size. | ||
| 4942 | */ | ||
| 4943 | if ((offset + len) > i_size_read(inode)) | ||
| 4944 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
| 4657 | } | 4945 | } |
| 4946 | ext4_mark_inode_dirty(handle, inode); | ||
| 4947 | if (file->f_flags & O_SYNC) | ||
| 4948 | ext4_handle_sync(handle); | ||
| 4949 | |||
| 4950 | ext4_journal_stop(handle); | ||
| 4951 | out: | ||
| 4658 | mutex_unlock(&inode->i_mutex); | 4952 | mutex_unlock(&inode->i_mutex); |
| 4659 | trace_ext4_fallocate_exit(inode, offset, max_blocks, | 4953 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
| 4660 | ret > 0 ? ret2 : ret); | 4954 | return ret; |
| 4661 | return ret > 0 ? ret2 : ret; | ||
| 4662 | } | 4955 | } |
| 4663 | 4956 | ||
| 4664 | /* | 4957 | /* |
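Editor's note: after this rewrite ext4_fallocate() is a thin dispatcher — punch-hole, collapse-range and zero-range each go to their own helper, and the remaining preallocation path shares ext4_alloc_file_blocks() (with its ENOSPC retry loop) with zero-range. From userspace the modes are reached through fallocate(2); a minimal sketch, assuming a scratch file at /tmp/testfile and a libc/kernel exposing the flags:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/testfile", O_RDWR);	/* hypothetical file */
	if (fd < 0) { perror("open"); return 1; }

	/* classic preallocation beyond EOF without changing i_size */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("FALLOC_FL_KEEP_SIZE");

	/* newly supported: zero a range in place, keeping the blocks */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 8192) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	close(fd);
	return 0;
}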
| @@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
| 4869 | ext4_es_lru_add(inode); | 5162 | ext4_es_lru_add(inode); |
| 4870 | return error; | 5163 | return error; |
| 4871 | } | 5164 | } |
| 5165 | |||
| 5166 | /* | ||
| 5167 | * ext4_access_path: | ||
| 5168 | * Function to access the path buffer for marking it dirty. | ||
| 5169 | * It also checks if there are sufficient credits left in the journal handle | ||
| 5170 | * to update path. | ||
| 5171 | */ | ||
| 5172 | static int | ||
| 5173 | ext4_access_path(handle_t *handle, struct inode *inode, | ||
| 5174 | struct ext4_ext_path *path) | ||
| 5175 | { | ||
| 5176 | int credits, err; | ||
| 5177 | |||
| 5178 | if (!ext4_handle_valid(handle)) | ||
| 5179 | return 0; | ||
| 5180 | |||
| 5181 | /* | ||
| 5182 | * Check if we need to extend journal credits | ||
| 5183 | * 3 for leaf, sb, and inode plus 2 (bmap and group | ||
| 5184 | * descriptor) for each block group; assume two block | ||
| 5185 | * groups | ||
| 5186 | */ | ||
| 5187 | if (handle->h_buffer_credits < 7) { | ||
| 5188 | credits = ext4_writepage_trans_blocks(inode); | ||
| 5189 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); | ||
| 5190 | /* EAGAIN is success */ | ||
| 5191 | if (err && err != -EAGAIN) | ||
| 5192 | return err; | ||
| 5193 | } | ||
| 5194 | |||
| 5195 | err = ext4_ext_get_access(handle, inode, path); | ||
| 5196 | return err; | ||
| 5197 | } | ||
| 5198 | |||
| 5199 | /* | ||
| 5200 | * ext4_ext_shift_path_extents: | ||
| 5201 | * Shift the extents of a path structure lying between path[depth].p_ext | ||
| 5202 | * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift | ||
| 5203 | * from starting block for each extent. | ||
| 5204 | */ | ||
| 5205 | static int | ||
| 5206 | ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | ||
| 5207 | struct inode *inode, handle_t *handle, | ||
| 5208 | ext4_lblk_t *start) | ||
| 5209 | { | ||
| 5210 | int depth, err = 0; | ||
| 5211 | struct ext4_extent *ex_start, *ex_last; | ||
| 5212 | bool update = 0; | ||
| 5213 | depth = path->p_depth; | ||
| 5214 | |||
| 5215 | while (depth >= 0) { | ||
| 5216 | if (depth == path->p_depth) { | ||
| 5217 | ex_start = path[depth].p_ext; | ||
| 5218 | if (!ex_start) | ||
| 5219 | return -EIO; | ||
| 5220 | |||
| 5221 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | ||
| 5222 | if (!ex_last) | ||
| 5223 | return -EIO; | ||
| 5224 | |||
| 5225 | err = ext4_access_path(handle, inode, path + depth); | ||
| 5226 | if (err) | ||
| 5227 | goto out; | ||
| 5228 | |||
| 5229 | if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) | ||
| 5230 | update = 1; | ||
| 5231 | |||
| 5232 | *start = ex_last->ee_block + | ||
| 5233 | ext4_ext_get_actual_len(ex_last); | ||
| 5234 | |||
| 5235 | while (ex_start <= ex_last) { | ||
| 5236 | ex_start->ee_block -= shift; | ||
| 5237 | if (ex_start > | ||
| 5238 | EXT_FIRST_EXTENT(path[depth].p_hdr)) { | ||
| 5239 | if (ext4_ext_try_to_merge_right(inode, | ||
| 5240 | path, ex_start - 1)) | ||
| 5241 | ex_last--; | ||
| 5242 | } | ||
| 5243 | ex_start++; | ||
| 5244 | } | ||
| 5245 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
| 5246 | if (err) | ||
| 5247 | goto out; | ||
| 5248 | |||
| 5249 | if (--depth < 0 || !update) | ||
| 5250 | break; | ||
| 5251 | } | ||
| 5252 | |||
| 5253 | /* Update index too */ | ||
| 5254 | err = ext4_access_path(handle, inode, path + depth); | ||
| 5255 | if (err) | ||
| 5256 | goto out; | ||
| 5257 | |||
| 5258 | path[depth].p_idx->ei_block -= shift; | ||
| 5259 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
| 5260 | if (err) | ||
| 5261 | goto out; | ||
| 5262 | |||
| 5263 | /* we are done if current index is not a starting index */ | ||
| 5264 | if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) | ||
| 5265 | break; | ||
| 5266 | |||
| 5267 | depth--; | ||
| 5268 | } | ||
| 5269 | |||
| 5270 | out: | ||
| 5271 | return err; | ||
| 5272 | } | ||
| 5273 | |||
| 5274 | /* | ||
| 5275 | * ext4_ext_shift_extents: | ||
| 5276 | * All the extents which lie in the range from start to the last allocated | ||
| 5277 | * block for the file are shifted downwards by shift blocks. | ||
| 5278 | * On success, 0 is returned, error otherwise. | ||
| 5279 | */ | ||
| 5280 | static int | ||
| 5281 | ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | ||
| 5282 | ext4_lblk_t start, ext4_lblk_t shift) | ||
| 5283 | { | ||
| 5284 | struct ext4_ext_path *path; | ||
| 5285 | int ret = 0, depth; | ||
| 5286 | struct ext4_extent *extent; | ||
| 5287 | ext4_lblk_t stop_block, current_block; | ||
| 5288 | ext4_lblk_t ex_start, ex_end; | ||
| 5289 | |||
| 5290 | /* Let path point to the last extent */ | ||
| 5291 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | ||
| 5292 | if (IS_ERR(path)) | ||
| 5293 | return PTR_ERR(path); | ||
| 5294 | |||
| 5295 | depth = path->p_depth; | ||
| 5296 | extent = path[depth].p_ext; | ||
| 5297 | if (!extent) { | ||
| 5298 | ext4_ext_drop_refs(path); | ||
| 5299 | kfree(path); | ||
| 5300 | return ret; | ||
| 5301 | } | ||
| 5302 | |||
| 5303 | stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
| 5304 | ext4_ext_drop_refs(path); | ||
| 5305 | kfree(path); | ||
| 5306 | |||
| 5307 | /* Nothing to shift if the hole is at the end of the file */ | ||
| 5308 | if (start >= stop_block) | ||
| 5309 | return ret; | ||
| 5310 | |||
| 5311 | /* | ||
| 5312 | * Don't start shifting extents until we make sure the hole is big | ||
| 5313 | * enough to accommodate the shift. | ||
| 5314 | */ | ||
| 5315 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | ||
| 5316 | depth = path->p_depth; | ||
| 5317 | extent = path[depth].p_ext; | ||
| 5318 | ex_start = extent->ee_block; | ||
| 5319 | ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
| 5320 | ext4_ext_drop_refs(path); | ||
| 5321 | kfree(path); | ||
| 5322 | |||
| 5323 | if ((start == ex_start && shift > ex_start) || | ||
| 5324 | (shift > start - ex_end)) | ||
| 5325 | return -EINVAL; | ||
| 5326 | |||
| 5327 | /* It's safe to start updating extents */ | ||
| 5328 | while (start < stop_block) { | ||
| 5329 | path = ext4_ext_find_extent(inode, start, NULL, 0); | ||
| 5330 | if (IS_ERR(path)) | ||
| 5331 | return PTR_ERR(path); | ||
| 5332 | depth = path->p_depth; | ||
| 5333 | extent = path[depth].p_ext; | ||
| 5334 | current_block = extent->ee_block; | ||
| 5335 | if (start > current_block) { | ||
| 5336 | /* Hole, move to the next extent */ | ||
| 5337 | ret = mext_next_extent(inode, path, &extent); | ||
| 5338 | if (ret != 0) { | ||
| 5339 | ext4_ext_drop_refs(path); | ||
| 5340 | kfree(path); | ||
| 5341 | if (ret == 1) | ||
| 5342 | ret = 0; | ||
| 5343 | break; | ||
| 5344 | } | ||
| 5345 | } | ||
| 5346 | ret = ext4_ext_shift_path_extents(path, shift, inode, | ||
| 5347 | handle, &start); | ||
| 5348 | ext4_ext_drop_refs(path); | ||
| 5349 | kfree(path); | ||
| 5350 | if (ret) | ||
| 5351 | break; | ||
| 5352 | } | ||
| 5353 | |||
| 5354 | return ret; | ||
| 5355 | } | ||
| 5356 | |||
| 5357 | /* | ||
| 5358 | * ext4_collapse_range: | ||
| 5359 | * This implements fallocate's collapse-range functionality for ext4. | ||
| 5360 | * Returns: 0 on success, non-zero on error. | ||
| 5361 | */ | ||
| 5362 | int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | ||
| 5363 | { | ||
| 5364 | struct super_block *sb = inode->i_sb; | ||
| 5365 | ext4_lblk_t punch_start, punch_stop; | ||
| 5366 | handle_t *handle; | ||
| 5367 | unsigned int credits; | ||
| 5368 | loff_t new_size; | ||
| 5369 | int ret; | ||
| 5370 | |||
| 5371 | BUG_ON(offset + len > i_size_read(inode)); | ||
| 5372 | |||
| 5373 | /* Collapse range works only on fs block size aligned offsets. */ | ||
| 5374 | if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || | ||
| 5375 | len & (EXT4_BLOCK_SIZE(sb) - 1)) | ||
| 5376 | return -EINVAL; | ||
| 5377 | |||
| 5378 | if (!S_ISREG(inode->i_mode)) | ||
| 5379 | return -EOPNOTSUPP; | ||
| 5380 | |||
| 5381 | trace_ext4_collapse_range(inode, offset, len); | ||
| 5382 | |||
| 5383 | punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); | ||
| 5384 | punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
| 5385 | |||
| 5386 | /* Write out all dirty pages */ | ||
| 5387 | ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); | ||
| 5388 | if (ret) | ||
| 5389 | return ret; | ||
| 5390 | |||
| 5391 | /* Take mutex lock */ | ||
| 5392 | mutex_lock(&inode->i_mutex); | ||
| 5393 | |||
| 5394 | /* It's not possible to punch a hole in an append-only file */ | ||
| 5395 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
| 5396 | ret = -EPERM; | ||
| 5397 | goto out_mutex; | ||
| 5398 | } | ||
| 5399 | |||
| 5400 | if (IS_SWAPFILE(inode)) { | ||
| 5401 | ret = -ETXTBSY; | ||
| 5402 | goto out_mutex; | ||
| 5403 | } | ||
| 5404 | |||
| 5405 | /* Currently just for extent based files */ | ||
| 5406 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
| 5407 | ret = -EOPNOTSUPP; | ||
| 5408 | goto out_mutex; | ||
| 5409 | } | ||
| 5410 | |||
| 5411 | truncate_pagecache_range(inode, offset, -1); | ||
| 5412 | |||
| 5413 | /* Wait for existing dio to complete */ | ||
| 5414 | ext4_inode_block_unlocked_dio(inode); | ||
| 5415 | inode_dio_wait(inode); | ||
| 5416 | |||
| 5417 | credits = ext4_writepage_trans_blocks(inode); | ||
| 5418 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
| 5419 | if (IS_ERR(handle)) { | ||
| 5420 | ret = PTR_ERR(handle); | ||
| 5421 | goto out_dio; | ||
| 5422 | } | ||
| 5423 | |||
| 5424 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 5425 | ext4_discard_preallocations(inode); | ||
| 5426 | |||
| 5427 | ret = ext4_es_remove_extent(inode, punch_start, | ||
| 5428 | EXT_MAX_BLOCKS - punch_start - 1); | ||
| 5429 | if (ret) { | ||
| 5430 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 5431 | goto out_stop; | ||
| 5432 | } | ||
| 5433 | |||
| 5434 | ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); | ||
| 5435 | if (ret) { | ||
| 5436 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 5437 | goto out_stop; | ||
| 5438 | } | ||
| 5439 | |||
| 5440 | ret = ext4_ext_shift_extents(inode, handle, punch_stop, | ||
| 5441 | punch_stop - punch_start); | ||
| 5442 | if (ret) { | ||
| 5443 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 5444 | goto out_stop; | ||
| 5445 | } | ||
| 5446 | |||
| 5447 | new_size = i_size_read(inode) - len; | ||
| 5448 | truncate_setsize(inode, new_size); | ||
| 5449 | EXT4_I(inode)->i_disksize = new_size; | ||
| 5450 | |||
| 5451 | ext4_discard_preallocations(inode); | ||
| 5452 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 5453 | if (IS_SYNC(inode)) | ||
| 5454 | ext4_handle_sync(handle); | ||
| 5455 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
| 5456 | ext4_mark_inode_dirty(handle, inode); | ||
| 5457 | |||
| 5458 | out_stop: | ||
| 5459 | ext4_journal_stop(handle); | ||
| 5460 | out_dio: | ||
| 5461 | ext4_inode_resume_unlocked_dio(inode); | ||
| 5462 | out_mutex: | ||
| 5463 | mutex_unlock(&inode->i_mutex); | ||
| 5464 | return ret; | ||
| 5465 | } | ||
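Editor's note: ext4_collapse_range() punches out the block-aligned range and then shifts every later extent down by (punch_stop - punch_start) blocks via ext4_ext_shift_extents(), finally shrinking i_size by len. A userspace trigger for the path, as a sketch — the file path and 4096-byte block size are assumptions, and unaligned offsets get EINVAL exactly as the checks above require:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/testfile", O_RDWR);	/* hypothetical file */
	if (fd < 0) { perror("open"); return 1; }

	/* drop bytes [4096, 4096 + 3*4096) and move everything after it down;
	 * the file shrinks by 3 blocks */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 3 * 4096) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	close(fd);
	return 0;
}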
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 3981ff783950..0a014a7194b2 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -184,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode) | |||
| 184 | while (node) { | 184 | while (node) { |
| 185 | struct extent_status *es; | 185 | struct extent_status *es; |
| 186 | es = rb_entry(node, struct extent_status, rb_node); | 186 | es = rb_entry(node, struct extent_status, rb_node); |
| 187 | printk(KERN_DEBUG " [%u/%u) %llu %llx", | 187 | printk(KERN_DEBUG " [%u/%u) %llu %x", |
| 188 | es->es_lblk, es->es_len, | 188 | es->es_lblk, es->es_len, |
| 189 | ext4_es_pblock(es), ext4_es_status(es)); | 189 | ext4_es_pblock(es), ext4_es_status(es)); |
| 190 | node = rb_next(node); | 190 | node = rb_next(node); |
| @@ -445,8 +445,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
| 445 | pr_warn("ES insert assertion failed for " | 445 | pr_warn("ES insert assertion failed for " |
| 446 | "inode: %lu we can find an extent " | 446 | "inode: %lu we can find an extent " |
| 447 | "at block [%d/%d/%llu/%c], but we " | 447 | "at block [%d/%d/%llu/%c], but we " |
| 448 | "want to add an delayed/hole extent " | 448 | "want to add a delayed/hole extent " |
| 449 | "[%d/%d/%llu/%llx]\n", | 449 | "[%d/%d/%llu/%x]\n", |
| 450 | inode->i_ino, ee_block, ee_len, | 450 | inode->i_ino, ee_block, ee_len, |
| 451 | ee_start, ee_status ? 'u' : 'w', | 451 | ee_start, ee_status ? 'u' : 'w', |
| 452 | es->es_lblk, es->es_len, | 452 | es->es_lblk, es->es_len, |
| @@ -486,8 +486,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
| 486 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 486 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { |
| 487 | pr_warn("ES insert assertion failed for inode: %lu " | 487 | pr_warn("ES insert assertion failed for inode: %lu " |
| 488 | "can't find an extent at block %d but we want " | 488 | "can't find an extent at block %d but we want " |
| 489 | "to add an written/unwritten extent " | 489 | "to add a written/unwritten extent " |
| 490 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | 490 | "[%d/%d/%llu/%x]\n", inode->i_ino, |
| 491 | es->es_lblk, es->es_lblk, es->es_len, | 491 | es->es_lblk, es->es_lblk, es->es_len, |
| 492 | ext4_es_pblock(es), ext4_es_status(es)); | 492 | ext4_es_pblock(es), ext4_es_status(es)); |
| 493 | } | 493 | } |
| @@ -524,7 +524,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
| 524 | */ | 524 | */ |
| 525 | pr_warn("ES insert assertion failed for inode: %lu " | 525 | pr_warn("ES insert assertion failed for inode: %lu " |
| 526 | "We can find blocks but we want to add a " | 526 | "We can find blocks but we want to add a " |
| 527 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | 527 | "delayed/hole extent [%d/%d/%llu/%x]\n", |
| 528 | inode->i_ino, es->es_lblk, es->es_len, | 528 | inode->i_ino, es->es_lblk, es->es_len, |
| 529 | ext4_es_pblock(es), ext4_es_status(es)); | 529 | ext4_es_pblock(es), ext4_es_status(es)); |
| 530 | return; | 530 | return; |
| @@ -554,7 +554,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
| 554 | if (ext4_es_is_written(es)) { | 554 | if (ext4_es_is_written(es)) { |
| 555 | pr_warn("ES insert assertion failed for inode: %lu " | 555 | pr_warn("ES insert assertion failed for inode: %lu " |
| 556 | "We can't find the block but we want to add " | 556 | "We can't find the block but we want to add " |
| 557 | "an written extent [%d/%d/%llu/%llx]\n", | 557 | "a written extent [%d/%d/%llu/%x]\n", |
| 558 | inode->i_ino, es->es_lblk, es->es_len, | 558 | inode->i_ino, es->es_lblk, es->es_len, |
| 559 | ext4_es_pblock(es), ext4_es_status(es)); | 559 | ext4_es_pblock(es), ext4_es_status(es)); |
| 560 | return; | 560 | return; |
| @@ -658,8 +658,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 658 | 658 | ||
| 659 | newes.es_lblk = lblk; | 659 | newes.es_lblk = lblk; |
| 660 | newes.es_len = len; | 660 | newes.es_len = len; |
| 661 | ext4_es_store_pblock(&newes, pblk); | 661 | ext4_es_store_pblock_status(&newes, pblk, status); |
| 662 | ext4_es_store_status(&newes, status); | ||
| 663 | trace_ext4_es_insert_extent(inode, &newes); | 662 | trace_ext4_es_insert_extent(inode, &newes); |
| 664 | 663 | ||
| 665 | ext4_es_insert_extent_check(inode, &newes); | 664 | ext4_es_insert_extent_check(inode, &newes); |
| @@ -699,8 +698,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 699 | 698 | ||
| 700 | newes.es_lblk = lblk; | 699 | newes.es_lblk = lblk; |
| 701 | newes.es_len = len; | 700 | newes.es_len = len; |
| 702 | ext4_es_store_pblock(&newes, pblk); | 701 | ext4_es_store_pblock_status(&newes, pblk, status); |
| 703 | ext4_es_store_status(&newes, status); | ||
| 704 | trace_ext4_es_cache_extent(inode, &newes); | 702 | trace_ext4_es_cache_extent(inode, &newes); |
| 705 | 703 | ||
| 706 | if (!len) | 704 | if (!len) |
| @@ -812,13 +810,13 @@ retry: | |||
| 812 | 810 | ||
| 813 | newes.es_lblk = end + 1; | 811 | newes.es_lblk = end + 1; |
| 814 | newes.es_len = len2; | 812 | newes.es_len = len2; |
| 813 | block = 0x7FDEADBEEF; | ||
| 815 | if (ext4_es_is_written(&orig_es) || | 814 | if (ext4_es_is_written(&orig_es) || |
| 816 | ext4_es_is_unwritten(&orig_es)) { | 815 | ext4_es_is_unwritten(&orig_es)) |
| 817 | block = ext4_es_pblock(&orig_es) + | 816 | block = ext4_es_pblock(&orig_es) + |
| 818 | orig_es.es_len - len2; | 817 | orig_es.es_len - len2; |
| 819 | ext4_es_store_pblock(&newes, block); | 818 | ext4_es_store_pblock_status(&newes, block, |
| 820 | } | 819 | ext4_es_status(&orig_es)); |
| 821 | ext4_es_store_status(&newes, ext4_es_status(&orig_es)); | ||
| 822 | err = __es_insert_extent(inode, &newes); | 820 | err = __es_insert_extent(inode, &newes); |
| 823 | if (err) { | 821 | if (err) { |
| 824 | es->es_lblk = orig_es.es_lblk; | 822 | es->es_lblk = orig_es.es_lblk; |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 167f4ab8ecc3..f1b62a419920 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
| @@ -129,6 +129,15 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
| 129 | (es->es_pblk & ~ES_MASK)); | 129 | (es->es_pblk & ~ES_MASK)); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | static inline void ext4_es_store_pblock_status(struct extent_status *es, | ||
| 133 | ext4_fsblk_t pb, | ||
| 134 | unsigned int status) | ||
| 135 | { | ||
| 136 | es->es_pblk = (((ext4_fsblk_t) | ||
| 137 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | ||
| 138 | (pb & ~ES_MASK)); | ||
| 139 | } | ||
| 140 | |||
| 132 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 141 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
| 133 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 142 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
| 134 | extern void ext4_es_lru_add(struct inode *inode); | 143 | extern void ext4_es_lru_add(struct inode *inode); |
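
The new helper exists so that the physical block and the status bits land in es_pblk with a single store; the old pair of calls (ext4_es_store_pblock() followed by ext4_es_store_status()) left a window where the field held a block number with stale status bits. A compilable sketch of the packing, using an illustrative shift/mask in place of the real ES_SHIFT/ES_MASK and omitting the EXTENT_STATUS_FLAGS masking:

#include <stdio.h>
#include <stdint.h>

#define ES_SHIFT 60				/* assumed: top 4 bits carry status */
#define ES_MASK  (~((UINT64_C(1) << ES_SHIFT) - 1))

/* pack physical block and status flags into one 64-bit field in one store */
static inline uint64_t pack_pblk_status(uint64_t pblk, unsigned int status)
{
	return ((uint64_t)status << ES_SHIFT) | (pblk & ~ES_MASK);
}

int main(void)
{
	uint64_t es_pblk = pack_pblk_status(0x12345678, 0x8);

	printf("pblk   = 0x%llx\n", (unsigned long long)(es_pblk & ~ES_MASK));
	printf("status = 0x%llx\n", (unsigned long long)(es_pblk >> ES_SHIFT));
	return 0;
}
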
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1a5073959f32..6db7f7db7777 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -153,7 +153,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 153 | ssize_t err; | 153 | ssize_t err; |
| 154 | 154 | ||
| 155 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); | 155 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); |
| 156 | if (err < 0 && ret > 0) | 156 | if (err < 0) |
| 157 | ret = err; | 157 | ret = err; |
| 158 | } | 158 | } |
| 159 | blk_finish_plug(&plug); | 159 | blk_finish_plug(&plug); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e39895a91b8..5b0d2c7d5408 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
| 39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
| 40 | #include <linux/aio.h> | 40 | #include <linux/aio.h> |
| 41 | #include <linux/bitops.h> | ||
| 41 | 42 | ||
| 42 | #include "ext4_jbd2.h" | 43 | #include "ext4_jbd2.h" |
| 43 | #include "xattr.h" | 44 | #include "xattr.h" |
| @@ -214,7 +215,7 @@ void ext4_evict_inode(struct inode *inode) | |||
| 214 | jbd2_complete_transaction(journal, commit_tid); | 215 | jbd2_complete_transaction(journal, commit_tid); |
| 215 | filemap_write_and_wait(&inode->i_data); | 216 | filemap_write_and_wait(&inode->i_data); |
| 216 | } | 217 | } |
| 217 | truncate_inode_pages(&inode->i_data, 0); | 218 | truncate_inode_pages_final(&inode->i_data); |
| 218 | 219 | ||
| 219 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 220 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
| 220 | goto no_delete; | 221 | goto no_delete; |
| @@ -225,7 +226,7 @@ void ext4_evict_inode(struct inode *inode) | |||
| 225 | 226 | ||
| 226 | if (ext4_should_order_data(inode)) | 227 | if (ext4_should_order_data(inode)) |
| 227 | ext4_begin_ordered_truncate(inode, 0); | 228 | ext4_begin_ordered_truncate(inode, 0); |
| 228 | truncate_inode_pages(&inode->i_data, 0); | 229 | truncate_inode_pages_final(&inode->i_data); |
| 229 | 230 | ||
| 230 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 231 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
| 231 | if (is_bad_inode(inode)) | 232 | if (is_bad_inode(inode)) |
| @@ -503,6 +504,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 503 | { | 504 | { |
| 504 | struct extent_status es; | 505 | struct extent_status es; |
| 505 | int retval; | 506 | int retval; |
| 507 | int ret = 0; | ||
| 506 | #ifdef ES_AGGRESSIVE_TEST | 508 | #ifdef ES_AGGRESSIVE_TEST |
| 507 | struct ext4_map_blocks orig_map; | 509 | struct ext4_map_blocks orig_map; |
| 508 | 510 | ||
| @@ -514,6 +516,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 514 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 516 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
| 515 | (unsigned long) map->m_lblk); | 517 | (unsigned long) map->m_lblk); |
| 516 | 518 | ||
| 519 | /* | ||
| 520 | * ext4_map_blocks returns an int, and m_len is an unsigned int | ||
| 521 | */ | ||
| 522 | if (unlikely(map->m_len > INT_MAX)) | ||
| 523 | map->m_len = INT_MAX; | ||
| 524 | |||
| 517 | /* Lookup extent status tree firstly */ | 525 | /* Lookup extent status tree firstly */ |
| 518 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 526 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
| 519 | ext4_es_lru_add(inode); | 527 | ext4_es_lru_add(inode); |
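
The INT_MAX clamp matters because map->m_len is an unsigned int while the mapped-block count travels back through the int return value of ext4_map_blocks(); without the clamp, a request longer than INT_MAX blocks would come back negative and read as an error code. The truncation in isolation:

#include <stdio.h>
#include <limits.h>

static int map_len_as_int(unsigned int m_len)
{
	/* mirrors the fix: clamp before the value is returned as an int */
	if (m_len > INT_MAX)
		m_len = INT_MAX;
	return (int)m_len;
}

int main(void)
{
	unsigned int huge = 0x80000001u;	/* > INT_MAX */

	printf("without clamp: %d\n", (int)huge);	/* negative: looks like -errno */
	printf("with clamp:    %d\n", map_len_as_int(huge));
	return 0;
}
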
| @@ -552,7 +560,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 552 | EXT4_GET_BLOCKS_KEEP_SIZE); | 560 | EXT4_GET_BLOCKS_KEEP_SIZE); |
| 553 | } | 561 | } |
| 554 | if (retval > 0) { | 562 | if (retval > 0) { |
| 555 | int ret; | ||
| 556 | unsigned int status; | 563 | unsigned int status; |
| 557 | 564 | ||
| 558 | if (unlikely(retval != map->m_len)) { | 565 | if (unlikely(retval != map->m_len)) { |
| @@ -579,7 +586,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 579 | 586 | ||
| 580 | found: | 587 | found: |
| 581 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 588 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 582 | int ret = check_block_validity(inode, map); | 589 | ret = check_block_validity(inode, map); |
| 583 | if (ret != 0) | 590 | if (ret != 0) |
| 584 | return ret; | 591 | return ret; |
| 585 | } | 592 | } |
| @@ -596,7 +603,13 @@ found: | |||
| 596 | * with buffer head unmapped. | 603 | * with buffer head unmapped. |
| 597 | */ | 604 | */ |
| 598 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 605 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
| 599 | return retval; | 606 | /* |
| 607 | * If we need to convert extent to unwritten | ||
| 608 | * we continue and do the actual work in | ||
| 609 | * ext4_ext_map_blocks() | ||
| 610 | */ | ||
| 611 | if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) | ||
| 612 | return retval; | ||
| 600 | 613 | ||
| 601 | /* | 614 | /* |
| 602 | * Here we clear m_flags because after allocating a new extent, | 615 |
| @@ -652,7 +665,6 @@ found: | |||
| 652 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 665 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
| 653 | 666 | ||
| 654 | if (retval > 0) { | 667 | if (retval > 0) { |
| 655 | int ret; | ||
| 656 | unsigned int status; | 668 | unsigned int status; |
| 657 | 669 | ||
| 658 | if (unlikely(retval != map->m_len)) { | 670 | if (unlikely(retval != map->m_len)) { |
| @@ -687,7 +699,7 @@ found: | |||
| 687 | has_zeroout: | 699 | has_zeroout: |
| 688 | up_write((&EXT4_I(inode)->i_data_sem)); | 700 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 689 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 701 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 690 | int ret = check_block_validity(inode, map); | 702 | ret = check_block_validity(inode, map); |
| 691 | if (ret != 0) | 703 | if (ret != 0) |
| 692 | return ret; | 704 | return ret; |
| 693 | } | 705 | } |
| @@ -3312,33 +3324,13 @@ void ext4_set_aops(struct inode *inode) | |||
| 3312 | } | 3324 | } |
| 3313 | 3325 | ||
| 3314 | /* | 3326 | /* |
| 3315 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
| 3316 | * up to the end of the block which corresponds to `from'. | ||
| 3317 | * This is required during truncate. We need to physically zero the tail end | ||
| 3318 | * of that block so it doesn't yield old data if the file is later grown. | ||
| 3319 | */ | ||
| 3320 | int ext4_block_truncate_page(handle_t *handle, | ||
| 3321 | struct address_space *mapping, loff_t from) | ||
| 3322 | { | ||
| 3323 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
| 3324 | unsigned length; | ||
| 3325 | unsigned blocksize; | ||
| 3326 | struct inode *inode = mapping->host; | ||
| 3327 | |||
| 3328 | blocksize = inode->i_sb->s_blocksize; | ||
| 3329 | length = blocksize - (offset & (blocksize - 1)); | ||
| 3330 | |||
| 3331 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
| 3332 | } | ||
| 3333 | |||
| 3334 | /* | ||
| 3335 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | 3327 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
| 3336 | * starting from file offset 'from'. The range to be zero'd must | 3328 | * starting from file offset 'from'. The range to be zero'd must |
| 3337 | * be contained within one block. If the specified range exceeds | 3329 | * be contained within one block. If the specified range exceeds |
| 3338 | * the end of the block it will be shortened to the end of the block | 3330 | * the end of the block it will be shortened to the end of the block |
| 3339 | * that corresponds to 'from' | 3331 | * that corresponds to 'from' |
| 3340 | */ | 3332 | */ |
| 3341 | int ext4_block_zero_page_range(handle_t *handle, | 3333 | static int ext4_block_zero_page_range(handle_t *handle, |
| 3342 | struct address_space *mapping, loff_t from, loff_t length) | 3334 | struct address_space *mapping, loff_t from, loff_t length) |
| 3343 | { | 3335 | { |
| 3344 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3336 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
| @@ -3428,6 +3420,26 @@ unlock: | |||
| 3428 | return err; | 3420 | return err; |
| 3429 | } | 3421 | } |
| 3430 | 3422 | ||
| 3423 | /* | ||
| 3424 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
| 3425 | * up to the end of the block which corresponds to `from'. | ||
| 3426 | * This is required during truncate. We need to physically zero the tail end | ||
| 3427 | * of that block so it doesn't yield old data if the file is later grown. | ||
| 3428 | */ | ||
| 3429 | int ext4_block_truncate_page(handle_t *handle, | ||
| 3430 | struct address_space *mapping, loff_t from) | ||
| 3431 | { | ||
| 3432 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
| 3433 | unsigned length; | ||
| 3434 | unsigned blocksize; | ||
| 3435 | struct inode *inode = mapping->host; | ||
| 3436 | |||
| 3437 | blocksize = inode->i_sb->s_blocksize; | ||
| 3438 | length = blocksize - (offset & (blocksize - 1)); | ||
| 3439 | |||
| 3440 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
| 3441 | } | ||
| 3442 | |||
| 3431 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 3443 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
| 3432 | loff_t lstart, loff_t length) | 3444 | loff_t lstart, loff_t length) |
| 3433 | { | 3445 | { |
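
After the move, ext4_block_truncate_page() stays a thin wrapper: it only computes how many bytes remain between 'from' and the end of its filesystem block, then delegates to the (now static) range helper. The arithmetic, checked standalone with assumed page and block sizes:

#include <stdio.h>

int main(void)
{
	unsigned page_size = 4096;		/* assumed PAGE_CACHE_SIZE */
	unsigned blocksize = 1024;		/* assumed sb->s_blocksize */
	unsigned long long from = 10000;	/* truncate point */

	unsigned offset = from & (page_size - 1);
	unsigned length = blocksize - (offset & (blocksize - 1));

	/* zero the tail: [from, from + length) ends exactly on a block boundary */
	printf("offset in page = %u, bytes to zero = %u\n", offset, length);
	return 0;
}
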
| @@ -3501,7 +3513,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
| 3501 | if (!S_ISREG(inode->i_mode)) | 3513 | if (!S_ISREG(inode->i_mode)) |
| 3502 | return -EOPNOTSUPP; | 3514 | return -EOPNOTSUPP; |
| 3503 | 3515 | ||
| 3504 | trace_ext4_punch_hole(inode, offset, length); | 3516 | trace_ext4_punch_hole(inode, offset, length, 0); |
| 3505 | 3517 | ||
| 3506 | /* | 3518 | /* |
| 3507 | * Write out all dirty pages to avoid race conditions | 3519 | * Write out all dirty pages to avoid race conditions |
| @@ -3608,6 +3620,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
| 3608 | up_write(&EXT4_I(inode)->i_data_sem); | 3620 | up_write(&EXT4_I(inode)->i_data_sem); |
| 3609 | if (IS_SYNC(inode)) | 3621 | if (IS_SYNC(inode)) |
| 3610 | ext4_handle_sync(handle); | 3622 | ext4_handle_sync(handle); |
| 3623 | |||
| 3624 | /* Now release the pages again to reduce race window */ | ||
| 3625 | if (last_block_offset > first_block_offset) | ||
| 3626 | truncate_pagecache_range(inode, first_block_offset, | ||
| 3627 | last_block_offset); | ||
| 3628 | |||
| 3611 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3629 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
| 3612 | ext4_mark_inode_dirty(handle, inode); | 3630 | ext4_mark_inode_dirty(handle, inode); |
| 3613 | out_stop: | 3631 | out_stop: |
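
The extra truncate_pagecache_range() after the blocks are freed shrinks the window in which a racing access could have re-instantiated pages over the hole between the first page-cache truncate and the block removal. first_block_offset and last_block_offset bound the hole rounded inward to whole blocks; a standalone sketch of that rounding under an assumed block size:

#include <stdio.h>

int main(void)
{
	unsigned long long blocksize = 4096;	/* assumed */
	unsigned long long offset = 5000, length = 20000;

	/* round the punched range inward to full filesystem blocks */
	unsigned long long first = (offset + blocksize - 1) & ~(blocksize - 1);
	unsigned long long last  = ((offset + length) & ~(blocksize - 1)) - 1;

	if (last > first)
		printf("drop page cache over [%llu, %llu]\n", first, last);
	return 0;
}
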
| @@ -3681,7 +3699,7 @@ void ext4_truncate(struct inode *inode) | |||
| 3681 | 3699 | ||
| 3682 | /* | 3700 | /* |
| 3683 | * There is a possibility that we're either freeing the inode | 3701 | * There is a possibility that we're either freeing the inode |
| 3684 | * or it completely new indode. In those cases we might not | 3702 | * or it's a completely new inode. In those cases we might not |
| 3685 | * have i_mutex locked because it's not necessary. | 3703 | * have i_mutex locked because it's not necessary. |
| 3686 | */ | 3704 | */ |
| 3687 | if (!(inode->i_state & (I_NEW|I_FREEING))) | 3705 | if (!(inode->i_state & (I_NEW|I_FREEING))) |
| @@ -3921,18 +3939,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | |||
| 3921 | void ext4_set_inode_flags(struct inode *inode) | 3939 | void ext4_set_inode_flags(struct inode *inode) |
| 3922 | { | 3940 | { |
| 3923 | unsigned int flags = EXT4_I(inode)->i_flags; | 3941 | unsigned int flags = EXT4_I(inode)->i_flags; |
| 3942 | unsigned int new_fl = 0; | ||
| 3924 | 3943 | ||
| 3925 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | ||
| 3926 | if (flags & EXT4_SYNC_FL) | 3944 | if (flags & EXT4_SYNC_FL) |
| 3927 | inode->i_flags |= S_SYNC; | 3945 | new_fl |= S_SYNC; |
| 3928 | if (flags & EXT4_APPEND_FL) | 3946 | if (flags & EXT4_APPEND_FL) |
| 3929 | inode->i_flags |= S_APPEND; | 3947 | new_fl |= S_APPEND; |
| 3930 | if (flags & EXT4_IMMUTABLE_FL) | 3948 | if (flags & EXT4_IMMUTABLE_FL) |
| 3931 | inode->i_flags |= S_IMMUTABLE; | 3949 | new_fl |= S_IMMUTABLE; |
| 3932 | if (flags & EXT4_NOATIME_FL) | 3950 | if (flags & EXT4_NOATIME_FL) |
| 3933 | inode->i_flags |= S_NOATIME; | 3951 | new_fl |= S_NOATIME; |
| 3934 | if (flags & EXT4_DIRSYNC_FL) | 3952 | if (flags & EXT4_DIRSYNC_FL) |
| 3935 | inode->i_flags |= S_DIRSYNC; | 3953 | new_fl |= S_DIRSYNC; |
| 3954 | inode_set_flags(inode, new_fl, | ||
| 3955 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | ||
| 3936 | } | 3956 | } |
| 3937 | 3957 | ||
| 3938 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ | 3958 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ |
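
Building new_fl first and then applying it with inode_set_flags() replaces a clear-then-set sequence that briefly exposed i_flags with all five bits dropped, and avoids a non-atomic read-modify-write racing with concurrent i_flags updates. A userspace sketch of the same set-bits-under-mask update via a compare-and-swap loop (illustrative, not the kernel implementation; bit values are assumptions):

#include <stdio.h>
#include <stdatomic.h>

#define S_SYNC    (1u << 0)		/* illustrative bit values */
#define S_APPEND  (1u << 1)
#define S_NOATIME (1u << 2)

static void set_flags(atomic_uint *flags, unsigned int set, unsigned int mask)
{
	unsigned int old = atomic_load(flags);

	/* replace only the bits in 'mask'; retry if another writer raced us */
	while (!atomic_compare_exchange_weak(flags, &old,
					     (old & ~mask) | (set & mask)))
		;
}

int main(void)
{
	atomic_uint flags = S_APPEND;

	set_flags(&flags, S_SYNC | S_NOATIME, S_SYNC | S_APPEND | S_NOATIME);
	printf("flags = 0x%x\n", atomic_load(&flags));	/* S_SYNC|S_NOATIME */
	return 0;
}
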
| @@ -4151,11 +4171,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4151 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); | 4171 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); |
| 4152 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); | 4172 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); |
| 4153 | 4173 | ||
| 4154 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); | 4174 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
| 4155 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4175 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); |
| 4156 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4176 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
| 4157 | inode->i_version |= | 4177 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
| 4158 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4178 | inode->i_version |= |
| 4179 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | ||
| 4180 | } | ||
| 4159 | } | 4181 | } |
| 4160 | 4182 | ||
| 4161 | ret = 0; | 4183 | ret = 0; |
| @@ -4325,8 +4347,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4325 | goto out_brelse; | 4347 | goto out_brelse; |
| 4326 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4348 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
| 4327 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); | 4349 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); |
| 4328 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4350 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) |
| 4329 | cpu_to_le32(EXT4_OS_HURD)) | ||
| 4330 | raw_inode->i_file_acl_high = | 4351 | raw_inode->i_file_acl_high = |
| 4331 | cpu_to_le16(ei->i_file_acl >> 32); | 4352 | cpu_to_le16(ei->i_file_acl >> 32); |
| 4332 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | 4353 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
| @@ -4371,12 +4392,15 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4371 | raw_inode->i_block[block] = ei->i_data[block]; | 4392 | raw_inode->i_block[block] = ei->i_data[block]; |
| 4372 | } | 4393 | } |
| 4373 | 4394 | ||
| 4374 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4395 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
| 4375 | if (ei->i_extra_isize) { | 4396 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
| 4376 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4397 | if (ei->i_extra_isize) { |
| 4377 | raw_inode->i_version_hi = | 4398 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
| 4378 | cpu_to_le32(inode->i_version >> 32); | 4399 | raw_inode->i_version_hi = |
| 4379 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4400 | cpu_to_le32(inode->i_version >> 32); |
| 4401 | raw_inode->i_extra_isize = | ||
| 4402 | cpu_to_le16(ei->i_extra_isize); | ||
| 4403 | } | ||
| 4380 | } | 4404 | } |
| 4381 | 4405 | ||
| 4382 | ext4_inode_csum_set(inode, raw_inode, ei); | 4406 | ext4_inode_csum_set(inode, raw_inode, ei); |
| @@ -4443,7 +4467,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4443 | return -EIO; | 4467 | return -EIO; |
| 4444 | } | 4468 | } |
| 4445 | 4469 | ||
| 4446 | if (wbc->sync_mode != WB_SYNC_ALL) | 4470 | /* |
| 4471 | * No need to force transaction in WB_SYNC_NONE mode. Also | ||
| 4472 | * ext4_sync_fs() will force the commit after everything is | ||
| 4473 | * written. | ||
| 4474 | */ | ||
| 4475 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) | ||
| 4447 | return 0; | 4476 | return 0; |
| 4448 | 4477 | ||
| 4449 | err = ext4_force_commit(inode->i_sb); | 4478 | err = ext4_force_commit(inode->i_sb); |
| @@ -4453,7 +4482,11 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4453 | err = __ext4_get_inode_loc(inode, &iloc, 0); | 4482 | err = __ext4_get_inode_loc(inode, &iloc, 0); |
| 4454 | if (err) | 4483 | if (err) |
| 4455 | return err; | 4484 | return err; |
| 4456 | if (wbc->sync_mode == WB_SYNC_ALL) | 4485 | /* |
| 4486 | * sync(2) will flush the whole buffer cache. No need to do | ||
| 4487 | * it here separately for each inode. | ||
| 4488 | */ | ||
| 4489 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) | ||
| 4457 | sync_dirty_buffer(iloc.bh); | 4490 | sync_dirty_buffer(iloc.bh); |
| 4458 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 4491 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
| 4459 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, | 4492 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
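
The two wbc checks encode the same policy from opposite ends: for sync(2)-driven writeback (wbc->for_sync), ext4_sync_fs() will force the journal commit and sync(2) will flush the whole buffer cache once for the filesystem, so doing either per inode is redundant. The resulting decision, as a small sketch (field names mirror the patch; the struct is simplified):

#include <stdio.h>
#include <stdbool.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

struct wbc { enum sync_mode sync_mode; bool for_sync; };

static bool force_commit_per_inode(const struct wbc *wbc)
{
	/* background writeback, or sync(2) where ext4_sync_fs() commits later */
	if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
		return false;
	return true;	/* e.g. fsync-style WB_SYNC_ALL on one inode */
}

int main(void)
{
	struct wbc cases[] = {
		{ WB_SYNC_NONE, false },	/* background writeback */
		{ WB_SYNC_ALL,  true  },	/* sync(2) */
		{ WB_SYNC_ALL,  false },	/* per-inode integrity sync */
	};
	for (unsigned i = 0; i < 3; i++)
		printf("case %u: force commit = %d\n", i,
		       force_commit_per_inode(&cases[i]));
	return 0;
}
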
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a2a837f00407..0f2252ec274d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -104,21 +104,15 @@ static long swap_inode_boot_loader(struct super_block *sb, | |||
| 104 | struct ext4_inode_info *ei_bl; | 104 | struct ext4_inode_info *ei_bl; |
| 105 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 105 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 106 | 106 | ||
| 107 | if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { | 107 | if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) |
| 108 | err = -EINVAL; | 108 | return -EINVAL; |
| 109 | goto swap_boot_out; | ||
| 110 | } | ||
| 111 | 109 | ||
| 112 | if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { | 110 | if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) |
| 113 | err = -EPERM; | 111 | return -EPERM; |
| 114 | goto swap_boot_out; | ||
| 115 | } | ||
| 116 | 112 | ||
| 117 | inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); | 113 | inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); |
| 118 | if (IS_ERR(inode_bl)) { | 114 | if (IS_ERR(inode_bl)) |
| 119 | err = PTR_ERR(inode_bl); | 115 | return PTR_ERR(inode_bl); |
| 120 | goto swap_boot_out; | ||
| 121 | } | ||
| 122 | ei_bl = EXT4_I(inode_bl); | 116 | ei_bl = EXT4_I(inode_bl); |
| 123 | 117 | ||
| 124 | filemap_flush(inode->i_mapping); | 118 | filemap_flush(inode->i_mapping); |
| @@ -193,20 +187,14 @@ static long swap_inode_boot_loader(struct super_block *sb, | |||
| 193 | ext4_mark_inode_dirty(handle, inode); | 187 | ext4_mark_inode_dirty(handle, inode); |
| 194 | } | 188 | } |
| 195 | } | 189 | } |
| 196 | |||
| 197 | ext4_journal_stop(handle); | 190 | ext4_journal_stop(handle); |
| 198 | |||
| 199 | ext4_double_up_write_data_sem(inode, inode_bl); | 191 | ext4_double_up_write_data_sem(inode, inode_bl); |
| 200 | 192 | ||
| 201 | journal_err_out: | 193 | journal_err_out: |
| 202 | ext4_inode_resume_unlocked_dio(inode); | 194 | ext4_inode_resume_unlocked_dio(inode); |
| 203 | ext4_inode_resume_unlocked_dio(inode_bl); | 195 | ext4_inode_resume_unlocked_dio(inode_bl); |
| 204 | |||
| 205 | unlock_two_nondirectories(inode, inode_bl); | 196 | unlock_two_nondirectories(inode, inode_bl); |
| 206 | |||
| 207 | iput(inode_bl); | 197 | iput(inode_bl); |
| 208 | |||
| 209 | swap_boot_out: | ||
| 210 | return err; | 198 | return err; |
| 211 | } | 199 | } |
| 212 | 200 | ||
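
The ioctl cleanup above is a pure control-flow simplification: checks that fail before any resource is taken can return directly instead of jumping to a label that has nothing to undo. The pattern in miniature:

#include <stdio.h>
#include <stdlib.h>

static int before(int ok)
{
	int err = 0;
	char *buf = NULL;

	if (!ok) {
		err = -1;
		goto out;	/* nothing allocated yet, label frees NULL safely */
	}
	buf = malloc(16);
out:
	free(buf);		/* free(NULL) is a no-op */
	return err;
}

static int after(int ok)
{
	char *buf;

	if (!ok)
		return -1;	/* direct return: no resources held yet */
	buf = malloc(16);
	free(buf);
	return 0;
}

int main(void)
{
	printf("%d %d\n", before(0), after(0));
	return 0;
}
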
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 04a5c7504be9..a888cac76e9c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -1808,6 +1808,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
| 1808 | ext4_lock_group(ac->ac_sb, group); | 1808 | ext4_lock_group(ac->ac_sb, group); |
| 1809 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, | 1809 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, |
| 1810 | ac->ac_g_ex.fe_len, &ex); | 1810 | ac->ac_g_ex.fe_len, &ex); |
| 1811 | ex.fe_logical = 0xDEADFA11; /* debug value */ | ||
| 1811 | 1812 | ||
| 1812 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | 1813 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { |
| 1813 | ext4_fsblk_t start; | 1814 | ext4_fsblk_t start; |
| @@ -1936,7 +1937,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
| 1936 | */ | 1937 | */ |
| 1937 | break; | 1938 | break; |
| 1938 | } | 1939 | } |
| 1939 | 1940 | ex.fe_logical = 0xDEADC0DE; /* debug value */ | |
| 1940 | ext4_mb_measure_extent(ac, &ex, e4b); | 1941 | ext4_mb_measure_extent(ac, &ex, e4b); |
| 1941 | 1942 | ||
| 1942 | i += ex.fe_len; | 1943 | i += ex.fe_len; |
| @@ -1977,6 +1978,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
| 1977 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); | 1978 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); |
| 1978 | if (max >= sbi->s_stripe) { | 1979 | if (max >= sbi->s_stripe) { |
| 1979 | ac->ac_found++; | 1980 | ac->ac_found++; |
| 1981 | ex.fe_logical = 0xDEADF00D; /* debug value */ | ||
| 1980 | ac->ac_b_ex = ex; | 1982 | ac->ac_b_ex = ex; |
| 1981 | ext4_mb_use_best_found(ac, e4b); | 1983 | ext4_mb_use_best_found(ac, e4b); |
| 1982 | break; | 1984 | break; |
| @@ -4006,8 +4008,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
| 4006 | (unsigned long)ac->ac_b_ex.fe_len, | 4008 | (unsigned long)ac->ac_b_ex.fe_len, |
| 4007 | (unsigned long)ac->ac_b_ex.fe_logical, | 4009 | (unsigned long)ac->ac_b_ex.fe_logical, |
| 4008 | (int)ac->ac_criteria); | 4010 | (int)ac->ac_criteria); |
| 4009 | ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", | 4011 | ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found); |
| 4010 | ac->ac_ex_scanned, ac->ac_found); | ||
| 4011 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); | 4012 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); |
| 4012 | ngroups = ext4_get_groups_count(sb); | 4013 | ngroups = ext4_get_groups_count(sb); |
| 4013 | for (i = 0; i < ngroups; i++) { | 4014 | for (i = 0; i < ngroups; i++) { |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 08481ee84cd5..d634e183b4d4 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -48,7 +48,7 @@ extern ushort ext4_mballoc_debug; | |||
| 48 | } \ | 48 | } \ |
| 49 | } while (0) | 49 | } while (0) |
| 50 | #else | 50 | #else |
| 51 | #define mb_debug(n, fmt, a...) | 51 | #define mb_debug(n, fmt, a...) no_printk(fmt, ## a) |
| 52 | #endif | 52 | #endif |
| 53 | 53 | ||
| 54 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ | 54 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ |
| @@ -175,8 +175,6 @@ struct ext4_allocation_context { | |||
| 175 | /* copy of the best found extent taken before preallocation efforts */ | 175 | /* copy of the best found extent taken before preallocation efforts */ |
| 176 | struct ext4_free_extent ac_f_ex; | 176 | struct ext4_free_extent ac_f_ex; |
| 177 | 177 | ||
| 178 | /* number of iterations done. we have to track to limit searching */ | ||
| 179 | unsigned long ac_ex_scanned; | ||
| 180 | __u16 ac_groups_scanned; | 178 | __u16 ac_groups_scanned; |
| 181 | __u16 ac_found; | 179 | __u16 ac_found; |
| 182 | __u16 ac_tail; | 180 | __u16 ac_tail; |
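
Defining the disabled mb_debug() as no_printk(...) instead of an empty macro keeps two properties: the compiler still type-checks the format string against its arguments, and "set but unused" warnings for values only referenced in debug statements disappear, while the call still compiles away. A userspace equivalent (the kernel's no_printk is a static inline with a printf attribute; this macro form is a sketch):

#include <stdio.h>

/* evaluates to nothing at runtime, but keeps printf-style format checking */
#define no_printk(fmt, ...) \
	do { if (0) printf(fmt, ##__VA_ARGS__); } while (0)

#ifdef DEBUG
#define mb_debug(fmt, ...) printf(fmt, ##__VA_ARGS__)
#else
#define mb_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif

int main(void)
{
	int groups = 7;

	/* with an empty macro, 'groups' could warn as unused; with a bad
	 * format (say %s for an int) the compiler would complain here */
	mb_debug("scanned %d groups\n", groups);
	return 0;
}
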
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 773b503bd18c..58ee7dc87669 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | |||
| 76 | * ext4_ext_path structure refers to the last extent, or a negative error | 76 | * ext4_ext_path structure refers to the last extent, or a negative error |
| 77 | * value on failure. | 77 | * value on failure. |
| 78 | */ | 78 | */ |
| 79 | static int | 79 | int |
| 80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | 80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, |
| 81 | struct ext4_extent **extent) | 81 | struct ext4_extent **extent) |
| 82 | { | 82 | { |
| @@ -861,8 +861,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) | |||
| 861 | } | 861 | } |
| 862 | if (!buffer_mapped(bh)) { | 862 | if (!buffer_mapped(bh)) { |
| 863 | zero_user(page, block_start, blocksize); | 863 | zero_user(page, block_start, blocksize); |
| 864 | if (!err) | 864 | set_buffer_uptodate(bh); |
| 865 | set_buffer_uptodate(bh); | ||
| 866 | continue; | 865 | continue; |
| 867 | } | 866 | } |
| 868 | } | 867 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d050e043e884..1cb84f78909e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -3000,6 +3000,154 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, | |||
| 3000 | return ext4_get_first_inline_block(inode, parent_de, retval); | 3000 | return ext4_get_first_inline_block(inode, parent_de, retval); |
| 3001 | } | 3001 | } |
| 3002 | 3002 | ||
| 3003 | struct ext4_renament { | ||
| 3004 | struct inode *dir; | ||
| 3005 | struct dentry *dentry; | ||
| 3006 | struct inode *inode; | ||
| 3007 | bool is_dir; | ||
| 3008 | int dir_nlink_delta; | ||
| 3009 | |||
| 3010 | /* entry for "dentry" */ | ||
| 3011 | struct buffer_head *bh; | ||
| 3012 | struct ext4_dir_entry_2 *de; | ||
| 3013 | int inlined; | ||
| 3014 | |||
| 3015 | /* entry for ".." in inode if it's a directory */ | ||
| 3016 | struct buffer_head *dir_bh; | ||
| 3017 | struct ext4_dir_entry_2 *parent_de; | ||
| 3018 | int dir_inlined; | ||
| 3019 | }; | ||
| 3020 | |||
| 3021 | static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) | ||
| 3022 | { | ||
| 3023 | int retval; | ||
| 3024 | |||
| 3025 | ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, | ||
| 3026 | &retval, &ent->parent_de, | ||
| 3027 | &ent->dir_inlined); | ||
| 3028 | if (!ent->dir_bh) | ||
| 3029 | return retval; | ||
| 3030 | if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) | ||
| 3031 | return -EIO; | ||
| 3032 | BUFFER_TRACE(ent->dir_bh, "get_write_access"); | ||
| 3033 | return ext4_journal_get_write_access(handle, ent->dir_bh); | ||
| 3034 | } | ||
| 3035 | |||
| 3036 | static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, | ||
| 3037 | unsigned dir_ino) | ||
| 3038 | { | ||
| 3039 | int retval; | ||
| 3040 | |||
| 3041 | ent->parent_de->inode = cpu_to_le32(dir_ino); | ||
| 3042 | BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); | ||
| 3043 | if (!ent->dir_inlined) { | ||
| 3044 | if (is_dx(ent->inode)) { | ||
| 3045 | retval = ext4_handle_dirty_dx_node(handle, | ||
| 3046 | ent->inode, | ||
| 3047 | ent->dir_bh); | ||
| 3048 | } else { | ||
| 3049 | retval = ext4_handle_dirty_dirent_node(handle, | ||
| 3050 | ent->inode, | ||
| 3051 | ent->dir_bh); | ||
| 3052 | } | ||
| 3053 | } else { | ||
| 3054 | retval = ext4_mark_inode_dirty(handle, ent->inode); | ||
| 3055 | } | ||
| 3056 | if (retval) { | ||
| 3057 | ext4_std_error(ent->dir->i_sb, retval); | ||
| 3058 | return retval; | ||
| 3059 | } | ||
| 3060 | return 0; | ||
| 3061 | } | ||
| 3062 | |||
| 3063 | static int ext4_setent(handle_t *handle, struct ext4_renament *ent, | ||
| 3064 | unsigned ino, unsigned file_type) | ||
| 3065 | { | ||
| 3066 | int retval; | ||
| 3067 | |||
| 3068 | BUFFER_TRACE(ent->bh, "get write access"); | ||
| 3069 | retval = ext4_journal_get_write_access(handle, ent->bh); | ||
| 3070 | if (retval) | ||
| 3071 | return retval; | ||
| 3072 | ent->de->inode = cpu_to_le32(ino); | ||
| 3073 | if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb, | ||
| 3074 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | ||
| 3075 | ent->de->file_type = file_type; | ||
| 3076 | ent->dir->i_version++; | ||
| 3077 | ent->dir->i_ctime = ent->dir->i_mtime = | ||
| 3078 | ext4_current_time(ent->dir); | ||
| 3079 | ext4_mark_inode_dirty(handle, ent->dir); | ||
| 3080 | BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); | ||
| 3081 | if (!ent->inlined) { | ||
| 3082 | retval = ext4_handle_dirty_dirent_node(handle, | ||
| 3083 | ent->dir, ent->bh); | ||
| 3084 | if (unlikely(retval)) { | ||
| 3085 | ext4_std_error(ent->dir->i_sb, retval); | ||
| 3086 | return retval; | ||
| 3087 | } | ||
| 3088 | } | ||
| 3089 | brelse(ent->bh); | ||
| 3090 | ent->bh = NULL; | ||
| 3091 | |||
| 3092 | return 0; | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, | ||
| 3096 | const struct qstr *d_name) | ||
| 3097 | { | ||
| 3098 | int retval = -ENOENT; | ||
| 3099 | struct buffer_head *bh; | ||
| 3100 | struct ext4_dir_entry_2 *de; | ||
| 3101 | |||
| 3102 | bh = ext4_find_entry(dir, d_name, &de, NULL); | ||
| 3103 | if (bh) { | ||
| 3104 | retval = ext4_delete_entry(handle, dir, de, bh); | ||
| 3105 | brelse(bh); | ||
| 3106 | } | ||
| 3107 | return retval; | ||
| 3108 | } | ||
| 3109 | |||
| 3110 | static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) | ||
| 3111 | { | ||
| 3112 | int retval; | ||
| 3113 | /* | ||
| 3114 | * ent->de could have moved from under us during htree split, so make | ||
| 3115 | * sure that we are deleting the right entry. We might also be pointing | ||
| 3116 | * to a stale entry in the unused part of ent->bh so just checking inum | ||
| 3117 | * and the name isn't enough. | ||
| 3118 | */ | ||
| 3119 | if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || | ||
| 3120 | ent->de->name_len != ent->dentry->d_name.len || | ||
| 3121 | strncmp(ent->de->name, ent->dentry->d_name.name, | ||
| 3122 | ent->de->name_len)) { | ||
| 3123 | retval = ext4_find_delete_entry(handle, ent->dir, | ||
| 3124 | &ent->dentry->d_name); | ||
| 3125 | } else { | ||
| 3126 | retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh); | ||
| 3127 | if (retval == -ENOENT) { | ||
| 3128 | retval = ext4_find_delete_entry(handle, ent->dir, | ||
| 3129 | &ent->dentry->d_name); | ||
| 3130 | } | ||
| 3131 | } | ||
| 3132 | |||
| 3133 | if (retval) { | ||
| 3134 | ext4_warning(ent->dir->i_sb, | ||
| 3135 | "Deleting old file (%lu), %d, error=%d", | ||
| 3136 | ent->dir->i_ino, ent->dir->i_nlink, retval); | ||
| 3137 | } | ||
| 3138 | } | ||
| 3139 | |||
| 3140 | static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | ||
| 3141 | { | ||
| 3142 | if (ent->dir_nlink_delta) { | ||
| 3143 | if (ent->dir_nlink_delta == -1) | ||
| 3144 | ext4_dec_count(handle, ent->dir); | ||
| 3145 | else | ||
| 3146 | ext4_inc_count(handle, ent->dir); | ||
| 3147 | ext4_mark_inode_dirty(handle, ent->dir); | ||
| 3148 | } | ||
| 3149 | } | ||
| 3150 | |||
| 3003 | /* | 3151 | /* |
| 3004 | * Anybody can rename anything with this: the permission checks are left to the | 3152 | * Anybody can rename anything with this: the permission checks are left to the |
| 3005 | * higher-level routines. | 3153 | * higher-level routines. |
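
The struct ext4_renament introduced above is what lets the rewritten ext4_rename() and the new ext4_cross_rename() share ext4_rename_dir_prepare(), ext4_setent() and friends: each helper operates on one end of the rename instead of taking a parallel set of old_*/new_* parameters. The pattern in miniature (types and helpers here are illustrative, not the ext4 ones):

#include <stdio.h>

struct renament {
	const char *dir;	/* parent directory */
	const char *name;	/* entry name under 'dir' */
	int         is_dir;
};

/* one helper serves both ends of the rename */
static int setent(struct renament *ent, const char *target)
{
	printf("point %s/%s at %s\n", ent->dir, ent->name, target);
	return 0;
}

int main(void)
{
	struct renament old = { .dir = "/a", .name = "x" };
	struct renament new = { .dir = "/b", .name = "y" };

	/* RENAME_EXCHANGE-style: each entry is repointed at the other */
	setent(&new, "inode-of-x");
	setent(&old, "inode-of-y");
	return 0;
}
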
| @@ -3012,198 +3160,267 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3012 | struct inode *new_dir, struct dentry *new_dentry) | 3160 | struct inode *new_dir, struct dentry *new_dentry) |
| 3013 | { | 3161 | { |
| 3014 | handle_t *handle = NULL; | 3162 | handle_t *handle = NULL; |
| 3015 | struct inode *old_inode, *new_inode; | 3163 | struct ext4_renament old = { |
| 3016 | struct buffer_head *old_bh, *new_bh, *dir_bh; | 3164 | .dir = old_dir, |
| 3017 | struct ext4_dir_entry_2 *old_de, *new_de; | 3165 | .dentry = old_dentry, |
| 3166 | .inode = old_dentry->d_inode, | ||
| 3167 | }; | ||
| 3168 | struct ext4_renament new = { | ||
| 3169 | .dir = new_dir, | ||
| 3170 | .dentry = new_dentry, | ||
| 3171 | .inode = new_dentry->d_inode, | ||
| 3172 | }; | ||
| 3018 | int retval; | 3173 | int retval; |
| 3019 | int inlined = 0, new_inlined = 0; | ||
| 3020 | struct ext4_dir_entry_2 *parent_de; | ||
| 3021 | 3174 | ||
| 3022 | dquot_initialize(old_dir); | 3175 | dquot_initialize(old.dir); |
| 3023 | dquot_initialize(new_dir); | 3176 | dquot_initialize(new.dir); |
| 3024 | |||
| 3025 | old_bh = new_bh = dir_bh = NULL; | ||
| 3026 | 3177 | ||
| 3027 | /* Initialize quotas before so that eventual writes go | 3178 | /* Initialize quotas before so that eventual writes go |
| 3028 | * in separate transaction */ | 3179 | * in separate transaction */ |
| 3029 | if (new_dentry->d_inode) | 3180 | if (new.inode) |
| 3030 | dquot_initialize(new_dentry->d_inode); | 3181 | dquot_initialize(new.inode); |
| 3031 | 3182 | ||
| 3032 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); | 3183 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); |
| 3033 | /* | 3184 | /* |
| 3034 | * Check for inode number is _not_ due to possible IO errors. | 3185 | * Check for inode number is _not_ due to possible IO errors. |
| 3035 | * We might rmdir the source, keep it as pwd of some process | 3186 | * We might rmdir the source, keep it as pwd of some process |
| 3036 | * and merrily kill the link to whatever was created under the | 3187 | * and merrily kill the link to whatever was created under the |
| 3037 | * same name. Goodbye sticky bit ;-< | 3188 | * same name. Goodbye sticky bit ;-< |
| 3038 | */ | 3189 | */ |
| 3039 | old_inode = old_dentry->d_inode; | ||
| 3040 | retval = -ENOENT; | 3190 | retval = -ENOENT; |
| 3041 | if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) | 3191 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) |
| 3042 | goto end_rename; | 3192 | goto end_rename; |
| 3043 | 3193 | ||
| 3044 | new_inode = new_dentry->d_inode; | 3194 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, |
| 3045 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, | 3195 | &new.de, &new.inlined); |
| 3046 | &new_de, &new_inlined); | 3196 | if (new.bh) { |
| 3047 | if (new_bh) { | 3197 | if (!new.inode) { |
| 3048 | if (!new_inode) { | 3198 | brelse(new.bh); |
| 3049 | brelse(new_bh); | 3199 | new.bh = NULL; |
| 3050 | new_bh = NULL; | ||
| 3051 | } | 3200 | } |
| 3052 | } | 3201 | } |
| 3053 | if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) | 3202 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
| 3054 | ext4_alloc_da_blocks(old_inode); | 3203 | ext4_alloc_da_blocks(old.inode); |
| 3055 | 3204 | ||
| 3056 | handle = ext4_journal_start(old_dir, EXT4_HT_DIR, | 3205 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, |
| 3057 | (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + | 3206 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
| 3058 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3207 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); |
| 3059 | if (IS_ERR(handle)) | 3208 | if (IS_ERR(handle)) |
| 3060 | return PTR_ERR(handle); | 3209 | return PTR_ERR(handle); |
| 3061 | 3210 | ||
| 3062 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 3211 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
| 3063 | ext4_handle_sync(handle); | 3212 | ext4_handle_sync(handle); |
| 3064 | 3213 | ||
| 3065 | if (S_ISDIR(old_inode->i_mode)) { | 3214 | if (S_ISDIR(old.inode->i_mode)) { |
| 3066 | if (new_inode) { | 3215 | if (new.inode) { |
| 3067 | retval = -ENOTEMPTY; | 3216 | retval = -ENOTEMPTY; |
| 3068 | if (!empty_dir(new_inode)) | 3217 | if (!empty_dir(new.inode)) |
| 3218 | goto end_rename; | ||
| 3219 | } else { | ||
| 3220 | retval = -EMLINK; | ||
| 3221 | if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) | ||
| 3069 | goto end_rename; | 3222 | goto end_rename; |
| 3070 | } | 3223 | } |
| 3071 | retval = -EIO; | 3224 | retval = ext4_rename_dir_prepare(handle, &old); |
| 3072 | dir_bh = ext4_get_first_dir_block(handle, old_inode, | ||
| 3073 | &retval, &parent_de, | ||
| 3074 | &inlined); | ||
| 3075 | if (!dir_bh) | ||
| 3076 | goto end_rename; | ||
| 3077 | if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) | ||
| 3078 | goto end_rename; | ||
| 3079 | retval = -EMLINK; | ||
| 3080 | if (!new_inode && new_dir != old_dir && | ||
| 3081 | EXT4_DIR_LINK_MAX(new_dir)) | ||
| 3082 | goto end_rename; | ||
| 3083 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
| 3084 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
| 3085 | if (retval) | 3225 | if (retval) |
| 3086 | goto end_rename; | 3226 | goto end_rename; |
| 3087 | } | 3227 | } |
| 3088 | if (!new_bh) { | 3228 | if (!new.bh) { |
| 3089 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 3229 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
| 3090 | if (retval) | 3230 | if (retval) |
| 3091 | goto end_rename; | 3231 | goto end_rename; |
| 3092 | } else { | 3232 | } else { |
| 3093 | BUFFER_TRACE(new_bh, "get write access"); | 3233 | retval = ext4_setent(handle, &new, |
| 3094 | retval = ext4_journal_get_write_access(handle, new_bh); | 3234 | old.inode->i_ino, old.de->file_type); |
| 3095 | if (retval) | 3235 | if (retval) |
| 3096 | goto end_rename; | 3236 | goto end_rename; |
| 3097 | new_de->inode = cpu_to_le32(old_inode->i_ino); | ||
| 3098 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | ||
| 3099 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | ||
| 3100 | new_de->file_type = old_de->file_type; | ||
| 3101 | new_dir->i_version++; | ||
| 3102 | new_dir->i_ctime = new_dir->i_mtime = | ||
| 3103 | ext4_current_time(new_dir); | ||
| 3104 | ext4_mark_inode_dirty(handle, new_dir); | ||
| 3105 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); | ||
| 3106 | if (!new_inlined) { | ||
| 3107 | retval = ext4_handle_dirty_dirent_node(handle, | ||
| 3108 | new_dir, new_bh); | ||
| 3109 | if (unlikely(retval)) { | ||
| 3110 | ext4_std_error(new_dir->i_sb, retval); | ||
| 3111 | goto end_rename; | ||
| 3112 | } | ||
| 3113 | } | ||
| 3114 | brelse(new_bh); | ||
| 3115 | new_bh = NULL; | ||
| 3116 | } | 3237 | } |
| 3117 | 3238 | ||
| 3118 | /* | 3239 | /* |
| 3119 | * Like most other Unix systems, set the ctime for inodes on a | 3240 | * Like most other Unix systems, set the ctime for inodes on a |
| 3120 | * rename. | 3241 | * rename. |
| 3121 | */ | 3242 | */ |
| 3122 | old_inode->i_ctime = ext4_current_time(old_inode); | 3243 | old.inode->i_ctime = ext4_current_time(old.inode); |
| 3123 | ext4_mark_inode_dirty(handle, old_inode); | 3244 | ext4_mark_inode_dirty(handle, old.inode); |
| 3124 | 3245 | ||
| 3125 | /* | 3246 | /* |
| 3126 | * ok, that's it | 3247 | * ok, that's it |
| 3127 | */ | 3248 | */ |
| 3128 | if (le32_to_cpu(old_de->inode) != old_inode->i_ino || | 3249 | ext4_rename_delete(handle, &old); |
| 3129 | old_de->name_len != old_dentry->d_name.len || | 3250 | |
| 3130 | strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || | 3251 | if (new.inode) { |
| 3131 | (retval = ext4_delete_entry(handle, old_dir, | 3252 | ext4_dec_count(handle, new.inode); |
| 3132 | old_de, old_bh)) == -ENOENT) { | 3253 | new.inode->i_ctime = ext4_current_time(new.inode); |
| 3133 | /* old_de could have moved from under us during htree split, so | ||
| 3134 | * make sure that we are deleting the right entry. We might | ||
| 3135 | * also be pointing to a stale entry in the unused part of | ||
| 3136 | * old_bh so just checking inum and the name isn't enough. */ | ||
| 3137 | struct buffer_head *old_bh2; | ||
| 3138 | struct ext4_dir_entry_2 *old_de2; | ||
| 3139 | |||
| 3140 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, | ||
| 3141 | &old_de2, NULL); | ||
| 3142 | if (old_bh2) { | ||
| 3143 | retval = ext4_delete_entry(handle, old_dir, | ||
| 3144 | old_de2, old_bh2); | ||
| 3145 | brelse(old_bh2); | ||
| 3146 | } | ||
| 3147 | } | 3254 | } |
| 3148 | if (retval) { | 3255 | old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir); |
| 3149 | ext4_warning(old_dir->i_sb, | 3256 | ext4_update_dx_flag(old.dir); |
| 3150 | "Deleting old file (%lu), %d, error=%d", | 3257 | if (old.dir_bh) { |
| 3151 | old_dir->i_ino, old_dir->i_nlink, retval); | 3258 | retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); |
| 3152 | } | 3259 | if (retval) |
| 3153 | |||
| 3154 | if (new_inode) { | ||
| 3155 | ext4_dec_count(handle, new_inode); | ||
| 3156 | new_inode->i_ctime = ext4_current_time(new_inode); | ||
| 3157 | } | ||
| 3158 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | ||
| 3159 | ext4_update_dx_flag(old_dir); | ||
| 3160 | if (dir_bh) { | ||
| 3161 | parent_de->inode = cpu_to_le32(new_dir->i_ino); | ||
| 3162 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | ||
| 3163 | if (!inlined) { | ||
| 3164 | if (is_dx(old_inode)) { | ||
| 3165 | retval = ext4_handle_dirty_dx_node(handle, | ||
| 3166 | old_inode, | ||
| 3167 | dir_bh); | ||
| 3168 | } else { | ||
| 3169 | retval = ext4_handle_dirty_dirent_node(handle, | ||
| 3170 | old_inode, dir_bh); | ||
| 3171 | } | ||
| 3172 | } else { | ||
| 3173 | retval = ext4_mark_inode_dirty(handle, old_inode); | ||
| 3174 | } | ||
| 3175 | if (retval) { | ||
| 3176 | ext4_std_error(old_dir->i_sb, retval); | ||
| 3177 | goto end_rename; | 3260 | goto end_rename; |
| 3178 | } | 3261 | |
| 3179 | ext4_dec_count(handle, old_dir); | 3262 | ext4_dec_count(handle, old.dir); |
| 3180 | if (new_inode) { | 3263 | if (new.inode) { |
| 3181 | /* checked empty_dir above, can't have another parent, | 3264 | /* checked empty_dir above, can't have another parent, |
| 3182 | * ext4_dec_count() won't work for many-linked dirs */ | 3265 | * ext4_dec_count() won't work for many-linked dirs */ |
| 3183 | clear_nlink(new_inode); | 3266 | clear_nlink(new.inode); |
| 3184 | } else { | 3267 | } else { |
| 3185 | ext4_inc_count(handle, new_dir); | 3268 | ext4_inc_count(handle, new.dir); |
| 3186 | ext4_update_dx_flag(new_dir); | 3269 | ext4_update_dx_flag(new.dir); |
| 3187 | ext4_mark_inode_dirty(handle, new_dir); | 3270 | ext4_mark_inode_dirty(handle, new.dir); |
| 3188 | } | 3271 | } |
| 3189 | } | 3272 | } |
| 3190 | ext4_mark_inode_dirty(handle, old_dir); | 3273 | ext4_mark_inode_dirty(handle, old.dir); |
| 3191 | if (new_inode) { | 3274 | if (new.inode) { |
| 3192 | ext4_mark_inode_dirty(handle, new_inode); | 3275 | ext4_mark_inode_dirty(handle, new.inode); |
| 3193 | if (!new_inode->i_nlink) | 3276 | if (!new.inode->i_nlink) |
| 3194 | ext4_orphan_add(handle, new_inode); | 3277 | ext4_orphan_add(handle, new.inode); |
| 3195 | } | 3278 | } |
| 3196 | retval = 0; | 3279 | retval = 0; |
| 3197 | 3280 | ||
| 3198 | end_rename: | 3281 | end_rename: |
| 3199 | brelse(dir_bh); | 3282 | brelse(old.dir_bh); |
| 3200 | brelse(old_bh); | 3283 | brelse(old.bh); |
| 3201 | brelse(new_bh); | 3284 | brelse(new.bh); |
| 3202 | if (handle) | 3285 | if (handle) |
| 3203 | ext4_journal_stop(handle); | 3286 | ext4_journal_stop(handle); |
| 3204 | return retval; | 3287 | return retval; |
| 3205 | } | 3288 | } |
| 3206 | 3289 | ||
| 3290 | static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
| 3291 | struct inode *new_dir, struct dentry *new_dentry) | ||
| 3292 | { | ||
| 3293 | handle_t *handle = NULL; | ||
| 3294 | struct ext4_renament old = { | ||
| 3295 | .dir = old_dir, | ||
| 3296 | .dentry = old_dentry, | ||
| 3297 | .inode = old_dentry->d_inode, | ||
| 3298 | }; | ||
| 3299 | struct ext4_renament new = { | ||
| 3300 | .dir = new_dir, | ||
| 3301 | .dentry = new_dentry, | ||
| 3302 | .inode = new_dentry->d_inode, | ||
| 3303 | }; | ||
| 3304 | u8 new_file_type; | ||
| 3305 | int retval; | ||
| 3306 | |||
| 3307 | dquot_initialize(old.dir); | ||
| 3308 | dquot_initialize(new.dir); | ||
| 3309 | |||
| 3310 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, | ||
| 3311 | &old.de, &old.inlined); | ||
| 3312 | /* | ||
| 3313 | * Check for inode number is _not_ due to possible IO errors. | ||
| 3314 | * We might rmdir the source, keep it as pwd of some process | ||
| 3315 | * and merrily kill the link to whatever was created under the | ||
| 3316 | * same name. Goodbye sticky bit ;-< | ||
| 3317 | */ | ||
| 3318 | retval = -ENOENT; | ||
| 3319 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) | ||
| 3320 | goto end_rename; | ||
| 3321 | |||
| 3322 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, | ||
| 3323 | &new.de, &new.inlined); | ||
| 3324 | |||
| 3325 | /* RENAME_EXCHANGE case: old *and* new must both exist */ | ||
| 3326 | if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) | ||
| 3327 | goto end_rename; | ||
| 3328 | |||
| 3329 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | ||
| 3330 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | ||
| 3331 | 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | ||
| 3332 | if (IS_ERR(handle)) | ||
| 3333 | return PTR_ERR(handle); | ||
| 3334 | |||
| 3335 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | ||
| 3336 | ext4_handle_sync(handle); | ||
| 3337 | |||
| 3338 | if (S_ISDIR(old.inode->i_mode)) { | ||
| 3339 | old.is_dir = true; | ||
| 3340 | retval = ext4_rename_dir_prepare(handle, &old); | ||
| 3341 | if (retval) | ||
| 3342 | goto end_rename; | ||
| 3343 | } | ||
| 3344 | if (S_ISDIR(new.inode->i_mode)) { | ||
| 3345 | new.is_dir = true; | ||
| 3346 | retval = ext4_rename_dir_prepare(handle, &new); | ||
| 3347 | if (retval) | ||
| 3348 | goto end_rename; | ||
| 3349 | } | ||
| 3350 | |||
| 3351 | /* | ||
| 3352 | * Other than the special case of overwriting a directory, parents' | ||
| 3353 | * nlink only needs to be modified if this is a cross directory rename. | ||
| 3354 | */ | ||
| 3355 | if (old.dir != new.dir && old.is_dir != new.is_dir) { | ||
| 3356 | old.dir_nlink_delta = old.is_dir ? -1 : 1; | ||
| 3357 | new.dir_nlink_delta = -old.dir_nlink_delta; | ||
| 3358 | retval = -EMLINK; | ||
| 3359 | if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) || | ||
| 3360 | (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir))) | ||
| 3361 | goto end_rename; | ||
| 3362 | } | ||
| 3363 | |||
| 3364 | new_file_type = new.de->file_type; | ||
| 3365 | retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type); | ||
| 3366 | if (retval) | ||
| 3367 | goto end_rename; | ||
| 3368 | |||
| 3369 | retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type); | ||
| 3370 | if (retval) | ||
| 3371 | goto end_rename; | ||
| 3372 | |||
| 3373 | /* | ||
| 3374 | * Like most other Unix systems, set the ctime for inodes on a | ||
| 3375 | * rename. | ||
| 3376 | */ | ||
| 3377 | old.inode->i_ctime = ext4_current_time(old.inode); | ||
| 3378 | new.inode->i_ctime = ext4_current_time(new.inode); | ||
| 3379 | ext4_mark_inode_dirty(handle, old.inode); | ||
| 3380 | ext4_mark_inode_dirty(handle, new.inode); | ||
| 3381 | |||
| 3382 | if (old.dir_bh) { | ||
| 3383 | retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); | ||
| 3384 | if (retval) | ||
| 3385 | goto end_rename; | ||
| 3386 | } | ||
| 3387 | if (new.dir_bh) { | ||
| 3388 | retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino); | ||
| 3389 | if (retval) | ||
| 3390 | goto end_rename; | ||
| 3391 | } | ||
| 3392 | ext4_update_dir_count(handle, &old); | ||
| 3393 | ext4_update_dir_count(handle, &new); | ||
| 3394 | retval = 0; | ||
| 3395 | |||
| 3396 | end_rename: | ||
| 3397 | brelse(old.dir_bh); | ||
| 3398 | brelse(new.dir_bh); | ||
| 3399 | brelse(old.bh); | ||
| 3400 | brelse(new.bh); | ||
| 3401 | if (handle) | ||
| 3402 | ext4_journal_stop(handle); | ||
| 3403 | return retval; | ||
| 3404 | } | ||
| 3405 | |||
| 3406 | static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | ||
| 3407 | struct inode *new_dir, struct dentry *new_dentry, | ||
| 3408 | unsigned int flags) | ||
| 3409 | { | ||
| 3410 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | ||
| 3411 | return -EINVAL; | ||
| 3412 | |||
| 3413 | if (flags & RENAME_EXCHANGE) { | ||
| 3414 | return ext4_cross_rename(old_dir, old_dentry, | ||
| 3415 | new_dir, new_dentry); | ||
| 3416 | } | ||
| 3417 | /* | ||
| 3418 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | ||
| 3419 | * is equivalent to regular rename. | ||
| 3420 | */ | ||
| 3421 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
| 3422 | } | ||
| 3423 | |||
| 3207 | /* | 3424 | /* |
| 3208 | * directories can handle most operations... | 3425 | * directories can handle most operations... |
| 3209 | */ | 3426 | */ |
| @@ -3218,6 +3435,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
| 3218 | .mknod = ext4_mknod, | 3435 | .mknod = ext4_mknod, |
| 3219 | .tmpfile = ext4_tmpfile, | 3436 | .tmpfile = ext4_tmpfile, |
| 3220 | .rename = ext4_rename, | 3437 | .rename = ext4_rename, |
| 3438 | .rename2 = ext4_rename2, | ||
| 3221 | .setattr = ext4_setattr, | 3439 | .setattr = ext4_setattr, |
| 3222 | .setxattr = generic_setxattr, | 3440 | .setxattr = generic_setxattr, |
| 3223 | .getxattr = generic_getxattr, | 3441 | .getxattr = generic_getxattr, |
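
With .rename2 wired into ext4_dir_inode_operations, userspace can reach the new semantics through the renameat2() system call: RENAME_NOREPLACE behaves like rename() that fails instead of overwriting, and RENAME_EXCHANGE atomically swaps two existing names. A hedged usage sketch via the raw syscall, since libc wrappers for renameat2 may not exist on systems contemporary with this change:

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>

#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1)	/* from linux/fs.h */
#endif

int main(void)
{
	/* atomically exchange the names "a" and "b" in the current directory;
	 * fails with ENOENT unless both already exist */
	long ret = syscall(SYS_renameat2, AT_FDCWD, "a", AT_FDCWD, "b",
			   RENAME_EXCHANGE);
	if (ret < 0)
		perror("renameat2");
	return ret ? 1 : 0;
}
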
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 710fed2377d4..f3c667091618 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -59,6 +59,7 @@ static struct kset *ext4_kset; | |||
| 59 | static struct ext4_lazy_init *ext4_li_info; | 59 | static struct ext4_lazy_init *ext4_li_info; |
| 60 | static struct mutex ext4_li_mtx; | 60 | static struct mutex ext4_li_mtx; |
| 61 | static struct ext4_features *ext4_feat; | 61 | static struct ext4_features *ext4_feat; |
| 62 | static int ext4_mballoc_ready; | ||
| 62 | 63 | ||
| 63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 64 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
| 64 | unsigned long journal_devnum); | 65 | unsigned long journal_devnum); |
| @@ -845,6 +846,10 @@ static void ext4_put_super(struct super_block *sb) | |||
| 845 | invalidate_bdev(sbi->journal_bdev); | 846 | invalidate_bdev(sbi->journal_bdev); |
| 846 | ext4_blkdev_remove(sbi); | 847 | ext4_blkdev_remove(sbi); |
| 847 | } | 848 | } |
| 849 | if (sbi->s_mb_cache) { | ||
| 850 | ext4_xattr_destroy_cache(sbi->s_mb_cache); | ||
| 851 | sbi->s_mb_cache = NULL; | ||
| 852 | } | ||
| 848 | if (sbi->s_mmp_tsk) | 853 | if (sbi->s_mmp_tsk) |
| 849 | kthread_stop(sbi->s_mmp_tsk); | 854 | kthread_stop(sbi->s_mmp_tsk); |
| 850 | sb->s_fs_info = NULL; | 855 | sb->s_fs_info = NULL; |
| @@ -940,7 +945,7 @@ static void init_once(void *foo) | |||
| 940 | inode_init_once(&ei->vfs_inode); | 945 | inode_init_once(&ei->vfs_inode); |
| 941 | } | 946 | } |
| 942 | 947 | ||
| 943 | static int init_inodecache(void) | 948 | static int __init init_inodecache(void) |
| 944 | { | 949 | { |
| 945 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", | 950 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", |
| 946 | sizeof(struct ext4_inode_info), | 951 | sizeof(struct ext4_inode_info), |
| @@ -3575,6 +3580,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3575 | "feature flags set on rev 0 fs, " | 3580 | "feature flags set on rev 0 fs, " |
| 3576 | "running e2fsck is recommended"); | 3581 | "running e2fsck is recommended"); |
| 3577 | 3582 | ||
| 3583 | if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { | ||
| 3584 | set_opt2(sb, HURD_COMPAT); | ||
| 3585 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
| 3586 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
| 3587 | ext4_msg(sb, KERN_ERR, | ||
| 3588 | "The Hurd can't support 64-bit file systems"); | ||
| 3589 | goto failed_mount; | ||
| 3590 | } | ||
| 3591 | } | ||
| 3592 | |||
| 3578 | if (IS_EXT2_SB(sb)) { | 3593 | if (IS_EXT2_SB(sb)) { |
| 3579 | if (ext2_feature_set_ok(sb)) | 3594 | if (ext2_feature_set_ok(sb)) |
| 3580 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " | 3595 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " |
| @@ -4010,6 +4025,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 4010 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); | 4025 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); |
| 4011 | 4026 | ||
| 4012 | no_journal: | 4027 | no_journal: |
| 4028 | if (ext4_mballoc_ready) { | ||
| 4029 | sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); | ||
| 4030 | if (!sbi->s_mb_cache) { | ||
| 4031 | ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); | ||
| 4032 | goto failed_mount_wq; | ||
| 4033 | } | ||
| 4034 | } | ||
| 4035 | |||
| 4013 | /* | 4036 | /* |
| 4014 | * Get the # of file system overhead blocks from the | 4037 | * Get the # of file system overhead blocks from the |
| 4015 | * superblock if present. | 4038 | * superblock if present. |
| @@ -4835,6 +4858,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
| 4835 | } | 4858 | } |
| 4836 | 4859 | ||
| 4837 | if (*flags & MS_RDONLY) { | 4860 | if (*flags & MS_RDONLY) { |
| 4861 | err = sync_filesystem(sb); | ||
| 4862 | if (err < 0) | ||
| 4863 | goto restore_opts; | ||
| 4838 | err = dquot_suspend(sb, -1); | 4864 | err = dquot_suspend(sb, -1); |
| 4839 | if (err < 0) | 4865 | if (err < 0) |
| 4840 | goto restore_opts; | 4866 | goto restore_opts; |
| @@ -5516,11 +5542,9 @@ static int __init ext4_init_fs(void) | |||
| 5516 | 5542 | ||
| 5517 | err = ext4_init_mballoc(); | 5543 | err = ext4_init_mballoc(); |
| 5518 | if (err) | 5544 | if (err) |
| 5519 | goto out3; | ||
| 5520 | |||
| 5521 | err = ext4_init_xattr(); | ||
| 5522 | if (err) | ||
| 5523 | goto out2; | 5545 | goto out2; |
| 5546 | else | ||
| 5547 | ext4_mballoc_ready = 1; | ||
| 5524 | err = init_inodecache(); | 5548 | err = init_inodecache(); |
| 5525 | if (err) | 5549 | if (err) |
| 5526 | goto out1; | 5550 | goto out1; |
| @@ -5536,10 +5560,9 @@ out: | |||
| 5536 | unregister_as_ext3(); | 5560 | unregister_as_ext3(); |
| 5537 | destroy_inodecache(); | 5561 | destroy_inodecache(); |
| 5538 | out1: | 5562 | out1: |
| 5539 | ext4_exit_xattr(); | 5563 | ext4_mballoc_ready = 0; |
| 5540 | out2: | ||
| 5541 | ext4_exit_mballoc(); | 5564 | ext4_exit_mballoc(); |
| 5542 | out3: | 5565 | out2: |
| 5543 | ext4_exit_feat_adverts(); | 5566 | ext4_exit_feat_adverts(); |
| 5544 | out4: | 5567 | out4: |
| 5545 | if (ext4_proc_root) | 5568 | if (ext4_proc_root) |
| @@ -5562,7 +5585,6 @@ static void __exit ext4_exit_fs(void) | |||
| 5562 | unregister_as_ext3(); | 5585 | unregister_as_ext3(); |
| 5563 | unregister_filesystem(&ext4_fs_type); | 5586 | unregister_filesystem(&ext4_fs_type); |
| 5564 | destroy_inodecache(); | 5587 | destroy_inodecache(); |
| 5565 | ext4_exit_xattr(); | ||
| 5566 | ext4_exit_mballoc(); | 5588 | ext4_exit_mballoc(); |
| 5567 | ext4_exit_feat_adverts(); | 5589 | ext4_exit_feat_adverts(); |
| 5568 | remove_proc_entry("fs/ext4", NULL); | 5590 | remove_proc_entry("fs/ext4", NULL); |
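Taken together, the super.c hunks move ext4's xattr block cache from one module-global mb_cache to one cache per mounted filesystem: ext4_init_fs() now merely records that mballoc is usable (ext4_mballoc_ready), ext4_fill_super() creates the cache at the no_journal: label via ext4_xattr_create_cache(sb->s_id), and ext4_put_super() destroys it and NULLs sbi->s_mb_cache, so mounts no longer contend on a single shared cache. A minimal user-space sketch of that ownership pattern follows; every name in it (cache_create, sb_mount, ...) is hypothetical, not the kernel API.

    /* Sketch of the per-superblock cache ownership pattern introduced above.
     * User-space analogue with hypothetical names; not the kernel API. */
    #include <stdio.h>
    #include <stdlib.h>

    struct cache { char name[32]; };

    static struct cache *cache_create(const char *name)
    {
            struct cache *c = malloc(sizeof(*c));
            if (c)
                    snprintf(c->name, sizeof(c->name), "%s", name);
            return c;
    }

    static void cache_destroy(struct cache *c) { free(c); }

    struct superblock {
            const char *id;
            struct cache *mb_cache;   /* owned by this superblock, not global */
    };

    static int sb_mount(struct superblock *sb)
    {
            sb->mb_cache = cache_create(sb->id);
            if (!sb->mb_cache) {
                    fprintf(stderr, "%s: failed to create an mb_cache\n", sb->id);
                    return -1;
            }
            return 0;
    }

    static void sb_umount(struct superblock *sb)
    {
            if (sb->mb_cache) {
                    cache_destroy(sb->mb_cache);
                    sb->mb_cache = NULL;   /* mirrors ext4_put_super() */
            }
    }

    int main(void)
    {
            struct superblock a = { .id = "sda1" }, b = { .id = "sdb1" };

            /* two mounts now get independent caches instead of sharing one */
            if (sb_mount(&a) == 0 && sb_mount(&b) == 0)
                    printf("%s and %s use separate caches\n", a.id, b.id);
            sb_umount(&a);
            sb_umount(&b);
            return 0;
    }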
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e175e94116ac..1f5cf5880718 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -81,7 +81,7 @@ | |||
| 81 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) | 81 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
| 82 | #endif | 82 | #endif |
| 83 | 83 | ||
| 84 | static void ext4_xattr_cache_insert(struct buffer_head *); | 84 | static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); |
| 85 | static struct buffer_head *ext4_xattr_cache_find(struct inode *, | 85 | static struct buffer_head *ext4_xattr_cache_find(struct inode *, |
| 86 | struct ext4_xattr_header *, | 86 | struct ext4_xattr_header *, |
| 87 | struct mb_cache_entry **); | 87 | struct mb_cache_entry **); |
| @@ -90,8 +90,6 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *, | |||
| 90 | static int ext4_xattr_list(struct dentry *dentry, char *buffer, | 90 | static int ext4_xattr_list(struct dentry *dentry, char *buffer, |
| 91 | size_t buffer_size); | 91 | size_t buffer_size); |
| 92 | 92 | ||
| 93 | static struct mb_cache *ext4_xattr_cache; | ||
| 94 | |||
| 95 | static const struct xattr_handler *ext4_xattr_handler_map[] = { | 93 | static const struct xattr_handler *ext4_xattr_handler_map[] = { |
| 96 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 94 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
| 97 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 95 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| @@ -117,6 +115,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = { | |||
| 117 | NULL | 115 | NULL |
| 118 | }; | 116 | }; |
| 119 | 117 | ||
| 118 | #define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ | ||
| 119 | inode->i_sb->s_fs_info)->s_mb_cache) | ||
| 120 | |||
| 120 | static __le32 ext4_xattr_block_csum(struct inode *inode, | 121 | static __le32 ext4_xattr_block_csum(struct inode *inode, |
| 121 | sector_t block_nr, | 122 | sector_t block_nr, |
| 122 | struct ext4_xattr_header *hdr) | 123 | struct ext4_xattr_header *hdr) |
| @@ -265,6 +266,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
| 265 | struct ext4_xattr_entry *entry; | 266 | struct ext4_xattr_entry *entry; |
| 266 | size_t size; | 267 | size_t size; |
| 267 | int error; | 268 | int error; |
| 269 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
| 268 | 270 | ||
| 269 | ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", | 271 | ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", |
| 270 | name_index, name, buffer, (long)buffer_size); | 272 | name_index, name, buffer, (long)buffer_size); |
| @@ -286,7 +288,7 @@ bad_block: | |||
| 286 | error = -EIO; | 288 | error = -EIO; |
| 287 | goto cleanup; | 289 | goto cleanup; |
| 288 | } | 290 | } |
| 289 | ext4_xattr_cache_insert(bh); | 291 | ext4_xattr_cache_insert(ext4_mb_cache, bh); |
| 290 | entry = BFIRST(bh); | 292 | entry = BFIRST(bh); |
| 291 | error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); | 293 | error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); |
| 292 | if (error == -EIO) | 294 | if (error == -EIO) |
| @@ -409,6 +411,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
| 409 | struct inode *inode = dentry->d_inode; | 411 | struct inode *inode = dentry->d_inode; |
| 410 | struct buffer_head *bh = NULL; | 412 | struct buffer_head *bh = NULL; |
| 411 | int error; | 413 | int error; |
| 414 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
| 412 | 415 | ||
| 413 | ea_idebug(inode, "buffer=%p, buffer_size=%ld", | 416 | ea_idebug(inode, "buffer=%p, buffer_size=%ld", |
| 414 | buffer, (long)buffer_size); | 417 | buffer, (long)buffer_size); |
| @@ -430,7 +433,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
| 430 | error = -EIO; | 433 | error = -EIO; |
| 431 | goto cleanup; | 434 | goto cleanup; |
| 432 | } | 435 | } |
| 433 | ext4_xattr_cache_insert(bh); | 436 | ext4_xattr_cache_insert(ext4_mb_cache, bh); |
| 434 | error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); | 437 | error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); |
| 435 | 438 | ||
| 436 | cleanup: | 439 | cleanup: |
| @@ -526,8 +529,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
| 526 | { | 529 | { |
| 527 | struct mb_cache_entry *ce = NULL; | 530 | struct mb_cache_entry *ce = NULL; |
| 528 | int error = 0; | 531 | int error = 0; |
| 532 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
| 529 | 533 | ||
| 530 | ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); | 534 | ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); |
| 531 | error = ext4_journal_get_write_access(handle, bh); | 535 | error = ext4_journal_get_write_access(handle, bh); |
| 532 | if (error) | 536 | if (error) |
| 533 | goto out; | 537 | goto out; |
| @@ -567,12 +571,13 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, | |||
| 567 | size_t *min_offs, void *base, int *total) | 571 | size_t *min_offs, void *base, int *total) |
| 568 | { | 572 | { |
| 569 | for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { | 573 | for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { |
| 570 | *total += EXT4_XATTR_LEN(last->e_name_len); | ||
| 571 | if (!last->e_value_block && last->e_value_size) { | 574 | if (!last->e_value_block && last->e_value_size) { |
| 572 | size_t offs = le16_to_cpu(last->e_value_offs); | 575 | size_t offs = le16_to_cpu(last->e_value_offs); |
| 573 | if (offs < *min_offs) | 576 | if (offs < *min_offs) |
| 574 | *min_offs = offs; | 577 | *min_offs = offs; |
| 575 | } | 578 | } |
| 579 | if (total) | ||
| 580 | *total += EXT4_XATTR_LEN(last->e_name_len); | ||
| 576 | } | 581 | } |
| 577 | return (*min_offs - ((void *)last - base) - sizeof(__u32)); | 582 | return (*min_offs - ((void *)last - base) - sizeof(__u32)); |
| 578 | } | 583 | } |
| @@ -745,13 +750,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
| 745 | struct ext4_xattr_search *s = &bs->s; | 750 | struct ext4_xattr_search *s = &bs->s; |
| 746 | struct mb_cache_entry *ce = NULL; | 751 | struct mb_cache_entry *ce = NULL; |
| 747 | int error = 0; | 752 | int error = 0; |
| 753 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
| 748 | 754 | ||
| 749 | #define header(x) ((struct ext4_xattr_header *)(x)) | 755 | #define header(x) ((struct ext4_xattr_header *)(x)) |
| 750 | 756 | ||
| 751 | if (i->value && i->value_len > sb->s_blocksize) | 757 | if (i->value && i->value_len > sb->s_blocksize) |
| 752 | return -ENOSPC; | 758 | return -ENOSPC; |
| 753 | if (s->base) { | 759 | if (s->base) { |
| 754 | ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, | 760 | ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, |
| 755 | bs->bh->b_blocknr); | 761 | bs->bh->b_blocknr); |
| 756 | error = ext4_journal_get_write_access(handle, bs->bh); | 762 | error = ext4_journal_get_write_access(handle, bs->bh); |
| 757 | if (error) | 763 | if (error) |
| @@ -769,7 +775,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
| 769 | if (!IS_LAST_ENTRY(s->first)) | 775 | if (!IS_LAST_ENTRY(s->first)) |
| 770 | ext4_xattr_rehash(header(s->base), | 776 | ext4_xattr_rehash(header(s->base), |
| 771 | s->here); | 777 | s->here); |
| 772 | ext4_xattr_cache_insert(bs->bh); | 778 | ext4_xattr_cache_insert(ext4_mb_cache, |
| 779 | bs->bh); | ||
| 773 | } | 780 | } |
| 774 | unlock_buffer(bs->bh); | 781 | unlock_buffer(bs->bh); |
| 775 | if (error == -EIO) | 782 | if (error == -EIO) |
| @@ -905,7 +912,7 @@ getblk_failed: | |||
| 905 | memcpy(new_bh->b_data, s->base, new_bh->b_size); | 912 | memcpy(new_bh->b_data, s->base, new_bh->b_size); |
| 906 | set_buffer_uptodate(new_bh); | 913 | set_buffer_uptodate(new_bh); |
| 907 | unlock_buffer(new_bh); | 914 | unlock_buffer(new_bh); |
| 908 | ext4_xattr_cache_insert(new_bh); | 915 | ext4_xattr_cache_insert(ext4_mb_cache, new_bh); |
| 909 | error = ext4_handle_dirty_xattr_block(handle, | 916 | error = ext4_handle_dirty_xattr_block(handle, |
| 910 | inode, new_bh); | 917 | inode, new_bh); |
| 911 | if (error) | 918 | if (error) |
| @@ -1228,7 +1235,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
| 1228 | struct ext4_xattr_block_find *bs = NULL; | 1235 | struct ext4_xattr_block_find *bs = NULL; |
| 1229 | char *buffer = NULL, *b_entry_name = NULL; | 1236 | char *buffer = NULL, *b_entry_name = NULL; |
| 1230 | size_t min_offs, free; | 1237 | size_t min_offs, free; |
| 1231 | int total_ino, total_blk; | 1238 | int total_ino; |
| 1232 | void *base, *start, *end; | 1239 | void *base, *start, *end; |
| 1233 | int extra_isize = 0, error = 0, tried_min_extra_isize = 0; | 1240 | int extra_isize = 0, error = 0, tried_min_extra_isize = 0; |
| 1234 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); | 1241 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); |
| @@ -1286,8 +1293,7 @@ retry: | |||
| 1286 | first = BFIRST(bh); | 1293 | first = BFIRST(bh); |
| 1287 | end = bh->b_data + bh->b_size; | 1294 | end = bh->b_data + bh->b_size; |
| 1288 | min_offs = end - base; | 1295 | min_offs = end - base; |
| 1289 | free = ext4_xattr_free_space(first, &min_offs, base, | 1296 | free = ext4_xattr_free_space(first, &min_offs, base, NULL); |
| 1290 | &total_blk); | ||
| 1291 | if (free < new_extra_isize) { | 1297 | if (free < new_extra_isize) { |
| 1292 | if (!tried_min_extra_isize && s_min_extra_isize) { | 1298 | if (!tried_min_extra_isize && s_min_extra_isize) { |
| 1293 | tried_min_extra_isize++; | 1299 | tried_min_extra_isize++; |
| @@ -1495,13 +1501,13 @@ ext4_xattr_put_super(struct super_block *sb) | |||
| 1495 | * Returns 0, or a negative error number on failure. | 1501 | * Returns 0, or a negative error number on failure. |
| 1496 | */ | 1502 | */ |
| 1497 | static void | 1503 | static void |
| 1498 | ext4_xattr_cache_insert(struct buffer_head *bh) | 1504 | ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) |
| 1499 | { | 1505 | { |
| 1500 | __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); | 1506 | __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); |
| 1501 | struct mb_cache_entry *ce; | 1507 | struct mb_cache_entry *ce; |
| 1502 | int error; | 1508 | int error; |
| 1503 | 1509 | ||
| 1504 | ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); | 1510 | ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); |
| 1505 | if (!ce) { | 1511 | if (!ce) { |
| 1506 | ea_bdebug(bh, "out of memory"); | 1512 | ea_bdebug(bh, "out of memory"); |
| 1507 | return; | 1513 | return; |
| @@ -1573,12 +1579,13 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, | |||
| 1573 | { | 1579 | { |
| 1574 | __u32 hash = le32_to_cpu(header->h_hash); | 1580 | __u32 hash = le32_to_cpu(header->h_hash); |
| 1575 | struct mb_cache_entry *ce; | 1581 | struct mb_cache_entry *ce; |
| 1582 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
| 1576 | 1583 | ||
| 1577 | if (!header->h_hash) | 1584 | if (!header->h_hash) |
| 1578 | return NULL; /* never share */ | 1585 | return NULL; /* never share */ |
| 1579 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 1586 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
| 1580 | again: | 1587 | again: |
| 1581 | ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, | 1588 | ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev, |
| 1582 | hash); | 1589 | hash); |
| 1583 | while (ce) { | 1590 | while (ce) { |
| 1584 | struct buffer_head *bh; | 1591 | struct buffer_head *bh; |
| @@ -1676,19 +1683,17 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
| 1676 | 1683 | ||
| 1677 | #undef BLOCK_HASH_SHIFT | 1684 | #undef BLOCK_HASH_SHIFT |
| 1678 | 1685 | ||
| 1679 | int __init | 1686 | #define HASH_BUCKET_BITS 10 |
| 1680 | ext4_init_xattr(void) | 1687 | |
| 1688 | struct mb_cache * | ||
| 1689 | ext4_xattr_create_cache(char *name) | ||
| 1681 | { | 1690 | { |
| 1682 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); | 1691 | return mb_cache_create(name, HASH_BUCKET_BITS); |
| 1683 | if (!ext4_xattr_cache) | ||
| 1684 | return -ENOMEM; | ||
| 1685 | return 0; | ||
| 1686 | } | 1692 | } |
| 1687 | 1693 | ||
| 1688 | void | 1694 | void ext4_xattr_destroy_cache(struct mb_cache *cache) |
| 1689 | ext4_exit_xattr(void) | ||
| 1690 | { | 1695 | { |
| 1691 | if (ext4_xattr_cache) | 1696 | if (cache) |
| 1692 | mb_cache_destroy(ext4_xattr_cache); | 1697 | mb_cache_destroy(cache); |
| 1693 | ext4_xattr_cache = NULL; | ||
| 1694 | } | 1698 | } |
| 1699 | |||
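Two smaller things ride along in xattr.c: the cache handle is now threaded through every helper via the EXT4_GET_MB_CACHE(inode) accessor instead of a file-scope static, and ext4_xattr_free_space() makes its total out-parameter optional, which lets ext4_expand_extra_isize_ea() drop the never-read total_blk variable and pass NULL. A hedged sketch of the optional-out-parameter idiom, with all names illustrative:

    /* Optional out-parameter idiom from the ext4_xattr_free_space() change:
     * callers that don't need the extra result pass NULL. Illustrative only. */
    #include <stddef.h>
    #include <stdio.h>

    struct entry { size_t name_len, value_len; };

    /* returns free space; *total (optional) accumulates name overhead */
    static size_t free_space(const struct entry *e, size_t n,
                             size_t region, size_t *total)
    {
            size_t used = 0;

            for (size_t i = 0; i < n; i++) {
                    used += e[i].value_len;
                    if (total)              /* guard: caller may pass NULL */
                            *total += e[i].name_len;
            }
            return region - used;
    }

    int main(void)
    {
            struct entry e[] = { { 8, 100 }, { 12, 200 } };
            size_t total = 0;

            printf("free=%zu\n", free_space(e, 2, 4096, &total)); /* wants total */
            printf("free=%zu total=%zu\n",
                   free_space(e, 2, 4096, NULL), total);          /* doesn't */
            return 0;
    }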
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 819d6398833f..29bedf5589f6 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
| @@ -110,9 +110,6 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
| 110 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 110 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
| 111 | struct ext4_inode *raw_inode, handle_t *handle); | 111 | struct ext4_inode *raw_inode, handle_t *handle); |
| 112 | 112 | ||
| 113 | extern int __init ext4_init_xattr(void); | ||
| 114 | extern void ext4_exit_xattr(void); | ||
| 115 | |||
| 116 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 113 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
| 117 | 114 | ||
| 118 | extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, | 115 | extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, |
| @@ -124,6 +121,9 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, | |||
| 124 | struct ext4_xattr_info *i, | 121 | struct ext4_xattr_info *i, |
| 125 | struct ext4_xattr_ibody_find *is); | 122 | struct ext4_xattr_ibody_find *is); |
| 126 | 123 | ||
| 124 | extern struct mb_cache *ext4_xattr_create_cache(char *name); | ||
| 125 | extern void ext4_xattr_destroy_cache(struct mb_cache *); | ||
| 126 | |||
| 127 | #ifdef CONFIG_EXT4_FS_SECURITY | 127 | #ifdef CONFIG_EXT4_FS_SECURITY |
| 128 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 128 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
| 129 | struct inode *dir, const struct qstr *qstr); | 129 | struct inode *dir, const struct qstr *qstr); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4d67ed736dca..28cea76d78c6 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
| @@ -260,7 +260,7 @@ void f2fs_evict_inode(struct inode *inode) | |||
| 260 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 260 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| 261 | 261 | ||
| 262 | trace_f2fs_evict_inode(inode); | 262 | trace_f2fs_evict_inode(inode); |
| 263 | truncate_inode_pages(&inode->i_data, 0); | 263 | truncate_inode_pages_final(&inode->i_data); |
| 264 | 264 | ||
| 265 | if (inode->i_ino == F2FS_NODE_INO(sbi) || | 265 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
| 266 | inode->i_ino == F2FS_META_INO(sbi)) | 266 | inode->i_ino == F2FS_META_INO(sbi)) |
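As elsewhere in this series, the evict path switches to truncate_inode_pages_final(), the variant dedicated to tearing down the page cache of an inode that is going away for good. A simplified, kernel-style sketch of the common shape these evict_inode implementations share; not the actual f2fs code, with fs-specific teardown elided:

    /* Kernel-style sketch only; error handling and fs-specific teardown
     * are omitted. The two helpers shown are the real VFS entry points. */
    static void example_evict_inode(struct inode *inode)
    {
            /* final page-cache teardown for an inode being evicted */
            truncate_inode_pages_final(&inode->i_data);

            if (!inode->i_nlink) {
                    /* last link gone: release the on-disk blocks too */
                    inode->i_size = 0;
                    /* fs-specific truncation would go here */
            }
            clear_inode(inode);     /* must be the last thing touching it */
    }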
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1a85f83abd53..856bdf994c0a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
| @@ -568,6 +568,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) | |||
| 568 | struct f2fs_mount_info org_mount_opt; | 568 | struct f2fs_mount_info org_mount_opt; |
| 569 | int err, active_logs; | 569 | int err, active_logs; |
| 570 | 570 | ||
| 571 | sync_filesystem(sb); | ||
| 572 | |||
| 571 | /* | 573 | /* |
| 572 | * Save the old mount options in case we | 574 | * Save the old mount options in case we |
| 573 | * need to restore them. | 575 | * need to restore them. |
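The remount changes across this series share one rule: sync the filesystem before the mount options can change, so data written under the old options reaches disk first; ext4 goes further and fails the remount if that sync errors on the read-only path. A minimal kernel-style sketch of the pattern, where parse_example_options() is a hypothetical stand-in:

    /* Sketch of the common ->remount_fs() shape added across this series.
     * parse_example_options() is a hypothetical stand-in. */
    static int example_remount(struct super_block *sb, int *flags, char *data)
    {
            /* write back dirty data under the old mount options first */
            sync_filesystem(sb);

            *flags |= MS_NODIRATIME;        /* fs-specific flag fixups */
            return parse_example_options(sb, data);
    }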
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 854b578f6695..b3361fe2bcb5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(fat_build_inode); | |||
| 490 | 490 | ||
| 491 | static void fat_evict_inode(struct inode *inode) | 491 | static void fat_evict_inode(struct inode *inode) |
| 492 | { | 492 | { |
| 493 | truncate_inode_pages(&inode->i_data, 0); | 493 | truncate_inode_pages_final(&inode->i_data); |
| 494 | if (!inode->i_nlink) { | 494 | if (!inode->i_nlink) { |
| 495 | inode->i_size = 0; | 495 | inode->i_size = 0; |
| 496 | fat_truncate_blocks(inode, 0); | 496 | fat_truncate_blocks(inode, 0); |
| @@ -635,6 +635,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) | |||
| 635 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 635 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
| 636 | *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); | 636 | *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); |
| 637 | 637 | ||
| 638 | sync_filesystem(sb); | ||
| 639 | |||
| 638 | /* make sure we update state on remount. */ | 640 | /* make sure we update state on remount. */ |
| 639 | new_rdonly = *flags & MS_RDONLY; | 641 | new_rdonly = *flags & MS_RDONLY; |
| 640 | if (new_rdonly != (sb->s_flags & MS_RDONLY)) { | 642 | if (new_rdonly != (sb->s_flags & MS_RDONLY)) { |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ef6866592a0f..9ead1596399a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
| @@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
| 272 | case F_SETFL: | 272 | case F_SETFL: |
| 273 | err = setfl(fd, filp, arg); | 273 | err = setfl(fd, filp, arg); |
| 274 | break; | 274 | break; |
| 275 | #if BITS_PER_LONG != 32 | ||
| 276 | /* 32-bit arches must use fcntl64() */ | ||
| 277 | case F_GETLKP: | ||
| 278 | #endif | ||
| 275 | case F_GETLK: | 279 | case F_GETLK: |
| 276 | err = fcntl_getlk(filp, (struct flock __user *) arg); | 280 | err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); |
| 277 | break; | 281 | break; |
| 282 | #if BITS_PER_LONG != 32 | ||
| 283 | /* 32-bit arches must use fcntl64() */ | ||
| 284 | case F_SETLKP: | ||
| 285 | case F_SETLKPW: | ||
| 286 | #endif | ||
| 287 | /* Fallthrough */ | ||
| 278 | case F_SETLK: | 288 | case F_SETLK: |
| 279 | case F_SETLKW: | 289 | case F_SETLKW: |
| 280 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); | 290 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); |
| @@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | |||
| 388 | goto out1; | 398 | goto out1; |
| 389 | 399 | ||
| 390 | switch (cmd) { | 400 | switch (cmd) { |
| 391 | case F_GETLK64: | 401 | case F_GETLK64: |
| 392 | err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); | 402 | case F_GETLKP: |
| 393 | break; | 403 | err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); |
| 394 | case F_SETLK64: | 404 | break; |
| 395 | case F_SETLKW64: | 405 | case F_SETLK64: |
| 396 | err = fcntl_setlk64(fd, f.file, cmd, | 406 | case F_SETLKW64: |
| 397 | (struct flock64 __user *) arg); | 407 | case F_SETLKP: |
| 398 | break; | 408 | case F_SETLKPW: |
| 399 | default: | 409 | err = fcntl_setlk64(fd, f.file, cmd, |
| 400 | err = do_fcntl(fd, cmd, arg, f.file); | 410 | (struct flock64 __user *) arg); |
| 401 | break; | 411 | break; |
| 412 | default: | ||
| 413 | err = do_fcntl(fd, cmd, arg, f.file); | ||
| 414 | break; | ||
| 402 | } | 415 | } |
| 403 | out1: | 416 | out1: |
| 404 | fdput(f); | 417 | fdput(f); |
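The new F_GETLKP/F_SETLKP/F_SETLKPW commands introduce file-private POSIX locks, owned by the open file description rather than the process; the added #if BITS_PER_LONG != 32 guards exist because the lock range is 64-bit, so 32-bit architectures must reach these commands through fcntl64(). They shipped in released kernels under the F_OFD_GETLK/F_OFD_SETLK/F_OFD_SETLKW names, which is what the user-space sketch below uses (requires a glibc recent enough to define them):

    /* User-space sketch of open-file-description locks; uses the F_OFD_*
     * names these commands shipped under. Build with _GNU_SOURCE. */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0600);
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 0,          /* whole file */
                    .l_pid    = 0,          /* must be 0 for OFD locks */
            };

            if (fd < 0)
                    return 1;
            /* the lock belongs to this open file description, not the PID,
             * so it survives fork() with the fd and dies on final close() */
            if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
                    perror("F_OFD_SETLK");
            else
                    puts("got OFD write lock");
            close(fd);
            return 0;
    }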
diff --git a/fs/file.c b/fs/file.c --- a/fs/file.c +++ b/fs/file.c | |||
| @@ -497,7 +497,7 @@ repeat: | |||
| 497 | error = fd; | 497 | error = fd; |
| 498 | #if 1 | 498 | #if 1 |
| 499 | /* Sanity check */ | 499 | /* Sanity check */ |
| 500 | if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { | 500 | if (rcu_access_pointer(fdt->fd[fd]) != NULL) { |
| 501 | printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); | 501 | printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); |
| 502 | rcu_assign_pointer(fdt->fd[fd], NULL); | 502 | rcu_assign_pointer(fdt->fd[fd], NULL); |
| 503 | } | 503 | } |
| @@ -683,35 +683,54 @@ EXPORT_SYMBOL(fget_raw); | |||
| 683 | * The fput_needed flag returned by fget_light should be passed to the | 683 | * The fput_needed flag returned by fget_light should be passed to the |
| 684 | * corresponding fput_light. | 684 | * corresponding fput_light. |
| 685 | */ | 685 | */ |
| 686 | struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) | 686 | static unsigned long __fget_light(unsigned int fd, fmode_t mask) |
| 687 | { | 687 | { |
| 688 | struct files_struct *files = current->files; | 688 | struct files_struct *files = current->files; |
| 689 | struct file *file; | 689 | struct file *file; |
| 690 | 690 | ||
| 691 | *fput_needed = 0; | ||
| 692 | if (atomic_read(&files->count) == 1) { | 691 | if (atomic_read(&files->count) == 1) { |
| 693 | file = __fcheck_files(files, fd); | 692 | file = __fcheck_files(files, fd); |
| 694 | if (file && (file->f_mode & mask)) | 693 | if (!file || unlikely(file->f_mode & mask)) |
| 695 | file = NULL; | 694 | return 0; |
| 695 | return (unsigned long)file; | ||
| 696 | } else { | 696 | } else { |
| 697 | file = __fget(fd, mask); | 697 | file = __fget(fd, mask); |
| 698 | if (file) | 698 | if (!file) |
| 699 | *fput_needed = 1; | 699 | return 0; |
| 700 | return FDPUT_FPUT | (unsigned long)file; | ||
| 700 | } | 701 | } |
| 701 | |||
| 702 | return file; | ||
| 703 | } | 702 | } |
| 704 | struct file *fget_light(unsigned int fd, int *fput_needed) | 703 | unsigned long __fdget(unsigned int fd) |
| 705 | { | 704 | { |
| 706 | return __fget_light(fd, FMODE_PATH, fput_needed); | 705 | return __fget_light(fd, FMODE_PATH); |
| 707 | } | 706 | } |
| 708 | EXPORT_SYMBOL(fget_light); | 707 | EXPORT_SYMBOL(__fdget); |
| 709 | 708 | ||
| 710 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | 709 | unsigned long __fdget_raw(unsigned int fd) |
| 711 | { | 710 | { |
| 712 | return __fget_light(fd, 0, fput_needed); | 711 | return __fget_light(fd, 0); |
| 713 | } | 712 | } |
| 714 | 713 | ||
| 714 | unsigned long __fdget_pos(unsigned int fd) | ||
| 715 | { | ||
| 716 | unsigned long v = __fdget(fd); | ||
| 717 | struct file *file = (struct file *)(v & ~3); | ||
| 718 | |||
| 719 | if (file && (file->f_mode & FMODE_ATOMIC_POS)) { | ||
| 720 | if (file_count(file) > 1) { | ||
| 721 | v |= FDPUT_POS_UNLOCK; | ||
| 722 | mutex_lock(&file->f_pos_lock); | ||
| 723 | } | ||
| 724 | } | ||
| 725 | return v; | ||
| 726 | } | ||
| 727 | |||
| 728 | /* | ||
| 729 | * We only lock f_pos if we have threads or if the file might be | ||
| 730 | * shared with another process. In both cases we'll have an elevated | ||
| 731 | * file count (done either by fdget() or by fork()). | ||
| 732 | */ | ||
| 733 | |||
| 715 | void set_close_on_exec(unsigned int fd, int flag) | 734 | void set_close_on_exec(unsigned int fd, int flag) |
| 716 | { | 735 | { |
| 717 | struct files_struct *files = current->files; | 736 | struct files_struct *files = current->files; |
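__fget_light() now returns the struct file pointer and its cleanup flags packed into a single unsigned long: struct file allocations are at least 4-byte aligned, so the two low bits are free to carry FDPUT_FPUT and FDPUT_POS_UNLOCK, and __fdget_pos() recovers the pointer with v & ~3 before deciding whether f_pos_lock must be taken. A user-space sketch of that low-bit tagging trick, assuming (as the kernel does) suitably aligned pointers:

    /* Sketch of the low-bit pointer tagging used by __fdget()/__fdget_pos().
     * Assumes pointers are at least 4-byte aligned, as malloc() guarantees. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TAG_FPUT        1UL     /* caller must drop a reference */
    #define TAG_POS_UNLOCK  2UL     /* caller must unlock f_pos */

    struct file { int dummy; };

    static unsigned long pack(struct file *f, unsigned long tags)
    {
            assert(((uintptr_t)f & 3) == 0);        /* low bits must be free */
            return (unsigned long)(uintptr_t)f | tags;
    }

    static struct file *unpack(unsigned long v)
    {
            return (struct file *)(uintptr_t)(v & ~3UL); /* mask the tags off */
    }

    int main(void)
    {
            struct file *f = malloc(sizeof(*f));
            unsigned long v = pack(f, TAG_FPUT);

            printf("file=%p fput=%lu pos_unlock=%lu\n",
                   (void *)unpack(v), v & TAG_FPUT, (v & TAG_POS_UNLOCK) >> 1);
            free(unpack(v));
            return 0;
    }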
diff --git a/fs/file_table.c b/fs/file_table.c index 5fff9030be34..01071c4d752e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
| @@ -135,6 +135,7 @@ struct file *get_empty_filp(void) | |||
| 135 | atomic_long_set(&f->f_count, 1); | 135 | atomic_long_set(&f->f_count, 1); |
| 136 | rwlock_init(&f->f_owner.lock); | 136 | rwlock_init(&f->f_owner.lock); |
| 137 | spin_lock_init(&f->f_lock); | 137 | spin_lock_init(&f->f_lock); |
| 138 | mutex_init(&f->f_pos_lock); | ||
| 138 | eventpoll_init_file(f); | 139 | eventpoll_init_file(f); |
| 139 | /* f->f_version: 0 */ | 140 | /* f->f_version: 0 */ |
| 140 | return f; | 141 | return f; |
| @@ -234,7 +235,7 @@ static void __fput(struct file *file) | |||
| 234 | * in the file cleanup chain. | 235 | * in the file cleanup chain. |
| 235 | */ | 236 | */ |
| 236 | eventpoll_release(file); | 237 | eventpoll_release(file); |
| 237 | locks_remove_flock(file); | 238 | locks_remove_file(file); |
| 238 | 239 | ||
| 239 | if (unlikely(file->f_flags & FASYNC)) { | 240 | if (unlikely(file->f_flags & FASYNC)) { |
| 240 | if (file->f_op->fasync) | 241 | if (file->f_op->fasync) |
diff --git a/fs/filesystems.c b/fs/filesystems.c index 92567d95ba6a..5797d45a78cb 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
| @@ -121,6 +121,7 @@ int unregister_filesystem(struct file_system_type * fs) | |||
| 121 | 121 | ||
| 122 | EXPORT_SYMBOL(unregister_filesystem); | 122 | EXPORT_SYMBOL(unregister_filesystem); |
| 123 | 123 | ||
| 124 | #ifdef CONFIG_SYSFS_SYSCALL | ||
| 124 | static int fs_index(const char __user * __name) | 125 | static int fs_index(const char __user * __name) |
| 125 | { | 126 | { |
| 126 | struct file_system_type * tmp; | 127 | struct file_system_type * tmp; |
| @@ -199,6 +200,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) | |||
| 199 | } | 200 | } |
| 200 | return retval; | 201 | return retval; |
| 201 | } | 202 | } |
| 203 | #endif | ||
| 202 | 204 | ||
| 203 | int __init get_filesystem_list(char *buf) | 205 | int __init get_filesystem_list(char *buf) |
| 204 | { | 206 | { |
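Gating fs_index() and the rest behind CONFIG_SYSFS_SYSCALL lets the long-obsolete sysfs(2) system call be compiled out. For reference, this is what the call does while still built in; a user-space sketch, invoked raw since glibc never grew a wrapper:

    /* User-space demo of the (obsolete) sysfs(2) syscall that
     * CONFIG_SYSFS_SYSCALL now makes optional. */
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            long n = syscall(SYS_sysfs, 3); /* option 3: count fs types */
            char name[64];

            if (n < 0) {
                    perror("sysfs");        /* ENOSYS once compiled out */
                    return 1;
            }
            for (long i = 0; i < n; i++)    /* option 2: index -> name */
                    if (syscall(SYS_sysfs, 2, i, name) == 0)
                            printf("%ld: %s\n", i, name);
            return 0;
    }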
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index f47df72cef17..363e3ae25f6b 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c | |||
| @@ -354,7 +354,7 @@ static void vxfs_i_callback(struct rcu_head *head) | |||
| 354 | void | 354 | void |
| 355 | vxfs_evict_inode(struct inode *ip) | 355 | vxfs_evict_inode(struct inode *ip) |
| 356 | { | 356 | { |
| 357 | truncate_inode_pages(&ip->i_data, 0); | 357 | truncate_inode_pages_final(&ip->i_data); |
| 358 | clear_inode(ip); | 358 | clear_inode(ip); |
| 359 | call_rcu(&ip->i_rcu, vxfs_i_callback); | 359 | call_rcu(&ip->i_rcu, vxfs_i_callback); |
| 360 | } | 360 | } |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 25d4099a4aea..99c7f0a37af4 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
| @@ -192,7 +192,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) | |||
| 192 | * vxfs_lookup - lookup pathname component | 192 | * vxfs_lookup - lookup pathname component |
| 193 | * @dip: dir in which we lookup | 193 | * @dip: dir in which we lookup |
| 194 | * @dp: dentry we lookup | 194 | * @dp: dentry we lookup |
| 195 | * @nd: lookup nameidata | 195 | * @flags: lookup flags |
| 196 | * | 196 | * |
| 197 | * Description: | 197 | * Description: |
| 198 | * vxfs_lookup tries to lookup the pathname component described | 198 | * vxfs_lookup tries to lookup the pathname component described |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index e37eb274e492..7ca8c75d50d3 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
| @@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) | |||
| 124 | 124 | ||
| 125 | static int vxfs_remount(struct super_block *sb, int *flags, char *data) | 125 | static int vxfs_remount(struct super_block *sb, int *flags, char *data) |
| 126 | { | 126 | { |
| 127 | sync_filesystem(sb); | ||
| 127 | *flags |= MS_RDONLY; | 128 | *flags |= MS_RDONLY; |
| 128 | return 0; | 129 | return 0; |
| 129 | } | 130 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d754e3cf99a8..be568b7311d6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -89,16 +89,31 @@ static inline struct inode *wb_inode(struct list_head *head) | |||
| 89 | #define CREATE_TRACE_POINTS | 89 | #define CREATE_TRACE_POINTS |
| 90 | #include <trace/events/writeback.h> | 90 | #include <trace/events/writeback.h> |
| 91 | 91 | ||
| 92 | EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage); | ||
| 93 | |||
| 94 | static void bdi_wakeup_thread(struct backing_dev_info *bdi) | ||
| 95 | { | ||
| 96 | spin_lock_bh(&bdi->wb_lock); | ||
| 97 | if (test_bit(BDI_registered, &bdi->state)) | ||
| 98 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | ||
| 99 | spin_unlock_bh(&bdi->wb_lock); | ||
| 100 | } | ||
| 101 | |||
| 92 | static void bdi_queue_work(struct backing_dev_info *bdi, | 102 | static void bdi_queue_work(struct backing_dev_info *bdi, |
| 93 | struct wb_writeback_work *work) | 103 | struct wb_writeback_work *work) |
| 94 | { | 104 | { |
| 95 | trace_writeback_queue(bdi, work); | 105 | trace_writeback_queue(bdi, work); |
| 96 | 106 | ||
| 97 | spin_lock_bh(&bdi->wb_lock); | 107 | spin_lock_bh(&bdi->wb_lock); |
| 108 | if (!test_bit(BDI_registered, &bdi->state)) { | ||
| 109 | if (work->done) | ||
| 110 | complete(work->done); | ||
| 111 | goto out_unlock; | ||
| 112 | } | ||
| 98 | list_add_tail(&work->list, &bdi->work_list); | 113 | list_add_tail(&work->list, &bdi->work_list); |
| 99 | spin_unlock_bh(&bdi->wb_lock); | ||
| 100 | |||
| 101 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 114 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); |
| 115 | out_unlock: | ||
| 116 | spin_unlock_bh(&bdi->wb_lock); | ||
| 102 | } | 117 | } |
| 103 | 118 | ||
| 104 | static void | 119 | static void |
| @@ -114,7 +129,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
| 114 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 129 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
| 115 | if (!work) { | 130 | if (!work) { |
| 116 | trace_writeback_nowork(bdi); | 131 | trace_writeback_nowork(bdi); |
| 117 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 132 | bdi_wakeup_thread(bdi); |
| 118 | return; | 133 | return; |
| 119 | } | 134 | } |
| 120 | 135 | ||
| @@ -161,7 +176,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
| 161 | * writeback as soon as there is no other work to do. | 176 | * writeback as soon as there is no other work to do. |
| 162 | */ | 177 | */ |
| 163 | trace_writeback_wake_background(bdi); | 178 | trace_writeback_wake_background(bdi); |
| 164 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 179 | bdi_wakeup_thread(bdi); |
| 165 | } | 180 | } |
| 166 | 181 | ||
| 167 | /* | 182 | /* |
| @@ -1017,7 +1032,7 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
| 1017 | current->flags |= PF_SWAPWRITE; | 1032 | current->flags |= PF_SWAPWRITE; |
| 1018 | 1033 | ||
| 1019 | if (likely(!current_is_workqueue_rescuer() || | 1034 | if (likely(!current_is_workqueue_rescuer() || |
| 1020 | list_empty(&bdi->bdi_list))) { | 1035 | !test_bit(BDI_registered, &bdi->state))) { |
| 1021 | /* | 1036 | /* |
| 1022 | * The normal path. Keep writing back @bdi until its | 1037 | * The normal path. Keep writing back @bdi until its |
| 1023 | * work_list is empty. Note that this path is also taken | 1038 | * work_list is empty. Note that this path is also taken |
| @@ -1039,10 +1054,10 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
| 1039 | trace_writeback_pages_written(pages_written); | 1054 | trace_writeback_pages_written(pages_written); |
| 1040 | } | 1055 | } |
| 1041 | 1056 | ||
| 1042 | if (!list_empty(&bdi->work_list) || | 1057 | if (!list_empty(&bdi->work_list)) |
| 1043 | (wb_has_dirty_io(wb) && dirty_writeback_interval)) | 1058 | mod_delayed_work(bdi_wq, &wb->dwork, 0); |
| 1044 | queue_delayed_work(bdi_wq, &wb->dwork, | 1059 | else if (wb_has_dirty_io(wb) && dirty_writeback_interval) |
| 1045 | msecs_to_jiffies(dirty_writeback_interval * 10)); | 1060 | bdi_wakeup_thread_delayed(bdi); |
| 1046 | 1061 | ||
| 1047 | current->flags &= ~PF_SWAPWRITE; | 1062 | current->flags &= ~PF_SWAPWRITE; |
| 1048 | } | 1063 | } |
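The fs-writeback hunks close a shutdown race: work could previously be queued, and the delayed worker kicked, after bdi_unregister() had begun tearing the bdi down. Every wakeup now goes through wb_lock and re-checks BDI_registered, and late-arriving work is completed rather than queued. The pattern, consolidated from the scattered hunks above into one kernel-style sketch (not a drop-in for the actual code):

    /* "Check registration under the lock before queueing", distilled from
     * the bdi_queue_work()/bdi_wakeup_thread() changes above. */
    static void queue_or_reject(struct backing_dev_info *bdi,
                                struct wb_writeback_work *work)
    {
            spin_lock_bh(&bdi->wb_lock);
            if (!test_bit(BDI_registered, &bdi->state)) {
                    /* bdi is going away: report completion, don't queue */
                    if (work->done)
                            complete(work->done);
                    goto out_unlock;
            }
            list_add_tail(&work->list, &bdi->work_list);
            mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
    out_unlock:
            spin_unlock_bh(&bdi->wb_lock);
    }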
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b96a49b37d66..13b691a8a7d2 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
| @@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, | |||
| 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; | 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; |
| 96 | struct fuse_io_priv io = { .async = 0, .file = file }; | 96 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 97 | 97 | ||
| 98 | return fuse_direct_io(&io, &iov, 1, count, &pos, 0); | 98 | return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | static ssize_t cuse_write(struct file *file, const char __user *buf, | 101 | static ssize_t cuse_write(struct file *file, const char __user *buf, |
| @@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, | |||
| 109 | * No locking or generic_write_checks(), the server is | 109 | * No locking or generic_write_checks(), the server is |
| 110 | * responsible for locking and sanity checks. | 110 | * responsible for locking and sanity checks. |
| 111 | */ | 111 | */ |
| 112 | return fuse_direct_io(&io, &iov, 1, count, &pos, 1); | 112 | return fuse_direct_io(&io, &iov, 1, count, &pos, |
| 113 | FUSE_DIO_WRITE | FUSE_DIO_CUSE); | ||
| 113 | } | 114 | } |
| 114 | 115 | ||
| 115 | static int cuse_open(struct inode *inode, struct file *file) | 116 | static int cuse_open(struct inode *inode, struct file *file) |
| @@ -568,7 +569,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev, | |||
| 568 | 569 | ||
| 569 | return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); | 570 | return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); |
| 570 | } | 571 | } |
| 571 | static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL); | 572 | static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL); |
| 572 | 573 | ||
| 573 | static ssize_t cuse_class_abort_store(struct device *dev, | 574 | static ssize_t cuse_class_abort_store(struct device *dev, |
| 574 | struct device_attribute *attr, | 575 | struct device_attribute *attr, |
| @@ -579,7 +580,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, | |||
| 579 | fuse_abort_conn(&cc->fc); | 580 | fuse_abort_conn(&cc->fc); |
| 580 | return count; | 581 | return count; |
| 581 | } | 582 | } |
| 582 | static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store); | 583 | static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); |
| 583 | 584 | ||
| 584 | static struct attribute *cuse_class_dev_attrs[] = { | 585 | static struct attribute *cuse_class_dev_attrs[] = { |
| 585 | &dev_attr_waiting.attr, | 586 | &dev_attr_waiting.attr, |
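fuse_direct_io()'s int write argument becomes a flags word, so CUSE can pass FUSE_DIO_CUSE alongside FUSE_DIO_WRITE instead of overloading a boolean; the same file also drops the bogus S_IFREG bits from its DEVICE_ATTR() modes, which take permission bits only. A sketch of the boolean-to-bitmask conversion, with illustrative names:

    /* Sketch of converting a boolean parameter into a flags bitmask, as the
     * fuse_direct_io() change above does. Names here are illustrative. */
    #include <stdio.h>

    #define DIO_WRITE       (1 << 0)  /* was the old "int write" boolean */
    #define DIO_CUSE        (1 << 1)  /* new orthogonal bit of context */

    static void direct_io(unsigned int flags)
    {
            printf("%s I/O%s\n",
                   (flags & DIO_WRITE) ? "write" : "read",
                   (flags & DIO_CUSE) ? " (cuse)" : "");
    }

    int main(void)
    {
            direct_io(0);                    /* plain read */
            direct_io(DIO_WRITE | DIO_CUSE); /* cuse write, two flags combined */
            return 0;
    }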
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1d1292c581c3..5b4e035b364c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
| @@ -839,6 +839,14 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, | |||
| 839 | struct kstat *stat) | 839 | struct kstat *stat) |
| 840 | { | 840 | { |
| 841 | unsigned int blkbits; | 841 | unsigned int blkbits; |
| 842 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
| 843 | |||
| 844 | /* see the comment in fuse_change_attributes() */ | ||
| 845 | if (fc->writeback_cache && S_ISREG(inode->i_mode)) { | ||
| 846 | attr->size = i_size_read(inode); | ||
| 847 | attr->mtime = inode->i_mtime.tv_sec; | ||
| 848 | attr->mtimensec = inode->i_mtime.tv_nsec; | ||
| 849 | } | ||
| 842 | 850 | ||
| 843 | stat->dev = inode->i_sb->s_dev; | 851 | stat->dev = inode->i_sb->s_dev; |
| 844 | stat->ino = attr->ino; | 852 | stat->ino = attr->ino; |
| @@ -1477,12 +1485,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, | |||
| 1477 | FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); | 1485 | FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); |
| 1478 | } | 1486 | } |
| 1479 | 1487 | ||
| 1480 | static bool update_mtime(unsigned ivalid) | 1488 | static bool update_mtime(unsigned ivalid, bool trust_local_mtime) |
| 1481 | { | 1489 | { |
| 1482 | /* Always update if mtime is explicitly set */ | 1490 | /* Always update if mtime is explicitly set */ |
| 1483 | if (ivalid & ATTR_MTIME_SET) | 1491 | if (ivalid & ATTR_MTIME_SET) |
| 1484 | return true; | 1492 | return true; |
| 1485 | 1493 | ||
| 1494 | /* Or if kernel i_mtime is the official one */ | ||
| 1495 | if (trust_local_mtime) | ||
| 1496 | return true; | ||
| 1497 | |||
| 1486 | /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ | 1498 | /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ |
| 1487 | if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) | 1499 | if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) |
| 1488 | return false; | 1500 | return false; |
| @@ -1491,7 +1503,8 @@ static bool update_mtime(unsigned ivalid) | |||
| 1491 | return true; | 1503 | return true; |
| 1492 | } | 1504 | } |
| 1493 | 1505 | ||
| 1494 | static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | 1506 | static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, |
| 1507 | bool trust_local_mtime) | ||
| 1495 | { | 1508 | { |
| 1496 | unsigned ivalid = iattr->ia_valid; | 1509 | unsigned ivalid = iattr->ia_valid; |
| 1497 | 1510 | ||
| @@ -1510,11 +1523,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | |||
| 1510 | if (!(ivalid & ATTR_ATIME_SET)) | 1523 | if (!(ivalid & ATTR_ATIME_SET)) |
| 1511 | arg->valid |= FATTR_ATIME_NOW; | 1524 | arg->valid |= FATTR_ATIME_NOW; |
| 1512 | } | 1525 | } |
| 1513 | if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { | 1526 | if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { |
| 1514 | arg->valid |= FATTR_MTIME; | 1527 | arg->valid |= FATTR_MTIME; |
| 1515 | arg->mtime = iattr->ia_mtime.tv_sec; | 1528 | arg->mtime = iattr->ia_mtime.tv_sec; |
| 1516 | arg->mtimensec = iattr->ia_mtime.tv_nsec; | 1529 | arg->mtimensec = iattr->ia_mtime.tv_nsec; |
| 1517 | if (!(ivalid & ATTR_MTIME_SET)) | 1530 | if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) |
| 1518 | arg->valid |= FATTR_MTIME_NOW; | 1531 | arg->valid |= FATTR_MTIME_NOW; |
| 1519 | } | 1532 | } |
| 1520 | } | 1533 | } |
| @@ -1563,6 +1576,63 @@ void fuse_release_nowrite(struct inode *inode) | |||
| 1563 | spin_unlock(&fc->lock); | 1576 | spin_unlock(&fc->lock); |
| 1564 | } | 1577 | } |
| 1565 | 1578 | ||
| 1579 | static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, | ||
| 1580 | struct inode *inode, | ||
| 1581 | struct fuse_setattr_in *inarg_p, | ||
| 1582 | struct fuse_attr_out *outarg_p) | ||
| 1583 | { | ||
| 1584 | req->in.h.opcode = FUSE_SETATTR; | ||
| 1585 | req->in.h.nodeid = get_node_id(inode); | ||
| 1586 | req->in.numargs = 1; | ||
| 1587 | req->in.args[0].size = sizeof(*inarg_p); | ||
| 1588 | req->in.args[0].value = inarg_p; | ||
| 1589 | req->out.numargs = 1; | ||
| 1590 | if (fc->minor < 9) | ||
| 1591 | req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; | ||
| 1592 | else | ||
| 1593 | req->out.args[0].size = sizeof(*outarg_p); | ||
| 1594 | req->out.args[0].value = outarg_p; | ||
| 1595 | } | ||
| 1596 | |||
| 1597 | /* | ||
| 1598 | * Flush inode->i_mtime to the server | ||
| 1599 | */ | ||
| 1600 | int fuse_flush_mtime(struct file *file, bool nofail) | ||
| 1601 | { | ||
| 1602 | struct inode *inode = file->f_mapping->host; | ||
| 1603 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
| 1604 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
| 1605 | struct fuse_req *req = NULL; | ||
| 1606 | struct fuse_setattr_in inarg; | ||
| 1607 | struct fuse_attr_out outarg; | ||
| 1608 | int err; | ||
| 1609 | |||
| 1610 | if (nofail) { | ||
| 1611 | req = fuse_get_req_nofail_nopages(fc, file); | ||
| 1612 | } else { | ||
| 1613 | req = fuse_get_req_nopages(fc); | ||
| 1614 | if (IS_ERR(req)) | ||
| 1615 | return PTR_ERR(req); | ||
| 1616 | } | ||
| 1617 | |||
| 1618 | memset(&inarg, 0, sizeof(inarg)); | ||
| 1619 | memset(&outarg, 0, sizeof(outarg)); | ||
| 1620 | |||
| 1621 | inarg.valid |= FATTR_MTIME; | ||
| 1622 | inarg.mtime = inode->i_mtime.tv_sec; | ||
| 1623 | inarg.mtimensec = inode->i_mtime.tv_nsec; | ||
| 1624 | |||
| 1625 | fuse_setattr_fill(fc, req, inode, &inarg, &outarg); | ||
| 1626 | fuse_request_send(fc, req); | ||
| 1627 | err = req->out.h.error; | ||
| 1628 | fuse_put_request(fc, req); | ||
| 1629 | |||
| 1630 | if (!err) | ||
| 1631 | clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
| 1632 | |||
| 1633 | return err; | ||
| 1634 | } | ||
| 1635 | |||
| 1566 | /* | 1636 | /* |
| 1567 | * Set attributes, and at the same time refresh them. | 1637 | * Set attributes, and at the same time refresh them. |
| 1568 | * | 1638 | * |
| @@ -1580,8 +1650,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
| 1580 | struct fuse_setattr_in inarg; | 1650 | struct fuse_setattr_in inarg; |
| 1581 | struct fuse_attr_out outarg; | 1651 | struct fuse_attr_out outarg; |
| 1582 | bool is_truncate = false; | 1652 | bool is_truncate = false; |
| 1653 | bool is_wb = fc->writeback_cache; | ||
| 1583 | loff_t oldsize; | 1654 | loff_t oldsize; |
| 1584 | int err; | 1655 | int err; |
| 1656 | bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); | ||
| 1585 | 1657 | ||
| 1586 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) | 1658 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) |
| 1587 | attr->ia_valid |= ATTR_FORCE; | 1659 | attr->ia_valid |= ATTR_FORCE; |
| @@ -1610,7 +1682,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
| 1610 | 1682 | ||
| 1611 | memset(&inarg, 0, sizeof(inarg)); | 1683 | memset(&inarg, 0, sizeof(inarg)); |
| 1612 | memset(&outarg, 0, sizeof(outarg)); | 1684 | memset(&outarg, 0, sizeof(outarg)); |
| 1613 | iattr_to_fattr(attr, &inarg); | 1685 | iattr_to_fattr(attr, &inarg, trust_local_mtime); |
| 1614 | if (file) { | 1686 | if (file) { |
| 1615 | struct fuse_file *ff = file->private_data; | 1687 | struct fuse_file *ff = file->private_data; |
| 1616 | inarg.valid |= FATTR_FH; | 1688 | inarg.valid |= FATTR_FH; |
| @@ -1621,17 +1693,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
| 1621 | inarg.valid |= FATTR_LOCKOWNER; | 1693 | inarg.valid |= FATTR_LOCKOWNER; |
| 1622 | inarg.lock_owner = fuse_lock_owner_id(fc, current->files); | 1694 | inarg.lock_owner = fuse_lock_owner_id(fc, current->files); |
| 1623 | } | 1695 | } |
| 1624 | req->in.h.opcode = FUSE_SETATTR; | 1696 | fuse_setattr_fill(fc, req, inode, &inarg, &outarg); |
| 1625 | req->in.h.nodeid = get_node_id(inode); | ||
| 1626 | req->in.numargs = 1; | ||
| 1627 | req->in.args[0].size = sizeof(inarg); | ||
| 1628 | req->in.args[0].value = &inarg; | ||
| 1629 | req->out.numargs = 1; | ||
| 1630 | if (fc->minor < 9) | ||
| 1631 | req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; | ||
| 1632 | else | ||
| 1633 | req->out.args[0].size = sizeof(outarg); | ||
| 1634 | req->out.args[0].value = &outarg; | ||
| 1635 | fuse_request_send(fc, req); | 1697 | fuse_request_send(fc, req); |
| 1636 | err = req->out.h.error; | 1698 | err = req->out.h.error; |
| 1637 | fuse_put_request(fc, req); | 1699 | fuse_put_request(fc, req); |
| @@ -1648,10 +1710,18 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
| 1648 | } | 1710 | } |
| 1649 | 1711 | ||
| 1650 | spin_lock(&fc->lock); | 1712 | spin_lock(&fc->lock); |
| 1713 | /* the kernel maintains i_mtime locally */ | ||
| 1714 | if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { | ||
| 1715 | inode->i_mtime = attr->ia_mtime; | ||
| 1716 | clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
| 1717 | } | ||
| 1718 | |||
| 1651 | fuse_change_attributes_common(inode, &outarg.attr, | 1719 | fuse_change_attributes_common(inode, &outarg.attr, |
| 1652 | attr_timeout(&outarg)); | 1720 | attr_timeout(&outarg)); |
| 1653 | oldsize = inode->i_size; | 1721 | oldsize = inode->i_size; |
| 1654 | i_size_write(inode, outarg.attr.size); | 1722 | /* see the comment in fuse_change_attributes() */ |
| 1723 | if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) | ||
| 1724 | i_size_write(inode, outarg.attr.size); | ||
| 1655 | 1725 | ||
| 1656 | if (is_truncate) { | 1726 | if (is_truncate) { |
| 1657 | /* NOTE: this may release/reacquire fc->lock */ | 1727 | /* NOTE: this may release/reacquire fc->lock */ |
| @@ -1663,7 +1733,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
| 1663 | * Only call invalidate_inode_pages2() after removing | 1733 | * Only call invalidate_inode_pages2() after removing |
| 1664 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. | 1734 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. |
| 1665 | */ | 1735 | */ |
| 1666 | if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | 1736 | if ((is_truncate || !is_wb) && |
| 1737 | S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | ||
| 1667 | truncate_pagecache(inode, outarg.attr.size); | 1738 | truncate_pagecache(inode, outarg.attr.size); |
| 1668 | invalidate_inode_pages2(inode->i_mapping); | 1739 | invalidate_inode_pages2(inode->i_mapping); |
| 1669 | } | 1740 | } |
| @@ -1875,6 +1946,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name) | |||
| 1875 | return err; | 1946 | return err; |
| 1876 | } | 1947 | } |
| 1877 | 1948 | ||
| 1949 | static int fuse_update_time(struct inode *inode, struct timespec *now, | ||
| 1950 | int flags) | ||
| 1951 | { | ||
| 1952 | if (flags & S_MTIME) { | ||
| 1953 | inode->i_mtime = *now; | ||
| 1954 | set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); | ||
| 1955 | BUG_ON(!S_ISREG(inode->i_mode)); | ||
| 1956 | } | ||
| 1957 | return 0; | ||
| 1958 | } | ||
| 1959 | |||
| 1878 | static const struct inode_operations fuse_dir_inode_operations = { | 1960 | static const struct inode_operations fuse_dir_inode_operations = { |
| 1879 | .lookup = fuse_lookup, | 1961 | .lookup = fuse_lookup, |
| 1880 | .mkdir = fuse_mkdir, | 1962 | .mkdir = fuse_mkdir, |
| @@ -1914,6 +1996,7 @@ static const struct inode_operations fuse_common_inode_operations = { | |||
| 1914 | .getxattr = fuse_getxattr, | 1996 | .getxattr = fuse_getxattr, |
| 1915 | .listxattr = fuse_listxattr, | 1997 | .listxattr = fuse_listxattr, |
| 1916 | .removexattr = fuse_removexattr, | 1998 | .removexattr = fuse_removexattr, |
| 1999 | .update_time = fuse_update_time, | ||
| 1917 | }; | 2000 | }; |
| 1918 | 2001 | ||
| 1919 | static const struct inode_operations fuse_symlink_inode_operations = { | 2002 | static const struct inode_operations fuse_symlink_inode_operations = { |
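With writeback_cache enabled, the kernel rather than the userspace server becomes the authority on i_mtime for regular files: fuse_update_time() stamps the inode and sets FUSE_I_MTIME_DIRTY, and the new fuse_flush_mtime() pushes the value to the server over FUSE_SETATTR (from fsync, release, or an explicit setattr), clearing the bit only on success. The protocol condensed into one kernel-style sketch, not the full fuse implementation:

    /* Condensed sketch of the dirty-mtime protocol added above: stamp and
     * mark dirty locally, flush lazily, clear the bit only on success. */
    static int example_update_time(struct inode *inode, struct timespec *now,
                                   int flags)
    {
            if (flags & S_MTIME) {
                    inode->i_mtime = *now;                  /* local truth */
                    set_bit(FUSE_I_MTIME_DIRTY,
                            &get_fuse_inode(inode)->state); /* flush later */
            }
            return 0;                                       /* nothing sent yet */
    }

    static int example_fsync_mtime(struct file *file, struct inode *inode)
    {
            /* push i_mtime to the server only if it is actually dirty */
            if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
                    return fuse_flush_mtime(file, false);   /* bit stays on error */
            return 0;
    }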
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 77bcc303c3ae..65df7d8be4f5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | |||
| 188 | } | 188 | } |
| 189 | EXPORT_SYMBOL_GPL(fuse_do_open); | 189 | EXPORT_SYMBOL_GPL(fuse_do_open); |
| 190 | 190 | ||
| 191 | static void fuse_link_write_file(struct file *file) | ||
| 192 | { | ||
| 193 | struct inode *inode = file_inode(file); | ||
| 194 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
| 195 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
| 196 | struct fuse_file *ff = file->private_data; | ||
| 197 | /* | ||
| 198 | * file may be written through mmap, so chain it onto the | ||
| 199 | * inodes's write_file list | ||
| 200 | */ | ||
| 201 | spin_lock(&fc->lock); | ||
| 202 | if (list_empty(&ff->write_entry)) | ||
| 203 | list_add(&ff->write_entry, &fi->write_files); | ||
| 204 | spin_unlock(&fc->lock); | ||
| 205 | } | ||
| 206 | |||
| 191 | void fuse_finish_open(struct inode *inode, struct file *file) | 207 | void fuse_finish_open(struct inode *inode, struct file *file) |
| 192 | { | 208 | { |
| 193 | struct fuse_file *ff = file->private_data; | 209 | struct fuse_file *ff = file->private_data; |
| @@ -208,6 +224,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) | |||
| 208 | spin_unlock(&fc->lock); | 224 | spin_unlock(&fc->lock); |
| 209 | fuse_invalidate_attr(inode); | 225 | fuse_invalidate_attr(inode); |
| 210 | } | 226 | } |
| 227 | if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) | ||
| 228 | fuse_link_write_file(file); | ||
| 211 | } | 229 | } |
| 212 | 230 | ||
| 213 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) | 231 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) |
| @@ -292,6 +310,15 @@ static int fuse_open(struct inode *inode, struct file *file) | |||
| 292 | 310 | ||
| 293 | static int fuse_release(struct inode *inode, struct file *file) | 311 | static int fuse_release(struct inode *inode, struct file *file) |
| 294 | { | 312 | { |
| 313 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
| 314 | |||
| 315 | /* see fuse_vma_close() for !writeback_cache case */ | ||
| 316 | if (fc->writeback_cache) | ||
| 317 | filemap_write_and_wait(file->f_mapping); | ||
| 318 | |||
| 319 | if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) | ||
| 320 | fuse_flush_mtime(file, true); | ||
| 321 | |||
| 295 | fuse_release_common(file, FUSE_RELEASE); | 322 | fuse_release_common(file, FUSE_RELEASE); |
| 296 | 323 | ||
| 297 | /* return value is ignored by VFS */ | 324 | /* return value is ignored by VFS */ |
| @@ -333,12 +360,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
| 333 | } | 360 | } |
| 334 | 361 | ||
| 335 | /* | 362 | /* |
| 336 | * Check if page is under writeback | 363 | * Check if any page in a range is under writeback |
| 337 | * | 364 | * |
| 338 | * This is currently done by walking the list of writepage requests | 365 | * This is currently done by walking the list of writepage requests |
| 339 | * for the inode, which can be pretty inefficient. | 366 | * for the inode, which can be pretty inefficient. |
| 340 | */ | 367 | */ |
| 341 | static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | 368 | static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, |
| 369 | pgoff_t idx_to) | ||
| 342 | { | 370 | { |
| 343 | struct fuse_conn *fc = get_fuse_conn(inode); | 371 | struct fuse_conn *fc = get_fuse_conn(inode); |
| 344 | struct fuse_inode *fi = get_fuse_inode(inode); | 372 | struct fuse_inode *fi = get_fuse_inode(inode); |
| @@ -351,8 +379,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
| 351 | 379 | ||
| 352 | BUG_ON(req->inode != inode); | 380 | BUG_ON(req->inode != inode); |
| 353 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; | 381 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; |
| 354 | if (curr_index <= index && | 382 | if (idx_from < curr_index + req->num_pages && |
| 355 | index < curr_index + req->num_pages) { | 383 | curr_index <= idx_to) { |
| 356 | found = true; | 384 | found = true; |
| 357 | break; | 385 | break; |
| 358 | } | 386 | } |
| @@ -362,6 +390,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
| 362 | return found; | 390 | return found; |
| 363 | } | 391 | } |
| 364 | 392 | ||
| 393 | static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | ||
| 394 | { | ||
| 395 | return fuse_range_is_writeback(inode, index, index); | ||
| 396 | } | ||
| 397 | |||
| 365 | /* | 398 | /* |
| 366 | * Wait for page writeback to be completed. | 399 | * Wait for page writeback to be completed. |
| 367 | * | 400 | * |
| @@ -376,6 +409,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) | |||
| 376 | return 0; | 409 | return 0; |
| 377 | } | 410 | } |
| 378 | 411 | ||
| 412 | /* | ||
| 413 | * Wait for all pending writepages on the inode to finish. | ||
| 414 | * | ||
| 415 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
| 416 | * and waiting for all sent writes to complete. | ||
| 417 | * | ||
| 418 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
| 419 | * could conflict with truncation. | ||
| 420 | */ | ||
| 421 | static void fuse_sync_writes(struct inode *inode) | ||
| 422 | { | ||
| 423 | fuse_set_nowrite(inode); | ||
| 424 | fuse_release_nowrite(inode); | ||
| 425 | } | ||
| 426 | |||
| 379 | static int fuse_flush(struct file *file, fl_owner_t id) | 427 | static int fuse_flush(struct file *file, fl_owner_t id) |
| 380 | { | 428 | { |
| 381 | struct inode *inode = file_inode(file); | 429 | struct inode *inode = file_inode(file); |
| @@ -391,6 +439,14 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
| 391 | if (fc->no_flush) | 439 | if (fc->no_flush) |
| 392 | return 0; | 440 | return 0; |
| 393 | 441 | ||
| 442 | err = filemap_write_and_wait(file->f_mapping); | ||
| 443 | if (err) | ||
| 444 | return err; | ||
| 445 | |||
| 446 | mutex_lock(&inode->i_mutex); | ||
| 447 | fuse_sync_writes(inode); | ||
| 448 | mutex_unlock(&inode->i_mutex); | ||
| 449 | |||
| 394 | req = fuse_get_req_nofail_nopages(fc, file); | 450 | req = fuse_get_req_nofail_nopages(fc, file); |
| 395 | memset(&inarg, 0, sizeof(inarg)); | 451 | memset(&inarg, 0, sizeof(inarg)); |
| 396 | inarg.fh = ff->fh; | 452 | inarg.fh = ff->fh; |
| @@ -411,21 +467,6 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
| 411 | return err; | 467 | return err; |
| 412 | } | 468 | } |
| 413 | 469 | ||
| 414 | /* | ||
| 415 | * Wait for all pending writepages on the inode to finish. | ||
| 416 | * | ||
| 417 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
| 418 | * and waiting for all sent writes to complete. | ||
| 419 | * | ||
| 420 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
| 421 | * could conflict with truncation. | ||
| 422 | */ | ||
| 423 | static void fuse_sync_writes(struct inode *inode) | ||
| 424 | { | ||
| 425 | fuse_set_nowrite(inode); | ||
| 426 | fuse_release_nowrite(inode); | ||
| 427 | } | ||
| 428 | |||
| 429 | int fuse_fsync_common(struct file *file, loff_t start, loff_t end, | 470 | int fuse_fsync_common(struct file *file, loff_t start, loff_t end, |
| 430 | int datasync, int isdir) | 471 | int datasync, int isdir) |
| 431 | { | 472 | { |
| @@ -459,6 +500,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, | |||
| 459 | 500 | ||
| 460 | fuse_sync_writes(inode); | 501 | fuse_sync_writes(inode); |
| 461 | 502 | ||
| 503 | if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { | ||
| 504 | int err = fuse_flush_mtime(file, false); | ||
| 505 | if (err) | ||
| 506 | goto out; | ||
| 507 | } | ||
| 508 | |||
| 462 | req = fuse_get_req_nopages(fc); | 509 | req = fuse_get_req_nopages(fc); |
| 463 | if (IS_ERR(req)) { | 510 | if (IS_ERR(req)) { |
| 464 | err = PTR_ERR(req); | 511 | err = PTR_ERR(req); |
| @@ -655,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, | |||
| 655 | spin_unlock(&fc->lock); | 702 | spin_unlock(&fc->lock); |
| 656 | } | 703 | } |
| 657 | 704 | ||
| 658 | static int fuse_readpage(struct file *file, struct page *page) | 705 | static void fuse_short_read(struct fuse_req *req, struct inode *inode, |
| 706 | u64 attr_ver) | ||
| 707 | { | ||
| 708 | size_t num_read = req->out.args[0].size; | ||
| 709 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
| 710 | |||
| 711 | if (fc->writeback_cache) { | ||
| 712 | /* | ||
| 713 | * A hole in the file. Some data after the hole is already in the | ||
| 714 | * page cache but has not reached the client filesystem yet, so | ||
| 715 | * the hole is not present there. | ||
| 716 | */ | ||
| 717 | int i; | ||
| 718 | int start_idx = num_read >> PAGE_CACHE_SHIFT; | ||
| 719 | size_t off = num_read & (PAGE_CACHE_SIZE - 1); | ||
| 720 | |||
| 721 | for (i = start_idx; i < req->num_pages; i++) { | ||
| 722 | zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); | ||
| 723 | off = 0; | ||
| 724 | } | ||
| 725 | } else { | ||
| 726 | loff_t pos = page_offset(req->pages[0]) + num_read; | ||
| 727 | fuse_read_update_size(inode, pos, attr_ver); | ||
| 728 | } | ||
| 729 | } | ||
| 730 | |||
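The index arithmetic in fuse_short_read() deserves a worked example. With 4 KiB pages, a short read of num_read = 5000 bytes gives start_idx = 5000 >> 12 = 1 and off = 5000 & 4095 = 904: page 0 stays fully valid, page 1 is zeroed from byte 904 onward, and every later page is zeroed entirely, so no stale data beyond what the server returned leaks through the cache. The same logic as a stand-alone userspace sketch:

/* Stand-alone sketch of the tail-zeroing logic (4 KiB pages assumed). */
#include <string.h>

#define PG_SHIFT 12
#define PG_SIZE  (1UL << PG_SHIFT)

static void zero_short_read_tail(char **pages, int num_pages, size_t num_read)
{
	int i = num_read >> PG_SHIFT;		/* first partially valid page */
	size_t off = num_read & (PG_SIZE - 1);	/* valid bytes in that page */

	for (; i < num_pages; i++) {
		memset(pages[i] + off, 0, PG_SIZE - off);
		off = 0;		/* every later page is zeroed whole */
	}
}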
| 731 | static int fuse_do_readpage(struct file *file, struct page *page) | ||
| 659 | { | 732 | { |
| 660 | struct fuse_io_priv io = { .async = 0, .file = file }; | 733 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 661 | struct inode *inode = page->mapping->host; | 734 | struct inode *inode = page->mapping->host; |
| @@ -667,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
| 667 | u64 attr_ver; | 740 | u64 attr_ver; |
| 668 | int err; | 741 | int err; |
| 669 | 742 | ||
| 670 | err = -EIO; | ||
| 671 | if (is_bad_inode(inode)) | ||
| 672 | goto out; | ||
| 673 | |||
| 674 | /* | 743 | /* |
| 675 | * Page writeback can extend beyond the lifetime of the | 744 | * Page writeback can extend beyond the lifetime of the |
| 676 | * page-cache page, so make sure we read a properly synced | 745 | * page-cache page, so make sure we read a properly synced |
| @@ -679,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
| 679 | fuse_wait_on_page_writeback(inode, page->index); | 748 | fuse_wait_on_page_writeback(inode, page->index); |
| 680 | 749 | ||
| 681 | req = fuse_get_req(fc, 1); | 750 | req = fuse_get_req(fc, 1); |
| 682 | err = PTR_ERR(req); | ||
| 683 | if (IS_ERR(req)) | 751 | if (IS_ERR(req)) |
| 684 | goto out; | 752 | return PTR_ERR(req); |
| 685 | 753 | ||
| 686 | attr_ver = fuse_get_attr_version(fc); | 754 | attr_ver = fuse_get_attr_version(fc); |
| 687 | 755 | ||
| @@ -692,18 +760,32 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
| 692 | req->page_descs[0].length = count; | 760 | req->page_descs[0].length = count; |
| 693 | num_read = fuse_send_read(req, &io, pos, count, NULL); | 761 | num_read = fuse_send_read(req, &io, pos, count, NULL); |
| 694 | err = req->out.h.error; | 762 | err = req->out.h.error; |
| 695 | fuse_put_request(fc, req); | ||
| 696 | 763 | ||
| 697 | if (!err) { | 764 | if (!err) { |
| 698 | /* | 765 | /* |
| 699 | * Short read means EOF. If file size is larger, truncate it | 766 | * Short read means EOF. If file size is larger, truncate it |
| 700 | */ | 767 | */ |
| 701 | if (num_read < count) | 768 | if (num_read < count) |
| 702 | fuse_read_update_size(inode, pos + num_read, attr_ver); | 769 | fuse_short_read(req, inode, attr_ver); |
| 703 | 770 | ||
| 704 | SetPageUptodate(page); | 771 | SetPageUptodate(page); |
| 705 | } | 772 | } |
| 706 | 773 | ||
| 774 | fuse_put_request(fc, req); | ||
| 775 | |||
| 776 | return err; | ||
| 777 | } | ||
| 778 | |||
| 779 | static int fuse_readpage(struct file *file, struct page *page) | ||
| 780 | { | ||
| 781 | struct inode *inode = page->mapping->host; | ||
| 782 | int err; | ||
| 783 | |||
| 784 | err = -EIO; | ||
| 785 | if (is_bad_inode(inode)) | ||
| 786 | goto out; | ||
| 787 | |||
| 788 | err = fuse_do_readpage(file, page); | ||
| 707 | fuse_invalidate_atime(inode); | 789 | fuse_invalidate_atime(inode); |
| 708 | out: | 790 | out: |
| 709 | unlock_page(page); | 791 | unlock_page(page); |
| @@ -726,13 +808,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
| 726 | /* | 808 | /* |
| 727 | * Short read means EOF. If file size is larger, truncate it | 809 | * Short read means EOF. If file size is larger, truncate it |
| 728 | */ | 810 | */ |
| 729 | if (!req->out.h.error && num_read < count) { | 811 | if (!req->out.h.error && num_read < count) |
| 730 | loff_t pos; | 812 | fuse_short_read(req, inode, req->misc.read.attr_ver); |
| 731 | 813 | ||
| 732 | pos = page_offset(req->pages[0]) + num_read; | ||
| 733 | fuse_read_update_size(inode, pos, | ||
| 734 | req->misc.read.attr_ver); | ||
| 735 | } | ||
| 736 | fuse_invalidate_atime(inode); | 814 | fuse_invalidate_atime(inode); |
| 737 | } | 815 | } |
| 738 | 816 | ||
| @@ -922,16 +1000,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, | |||
| 922 | return req->misc.write.out.size; | 1000 | return req->misc.write.out.size; |
| 923 | } | 1001 | } |
| 924 | 1002 | ||
| 925 | void fuse_write_update_size(struct inode *inode, loff_t pos) | 1003 | bool fuse_write_update_size(struct inode *inode, loff_t pos) |
| 926 | { | 1004 | { |
| 927 | struct fuse_conn *fc = get_fuse_conn(inode); | 1005 | struct fuse_conn *fc = get_fuse_conn(inode); |
| 928 | struct fuse_inode *fi = get_fuse_inode(inode); | 1006 | struct fuse_inode *fi = get_fuse_inode(inode); |
| 1007 | bool ret = false; | ||
| 929 | 1008 | ||
| 930 | spin_lock(&fc->lock); | 1009 | spin_lock(&fc->lock); |
| 931 | fi->attr_version = ++fc->attr_version; | 1010 | fi->attr_version = ++fc->attr_version; |
| 932 | if (pos > inode->i_size) | 1011 | if (pos > inode->i_size) { |
| 933 | i_size_write(inode, pos); | 1012 | i_size_write(inode, pos); |
| 1013 | ret = true; | ||
| 1014 | } | ||
| 934 | spin_unlock(&fc->lock); | 1015 | spin_unlock(&fc->lock); |
| 1016 | |||
| 1017 | return ret; | ||
| 935 | } | 1018 | } |
| 936 | 1019 | ||
| 937 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, | 1020 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, |
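Returning whether i_size actually grew lets callers follow up only when needed; the fallocate hunk further down uses exactly this to dirty the mtime under writeback caching. The caller-side pattern:

/* Caller-side use of the new bool return (mirrors the fallocate hunk): */
bool changed = fuse_write_update_size(inode, pos + count);
if (changed && fc->writeback_cache) {
	/* the file grew locally; remember to flush mtime to the server */
	inode->i_mtime = current_fs_time(inode->i_sb);
	set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
}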
| @@ -1116,6 +1199,15 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1116 | struct iov_iter i; | 1199 | struct iov_iter i; |
| 1117 | loff_t endbyte = 0; | 1200 | loff_t endbyte = 0; |
| 1118 | 1201 | ||
| 1202 | if (get_fuse_conn(inode)->writeback_cache) { | ||
| 1203 | /* Update size (EOF optimization) and mode (SUID clearing) */ | ||
| 1204 | err = fuse_update_attributes(mapping->host, NULL, file, NULL); | ||
| 1205 | if (err) | ||
| 1206 | return err; | ||
| 1207 | |||
| 1208 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
| 1209 | } | ||
| 1210 | |||
| 1119 | WARN_ON(iocb->ki_pos != pos); | 1211 | WARN_ON(iocb->ki_pos != pos); |
| 1120 | 1212 | ||
| 1121 | ocount = 0; | 1213 | ocount = 0; |
| @@ -1289,13 +1381,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) | |||
| 1289 | 1381 | ||
| 1290 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 1382 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
| 1291 | unsigned long nr_segs, size_t count, loff_t *ppos, | 1383 | unsigned long nr_segs, size_t count, loff_t *ppos, |
| 1292 | int write) | 1384 | int flags) |
| 1293 | { | 1385 | { |
| 1386 | int write = flags & FUSE_DIO_WRITE; | ||
| 1387 | int cuse = flags & FUSE_DIO_CUSE; | ||
| 1294 | struct file *file = io->file; | 1388 | struct file *file = io->file; |
| 1389 | struct inode *inode = file->f_mapping->host; | ||
| 1295 | struct fuse_file *ff = file->private_data; | 1390 | struct fuse_file *ff = file->private_data; |
| 1296 | struct fuse_conn *fc = ff->fc; | 1391 | struct fuse_conn *fc = ff->fc; |
| 1297 | size_t nmax = write ? fc->max_write : fc->max_read; | 1392 | size_t nmax = write ? fc->max_write : fc->max_read; |
| 1298 | loff_t pos = *ppos; | 1393 | loff_t pos = *ppos; |
| 1394 | pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; | ||
| 1395 | pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
| 1299 | ssize_t res = 0; | 1396 | ssize_t res = 0; |
| 1300 | struct fuse_req *req; | 1397 | struct fuse_req *req; |
| 1301 | struct iov_iter ii; | 1398 | struct iov_iter ii; |
| @@ -1309,6 +1406,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
| 1309 | if (IS_ERR(req)) | 1406 | if (IS_ERR(req)) |
| 1310 | return PTR_ERR(req); | 1407 | return PTR_ERR(req); |
| 1311 | 1408 | ||
| 1409 | if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { | ||
| 1410 | if (!write) | ||
| 1411 | mutex_lock(&inode->i_mutex); | ||
| 1412 | fuse_sync_writes(inode); | ||
| 1413 | if (!write) | ||
| 1414 | mutex_unlock(&inode->i_mutex); | ||
| 1415 | } | ||
| 1416 | |||
| 1312 | while (count) { | 1417 | while (count) { |
| 1313 | size_t nres; | 1418 | size_t nres; |
| 1314 | fl_owner_t owner = current->files; | 1419 | fl_owner_t owner = current->files; |
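Folding the old write argument into a flags word keeps one signature for all callers. FUSE_DIO_CUSE marks requests coming from a character device, where f_mapping->host is not a FUSE inode, so consulting fuse_range_is_writeback() on it would be wrong and the drain is skipped. Roughly how the call sites select flags (the CUSE site itself lives in fs/fuse/cuse.c, outside this hunk):

/* Direct-IO call sites after the change (sketch): */
res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0);              /* read */
res = fuse_direct_io(io, iov, nr_segs, count, ppos, FUSE_DIO_WRITE); /* write */
/* CUSE adds FUSE_DIO_CUSE so the writeback-range check is bypassed: */
res = fuse_direct_io(io, iov, nr_segs, count, ppos,
		     FUSE_DIO_WRITE | FUSE_DIO_CUSE);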
| @@ -1397,7 +1502,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, | |||
| 1397 | 1502 | ||
| 1398 | res = generic_write_checks(file, ppos, &count, 0); | 1503 | res = generic_write_checks(file, ppos, &count, 0); |
| 1399 | if (!res) | 1504 | if (!res) |
| 1400 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); | 1505 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, |
| 1506 | FUSE_DIO_WRITE); | ||
| 1401 | 1507 | ||
| 1402 | fuse_invalidate_attr(inode); | 1508 | fuse_invalidate_attr(inode); |
| 1403 | 1509 | ||
| @@ -1885,6 +1991,77 @@ out: | |||
| 1885 | return err; | 1991 | return err; |
| 1886 | } | 1992 | } |
| 1887 | 1993 | ||
| 1994 | /* | ||
| 1995 | * It would be worthwhile to make sure that space is reserved on disk for the | ||
| 1996 | * write, but how to implement that without killing performance needs more thought. | ||
| 1997 | */ | ||
| 1998 | static int fuse_write_begin(struct file *file, struct address_space *mapping, | ||
| 1999 | loff_t pos, unsigned len, unsigned flags, | ||
| 2000 | struct page **pagep, void **fsdata) | ||
| 2001 | { | ||
| 2002 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 2003 | struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode); | ||
| 2004 | struct page *page; | ||
| 2005 | loff_t fsize; | ||
| 2006 | int err = -ENOMEM; | ||
| 2007 | |||
| 2008 | WARN_ON(!fc->writeback_cache); | ||
| 2009 | |||
| 2010 | page = grab_cache_page_write_begin(mapping, index, flags); | ||
| 2011 | if (!page) | ||
| 2012 | goto error; | ||
| 2013 | |||
| 2014 | fuse_wait_on_page_writeback(mapping->host, page->index); | ||
| 2015 | |||
| 2016 | if (PageUptodate(page) || len == PAGE_CACHE_SIZE) | ||
| 2017 | goto success; | ||
| 2018 | /* | ||
| 2019 | * Check if the start of this page comes after the end of file, in which | ||
| 2020 | * case the readpage can be optimized away. | ||
| 2021 | */ | ||
| 2022 | fsize = i_size_read(mapping->host); | ||
| 2023 | if (fsize <= (pos & PAGE_CACHE_MASK)) { | ||
| 2024 | size_t off = pos & ~PAGE_CACHE_MASK; | ||
| 2025 | if (off) | ||
| 2026 | zero_user_segment(page, 0, off); | ||
| 2027 | goto success; | ||
| 2028 | } | ||
| 2029 | err = fuse_do_readpage(file, page); | ||
| 2030 | if (err) | ||
| 2031 | goto cleanup; | ||
| 2032 | success: | ||
| 2033 | *pagep = page; | ||
| 2034 | return 0; | ||
| 2035 | |||
| 2036 | cleanup: | ||
| 2037 | unlock_page(page); | ||
| 2038 | page_cache_release(page); | ||
| 2039 | error: | ||
| 2040 | return err; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | static int fuse_write_end(struct file *file, struct address_space *mapping, | ||
| 2044 | loff_t pos, unsigned len, unsigned copied, | ||
| 2045 | struct page *page, void *fsdata) | ||
| 2046 | { | ||
| 2047 | struct inode *inode = page->mapping->host; | ||
| 2048 | |||
| 2049 | if (!PageUptodate(page)) { | ||
| 2050 | /* Zero any unwritten bytes at the end of the page */ | ||
| 2051 | size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK; | ||
| 2052 | if (endoff) | ||
| 2053 | zero_user_segment(page, endoff, PAGE_CACHE_SIZE); | ||
| 2054 | SetPageUptodate(page); | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | fuse_write_update_size(inode, pos + copied); | ||
| 2058 | set_page_dirty(page); | ||
| 2059 | unlock_page(page); | ||
| 2060 | page_cache_release(page); | ||
| 2061 | |||
| 2062 | return copied; | ||
| 2063 | } | ||
| 2064 | |||
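fuse_write_begin() skips the read round trip in two cases: the page is already up to date (or will be fully overwritten, len == PAGE_CACHE_SIZE), or the page starts at or beyond EOF, where the server holds nothing to read back and zeroing the head of the page up to the write offset suffices. Worked example with 4 KiB pages: i_size = 6000 and a write at pos = 10000 lands in page 2, whose start (8192) is past EOF, so bytes 0..1807 (10000 & 4095 = 1808) are zeroed and fuse_do_readpage() is never called. The decision in isolation:

/* Stand-alone sketch of the read-skipping decision (4 KiB pages assumed): */
#define PG_SIZE 4096UL
#define PG_MASK (~(PG_SIZE - 1))

static int write_begin_needs_read(loff_t i_size, loff_t pos,
				  int page_uptodate, unsigned len)
{
	if (page_uptodate || len == PG_SIZE)
		return 0;	/* page content will be fully valid anyway */
	if (i_size <= (pos & PG_MASK))
		return 0;	/* page starts beyond EOF: just zero its head */
	return 1;		/* otherwise existing data must be read first */
}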
| 1888 | static int fuse_launder_page(struct page *page) | 2065 | static int fuse_launder_page(struct page *page) |
| 1889 | { | 2066 | { |
| 1890 | int err = 0; | 2067 | int err = 0; |
| @@ -1946,20 +2123,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { | |||
| 1946 | 2123 | ||
| 1947 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 2124 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) |
| 1948 | { | 2125 | { |
| 1949 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { | 2126 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) |
| 1950 | struct inode *inode = file_inode(file); | 2127 | fuse_link_write_file(file); |
| 1951 | struct fuse_conn *fc = get_fuse_conn(inode); | 2128 | |
| 1952 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
| 1953 | struct fuse_file *ff = file->private_data; | ||
| 1954 | /* | ||
| 1955 | * file may be written through mmap, so chain it onto the | ||
| 1956 | * inode's write_file list | ||
| 1957 | */ | ||
| 1958 | spin_lock(&fc->lock); | ||
| 1959 | if (list_empty(&ff->write_entry)) | ||
| 1960 | list_add(&ff->write_entry, &fi->write_files); | ||
| 1961 | spin_unlock(&fc->lock); | ||
| 1962 | } | ||
| 1963 | file_accessed(file); | 2129 | file_accessed(file); |
| 1964 | vma->vm_ops = &fuse_file_vm_ops; | 2130 | vma->vm_ops = &fuse_file_vm_ops; |
| 1965 | return 0; | 2131 | return 0; |
| @@ -2606,7 +2772,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc, | |||
| 2606 | { | 2772 | { |
| 2607 | spin_lock(&fc->lock); | 2773 | spin_lock(&fc->lock); |
| 2608 | if (RB_EMPTY_NODE(&ff->polled_node)) { | 2774 | if (RB_EMPTY_NODE(&ff->polled_node)) { |
| 2609 | struct rb_node **link, *parent; | 2775 | struct rb_node **link, *uninitialized_var(parent); |
| 2610 | 2776 | ||
| 2611 | link = fuse_find_polled_node(fc, ff->kh, &parent); | 2777 | link = fuse_find_polled_node(fc, ff->kh, &parent); |
| 2612 | BUG_ON(*link); | 2778 | BUG_ON(*link); |
| @@ -2850,8 +3016,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, | |||
| 2850 | goto out; | 3016 | goto out; |
| 2851 | 3017 | ||
| 2852 | /* we could have extended the file */ | 3018 | /* we could have extended the file */ |
| 2853 | if (!(mode & FALLOC_FL_KEEP_SIZE)) | 3019 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
| 2854 | fuse_write_update_size(inode, offset + length); | 3020 | bool changed = fuse_write_update_size(inode, offset + length); |
| 3021 | |||
| 3022 | if (changed && fc->writeback_cache) { | ||
| 3023 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
| 3024 | |||
| 3025 | inode->i_mtime = current_fs_time(inode->i_sb); | ||
| 3026 | set_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
| 3027 | } | ||
| 3028 | } | ||
| 2855 | 3029 | ||
| 2856 | if (mode & FALLOC_FL_PUNCH_HOLE) | 3030 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 2857 | truncate_pagecache_range(inode, offset, offset + length - 1); | 3031 | truncate_pagecache_range(inode, offset, offset + length - 1); |
| @@ -2915,6 +3089,8 @@ static const struct address_space_operations fuse_file_aops = { | |||
| 2915 | .set_page_dirty = __set_page_dirty_nobuffers, | 3089 | .set_page_dirty = __set_page_dirty_nobuffers, |
| 2916 | .bmap = fuse_bmap, | 3090 | .bmap = fuse_bmap, |
| 2917 | .direct_IO = fuse_direct_IO, | 3091 | .direct_IO = fuse_direct_IO, |
| 3092 | .write_begin = fuse_write_begin, | ||
| 3093 | .write_end = fuse_write_end, | ||
| 2918 | }; | 3094 | }; |
| 2919 | 3095 | ||
| 2920 | void fuse_init_file_inode(struct inode *inode) | 3096 | void fuse_init_file_inode(struct inode *inode) |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2da5db2c8bdb..a257ed8ebee6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
| @@ -119,6 +119,8 @@ enum { | |||
| 119 | FUSE_I_INIT_RDPLUS, | 119 | FUSE_I_INIT_RDPLUS, |
| 120 | /** An operation changing file size is in progress */ | 120 | /** An operation changing file size is in progress */ |
| 121 | FUSE_I_SIZE_UNSTABLE, | 121 | FUSE_I_SIZE_UNSTABLE, |
| 122 | /** i_mtime has been updated locally; a flush to userspace is needed */ | ||
| 123 | FUSE_I_MTIME_DIRTY, | ||
| 122 | }; | 124 | }; |
| 123 | 125 | ||
| 124 | struct fuse_conn; | 126 | struct fuse_conn; |
| @@ -480,6 +482,9 @@ struct fuse_conn { | |||
| 480 | /** Set if bdi is valid */ | 482 | /** Set if bdi is valid */ |
| 481 | unsigned bdi_initialized:1; | 483 | unsigned bdi_initialized:1; |
| 482 | 484 | ||
| 485 | /** write-back cache policy (default is write-through) */ | ||
| 486 | unsigned writeback_cache:1; | ||
| 487 | |||
| 483 | /* | 488 | /* |
| 484 | * The following bitfields are only for optimization purposes | 489 | * The following bitfields are only for optimization purposes |
| 485 | * and hence races in setting them will not cause malfunction | 490 | * and hence races in setting them will not cause malfunction |
| @@ -863,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | |||
| 863 | 868 | ||
| 864 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 869 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
| 865 | bool isdir); | 870 | bool isdir); |
| 871 | |||
| 872 | /** | ||
| 873 | * fuse_direct_io() flags | ||
| 874 | */ | ||
| 875 | |||
| 876 | /** If set, the request is a WRITE; otherwise it is a READ */ | ||
| 877 | #define FUSE_DIO_WRITE (1 << 0) | ||
| 878 | |||
| 879 | /** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */ | ||
| 880 | #define FUSE_DIO_CUSE (1 << 1) | ||
| 881 | |||
| 866 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 882 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
| 867 | unsigned long nr_segs, size_t count, loff_t *ppos, | 883 | unsigned long nr_segs, size_t count, loff_t *ppos, |
| 868 | int write); | 884 | int flags); |
| 869 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | 885 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
| 870 | unsigned int flags); | 886 | unsigned int flags); |
| 871 | long fuse_ioctl_common(struct file *file, unsigned int cmd, | 887 | long fuse_ioctl_common(struct file *file, unsigned int cmd, |
| @@ -873,7 +889,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, | |||
| 873 | unsigned fuse_file_poll(struct file *file, poll_table *wait); | 889 | unsigned fuse_file_poll(struct file *file, poll_table *wait); |
| 874 | int fuse_dev_release(struct inode *inode, struct file *file); | 890 | int fuse_dev_release(struct inode *inode, struct file *file); |
| 875 | 891 | ||
| 876 | void fuse_write_update_size(struct inode *inode, loff_t pos); | 892 | bool fuse_write_update_size(struct inode *inode, loff_t pos); |
| 893 | |||
| 894 | int fuse_flush_mtime(struct file *file, bool nofail); | ||
| 877 | 895 | ||
| 878 | int fuse_do_setattr(struct inode *inode, struct iattr *attr, | 896 | int fuse_do_setattr(struct inode *inode, struct iattr *attr, |
| 879 | struct file *file); | 897 | struct file *file); |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d468643a68b2..8d611696fcad 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
| @@ -123,7 +123,7 @@ static void fuse_destroy_inode(struct inode *inode) | |||
| 123 | 123 | ||
| 124 | static void fuse_evict_inode(struct inode *inode) | 124 | static void fuse_evict_inode(struct inode *inode) |
| 125 | { | 125 | { |
| 126 | truncate_inode_pages(&inode->i_data, 0); | 126 | truncate_inode_pages_final(&inode->i_data); |
| 127 | clear_inode(inode); | 127 | clear_inode(inode); |
| 128 | if (inode->i_sb->s_flags & MS_ACTIVE) { | 128 | if (inode->i_sb->s_flags & MS_ACTIVE) { |
| 129 | struct fuse_conn *fc = get_fuse_conn(inode); | 129 | struct fuse_conn *fc = get_fuse_conn(inode); |
| @@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode) | |||
| 135 | 135 | ||
| 136 | static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) | 136 | static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) |
| 137 | { | 137 | { |
| 138 | sync_filesystem(sb); | ||
| 138 | if (*flags & MS_MANDLOCK) | 139 | if (*flags & MS_MANDLOCK) |
| 139 | return -EINVAL; | 140 | return -EINVAL; |
| 140 | 141 | ||
| @@ -170,8 +171,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | |||
| 170 | inode->i_blocks = attr->blocks; | 171 | inode->i_blocks = attr->blocks; |
| 171 | inode->i_atime.tv_sec = attr->atime; | 172 | inode->i_atime.tv_sec = attr->atime; |
| 172 | inode->i_atime.tv_nsec = attr->atimensec; | 173 | inode->i_atime.tv_nsec = attr->atimensec; |
| 173 | inode->i_mtime.tv_sec = attr->mtime; | 174 | /* mtime from server may be stale due to local buffered write */ |
| 174 | inode->i_mtime.tv_nsec = attr->mtimensec; | 175 | if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { |
| 176 | inode->i_mtime.tv_sec = attr->mtime; | ||
| 177 | inode->i_mtime.tv_nsec = attr->mtimensec; | ||
| 178 | } | ||
| 175 | inode->i_ctime.tv_sec = attr->ctime; | 179 | inode->i_ctime.tv_sec = attr->ctime; |
| 176 | inode->i_ctime.tv_nsec = attr->ctimensec; | 180 | inode->i_ctime.tv_nsec = attr->ctimensec; |
| 177 | 181 | ||
| @@ -197,6 +201,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
| 197 | { | 201 | { |
| 198 | struct fuse_conn *fc = get_fuse_conn(inode); | 202 | struct fuse_conn *fc = get_fuse_conn(inode); |
| 199 | struct fuse_inode *fi = get_fuse_inode(inode); | 203 | struct fuse_inode *fi = get_fuse_inode(inode); |
| 204 | bool is_wb = fc->writeback_cache; | ||
| 200 | loff_t oldsize; | 205 | loff_t oldsize; |
| 201 | struct timespec old_mtime; | 206 | struct timespec old_mtime; |
| 202 | 207 | ||
| @@ -211,10 +216,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
| 211 | fuse_change_attributes_common(inode, attr, attr_valid); | 216 | fuse_change_attributes_common(inode, attr, attr_valid); |
| 212 | 217 | ||
| 213 | oldsize = inode->i_size; | 218 | oldsize = inode->i_size; |
| 214 | i_size_write(inode, attr->size); | 219 | /* |
| 220 | * When writeback_cache is enabled, cached writes beyond EOF extend | ||
| 221 | * the local i_size without keeping the userspace server in sync, so | ||
| 222 | * attr->size coming from the server can be stale and cannot be trusted. | ||
| 223 | */ | ||
| 224 | if (!is_wb || !S_ISREG(inode->i_mode)) | ||
| 225 | i_size_write(inode, attr->size); | ||
| 215 | spin_unlock(&fc->lock); | 226 | spin_unlock(&fc->lock); |
| 216 | 227 | ||
| 217 | if (S_ISREG(inode->i_mode)) { | 228 | if (!is_wb && S_ISREG(inode->i_mode)) { |
| 218 | bool inval = false; | 229 | bool inval = false; |
| 219 | 230 | ||
| 220 | if (oldsize != attr->size) { | 231 | if (oldsize != attr->size) { |
| @@ -243,6 +254,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) | |||
| 243 | { | 254 | { |
| 244 | inode->i_mode = attr->mode & S_IFMT; | 255 | inode->i_mode = attr->mode & S_IFMT; |
| 245 | inode->i_size = attr->size; | 256 | inode->i_size = attr->size; |
| 257 | inode->i_mtime.tv_sec = attr->mtime; | ||
| 258 | inode->i_mtime.tv_nsec = attr->mtimensec; | ||
| 246 | if (S_ISREG(inode->i_mode)) { | 259 | if (S_ISREG(inode->i_mode)) { |
| 247 | fuse_init_common(inode); | 260 | fuse_init_common(inode); |
| 248 | fuse_init_file_inode(inode); | 261 | fuse_init_file_inode(inode); |
| @@ -289,7 +302,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, | |||
| 289 | return NULL; | 302 | return NULL; |
| 290 | 303 | ||
| 291 | if ((inode->i_state & I_NEW)) { | 304 | if ((inode->i_state & I_NEW)) { |
| 292 | inode->i_flags |= S_NOATIME|S_NOCMTIME; | 305 | inode->i_flags |= S_NOATIME; |
| 306 | if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) | ||
| 307 | inode->i_flags |= S_NOCMTIME; | ||
| 293 | inode->i_generation = generation; | 308 | inode->i_generation = generation; |
| 294 | inode->i_data.backing_dev_info = &fc->bdi; | 309 | inode->i_data.backing_dev_info = &fc->bdi; |
| 295 | fuse_init_inode(inode, attr); | 310 | fuse_init_inode(inode, attr); |
| @@ -873,6 +888,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
| 873 | } | 888 | } |
| 874 | if (arg->flags & FUSE_ASYNC_DIO) | 889 | if (arg->flags & FUSE_ASYNC_DIO) |
| 875 | fc->async_dio = 1; | 890 | fc->async_dio = 1; |
| 891 | if (arg->flags & FUSE_WRITEBACK_CACHE) | ||
| 892 | fc->writeback_cache = 1; | ||
| 876 | } else { | 893 | } else { |
| 877 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; | 894 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; |
| 878 | fc->no_lock = 1; | 895 | fc->no_lock = 1; |
| @@ -900,7 +917,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
| 900 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | | 917 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
| 901 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | | 918 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | |
| 902 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | | 919 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | |
| 903 | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; | 920 | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | |
| 921 | FUSE_WRITEBACK_CACHE; | ||
| 904 | req->in.h.opcode = FUSE_INIT; | 922 | req->in.h.opcode = FUSE_INIT; |
| 905 | req->in.numargs = 1; | 923 | req->in.numargs = 1; |
| 906 | req->in.args[0].size = sizeof(*arg); | 924 | req->in.args[0].size = sizeof(*arg); |
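The policy is negotiated per connection: the kernel advertises FUSE_WRITEBACK_CACHE in its FUSE_INIT request, and fc->writeback_cache is set only when the userspace server echoes the flag back, so old servers keep the traditional write-through behavior. On the server side the opt-in would look roughly like this (hypothetical libfuse-style snippet; the exact API may differ):

/* Hypothetical server-side opt-in (libfuse-style sketch): */
static void my_fs_init(void *userdata, struct fuse_conn_info *conn)
{
	if (conn->capable & FUSE_CAP_WRITEBACK_CACHE)
		conn->want |= FUSE_CAP_WRITEBACK_CACHE;
}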
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ba9456685f47..3088e2a38e30 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
| @@ -64,18 +64,6 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) | |||
| 64 | return acl; | 64 | return acl; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | static int gfs2_set_mode(struct inode *inode, umode_t mode) | ||
| 68 | { | ||
| 69 | int error = 0; | ||
| 70 | |||
| 71 | if (mode != inode->i_mode) { | ||
| 72 | inode->i_mode = mode; | ||
| 73 | mark_inode_dirty(inode); | ||
| 74 | } | ||
| 75 | |||
| 76 | return error; | ||
| 77 | } | ||
| 78 | |||
| 79 | int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | 67 | int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) |
| 80 | { | 68 | { |
| 81 | int error; | 69 | int error; |
| @@ -85,8 +73,8 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 85 | 73 | ||
| 86 | BUG_ON(name == NULL); | 74 | BUG_ON(name == NULL); |
| 87 | 75 | ||
| 88 | if (acl->a_count > GFS2_ACL_MAX_ENTRIES) | 76 | if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) |
| 89 | return -EINVAL; | 77 | return -E2BIG; |
| 90 | 78 | ||
| 91 | if (type == ACL_TYPE_ACCESS) { | 79 | if (type == ACL_TYPE_ACCESS) { |
| 92 | umode_t mode = inode->i_mode; | 80 | umode_t mode = inode->i_mode; |
| @@ -98,9 +86,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 98 | if (error == 0) | 86 | if (error == 0) |
| 99 | acl = NULL; | 87 | acl = NULL; |
| 100 | 88 | ||
| 101 | error = gfs2_set_mode(inode, mode); | 89 | if (mode != inode->i_mode) { |
| 102 | if (error) | 90 | inode->i_mode = mode; |
| 103 | return error; | 91 | mark_inode_dirty(inode); |
| 92 | } | ||
| 104 | } | 93 | } |
| 105 | 94 | ||
| 106 | if (acl) { | 95 | if (acl) { |
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 301260c999ba..2d65ec4cd4be 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | 14 | ||
| 15 | #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" | 15 | #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" |
| 16 | #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" | 16 | #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" |
| 17 | #define GFS2_ACL_MAX_ENTRIES 25 | 17 | #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) |
| 18 | 18 | ||
| 19 | extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); | 19 | extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); |
| 20 | extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); | 20 | extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); |
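The old hard-coded limit of 25 entries ignored the block size; the new macro scales with it: (300 << bsize_shift) >> 12 yields 300 entries at the common 4 KiB block size, 150 at 2 KiB, 75 at 1 KiB, and 37 at 512 bytes, roughly tracking how many ACL xattr bytes one block can hold. Quick check in plain C:

/* Quick check of the scaled ACL entry limit (illustrative): */
#include <stdio.h>

int main(void)
{
	for (unsigned shift = 9; shift <= 12; shift++)
		printf("bsize %5u -> max ACL entries %u\n",
		       1u << shift, (300u << shift) >> 12);
	return 0;	/* 512->37, 1024->75, 2048->150, 4096->300 */
}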
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 49436fa7cd4f..ce62dcac90b6 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
| 22 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
| 23 | #include <linux/aio.h> | 23 | #include <linux/aio.h> |
| 24 | #include <trace/events/writeback.h> | ||
| 24 | 25 | ||
| 25 | #include "gfs2.h" | 26 | #include "gfs2.h" |
| 26 | #include "incore.h" | 27 | #include "incore.h" |
| @@ -230,13 +231,11 @@ static int gfs2_writepages(struct address_space *mapping, | |||
| 230 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | 231 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, |
| 231 | struct writeback_control *wbc, | 232 | struct writeback_control *wbc, |
| 232 | struct pagevec *pvec, | 233 | struct pagevec *pvec, |
| 233 | int nr_pages, pgoff_t end) | 234 | int nr_pages, pgoff_t end, |
| 235 | pgoff_t *done_index) | ||
| 234 | { | 236 | { |
| 235 | struct inode *inode = mapping->host; | 237 | struct inode *inode = mapping->host; |
| 236 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 238 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
| 237 | loff_t i_size = i_size_read(inode); | ||
| 238 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 239 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
| 240 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | 239 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); |
| 241 | int i; | 240 | int i; |
| 242 | int ret; | 241 | int ret; |
| @@ -248,40 +247,83 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
| 248 | for(i = 0; i < nr_pages; i++) { | 247 | for(i = 0; i < nr_pages; i++) { |
| 249 | struct page *page = pvec->pages[i]; | 248 | struct page *page = pvec->pages[i]; |
| 250 | 249 | ||
| 250 | /* | ||
| 251 | * At this point, the page may be truncated or | ||
| 252 | * invalidated (changing page->mapping to NULL), or | ||
| 253 | * even swizzled back from swapper_space to tmpfs file | ||
| 254 | * mapping. However, page->index will not change | ||
| 255 | * because we have a reference on the page. | ||
| 256 | */ | ||
| 257 | if (page->index > end) { | ||
| 258 | /* | ||
| 259 | * can't be range_cyclic (1st pass) because | ||
| 260 | * end == -1 in that case. | ||
| 261 | */ | ||
| 262 | ret = 1; | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | |||
| 266 | *done_index = page->index; | ||
| 267 | |||
| 251 | lock_page(page); | 268 | lock_page(page); |
| 252 | 269 | ||
| 253 | if (unlikely(page->mapping != mapping)) { | 270 | if (unlikely(page->mapping != mapping)) { |
| 271 | continue_unlock: | ||
| 254 | unlock_page(page); | 272 | unlock_page(page); |
| 255 | continue; | 273 | continue; |
| 256 | } | 274 | } |
| 257 | 275 | ||
| 258 | if (!wbc->range_cyclic && page->index > end) { | 276 | if (!PageDirty(page)) { |
| 259 | ret = 1; | 277 | /* someone wrote it for us */ |
| 260 | unlock_page(page); | 278 | goto continue_unlock; |
| 261 | continue; | ||
| 262 | } | 279 | } |
| 263 | 280 | ||
| 264 | if (wbc->sync_mode != WB_SYNC_NONE) | 281 | if (PageWriteback(page)) { |
| 265 | wait_on_page_writeback(page); | 282 | if (wbc->sync_mode != WB_SYNC_NONE) |
| 266 | 283 | wait_on_page_writeback(page); | |
| 267 | if (PageWriteback(page) || | 284 | else |
| 268 | !clear_page_dirty_for_io(page)) { | 285 | goto continue_unlock; |
| 269 | unlock_page(page); | ||
| 270 | continue; | ||
| 271 | } | 286 | } |
| 272 | 287 | ||
| 273 | /* Is the page fully outside i_size? (truncate in progress) */ | 288 | BUG_ON(PageWriteback(page)); |
| 274 | if (page->index > end_index || (page->index == end_index && !offset)) { | 289 | if (!clear_page_dirty_for_io(page)) |
| 275 | page->mapping->a_ops->invalidatepage(page, 0, | 290 | goto continue_unlock; |
| 276 | PAGE_CACHE_SIZE); | 291 | |
| 277 | unlock_page(page); | 292 | trace_wbc_writepage(wbc, mapping->backing_dev_info); |
| 278 | continue; | ||
| 279 | } | ||
| 280 | 293 | ||
| 281 | ret = __gfs2_jdata_writepage(page, wbc); | 294 | ret = __gfs2_jdata_writepage(page, wbc); |
| 295 | if (unlikely(ret)) { | ||
| 296 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
| 297 | unlock_page(page); | ||
| 298 | ret = 0; | ||
| 299 | } else { | ||
| 300 | |||
| 301 | /* | ||
| 302 | * done_index is set past this page, | ||
| 303 | * so media errors will not choke | ||
| 304 | * background writeout for the entire | ||
| 305 | * file. This has consequences for | ||
| 306 | * range_cyclic semantics (i.e. it may | ||
| 307 | * not be suitable for data integrity | ||
| 308 | * writeout). | ||
| 309 | */ | ||
| 310 | *done_index = page->index + 1; | ||
| 311 | ret = 1; | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | } | ||
| 282 | 315 | ||
| 283 | if (ret || (--(wbc->nr_to_write) <= 0)) | 316 | /* |
| 317 | * We stop writing back only if we are not doing | ||
| 318 | * integrity sync. In case of integrity sync we have to | ||
| 319 | * keep going until we have written all the pages | ||
| 320 | * we tagged for writeback prior to entering this loop. | ||
| 321 | */ | ||
| 322 | if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { | ||
| 284 | ret = 1; | 323 | ret = 1; |
| 324 | break; | ||
| 325 | } | ||
| 326 | |||
| 285 | } | 327 | } |
| 286 | gfs2_trans_end(sdp); | 328 | gfs2_trans_end(sdp); |
| 287 | return ret; | 329 | return ret; |
| @@ -306,51 +348,69 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, | |||
| 306 | int done = 0; | 348 | int done = 0; |
| 307 | struct pagevec pvec; | 349 | struct pagevec pvec; |
| 308 | int nr_pages; | 350 | int nr_pages; |
| 351 | pgoff_t uninitialized_var(writeback_index); | ||
| 309 | pgoff_t index; | 352 | pgoff_t index; |
| 310 | pgoff_t end; | 353 | pgoff_t end; |
| 311 | int scanned = 0; | 354 | pgoff_t done_index; |
| 355 | int cycled; | ||
| 312 | int range_whole = 0; | 356 | int range_whole = 0; |
| 357 | int tag; | ||
| 313 | 358 | ||
| 314 | pagevec_init(&pvec, 0); | 359 | pagevec_init(&pvec, 0); |
| 315 | if (wbc->range_cyclic) { | 360 | if (wbc->range_cyclic) { |
| 316 | index = mapping->writeback_index; /* Start from prev offset */ | 361 | writeback_index = mapping->writeback_index; /* prev offset */ |
| 362 | index = writeback_index; | ||
| 363 | if (index == 0) | ||
| 364 | cycled = 1; | ||
| 365 | else | ||
| 366 | cycled = 0; | ||
| 317 | end = -1; | 367 | end = -1; |
| 318 | } else { | 368 | } else { |
| 319 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 369 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
| 320 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 370 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
| 321 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 371 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
| 322 | range_whole = 1; | 372 | range_whole = 1; |
| 323 | scanned = 1; | 373 | cycled = 1; /* ignore range_cyclic tests */ |
| 324 | } | 374 | } |
| 375 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | ||
| 376 | tag = PAGECACHE_TAG_TOWRITE; | ||
| 377 | else | ||
| 378 | tag = PAGECACHE_TAG_DIRTY; | ||
| 325 | 379 | ||
| 326 | retry: | 380 | retry: |
| 327 | while (!done && (index <= end) && | 381 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
| 328 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 382 | tag_pages_for_writeback(mapping, index, end); |
| 329 | PAGECACHE_TAG_DIRTY, | 383 | done_index = index; |
| 330 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 384 | while (!done && (index <= end)) { |
| 331 | scanned = 1; | 385 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
| 332 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | 386 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
| 387 | if (nr_pages == 0) | ||
| 388 | break; | ||
| 389 | |||
| 390 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index); | ||
| 333 | if (ret) | 391 | if (ret) |
| 334 | done = 1; | 392 | done = 1; |
| 335 | if (ret > 0) | 393 | if (ret > 0) |
| 336 | ret = 0; | 394 | ret = 0; |
| 337 | |||
| 338 | pagevec_release(&pvec); | 395 | pagevec_release(&pvec); |
| 339 | cond_resched(); | 396 | cond_resched(); |
| 340 | } | 397 | } |
| 341 | 398 | ||
| 342 | if (!scanned && !done) { | 399 | if (!cycled && !done) { |
| 343 | /* | 400 | /* |
| 401 | * range_cyclic: | ||
| 344 | * We hit the last page and there is more work to be done: wrap | 402 | * We hit the last page and there is more work to be done: wrap |
| 345 | * back to the start of the file | 403 | * back to the start of the file |
| 346 | */ | 404 | */ |
| 347 | scanned = 1; | 405 | cycled = 1; |
| 348 | index = 0; | 406 | index = 0; |
| 407 | end = writeback_index - 1; | ||
| 349 | goto retry; | 408 | goto retry; |
| 350 | } | 409 | } |
| 351 | 410 | ||
| 352 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 411 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
| 353 | mapping->writeback_index = index; | 412 | mapping->writeback_index = done_index; |
| 413 | |||
| 354 | return ret; | 414 | return ret; |
| 355 | } | 415 | } |
| 356 | 416 | ||
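The rewrite brings gfs2_write_cache_jdata() in line with the core write_cache_pages() algorithm: integrity sync first re-tags dirty pages PAGECACHE_TAG_TOWRITE so pages dirtied during the walk cannot make the loop livelock, done_index records how far writeout got for mapping->writeback_index, and a range_cyclic pass that started mid-file wraps back to the start exactly once. The control skeleton, with the per-page work and GFS2 transaction handling elided:

/* Control skeleton of the write_cache_pages()-style loop (sketch): */
static int jdata_writeback_sketch(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	pgoff_t writeback_index = 0, index, end, done_index;
	int tag, done = 0, cycled;

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index;
		index = writeback_index;
		cycled = (index == 0);		/* already at file start? */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		cycled = 1;			/* explicit range never wraps */
	}
	tag = (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) ?
		PAGECACHE_TAG_TOWRITE : PAGECACHE_TAG_DIRTY;
retry:
	if (tag == PAGECACHE_TAG_TOWRITE)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && index <= end) {
		/* pagevec_lookup_tag(); write each page, advancing
		 * done_index; under WB_SYNC_NONE stop when nr_to_write
		 * is exhausted, under WB_SYNC_ALL keep going */
		done = 1;	/* placeholder so the sketch terminates */
	}
	if (!cycled && !done) {
		cycled = 1;	/* wrap once back to the file start */
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic)
		mapping->writeback_index = done_index;
	return 0;
}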
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fe0500c0af7a..c62d4b9f51dc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -1328,6 +1328,121 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) | |||
| 1328 | } | 1328 | } |
| 1329 | 1329 | ||
| 1330 | /** | 1330 | /** |
| 1331 | * gfs2_free_journal_extents - Free cached journal bmap info | ||
| 1332 | * @jd: The journal | ||
| 1333 | * | ||
| 1334 | */ | ||
| 1335 | |||
| 1336 | void gfs2_free_journal_extents(struct gfs2_jdesc *jd) | ||
| 1337 | { | ||
| 1338 | struct gfs2_journal_extent *jext; | ||
| 1339 | |||
| 1340 | while(!list_empty(&jd->extent_list)) { | ||
| 1341 | jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); | ||
| 1342 | list_del(&jext->list); | ||
| 1343 | kfree(jext); | ||
| 1344 | } | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | /** | ||
| 1348 | * gfs2_add_jextent - Add or merge a new extent to extent cache | ||
| 1349 | * @jd: The journal descriptor | ||
| 1350 | * @lblock: The logical block at start of new extent | ||
| 1351 | * @pblock: The physical block at start of new extent | ||
| 1352 | * @blocks: Size of extent in fs blocks | ||
| 1353 | * | ||
| 1354 | * Returns: 0 on success or -ENOMEM | ||
| 1355 | */ | ||
| 1356 | |||
| 1357 | static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) | ||
| 1358 | { | ||
| 1359 | struct gfs2_journal_extent *jext; | ||
| 1360 | |||
| 1361 | if (!list_empty(&jd->extent_list)) { | ||
| 1362 | jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); | ||
| 1363 | if ((jext->dblock + jext->blocks) == dblock) { | ||
| 1364 | jext->blocks += blocks; | ||
| 1365 | return 0; | ||
| 1366 | } | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); | ||
| 1370 | if (jext == NULL) | ||
| 1371 | return -ENOMEM; | ||
| 1372 | jext->dblock = dblock; | ||
| 1373 | jext->lblock = lblock; | ||
| 1374 | jext->blocks = blocks; | ||
| 1375 | list_add_tail(&jext->list, &jd->extent_list); | ||
| 1376 | jd->nr_extents++; | ||
| 1377 | return 0; | ||
| 1378 | } | ||
| 1379 | |||
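gfs2_add_jextent() merges whenever the new run is physically contiguous with the tail of the list, so a sequentially laid out journal collapses to one extent regardless of how many gfs2_block_map() calls it took to walk it. Toy demonstration of the rule in userspace:

/* Toy demonstration of the extent merge rule (userspace sketch): */
#include <stdio.h>

struct ext { unsigned long long lblock, dblock, blocks; };

static void add_jextent(struct ext *e, int *n, unsigned long long lb,
			unsigned long long db, unsigned long long blocks)
{
	if (*n && e[*n - 1].dblock + e[*n - 1].blocks == db) {
		e[*n - 1].blocks += blocks;	/* contiguous: merge */
		return;
	}
	e[*n] = (struct ext){ lb, db, blocks };	/* gap: new extent */
	(*n)++;
}

int main(void)
{
	struct ext e[8];
	int n = 0;

	add_jextent(e, &n, 0, 100, 10);
	add_jextent(e, &n, 10, 110, 5);	/* 100 + 10 == 110: merges to 15 */
	add_jextent(e, &n, 15, 200, 4);	/* discontiguous: second extent */
	printf("%d extents, first spans %llu blocks\n", n, e[0].blocks);
	return 0;	/* prints: 2 extents, first spans 15 blocks */
}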
| 1380 | /** | ||
| 1381 | * gfs2_map_journal_extents - Cache journal bmap info | ||
| 1382 | * @sdp: The super block | ||
| 1383 | * @jd: The journal to map | ||
| 1384 | * | ||
| 1385 | * Create a reusable "extent" mapping from all logical | ||
| 1386 | * blocks to all physical blocks for the given journal. This will save | ||
| 1387 | * us time when writing journal blocks. Most journals will have only one | ||
| 1388 | * extent that maps all their logical blocks. That's because mkfs.gfs2 | ||
| 1389 | * arranges the journal blocks sequentially to maximize performance, so | ||
| 1390 | * a single extent maps the entire file length from its first block. | ||
| 1391 | * However, gfs2_jadd can run while other file activity is in progress, | ||
| 1392 | * so journals added that way may not be sequential. Less likely is the | ||
| 1393 | * case where users created their own journals by mounting the metafs | ||
| 1394 | * and laying them out; that is still possible, and such journals might | ||
| 1395 | * have several extents. | ||
| 1396 | * | ||
| 1397 | * Returns: 0 on success, or error on failure | ||
| 1398 | */ | ||
| 1399 | |||
| 1400 | int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) | ||
| 1401 | { | ||
| 1402 | u64 lblock = 0; | ||
| 1403 | u64 lblock_stop; | ||
| 1404 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 1405 | struct buffer_head bh; | ||
| 1406 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | ||
| 1407 | u64 size; | ||
| 1408 | int rc; | ||
| 1409 | |||
| 1410 | lblock_stop = i_size_read(jd->jd_inode) >> shift; | ||
| 1411 | size = (lblock_stop - lblock) << shift; | ||
| 1412 | jd->nr_extents = 0; | ||
| 1413 | WARN_ON(!list_empty(&jd->extent_list)); | ||
| 1414 | |||
| 1415 | do { | ||
| 1416 | bh.b_state = 0; | ||
| 1417 | bh.b_blocknr = 0; | ||
| 1418 | bh.b_size = size; | ||
| 1419 | rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); | ||
| 1420 | if (rc || !buffer_mapped(&bh)) | ||
| 1421 | goto fail; | ||
| 1422 | rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); | ||
| 1423 | if (rc) | ||
| 1424 | goto fail; | ||
| 1425 | size -= bh.b_size; | ||
| 1426 | lblock += (bh.b_size >> ip->i_inode.i_blkbits); | ||
| 1427 | } while(size > 0); | ||
| 1428 | |||
| 1429 | fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid, | ||
| 1430 | jd->nr_extents); | ||
| 1431 | return 0; | ||
| 1432 | |||
| 1433 | fail: | ||
| 1434 | fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", | ||
| 1435 | rc, jd->jd_jid, | ||
| 1436 | (unsigned long long)(i_size_read(jd->jd_inode) - size), | ||
| 1437 | jd->nr_extents); | ||
| 1438 | fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", | ||
| 1439 | rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, | ||
| 1440 | bh.b_state, (unsigned long long)bh.b_size); | ||
| 1441 | gfs2_free_journal_extents(jd); | ||
| 1442 | return rc; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | /** | ||
| 1331 | * gfs2_write_alloc_required - figure out if a write will require an allocation | 1446 | * gfs2_write_alloc_required - figure out if a write will require an allocation |
| 1332 | * @ip: the file being written to | 1447 | * @ip: the file being written to |
| 1333 | * @offset: the offset to write to | 1448 | * @offset: the offset to write to |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 42fea03e2bd9..81ded5e2aaa2 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
| @@ -55,5 +55,7 @@ extern int gfs2_truncatei_resume(struct gfs2_inode *ip); | |||
| 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
| 56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, |
| 57 | unsigned int len); | 57 | unsigned int len); |
| 58 | extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); | ||
| 59 | extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); | ||
| 58 | 60 | ||
| 59 | #endif /* __BMAP_DOT_H__ */ | 61 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index fa32655449c8..1a349f9a9685 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -53,6 +53,8 @@ | |||
| 53 | * but never before the maximum hash table size has been reached. | 53 | * but never before the maximum hash table size has been reached. |
| 54 | */ | 54 | */ |
| 55 | 55 | ||
| 56 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 57 | |||
| 56 | #include <linux/slab.h> | 58 | #include <linux/slab.h> |
| 57 | #include <linux/spinlock.h> | 59 | #include <linux/spinlock.h> |
| 58 | #include <linux/buffer_head.h> | 60 | #include <linux/buffer_head.h> |
| @@ -507,8 +509,8 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, | |||
| 507 | goto error; | 509 | goto error; |
| 508 | return 0; | 510 | return 0; |
| 509 | error: | 511 | error: |
| 510 | printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, | 512 | pr_warn("%s: %s (%s)\n", |
| 511 | first ? "first in block" : "not first in block"); | 513 | __func__, msg, first ? "first in block" : "not first in block"); |
| 512 | return -EIO; | 514 | return -EIO; |
| 513 | } | 515 | } |
| 514 | 516 | ||
| @@ -531,8 +533,7 @@ static int gfs2_dirent_offset(const void *buf) | |||
| 531 | } | 533 | } |
| 532 | return offset; | 534 | return offset; |
| 533 | wrong_type: | 535 | wrong_type: |
| 534 | printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", | 536 | pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); |
| 535 | be32_to_cpu(h->mh_type)); | ||
| 536 | return -1; | 537 | return -1; |
| 537 | } | 538 | } |
| 538 | 539 | ||
| @@ -728,7 +729,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, | |||
| 728 | 729 | ||
| 729 | error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); | 730 | error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); |
| 730 | if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { | 731 | if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { |
| 731 | /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ | 732 | /* pr_info("block num=%llu\n", leaf_no); */ |
| 732 | error = -EIO; | 733 | error = -EIO; |
| 733 | } | 734 | } |
| 734 | 735 | ||
| @@ -1006,7 +1007,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 1006 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); | 1007 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); |
| 1007 | half_len = len >> 1; | 1008 | half_len = len >> 1; |
| 1008 | if (!half_len) { | 1009 | if (!half_len) { |
| 1009 | printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); | 1010 | pr_warn("i_depth %u lf_depth %u index %u\n", |
| 1011 | dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); | ||
| 1010 | gfs2_consist_inode(dip); | 1012 | gfs2_consist_inode(dip); |
| 1011 | error = -EIO; | 1013 | error = -EIO; |
| 1012 | goto fail_brelse; | 1014 | goto fail_brelse; |
| @@ -1684,6 +1686,14 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
| 1684 | return 0; | 1686 | return 0; |
| 1685 | } | 1687 | } |
| 1686 | 1688 | ||
| 1689 | static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) | ||
| 1690 | { | ||
| 1691 | u64 where = ip->i_no_addr + 1; | ||
| 1692 | if (ip->i_eattr == where) | ||
| 1693 | return 1; | ||
| 1694 | return 0; | ||
| 1695 | } | ||
| 1696 | |||
| 1687 | /** | 1697 | /** |
| 1688 | * gfs2_dir_add - Add new filename into directory | 1698 | * gfs2_dir_add - Add new filename into directory |
| 1689 | * @inode: The directory inode | 1699 | * @inode: The directory inode |
| @@ -1721,6 +1731,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
| 1721 | dent = gfs2_init_dirent(inode, dent, name, bh); | 1731 | dent = gfs2_init_dirent(inode, dent, name, bh); |
| 1722 | gfs2_inum_out(nip, dent); | 1732 | gfs2_inum_out(nip, dent); |
| 1723 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); | 1733 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); |
| 1734 | dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); | ||
| 1724 | tv = CURRENT_TIME; | 1735 | tv = CURRENT_TIME; |
| 1725 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { | 1736 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { |
| 1726 | leaf = (struct gfs2_leaf *)bh->b_data; | 1737 | leaf = (struct gfs2_leaf *)bh->b_data; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index efc078f0ee4e..6c794085abac 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -811,6 +811,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
| 811 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); | 811 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); |
| 812 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | 812 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; |
| 813 | loff_t max_chunk_size = UINT_MAX & bsize_mask; | 813 | loff_t max_chunk_size = UINT_MAX & bsize_mask; |
| 814 | struct gfs2_holder gh; | ||
| 815 | |||
| 814 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | 816 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; |
| 815 | 817 | ||
| 816 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 818 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ |
| @@ -831,8 +833,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
| 831 | if (error) | 833 | if (error) |
| 832 | return error; | 834 | return error; |
| 833 | 835 | ||
| 834 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | 836 | mutex_lock(&inode->i_mutex); |
| 835 | error = gfs2_glock_nq(&ip->i_gh); | 837 | |
| 838 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
| 839 | error = gfs2_glock_nq(&gh); | ||
| 836 | if (unlikely(error)) | 840 | if (unlikely(error)) |
| 837 | goto out_uninit; | 841 | goto out_uninit; |
| 838 | 842 | ||
| @@ -900,9 +904,10 @@ out_trans_fail: | |||
| 900 | out_qunlock: | 904 | out_qunlock: |
| 901 | gfs2_quota_unlock(ip); | 905 | gfs2_quota_unlock(ip); |
| 902 | out_unlock: | 906 | out_unlock: |
| 903 | gfs2_glock_dq(&ip->i_gh); | 907 | gfs2_glock_dq(&gh); |
| 904 | out_uninit: | 908 | out_uninit: |
| 905 | gfs2_holder_uninit(&ip->i_gh); | 909 | gfs2_holder_uninit(&gh); |
| 910 | mutex_unlock(&inode->i_mutex); | ||
| 906 | return error; | 911 | return error; |
| 907 | } | 912 | } |
| 908 | 913 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ca0be6c69a26..aec7f73832f0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
| @@ -468,7 +470,7 @@ retry: | |||
| 468 | do_xmote(gl, gh, LM_ST_UNLOCKED); | 470 | do_xmote(gl, gh, LM_ST_UNLOCKED); |
| 469 | break; | 471 | break; |
| 470 | default: /* Everything else */ | 472 | default: /* Everything else */ |
| 471 | printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); | 473 | pr_err("wanted %u got %u\n", gl->gl_target, state); |
| 472 | GLOCK_BUG_ON(gl, 1); | 474 | GLOCK_BUG_ON(gl, 1); |
| 473 | } | 475 | } |
| 474 | spin_unlock(&gl->gl_spin); | 476 | spin_unlock(&gl->gl_spin); |
| @@ -542,7 +544,7 @@ __acquires(&gl->gl_spin) | |||
| 542 | /* lock_dlm */ | 544 | /* lock_dlm */ |
| 543 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); | 545 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); |
| 544 | if (ret) { | 546 | if (ret) { |
| 545 | printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); | 547 | pr_err("lm_lock ret %d\n", ret); |
| 546 | GLOCK_BUG_ON(gl, 1); | 548 | GLOCK_BUG_ON(gl, 1); |
| 547 | } | 549 | } |
| 548 | } else { /* lock_nolock */ | 550 | } else { /* lock_nolock */ |
| @@ -935,7 +937,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) | |||
| 935 | vaf.fmt = fmt; | 937 | vaf.fmt = fmt; |
| 936 | vaf.va = &args; | 938 | vaf.va = &args; |
| 937 | 939 | ||
| 938 | printk(KERN_ERR " %pV", &vaf); | 940 | pr_err("%pV", &vaf); |
| 939 | } | 941 | } |
| 940 | 942 | ||
| 941 | va_end(args); | 943 | va_end(args); |
| @@ -1010,13 +1012,13 @@ do_cancel: | |||
| 1010 | return; | 1012 | return; |
| 1011 | 1013 | ||
| 1012 | trap_recursive: | 1014 | trap_recursive: |
| 1013 | printk(KERN_ERR "original: %pSR\n", (void *)gh2->gh_ip); | 1015 | pr_err("original: %pSR\n", (void *)gh2->gh_ip); |
| 1014 | printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); | 1016 | pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid)); |
| 1015 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | 1017 | pr_err("lock type: %d req lock state : %d\n", |
| 1016 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); | 1018 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); |
| 1017 | printk(KERN_ERR "new: %pSR\n", (void *)gh->gh_ip); | 1019 | pr_err("new: %pSR\n", (void *)gh->gh_ip); |
| 1018 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); | 1020 | pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid)); |
| 1019 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | 1021 | pr_err("lock type: %d req lock state : %d\n", |
| 1020 | gh->gh_gl->gl_name.ln_type, gh->gh_state); | 1022 | gh->gh_gl->gl_name.ln_type, gh->gh_state); |
| 1021 | gfs2_dump_glock(NULL, gl); | 1023 | gfs2_dump_glock(NULL, gl); |
| 1022 | BUG(); | 1024 | BUG(); |
| @@ -1045,9 +1047,13 @@ int gfs2_glock_nq(struct gfs2_holder *gh) | |||
| 1045 | 1047 | ||
| 1046 | spin_lock(&gl->gl_spin); | 1048 | spin_lock(&gl->gl_spin); |
| 1047 | add_to_queue(gh); | 1049 | add_to_queue(gh); |
| 1048 | if ((LM_FLAG_NOEXP & gh->gh_flags) && | 1050 | if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && |
| 1049 | test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) | 1051 | test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { |
| 1050 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | 1052 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); |
| 1053 | gl->gl_lockref.count++; | ||
| 1054 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 1055 | gl->gl_lockref.count--; | ||
| 1056 | } | ||
| 1051 | run_queue(gl, 1); | 1057 | run_queue(gl, 1); |
| 1052 | spin_unlock(&gl->gl_spin); | 1058 | spin_unlock(&gl->gl_spin); |
| 1053 | 1059 | ||
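The NOEXP path now takes a glock reference before queueing the work item and gives it back when queue_delayed_work() returns 0 (already pending): the reference is owned by the queued work, and without the conditional put it would leak every time the work was already scheduled, while without taking it at all the glock could be freed while its work item was still queued. The pattern in isolation, with illustrative names:

/* Reference-counted work queueing (illustrative sketch): */
obj->refcount++;			/* ref owned by the pending work */
if (queue_delayed_work(wq, &obj->work, 0) == 0)
	obj->refcount--;		/* already queued: return the ref */

/* The work function drops the matching reference when it finishes: */
static void obj_work_func(struct work_struct *work)
{
	struct obj *obj = container_of(work, struct obj, work.work);

	do_the_work(obj);
	obj_put(obj);			/* pairs with the ++ at queue time */
}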
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3bf0631b5d56..54b66809e818 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -82,6 +82,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
| 82 | struct gfs2_trans tr; | 82 | struct gfs2_trans tr; |
| 83 | 83 | ||
| 84 | memset(&tr, 0, sizeof(tr)); | 84 | memset(&tr, 0, sizeof(tr)); |
| 85 | INIT_LIST_HEAD(&tr.tr_buf); | ||
| 86 | INIT_LIST_HEAD(&tr.tr_databuf); | ||
| 85 | tr.tr_revokes = atomic_read(&gl->gl_ail_count); | 87 | tr.tr_revokes = atomic_read(&gl->gl_ail_count); |
| 86 | 88 | ||
| 87 | if (!tr.tr_revokes) | 89 | if (!tr.tr_revokes) |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index cf0e34400f71..bdf70c18610c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -52,7 +52,7 @@ struct gfs2_log_header_host { | |||
| 52 | */ | 52 | */ |
| 53 | 53 | ||
| 54 | struct gfs2_log_operations { | 54 | struct gfs2_log_operations { |
| 55 | void (*lo_before_commit) (struct gfs2_sbd *sdp); | 55 | void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); |
| 56 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); | 56 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); |
| 57 | void (*lo_before_scan) (struct gfs2_jdesc *jd, | 57 | void (*lo_before_scan) (struct gfs2_jdesc *jd, |
| 58 | struct gfs2_log_header_host *head, int pass); | 58 | struct gfs2_log_header_host *head, int pass); |
| @@ -371,6 +371,7 @@ enum { | |||
| 371 | GIF_ALLOC_FAILED = 2, | 371 | GIF_ALLOC_FAILED = 2, |
| 372 | GIF_SW_PAGED = 3, | 372 | GIF_SW_PAGED = 3, |
| 373 | GIF_ORDERED = 4, | 373 | GIF_ORDERED = 4, |
| 374 | GIF_FREE_VFS_INODE = 5, | ||
| 374 | }; | 375 | }; |
| 375 | 376 | ||
| 376 | struct gfs2_inode { | 377 | struct gfs2_inode { |
| @@ -462,11 +463,11 @@ struct gfs2_trans { | |||
| 462 | unsigned int tr_blocks; | 463 | unsigned int tr_blocks; |
| 463 | unsigned int tr_revokes; | 464 | unsigned int tr_revokes; |
| 464 | unsigned int tr_reserved; | 465 | unsigned int tr_reserved; |
| 466 | unsigned int tr_touched:1; | ||
| 467 | unsigned int tr_attached:1; | ||
| 465 | 468 | ||
| 466 | struct gfs2_holder tr_t_gh; | 469 | struct gfs2_holder tr_t_gh; |
| 467 | 470 | ||
| 468 | int tr_touched; | ||
| 469 | int tr_attached; | ||
| 470 | 471 | ||
| 471 | unsigned int tr_num_buf_new; | 472 | unsigned int tr_num_buf_new; |
| 472 | unsigned int tr_num_databuf_new; | 473 | unsigned int tr_num_databuf_new; |
| @@ -476,6 +477,8 @@ struct gfs2_trans { | |||
| 476 | unsigned int tr_num_revoke_rm; | 477 | unsigned int tr_num_revoke_rm; |
| 477 | 478 | ||
| 478 | struct list_head tr_list; | 479 | struct list_head tr_list; |
| 480 | struct list_head tr_databuf; | ||
| 481 | struct list_head tr_buf; | ||
| 479 | 482 | ||
| 480 | unsigned int tr_first; | 483 | unsigned int tr_first; |
| 481 | struct list_head tr_ail1_list; | 484 | struct list_head tr_ail1_list; |
| @@ -483,7 +486,7 @@ struct gfs2_trans { | |||
| 483 | }; | 486 | }; |
| 484 | 487 | ||
| 485 | struct gfs2_journal_extent { | 488 | struct gfs2_journal_extent { |
| 486 | struct list_head extent_list; | 489 | struct list_head list; |
| 487 | 490 | ||
| 488 | unsigned int lblock; /* First logical block */ | 491 | unsigned int lblock; /* First logical block */ |
| 489 | u64 dblock; /* First disk block */ | 492 | u64 dblock; /* First disk block */ |
| @@ -493,6 +496,7 @@ struct gfs2_journal_extent { | |||
| 493 | struct gfs2_jdesc { | 496 | struct gfs2_jdesc { |
| 494 | struct list_head jd_list; | 497 | struct list_head jd_list; |
| 495 | struct list_head extent_list; | 498 | struct list_head extent_list; |
| 499 | unsigned int nr_extents; | ||
| 496 | struct work_struct jd_work; | 500 | struct work_struct jd_work; |
| 497 | struct inode *jd_inode; | 501 | struct inode *jd_inode; |
| 498 | unsigned long jd_flags; | 502 | unsigned long jd_flags; |
| @@ -500,6 +504,15 @@ struct gfs2_jdesc { | |||
| 500 | unsigned int jd_jid; | 504 | unsigned int jd_jid; |
| 501 | unsigned int jd_blocks; | 505 | unsigned int jd_blocks; |
| 502 | int jd_recover_error; | 506 | int jd_recover_error; |
| 507 | /* Replay stuff */ | ||
| 508 | |||
| 509 | unsigned int jd_found_blocks; | ||
| 510 | unsigned int jd_found_revokes; | ||
| 511 | unsigned int jd_replayed_blocks; | ||
| 512 | |||
| 513 | struct list_head jd_revoke_list; | ||
| 514 | unsigned int jd_replay_tail; | ||
| 515 | |||
| 503 | }; | 516 | }; |
| 504 | 517 | ||
| 505 | struct gfs2_statfs_change_host { | 518 | struct gfs2_statfs_change_host { |
| @@ -746,19 +759,12 @@ struct gfs2_sbd { | |||
| 746 | 759 | ||
| 747 | struct gfs2_trans *sd_log_tr; | 760 | struct gfs2_trans *sd_log_tr; |
| 748 | unsigned int sd_log_blks_reserved; | 761 | unsigned int sd_log_blks_reserved; |
| 749 | unsigned int sd_log_commited_buf; | ||
| 750 | unsigned int sd_log_commited_databuf; | ||
| 751 | int sd_log_commited_revoke; | 762 | int sd_log_commited_revoke; |
| 752 | 763 | ||
| 753 | atomic_t sd_log_pinned; | 764 | atomic_t sd_log_pinned; |
| 754 | unsigned int sd_log_num_buf; | ||
| 755 | unsigned int sd_log_num_revoke; | 765 | unsigned int sd_log_num_revoke; |
| 756 | unsigned int sd_log_num_rg; | ||
| 757 | unsigned int sd_log_num_databuf; | ||
| 758 | 766 | ||
| 759 | struct list_head sd_log_le_buf; | ||
| 760 | struct list_head sd_log_le_revoke; | 767 | struct list_head sd_log_le_revoke; |
| 761 | struct list_head sd_log_le_databuf; | ||
| 762 | struct list_head sd_log_le_ordered; | 768 | struct list_head sd_log_le_ordered; |
| 763 | spinlock_t sd_ordered_lock; | 769 | spinlock_t sd_ordered_lock; |
| 764 | 770 | ||
| @@ -786,15 +792,6 @@ struct gfs2_sbd { | |||
| 786 | struct list_head sd_ail1_list; | 792 | struct list_head sd_ail1_list; |
| 787 | struct list_head sd_ail2_list; | 793 | struct list_head sd_ail2_list; |
| 788 | 794 | ||
| 789 | /* Replay stuff */ | ||
| 790 | |||
| 791 | struct list_head sd_revoke_list; | ||
| 792 | unsigned int sd_replay_tail; | ||
| 793 | |||
| 794 | unsigned int sd_found_blocks; | ||
| 795 | unsigned int sd_found_revokes; | ||
| 796 | unsigned int sd_replayed_blocks; | ||
| 797 | |||
| 798 | /* For quiescing the filesystem */ | 795 | /* For quiescing the filesystem */ |
| 799 | struct gfs2_holder sd_freeze_gh; | 796 | struct gfs2_holder sd_freeze_gh; |
| 800 | 797 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5c524180c98e..28cc7bf6575a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -376,12 +376,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, | |||
| 376 | inode->i_gid = current_fsgid(); | 376 | inode->i_gid = current_fsgid(); |
| 377 | } | 377 | } |
| 378 | 378 | ||
| 379 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags) | 379 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) |
| 380 | { | 380 | { |
| 381 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 381 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 382 | struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, }; | 382 | struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; |
| 383 | int error; | 383 | int error; |
| 384 | int dblocks = 1; | ||
| 385 | 384 | ||
| 386 | error = gfs2_quota_lock_check(ip); | 385 | error = gfs2_quota_lock_check(ip); |
| 387 | if (error) | 386 | if (error) |
| @@ -391,11 +390,11 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags) | |||
| 391 | if (error) | 390 | if (error) |
| 392 | goto out_quota; | 391 | goto out_quota; |
| 393 | 392 | ||
| 394 | error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 0); | 393 | error = gfs2_trans_begin(sdp, (*dblocks * RES_RG_BIT) + RES_STATFS + RES_QUOTA, 0); |
| 395 | if (error) | 394 | if (error) |
| 396 | goto out_ipreserv; | 395 | goto out_ipreserv; |
| 397 | 396 | ||
| 398 | error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); | 397 | error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); |
| 399 | ip->i_no_formal_ino = ip->i_generation; | 398 | ip->i_no_formal_ino = ip->i_generation; |
| 400 | ip->i_inode.i_ino = ip->i_no_addr; | 399 | ip->i_inode.i_ino = ip->i_no_addr; |
| 401 | ip->i_goal = ip->i_no_addr; | 400 | ip->i_goal = ip->i_no_addr; |
| @@ -428,6 +427,33 @@ static void gfs2_init_dir(struct buffer_head *dibh, | |||
| 428 | } | 427 | } |
| 429 | 428 | ||
| 430 | /** | 429 | /** |
| 430 | * gfs2_init_xattr - Initialise an xattr block for a new inode | ||
| 431 | * @ip: The inode in question | ||
| 432 | * | ||
| 433 | * This sets up an empty xattr block for a new inode, ready to | ||
| 434 | * take any ACLs, LSM xattrs, etc. | ||
| 435 | */ | ||
| 436 | |||
| 437 | static void gfs2_init_xattr(struct gfs2_inode *ip) | ||
| 438 | { | ||
| 439 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 440 | struct buffer_head *bh; | ||
| 441 | struct gfs2_ea_header *ea; | ||
| 442 | |||
| 443 | bh = gfs2_meta_new(ip->i_gl, ip->i_eattr); | ||
| 444 | gfs2_trans_add_meta(ip->i_gl, bh); | ||
| 445 | gfs2_metatype_set(bh, GFS2_METATYPE_EA, GFS2_FORMAT_EA); | ||
| 446 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); | ||
| 447 | |||
| 448 | ea = GFS2_EA_BH2FIRST(bh); | ||
| 449 | ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); | ||
| 450 | ea->ea_type = GFS2_EATYPE_UNUSED; | ||
| 451 | ea->ea_flags = GFS2_EAFLAG_LAST; | ||
| 452 | |||
| 453 | brelse(bh); | ||
| 454 | } | ||
| 455 | |||
| 456 | /** | ||
| 431 | * init_dinode - Fill in a new dinode structure | 457 | * init_dinode - Fill in a new dinode structure |
| 432 | * @dip: The directory this inode is being created in | 458 | * @dip: The directory this inode is being created in |
| 433 | * @ip: The inode | 459 | * @ip: The inode |
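gfs2_init_xattr() above formats an empty extended-attribute block: a single record whose length spans the whole journaled block size, typed unused and flagged as last. A userspace sketch of the same layout idea; the struct definitions, sizes and htonl() are simplified stand-ins for the on-disk gfs2 structures and cpu_to_be32(), not the real formats:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>	/* htonl()/ntohl() standing in for cpu_to_be32() */

/* Simplified stand-ins for the on-disk structures */
struct meta_header { uint32_t magic; uint32_t type; };
struct ea_header   { uint32_t rec_len; uint8_t type; uint8_t flags; };

#define EATYPE_UNUSED 0
#define EAFLAG_LAST   1

int main(void)
{
	size_t bsize = 4096 - sizeof(struct meta_header); /* "journaled" size */
	unsigned char *block = calloc(1, 4096);
	struct ea_header *ea;

	if (!block)
		return 1;
	ea = (struct ea_header *)(block + sizeof(struct meta_header));

	/* One record spanning the whole usable area, marked unused + last */
	ea->rec_len = htonl((uint32_t)bsize);
	ea->type = EATYPE_UNUSED;
	ea->flags = EAFLAG_LAST;

	printf("rec_len=%u last=%d\n", ntohl(ea->rec_len), ea->flags & EAFLAG_LAST);
	free(block);
	return 0;
}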
| @@ -545,13 +571,6 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, | |||
| 545 | return err; | 571 | return err; |
| 546 | } | 572 | } |
| 547 | 573 | ||
| 548 | static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | ||
| 549 | const struct qstr *qstr) | ||
| 550 | { | ||
| 551 | return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, | ||
| 552 | &gfs2_initxattrs, NULL); | ||
| 553 | } | ||
| 554 | |||
| 555 | /** | 574 | /** |
| 556 | * gfs2_create_inode - Create a new inode | 575 | * gfs2_create_inode - Create a new inode |
| 557 | * @dir: The parent directory | 576 | * @dir: The parent directory |
| @@ -578,8 +597,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 578 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 597 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
| 579 | struct gfs2_glock *io_gl; | 598 | struct gfs2_glock *io_gl; |
| 580 | struct dentry *d; | 599 | struct dentry *d; |
| 581 | int error; | 600 | int error, free_vfs_inode = 0; |
| 582 | u32 aflags = 0; | 601 | u32 aflags = 0; |
| 602 | unsigned blocks = 1; | ||
| 583 | struct gfs2_diradd da = { .bh = NULL, }; | 603 | struct gfs2_diradd da = { .bh = NULL, }; |
| 584 | 604 | ||
| 585 | if (!name->len || name->len > GFS2_FNAMESIZE) | 605 | if (!name->len || name->len > GFS2_FNAMESIZE) |
| @@ -676,10 +696,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 676 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) | 696 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) |
| 677 | aflags |= GFS2_AF_ORLOV; | 697 | aflags |= GFS2_AF_ORLOV; |
| 678 | 698 | ||
| 679 | error = alloc_dinode(ip, aflags); | 699 | if (default_acl || acl) |
| 700 | blocks++; | ||
| 701 | |||
| 702 | error = alloc_dinode(ip, aflags, &blocks); | ||
| 680 | if (error) | 703 | if (error) |
| 681 | goto fail_free_inode; | 704 | goto fail_free_inode; |
| 682 | 705 | ||
| 706 | gfs2_set_inode_blocks(inode, blocks); | ||
| 707 | |||
| 683 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); | 708 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); |
| 684 | if (error) | 709 | if (error) |
| 685 | goto fail_free_inode; | 710 | goto fail_free_inode; |
| @@ -689,10 +714,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 689 | if (error) | 714 | if (error) |
| 690 | goto fail_free_inode; | 715 | goto fail_free_inode; |
| 691 | 716 | ||
| 692 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 717 | error = gfs2_trans_begin(sdp, blocks, 0); |
| 693 | if (error) | 718 | if (error) |
| 694 | goto fail_gunlock2; | 719 | goto fail_gunlock2; |
| 695 | 720 | ||
| 721 | if (blocks > 1) { | ||
| 722 | ip->i_eattr = ip->i_no_addr + 1; | ||
| 723 | gfs2_init_xattr(ip); | ||
| 724 | } | ||
| 696 | init_dinode(dip, ip, symname); | 725 | init_dinode(dip, ip, symname); |
| 697 | gfs2_trans_end(sdp); | 726 | gfs2_trans_end(sdp); |
| 698 | 727 | ||
| @@ -722,7 +751,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 722 | if (error) | 751 | if (error) |
| 723 | goto fail_gunlock3; | 752 | goto fail_gunlock3; |
| 724 | 753 | ||
| 725 | error = gfs2_security_init(dip, ip, name); | 754 | error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, |
| 755 | &gfs2_initxattrs, NULL); | ||
| 726 | if (error) | 756 | if (error) |
| 727 | goto fail_gunlock3; | 757 | goto fail_gunlock3; |
| 728 | 758 | ||
| @@ -758,15 +788,16 @@ fail_free_acls: | |||
| 758 | if (acl) | 788 | if (acl) |
| 759 | posix_acl_release(acl); | 789 | posix_acl_release(acl); |
| 760 | fail_free_vfs_inode: | 790 | fail_free_vfs_inode: |
| 761 | free_inode_nonrcu(inode); | 791 | free_vfs_inode = 1; |
| 762 | inode = NULL; | ||
| 763 | fail_gunlock: | 792 | fail_gunlock: |
| 764 | gfs2_dir_no_add(&da); | 793 | gfs2_dir_no_add(&da); |
| 765 | gfs2_glock_dq_uninit(ghs); | 794 | gfs2_glock_dq_uninit(ghs); |
| 766 | if (inode && !IS_ERR(inode)) { | 795 | if (inode && !IS_ERR(inode)) { |
| 767 | clear_nlink(inode); | 796 | clear_nlink(inode); |
| 768 | mark_inode_dirty(inode); | 797 | if (!free_vfs_inode) |
| 769 | set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); | 798 | mark_inode_dirty(inode); |
| 799 | set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, | ||
| 800 | &GFS2_I(inode)->i_flags); | ||
| 770 | iput(inode); | 801 | iput(inode); |
| 771 | } | 802 | } |
| 772 | fail: | 803 | fail: |
| @@ -1263,6 +1294,10 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
| 1263 | } | 1294 | } |
| 1264 | 1295 | ||
| 1265 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); | 1296 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
| 1297 | if (!tmp) { | ||
| 1298 | error = -ENOENT; | ||
| 1299 | break; | ||
| 1300 | } | ||
| 1266 | if (IS_ERR(tmp)) { | 1301 | if (IS_ERR(tmp)) { |
| 1267 | error = PTR_ERR(tmp); | 1302 | error = PTR_ERR(tmp); |
| 1268 | break; | 1303 | break; |
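The added NULL check above handles the third possible result of gfs2_lookupi(): a valid pointer, an ERR_PTR()-encoded errno, or NULL when the entry simply does not exist; previously a NULL ".." lookup fell through unhandled. A userspace re-creation of the three-state return convention, where ERR_PTR/IS_ERR/PTR_ERR are simplified re-implementations and lookup() is a made-up stand-in:

#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err)      { return (void *)err; }
static long  PTR_ERR(const void *p) { return (long)p; }
static int   IS_ERR(const void *p)  { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }

static void *lookup(int kind)	/* 0 -> found, 1 -> missing, 2 -> I/O error */
{
	static int obj = 42;
	if (kind == 1) return NULL;		/* no such entry */
	if (kind == 2) return ERR_PTR(-EIO);	/* hard failure */
	return &obj;
}

int main(void)
{
	for (int kind = 0; kind < 3; kind++) {
		void *p = lookup(kind);
		if (!p)
			printf("missing -> -ENOENT\n");
		else if (IS_ERR(p))
			printf("error %ld\n", PTR_ERR(p));
		else
			printf("found %d\n", *(int *)p);
	}
	return 0;
}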
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 2a6ba06bee6f..c1eb555dc588 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
| 11 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> |
| 12 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| @@ -176,7 +178,7 @@ static void gdlm_bast(void *arg, int mode) | |||
| 176 | gfs2_glock_cb(gl, LM_ST_SHARED); | 178 | gfs2_glock_cb(gl, LM_ST_SHARED); |
| 177 | break; | 179 | break; |
| 178 | default: | 180 | default: |
| 179 | printk(KERN_ERR "unknown bast mode %d", mode); | 181 | pr_err("unknown bast mode %d\n", mode); |
| 180 | BUG(); | 182 | BUG(); |
| 181 | } | 183 | } |
| 182 | } | 184 | } |
| @@ -195,7 +197,7 @@ static int make_mode(const unsigned int lmstate) | |||
| 195 | case LM_ST_SHARED: | 197 | case LM_ST_SHARED: |
| 196 | return DLM_LOCK_PR; | 198 | return DLM_LOCK_PR; |
| 197 | } | 199 | } |
| 198 | printk(KERN_ERR "unknown LM state %d", lmstate); | 200 | pr_err("unknown LM state %d\n", lmstate); |
| 199 | BUG(); | 201 | BUG(); |
| 200 | return -1; | 202 | return -1; |
| 201 | } | 203 | } |
| @@ -308,7 +310,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) | |||
| 308 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, | 310 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, |
| 309 | NULL, gl); | 311 | NULL, gl); |
| 310 | if (error) { | 312 | if (error) { |
| 311 | printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n", | 313 | pr_err("gdlm_unlock %x,%llx err=%d\n", |
| 312 | gl->gl_name.ln_type, | 314 | gl->gl_name.ln_type, |
| 313 | (unsigned long long)gl->gl_name.ln_number, error); | 315 | (unsigned long long)gl->gl_name.ln_number, error); |
| 314 | return; | 316 | return; |
| @@ -1102,7 +1104,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) | |||
| 1102 | } | 1104 | } |
| 1103 | 1105 | ||
| 1104 | if (ls->ls_recover_submit[jid]) { | 1106 | if (ls->ls_recover_submit[jid]) { |
| 1105 | fs_info(sdp, "recover_slot jid %d gen %u prev %u", | 1107 | fs_info(sdp, "recover_slot jid %d gen %u prev %u\n", |
| 1106 | jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); | 1108 | jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); |
| 1107 | } | 1109 | } |
| 1108 | ls->ls_recover_submit[jid] = ls->ls_recover_block; | 1110 | ls->ls_recover_submit[jid] = ls->ls_recover_block; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9dcb9777a5f8..4a14d504ef83 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
| 19 | #include <linux/freezer.h> | 19 | #include <linux/freezer.h> |
| 20 | #include <linux/bio.h> | 20 | #include <linux/bio.h> |
| 21 | #include <linux/blkdev.h> | ||
| 21 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
| 22 | #include <linux/list_sort.h> | 23 | #include <linux/list_sort.h> |
| 23 | 24 | ||
| @@ -145,8 +146,10 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) | |||
| 145 | { | 146 | { |
| 146 | struct list_head *head = &sdp->sd_ail1_list; | 147 | struct list_head *head = &sdp->sd_ail1_list; |
| 147 | struct gfs2_trans *tr; | 148 | struct gfs2_trans *tr; |
| 149 | struct blk_plug plug; | ||
| 148 | 150 | ||
| 149 | trace_gfs2_ail_flush(sdp, wbc, 1); | 151 | trace_gfs2_ail_flush(sdp, wbc, 1); |
| 152 | blk_start_plug(&plug); | ||
| 150 | spin_lock(&sdp->sd_ail_lock); | 153 | spin_lock(&sdp->sd_ail_lock); |
| 151 | restart: | 154 | restart: |
| 152 | list_for_each_entry_reverse(tr, head, tr_list) { | 155 | list_for_each_entry_reverse(tr, head, tr_list) { |
| @@ -156,6 +159,7 @@ restart: | |||
| 156 | goto restart; | 159 | goto restart; |
| 157 | } | 160 | } |
| 158 | spin_unlock(&sdp->sd_ail_lock); | 161 | spin_unlock(&sdp->sd_ail_lock); |
| 162 | blk_finish_plug(&plug); | ||
| 159 | trace_gfs2_ail_flush(sdp, wbc, 0); | 163 | trace_gfs2_ail_flush(sdp, wbc, 0); |
| 160 | } | 164 | } |
| 161 | 165 | ||
| @@ -410,24 +414,22 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer | |||
| 410 | static unsigned int calc_reserved(struct gfs2_sbd *sdp) | 414 | static unsigned int calc_reserved(struct gfs2_sbd *sdp) |
| 411 | { | 415 | { |
| 412 | unsigned int reserved = 0; | 416 | unsigned int reserved = 0; |
| 413 | unsigned int mbuf_limit, metabufhdrs_needed; | 417 | unsigned int mbuf; |
| 414 | unsigned int dbuf_limit, databufhdrs_needed; | 418 | unsigned int dbuf; |
| 415 | unsigned int revokes = 0; | 419 | struct gfs2_trans *tr = sdp->sd_log_tr; |
| 416 | 420 | ||
| 417 | mbuf_limit = buf_limit(sdp); | 421 | if (tr) { |
| 418 | metabufhdrs_needed = (sdp->sd_log_commited_buf + | 422 | mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; |
| 419 | (mbuf_limit - 1)) / mbuf_limit; | 423 | dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; |
| 420 | dbuf_limit = databuf_limit(sdp); | 424 | reserved = mbuf + dbuf; |
| 421 | databufhdrs_needed = (sdp->sd_log_commited_databuf + | 425 | /* Account for header blocks */ |
| 422 | (dbuf_limit - 1)) / dbuf_limit; | 426 | reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); |
| 427 | reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); | ||
| 428 | } | ||
| 423 | 429 | ||
| 424 | if (sdp->sd_log_commited_revoke > 0) | 430 | if (sdp->sd_log_commited_revoke > 0) |
| 425 | revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, | 431 | reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, |
| 426 | sizeof(u64)); | 432 | sizeof(u64)); |
| 427 | |||
| 428 | reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + | ||
| 429 | sdp->sd_log_commited_databuf + databufhdrs_needed + | ||
| 430 | revokes; | ||
| 431 | /* One for the overall header */ | 433 | /* One for the overall header */ |
| 432 | if (reserved) | 434 | if (reserved) |
| 433 | reserved++; | 435 | reserved++; |
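The rewritten calc_reserved() above derives the reservation directly from the active transaction: the net count of new metadata and data buffers, one log-descriptor header per buf_limit/databuf_limit buffers of each type, plus one overall log header. The same arithmetic, standalone; the limits passed in main() follow the "503 for 4k blocks" comment elsewhere in this diff but are otherwise assumptions:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned int calc_reserved(unsigned int mbuf, unsigned int dbuf,
				  unsigned int buf_limit, unsigned int databuf_limit)
{
	unsigned int reserved = mbuf + dbuf;

	/* one descriptor header block per "limit" buffers of each type */
	reserved += DIV_ROUND_UP(mbuf, buf_limit);
	reserved += DIV_ROUND_UP(dbuf, databuf_limit);
	if (reserved)
		reserved++;	/* the overall log header */
	return reserved;
}

int main(void)
{
	/* e.g. 600 metadata buffers at a 503-buffer limit -> 2 headers */
	printf("%u\n", calc_reserved(600, 10, 503, 251));	/* prints 614 */
	return 0;
}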
| @@ -682,36 +684,25 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
| 682 | } | 684 | } |
| 683 | trace_gfs2_log_flush(sdp, 1); | 685 | trace_gfs2_log_flush(sdp, 1); |
| 684 | 686 | ||
| 687 | sdp->sd_log_flush_head = sdp->sd_log_head; | ||
| 688 | sdp->sd_log_flush_wrapped = 0; | ||
| 685 | tr = sdp->sd_log_tr; | 689 | tr = sdp->sd_log_tr; |
| 686 | if (tr) { | 690 | if (tr) { |
| 687 | sdp->sd_log_tr = NULL; | 691 | sdp->sd_log_tr = NULL; |
| 688 | INIT_LIST_HEAD(&tr->tr_ail1_list); | 692 | INIT_LIST_HEAD(&tr->tr_ail1_list); |
| 689 | INIT_LIST_HEAD(&tr->tr_ail2_list); | 693 | INIT_LIST_HEAD(&tr->tr_ail2_list); |
| 694 | tr->tr_first = sdp->sd_log_flush_head; | ||
| 690 | } | 695 | } |
| 691 | 696 | ||
| 692 | if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { | ||
| 693 | printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, | ||
| 694 | sdp->sd_log_commited_buf); | ||
| 695 | gfs2_assert_withdraw(sdp, 0); | ||
| 696 | } | ||
| 697 | if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { | ||
| 698 | printk(KERN_INFO "GFS2: log databuf %u %u\n", | ||
| 699 | sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); | ||
| 700 | gfs2_assert_withdraw(sdp, 0); | ||
| 701 | } | ||
| 702 | gfs2_assert_withdraw(sdp, | 697 | gfs2_assert_withdraw(sdp, |
| 703 | sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); | 698 | sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); |
| 704 | 699 | ||
| 705 | sdp->sd_log_flush_head = sdp->sd_log_head; | ||
| 706 | sdp->sd_log_flush_wrapped = 0; | ||
| 707 | if (tr) | ||
| 708 | tr->tr_first = sdp->sd_log_flush_head; | ||
| 709 | |||
| 710 | gfs2_ordered_write(sdp); | 700 | gfs2_ordered_write(sdp); |
| 711 | lops_before_commit(sdp); | 701 | lops_before_commit(sdp, tr); |
| 712 | gfs2_log_flush_bio(sdp, WRITE); | 702 | gfs2_log_flush_bio(sdp, WRITE); |
| 713 | 703 | ||
| 714 | if (sdp->sd_log_head != sdp->sd_log_flush_head) { | 704 | if (sdp->sd_log_head != sdp->sd_log_flush_head) { |
| 705 | log_flush_wait(sdp); | ||
| 715 | log_write_header(sdp, 0); | 706 | log_write_header(sdp, 0); |
| 716 | } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ | 707 | } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ |
| 717 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ | 708 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ |
| @@ -723,8 +714,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
| 723 | gfs2_log_lock(sdp); | 714 | gfs2_log_lock(sdp); |
| 724 | sdp->sd_log_head = sdp->sd_log_flush_head; | 715 | sdp->sd_log_head = sdp->sd_log_flush_head; |
| 725 | sdp->sd_log_blks_reserved = 0; | 716 | sdp->sd_log_blks_reserved = 0; |
| 726 | sdp->sd_log_commited_buf = 0; | ||
| 727 | sdp->sd_log_commited_databuf = 0; | ||
| 728 | sdp->sd_log_commited_revoke = 0; | 717 | sdp->sd_log_commited_revoke = 0; |
| 729 | 718 | ||
| 730 | spin_lock(&sdp->sd_ail_lock); | 719 | spin_lock(&sdp->sd_ail_lock); |
| @@ -740,34 +729,54 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
| 740 | kfree(tr); | 729 | kfree(tr); |
| 741 | } | 730 | } |
| 742 | 731 | ||
| 732 | /** | ||
| 733 | * gfs2_merge_trans - Merge a new transaction into a cached transaction | ||
| 734 | * @old: Original transaction to be expanded | ||
| 735 | * @new: New transaction to be merged | ||
| 736 | */ | ||
| 737 | |||
| 738 | static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) | ||
| 739 | { | ||
| 740 | WARN_ON_ONCE(old->tr_attached != 1); | ||
| 741 | |||
| 742 | old->tr_num_buf_new += new->tr_num_buf_new; | ||
| 743 | old->tr_num_databuf_new += new->tr_num_databuf_new; | ||
| 744 | old->tr_num_buf_rm += new->tr_num_buf_rm; | ||
| 745 | old->tr_num_databuf_rm += new->tr_num_databuf_rm; | ||
| 746 | old->tr_num_revoke += new->tr_num_revoke; | ||
| 747 | old->tr_num_revoke_rm += new->tr_num_revoke_rm; | ||
| 748 | |||
| 749 | list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); | ||
| 750 | list_splice_tail_init(&new->tr_buf, &old->tr_buf); | ||
| 751 | } | ||
| 752 | |||
| 743 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 753 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 744 | { | 754 | { |
| 745 | unsigned int reserved; | 755 | unsigned int reserved; |
| 746 | unsigned int unused; | 756 | unsigned int unused; |
| 757 | unsigned int maxres; | ||
| 747 | 758 | ||
| 748 | gfs2_log_lock(sdp); | 759 | gfs2_log_lock(sdp); |
| 749 | 760 | ||
| 750 | sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; | 761 | if (sdp->sd_log_tr) { |
| 751 | sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - | 762 | gfs2_merge_trans(sdp->sd_log_tr, tr); |
| 752 | tr->tr_num_databuf_rm; | 763 | } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { |
| 753 | gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || | 764 | gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); |
| 754 | (((int)sdp->sd_log_commited_databuf) >= 0)); | 765 | sdp->sd_log_tr = tr; |
| 766 | tr->tr_attached = 1; | ||
| 767 | } | ||
| 768 | |||
| 755 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
| 756 | reserved = calc_reserved(sdp); | 770 | reserved = calc_reserved(sdp); |
| 757 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); | 771 | maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; |
| 758 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; | 772 | gfs2_assert_withdraw(sdp, maxres >= reserved); |
| 773 | unused = maxres - reserved; | ||
| 759 | atomic_add(unused, &sdp->sd_log_blks_free); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
| 760 | trace_gfs2_log_blocks(sdp, unused); | 775 | trace_gfs2_log_blocks(sdp, unused); |
| 761 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | 776 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
| 762 | sdp->sd_jdesc->jd_blocks); | 777 | sdp->sd_jdesc->jd_blocks); |
| 763 | sdp->sd_log_blks_reserved = reserved; | 778 | sdp->sd_log_blks_reserved = reserved; |
| 764 | 779 | ||
| 765 | if (sdp->sd_log_tr == NULL && | ||
| 766 | (tr->tr_num_buf_new || tr->tr_num_databuf_new)) { | ||
| 767 | gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); | ||
| 768 | sdp->sd_log_tr = tr; | ||
| 769 | tr->tr_attached = 1; | ||
| 770 | } | ||
| 771 | gfs2_log_unlock(sdp); | 780 | gfs2_log_unlock(sdp); |
| 772 | } | 781 | } |
| 773 | 782 | ||
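gfs2_merge_trans() above folds a committing transaction into the one already attached to the log: the per-type counters are summed and the new buffer lists are spliced onto the tails of the old ones, so log_refund() only ever tracks a single active transaction per log. A reduced model covering the counter half; field names are shortened, and the list splice is noted in a comment rather than re-implemented:

#include <stdio.h>

struct trans {
	unsigned int buf_new, buf_rm;
	unsigned int databuf_new, databuf_rm;
	unsigned int revoke, revoke_rm;
};

static void merge_trans(struct trans *old, const struct trans *new)
{
	old->buf_new     += new->buf_new;
	old->buf_rm      += new->buf_rm;
	old->databuf_new += new->databuf_new;
	old->databuf_rm  += new->databuf_rm;
	old->revoke      += new->revoke;
	old->revoke_rm   += new->revoke_rm;
	/* the real code also moves new's tr_buf/tr_databuf entries onto
	 * the tail of old's lists with list_splice_tail_init() */
}

int main(void)
{
	struct trans a = { .buf_new = 5, .databuf_new = 2 };
	struct trans b = { .buf_new = 3, .buf_rm = 1, .revoke = 4 };
	merge_trans(&a, &b);
	printf("buf: +%u -%u, revokes: %u\n", a.buf_new, a.buf_rm, a.revoke);
	return 0;
}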
| @@ -807,10 +816,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
| 807 | down_write(&sdp->sd_log_flush_lock); | 816 | down_write(&sdp->sd_log_flush_lock); |
| 808 | 817 | ||
| 809 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); | 818 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); |
| 810 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); | ||
| 811 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 819 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
| 812 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); | ||
| 813 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); | ||
| 814 | gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); | 820 | gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); |
| 815 | 821 | ||
| 816 | sdp->sd_log_flush_head = sdp->sd_log_head; | 822 | sdp->sd_log_flush_head = sdp->sd_log_head; |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 76693793cedd..a294d8d8bcd4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
| @@ -146,8 +146,8 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) | |||
| 146 | struct gfs2_journal_extent *je; | 146 | struct gfs2_journal_extent *je; |
| 147 | u64 block; | 147 | u64 block; |
| 148 | 148 | ||
| 149 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { | 149 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) { |
| 150 | if (lbn >= je->lblock && lbn < je->lblock + je->blocks) { | 150 | if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) { |
| 151 | block = je->dblock + lbn - je->lblock; | 151 | block = je->dblock + lbn - je->lblock; |
| 152 | gfs2_log_incr_head(sdp); | 152 | gfs2_log_incr_head(sdp); |
| 153 | return block; | 153 | return block; |
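The renamed list field is walked by gfs2_log_bmap() above to translate a journal-relative logical block into a disk block: each extent maps [lblock, lblock + blocks) onto a contiguous physical run starting at dblock. The same lookup over a plain array rather than the kernel's linked list:

#include <stdio.h>
#include <stdint.h>

struct extent { unsigned int lblock; uint64_t dblock; unsigned int blocks; };

static uint64_t log_bmap(const struct extent *ext, int n, unsigned int lbn)
{
	for (int i = 0; i < n; i++)
		if (lbn >= ext[i].lblock && lbn < ext[i].lblock + ext[i].blocks)
			return ext[i].dblock + lbn - ext[i].lblock;
	return 0;	/* the kernel treats a miss as a fatal error */
}

int main(void)
{
	/* a journal laid out in two physical runs */
	struct extent map[] = {
		{ .lblock = 0,  .dblock = 1000, .blocks = 16 },
		{ .lblock = 16, .dblock = 5000, .blocks = 16 },
	};
	printf("%llu\n", (unsigned long long)log_bmap(map, 2, 20)); /* -> 5004 */
	return 0;
}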
| @@ -491,44 +491,40 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | |||
| 491 | gfs2_log_unlock(sdp); | 491 | gfs2_log_unlock(sdp); |
| 492 | } | 492 | } |
| 493 | 493 | ||
| 494 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) | 494 | static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 495 | { | 495 | { |
| 496 | unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ | 496 | unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ |
| 497 | 497 | unsigned int nbuf; | |
| 498 | gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf, | 498 | if (tr == NULL) |
| 499 | &sdp->sd_log_le_buf, 0); | 499 | return; |
| 500 | nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; | ||
| 501 | gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0); | ||
| 500 | } | 502 | } |
| 501 | 503 | ||
| 502 | static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 504 | static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 503 | { | 505 | { |
| 504 | struct list_head *head = &sdp->sd_log_le_buf; | 506 | struct list_head *head; |
| 505 | struct gfs2_bufdata *bd; | 507 | struct gfs2_bufdata *bd; |
| 506 | 508 | ||
| 507 | if (tr == NULL) { | 509 | if (tr == NULL) |
| 508 | gfs2_assert(sdp, list_empty(head)); | ||
| 509 | return; | 510 | return; |
| 510 | } | ||
| 511 | 511 | ||
| 512 | head = &tr->tr_buf; | ||
| 512 | while (!list_empty(head)) { | 513 | while (!list_empty(head)) { |
| 513 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); | 514 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); |
| 514 | list_del_init(&bd->bd_list); | 515 | list_del_init(&bd->bd_list); |
| 515 | sdp->sd_log_num_buf--; | ||
| 516 | |||
| 517 | gfs2_unpin(sdp, bd->bd_bh, tr); | 516 | gfs2_unpin(sdp, bd->bd_bh, tr); |
| 518 | } | 517 | } |
| 519 | gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); | ||
| 520 | } | 518 | } |
| 521 | 519 | ||
| 522 | static void buf_lo_before_scan(struct gfs2_jdesc *jd, | 520 | static void buf_lo_before_scan(struct gfs2_jdesc *jd, |
| 523 | struct gfs2_log_header_host *head, int pass) | 521 | struct gfs2_log_header_host *head, int pass) |
| 524 | { | 522 | { |
| 525 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 526 | |||
| 527 | if (pass != 0) | 523 | if (pass != 0) |
| 528 | return; | 524 | return; |
| 529 | 525 | ||
| 530 | sdp->sd_found_blocks = 0; | 526 | jd->jd_found_blocks = 0; |
| 531 | sdp->sd_replayed_blocks = 0; | 527 | jd->jd_replayed_blocks = 0; |
| 532 | } | 528 | } |
| 533 | 529 | ||
| 534 | static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 530 | static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
| @@ -551,9 +547,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 551 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { | 547 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { |
| 552 | blkno = be64_to_cpu(*ptr++); | 548 | blkno = be64_to_cpu(*ptr++); |
| 553 | 549 | ||
| 554 | sdp->sd_found_blocks++; | 550 | jd->jd_found_blocks++; |
| 555 | 551 | ||
| 556 | if (gfs2_revoke_check(sdp, blkno, start)) | 552 | if (gfs2_revoke_check(jd, blkno, start)) |
| 557 | continue; | 553 | continue; |
| 558 | 554 | ||
| 559 | error = gfs2_replay_read_block(jd, start, &bh_log); | 555 | error = gfs2_replay_read_block(jd, start, &bh_log); |
| @@ -574,7 +570,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 574 | if (error) | 570 | if (error) |
| 575 | break; | 571 | break; |
| 576 | 572 | ||
| 577 | sdp->sd_replayed_blocks++; | 573 | jd->jd_replayed_blocks++; |
| 578 | } | 574 | } |
| 579 | 575 | ||
| 580 | return error; | 576 | return error; |
| @@ -617,10 +613,10 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
| 617 | gfs2_meta_sync(ip->i_gl); | 613 | gfs2_meta_sync(ip->i_gl); |
| 618 | 614 | ||
| 619 | fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", | 615 | fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", |
| 620 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | 616 | jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); |
| 621 | } | 617 | } |
| 622 | 618 | ||
| 623 | static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | 619 | static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 624 | { | 620 | { |
| 625 | struct gfs2_meta_header *mh; | 621 | struct gfs2_meta_header *mh; |
| 626 | unsigned int offset; | 622 | unsigned int offset; |
| @@ -679,13 +675,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
| 679 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | 675 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, |
| 680 | struct gfs2_log_header_host *head, int pass) | 676 | struct gfs2_log_header_host *head, int pass) |
| 681 | { | 677 | { |
| 682 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 683 | |||
| 684 | if (pass != 0) | 678 | if (pass != 0) |
| 685 | return; | 679 | return; |
| 686 | 680 | ||
| 687 | sdp->sd_found_revokes = 0; | 681 | jd->jd_found_revokes = 0; |
| 688 | sdp->sd_replay_tail = head->lh_tail; | 682 | jd->jd_replay_tail = head->lh_tail; |
| 689 | } | 683 | } |
| 690 | 684 | ||
| 691 | static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 685 | static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
| @@ -717,13 +711,13 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 717 | while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { | 711 | while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { |
| 718 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); | 712 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); |
| 719 | 713 | ||
| 720 | error = gfs2_revoke_add(sdp, blkno, start); | 714 | error = gfs2_revoke_add(jd, blkno, start); |
| 721 | if (error < 0) { | 715 | if (error < 0) { |
| 722 | brelse(bh); | 716 | brelse(bh); |
| 723 | return error; | 717 | return error; |
| 724 | } | 718 | } |
| 725 | else if (error) | 719 | else if (error) |
| 726 | sdp->sd_found_revokes++; | 720 | jd->jd_found_revokes++; |
| 727 | 721 | ||
| 728 | if (!--revokes) | 722 | if (!--revokes) |
| 729 | break; | 723 | break; |
| @@ -743,16 +737,16 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
| 743 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 737 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
| 744 | 738 | ||
| 745 | if (error) { | 739 | if (error) { |
| 746 | gfs2_revoke_clean(sdp); | 740 | gfs2_revoke_clean(jd); |
| 747 | return; | 741 | return; |
| 748 | } | 742 | } |
| 749 | if (pass != 1) | 743 | if (pass != 1) |
| 750 | return; | 744 | return; |
| 751 | 745 | ||
| 752 | fs_info(sdp, "jid=%u: Found %u revoke tags\n", | 746 | fs_info(sdp, "jid=%u: Found %u revoke tags\n", |
| 753 | jd->jd_jid, sdp->sd_found_revokes); | 747 | jd->jd_jid, jd->jd_found_revokes); |
| 754 | 748 | ||
| 755 | gfs2_revoke_clean(sdp); | 749 | gfs2_revoke_clean(jd); |
| 756 | } | 750 | } |
| 757 | 751 | ||
| 758 | /** | 752 | /** |
| @@ -760,12 +754,14 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
| 760 | * | 754 | * |
| 761 | */ | 755 | */ |
| 762 | 756 | ||
| 763 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | 757 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 764 | { | 758 | { |
| 765 | unsigned int limit = buf_limit(sdp) / 2; | 759 | unsigned int limit = databuf_limit(sdp); |
| 766 | 760 | unsigned int nbuf; | |
| 767 | gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf, | 761 | if (tr == NULL) |
| 768 | &sdp->sd_log_le_databuf, 1); | 762 | return; |
| 763 | nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; | ||
| 764 | gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1); | ||
| 769 | } | 765 | } |
| 770 | 766 | ||
| 771 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 767 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
| @@ -789,9 +785,9 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 789 | blkno = be64_to_cpu(*ptr++); | 785 | blkno = be64_to_cpu(*ptr++); |
| 790 | esc = be64_to_cpu(*ptr++); | 786 | esc = be64_to_cpu(*ptr++); |
| 791 | 787 | ||
| 792 | sdp->sd_found_blocks++; | 788 | jd->jd_found_blocks++; |
| 793 | 789 | ||
| 794 | if (gfs2_revoke_check(sdp, blkno, start)) | 790 | if (gfs2_revoke_check(jd, blkno, start)) |
| 795 | continue; | 791 | continue; |
| 796 | 792 | ||
| 797 | error = gfs2_replay_read_block(jd, start, &bh_log); | 793 | error = gfs2_replay_read_block(jd, start, &bh_log); |
| @@ -811,7 +807,7 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 811 | brelse(bh_log); | 807 | brelse(bh_log); |
| 812 | brelse(bh_ip); | 808 | brelse(bh_ip); |
| 813 | 809 | ||
| 814 | sdp->sd_replayed_blocks++; | 810 | jd->jd_replayed_blocks++; |
| 815 | } | 811 | } |
| 816 | 812 | ||
| 817 | return error; | 813 | return error; |
| @@ -835,26 +831,23 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
| 835 | gfs2_meta_sync(ip->i_gl); | 831 | gfs2_meta_sync(ip->i_gl); |
| 836 | 832 | ||
| 837 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", | 833 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", |
| 838 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | 834 | jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); |
| 839 | } | 835 | } |
| 840 | 836 | ||
| 841 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 837 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 842 | { | 838 | { |
| 843 | struct list_head *head = &sdp->sd_log_le_databuf; | 839 | struct list_head *head; |
| 844 | struct gfs2_bufdata *bd; | 840 | struct gfs2_bufdata *bd; |
| 845 | 841 | ||
| 846 | if (tr == NULL) { | 842 | if (tr == NULL) |
| 847 | gfs2_assert(sdp, list_empty(head)); | ||
| 848 | return; | 843 | return; |
| 849 | } | ||
| 850 | 844 | ||
| 845 | head = &tr->tr_databuf; | ||
| 851 | while (!list_empty(head)) { | 846 | while (!list_empty(head)) { |
| 852 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); | 847 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); |
| 853 | list_del_init(&bd->bd_list); | 848 | list_del_init(&bd->bd_list); |
| 854 | sdp->sd_log_num_databuf--; | ||
| 855 | gfs2_unpin(sdp, bd->bd_bh, tr); | 849 | gfs2_unpin(sdp, bd->bd_bh, tr); |
| 856 | } | 850 | } |
| 857 | gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); | ||
| 858 | } | 851 | } |
| 859 | 852 | ||
| 860 | 853 | ||
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 9ca2e6438419..a65a7ba32ffd 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
| @@ -46,12 +46,13 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) | |||
| 46 | return limit; | 46 | return limit; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | static inline void lops_before_commit(struct gfs2_sbd *sdp) | 49 | static inline void lops_before_commit(struct gfs2_sbd *sdp, |
| 50 | struct gfs2_trans *tr) | ||
| 50 | { | 51 | { |
| 51 | int x; | 52 | int x; |
| 52 | for (x = 0; gfs2_log_ops[x]; x++) | 53 | for (x = 0; gfs2_log_ops[x]; x++) |
| 53 | if (gfs2_log_ops[x]->lo_before_commit) | 54 | if (gfs2_log_ops[x]->lo_before_commit) |
| 54 | gfs2_log_ops[x]->lo_before_commit(sdp); | 55 | gfs2_log_ops[x]->lo_before_commit(sdp, tr); |
| 55 | } | 56 | } |
| 56 | 57 | ||
| 57 | static inline void lops_after_commit(struct gfs2_sbd *sdp, | 58 | static inline void lops_after_commit(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c272e73063de..82b6ac829656 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
| 12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
| @@ -165,7 +167,7 @@ static int __init init_gfs2_fs(void) | |||
| 165 | 167 | ||
| 166 | gfs2_register_debugfs(); | 168 | gfs2_register_debugfs(); |
| 167 | 169 | ||
| 168 | printk("GFS2 installed\n"); | 170 | pr_info("GFS2 installed\n"); |
| 169 | 171 | ||
| 170 | return 0; | 172 | return 0; |
| 171 | 173 | ||
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index c7f24690ed05..2cf09b63a6b4 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
| @@ -97,6 +97,11 @@ const struct address_space_operations gfs2_meta_aops = { | |||
| 97 | .releasepage = gfs2_releasepage, | 97 | .releasepage = gfs2_releasepage, |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| 100 | const struct address_space_operations gfs2_rgrp_aops = { | ||
| 101 | .writepage = gfs2_aspace_writepage, | ||
| 102 | .releasepage = gfs2_releasepage, | ||
| 103 | }; | ||
| 104 | |||
| 100 | /** | 105 | /** |
| 101 | * gfs2_getbuf - Get a buffer with a given address space | 106 | * gfs2_getbuf - Get a buffer with a given address space |
| 102 | * @gl: the glock | 107 | * @gl: the glock |
| @@ -267,15 +272,10 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
| 267 | trace_gfs2_pin(bd, 0); | 272 | trace_gfs2_pin(bd, 0); |
| 268 | atomic_dec(&sdp->sd_log_pinned); | 273 | atomic_dec(&sdp->sd_log_pinned); |
| 269 | list_del_init(&bd->bd_list); | 274 | list_del_init(&bd->bd_list); |
| 270 | if (meta) { | 275 | if (meta) |
| 271 | gfs2_assert_warn(sdp, sdp->sd_log_num_buf); | ||
| 272 | sdp->sd_log_num_buf--; | ||
| 273 | tr->tr_num_buf_rm++; | 276 | tr->tr_num_buf_rm++; |
| 274 | } else { | 277 | else |
| 275 | gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); | ||
| 276 | sdp->sd_log_num_databuf--; | ||
| 277 | tr->tr_num_databuf_rm++; | 278 | tr->tr_num_databuf_rm++; |
| 278 | } | ||
| 279 | tr->tr_touched = 1; | 279 | tr->tr_touched = 1; |
| 280 | was_pinned = 1; | 280 | was_pinned = 1; |
| 281 | brelse(bh); | 281 | brelse(bh); |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 4823b934208a..ac5d8027d335 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
| @@ -38,12 +38,15 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, | |||
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | extern const struct address_space_operations gfs2_meta_aops; | 40 | extern const struct address_space_operations gfs2_meta_aops; |
| 41 | extern const struct address_space_operations gfs2_rgrp_aops; | ||
| 41 | 42 | ||
| 42 | static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) | 43 | static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) |
| 43 | { | 44 | { |
| 44 | struct inode *inode = mapping->host; | 45 | struct inode *inode = mapping->host; |
| 45 | if (mapping->a_ops == &gfs2_meta_aops) | 46 | if (mapping->a_ops == &gfs2_meta_aops) |
| 46 | return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; | 47 | return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; |
| 48 | else if (mapping->a_ops == &gfs2_rgrp_aops) | ||
| 49 | return container_of(mapping, struct gfs2_sbd, sd_aspace); | ||
| 47 | else | 50 | else |
| 48 | return inode->i_sb->s_fs_info; | 51 | return inode->i_sb->s_fs_info; |
| 49 | } | 52 | } |
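gfs2_mapping2sbd() now distinguishes three owners of an address_space: glock-embedded metadata mappings (recovered by pointer arithmetic), the sbd-embedded sd_aspace (recovered via container_of()), and ordinary inode mappings. A userspace demonstration of the container_of() step, with the macro re-derived from offsetof() and the structs reduced to stand-ins:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct address_space { int flags; };

struct sbd {
	int sd_id;
	struct address_space sd_aspace;	/* embedded, as in gfs2_sbd */
};

int main(void)
{
	struct sbd s = { .sd_id = 7 };
	struct address_space *m = &s.sd_aspace;

	/* given only the mapping, recover the enclosing sbd */
	struct sbd *back = container_of(m, struct sbd, sd_aspace);
	printf("sd_id=%d\n", back->sd_id);
	return 0;
}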
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c6872d09561a..22f954051bb8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
| @@ -104,7 +106,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 104 | mapping = &sdp->sd_aspace; | 106 | mapping = &sdp->sd_aspace; |
| 105 | 107 | ||
| 106 | address_space_init_once(mapping); | 108 | address_space_init_once(mapping); |
| 107 | mapping->a_ops = &gfs2_meta_aops; | 109 | mapping->a_ops = &gfs2_rgrp_aops; |
| 108 | mapping->host = sb->s_bdev->bd_inode; | 110 | mapping->host = sb->s_bdev->bd_inode; |
| 109 | mapping->flags = 0; | 111 | mapping->flags = 0; |
| 110 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 112 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
| @@ -114,9 +116,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 114 | 116 | ||
| 115 | spin_lock_init(&sdp->sd_log_lock); | 117 | spin_lock_init(&sdp->sd_log_lock); |
| 116 | atomic_set(&sdp->sd_log_pinned, 0); | 118 | atomic_set(&sdp->sd_log_pinned, 0); |
| 117 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); | ||
| 118 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); | 119 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); |
| 119 | INIT_LIST_HEAD(&sdp->sd_log_le_databuf); | ||
| 120 | INIT_LIST_HEAD(&sdp->sd_log_le_ordered); | 120 | INIT_LIST_HEAD(&sdp->sd_log_le_ordered); |
| 121 | spin_lock_init(&sdp->sd_ordered_lock); | 121 | spin_lock_init(&sdp->sd_ordered_lock); |
| 122 | 122 | ||
| @@ -130,8 +130,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 130 | atomic_set(&sdp->sd_log_in_flight, 0); | 130 | atomic_set(&sdp->sd_log_in_flight, 0); |
| 131 | init_waitqueue_head(&sdp->sd_log_flush_wait); | 131 | init_waitqueue_head(&sdp->sd_log_flush_wait); |
| 132 | 132 | ||
| 133 | INIT_LIST_HEAD(&sdp->sd_revoke_list); | ||
| 134 | |||
| 135 | return sdp; | 133 | return sdp; |
| 136 | } | 134 | } |
| 137 | 135 | ||
| @@ -154,7 +152,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) | |||
| 154 | if (sb->sb_magic != GFS2_MAGIC || | 152 | if (sb->sb_magic != GFS2_MAGIC || |
| 155 | sb->sb_type != GFS2_METATYPE_SB) { | 153 | sb->sb_type != GFS2_METATYPE_SB) { |
| 156 | if (!silent) | 154 | if (!silent) |
| 157 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | 155 | pr_warn("not a GFS2 filesystem\n"); |
| 158 | return -EINVAL; | 156 | return -EINVAL; |
| 159 | } | 157 | } |
| 160 | 158 | ||
| @@ -176,7 +174,7 @@ static void end_bio_io_page(struct bio *bio, int error) | |||
| 176 | if (!error) | 174 | if (!error) |
| 177 | SetPageUptodate(page); | 175 | SetPageUptodate(page); |
| 178 | else | 176 | else |
| 179 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | 177 | pr_warn("error %d reading superblock\n", error); |
| 180 | unlock_page(page); | 178 | unlock_page(page); |
| 181 | } | 179 | } |
| 182 | 180 | ||
| @@ -519,67 +517,6 @@ out: | |||
| 519 | return ret; | 517 | return ret; |
| 520 | } | 518 | } |
| 521 | 519 | ||
| 522 | /** | ||
| 523 | * map_journal_extents - create a reusable "extent" mapping from all logical | ||
| 524 | * blocks to all physical blocks for the given journal. This will save | ||
| 525 | * us time when writing journal blocks. Most journals will have only one | ||
| 526 | * extent that maps all their logical blocks. That's because gfs2.mkfs | ||
| 527 | * arranges the journal blocks sequentially to maximize performance. | ||
| 528 | * So the extent would map the first block for the entire file length. | ||
| 529 | * However, gfs2_jadd can happen while file activity is happening, so | ||
| 530 | * those journals may not be sequential. Less likely is the case where | ||
| 531 | * the users created their own journals by mounting the metafs and | ||
| 532 | * laying it out. But it's still possible. These journals might have | ||
| 533 | * several extents. | ||
| 534 | * | ||
| 535 | * TODO: This should be done in bigger chunks rather than one block at a time, | ||
| 536 | * but since it's only done at mount time, I'm not worried about the | ||
| 537 | * time it takes. | ||
| 538 | */ | ||
| 539 | static int map_journal_extents(struct gfs2_sbd *sdp) | ||
| 540 | { | ||
| 541 | struct gfs2_jdesc *jd = sdp->sd_jdesc; | ||
| 542 | unsigned int lb; | ||
| 543 | u64 db, prev_db; /* logical block, disk block, prev disk block */ | ||
| 544 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 545 | struct gfs2_journal_extent *jext = NULL; | ||
| 546 | struct buffer_head bh; | ||
| 547 | int rc = 0; | ||
| 548 | |||
| 549 | prev_db = 0; | ||
| 550 | |||
| 551 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { | ||
| 552 | bh.b_state = 0; | ||
| 553 | bh.b_blocknr = 0; | ||
| 554 | bh.b_size = 1 << ip->i_inode.i_blkbits; | ||
| 555 | rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); | ||
| 556 | db = bh.b_blocknr; | ||
| 557 | if (rc || !db) { | ||
| 558 | printk(KERN_INFO "GFS2 journal mapping error %d: lb=" | ||
| 559 | "%u db=%llu\n", rc, lb, (unsigned long long)db); | ||
| 560 | break; | ||
| 561 | } | ||
| 562 | if (!prev_db || db != prev_db + 1) { | ||
| 563 | jext = kzalloc(sizeof(struct gfs2_journal_extent), | ||
| 564 | GFP_KERNEL); | ||
| 565 | if (!jext) { | ||
| 566 | printk(KERN_INFO "GFS2 error: out of memory " | ||
| 567 | "mapping journal extents.\n"); | ||
| 568 | rc = -ENOMEM; | ||
| 569 | break; | ||
| 570 | } | ||
| 571 | jext->dblock = db; | ||
| 572 | jext->lblock = lb; | ||
| 573 | jext->blocks = 1; | ||
| 574 | list_add_tail(&jext->extent_list, &jd->extent_list); | ||
| 575 | } else { | ||
| 576 | jext->blocks++; | ||
| 577 | } | ||
| 578 | prev_db = db; | ||
| 579 | } | ||
| 580 | return rc; | ||
| 581 | } | ||
| 582 | |||
| 583 | static void gfs2_others_may_mount(struct gfs2_sbd *sdp) | 520 | static void gfs2_others_may_mount(struct gfs2_sbd *sdp) |
| 584 | { | 521 | { |
| 585 | char *message = "FIRSTMOUNT=Done"; | 522 | char *message = "FIRSTMOUNT=Done"; |
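The deleted map_journal_extents() above built the journal's extent list one block at a time, starting a new extent whenever the physical block was not contiguous with the previous one; the call site below now uses gfs2_map_journal_extents() instead. A sketch of the coalescing logic over a precomputed bmap array, whereas the real code resolves each block via gfs2_block_map():

#include <stdio.h>
#include <stdint.h>

struct extent { unsigned int lblock; uint64_t dblock; unsigned int blocks; };

/* Merge runs of physically contiguous blocks into extents. */
static int build_extents(const uint64_t *bmap, unsigned int nblocks,
			 struct extent *out)
{
	int n = 0;
	uint64_t prev = 0;

	for (unsigned int lb = 0; lb < nblocks; lb++) {
		uint64_t db = bmap[lb];
		if (n == 0 || db != prev + 1) {
			out[n].lblock = lb;	/* start a new extent */
			out[n].dblock = db;
			out[n].blocks = 1;
			n++;
		} else {
			out[n - 1].blocks++;	/* extend the current run */
		}
		prev = db;
	}
	return n;
}

int main(void)
{
	uint64_t bmap[] = { 100, 101, 102, 500, 501 };	/* two runs */
	struct extent ext[5];
	int n = build_extents(bmap, 5, ext);
	for (int i = 0; i < n; i++)
		printf("l=%u d=%llu len=%u\n", ext[i].lblock,
		       (unsigned long long)ext[i].dblock, ext[i].blocks);
	return 0;
}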
| @@ -638,6 +575,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
| 638 | break; | 575 | break; |
| 639 | 576 | ||
| 640 | INIT_LIST_HEAD(&jd->extent_list); | 577 | INIT_LIST_HEAD(&jd->extent_list); |
| 578 | INIT_LIST_HEAD(&jd->jd_revoke_list); | ||
| 579 | |||
| 641 | INIT_WORK(&jd->jd_work, gfs2_recover_func); | 580 | INIT_WORK(&jd->jd_work, gfs2_recover_func); |
| 642 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); | 581 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); |
| 643 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 582 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
| @@ -781,7 +720,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
| 781 | atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); | 720 | atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); |
| 782 | 721 | ||
| 783 | /* Map the extents for this journal's blocks */ | 722 | /* Map the extents for this journal's blocks */ |
| 784 | map_journal_extents(sdp); | 723 | gfs2_map_journal_extents(sdp, sdp->sd_jdesc); |
| 785 | } | 724 | } |
| 786 | trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); | 725 | trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); |
| 787 | 726 | ||
| @@ -1008,7 +947,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
| 1008 | lm = &gfs2_dlm_ops; | 947 | lm = &gfs2_dlm_ops; |
| 1009 | #endif | 948 | #endif |
| 1010 | } else { | 949 | } else { |
| 1011 | printk(KERN_INFO "GFS2: can't find protocol %s\n", proto); | 950 | pr_info("can't find protocol %s\n", proto); |
| 1012 | return -ENOENT; | 951 | return -ENOENT; |
| 1013 | } | 952 | } |
| 1014 | 953 | ||
| @@ -1115,7 +1054,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
| 1115 | 1054 | ||
| 1116 | sdp = init_sbd(sb); | 1055 | sdp = init_sbd(sb); |
| 1117 | if (!sdp) { | 1056 | if (!sdp) { |
| 1118 | printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); | 1057 | pr_warn("can't alloc struct gfs2_sbd\n"); |
| 1119 | return -ENOMEM; | 1058 | return -ENOMEM; |
| 1120 | } | 1059 | } |
| 1121 | sdp->sd_args = *args; | 1060 | sdp->sd_args = *args; |
| @@ -1363,7 +1302,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, | |||
| 1363 | 1302 | ||
| 1364 | error = gfs2_mount_args(&args, data); | 1303 | error = gfs2_mount_args(&args, data); |
| 1365 | if (error) { | 1304 | if (error) { |
| 1366 | printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); | 1305 | pr_warn("can't parse mount arguments\n"); |
| 1367 | goto error_super; | 1306 | goto error_super; |
| 1368 | } | 1307 | } |
| 1369 | 1308 | ||
| @@ -1413,15 +1352,15 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, | |||
| 1413 | 1352 | ||
| 1414 | error = kern_path(dev_name, LOOKUP_FOLLOW, &path); | 1353 | error = kern_path(dev_name, LOOKUP_FOLLOW, &path); |
| 1415 | if (error) { | 1354 | if (error) { |
| 1416 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", | 1355 | pr_warn("path_lookup on %s returned error %d\n", |
| 1417 | dev_name, error); | 1356 | dev_name, error); |
| 1418 | return ERR_PTR(error); | 1357 | return ERR_PTR(error); |
| 1419 | } | 1358 | } |
| 1420 | s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, | 1359 | s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, |
| 1421 | path.dentry->d_inode->i_sb->s_bdev); | 1360 | path.dentry->d_inode->i_sb->s_bdev); |
| 1422 | path_put(&path); | 1361 | path_put(&path); |
| 1423 | if (IS_ERR(s)) { | 1362 | if (IS_ERR(s)) { |
| 1424 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1363 | pr_warn("gfs2 mount does not exist\n"); |
| 1425 | return ERR_CAST(s); | 1364 | return ERR_CAST(s); |
| 1426 | } | 1365 | } |
| 1427 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 1366 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8bec0e3192dd..c4effff7cf55 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -36,6 +36,8 @@ | |||
| 36 | * the quota file, so it is not being constantly read. | 36 | * the quota file, so it is not being constantly read. |
| 37 | */ | 37 | */ |
| 38 | 38 | ||
| 39 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 40 | |||
| 39 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
| 40 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
| 41 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
| @@ -330,6 +332,7 @@ static int slot_get(struct gfs2_quota_data *qd) | |||
| 330 | if (bit < sdp->sd_quota_slots) { | 332 | if (bit < sdp->sd_quota_slots) { |
| 331 | set_bit(bit, sdp->sd_quota_bitmap); | 333 | set_bit(bit, sdp->sd_quota_bitmap); |
| 332 | qd->qd_slot = bit; | 334 | qd->qd_slot = bit; |
| 335 | error = 0; | ||
| 333 | out: | 336 | out: |
| 334 | qd->qd_slot_count++; | 337 | qd->qd_slot_count++; |
| 335 | } | 338 | } |
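The added "error = 0;" above fixes a goto-style bug: when a free quota slot was found, slot_get() fell through to the out: label still carrying the stale error value from before the bitmap search, so a successful call could report failure. The pattern and the fix, distilled; the -28 default stands in for -ENOSPC and the rest of the function is elided:

#include <stdio.h>

static int slot_get(int have_free_slot)
{
	int error = -28;	/* stale error left over from the search */

	if (!have_free_slot)
		goto out;	/* genuinely out of slots */

	/* found a slot: without this reset, the stale error leaks out */
	error = 0;
out:
	return error;
}

int main(void)
{
	printf("found: %d, full: %d\n", slot_get(1), slot_get(0));
	return 0;
}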
| @@ -1081,10 +1084,10 @@ static int print_message(struct gfs2_quota_data *qd, char *type) | |||
| 1081 | { | 1084 | { |
| 1082 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 1085 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; |
| 1083 | 1086 | ||
| 1084 | printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", | 1087 | fs_info(sdp, "quota %s for %s %u\n", |
| 1085 | sdp->sd_fsname, type, | 1088 | type, |
| 1086 | (qd->qd_id.type == USRQUOTA) ? "user" : "group", | 1089 | (qd->qd_id.type == USRQUOTA) ? "user" : "group", |
| 1087 | from_kqid(&init_user_ns, qd->qd_id)); | 1090 | from_kqid(&init_user_ns, qd->qd_id)); |
| 1088 | 1091 | ||
| 1089 | return 0; | 1092 | return 0; |
| 1090 | } | 1093 | } |
| @@ -1242,14 +1245,13 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
| 1242 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); | 1245 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); |
| 1243 | bm_size *= sizeof(unsigned long); | 1246 | bm_size *= sizeof(unsigned long); |
| 1244 | error = -ENOMEM; | 1247 | error = -ENOMEM; |
| 1245 | sdp->sd_quota_bitmap = kmalloc(bm_size, GFP_NOFS|__GFP_NOWARN); | 1248 | sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); |
| 1246 | if (sdp->sd_quota_bitmap == NULL) | 1249 | if (sdp->sd_quota_bitmap == NULL) |
| 1247 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS, PAGE_KERNEL); | 1250 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | |
| 1251 | __GFP_ZERO, PAGE_KERNEL); | ||
| 1248 | if (!sdp->sd_quota_bitmap) | 1252 | if (!sdp->sd_quota_bitmap) |
| 1249 | return error; | 1253 | return error; |
| 1250 | 1254 | ||
| 1251 | memset(sdp->sd_quota_bitmap, 0, bm_size); | ||
| 1252 | |||
| 1253 | for (x = 0; x < blocks; x++) { | 1255 | for (x = 0; x < blocks; x++) { |
| 1254 | struct buffer_head *bh; | 1256 | struct buffer_head *bh; |
| 1255 | const struct gfs2_quota_change *qc; | 1257 | const struct gfs2_quota_change *qc; |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 963b2d75200c..7ad4094d68c0 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -52,9 +52,9 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | |||
| 52 | return error; | 52 | return error; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | 55 | int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) |
| 56 | { | 56 | { |
| 57 | struct list_head *head = &sdp->sd_revoke_list; | 57 | struct list_head *head = &jd->jd_revoke_list; |
| 58 | struct gfs2_revoke_replay *rr; | 58 | struct gfs2_revoke_replay *rr; |
| 59 | int found = 0; | 59 | int found = 0; |
| 60 | 60 | ||
| @@ -81,13 +81,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
| 81 | return 1; | 81 | return 1; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | 84 | int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) |
| 85 | { | 85 | { |
| 86 | struct gfs2_revoke_replay *rr; | 86 | struct gfs2_revoke_replay *rr; |
| 87 | int wrap, a, b, revoke; | 87 | int wrap, a, b, revoke; |
| 88 | int found = 0; | 88 | int found = 0; |
| 89 | 89 | ||
| 90 | list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { | 90 | list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) { |
| 91 | if (rr->rr_blkno == blkno) { | 91 | if (rr->rr_blkno == blkno) { |
| 92 | found = 1; | 92 | found = 1; |
| 93 | break; | 93 | break; |
| @@ -97,17 +97,17 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
| 97 | if (!found) | 97 | if (!found) |
| 98 | return 0; | 98 | return 0; |
| 99 | 99 | ||
| 100 | wrap = (rr->rr_where < sdp->sd_replay_tail); | 100 | wrap = (rr->rr_where < jd->jd_replay_tail); |
| 101 | a = (sdp->sd_replay_tail < where); | 101 | a = (jd->jd_replay_tail < where); |
| 102 | b = (where < rr->rr_where); | 102 | b = (where < rr->rr_where); |
| 103 | revoke = (wrap) ? (a || b) : (a && b); | 103 | revoke = (wrap) ? (a || b) : (a && b); |
| 104 | 104 | ||
| 105 | return revoke; | 105 | return revoke; |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | void gfs2_revoke_clean(struct gfs2_sbd *sdp) | 108 | void gfs2_revoke_clean(struct gfs2_jdesc *jd) |
| 109 | { | 109 | { |
| 110 | struct list_head *head = &sdp->sd_revoke_list; | 110 | struct list_head *head = &jd->jd_revoke_list; |
| 111 | struct gfs2_revoke_replay *rr; | 111 | struct gfs2_revoke_replay *rr; |
| 112 | 112 | ||
| 113 | while (!list_empty(head)) { | 113 | while (!list_empty(head)) { |
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 2226136c7647..6142836cce96 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h | |||
| @@ -23,9 +23,9 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) | |||
| 23 | extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | 23 | extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, |
| 24 | struct buffer_head **bh); | 24 | struct buffer_head **bh); |
| 25 | 25 | ||
| 26 | extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | 26 | extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); |
| 27 | extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | 27 | extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); |
| 28 | extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); | 28 | extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); |
| 29 | 29 | ||
| 30 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, | 30 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, |
| 31 | struct gfs2_log_header_host *head); | 31 | struct gfs2_log_header_host *head); |
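These recovery hunks move the revoke-replay state from the superblock (sd_revoke_list, sd_replay_tail) onto the journal descriptor (jd_revoke_list, jd_replay_tail), so each journal being replayed carries its own bookkeeping. The wraparound test in gfs2_revoke_check() is worth spelling out; the following is a direct restatement of the predicate from the diff:

/* A block logged at position 'where' is still revoked unless that
 * position falls after the revoke record 'rr_where', measured from
 * 'replay_tail' around the circular log. */
static int is_revoked(unsigned int replay_tail, unsigned int rr_where,
                      unsigned int where)
{
        int wrap = (rr_where < replay_tail);    /* window wraps log end */
        int a = (replay_tail < where);
        int b = (where < rr_where);

        return wrap ? (a || b) : (a && b);
}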
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index a1da21349235..281a7716e3f3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
| 12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
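rgrp.c (like super.c, sys.c, trans.c and util.c below) gains a pr_fmt definition ahead of the first include, which makes every pr_*() call in the file prefix its output with the module name and lets the hand-written "GFS2: " prefixes be dropped from individual format strings. The mechanism, shown in isolation:

/* Must precede any include that drags in <linux/printk.h>. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void example(void)
{
        pr_warn("bad bitmap state\n");  /* emits "gfs2: bad bitmap state" */
}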
| @@ -99,12 +101,12 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, | |||
| 99 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; | 101 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; |
| 100 | 102 | ||
| 101 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { | 103 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { |
| 102 | printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " | 104 | pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", |
| 103 | "new_state=%d\n", rbm->offset, cur_state, new_state); | 105 | rbm->offset, cur_state, new_state); |
| 104 | printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", | 106 | pr_warn("rgrp=0x%llx bi_start=0x%x\n", |
| 105 | (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); | 107 | (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); |
| 106 | printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", | 108 | pr_warn("bi_offset=0x%x bi_len=0x%x\n", |
| 107 | bi->bi_offset, bi->bi_len); | 109 | bi->bi_offset, bi->bi_len); |
| 108 | dump_stack(); | 110 | dump_stack(); |
| 109 | gfs2_consist_rgrpd(rbm->rgd); | 111 | gfs2_consist_rgrpd(rbm->rgd); |
| 110 | return; | 112 | return; |
| @@ -736,11 +738,11 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | |||
| 736 | 738 | ||
| 737 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) | 739 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) |
| 738 | { | 740 | { |
| 739 | printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); | 741 | pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); |
| 740 | printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); | 742 | pr_info("ri_length = %u\n", rgd->rd_length); |
| 741 | printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); | 743 | pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); |
| 742 | printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); | 744 | pr_info("ri_data = %u\n", rgd->rd_data); |
| 743 | printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); | 745 | pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes); |
| 744 | } | 746 | } |
| 745 | 747 | ||
| 746 | /** | 748 | /** |
| @@ -1102,7 +1104,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) | |||
| 1102 | * Returns: errno | 1104 | * Returns: errno |
| 1103 | */ | 1105 | */ |
| 1104 | 1106 | ||
| 1105 | int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | 1107 | static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) |
| 1106 | { | 1108 | { |
| 1107 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 1109 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
| 1108 | struct gfs2_glock *gl = rgd->rd_gl; | 1110 | struct gfs2_glock *gl = rgd->rd_gl; |
| @@ -1169,7 +1171,7 @@ fail: | |||
| 1169 | return error; | 1171 | return error; |
| 1170 | } | 1172 | } |
| 1171 | 1173 | ||
| 1172 | int update_rgrp_lvb(struct gfs2_rgrpd *rgd) | 1174 | static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) |
| 1173 | { | 1175 | { |
| 1174 | u32 rl_flags; | 1176 | u32 rl_flags; |
| 1175 | 1177 | ||
| @@ -2278,7 +2280,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
| 2278 | } | 2280 | } |
| 2279 | } | 2281 | } |
| 2280 | if (rbm.rgd->rd_free < *nblocks) { | 2282 | if (rbm.rgd->rd_free < *nblocks) { |
| 2281 | printk(KERN_WARNING "nblocks=%u\n", *nblocks); | 2283 | pr_warn("nblocks=%u\n", *nblocks); |
| 2282 | goto rgrp_error; | 2284 | goto rgrp_error; |
| 2283 | } | 2285 | } |
| 2284 | 2286 | ||
| @@ -2296,7 +2298,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
| 2296 | 2298 | ||
| 2297 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); | 2299 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); |
| 2298 | if (dinode) | 2300 | if (dinode) |
| 2299 | gfs2_trans_add_unrevoke(sdp, block, 1); | 2301 | gfs2_trans_add_unrevoke(sdp, block, *nblocks); |
| 2300 | 2302 | ||
| 2301 | gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); | 2303 | gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); |
| 2302 | 2304 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 60f60f6181f3..de8afad89e51 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/bio.h> | 12 | #include <linux/bio.h> |
| 11 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
| 12 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| @@ -175,8 +177,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 175 | break; | 177 | break; |
| 176 | case Opt_debug: | 178 | case Opt_debug: |
| 177 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 179 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
| 178 | printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " | 180 | pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); |
| 179 | "are mutually exclusive.\n"); | ||
| 180 | return -EINVAL; | 181 | return -EINVAL; |
| 181 | } | 182 | } |
| 182 | args->ar_debug = 1; | 183 | args->ar_debug = 1; |
| @@ -228,21 +229,21 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 228 | case Opt_commit: | 229 | case Opt_commit: |
| 229 | rv = match_int(&tmp[0], &args->ar_commit); | 230 | rv = match_int(&tmp[0], &args->ar_commit); |
| 230 | if (rv || args->ar_commit <= 0) { | 231 | if (rv || args->ar_commit <= 0) { |
| 231 | printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); | 232 | pr_warn("commit mount option requires a positive numeric argument\n"); |
| 232 | return rv ? rv : -EINVAL; | 233 | return rv ? rv : -EINVAL; |
| 233 | } | 234 | } |
| 234 | break; | 235 | break; |
| 235 | case Opt_statfs_quantum: | 236 | case Opt_statfs_quantum: |
| 236 | rv = match_int(&tmp[0], &args->ar_statfs_quantum); | 237 | rv = match_int(&tmp[0], &args->ar_statfs_quantum); |
| 237 | if (rv || args->ar_statfs_quantum < 0) { | 238 | if (rv || args->ar_statfs_quantum < 0) { |
| 238 | printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); | 239 | pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n"); |
| 239 | return rv ? rv : -EINVAL; | 240 | return rv ? rv : -EINVAL; |
| 240 | } | 241 | } |
| 241 | break; | 242 | break; |
| 242 | case Opt_quota_quantum: | 243 | case Opt_quota_quantum: |
| 243 | rv = match_int(&tmp[0], &args->ar_quota_quantum); | 244 | rv = match_int(&tmp[0], &args->ar_quota_quantum); |
| 244 | if (rv || args->ar_quota_quantum <= 0) { | 245 | if (rv || args->ar_quota_quantum <= 0) { |
| 245 | printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); | 246 | pr_warn("quota_quantum mount option requires a positive numeric argument\n"); |
| 246 | return rv ? rv : -EINVAL; | 247 | return rv ? rv : -EINVAL; |
| 247 | } | 248 | } |
| 248 | break; | 249 | break; |
| @@ -250,7 +251,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 250 | rv = match_int(&tmp[0], &args->ar_statfs_percent); | 251 | rv = match_int(&tmp[0], &args->ar_statfs_percent); |
| 251 | if (rv || args->ar_statfs_percent < 0 || | 252 | if (rv || args->ar_statfs_percent < 0 || |
| 252 | args->ar_statfs_percent > 100) { | 253 | args->ar_statfs_percent > 100) { |
| 253 | printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); | 254 | pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n"); |
| 254 | return rv ? rv : -EINVAL; | 255 | return rv ? rv : -EINVAL; |
| 255 | } | 256 | } |
| 256 | break; | 257 | break; |
| @@ -259,8 +260,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 259 | break; | 260 | break; |
| 260 | case Opt_err_panic: | 261 | case Opt_err_panic: |
| 261 | if (args->ar_debug) { | 262 | if (args->ar_debug) { |
| 262 | printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " | 263 | pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); |
| 263 | "are mutually exclusive.\n"); | ||
| 264 | return -EINVAL; | 264 | return -EINVAL; |
| 265 | } | 265 | } |
| 266 | args->ar_errors = GFS2_ERRORS_PANIC; | 266 | args->ar_errors = GFS2_ERRORS_PANIC; |
| @@ -279,7 +279,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 279 | break; | 279 | break; |
| 280 | case Opt_error: | 280 | case Opt_error: |
| 281 | default: | 281 | default: |
| 282 | printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); | 282 | pr_warn("invalid mount option: %s\n", o); |
| 283 | return -EINVAL; | 283 | return -EINVAL; |
| 284 | } | 284 | } |
| 285 | } | 285 | } |
| @@ -295,9 +295,8 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 295 | 295 | ||
| 296 | void gfs2_jindex_free(struct gfs2_sbd *sdp) | 296 | void gfs2_jindex_free(struct gfs2_sbd *sdp) |
| 297 | { | 297 | { |
| 298 | struct list_head list, *head; | 298 | struct list_head list; |
| 299 | struct gfs2_jdesc *jd; | 299 | struct gfs2_jdesc *jd; |
| 300 | struct gfs2_journal_extent *jext; | ||
| 301 | 300 | ||
| 302 | spin_lock(&sdp->sd_jindex_spin); | 301 | spin_lock(&sdp->sd_jindex_spin); |
| 303 | list_add(&list, &sdp->sd_jindex_list); | 302 | list_add(&list, &sdp->sd_jindex_list); |
| @@ -307,14 +306,7 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) | |||
| 307 | 306 | ||
| 308 | while (!list_empty(&list)) { | 307 | while (!list_empty(&list)) { |
| 309 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); | 308 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); |
| 310 | head = &jd->extent_list; | 309 | gfs2_free_journal_extents(jd); |
| 311 | while (!list_empty(head)) { | ||
| 312 | jext = list_entry(head->next, | ||
| 313 | struct gfs2_journal_extent, | ||
| 314 | extent_list); | ||
| 315 | list_del(&jext->extent_list); | ||
| 316 | kfree(jext); | ||
| 317 | } | ||
| 318 | list_del(&jd->jd_list); | 310 | list_del(&jd->jd_list); |
| 319 | iput(jd->jd_inode); | 311 | iput(jd->jd_inode); |
| 320 | kfree(jd); | 312 | kfree(jd); |
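gfs2_jindex_free() now delegates the per-journal extent teardown to gfs2_free_journal_extents(). A sketch of what such a helper does, modeled on the loop removed here (the list_for_each_entry_safe form and the exact body are assumptions; the removed code walked head->next by hand):

static void free_journal_extents(struct gfs2_jdesc *jd)
{
        struct gfs2_journal_extent *jext, *tmp;

        /* Safe iteration, since each entry is unlinked and freed. */
        list_for_each_entry_safe(jext, tmp, &jd->extent_list, extent_list) {
                list_del(&jext->extent_list);
                kfree(jext);
        }
}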
| @@ -1175,6 +1167,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 1175 | struct gfs2_tune *gt = &sdp->sd_tune; | 1167 | struct gfs2_tune *gt = &sdp->sd_tune; |
| 1176 | int error; | 1168 | int error; |
| 1177 | 1169 | ||
| 1170 | sync_filesystem(sb); | ||
| 1171 | |||
| 1178 | spin_lock(>->gt_spin); | 1172 | spin_lock(>->gt_spin); |
| 1179 | args.ar_commit = gt->gt_logd_secs; | 1173 | args.ar_commit = gt->gt_logd_secs; |
| 1180 | args.ar_quota_quantum = gt->gt_quota_quantum; | 1174 | args.ar_quota_quantum = gt->gt_quota_quantum; |
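The sync_filesystem(sb) call at the top of gfs2_remount_fs() matches the same one-liner added to adfs, affs, hfs, hfsplus and hpfs elsewhere in this diff: remount implementations now flush the filesystem themselves before mount flags change, rather than relying on the VFS to have done it. The resulting shape of any ->remount_fs(), sketched:

#include <linux/fs.h>

static int example_remount(struct super_block *sb, int *flags, char *data)
{
        sync_filesystem(sb);    /* flush dirty state before flags change */

        /* ... parse 'data', validate, adjust *flags ... */
        return 0;
}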
| @@ -1256,7 +1250,7 @@ static int gfs2_drop_inode(struct inode *inode) | |||
| 1256 | { | 1250 | { |
| 1257 | struct gfs2_inode *ip = GFS2_I(inode); | 1251 | struct gfs2_inode *ip = GFS2_I(inode); |
| 1258 | 1252 | ||
| 1259 | if (inode->i_nlink) { | 1253 | if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { |
| 1260 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; | 1254 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; |
| 1261 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1255 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) |
| 1262 | clear_nlink(inode); | 1256 | clear_nlink(inode); |
| @@ -1471,6 +1465,11 @@ static void gfs2_evict_inode(struct inode *inode) | |||
| 1471 | struct gfs2_holder gh; | 1465 | struct gfs2_holder gh; |
| 1472 | int error; | 1466 | int error; |
| 1473 | 1467 | ||
| 1468 | if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { | ||
| 1469 | clear_inode(inode); | ||
| 1470 | return; | ||
| 1471 | } | ||
| 1472 | |||
| 1474 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) | 1473 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) |
| 1475 | goto out; | 1474 | goto out; |
| 1476 | 1475 | ||
| @@ -1558,7 +1557,7 @@ out_unlock: | |||
| 1558 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); | 1557 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); |
| 1559 | out: | 1558 | out: |
| 1560 | /* Case 3 starts here */ | 1559 | /* Case 3 starts here */ |
| 1561 | truncate_inode_pages(&inode->i_data, 0); | 1560 | truncate_inode_pages_final(&inode->i_data); |
| 1562 | gfs2_rs_delete(ip, NULL); | 1561 | gfs2_rs_delete(ip, NULL); |
| 1563 | gfs2_ordered_del_inode(ip); | 1562 | gfs2_ordered_del_inode(ip); |
| 1564 | clear_inode(inode); | 1563 | clear_inode(inode); |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d09f6edda0ff..de25d5577e5d 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
| 12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
| @@ -138,9 +140,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
| 138 | if (simple_strtol(buf, NULL, 0) != 1) | 140 | if (simple_strtol(buf, NULL, 0) != 1) |
| 139 | return -EINVAL; | 141 | return -EINVAL; |
| 140 | 142 | ||
| 141 | gfs2_lm_withdraw(sdp, | 143 | gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); |
| 142 | "GFS2: fsid=%s: withdrawing from cluster at user's request\n", | 144 | |
| 143 | sdp->sd_fsname); | ||
| 144 | return len; | 145 | return len; |
| 145 | } | 146 | } |
| 146 | 147 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 2b20d7046bf3..bead90d27bad 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
| @@ -51,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
| 51 | if (revokes) | 53 | if (revokes) |
| 52 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, | 54 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, |
| 53 | sizeof(u64)); | 55 | sizeof(u64)); |
| 56 | INIT_LIST_HEAD(&tr->tr_databuf); | ||
| 57 | INIT_LIST_HEAD(&tr->tr_buf); | ||
| 58 | |||
| 54 | sb_start_intwrite(sdp->sd_vfs); | 59 | sb_start_intwrite(sdp->sd_vfs); |
| 55 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); | 60 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); |
| 56 | 61 | ||
| @@ -96,14 +101,13 @@ static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) | |||
| 96 | 101 | ||
| 97 | static void gfs2_print_trans(const struct gfs2_trans *tr) | 102 | static void gfs2_print_trans(const struct gfs2_trans *tr) |
| 98 | { | 103 | { |
| 99 | printk(KERN_WARNING "GFS2: Transaction created at: %pSR\n", | 104 | pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); |
| 100 | (void *)tr->tr_ip); | 105 | pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n", |
| 101 | printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n", | 106 | tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); |
| 102 | tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); | 107 | pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n", |
| 103 | printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n", | 108 | tr->tr_num_buf_new, tr->tr_num_buf_rm, |
| 104 | tr->tr_num_buf_new, tr->tr_num_buf_rm, | 109 | tr->tr_num_databuf_new, tr->tr_num_databuf_rm, |
| 105 | tr->tr_num_databuf_new, tr->tr_num_databuf_rm, | 110 | tr->tr_num_revoke, tr->tr_num_revoke_rm); |
| 106 | tr->tr_num_revoke, tr->tr_num_revoke_rm); | ||
| 107 | } | 111 | } |
| 108 | 112 | ||
| 109 | void gfs2_trans_end(struct gfs2_sbd *sdp) | 113 | void gfs2_trans_end(struct gfs2_sbd *sdp) |
| @@ -210,8 +214,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) | |||
| 210 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | 214 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); |
| 211 | gfs2_pin(sdp, bd->bd_bh); | 215 | gfs2_pin(sdp, bd->bd_bh); |
| 212 | tr->tr_num_databuf_new++; | 216 | tr->tr_num_databuf_new++; |
| 213 | sdp->sd_log_num_databuf++; | 217 | list_add_tail(&bd->bd_list, &tr->tr_databuf); |
| 214 | list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf); | ||
| 215 | } | 218 | } |
| 216 | gfs2_log_unlock(sdp); | 219 | gfs2_log_unlock(sdp); |
| 217 | unlock_buffer(bh); | 220 | unlock_buffer(bh); |
| @@ -230,16 +233,14 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
| 230 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | 233 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); |
| 231 | mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; | 234 | mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; |
| 232 | if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { | 235 | if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { |
| 233 | printk(KERN_ERR | 236 | pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", |
| 234 | "Attempting to add uninitialised block to journal (inplace block=%lld)\n", | ||
| 235 | (unsigned long long)bd->bd_bh->b_blocknr); | 237 | (unsigned long long)bd->bd_bh->b_blocknr); |
| 236 | BUG(); | 238 | BUG(); |
| 237 | } | 239 | } |
| 238 | gfs2_pin(sdp, bd->bd_bh); | 240 | gfs2_pin(sdp, bd->bd_bh); |
| 239 | mh->__pad0 = cpu_to_be64(0); | 241 | mh->__pad0 = cpu_to_be64(0); |
| 240 | mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); | 242 | mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); |
| 241 | sdp->sd_log_num_buf++; | 243 | list_add(&bd->bd_list, &tr->tr_buf); |
| 242 | list_add(&bd->bd_list, &sdp->sd_log_le_buf); | ||
| 243 | tr->tr_num_buf_new++; | 244 | tr->tr_num_buf_new++; |
| 244 | } | 245 | } |
| 245 | 246 | ||
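The trans.c hunks move log-buffer tracking from global superblock lists (sd_log_le_buf, sd_log_le_databuf, with their sd_log_num_* counters) onto the transaction itself: gfs2_trans_begin() initializes tr_buf and tr_databuf, and the add paths queue each gfs2_bufdata there. Condensed from the hunks above, the new flow is roughly:

/* In gfs2_trans_begin(): each transaction owns its buffer lists. */
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);

/* In gfs2_trans_add_data() / meta_lo_add(): queue on the transaction,
 * not on a superblock-wide list, so no global counter is needed. */
list_add_tail(&bd->bd_list, &tr->tr_databuf);
tr->tr_num_databuf_new++;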
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f7109f689e61..86d2035ac669 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
| 11 | #include <linux/completion.h> | 13 | #include <linux/completion.h> |
| 12 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
| @@ -30,22 +32,27 @@ mempool_t *gfs2_page_pool __read_mostly; | |||
| 30 | 32 | ||
| 31 | void gfs2_assert_i(struct gfs2_sbd *sdp) | 33 | void gfs2_assert_i(struct gfs2_sbd *sdp) |
| 32 | { | 34 | { |
| 33 | printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", | 35 | fs_emerg(sdp, "fatal assertion failed\n"); |
| 34 | sdp->sd_fsname); | ||
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | 38 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) |
| 38 | { | 39 | { |
| 39 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 40 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
| 40 | const struct lm_lockops *lm = ls->ls_ops; | 41 | const struct lm_lockops *lm = ls->ls_ops; |
| 41 | va_list args; | 42 | va_list args; |
| 43 | struct va_format vaf; | ||
| 42 | 44 | ||
| 43 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && | 45 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && |
| 44 | test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | 46 | test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) |
| 45 | return 0; | 47 | return 0; |
| 46 | 48 | ||
| 47 | va_start(args, fmt); | 49 | va_start(args, fmt); |
| 48 | vprintk(fmt, args); | 50 | |
| 51 | vaf.fmt = fmt; | ||
| 52 | vaf.va = &args; | ||
| 53 | |||
| 54 | fs_err(sdp, "%pV", &vaf); | ||
| 55 | |||
| 49 | va_end(args); | 56 | va_end(args); |
| 50 | 57 | ||
| 51 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { | 58 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { |
| @@ -66,7 +73,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | |||
| 66 | } | 73 | } |
| 67 | 74 | ||
| 68 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) | 75 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) |
| 69 | panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname); | 76 | panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); |
| 70 | 77 | ||
| 71 | return -1; | 78 | return -1; |
| 72 | } | 79 | } |
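gfs2_lm_withdraw() now forwards its varargs through struct va_format and printk's %pV extension, which lets fs_err() prepend the fsid to a caller-supplied format without building an intermediate buffer. The technique in generic form (prefixed_err is a made-up name):

#include <linux/kernel.h>
#include <linux/printk.h>

static void prefixed_err(const char *prefix, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        /* %pV expands the wrapped format and its arguments in place. */
        pr_err("%s: %pV", prefix, &vaf);
        va_end(args);
}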
| @@ -82,10 +89,9 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, | |||
| 82 | { | 89 | { |
| 83 | int me; | 90 | int me; |
| 84 | me = gfs2_lm_withdraw(sdp, | 91 | me = gfs2_lm_withdraw(sdp, |
| 85 | "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" | 92 | "fatal: assertion \"%s\" failed\n" |
| 86 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 93 | " function = %s, file = %s, line = %u\n", |
| 87 | sdp->sd_fsname, assertion, | 94 | assertion, function, file, line); |
| 88 | sdp->sd_fsname, function, file, line); | ||
| 89 | dump_stack(); | 95 | dump_stack(); |
| 90 | return (me) ? -1 : -2; | 96 | return (me) ? -1 : -2; |
| 91 | } | 97 | } |
| @@ -105,11 +111,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, | |||
| 105 | return -2; | 111 | return -2; |
| 106 | 112 | ||
| 107 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) | 113 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) |
| 108 | printk(KERN_WARNING | 114 | fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", |
| 109 | "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" | 115 | assertion, function, file, line); |
| 110 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 111 | sdp->sd_fsname, assertion, | ||
| 112 | sdp->sd_fsname, function, file, line); | ||
| 113 | 116 | ||
| 114 | if (sdp->sd_args.ar_debug) | 117 | if (sdp->sd_args.ar_debug) |
| 115 | BUG(); | 118 | BUG(); |
| @@ -138,10 +141,8 @@ int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, | |||
| 138 | { | 141 | { |
| 139 | int rv; | 142 | int rv; |
| 140 | rv = gfs2_lm_withdraw(sdp, | 143 | rv = gfs2_lm_withdraw(sdp, |
| 141 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 144 | "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", |
| 142 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 145 | function, file, line); |
| 143 | sdp->sd_fsname, | ||
| 144 | sdp->sd_fsname, function, file, line); | ||
| 145 | return rv; | 146 | return rv; |
| 146 | } | 147 | } |
| 147 | 148 | ||
| @@ -157,13 +158,12 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, | |||
| 157 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 158 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 158 | int rv; | 159 | int rv; |
| 159 | rv = gfs2_lm_withdraw(sdp, | 160 | rv = gfs2_lm_withdraw(sdp, |
| 160 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 161 | "fatal: filesystem consistency error\n" |
| 161 | "GFS2: fsid=%s: inode = %llu %llu\n" | 162 | " inode = %llu %llu\n" |
| 162 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 163 | " function = %s, file = %s, line = %u\n", |
| 163 | sdp->sd_fsname, | 164 | (unsigned long long)ip->i_no_formal_ino, |
| 164 | sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino, | 165 | (unsigned long long)ip->i_no_addr, |
| 165 | (unsigned long long)ip->i_no_addr, | 166 | function, file, line); |
| 166 | sdp->sd_fsname, function, file, line); | ||
| 167 | return rv; | 167 | return rv; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| @@ -179,12 +179,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, | |||
| 179 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 179 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
| 180 | int rv; | 180 | int rv; |
| 181 | rv = gfs2_lm_withdraw(sdp, | 181 | rv = gfs2_lm_withdraw(sdp, |
| 182 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 182 | "fatal: filesystem consistency error\n" |
| 183 | "GFS2: fsid=%s: RG = %llu\n" | 183 | " RG = %llu\n" |
| 184 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 184 | " function = %s, file = %s, line = %u\n", |
| 185 | sdp->sd_fsname, | 185 | (unsigned long long)rgd->rd_addr, |
| 186 | sdp->sd_fsname, (unsigned long long)rgd->rd_addr, | 186 | function, file, line); |
| 187 | sdp->sd_fsname, function, file, line); | ||
| 188 | return rv; | 187 | return rv; |
| 189 | } | 188 | } |
| 190 | 189 | ||
| @@ -200,12 +199,11 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
| 200 | { | 199 | { |
| 201 | int me; | 200 | int me; |
| 202 | me = gfs2_lm_withdraw(sdp, | 201 | me = gfs2_lm_withdraw(sdp, |
| 203 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | 202 | "fatal: invalid metadata block\n" |
| 204 | "GFS2: fsid=%s: bh = %llu (%s)\n" | 203 | " bh = %llu (%s)\n" |
| 205 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 204 | " function = %s, file = %s, line = %u\n", |
| 206 | sdp->sd_fsname, | 205 | (unsigned long long)bh->b_blocknr, type, |
| 207 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, | 206 | function, file, line); |
| 208 | sdp->sd_fsname, function, file, line); | ||
| 209 | return (me) ? -1 : -2; | 207 | return (me) ? -1 : -2; |
| 210 | } | 208 | } |
| 211 | 209 | ||
| @@ -221,12 +219,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
| 221 | { | 219 | { |
| 222 | int me; | 220 | int me; |
| 223 | me = gfs2_lm_withdraw(sdp, | 221 | me = gfs2_lm_withdraw(sdp, |
| 224 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | 222 | "fatal: invalid metadata block\n" |
| 225 | "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" | 223 | " bh = %llu (type: exp=%u, found=%u)\n" |
| 226 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 224 | " function = %s, file = %s, line = %u\n", |
| 227 | sdp->sd_fsname, | 225 | (unsigned long long)bh->b_blocknr, type, t, |
| 228 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t, | 226 | function, file, line); |
| 229 | sdp->sd_fsname, function, file, line); | ||
| 230 | return (me) ? -1 : -2; | 227 | return (me) ? -1 : -2; |
| 231 | } | 228 | } |
| 232 | 229 | ||
| @@ -241,10 +238,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, | |||
| 241 | { | 238 | { |
| 242 | int rv; | 239 | int rv; |
| 243 | rv = gfs2_lm_withdraw(sdp, | 240 | rv = gfs2_lm_withdraw(sdp, |
| 244 | "GFS2: fsid=%s: fatal: I/O error\n" | 241 | "fatal: I/O error\n" |
| 245 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 242 | " function = %s, file = %s, line = %u\n", |
| 246 | sdp->sd_fsname, | 243 | function, file, line); |
| 247 | sdp->sd_fsname, function, file, line); | ||
| 248 | return rv; | 244 | return rv; |
| 249 | } | 245 | } |
| 250 | 246 | ||
| @@ -259,12 +255,11 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
| 259 | { | 255 | { |
| 260 | int rv; | 256 | int rv; |
| 261 | rv = gfs2_lm_withdraw(sdp, | 257 | rv = gfs2_lm_withdraw(sdp, |
| 262 | "GFS2: fsid=%s: fatal: I/O error\n" | 258 | "fatal: I/O error\n" |
| 263 | "GFS2: fsid=%s: block = %llu\n" | 259 | " block = %llu\n" |
| 264 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 260 | " function = %s, file = %s, line = %u\n", |
| 265 | sdp->sd_fsname, | 261 | (unsigned long long)bh->b_blocknr, |
| 266 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, | 262 | function, file, line); |
| 267 | sdp->sd_fsname, function, file, line); | ||
| 268 | return rv; | 263 | return rv; |
| 269 | } | 264 | } |
| 270 | 265 | ||
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index b7ffb09b99ea..cbdcbdf39614 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h | |||
| @@ -10,22 +10,23 @@ | |||
| 10 | #ifndef __UTIL_DOT_H__ | 10 | #ifndef __UTIL_DOT_H__ |
| 11 | #define __UTIL_DOT_H__ | 11 | #define __UTIL_DOT_H__ |
| 12 | 12 | ||
| 13 | #ifdef pr_fmt | ||
| 14 | #undef pr_fmt | ||
| 15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 16 | #endif | ||
| 17 | |||
| 13 | #include <linux/mempool.h> | 18 | #include <linux/mempool.h> |
| 14 | 19 | ||
| 15 | #include "incore.h" | 20 | #include "incore.h" |
| 16 | 21 | ||
| 17 | #define fs_printk(level, fs, fmt, arg...) \ | 22 | #define fs_emerg(fs, fmt, ...) \ |
| 18 | printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) | 23 | pr_emerg("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
| 19 | 24 | #define fs_warn(fs, fmt, ...) \ | |
| 20 | #define fs_info(fs, fmt, arg...) \ | 25 | pr_warn("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
| 21 | fs_printk(KERN_INFO , fs , fmt , ## arg) | 26 | #define fs_err(fs, fmt, ...) \ |
| 22 | 27 | pr_err("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) | |
| 23 | #define fs_warn(fs, fmt, arg...) \ | 28 | #define fs_info(fs, fmt, ...) \ |
| 24 | fs_printk(KERN_WARNING , fs , fmt , ## arg) | 29 | pr_info("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
| 25 | |||
| 26 | #define fs_err(fs, fmt, arg...) \ | ||
| 27 | fs_printk(KERN_ERR, fs , fmt , ## arg) | ||
| 28 | |||
| 29 | 30 | ||
| 30 | void gfs2_assert_i(struct gfs2_sbd *sdp); | 31 | void gfs2_assert_i(struct gfs2_sbd *sdp); |
| 31 | 32 | ||
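With the fs_* macros now layered directly on pr_*() (and util.h forcing the module-name pr_fmt so every user of these macros gets the prefix), a single call carries level, module name and fsid. Usage, assuming a mounted sdp whose sd_fsname is "mycluster:0":

static void example(struct gfs2_sbd *sdp)
{
        /* Emits: "gfs2: fsid=mycluster:0: something failed" */
        fs_warn(sdp, "something failed\n");
}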
| @@ -85,7 +86,7 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp, | |||
| 85 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; | 86 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; |
| 86 | u32 magic = be32_to_cpu(mh->mh_magic); | 87 | u32 magic = be32_to_cpu(mh->mh_magic); |
| 87 | if (unlikely(magic != GFS2_MAGIC)) { | 88 | if (unlikely(magic != GFS2_MAGIC)) { |
| 88 | printk(KERN_ERR "GFS2: Magic number missing at %llu\n", | 89 | pr_err("Magic number missing at %llu\n", |
| 89 | (unsigned long long)bh->b_blocknr); | 90 | (unsigned long long)bh->b_blocknr); |
| 90 | return -EIO; | 91 | return -EIO; |
| 91 | } | 92 | } |
| @@ -164,7 +165,7 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, | |||
| 164 | #define gfs2_tune_get(sdp, field) \ | 165 | #define gfs2_tune_get(sdp, field) \ |
| 165 | gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) | 166 | gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) |
| 166 | 167 | ||
| 167 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); | 168 | __printf(2, 3) |
| 169 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); | ||
| 168 | 170 | ||
| 169 | #endif /* __UTIL_DOT_H__ */ | 171 | #endif /* __UTIL_DOT_H__ */ |
| 170 | |||
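The header change also annotates gfs2_lm_withdraw() with __printf(2, 3), possible now that fmt is const char *; the compiler will type-check every call site's arguments against its format string. The attribute in general form (log_fs and struct myfs are illustrative, not kernel APIs):

#include <linux/compiler.h>

struct myfs;

/* Argument 2 is the format string; variadic arguments start at 3.
 * gcc now warns on mismatches such as log_fs(fs, "%d", "text"). */
__printf(2, 3)
int log_fs(struct myfs *fs, const char *fmt, ...);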
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 380ab31b5e0f..9e2fecd62f62 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
| @@ -547,7 +547,7 @@ out: | |||
| 547 | 547 | ||
| 548 | void hfs_evict_inode(struct inode *inode) | 548 | void hfs_evict_inode(struct inode *inode) |
| 549 | { | 549 | { |
| 550 | truncate_inode_pages(&inode->i_data, 0); | 550 | truncate_inode_pages_final(&inode->i_data); |
| 551 | clear_inode(inode); | 551 | clear_inode(inode); |
| 552 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { | 552 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { |
| 553 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; | 553 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 2d2039e754cd..eee7206c38d1 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
| @@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 112 | 112 | ||
| 113 | static int hfs_remount(struct super_block *sb, int *flags, char *data) | 113 | static int hfs_remount(struct super_block *sb, int *flags, char *data) |
| 114 | { | 114 | { |
| 115 | sync_filesystem(sb); | ||
| 115 | *flags |= MS_NODIRATIME; | 116 | *flags |= MS_NODIRATIME; |
| 116 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 117 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 117 | return 0; | 118 | return 0; |
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 0f47890299c4..caf89a7be0a1 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | static struct kmem_cache *hfsplus_attr_tree_cachep; | 12 | static struct kmem_cache *hfsplus_attr_tree_cachep; |
| 13 | 13 | ||
| 14 | int hfsplus_create_attr_tree_cache(void) | 14 | int __init hfsplus_create_attr_tree_cache(void) |
| 15 | { | 15 | { |
| 16 | if (hfsplus_attr_tree_cachep) | 16 | if (hfsplus_attr_tree_cachep) |
| 17 | return -EEXIST; | 17 | return -EEXIST; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 968ce411db53..32602c667b4a 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
| @@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, | |||
| 103 | folder = &entry->folder; | 103 | folder = &entry->folder; |
| 104 | memset(folder, 0, sizeof(*folder)); | 104 | memset(folder, 0, sizeof(*folder)); |
| 105 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); | 105 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); |
| 106 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) | ||
| 107 | folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT); | ||
| 106 | folder->id = cpu_to_be32(inode->i_ino); | 108 | folder->id = cpu_to_be32(inode->i_ino); |
| 107 | HFSPLUS_I(inode)->create_date = | 109 | HFSPLUS_I(inode)->create_date = |
| 108 | folder->create_date = | 110 | folder->create_date = |
| @@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
| 203 | return hfs_brec_find(fd, hfs_find_rec_by_key); | 205 | return hfs_brec_find(fd, hfs_find_rec_by_key); |
| 204 | } | 206 | } |
| 205 | 207 | ||
| 208 | static void hfsplus_subfolders_inc(struct inode *dir) | ||
| 209 | { | ||
| 210 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); | ||
| 211 | |||
| 212 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { | ||
| 213 | /* | ||
| 214 | * Increment subfolder count. Note, the value is only meaningful | ||
| 215 | * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. | ||
| 216 | */ | ||
| 217 | HFSPLUS_I(dir)->subfolders++; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | static void hfsplus_subfolders_dec(struct inode *dir) | ||
| 222 | { | ||
| 223 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); | ||
| 224 | |||
| 225 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { | ||
| 226 | /* | ||
| 227 | * Decrement subfolder count. Note, the value is only meaningful | ||
| 228 | * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. | ||
| 229 | * | ||
| 230 | * Check for zero. Some subfolders may have been created | ||
| 231 | * by an implementation ignorant of this counter. | ||
| 232 | */ | ||
| 233 | if (HFSPLUS_I(dir)->subfolders) | ||
| 234 | HFSPLUS_I(dir)->subfolders--; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 206 | int hfsplus_create_cat(u32 cnid, struct inode *dir, | 238 | int hfsplus_create_cat(u32 cnid, struct inode *dir, |
| 207 | struct qstr *str, struct inode *inode) | 239 | struct qstr *str, struct inode *inode) |
| 208 | { | 240 | { |
| @@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, | |||
| 247 | goto err1; | 279 | goto err1; |
| 248 | 280 | ||
| 249 | dir->i_size++; | 281 | dir->i_size++; |
| 282 | if (S_ISDIR(inode->i_mode)) | ||
| 283 | hfsplus_subfolders_inc(dir); | ||
| 250 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | 284 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; |
| 251 | hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); | 285 | hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); |
| 252 | 286 | ||
| @@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
| 336 | goto out; | 370 | goto out; |
| 337 | 371 | ||
| 338 | dir->i_size--; | 372 | dir->i_size--; |
| 373 | if (type == HFSPLUS_FOLDER) | ||
| 374 | hfsplus_subfolders_dec(dir); | ||
| 339 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | 375 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; |
| 340 | hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); | 376 | hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); |
| 341 | 377 | ||
| @@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 380 | 416 | ||
| 381 | hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, | 417 | hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, |
| 382 | src_fd.entrylength); | 418 | src_fd.entrylength); |
| 419 | type = be16_to_cpu(entry.type); | ||
| 383 | 420 | ||
| 384 | /* create new dir entry with the data from the old entry */ | 421 | /* create new dir entry with the data from the old entry */ |
| 385 | hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); | 422 | hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); |
| @@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 394 | if (err) | 431 | if (err) |
| 395 | goto out; | 432 | goto out; |
| 396 | dst_dir->i_size++; | 433 | dst_dir->i_size++; |
| 434 | if (type == HFSPLUS_FOLDER) | ||
| 435 | hfsplus_subfolders_inc(dst_dir); | ||
| 397 | dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; | 436 | dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; |
| 398 | 437 | ||
| 399 | /* finally remove the old entry */ | 438 | /* finally remove the old entry */ |
| @@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 405 | if (err) | 444 | if (err) |
| 406 | goto out; | 445 | goto out; |
| 407 | src_dir->i_size--; | 446 | src_dir->i_size--; |
| 447 | if (type == HFSPLUS_FOLDER) | ||
| 448 | hfsplus_subfolders_dec(src_dir); | ||
| 408 | src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; | 449 | src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; |
| 409 | 450 | ||
| 410 | /* remove old thread entry */ | 451 | /* remove old thread entry */ |
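The catalog changes maintain a per-directory subfolder count on HFSX volumes: the create, delete and rename paths in this file call the two helpers above whenever a folder record is added or removed, and both helpers are gated on the HFSPLUS_SB_HFSX superblock flag. The decrement deliberately saturates at zero, since implementations that predate the counter may have created subfolders without updating it; the guard reduces to:

/* Sketch: never wrap the on-disk counter below zero — it may
 * undercount if older drivers ignored it. */
static void subfolders_dec(u32 *subfolders)
{
        if (*subfolders)
                (*subfolders)--;
}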
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fbb212fbb1ef..a7aafb35b624 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
| @@ -227,10 +227,8 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
| 227 | u32 ablock, dblock, mask; | 227 | u32 ablock, dblock, mask; |
| 228 | sector_t sector; | 228 | sector_t sector; |
| 229 | int was_dirty = 0; | 229 | int was_dirty = 0; |
| 230 | int shift; | ||
| 231 | 230 | ||
| 232 | /* Convert inode block to disk allocation block */ | 231 | /* Convert inode block to disk allocation block */ |
| 233 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; | ||
| 234 | ablock = iblock >> sbi->fs_shift; | 232 | ablock = iblock >> sbi->fs_shift; |
| 235 | 233 | ||
| 236 | if (iblock >= hip->fs_blocks) { | 234 | if (iblock >= hip->fs_blocks) { |
| @@ -498,11 +496,13 @@ int hfsplus_file_extend(struct inode *inode) | |||
| 498 | goto insert_extent; | 496 | goto insert_extent; |
| 499 | } | 497 | } |
| 500 | out: | 498 | out: |
| 501 | mutex_unlock(&hip->extents_lock); | ||
| 502 | if (!res) { | 499 | if (!res) { |
| 503 | hip->alloc_blocks += len; | 500 | hip->alloc_blocks += len; |
| 501 | mutex_unlock(&hip->extents_lock); | ||
| 504 | hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); | 502 | hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); |
| 503 | return 0; | ||
| 505 | } | 504 | } |
| 505 | mutex_unlock(&hip->extents_lock); | ||
| 506 | return res; | 506 | return res; |
| 507 | 507 | ||
| 508 | insert_extent: | 508 | insert_extent: |
| @@ -556,11 +556,13 @@ void hfsplus_file_truncate(struct inode *inode) | |||
| 556 | 556 | ||
| 557 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> | 557 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
| 558 | HFSPLUS_SB(sb)->alloc_blksz_shift; | 558 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
| 559 | |||
| 560 | mutex_lock(&hip->extents_lock); | ||
| 561 | |||
| 559 | alloc_cnt = hip->alloc_blocks; | 562 | alloc_cnt = hip->alloc_blocks; |
| 560 | if (blk_cnt == alloc_cnt) | 563 | if (blk_cnt == alloc_cnt) |
| 561 | goto out; | 564 | goto out_unlock; |
| 562 | 565 | ||
| 563 | mutex_lock(&hip->extents_lock); | ||
| 564 | res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); | 566 | res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
| 565 | if (res) { | 567 | if (res) { |
| 566 | mutex_unlock(&hip->extents_lock); | 568 | mutex_unlock(&hip->extents_lock); |
| @@ -592,10 +594,10 @@ void hfsplus_file_truncate(struct inode *inode) | |||
| 592 | hfs_brec_remove(&fd); | 594 | hfs_brec_remove(&fd); |
| 593 | } | 595 | } |
| 594 | hfs_find_exit(&fd); | 596 | hfs_find_exit(&fd); |
| 595 | mutex_unlock(&hip->extents_lock); | ||
| 596 | 597 | ||
| 597 | hip->alloc_blocks = blk_cnt; | 598 | hip->alloc_blocks = blk_cnt; |
| 598 | out: | 599 | out_unlock: |
| 600 | mutex_unlock(&hip->extents_lock); | ||
| 599 | hip->phys_size = inode->i_size; | 601 | hip->phys_size = inode->i_size; |
| 600 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> | 602 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> |
| 601 | sb->s_blocksize_bits; | 603 | sb->s_blocksize_bits; |
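Both extents.c hunks close the same class of race: hip->alloc_blocks was written after extents_lock had been dropped in hfsplus_file_extend(), and read before the lock was taken in hfsplus_file_truncate(). After the patch the invariant is that alloc_blocks is only touched while the mutex is held, and the lock is released before the inode is marked dirty. Condensed from the extend path:

mutex_lock(&hip->extents_lock);
/* ... find or insert extents, compute 'len' ... */
if (!res) {
        hip->alloc_blocks += len;               /* still under the lock */
        mutex_unlock(&hip->extents_lock);
        hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY);
        return 0;
}
mutex_unlock(&hip->extents_lock);
return res;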
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b67f..83dc29286b10 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
| @@ -242,6 +242,7 @@ struct hfsplus_inode_info { | |||
| 242 | */ | 242 | */ |
| 243 | sector_t fs_blocks; | 243 | sector_t fs_blocks; |
| 244 | u8 userflags; /* BSD user file flags */ | 244 | u8 userflags; /* BSD user file flags */ |
| 245 | u32 subfolders; /* Subfolder count (HFSX only) */ | ||
| 245 | struct list_head open_dir_list; | 246 | struct list_head open_dir_list; |
| 246 | loff_t phys_size; | 247 | loff_t phys_size; |
| 247 | 248 | ||
| @@ -366,7 +367,7 @@ typedef int (*search_strategy_t)(struct hfs_bnode *, | |||
| 366 | */ | 367 | */ |
| 367 | 368 | ||
| 368 | /* attributes.c */ | 369 | /* attributes.c */ |
| 369 | int hfsplus_create_attr_tree_cache(void); | 370 | int __init hfsplus_create_attr_tree_cache(void); |
| 370 | void hfsplus_destroy_attr_tree_cache(void); | 371 | void hfsplus_destroy_attr_tree_cache(void); |
| 371 | hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); | 372 | hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); |
| 372 | void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); | 373 | void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); |
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8ffe75..5a126828d85e 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h | |||
| @@ -261,7 +261,7 @@ struct hfsplus_cat_folder { | |||
| 261 | struct DInfo user_info; | 261 | struct DInfo user_info; |
| 262 | struct DXInfo finder_info; | 262 | struct DXInfo finder_info; |
| 263 | __be32 text_encoding; | 263 | __be32 text_encoding; |
| 264 | u32 reserved; | 264 | __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ |
| 265 | } __packed; | 265 | } __packed; |
| 266 | 266 | ||
| 267 | /* HFS file info (stolen from hfs.h) */ | 267 | /* HFS file info (stolen from hfs.h) */ |
| @@ -301,11 +301,13 @@ struct hfsplus_cat_file { | |||
| 301 | struct hfsplus_fork_raw rsrc_fork; | 301 | struct hfsplus_fork_raw rsrc_fork; |
| 302 | } __packed; | 302 | } __packed; |
| 303 | 303 | ||
| 304 | /* File attribute bits */ | 304 | /* File and folder flag bits */ |
| 305 | #define HFSPLUS_FILE_LOCKED 0x0001 | 305 | #define HFSPLUS_FILE_LOCKED 0x0001 |
| 306 | #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 | 306 | #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 |
| 307 | #define HFSPLUS_XATTR_EXISTS 0x0004 | 307 | #define HFSPLUS_XATTR_EXISTS 0x0004 |
| 308 | #define HFSPLUS_ACL_EXISTS 0x0008 | 308 | #define HFSPLUS_ACL_EXISTS 0x0008 |
| 309 | #define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count | ||
| 310 | * (HFSX only) */ | ||
| 309 | 311 | ||
| 310 | /* HFS+ catalog thread (part of a cat_entry) */ | 312 | /* HFS+ catalog thread (part of a cat_entry) */ |
| 311 | struct hfsplus_cat_thread { | 313 | struct hfsplus_cat_thread { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fa929f325f87..a4f45bd88a63 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) | |||
| 375 | hip->extent_state = 0; | 375 | hip->extent_state = 0; |
| 376 | hip->flags = 0; | 376 | hip->flags = 0; |
| 377 | hip->userflags = 0; | 377 | hip->userflags = 0; |
| 378 | hip->subfolders = 0; | ||
| 378 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); | 379 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
| 379 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); | 380 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 380 | hip->alloc_blocks = 0; | 381 | hip->alloc_blocks = 0; |
| @@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 494 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 495 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
| 495 | HFSPLUS_I(inode)->create_date = folder->create_date; | 496 | HFSPLUS_I(inode)->create_date = folder->create_date; |
| 496 | HFSPLUS_I(inode)->fs_blocks = 0; | 497 | HFSPLUS_I(inode)->fs_blocks = 0; |
| 498 | if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { | ||
| 499 | HFSPLUS_I(inode)->subfolders = | ||
| 500 | be32_to_cpu(folder->subfolders); | ||
| 501 | } | ||
| 497 | inode->i_op = &hfsplus_dir_inode_operations; | 502 | inode->i_op = &hfsplus_dir_inode_operations; |
| 498 | inode->i_fop = &hfsplus_dir_operations; | 503 | inode->i_fop = &hfsplus_dir_operations; |
| 499 | } else if (type == HFSPLUS_FILE) { | 504 | } else if (type == HFSPLUS_FILE) { |
| @@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 566 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 571 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
| 567 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 572 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
| 568 | folder->valence = cpu_to_be32(inode->i_size - 2); | 573 | folder->valence = cpu_to_be32(inode->i_size - 2); |
| 574 | if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { | ||
| 575 | folder->subfolders = | ||
| 576 | cpu_to_be32(HFSPLUS_I(inode)->subfolders); | ||
| 577 | } | ||
| 569 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, | 578 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, |
| 570 | sizeof(struct hfsplus_cat_folder)); | 579 | sizeof(struct hfsplus_cat_folder)); |
| 571 | } else if (HFSPLUS_IS_RSRC(inode)) { | 580 | } else if (HFSPLUS_IS_RSRC(inode)) { |
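Note the endianness idiom in both inode.c hunks: rather than byte-swapping the loaded flags with be16_to_cpu(), the constant is swapped once with cpu_to_be16(), which folds at compile time, so the raw __be16 field is tested with a plain mask. In isolation:

static bool has_folder_count(const struct hfsplus_cat_folder *folder)
{
        /* cpu_to_be16() on a constant is free at runtime; the raw
         * big-endian field needs no conversion before the test. */
        return folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT);
}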
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 968eab5bc1f5..68537e8b7a09 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
| @@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force) | |||
| 75 | int token; | 75 | int token; |
| 76 | 76 | ||
| 77 | if (!input) | 77 | if (!input) |
| 78 | return 0; | 78 | return 1; |
| 79 | 79 | ||
| 80 | while ((p = strsep(&input, ",")) != NULL) { | 80 | while ((p = strsep(&input, ",")) != NULL) { |
| 81 | if (!*p) | 81 | if (!*p) |
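The one-character options.c fix reflects this parser's return convention: hfsplus_parse_options_remount() returns 1 for success and 0 for failure (the inverse of the usual 0-on-success kernel style), so a remount with no option string must report 1, not 0. The assumed caller shape — not shown in this diff — is roughly:

/* Hypothetical caller sketch: a zero return aborts the remount,
 * so NULL input ("no options given") has to count as success. */
if (!hfsplus_parse_options_remount(data, &force))
        return -EINVAL;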
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 80875aa640ef..a513d2d36be9 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
| @@ -161,7 +161,7 @@ static int hfsplus_write_inode(struct inode *inode, | |||
| 161 | static void hfsplus_evict_inode(struct inode *inode) | 161 | static void hfsplus_evict_inode(struct inode *inode) |
| 162 | { | 162 | { |
| 163 | hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); | 163 | hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); |
| 164 | truncate_inode_pages(&inode->i_data, 0); | 164 | truncate_inode_pages_final(&inode->i_data); |
| 165 | clear_inode(inode); | 165 | clear_inode(inode); |
| 166 | if (HFSPLUS_IS_RSRC(inode)) { | 166 | if (HFSPLUS_IS_RSRC(inode)) { |
| 167 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; | 167 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
| @@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 323 | 323 | ||
| 324 | static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | 324 | static int hfsplus_remount(struct super_block *sb, int *flags, char *data) |
| 325 | { | 325 | { |
| 326 | sync_filesystem(sb); | ||
| 326 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 327 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 327 | return 0; | 328 | return 0; |
| 328 | if (!(*flags & MS_RDONLY)) { | 329 | if (!(*flags & MS_RDONLY)) { |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fe649d325b1f..9c470fde9878 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
| @@ -230,7 +230,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) | |||
| 230 | 230 | ||
| 231 | static void hostfs_evict_inode(struct inode *inode) | 231 | static void hostfs_evict_inode(struct inode *inode) |
| 232 | { | 232 | { |
| 233 | truncate_inode_pages(&inode->i_data, 0); | 233 | truncate_inode_pages_final(&inode->i_data); |
| 234 | clear_inode(inode); | 234 | clear_inode(inode); |
| 235 | if (HOSTFS_I(inode)->fd != -1) { | 235 | if (HOSTFS_I(inode)->fd != -1) { |
| 236 | close_file(&HOSTFS_I(inode)->fd); | 236 | close_file(&HOSTFS_I(inode)->fd); |
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 9edeeb0ea97e..50a427313835 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
| @@ -304,7 +304,7 @@ void hpfs_write_if_changed(struct inode *inode) | |||
| 304 | 304 | ||
| 305 | void hpfs_evict_inode(struct inode *inode) | 305 | void hpfs_evict_inode(struct inode *inode) |
| 306 | { | 306 | { |
| 307 | truncate_inode_pages(&inode->i_data, 0); | 307 | truncate_inode_pages_final(&inode->i_data); |
| 308 | clear_inode(inode); | 308 | clear_inode(inode); |
| 309 | if (!inode->i_nlink) { | 309 | if (!inode->i_nlink) { |
| 310 | hpfs_lock(inode->i_sb); | 310 | hpfs_lock(inode->i_sb); |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 4534ff688b76..fe3463a43236 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
| @@ -421,6 +421,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
| 421 | struct hpfs_sb_info *sbi = hpfs_sb(s); | 421 | struct hpfs_sb_info *sbi = hpfs_sb(s); |
| 422 | char *new_opts = kstrdup(data, GFP_KERNEL); | 422 | char *new_opts = kstrdup(data, GFP_KERNEL); |
| 423 | 423 | ||
| 424 | sync_filesystem(s); | ||
| 425 | |||
| 424 | *flags |= MS_NOATIME; | 426 | *flags |= MS_NOATIME; |
| 425 | 427 | ||
| 426 | hpfs_lock(s); | 428 | hpfs_lock(s); |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d19b30ababf1..204027520937 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -366,7 +366,13 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) | |||
| 366 | 366 | ||
| 367 | static void hugetlbfs_evict_inode(struct inode *inode) | 367 | static void hugetlbfs_evict_inode(struct inode *inode) |
| 368 | { | 368 | { |
| 369 | struct resv_map *resv_map; | ||
| 370 | |||
| 369 | truncate_hugepages(inode, 0); | 371 | truncate_hugepages(inode, 0); |
| 372 | resv_map = (struct resv_map *)inode->i_mapping->private_data; | ||
| 373 | /* root inode doesn't have the resv_map, so we should check it */ | ||
| 374 | if (resv_map) | ||
| 375 | resv_map_release(&resv_map->refs); | ||
| 370 | clear_inode(inode); | 376 | clear_inode(inode); |
| 371 | } | 377 | } |
| 372 | 378 | ||
| @@ -476,6 +482,11 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
| 476 | umode_t mode, dev_t dev) | 482 | umode_t mode, dev_t dev) |
| 477 | { | 483 | { |
| 478 | struct inode *inode; | 484 | struct inode *inode; |
| 485 | struct resv_map *resv_map; | ||
| 486 | |||
| 487 | resv_map = resv_map_alloc(); | ||
| 488 | if (!resv_map) | ||
| 489 | return NULL; | ||
| 479 | 490 | ||
| 480 | inode = new_inode(sb); | 491 | inode = new_inode(sb); |
| 481 | if (inode) { | 492 | if (inode) { |
| @@ -487,7 +498,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
| 487 | inode->i_mapping->a_ops = &hugetlbfs_aops; | 498 | inode->i_mapping->a_ops = &hugetlbfs_aops; |
| 488 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; | 499 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; |
| 489 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 500 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 490 | INIT_LIST_HEAD(&inode->i_mapping->private_list); | 501 | inode->i_mapping->private_data = resv_map; |
| 491 | info = HUGETLBFS_I(inode); | 502 | info = HUGETLBFS_I(inode); |
| 492 | /* | 503 | /* |
| 493 | * The policy is initialized here even if we are creating a | 504 | * The policy is initialized here even if we are creating a |
| @@ -517,7 +528,9 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
| 517 | break; | 528 | break; |
| 518 | } | 529 | } |
| 519 | lockdep_annotate_inode_mutex_key(inode); | 530 | lockdep_annotate_inode_mutex_key(inode); |
| 520 | } | 531 | } else |
| 532 | kref_put(&resv_map->refs, resv_map_release); | ||
| 533 | |||
| 521 | return inode; | 534 | return inode; |
| 522 | } | 535 | } |
| 523 | 536 | ||
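hugetlbfs now attaches a kref-counted reservation map to each inode via i_mapping->private_data: allocated before new_inode(), dropped with kref_put() if inode allocation fails, and released in ->evict_inode() — with a NULL check there because the root inode carries no map. The ownership-handoff pattern, condensed from the hunks above:

struct resv_map *resv_map = resv_map_alloc();
if (!resv_map)
        return NULL;

inode = new_inode(sb);
if (inode)
        inode->i_mapping->private_data = resv_map;   /* inode owns it now */
else
        kref_put(&resv_map->refs, resv_map_release); /* undo on failure */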
diff --git a/fs/inode.c b/fs/inode.c index 4bcdad3c9361..f96d2a6f88cc 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -503,6 +503,7 @@ void clear_inode(struct inode *inode) | |||
| 503 | */ | 503 | */ |
| 504 | spin_lock_irq(&inode->i_data.tree_lock); | 504 | spin_lock_irq(&inode->i_data.tree_lock); |
| 505 | BUG_ON(inode->i_data.nrpages); | 505 | BUG_ON(inode->i_data.nrpages); |
| 506 | BUG_ON(inode->i_data.nrshadows); | ||
| 506 | spin_unlock_irq(&inode->i_data.tree_lock); | 507 | spin_unlock_irq(&inode->i_data.tree_lock); |
| 507 | BUG_ON(!list_empty(&inode->i_data.private_list)); | 508 | BUG_ON(!list_empty(&inode->i_data.private_list)); |
| 508 | BUG_ON(!(inode->i_state & I_FREEING)); | 509 | BUG_ON(!(inode->i_state & I_FREEING)); |
| @@ -548,8 +549,7 @@ static void evict(struct inode *inode) | |||
| 548 | if (op->evict_inode) { | 549 | if (op->evict_inode) { |
| 549 | op->evict_inode(inode); | 550 | op->evict_inode(inode); |
| 550 | } else { | 551 | } else { |
| 551 | if (inode->i_data.nrpages) | 552 | truncate_inode_pages_final(&inode->i_data); |
| 552 | truncate_inode_pages(&inode->i_data, 0); | ||
| 553 | clear_inode(inode); | 553 | clear_inode(inode); |
| 554 | } | 554 | } |
| 555 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) | 555 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) |
| @@ -944,24 +944,22 @@ EXPORT_SYMBOL(unlock_new_inode); | |||
| 944 | 944 | ||
| 945 | /** | 945 | /** |
| 946 | * lock_two_nondirectories - take two i_mutexes on non-directory objects | 946 | * lock_two_nondirectories - take two i_mutexes on non-directory objects |
| 947 | * | ||
| 948 | * Lock any non-NULL argument that is not a directory. | ||
| 949 | * Zero, one or two objects may be locked by this function. | ||
| 950 | * | ||
| 947 | * @inode1: first inode to lock | 951 | * @inode1: first inode to lock |
| 948 | * @inode2: second inode to lock | 952 | * @inode2: second inode to lock |
| 949 | */ | 953 | */ |
| 950 | void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) | 954 | void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) |
| 951 | { | 955 | { |
| 952 | WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); | 956 | if (inode1 > inode2) |
| 953 | if (inode1 == inode2 || !inode2) { | 957 | swap(inode1, inode2); |
| 954 | mutex_lock(&inode1->i_mutex); | 958 | |
| 955 | return; | 959 | if (inode1 && !S_ISDIR(inode1->i_mode)) |
| 956 | } | ||
| 957 | WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); | ||
| 958 | if (inode1 < inode2) { | ||
| 959 | mutex_lock(&inode1->i_mutex); | 960 | mutex_lock(&inode1->i_mutex); |
| 961 | if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) | ||
| 960 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); | 962 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); |
| 961 | } else { | ||
| 962 | mutex_lock(&inode2->i_mutex); | ||
| 963 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2); | ||
| 964 | } | ||
| 965 | } | 963 | } |
| 966 | EXPORT_SYMBOL(lock_two_nondirectories); | 964 | EXPORT_SYMBOL(lock_two_nondirectories); |
| 967 | 965 | ||
| @@ -972,8 +970,9 @@ EXPORT_SYMBOL(lock_two_nondirectories); | |||
| 972 | */ | 970 | */ |
| 973 | void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) | 971 | void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) |
| 974 | { | 972 | { |
| 975 | mutex_unlock(&inode1->i_mutex); | 973 | if (inode1 && !S_ISDIR(inode1->i_mode)) |
| 976 | if (inode2 && inode2 != inode1) | 974 | mutex_unlock(&inode1->i_mutex); |
| 975 | if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) | ||
| 977 | mutex_unlock(&inode2->i_mutex); | 976 | mutex_unlock(&inode2->i_mutex); |
| 978 | } | 977 | } |
| 979 | EXPORT_SYMBOL(unlock_two_nondirectories); | 978 | EXPORT_SYMBOL(unlock_two_nondirectories); |
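The rewritten lock_two_nondirectories() replaces the old three-way branching with a canonical ordering: swap the pointers so the lower-addressed inode is always locked first, then skip NULL, duplicate, and directory arguments. Two tasks locking the same pair in opposite argument order therefore always acquire in the same sequence and cannot deadlock, and the unlock side needs no ordering at all. A pthreads sketch of the idiom with the S_ISDIR filtering omitted (comparing pointers to distinct objects is formally non-portable but is the standard trick):

#include <pthread.h>

/* Lock up to two mutexes in address order; NULL and duplicate
 * arguments are tolerated, mirroring lock_two_nondirectories(). */
void lock_two(pthread_mutex_t *m1, pthread_mutex_t *m2)
{
        if (m1 > m2) {              /* canonical order: low address first */
                pthread_mutex_t *tmp = m1;

                m1 = m2;
                m2 = tmp;
        }
        if (m1)
                pthread_mutex_lock(m1);
        if (m2 && m2 != m1)
                pthread_mutex_lock(m2);
}

void unlock_two(pthread_mutex_t *m1, pthread_mutex_t *m2)
{
        if (m1)
                pthread_mutex_unlock(m1);
        if (m2 && m2 != m1)
                pthread_mutex_unlock(m2);
}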
| @@ -1899,3 +1898,34 @@ void inode_dio_done(struct inode *inode) | |||
| 1899 | wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); | 1898 | wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); |
| 1900 | } | 1899 | } |
| 1901 | EXPORT_SYMBOL(inode_dio_done); | 1900 | EXPORT_SYMBOL(inode_dio_done); |
| 1901 | |||
| 1902 | /* | ||
| 1903 | * inode_set_flags - atomically set some inode flags | ||
| 1904 | * | ||
| 1905 | * Note: the caller should be holding i_mutex, or else be sure that | ||
| 1906 | * they have exclusive access to the inode structure (i.e., while the | ||
| 1907 | * inode is being instantiated). The reason for the cmpxchg() loop | ||
| 1908 | * --- which wouldn't be necessary if all code paths which modify | ||
| 1909 | * i_flags actually followed this rule, is that there is at least one | ||
| 1910 | * code path which doesn't today --- for example, | ||
| 1911 | * __generic_file_aio_write() calls file_remove_suid() without holding | ||
| 1912 | * i_mutex --- so we use cmpxchg() out of an abundance of caution. | ||
| 1913 | * | ||
| 1914 | * In the long run, i_mutex is overkill, and we should probably look | ||
| 1915 | * at using the i_lock spinlock to protect i_flags, and then make sure | ||
| 1916 | * it is so documented in include/linux/fs.h and that all code follows | ||
| 1917 | * the locking convention!! | ||
| 1918 | */ | ||
| 1919 | void inode_set_flags(struct inode *inode, unsigned int flags, | ||
| 1920 | unsigned int mask) | ||
| 1921 | { | ||
| 1922 | unsigned int old_flags, new_flags; | ||
| 1923 | |||
| 1924 | WARN_ON_ONCE(flags & ~mask); | ||
| 1925 | do { | ||
| 1926 | old_flags = ACCESS_ONCE(inode->i_flags); | ||
| 1927 | new_flags = (old_flags & ~mask) | flags; | ||
| 1928 | } while (unlikely(cmpxchg(&inode->i_flags, old_flags, | ||
| 1929 | new_flags) != old_flags)); | ||
| 1930 | } | ||
| 1931 | EXPORT_SYMBOL(inode_set_flags); | ||
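A standalone C11 rendering of the inode_set_flags() loop added above, with an _Atomic word standing in for the kernel's cmpxchg(); set_flags() is an illustrative name. The read-modify-write retries until no concurrent writer changed the word between the load and the store:

#include <assert.h>
#include <stdatomic.h>

void set_flags(_Atomic unsigned int *word, unsigned int flags,
               unsigned int mask)
{
        unsigned int old, new;

        assert(!(flags & ~mask));           /* mirrors the WARN_ON_ONCE() */
        old = atomic_load(word);
        do {
                new = (old & ~mask) | flags;
                /* on failure, 'old' is refreshed with the current value */
        } while (!atomic_compare_exchange_weak(word, &old, new));
}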
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4a9e10ea13f2..6af66ee56390 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -117,6 +117,7 @@ static void destroy_inodecache(void) | |||
| 117 | 117 | ||
| 118 | static int isofs_remount(struct super_block *sb, int *flags, char *data) | 118 | static int isofs_remount(struct super_block *sb, int *flags, char *data) |
| 119 | { | 119 | { |
| 120 | sync_filesystem(sb); | ||
| 120 | if (!(*flags & MS_RDONLY)) | 121 | if (!(*flags & MS_RDONLY)) |
| 121 | return -EROFS; | 122 | return -EROFS; |
| 122 | return 0; | 123 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cf2fc0594063..5f26139a165a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -555,7 +555,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 555 | blk_start_plug(&plug); | 555 | blk_start_plug(&plug); |
| 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
| 557 | &log_bufs, WRITE_SYNC); | 557 | &log_bufs, WRITE_SYNC); |
| 558 | blk_finish_plug(&plug); | ||
| 559 | 558 | ||
| 560 | jbd_debug(3, "JBD2: commit phase 2b\n"); | 559 | jbd_debug(3, "JBD2: commit phase 2b\n"); |
| 561 | 560 | ||
| @@ -582,7 +581,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 582 | err = 0; | 581 | err = 0; |
| 583 | bufs = 0; | 582 | bufs = 0; |
| 584 | descriptor = NULL; | 583 | descriptor = NULL; |
| 585 | blk_start_plug(&plug); | ||
| 586 | while (commit_transaction->t_buffers) { | 584 | while (commit_transaction->t_buffers) { |
| 587 | 585 | ||
| 588 | /* Find the next buffer to be journaled... */ | 586 | /* Find the next buffer to be journaled... */ |
| @@ -1067,6 +1065,25 @@ restart_loop: | |||
| 1067 | goto restart_loop; | 1065 | goto restart_loop; |
| 1068 | } | 1066 | } |
| 1069 | 1067 | ||
| 1068 | /* Add the transaction to the checkpoint list | ||
| 1069 | * __journal_remove_checkpoint() can not destroy transaction | ||
| 1070 | * under us because it is not marked as T_FINISHED yet */ | ||
| 1071 | if (journal->j_checkpoint_transactions == NULL) { | ||
| 1072 | journal->j_checkpoint_transactions = commit_transaction; | ||
| 1073 | commit_transaction->t_cpnext = commit_transaction; | ||
| 1074 | commit_transaction->t_cpprev = commit_transaction; | ||
| 1075 | } else { | ||
| 1076 | commit_transaction->t_cpnext = | ||
| 1077 | journal->j_checkpoint_transactions; | ||
| 1078 | commit_transaction->t_cpprev = | ||
| 1079 | commit_transaction->t_cpnext->t_cpprev; | ||
| 1080 | commit_transaction->t_cpnext->t_cpprev = | ||
| 1081 | commit_transaction; | ||
| 1082 | commit_transaction->t_cpprev->t_cpnext = | ||
| 1083 | commit_transaction; | ||
| 1084 | } | ||
| 1085 | spin_unlock(&journal->j_list_lock); | ||
| 1086 | |||
| 1070 | /* Done with this transaction! */ | 1087 | /* Done with this transaction! */ |
| 1071 | 1088 | ||
| 1072 | jbd_debug(3, "JBD2: commit phase 7\n"); | 1089 | jbd_debug(3, "JBD2: commit phase 7\n"); |
| @@ -1085,24 +1102,7 @@ restart_loop: | |||
| 1085 | atomic_read(&commit_transaction->t_handle_count); | 1102 | atomic_read(&commit_transaction->t_handle_count); |
| 1086 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, | 1103 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, |
| 1087 | commit_transaction->t_tid, &stats.run); | 1104 | commit_transaction->t_tid, &stats.run); |
| 1088 | 1105 | stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0; | |
| 1089 | /* | ||
| 1090 | * Calculate overall stats | ||
| 1091 | */ | ||
| 1092 | spin_lock(&journal->j_history_lock); | ||
| 1093 | journal->j_stats.ts_tid++; | ||
| 1094 | if (commit_transaction->t_requested) | ||
| 1095 | journal->j_stats.ts_requested++; | ||
| 1096 | journal->j_stats.run.rs_wait += stats.run.rs_wait; | ||
| 1097 | journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; | ||
| 1098 | journal->j_stats.run.rs_running += stats.run.rs_running; | ||
| 1099 | journal->j_stats.run.rs_locked += stats.run.rs_locked; | ||
| 1100 | journal->j_stats.run.rs_flushing += stats.run.rs_flushing; | ||
| 1101 | journal->j_stats.run.rs_logging += stats.run.rs_logging; | ||
| 1102 | journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; | ||
| 1103 | journal->j_stats.run.rs_blocks += stats.run.rs_blocks; | ||
| 1104 | journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; | ||
| 1105 | spin_unlock(&journal->j_history_lock); | ||
| 1106 | 1106 | ||
| 1107 | commit_transaction->t_state = T_COMMIT_CALLBACK; | 1107 | commit_transaction->t_state = T_COMMIT_CALLBACK; |
| 1108 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 1108 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
| @@ -1122,24 +1122,6 @@ restart_loop: | |||
| 1122 | 1122 | ||
| 1123 | write_unlock(&journal->j_state_lock); | 1123 | write_unlock(&journal->j_state_lock); |
| 1124 | 1124 | ||
| 1125 | if (journal->j_checkpoint_transactions == NULL) { | ||
| 1126 | journal->j_checkpoint_transactions = commit_transaction; | ||
| 1127 | commit_transaction->t_cpnext = commit_transaction; | ||
| 1128 | commit_transaction->t_cpprev = commit_transaction; | ||
| 1129 | } else { | ||
| 1130 | commit_transaction->t_cpnext = | ||
| 1131 | journal->j_checkpoint_transactions; | ||
| 1132 | commit_transaction->t_cpprev = | ||
| 1133 | commit_transaction->t_cpnext->t_cpprev; | ||
| 1134 | commit_transaction->t_cpnext->t_cpprev = | ||
| 1135 | commit_transaction; | ||
| 1136 | commit_transaction->t_cpprev->t_cpnext = | ||
| 1137 | commit_transaction; | ||
| 1138 | } | ||
| 1139 | spin_unlock(&journal->j_list_lock); | ||
| 1140 | /* Drop all spin_locks because commit_callback may be block. | ||
| 1141 | * __journal_remove_checkpoint() can not destroy transaction | ||
| 1142 | * under us because it is not marked as T_FINISHED yet */ | ||
| 1143 | if (journal->j_commit_callback) | 1125 | if (journal->j_commit_callback) |
| 1144 | journal->j_commit_callback(journal, commit_transaction); | 1126 | journal->j_commit_callback(journal, commit_transaction); |
| 1145 | 1127 | ||
| @@ -1150,7 +1132,7 @@ restart_loop: | |||
| 1150 | write_lock(&journal->j_state_lock); | 1132 | write_lock(&journal->j_state_lock); |
| 1151 | spin_lock(&journal->j_list_lock); | 1133 | spin_lock(&journal->j_list_lock); |
| 1152 | commit_transaction->t_state = T_FINISHED; | 1134 | commit_transaction->t_state = T_FINISHED; |
| 1153 | /* Recheck checkpoint lists after j_list_lock was dropped */ | 1135 | /* Check if the transaction can be dropped now that we are finished */ |
| 1154 | if (commit_transaction->t_checkpoint_list == NULL && | 1136 | if (commit_transaction->t_checkpoint_list == NULL && |
| 1155 | commit_transaction->t_checkpoint_io_list == NULL) { | 1137 | commit_transaction->t_checkpoint_io_list == NULL) { |
| 1156 | __jbd2_journal_drop_transaction(journal, commit_transaction); | 1138 | __jbd2_journal_drop_transaction(journal, commit_transaction); |
| @@ -1159,4 +1141,21 @@ restart_loop: | |||
| 1159 | spin_unlock(&journal->j_list_lock); | 1141 | spin_unlock(&journal->j_list_lock); |
| 1160 | write_unlock(&journal->j_state_lock); | 1142 | write_unlock(&journal->j_state_lock); |
| 1161 | wake_up(&journal->j_wait_done_commit); | 1143 | wake_up(&journal->j_wait_done_commit); |
| 1144 | |||
| 1145 | /* | ||
| 1146 | * Calculate overall stats | ||
| 1147 | */ | ||
| 1148 | spin_lock(&journal->j_history_lock); | ||
| 1149 | journal->j_stats.ts_tid++; | ||
| 1150 | journal->j_stats.ts_requested += stats.ts_requested; | ||
| 1151 | journal->j_stats.run.rs_wait += stats.run.rs_wait; | ||
| 1152 | journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; | ||
| 1153 | journal->j_stats.run.rs_running += stats.run.rs_running; | ||
| 1154 | journal->j_stats.run.rs_locked += stats.run.rs_locked; | ||
| 1155 | journal->j_stats.run.rs_flushing += stats.run.rs_flushing; | ||
| 1156 | journal->j_stats.run.rs_logging += stats.run.rs_logging; | ||
| 1157 | journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; | ||
| 1158 | journal->j_stats.run.rs_blocks += stats.run.rs_blocks; | ||
| 1159 | journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; | ||
| 1160 | spin_unlock(&journal->j_history_lock); | ||
| 1162 | } | 1161 | } |
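Two structural moves in the commit.c hunks: the transaction is now spliced into the circular checkpoint list (and j_list_lock dropped) earlier in the commit path, relying on the fact that __journal_remove_checkpoint() cannot destroy it while it is not yet T_FINISHED; and the aggregation into journal->j_stats is deferred until after the waiters on j_wait_done_commit have been woken, taking j_history_lock off the commit latency path. The splice itself is plain circular doubly-linked insertion; a standalone version with illustrative names:

#include <stddef.h>

struct txn { struct txn *cpnext, *cpprev; };

/* Insert t into the circular list headed by *head, exactly as the
 * t_cpnext/t_cpprev code above does. */
void checkpoint_link(struct txn **head, struct txn *t)
{
        if (*head == NULL) {
                *head = t;
                t->cpnext = t->cpprev = t;  /* single-element ring */
        } else {
                t->cpnext = *head;
                t->cpprev = (*head)->cpprev;
                t->cpnext->cpprev = t;
                t->cpprev->cpnext = t;
        }
}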
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5fa344afb49a..67b8e303946c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -122,7 +122,7 @@ EXPORT_SYMBOL(__jbd2_debug); | |||
| 122 | #endif | 122 | #endif |
| 123 | 123 | ||
| 124 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
| 125 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
| 126 | { | 126 | { |
| 127 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 127 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 128 | return 1; | 128 | return 1; |
| @@ -143,7 +143,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | |||
| 143 | return cpu_to_be32(csum); | 143 | return cpu_to_be32(csum); |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | 146 | static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) |
| 147 | { | 147 | { |
| 148 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 148 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 149 | return 1; | 149 | return 1; |
| @@ -151,7 +151,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | |||
| 151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); | 151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) | 154 | static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) |
| 155 | { | 155 | { |
| 156 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 156 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 157 | return; | 157 | return; |
| @@ -302,8 +302,8 @@ static void journal_kill_thread(journal_t *journal) | |||
| 302 | journal->j_flags |= JBD2_UNMOUNT; | 302 | journal->j_flags |= JBD2_UNMOUNT; |
| 303 | 303 | ||
| 304 | while (journal->j_task) { | 304 | while (journal->j_task) { |
| 305 | wake_up(&journal->j_wait_commit); | ||
| 306 | write_unlock(&journal->j_state_lock); | 305 | write_unlock(&journal->j_state_lock); |
| 306 | wake_up(&journal->j_wait_commit); | ||
| 307 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); | 307 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); |
| 308 | write_lock(&journal->j_state_lock); | 308 | write_lock(&journal->j_state_lock); |
| 309 | } | 309 | } |
| @@ -710,8 +710,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
| 710 | while (tid_gt(tid, journal->j_commit_sequence)) { | 710 | while (tid_gt(tid, journal->j_commit_sequence)) { |
| 711 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", | 711 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", |
| 712 | tid, journal->j_commit_sequence); | 712 | tid, journal->j_commit_sequence); |
| 713 | wake_up(&journal->j_wait_commit); | ||
| 714 | read_unlock(&journal->j_state_lock); | 713 | read_unlock(&journal->j_state_lock); |
| 714 | wake_up(&journal->j_wait_commit); | ||
| 715 | wait_event(journal->j_wait_done_commit, | 715 | wait_event(journal->j_wait_done_commit, |
| 716 | !tid_gt(tid, journal->j_commit_sequence)); | 716 | !tid_gt(tid, journal->j_commit_sequence)); |
| 717 | read_lock(&journal->j_state_lock); | 717 | read_lock(&journal->j_state_lock); |
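Both journal.c hunks flip the order of wake_up() and the lock release: waking first produces a runnable task that immediately blocks on the j_state_lock the waker still holds. A runnable pthreads analogue of the corrected ordering:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int commit_sequence;

void commit_finished(void)
{
        pthread_mutex_lock(&lock);
        commit_sequence++;
        pthread_mutex_unlock(&lock);        /* drop the lock first ... */
        pthread_cond_broadcast(&done);      /* ... then wake the waiters */
}

void wait_for_commit(int tid)
{
        pthread_mutex_lock(&lock);
        while (commit_sequence < tid)
                pthread_cond_wait(&done, &lock);
        pthread_mutex_unlock(&lock);
}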
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 60bb365f54a5..38cfcf5f6fce 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -1073,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
| 1073 | * reused here. | 1073 | * reused here. |
| 1074 | */ | 1074 | */ |
| 1075 | jbd_lock_bh_state(bh); | 1075 | jbd_lock_bh_state(bh); |
| 1076 | spin_lock(&journal->j_list_lock); | ||
| 1077 | J_ASSERT_JH(jh, (jh->b_transaction == transaction || | 1076 | J_ASSERT_JH(jh, (jh->b_transaction == transaction || |
| 1078 | jh->b_transaction == NULL || | 1077 | jh->b_transaction == NULL || |
| 1079 | (jh->b_transaction == journal->j_committing_transaction && | 1078 | (jh->b_transaction == journal->j_committing_transaction && |
| @@ -1096,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
| 1096 | jh->b_modified = 0; | 1095 | jh->b_modified = 0; |
| 1097 | 1096 | ||
| 1098 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 1097 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
| 1098 | spin_lock(&journal->j_list_lock); | ||
| 1099 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 1099 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
| 1100 | } else if (jh->b_transaction == journal->j_committing_transaction) { | 1100 | } else if (jh->b_transaction == journal->j_committing_transaction) { |
| 1101 | /* first access by this transaction */ | 1101 | /* first access by this transaction */ |
| 1102 | jh->b_modified = 0; | 1102 | jh->b_modified = 0; |
| 1103 | 1103 | ||
| 1104 | JBUFFER_TRACE(jh, "set next transaction"); | 1104 | JBUFFER_TRACE(jh, "set next transaction"); |
| 1105 | spin_lock(&journal->j_list_lock); | ||
| 1105 | jh->b_next_transaction = transaction; | 1106 | jh->b_next_transaction = transaction; |
| 1106 | } | 1107 | } |
| 1107 | spin_unlock(&journal->j_list_lock); | 1108 | spin_unlock(&journal->j_list_lock); |
| @@ -1312,7 +1313,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1312 | journal->j_running_transaction)) { | 1313 | journal->j_running_transaction)) { |
| 1313 | printk(KERN_ERR "JBD2: %s: " | 1314 | printk(KERN_ERR "JBD2: %s: " |
| 1314 | "jh->b_transaction (%llu, %p, %u) != " | 1315 | "jh->b_transaction (%llu, %p, %u) != " |
| 1315 | "journal->j_running_transaction (%p, %u)", | 1316 | "journal->j_running_transaction (%p, %u)\n", |
| 1316 | journal->j_devname, | 1317 | journal->j_devname, |
| 1317 | (unsigned long long) bh->b_blocknr, | 1318 | (unsigned long long) bh->b_blocknr, |
| 1318 | jh->b_transaction, | 1319 | jh->b_transaction, |
| @@ -1335,30 +1336,25 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1335 | */ | 1336 | */ |
| 1336 | if (jh->b_transaction != transaction) { | 1337 | if (jh->b_transaction != transaction) { |
| 1337 | JBUFFER_TRACE(jh, "already on other transaction"); | 1338 | JBUFFER_TRACE(jh, "already on other transaction"); |
| 1338 | if (unlikely(jh->b_transaction != | 1339 | if (unlikely(((jh->b_transaction != |
| 1339 | journal->j_committing_transaction)) { | 1340 | journal->j_committing_transaction)) || |
| 1340 | printk(KERN_ERR "JBD2: %s: " | 1341 | (jh->b_next_transaction != transaction))) { |
| 1341 | "jh->b_transaction (%llu, %p, %u) != " | 1342 | printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: " |
| 1342 | "journal->j_committing_transaction (%p, %u)", | 1343 | "bad jh for block %llu: " |
| 1344 | "transaction (%p, %u), " | ||
| 1345 | "jh->b_transaction (%p, %u), " | ||
| 1346 | "jh->b_next_transaction (%p, %u), jlist %u\n", | ||
| 1343 | journal->j_devname, | 1347 | journal->j_devname, |
| 1344 | (unsigned long long) bh->b_blocknr, | 1348 | (unsigned long long) bh->b_blocknr, |
| 1349 | transaction, transaction->t_tid, | ||
| 1345 | jh->b_transaction, | 1350 | jh->b_transaction, |
| 1346 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | 1351 | jh->b_transaction ? |
| 1347 | journal->j_committing_transaction, | 1352 | jh->b_transaction->t_tid : 0, |
| 1348 | journal->j_committing_transaction ? | ||
| 1349 | journal->j_committing_transaction->t_tid : 0); | ||
| 1350 | ret = -EINVAL; | ||
| 1351 | } | ||
| 1352 | if (unlikely(jh->b_next_transaction != transaction)) { | ||
| 1353 | printk(KERN_ERR "JBD2: %s: " | ||
| 1354 | "jh->b_next_transaction (%llu, %p, %u) != " | ||
| 1355 | "transaction (%p, %u)", | ||
| 1356 | journal->j_devname, | ||
| 1357 | (unsigned long long) bh->b_blocknr, | ||
| 1358 | jh->b_next_transaction, | 1353 | jh->b_next_transaction, |
| 1359 | jh->b_next_transaction ? | 1354 | jh->b_next_transaction ? |
| 1360 | jh->b_next_transaction->t_tid : 0, | 1355 | jh->b_next_transaction->t_tid : 0, |
| 1361 | transaction, transaction->t_tid); | 1356 | jh->b_jlist); |
| 1357 | WARN_ON(1); | ||
| 1362 | ret = -EINVAL; | 1358 | ret = -EINVAL; |
| 1363 | } | 1359 | } |
| 1364 | /* And this case is illegal: we can't reuse another | 1360 | /* And this case is illegal: we can't reuse another |
| @@ -1415,7 +1411,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1415 | BUFFER_TRACE(bh, "entry"); | 1411 | BUFFER_TRACE(bh, "entry"); |
| 1416 | 1412 | ||
| 1417 | jbd_lock_bh_state(bh); | 1413 | jbd_lock_bh_state(bh); |
| 1418 | spin_lock(&journal->j_list_lock); | ||
| 1419 | 1414 | ||
| 1420 | if (!buffer_jbd(bh)) | 1415 | if (!buffer_jbd(bh)) |
| 1421 | goto not_jbd; | 1416 | goto not_jbd; |
| @@ -1468,6 +1463,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1468 | * we know to remove the checkpoint after we commit. | 1463 | * we know to remove the checkpoint after we commit. |
| 1469 | */ | 1464 | */ |
| 1470 | 1465 | ||
| 1466 | spin_lock(&journal->j_list_lock); | ||
| 1471 | if (jh->b_cp_transaction) { | 1467 | if (jh->b_cp_transaction) { |
| 1472 | __jbd2_journal_temp_unlink_buffer(jh); | 1468 | __jbd2_journal_temp_unlink_buffer(jh); |
| 1473 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1469 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
| @@ -1480,6 +1476,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1480 | goto drop; | 1476 | goto drop; |
| 1481 | } | 1477 | } |
| 1482 | } | 1478 | } |
| 1479 | spin_unlock(&journal->j_list_lock); | ||
| 1483 | } else if (jh->b_transaction) { | 1480 | } else if (jh->b_transaction) { |
| 1484 | J_ASSERT_JH(jh, (jh->b_transaction == | 1481 | J_ASSERT_JH(jh, (jh->b_transaction == |
| 1485 | journal->j_committing_transaction)); | 1482 | journal->j_committing_transaction)); |
| @@ -1491,7 +1488,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1491 | 1488 | ||
| 1492 | if (jh->b_next_transaction) { | 1489 | if (jh->b_next_transaction) { |
| 1493 | J_ASSERT(jh->b_next_transaction == transaction); | 1490 | J_ASSERT(jh->b_next_transaction == transaction); |
| 1491 | spin_lock(&journal->j_list_lock); | ||
| 1494 | jh->b_next_transaction = NULL; | 1492 | jh->b_next_transaction = NULL; |
| 1493 | spin_unlock(&journal->j_list_lock); | ||
| 1495 | 1494 | ||
| 1496 | /* | 1495 | /* |
| 1497 | * only drop a reference if this transaction modified | 1496 | * only drop a reference if this transaction modified |
| @@ -1503,7 +1502,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1503 | } | 1502 | } |
| 1504 | 1503 | ||
| 1505 | not_jbd: | 1504 | not_jbd: |
| 1506 | spin_unlock(&journal->j_list_lock); | ||
| 1507 | jbd_unlock_bh_state(bh); | 1505 | jbd_unlock_bh_state(bh); |
| 1508 | __brelse(bh); | 1506 | __brelse(bh); |
| 1509 | drop: | 1507 | drop: |
| @@ -1821,11 +1819,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1821 | if (buffer_locked(bh) || buffer_dirty(bh)) | 1819 | if (buffer_locked(bh) || buffer_dirty(bh)) |
| 1822 | goto out; | 1820 | goto out; |
| 1823 | 1821 | ||
| 1824 | if (jh->b_next_transaction != NULL) | 1822 | if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) |
| 1825 | goto out; | 1823 | goto out; |
| 1826 | 1824 | ||
| 1827 | spin_lock(&journal->j_list_lock); | 1825 | spin_lock(&journal->j_list_lock); |
| 1828 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1826 | if (jh->b_cp_transaction != NULL) { |
| 1829 | /* written-back checkpointed metadata buffer */ | 1827 | /* written-back checkpointed metadata buffer */ |
| 1830 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1828 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| 1831 | __jbd2_journal_remove_checkpoint(jh); | 1829 | __jbd2_journal_remove_checkpoint(jh); |
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 16a5047903a6..406d9cc84ba8 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c | |||
| @@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in, | |||
| 33 | unsigned char *cpage_out, | 33 | unsigned char *cpage_out, |
| 34 | uint32_t *sourcelen, uint32_t *dstlen) | 34 | uint32_t *sourcelen, uint32_t *dstlen) |
| 35 | { | 35 | { |
| 36 | short positions[256]; | 36 | unsigned short positions[256]; |
| 37 | int outpos = 0; | 37 | int outpos = 0; |
| 38 | int pos=0; | 38 | int pos=0; |
| 39 | 39 | ||
| @@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in, | |||
| 74 | unsigned char *cpage_out, | 74 | unsigned char *cpage_out, |
| 75 | uint32_t srclen, uint32_t destlen) | 75 | uint32_t srclen, uint32_t destlen) |
| 76 | { | 76 | { |
| 77 | short positions[256]; | 77 | unsigned short positions[256]; |
| 78 | int outpos = 0; | 78 | int outpos = 0; |
| 79 | int pos=0; | 79 | int pos=0; |
| 80 | 80 | ||
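The rtime positions[] table records, for each byte value, the offset at which it last occurred in the buffer. Declared as signed short, any recorded offset above 32767 wraps negative, and the back-reference computed from it can then point before the buffer; unsigned short keeps the full 0..65535 range. A two-line demonstration of the wrap (on the usual two's-complement targets):

#include <stdio.h>

int main(void)
{
        short s = (short)40000;             /* implementation-defined: wraps */
        unsigned short u = 40000;           /* well-defined: stays 40000 */

        printf("signed=%d unsigned=%u\n", s, u);    /* signed=-25536 */
        return 0;
}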
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index a69e426435dd..601afd1afddf 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
| @@ -242,7 +242,7 @@ void jffs2_evict_inode (struct inode *inode) | |||
| 242 | 242 | ||
| 243 | jffs2_dbg(1, "%s(): ino #%lu mode %o\n", | 243 | jffs2_dbg(1, "%s(): ino #%lu mode %o\n", |
| 244 | __func__, inode->i_ino, inode->i_mode); | 244 | __func__, inode->i_ino, inode->i_mode); |
| 245 | truncate_inode_pages(&inode->i_data, 0); | 245 | truncate_inode_pages_final(&inode->i_data); |
| 246 | clear_inode(inode); | 246 | clear_inode(inode); |
| 247 | jffs2_do_clear_inode(c, f); | 247 | jffs2_do_clear_inode(c, f); |
| 248 | } | 248 | } |
| @@ -457,12 +457,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
| 457 | The umask is only applied if there's no default ACL */ | 457 | The umask is only applied if there's no default ACL */ |
| 458 | ret = jffs2_init_acl_pre(dir_i, inode, &mode); | 458 | ret = jffs2_init_acl_pre(dir_i, inode, &mode); |
| 459 | if (ret) { | 459 | if (ret) { |
| 460 | make_bad_inode(inode); | 460 | mutex_unlock(&f->sem); |
| 461 | iput(inode); | 461 | make_bad_inode(inode); |
| 462 | return ERR_PTR(ret); | 462 | iput(inode); |
| 463 | return ERR_PTR(ret); | ||
| 463 | } | 464 | } |
| 464 | ret = jffs2_do_new_inode (c, f, mode, ri); | 465 | ret = jffs2_do_new_inode (c, f, mode, ri); |
| 465 | if (ret) { | 466 | if (ret) { |
| 467 | mutex_unlock(&f->sem); | ||
| 466 | make_bad_inode(inode); | 468 | make_bad_inode(inode); |
| 467 | iput(inode); | 469 | iput(inode); |
| 468 | return ERR_PTR(ret); | 470 | return ERR_PTR(ret); |
| @@ -479,6 +481,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
| 479 | inode->i_size = 0; | 481 | inode->i_size = 0; |
| 480 | 482 | ||
| 481 | if (insert_inode_locked(inode) < 0) { | 483 | if (insert_inode_locked(inode) < 0) { |
| 484 | mutex_unlock(&f->sem); | ||
| 482 | make_bad_inode(inode); | 485 | make_bad_inode(inode); |
| 483 | iput(inode); | 486 | iput(inode); |
| 484 | return ERR_PTR(-EINVAL); | 487 | return ERR_PTR(-EINVAL); |
| @@ -687,7 +690,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, | |||
| 687 | struct inode *inode = OFNI_EDONI_2SFFJ(f); | 690 | struct inode *inode = OFNI_EDONI_2SFFJ(f); |
| 688 | struct page *pg; | 691 | struct page *pg; |
| 689 | 692 | ||
| 690 | pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, | 693 | pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, |
| 691 | (void *)jffs2_do_readpage_unlock, inode); | 694 | (void *)jffs2_do_readpage_unlock, inode); |
| 692 | if (IS_ERR(pg)) | 695 | if (IS_ERR(pg)) |
| 693 | return (void *)pg; | 696 | return (void *)pg; |
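The jffs2_new_inode() hunks add the mutex_unlock(&f->sem) that was missing from three error paths: the subsequent iput() ends up in jffs2_evict_inode(), whose cleanup takes f->sem again, so returning with the lock held would self-deadlock. A sketch of the same error handling folded into a single exit label so the unlock cannot be forgotten on a future path; struct ctx, step_one() and step_two() are illustrative stand-ins, not jffs2 functions:

struct inode *example_new_inode(struct ctx *c, struct inode *inode)
{
        int ret;

        mutex_lock(&c->sem);
        ret = step_one(c, inode);
        if (ret)
                goto out_err;
        ret = step_two(c, inode);
        if (ret)
                goto out_err;
        mutex_unlock(&c->sem);
        return inode;

out_err:
        mutex_unlock(&c->sem);              /* never return with c->sem held */
        make_bad_inode(inode);
        iput(inode);
        return ERR_PTR(ret);
}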
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index e4619b00f7c5..fa35ff79ab35 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
| @@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info | |||
| 231 | uint32_t version; | 231 | uint32_t version; |
| 232 | uint32_t data_crc; | 232 | uint32_t data_crc; |
| 233 | uint32_t partial_crc; | 233 | uint32_t partial_crc; |
| 234 | uint16_t csize; | 234 | uint32_t csize; |
| 235 | uint16_t overlapped; | 235 | uint16_t overlapped; |
| 236 | }; | 236 | }; |
| 237 | 237 | ||
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 03310721712f..b6bd4affd9ad 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c | |||
| @@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, | |||
| 179 | spin_unlock(&c->erase_completion_lock); | 179 | spin_unlock(&c->erase_completion_lock); |
| 180 | 180 | ||
| 181 | schedule(); | 181 | schedule(); |
| 182 | remove_wait_queue(&c->erase_wait, &wait); | ||
| 182 | } else | 183 | } else |
| 183 | spin_unlock(&c->erase_completion_lock); | 184 | spin_unlock(&c->erase_completion_lock); |
| 184 | } else if (ret) | 185 | } else if (ret) |
| @@ -211,20 +212,25 @@ out: | |||
| 211 | int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, | 212 | int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, |
| 212 | uint32_t *len, uint32_t sumsize) | 213 | uint32_t *len, uint32_t sumsize) |
| 213 | { | 214 | { |
| 214 | int ret = -EAGAIN; | 215 | int ret; |
| 215 | minsize = PAD(minsize); | 216 | minsize = PAD(minsize); |
| 216 | 217 | ||
| 217 | jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); | 218 | jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); |
| 218 | 219 | ||
| 219 | spin_lock(&c->erase_completion_lock); | 220 | while (true) { |
| 220 | while(ret == -EAGAIN) { | 221 | spin_lock(&c->erase_completion_lock); |
| 221 | ret = jffs2_do_reserve_space(c, minsize, len, sumsize); | 222 | ret = jffs2_do_reserve_space(c, minsize, len, sumsize); |
| 222 | if (ret) { | 223 | if (ret) { |
| 223 | jffs2_dbg(1, "%s(): looping, ret is %d\n", | 224 | jffs2_dbg(1, "%s(): looping, ret is %d\n", |
| 224 | __func__, ret); | 225 | __func__, ret); |
| 225 | } | 226 | } |
| 227 | spin_unlock(&c->erase_completion_lock); | ||
| 228 | |||
| 229 | if (ret == -EAGAIN) | ||
| 230 | cond_resched(); | ||
| 231 | else | ||
| 232 | break; | ||
| 226 | } | 233 | } |
| 227 | spin_unlock(&c->erase_completion_lock); | ||
| 228 | if (!ret) | 234 | if (!ret) |
| 229 | ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); | 235 | ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); |
| 230 | 236 | ||
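Two distinct fixes in nodemgmt.c: jffs2_reserve_space() was leaving its on-stack wait entry linked into c->erase_wait after schedule() returned, and jffs2_reserve_space_gc() was spinning on -EAGAIN with erase_completion_lock held across the whole loop. The first completes the classic open-coded sleep, sketched here in kernel style (the surrounding locking is jffs2's; the shape is generic):

        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue(&c->erase_wait, &wait);
        set_current_state(TASK_UNINTERRUPTIBLE);
        spin_unlock(&c->erase_completion_lock);
        schedule();                                 /* sleep until woken */
        remove_wait_queue(&c->erase_wait, &wait);   /* the line the fix adds */

The second fix restructures the GC retry loop so the spinlock is taken only around each reservation attempt, with cond_resched() between attempts instead of a lock-holding busy loop.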
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0defb1cc2a35..0918f0e2e266 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
| @@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 243 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 243 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
| 244 | int err; | 244 | int err; |
| 245 | 245 | ||
| 246 | sync_filesystem(sb); | ||
| 246 | err = jffs2_parse_options(c, data); | 247 | err = jffs2_parse_options(c, data); |
| 247 | if (err) | 248 | if (err) |
| 248 | return -EINVAL; | 249 | return -EINVAL; |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index f4aab719add5..6f8fe72c2a7a 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
| @@ -154,7 +154,7 @@ void jfs_evict_inode(struct inode *inode) | |||
| 154 | dquot_initialize(inode); | 154 | dquot_initialize(inode); |
| 155 | 155 | ||
| 156 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { | 156 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { |
| 157 | truncate_inode_pages(&inode->i_data, 0); | 157 | truncate_inode_pages_final(&inode->i_data); |
| 158 | 158 | ||
| 159 | if (test_cflag(COMMIT_Freewmap, inode)) | 159 | if (test_cflag(COMMIT_Freewmap, inode)) |
| 160 | jfs_free_zero_link(inode); | 160 | jfs_free_zero_link(inode); |
| @@ -168,7 +168,7 @@ void jfs_evict_inode(struct inode *inode) | |||
| 168 | dquot_free_inode(inode); | 168 | dquot_free_inode(inode); |
| 169 | } | 169 | } |
| 170 | } else { | 170 | } else { |
| 171 | truncate_inode_pages(&inode->i_data, 0); | 171 | truncate_inode_pages_final(&inode->i_data); |
| 172 | } | 172 | } |
| 173 | clear_inode(inode); | 173 | clear_inode(inode); |
| 174 | dquot_drop(inode); | 174 | dquot_drop(inode); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index e2b7483444fd..97f7fda51890 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -418,6 +418,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 418 | int flag = JFS_SBI(sb)->flag; | 418 | int flag = JFS_SBI(sb)->flag; |
| 419 | int ret; | 419 | int ret; |
| 420 | 420 | ||
| 421 | sync_filesystem(sb); | ||
| 421 | if (!parse_options(data, sb, &newLVSize, &flag)) { | 422 | if (!parse_options(data, sb, &newLVSize, &flag)) { |
| 422 | return -EINVAL; | 423 | return -EINVAL; |
| 423 | } | 424 | } |
diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig new file mode 100644 index 000000000000..397b5f7a7a16 --- /dev/null +++ b/fs/kernfs/Kconfig | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | # | ||
| 2 | # KERNFS should be selected by its users | ||
| 3 | # | ||
| 4 | |||
| 5 | config KERNFS | ||
| 6 | bool | ||
| 7 | default n | ||
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bd6e18be6e1a..78f3403300af 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | * This file is released under the GPLv2. | 8 | * This file is released under the GPLv2. |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/sched.h> | ||
| 11 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
| 12 | #include <linux/namei.h> | 13 | #include <linux/namei.h> |
| 13 | #include <linux/idr.h> | 14 | #include <linux/idr.h> |
| @@ -18,9 +19,162 @@ | |||
| 18 | #include "kernfs-internal.h" | 19 | #include "kernfs-internal.h" |
| 19 | 20 | ||
| 20 | DEFINE_MUTEX(kernfs_mutex); | 21 | DEFINE_MUTEX(kernfs_mutex); |
| 22 | static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ | ||
| 23 | static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ | ||
| 21 | 24 | ||
| 22 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) | 25 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) |
| 23 | 26 | ||
| 27 | static bool kernfs_active(struct kernfs_node *kn) | ||
| 28 | { | ||
| 29 | lockdep_assert_held(&kernfs_mutex); | ||
| 30 | return atomic_read(&kn->active) >= 0; | ||
| 31 | } | ||
| 32 | |||
| 33 | static bool kernfs_lockdep(struct kernfs_node *kn) | ||
| 34 | { | ||
| 35 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 36 | return kn->flags & KERNFS_LOCKDEP; | ||
| 37 | #else | ||
| 38 | return false; | ||
| 39 | #endif | ||
| 40 | } | ||
| 41 | |||
| 42 | static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) | ||
| 43 | { | ||
| 44 | return strlcpy(buf, kn->parent ? kn->name : "/", buflen); | ||
| 45 | } | ||
| 46 | |||
| 47 | static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, | ||
| 48 | size_t buflen) | ||
| 49 | { | ||
| 50 | char *p = buf + buflen; | ||
| 51 | int len; | ||
| 52 | |||
| 53 | *--p = '\0'; | ||
| 54 | |||
| 55 | do { | ||
| 56 | len = strlen(kn->name); | ||
| 57 | if (p - buf < len + 1) { | ||
| 58 | buf[0] = '\0'; | ||
| 59 | p = NULL; | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | p -= len; | ||
| 63 | memcpy(p, kn->name, len); | ||
| 64 | *--p = '/'; | ||
| 65 | kn = kn->parent; | ||
| 66 | } while (kn && kn->parent); | ||
| 67 | |||
| 68 | return p; | ||
| 69 | } | ||
| 70 | |||
| 71 | /** | ||
| 72 | * kernfs_name - obtain the name of a given node | ||
| 73 | * @kn: kernfs_node of interest | ||
| 74 | * @buf: buffer to copy @kn's name into | ||
| 75 | * @buflen: size of @buf | ||
| 76 | * | ||
| 77 | * Copies the name of @kn into @buf of @buflen bytes. The behavior is | ||
| 78 | * similar to strlcpy(). It returns the length of @kn's name and if @buf | ||
| 79 | * isn't long enough, it's filled up to @buflen-1 and nul terminated. | ||
| 80 | * | ||
| 81 | * This function can be called from any context. | ||
| 82 | */ | ||
| 83 | int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) | ||
| 84 | { | ||
| 85 | unsigned long flags; | ||
| 86 | int ret; | ||
| 87 | |||
| 88 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
| 89 | ret = kernfs_name_locked(kn, buf, buflen); | ||
| 90 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
| 91 | return ret; | ||
| 92 | } | ||
| 93 | |||
| 94 | /** | ||
| 95 | * kernfs_path - build full path of a given node | ||
| 96 | * @kn: kernfs_node of interest | ||
| 97 | * @buf: buffer to copy @kn's name into | ||
| 98 | * @buflen: size of @buf | ||
| 99 | * | ||
| 100 | * Builds and returns the full path of @kn in @buf of @buflen bytes. The | ||
| 101 | * path is built from the end of @buf so the returned pointer usually | ||
| 102 | * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated | ||
| 103 | * and %NULL is returned. | ||
| 104 | */ | ||
| 105 | char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) | ||
| 106 | { | ||
| 107 | unsigned long flags; | ||
| 108 | char *p; | ||
| 109 | |||
| 110 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
| 111 | p = kernfs_path_locked(kn, buf, buflen); | ||
| 112 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
| 113 | return p; | ||
| 114 | } | ||
| 115 | EXPORT_SYMBOL_GPL(kernfs_path); | ||
| 116 | |||
| 117 | /** | ||
| 118 | * pr_cont_kernfs_name - pr_cont name of a kernfs_node | ||
| 119 | * @kn: kernfs_node of interest | ||
| 120 | * | ||
| 121 | * This function can be called from any context. | ||
| 122 | */ | ||
| 123 | void pr_cont_kernfs_name(struct kernfs_node *kn) | ||
| 124 | { | ||
| 125 | unsigned long flags; | ||
| 126 | |||
| 127 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
| 128 | |||
| 129 | kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); | ||
| 130 | pr_cont("%s", kernfs_pr_cont_buf); | ||
| 131 | |||
| 132 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
| 133 | } | ||
| 134 | |||
| 135 | /** | ||
| 136 | * pr_cont_kernfs_path - pr_cont path of a kernfs_node | ||
| 137 | * @kn: kernfs_node of interest | ||
| 138 | * | ||
| 139 | * This function can be called from any context. | ||
| 140 | */ | ||
| 141 | void pr_cont_kernfs_path(struct kernfs_node *kn) | ||
| 142 | { | ||
| 143 | unsigned long flags; | ||
| 144 | char *p; | ||
| 145 | |||
| 146 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
| 147 | |||
| 148 | p = kernfs_path_locked(kn, kernfs_pr_cont_buf, | ||
| 149 | sizeof(kernfs_pr_cont_buf)); | ||
| 150 | if (p) | ||
| 151 | pr_cont("%s", p); | ||
| 152 | else | ||
| 153 | pr_cont("<name too long>"); | ||
| 154 | |||
| 155 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
| 156 | } | ||
| 157 | |||
| 158 | /** | ||
| 159 | * kernfs_get_parent - determine the parent node and pin it | ||
| 160 | * @kn: kernfs_node of interest | ||
| 161 | * | ||
| 162 | * Determines @kn's parent, pins and returns it. This function can be | ||
| 163 | * called from any context. | ||
| 164 | */ | ||
| 165 | struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) | ||
| 166 | { | ||
| 167 | struct kernfs_node *parent; | ||
| 168 | unsigned long flags; | ||
| 169 | |||
| 170 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
| 171 | parent = kn->parent; | ||
| 172 | kernfs_get(parent); | ||
| 173 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
| 174 | |||
| 175 | return parent; | ||
| 176 | } | ||
| 177 | |||
| 24 | /** | 178 | /** |
| 25 | * kernfs_name_hash | 179 | * kernfs_name_hash |
| 26 | * @name: Null terminated string to hash | 180 | * @name: Null terminated string to hash |
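kernfs_path_locked() above assembles the path right to left: it starts at the end of the caller's buffer, prepends one component per iteration, and returns a pointer into the middle of the buffer, which is why the result usually does not equal buf. A runnable userspace reimplementation of the same walk:

#include <stdio.h>
#include <string.h>

struct node { const char *name; struct node *parent; };

/* Build the path from leaf to root into the tail of buf; returns a
 * pointer to the start of the path, or NULL if buf is too small. */
static char *build_path(struct node *n, char *buf, size_t buflen)
{
        char *p = buf + buflen;

        *--p = '\0';
        do {
                size_t len = strlen(n->name);

                if ((size_t)(p - buf) < len + 1) {
                        buf[0] = '\0';
                        return NULL;
                }
                p -= len;
                memcpy(p, n->name, len);
                *--p = '/';
                n = n->parent;
        } while (n && n->parent);           /* stop at the root, as above */
        return p;
}

int main(void)
{
        struct node root = { "/", NULL };
        struct node dev = { "devices", &root };
        struct node cpu = { "cpu0", &dev };
        char buf[64];

        puts(build_path(&cpu, buf, sizeof(buf)));   /* /devices/cpu0 */
        return 0;
}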
| @@ -37,7 +191,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) | |||
| 37 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); | 191 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); |
| 38 | hash &= 0x7fffffffU; | 192 | hash &= 0x7fffffffU; |
| 39 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ | 193 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ |
| 40 | if (hash < 1) | 194 | if (hash < 2) |
| 41 | hash += 2; | 195 | hash += 2; |
| 42 | if (hash >= INT_MAX) | 196 | if (hash >= INT_MAX) |
| 43 | hash = INT_MAX - 1; | 197 | hash = INT_MAX - 1; |
| @@ -105,18 +259,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn) | |||
| 105 | * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree | 259 | * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree |
| 106 | * @kn: kernfs_node of interest | 260 | * @kn: kernfs_node of interest |
| 107 | * | 261 | * |
| 108 | * Unlink @kn from its sibling rbtree which starts from | 262 | * Try to unlink @kn from its sibling rbtree which starts from |
| 109 | * kn->parent->dir.children. | 263 | * kn->parent->dir.children. Returns %true if @kn was actually |
| 264 | * removed, %false if @kn wasn't on the rbtree. | ||
| 110 | * | 265 | * |
| 111 | * Locking: | 266 | * Locking: |
| 112 | * mutex_lock(kernfs_mutex) | 267 | * mutex_lock(kernfs_mutex) |
| 113 | */ | 268 | */ |
| 114 | static void kernfs_unlink_sibling(struct kernfs_node *kn) | 269 | static bool kernfs_unlink_sibling(struct kernfs_node *kn) |
| 115 | { | 270 | { |
| 271 | if (RB_EMPTY_NODE(&kn->rb)) | ||
| 272 | return false; | ||
| 273 | |||
| 116 | if (kernfs_type(kn) == KERNFS_DIR) | 274 | if (kernfs_type(kn) == KERNFS_DIR) |
| 117 | kn->parent->dir.subdirs--; | 275 | kn->parent->dir.subdirs--; |
| 118 | 276 | ||
| 119 | rb_erase(&kn->rb, &kn->parent->dir.children); | 277 | rb_erase(&kn->rb, &kn->parent->dir.children); |
| 278 | RB_CLEAR_NODE(&kn->rb); | ||
| 279 | return true; | ||
| 120 | } | 280 | } |
| 121 | 281 | ||
| 122 | /** | 282 | /** |
| @@ -137,7 +297,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) | |||
| 137 | if (!atomic_inc_unless_negative(&kn->active)) | 297 | if (!atomic_inc_unless_negative(&kn->active)) |
| 138 | return NULL; | 298 | return NULL; |
| 139 | 299 | ||
| 140 | if (kn->flags & KERNFS_LOCKDEP) | 300 | if (kernfs_lockdep(kn)) |
| 141 | rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); | 301 | rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); |
| 142 | return kn; | 302 | return kn; |
| 143 | } | 303 | } |
| @@ -151,59 +311,57 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) | |||
| 151 | */ | 311 | */ |
| 152 | void kernfs_put_active(struct kernfs_node *kn) | 312 | void kernfs_put_active(struct kernfs_node *kn) |
| 153 | { | 313 | { |
| 314 | struct kernfs_root *root = kernfs_root(kn); | ||
| 154 | int v; | 315 | int v; |
| 155 | 316 | ||
| 156 | if (unlikely(!kn)) | 317 | if (unlikely(!kn)) |
| 157 | return; | 318 | return; |
| 158 | 319 | ||
| 159 | if (kn->flags & KERNFS_LOCKDEP) | 320 | if (kernfs_lockdep(kn)) |
| 160 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | 321 | rwsem_release(&kn->dep_map, 1, _RET_IP_); |
| 161 | v = atomic_dec_return(&kn->active); | 322 | v = atomic_dec_return(&kn->active); |
| 162 | if (likely(v != KN_DEACTIVATED_BIAS)) | 323 | if (likely(v != KN_DEACTIVATED_BIAS)) |
| 163 | return; | 324 | return; |
| 164 | 325 | ||
| 165 | /* | 326 | wake_up_all(&root->deactivate_waitq); |
| 166 | * atomic_dec_return() is a mb(), we'll always see the updated | ||
| 167 | * kn->u.completion. | ||
| 168 | */ | ||
| 169 | complete(kn->u.completion); | ||
| 170 | } | 327 | } |
| 171 | 328 | ||
| 172 | /** | 329 | /** |
| 173 | * kernfs_deactivate - deactivate kernfs_node | 330 | * kernfs_drain - drain kernfs_node |
| 174 | * @kn: kernfs_node to deactivate | 331 | * @kn: kernfs_node to drain |
| 175 | * | 332 | * |
| 176 | * Deny new active references and drain existing ones. | 333 | * Drain existing usages and nuke all existing mmaps of @kn. Multiple |
| 334 | * removers may invoke this function concurrently on @kn and all will | ||
| 335 | * return after draining is complete. | ||
| 177 | */ | 336 | */ |
| 178 | static void kernfs_deactivate(struct kernfs_node *kn) | 337 | static void kernfs_drain(struct kernfs_node *kn) |
| 338 | __releases(&kernfs_mutex) __acquires(&kernfs_mutex) | ||
| 179 | { | 339 | { |
| 180 | DECLARE_COMPLETION_ONSTACK(wait); | 340 | struct kernfs_root *root = kernfs_root(kn); |
| 181 | int v; | ||
| 182 | 341 | ||
| 183 | BUG_ON(!(kn->flags & KERNFS_REMOVED)); | 342 | lockdep_assert_held(&kernfs_mutex); |
| 184 | 343 | WARN_ON_ONCE(kernfs_active(kn)); | |
| 185 | if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) | ||
| 186 | return; | ||
| 187 | 344 | ||
| 188 | kn->u.completion = (void *)&wait; | 345 | mutex_unlock(&kernfs_mutex); |
| 189 | 346 | ||
| 190 | if (kn->flags & KERNFS_LOCKDEP) | 347 | if (kernfs_lockdep(kn)) { |
| 191 | rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); | 348 | rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); |
| 192 | /* atomic_add_return() is a mb(), put_active() will always see | 349 | if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) |
| 193 | * the updated kn->u.completion. | ||
| 194 | */ | ||
| 195 | v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); | ||
| 196 | |||
| 197 | if (v != KN_DEACTIVATED_BIAS) { | ||
| 198 | if (kn->flags & KERNFS_LOCKDEP) | ||
| 199 | lock_contended(&kn->dep_map, _RET_IP_); | 350 | lock_contended(&kn->dep_map, _RET_IP_); |
| 200 | wait_for_completion(&wait); | ||
| 201 | } | 351 | } |
| 202 | 352 | ||
| 203 | if (kn->flags & KERNFS_LOCKDEP) { | 353 | /* but everyone should wait for draining */ |
| 354 | wait_event(root->deactivate_waitq, | ||
| 355 | atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); | ||
| 356 | |||
| 357 | if (kernfs_lockdep(kn)) { | ||
| 204 | lock_acquired(&kn->dep_map, _RET_IP_); | 358 | lock_acquired(&kn->dep_map, _RET_IP_); |
| 205 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | 359 | rwsem_release(&kn->dep_map, 1, _RET_IP_); |
| 206 | } | 360 | } |
| 361 | |||
| 362 | kernfs_unmap_bin_file(kn); | ||
| 363 | |||
| 364 | mutex_lock(&kernfs_mutex); | ||
| 207 | } | 365 | } |
| 208 | 366 | ||
| 209 | /** | 367 | /** |
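These hunks replace the completion-based kernfs_deactivate() with kernfs_drain() built on a biased counter: adding KN_DEACTIVATED_BIAS (a large negative value) to kn->active makes atomic_inc_unless_negative() in kernfs_get_active() refuse new references, and a drainer waits until the counter settles exactly at the bias, so multiple removers can wait concurrently. A C11 sketch of the counter protocol; the spin-waits stand in for the kernel's deactivate_waitq sleeps and all names are illustrative:

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>

#define DEACTIVATED_BIAS INT_MIN            /* mirrors KN_DEACTIVATED_BIAS */

static atomic_int active;

bool get_active(void)                       /* atomic_inc_unless_negative() */
{
        int v = atomic_load(&active);

        do {
                if (v < 0)
                        return false;       /* node already deactivated */
        } while (!atomic_compare_exchange_weak(&active, &v, v + 1));
        return true;
}

void put_active(void)
{
        int v = atomic_fetch_sub(&active, 1) - 1;

        if (v == DEACTIVATED_BIAS) {
                /* last in-flight user is gone; the kernel does
                 * wake_up_all(&root->deactivate_waitq) here */
        }
}

void drain(void)                            /* call once per node */
{
        atomic_fetch_add(&active, DEACTIVATED_BIAS);    /* deactivate */
        while (atomic_load(&active) != DEACTIVATED_BIAS)
                ;       /* the kernel sleeps on deactivate_waitq instead */
}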
| @@ -234,13 +392,15 @@ void kernfs_put(struct kernfs_node *kn) | |||
| 234 | return; | 392 | return; |
| 235 | root = kernfs_root(kn); | 393 | root = kernfs_root(kn); |
| 236 | repeat: | 394 | repeat: |
| 237 | /* Moving/renaming is always done while holding reference. | 395 | /* |
| 396 | * Moving/renaming is always done while holding reference. | ||
| 238 | * kn->parent won't change beneath us. | 397 | * kn->parent won't change beneath us. |
| 239 | */ | 398 | */ |
| 240 | parent = kn->parent; | 399 | parent = kn->parent; |
| 241 | 400 | ||
| 242 | WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", | 401 | WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, |
| 243 | parent ? parent->name : "", kn->name); | 402 | "kernfs_put: %s/%s: released with incorrect active_ref %d\n", |
| 403 | parent ? parent->name : "", kn->name, atomic_read(&kn->active)); | ||
| 244 | 404 | ||
| 245 | if (kernfs_type(kn) == KERNFS_LINK) | 405 | if (kernfs_type(kn) == KERNFS_LINK) |
| 246 | kernfs_put(kn->symlink.target_kn); | 406 | kernfs_put(kn->symlink.target_kn); |
| @@ -282,8 +442,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) | |||
| 282 | kn = dentry->d_fsdata; | 442 | kn = dentry->d_fsdata; |
| 283 | mutex_lock(&kernfs_mutex); | 443 | mutex_lock(&kernfs_mutex); |
| 284 | 444 | ||
| 285 | /* The kernfs node has been deleted */ | 445 | /* The kernfs node has been deactivated */ |
| 286 | if (kn->flags & KERNFS_REMOVED) | 446 | if (!kernfs_active(kn)) |
| 287 | goto out_bad; | 447 | goto out_bad; |
| 288 | 448 | ||
| 289 | /* The kernfs node has been moved? */ | 449 | /* The kernfs node has been moved? */ |
| @@ -328,6 +488,24 @@ const struct dentry_operations kernfs_dops = { | |||
| 328 | .d_release = kernfs_dop_release, | 488 | .d_release = kernfs_dop_release, |
| 329 | }; | 489 | }; |
| 330 | 490 | ||
| 491 | /** | ||
| 492 | * kernfs_node_from_dentry - determine kernfs_node associated with a dentry | ||
| 493 | * @dentry: the dentry in question | ||
| 494 | * | ||
| 495 | * Return the kernfs_node associated with @dentry. If @dentry is not a | ||
| 496 | * kernfs one, %NULL is returned. | ||
| 497 | * | ||
| 498 | * While the returned kernfs_node will stay accessible as long as @dentry | ||
| 499 | * is accessible, the returned node can be in any state and the caller is | ||
| 500 | * fully responsible for determining what's accessible. | ||
| 501 | */ | ||
| 502 | struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) | ||
| 503 | { | ||
| 504 | if (dentry->d_sb->s_op == &kernfs_sops) | ||
| 505 | return dentry->d_fsdata; | ||
| 506 | return NULL; | ||
| 507 | } | ||
| 508 | |||
| 331 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | 509 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, |
| 332 | const char *name, umode_t mode, | 510 | const char *name, umode_t mode, |
| 333 | unsigned flags) | 511 | unsigned flags) |
| @@ -352,11 +530,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | |||
| 352 | kn->ino = ret; | 530 | kn->ino = ret; |
| 353 | 531 | ||
| 354 | atomic_set(&kn->count, 1); | 532 | atomic_set(&kn->count, 1); |
| 355 | atomic_set(&kn->active, 0); | 533 | atomic_set(&kn->active, KN_DEACTIVATED_BIAS); |
| 534 | RB_CLEAR_NODE(&kn->rb); | ||
| 356 | 535 | ||
| 357 | kn->name = name; | 536 | kn->name = name; |
| 358 | kn->mode = mode; | 537 | kn->mode = mode; |
| 359 | kn->flags = flags | KERNFS_REMOVED; | 538 | kn->flags = flags; |
| 360 | 539 | ||
| 361 | return kn; | 540 | return kn; |
| 362 | 541 | ||
| @@ -382,69 +561,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, | |||
| 382 | } | 561 | } |
| 383 | 562 | ||
| 384 | /** | 563 | /** |
| 385 | * kernfs_addrm_start - prepare for kernfs_node add/remove | ||
| 386 | * @acxt: pointer to kernfs_addrm_cxt to be used | ||
| 387 | * | ||
| 388 | * This function is called when the caller is about to add or remove | ||
| 389 | * kernfs_node. This function acquires kernfs_mutex. @acxt is used | ||
| 390 | * to keep and pass context to other addrm functions. | ||
| 391 | * | ||
| 392 | * LOCKING: | ||
| 393 | * Kernel thread context (may sleep). kernfs_mutex is locked on | ||
| 394 | * return. | ||
| 395 | */ | ||
| 396 | void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt) | ||
| 397 | __acquires(kernfs_mutex) | ||
| 398 | { | ||
| 399 | memset(acxt, 0, sizeof(*acxt)); | ||
| 400 | |||
| 401 | mutex_lock(&kernfs_mutex); | ||
| 402 | } | ||
| 403 | |||
| 404 | /** | ||
| 405 | * kernfs_add_one - add kernfs_node to parent without warning | 564 | * kernfs_add_one - add kernfs_node to parent without warning |
| 406 | * @acxt: addrm context to use | ||
| 407 | * @kn: kernfs_node to be added | 565 | * @kn: kernfs_node to be added |
| 408 | * | 566 | * |
| 409 | * The caller must already have initialized @kn->parent. This | 567 | * The caller must already have initialized @kn->parent. This |
| 410 | * function increments nlink of the parent's inode if @kn is a | 568 | * function increments nlink of the parent's inode if @kn is a |
| 411 | * directory and link into the children list of the parent. | 569 | * directory and link into the children list of the parent. |
| 412 | * | 570 | * |
| 413 | * This function should be called between calls to | ||
| 414 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed | ||
| 415 | * the same @acxt as passed to kernfs_addrm_start(). | ||
| 416 | * | ||
| 417 | * LOCKING: | ||
| 418 | * Determined by kernfs_addrm_start(). | ||
| 419 | * | ||
| 420 | * RETURNS: | 571 | * RETURNS: |
| 421 | * 0 on success, -EEXIST if entry with the given name already | 572 | * 0 on success, -EEXIST if entry with the given name already |
| 422 | * exists. | 573 | * exists. |
| 423 | */ | 574 | */ |
| 424 | int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) | 575 | int kernfs_add_one(struct kernfs_node *kn) |
| 425 | { | 576 | { |
| 426 | struct kernfs_node *parent = kn->parent; | 577 | struct kernfs_node *parent = kn->parent; |
| 427 | bool has_ns = kernfs_ns_enabled(parent); | ||
| 428 | struct kernfs_iattrs *ps_iattr; | 578 | struct kernfs_iattrs *ps_iattr; |
| 579 | bool has_ns; | ||
| 429 | int ret; | 580 | int ret; |
| 430 | 581 | ||
| 431 | if (has_ns != (bool)kn->ns) { | 582 | mutex_lock(&kernfs_mutex); |
| 432 | WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", | 583 | |
| 433 | has_ns ? "required" : "invalid", parent->name, kn->name); | 584 | ret = -EINVAL; |
| 434 | return -EINVAL; | 585 | has_ns = kernfs_ns_enabled(parent); |
| 435 | } | 586 | if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", |
| 587 | has_ns ? "required" : "invalid", parent->name, kn->name)) | ||
| 588 | goto out_unlock; | ||
| 436 | 589 | ||
| 437 | if (kernfs_type(parent) != KERNFS_DIR) | 590 | if (kernfs_type(parent) != KERNFS_DIR) |
| 438 | return -EINVAL; | 591 | goto out_unlock; |
| 439 | 592 | ||
| 440 | if (parent->flags & KERNFS_REMOVED) | 593 | ret = -ENOENT; |
| 441 | return -ENOENT; | 594 | if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) |
| 595 | goto out_unlock; | ||
| 442 | 596 | ||
| 443 | kn->hash = kernfs_name_hash(kn->name, kn->ns); | 597 | kn->hash = kernfs_name_hash(kn->name, kn->ns); |
| 444 | 598 | ||
| 445 | ret = kernfs_link_sibling(kn); | 599 | ret = kernfs_link_sibling(kn); |
| 446 | if (ret) | 600 | if (ret) |
| 447 | return ret; | 601 | goto out_unlock; |
| 448 | 602 | ||
| 449 | /* Update timestamps on the parent */ | 603 | /* Update timestamps on the parent */ |
| 450 | ps_iattr = parent->iattr; | 604 | ps_iattr = parent->iattr; |
| @@ -453,82 +607,22 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) | |||
| 453 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | 607 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; |
| 454 | } | 608 | } |
| 455 | 609 | ||
| 456 | /* Mark the entry added into directory tree */ | 610 | mutex_unlock(&kernfs_mutex); |
| 457 | kn->flags &= ~KERNFS_REMOVED; | ||
| 458 | |||
| 459 | return 0; | ||
| 460 | } | ||
| 461 | |||
| 462 | /** | ||
| 463 | * kernfs_remove_one - remove kernfs_node from parent | ||
| 464 | * @acxt: addrm context to use | ||
| 465 | * @kn: kernfs_node to be removed | ||
| 466 | * | ||
| 467 | * Mark @kn removed and drop nlink of parent inode if @kn is a | ||
| 468 | * directory. @kn is unlinked from the children list. | ||
| 469 | * | ||
| 470 | * This function should be called between calls to | ||
| 471 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be | ||
| 472 | * passed the same @acxt as passed to kernfs_addrm_start(). | ||
| 473 | * | ||
| 474 | * LOCKING: | ||
| 475 | * Determined by kernfs_addrm_start(). | ||
| 476 | */ | ||
| 477 | static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt, | ||
| 478 | struct kernfs_node *kn) | ||
| 479 | { | ||
| 480 | struct kernfs_iattrs *ps_iattr; | ||
| 481 | 611 | ||
| 482 | /* | 612 | /* |
| 483 | * Removal can be called multiple times on the same node. Only the | 613 | * Activate the new node unless CREATE_DEACTIVATED is requested. |
| 484 | * first invocation is effective and puts the base ref. | 614 | * If not activated here, the kernfs user is responsible for |
| 615 | * activating the node with kernfs_activate(). A node which hasn't | ||
| 616 | * been activated is not visible to userland and its removal won't | ||
| 617 | * trigger deactivation. | ||
| 485 | */ | 618 | */ |
| 486 | if (kn->flags & KERNFS_REMOVED) | 619 | if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) |
| 487 | return; | 620 | kernfs_activate(kn); |
| 488 | 621 | return 0; | |
| 489 | if (kn->parent) { | ||
| 490 | kernfs_unlink_sibling(kn); | ||
| 491 | |||
| 492 | /* Update timestamps on the parent */ | ||
| 493 | ps_iattr = kn->parent->iattr; | ||
| 494 | if (ps_iattr) { | ||
| 495 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | ||
| 496 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | ||
| 497 | } | ||
| 498 | } | ||
| 499 | |||
| 500 | kn->flags |= KERNFS_REMOVED; | ||
| 501 | kn->u.removed_list = acxt->removed; | ||
| 502 | acxt->removed = kn; | ||
| 503 | } | ||
| 504 | 622 | ||
| 505 | /** | 623 | out_unlock: |
| 506 | * kernfs_addrm_finish - finish up kernfs_node add/remove | ||
| 507 | * @acxt: addrm context to finish up | ||
| 508 | * | ||
| 509 | * Finish up kernfs_node add/remove. Resources acquired by | ||
| 510 | * kernfs_addrm_start() are released and removed kernfs_nodes are | ||
| 511 | * cleaned up. | ||
| 512 | * | ||
| 513 | * LOCKING: | ||
| 514 | * kernfs_mutex is released. | ||
| 515 | */ | ||
| 516 | void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) | ||
| 517 | __releases(kernfs_mutex) | ||
| 518 | { | ||
| 519 | /* release resources acquired by kernfs_addrm_start() */ | ||
| 520 | mutex_unlock(&kernfs_mutex); | 624 | mutex_unlock(&kernfs_mutex); |
| 521 | 625 | return ret; | |
| 522 | /* kill removed kernfs_nodes */ | ||
| 523 | while (acxt->removed) { | ||
| 524 | struct kernfs_node *kn = acxt->removed; | ||
| 525 | |||
| 526 | acxt->removed = kn->u.removed_list; | ||
| 527 | |||
| 528 | kernfs_deactivate(kn); | ||
| 529 | kernfs_unmap_bin_file(kn); | ||
| 530 | kernfs_put(kn); | ||
| 531 | } | ||
| 532 | } | 626 | } |
| 533 | 627 | ||
| 534 | /** | 628 | /** |
| @@ -599,13 +693,15 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); | |||
| 599 | 693 | ||
| 600 | /** | 694 | /** |
| 601 | * kernfs_create_root - create a new kernfs hierarchy | 695 | * kernfs_create_root - create a new kernfs hierarchy |
| 602 | * @kdops: optional directory syscall operations for the hierarchy | 696 | * @scops: optional syscall operations for the hierarchy |
| 697 | * @flags: KERNFS_ROOT_* flags | ||
| 603 | * @priv: opaque data associated with the new directory | 698 | * @priv: opaque data associated with the new directory |
| 604 | * | 699 | * |
| 605 | * Returns the root of the new hierarchy on success, ERR_PTR() value on | 700 | * Returns the root of the new hierarchy on success, ERR_PTR() value on |
| 606 | * failure. | 701 | * failure. |
| 607 | */ | 702 | */ |
| 608 | struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) | 703 | struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, |
| 704 | unsigned int flags, void *priv) | ||
| 609 | { | 705 | { |
| 610 | struct kernfs_root *root; | 706 | struct kernfs_root *root; |
| 611 | struct kernfs_node *kn; | 707 | struct kernfs_node *kn; |
| @@ -624,12 +720,16 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) | |||
| 624 | return ERR_PTR(-ENOMEM); | 720 | return ERR_PTR(-ENOMEM); |
| 625 | } | 721 | } |
| 626 | 722 | ||
| 627 | kn->flags &= ~KERNFS_REMOVED; | ||
| 628 | kn->priv = priv; | 723 | kn->priv = priv; |
| 629 | kn->dir.root = root; | 724 | kn->dir.root = root; |
| 630 | 725 | ||
| 631 | root->dir_ops = kdops; | 726 | root->syscall_ops = scops; |
| 727 | root->flags = flags; | ||
| 632 | root->kn = kn; | 728 | root->kn = kn; |
| 729 | init_waitqueue_head(&root->deactivate_waitq); | ||
| 730 | |||
| 731 | if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) | ||
| 732 | kernfs_activate(kn); | ||
| 633 | 733 | ||
| 634 | return root; | 734 | return root; |
| 635 | } | 735 | } |
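A minimal caller sketch of the updated interface. The callbacks and private pointer (my_mkdir, my_rmdir, my_priv) are hypothetical placeholders, not part of this patch; only the signature and the KERNFS_ROOT_CREATE_DEACTIVATED flag come from the hunks above:

    static struct kernfs_syscall_ops my_scops = {
            .mkdir = my_mkdir,      /* backs mkdir(2) inside the hierarchy */
            .rmdir = my_rmdir,      /* backs rmdir(2) */
    };

    struct kernfs_root *root;

    /* every node created under this root starts deactivated */
    root = kernfs_create_root(&my_scops, KERNFS_ROOT_CREATE_DEACTIVATED,
                              my_priv);
    if (IS_ERR(root))
            return PTR_ERR(root);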
| @@ -660,7 +760,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, | |||
| 660 | const char *name, umode_t mode, | 760 | const char *name, umode_t mode, |
| 661 | void *priv, const void *ns) | 761 | void *priv, const void *ns) |
| 662 | { | 762 | { |
| 663 | struct kernfs_addrm_cxt acxt; | ||
| 664 | struct kernfs_node *kn; | 763 | struct kernfs_node *kn; |
| 665 | int rc; | 764 | int rc; |
| 666 | 765 | ||
| @@ -674,10 +773,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, | |||
| 674 | kn->priv = priv; | 773 | kn->priv = priv; |
| 675 | 774 | ||
| 676 | /* link in */ | 775 | /* link in */ |
| 677 | kernfs_addrm_start(&acxt); | 776 | rc = kernfs_add_one(kn); |
| 678 | rc = kernfs_add_one(&acxt, kn); | ||
| 679 | kernfs_addrm_finish(&acxt); | ||
| 680 | |||
| 681 | if (!rc) | 777 | if (!rc) |
| 682 | return kn; | 778 | return kn; |
| 683 | 779 | ||
| @@ -703,7 +799,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, | |||
| 703 | kn = kernfs_find_ns(parent, dentry->d_name.name, ns); | 799 | kn = kernfs_find_ns(parent, dentry->d_name.name, ns); |
| 704 | 800 | ||
| 705 | /* no such entry */ | 801 | /* no such entry */ |
| 706 | if (!kn) { | 802 | if (!kn || !kernfs_active(kn)) { |
| 707 | ret = NULL; | 803 | ret = NULL; |
| 708 | goto out_unlock; | 804 | goto out_unlock; |
| 709 | } | 805 | } |
| @@ -728,23 +824,37 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, | |||
| 728 | umode_t mode) | 824 | umode_t mode) |
| 729 | { | 825 | { |
| 730 | struct kernfs_node *parent = dir->i_private; | 826 | struct kernfs_node *parent = dir->i_private; |
| 731 | struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; | 827 | struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; |
| 828 | int ret; | ||
| 732 | 829 | ||
| 733 | if (!kdops || !kdops->mkdir) | 830 | if (!scops || !scops->mkdir) |
| 734 | return -EPERM; | 831 | return -EPERM; |
| 735 | 832 | ||
| 736 | return kdops->mkdir(parent, dentry->d_name.name, mode); | 833 | if (!kernfs_get_active(parent)) |
| 834 | return -ENODEV; | ||
| 835 | |||
| 836 | ret = scops->mkdir(parent, dentry->d_name.name, mode); | ||
| 837 | |||
| 838 | kernfs_put_active(parent); | ||
| 839 | return ret; | ||
| 737 | } | 840 | } |
| 738 | 841 | ||
| 739 | static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) | 842 | static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) |
| 740 | { | 843 | { |
| 741 | struct kernfs_node *kn = dentry->d_fsdata; | 844 | struct kernfs_node *kn = dentry->d_fsdata; |
| 742 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | 845 | struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; |
| 846 | int ret; | ||
| 743 | 847 | ||
| 744 | if (!kdops || !kdops->rmdir) | 848 | if (!scops || !scops->rmdir) |
| 745 | return -EPERM; | 849 | return -EPERM; |
| 746 | 850 | ||
| 747 | return kdops->rmdir(kn); | 851 | if (!kernfs_get_active(kn)) |
| 852 | return -ENODEV; | ||
| 853 | |||
| 854 | ret = scops->rmdir(kn); | ||
| 855 | |||
| 856 | kernfs_put_active(kn); | ||
| 857 | return ret; | ||
| 748 | } | 858 | } |
| 749 | 859 | ||
| 750 | static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, | 860 | static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, |
| @@ -752,12 +862,25 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 752 | { | 862 | { |
| 753 | struct kernfs_node *kn = old_dentry->d_fsdata; | 863 | struct kernfs_node *kn = old_dentry->d_fsdata; |
| 754 | struct kernfs_node *new_parent = new_dir->i_private; | 864 | struct kernfs_node *new_parent = new_dir->i_private; |
| 755 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | 865 | struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; |
| 866 | int ret; | ||
| 756 | 867 | ||
| 757 | if (!kdops || !kdops->rename) | 868 | if (!scops || !scops->rename) |
| 758 | return -EPERM; | 869 | return -EPERM; |
| 759 | 870 | ||
| 760 | return kdops->rename(kn, new_parent, new_dentry->d_name.name); | 871 | if (!kernfs_get_active(kn)) |
| 872 | return -ENODEV; | ||
| 873 | |||
| 874 | if (!kernfs_get_active(new_parent)) { | ||
| 875 | kernfs_put_active(kn); | ||
| 876 | return -ENODEV; | ||
| 877 | } | ||
| 878 | |||
| 879 | ret = scops->rename(kn, new_parent, new_dentry->d_name.name); | ||
| 880 | |||
| 881 | kernfs_put_active(new_parent); | ||
| 882 | kernfs_put_active(kn); | ||
| 883 | return ret; | ||
| 761 | } | 884 | } |
| 762 | 885 | ||
| 763 | const struct inode_operations kernfs_dir_iops = { | 886 | const struct inode_operations kernfs_dir_iops = { |
| @@ -830,23 +953,104 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, | |||
| 830 | return pos->parent; | 953 | return pos->parent; |
| 831 | } | 954 | } |
| 832 | 955 | ||
| 833 | static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, | 956 | /** |
| 834 | struct kernfs_node *kn) | 957 | * kernfs_activate - activate a node which started deactivated |
| 958 | * @kn: kernfs_node whose subtree is to be activated | ||
| 959 | * | ||
| 960 | * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node | ||
| 961 | * needs to be explicitly activated. A node which hasn't been activated | ||
| 962 | * isn't visible to userland and deactivation is skipped during its | ||
| 963 | * removal. This is useful to construct atomic init sequences where | ||
| 964 | * creation of multiple nodes should either succeed or fail atomically. | ||
| 965 | * | ||
| 966 | * The caller is responsible for ensuring that this function is not called | ||
| 967 | * after kernfs_remove*() is invoked on @kn. | ||
| 968 | */ | ||
| 969 | void kernfs_activate(struct kernfs_node *kn) | ||
| 835 | { | 970 | { |
| 836 | struct kernfs_node *pos, *next; | 971 | struct kernfs_node *pos; |
| 837 | 972 | ||
| 838 | if (!kn) | 973 | mutex_lock(&kernfs_mutex); |
| 974 | |||
| 975 | pos = NULL; | ||
| 976 | while ((pos = kernfs_next_descendant_post(pos, kn))) { | ||
| 977 | if (!pos || (pos->flags & KERNFS_ACTIVATED)) | ||
| 978 | continue; | ||
| 979 | |||
| 980 | WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); | ||
| 981 | WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); | ||
| 982 | |||
| 983 | atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); | ||
| 984 | pos->flags |= KERNFS_ACTIVATED; | ||
| 985 | } | ||
| 986 | |||
| 987 | mutex_unlock(&kernfs_mutex); | ||
| 988 | } | ||
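This pairs with KERNFS_ROOT_CREATE_DEACTIVATED to give the atomic init sequence the comment describes. A sketch, assuming a root created with that flag (kernfs_create_dir() is taken to be the namespace-less wrapper around kernfs_create_dir_ns(); my_priv is a placeholder):

    struct kernfs_node *dir;

    /* not yet visible to userland */
    dir = kernfs_create_dir(root->kn, "widgets", 0755, my_priv);
    if (IS_ERR(dir)) {
            kernfs_destroy_root(root);  /* unactivated nodes skip draining */
            return PTR_ERR(dir);
    }

    /* ... create the rest of the subtree ... */

    kernfs_activate(root->kn);          /* publish everything in one step */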
| 989 | |||
| 990 | static void __kernfs_remove(struct kernfs_node *kn) | ||
| 991 | { | ||
| 992 | struct kernfs_node *pos; | ||
| 993 | |||
| 994 | lockdep_assert_held(&kernfs_mutex); | ||
| 995 | |||
| 996 | /* | ||
| 997 | * Short-circuit if non-root @kn has already finished removal. | ||
| 998 | * This is for kernfs_remove_self() which plays with active ref | ||
| 999 | * after removal. | ||
| 1000 | */ | ||
| 1001 | if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) | ||
| 839 | return; | 1002 | return; |
| 840 | 1003 | ||
| 841 | pr_debug("kernfs %s: removing\n", kn->name); | 1004 | pr_debug("kernfs %s: removing\n", kn->name); |
| 842 | 1005 | ||
| 843 | next = NULL; | 1006 | /* prevent any new usage under @kn by deactivating all nodes */ |
| 1007 | pos = NULL; | ||
| 1008 | while ((pos = kernfs_next_descendant_post(pos, kn))) | ||
| 1009 | if (kernfs_active(pos)) | ||
| 1010 | atomic_add(KN_DEACTIVATED_BIAS, &pos->active); | ||
| 1011 | |||
| 1012 | /* deactivate and unlink the subtree node-by-node */ | ||
| 844 | do { | 1013 | do { |
| 845 | pos = next; | 1014 | pos = kernfs_leftmost_descendant(kn); |
| 846 | next = kernfs_next_descendant_post(pos, kn); | 1015 | |
| 847 | if (pos) | 1016 | /* |
| 848 | kernfs_remove_one(acxt, pos); | 1017 | * kernfs_drain() drops kernfs_mutex temporarily and @pos's |
| 849 | } while (next); | 1018 | * base ref could have been put by someone else by the time |
| 1019 | * the function returns. Make sure it doesn't go away | ||
| 1020 | * underneath us. | ||
| 1021 | */ | ||
| 1022 | kernfs_get(pos); | ||
| 1023 | |||
| 1024 | /* | ||
| 1025 | * Drain iff @kn was activated. This avoids draining and | ||
| 1026 | * its lockdep annotations for nodes which have never been | ||
| 1027 | * activated and allows embedding kernfs_remove() in create | ||
| 1028 | * error paths without worrying about draining. | ||
| 1029 | */ | ||
| 1030 | if (kn->flags & KERNFS_ACTIVATED) | ||
| 1031 | kernfs_drain(pos); | ||
| 1032 | else | ||
| 1033 | WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); | ||
| 1034 | |||
| 1035 | /* | ||
| 1036 | * kernfs_unlink_sibling() succeeds once per node. Use it | ||
| 1037 | * to decide who's responsible for cleanups. | ||
| 1038 | */ | ||
| 1039 | if (!pos->parent || kernfs_unlink_sibling(pos)) { | ||
| 1040 | struct kernfs_iattrs *ps_iattr = | ||
| 1041 | pos->parent ? pos->parent->iattr : NULL; | ||
| 1042 | |||
| 1043 | /* update timestamps on the parent */ | ||
| 1044 | if (ps_iattr) { | ||
| 1045 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | ||
| 1046 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | kernfs_put(pos); | ||
| 1050 | } | ||
| 1051 | |||
| 1052 | kernfs_put(pos); | ||
| 1053 | } while (pos != kn); | ||
| 850 | } | 1054 | } |
| 851 | 1055 | ||
| 852 | /** | 1056 | /** |
| @@ -857,11 +1061,140 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, | |||
| 857 | */ | 1061 | */ |
| 858 | void kernfs_remove(struct kernfs_node *kn) | 1062 | void kernfs_remove(struct kernfs_node *kn) |
| 859 | { | 1063 | { |
| 860 | struct kernfs_addrm_cxt acxt; | 1064 | mutex_lock(&kernfs_mutex); |
| 1065 | __kernfs_remove(kn); | ||
| 1066 | mutex_unlock(&kernfs_mutex); | ||
| 1067 | } | ||
| 861 | 1068 | ||
| 862 | kernfs_addrm_start(&acxt); | 1069 | /** |
| 863 | __kernfs_remove(&acxt, kn); | 1070 | * kernfs_break_active_protection - break out of active protection |
| 864 | kernfs_addrm_finish(&acxt); | 1071 | * @kn: the self kernfs_node |
| 1072 | * | ||
| 1073 | * The caller must be running off of a kernfs operation which is invoked | ||
| 1074 | * with an active reference - e.g. one of kernfs_ops. Each invocation of | ||
| 1075 | * this function must also be matched with an invocation of | ||
| 1076 | * kernfs_unbreak_active_protection(). | ||
| 1077 | * | ||
| 1078 | * This function releases the active reference of @kn the caller is | ||
| 1079 | * holding. Once this function is called, @kn may be removed at any point | ||
| 1080 | * and the caller is solely responsible for ensuring that the objects it | ||
| 1081 | * dereferences are accessible. | ||
| 1082 | */ | ||
| 1083 | void kernfs_break_active_protection(struct kernfs_node *kn) | ||
| 1084 | { | ||
| 1085 | /* | ||
| 1086 | * Take ourself out of the active ref dependency chain. If | ||
| 1087 | * we're called without an active ref, lockdep will complain. | ||
| 1088 | */ | ||
| 1089 | kernfs_put_active(kn); | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | /** | ||
| 1093 | * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() | ||
| 1094 | * @kn: the self kernfs_node | ||
| 1095 | * | ||
| 1096 | * If kernfs_break_active_protection() was called, this function must be | ||
| 1097 | * invoked before finishing the kernfs operation. Note that while this | ||
| 1098 | * function restores the active reference, it doesn't and can't actually | ||
| 1099 | * restore the active protection - @kn may already be removed or in the | ||
| 1100 | * process of being removed. Once kernfs_break_active_protection() is invoked, that | ||
| 1101 | * protection is irreversibly gone for the kernfs operation instance. | ||
| 1102 | * | ||
| 1103 | * While this function may be called at any point after | ||
| 1104 | * kernfs_break_active_protection() is invoked, its most useful location | ||
| 1105 | * would be right before the enclosing kernfs operation returns. | ||
| 1106 | */ | ||
| 1107 | void kernfs_unbreak_active_protection(struct kernfs_node *kn) | ||
| 1108 | { | ||
| 1109 | /* | ||
| 1110 | * @kn->active could be in any state; however, the increment we do | ||
| 1111 | * here will be undone as soon as the enclosing kernfs operation | ||
| 1112 | * finishes and this temporary bump can't break anything. If @kn | ||
| 1113 | * is alive, nothing changes. If @kn is being deactivated, the | ||
| 1114 | * soon-to-follow put will either finish deactivation or restore | ||
| 1115 | * deactivated state. If @kn is already removed, the temporary | ||
| 1116 | * bump is guaranteed to be gone before @kn is released. | ||
| 1117 | */ | ||
| 1118 | atomic_inc(&kn->active); | ||
| 1119 | if (kernfs_lockdep(kn)) | ||
| 1120 | rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); | ||
| 1121 | } | ||
| 1122 | |||
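A usage sketch of the break/unbreak pair, assuming a hypothetical kernfs_ops method that must take a subsystem lock which other threads hold while removing this node (my_store and my_subsys_lock are illustrative names only):

    static ssize_t my_store(struct kernfs_open_file *of, char *buf,
                            size_t bytes, loff_t off)
    {
            /* step out of the active-ref dependency chain first */
            kernfs_break_active_protection(of->kn);

            mutex_lock(&my_subsys_lock);
            /*
             * of->kn may be removed at any point from here on; only
             * touch objects whose lifetime my_subsys_lock guarantees.
             */
            mutex_unlock(&my_subsys_lock);

            kernfs_unbreak_active_protection(of->kn);
            return bytes;
    }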
| 1123 | /** | ||
| 1124 | * kernfs_remove_self - remove a kernfs_node from its own method | ||
| 1125 | * @kn: the self kernfs_node to remove | ||
| 1126 | * | ||
| 1127 | * The caller must be running off of a kernfs operation which is invoked | ||
| 1128 | * with an active reference - e.g. one of kernfs_ops. This can be used to | ||
| 1129 | * implement a file operation which deletes itself. | ||
| 1130 | * | ||
| 1131 | * For example, the "delete" file for a sysfs device directory can be | ||
| 1132 | * implemented by invoking kernfs_remove_self() on the "delete" file | ||
| 1133 | * itself. This function breaks the circular dependency of trying to | ||
| 1134 | * deactivate self while holding an active ref itself. It isn't necessary | ||
| 1135 | * to modify the usual removal path to use kernfs_remove_self(). The | ||
| 1136 | * "delete" implementation can simply invoke kernfs_remove_self() on self | ||
| 1137 | * before proceeding with the usual removal path. kernfs will ignore later | ||
| 1138 | * kernfs_remove() on self. | ||
| 1139 | * | ||
| 1140 | * kernfs_remove_self() can be called multiple times concurrently on the | ||
| 1141 | * same kernfs_node. Only the first one actually performs removal and | ||
| 1142 | * returns %true. All others will wait until the kernfs operation which | ||
| 1143 | * won self-removal finishes and return %false. Note that the losers wait | ||
| 1144 | * for the completion of not only the winning kernfs_remove_self() but also | ||
| 1145 | * the whole kernfs_ops which won the arbitration. This can be used to | ||
| 1146 | * guarantee, for example, all concurrent writes to a "delete" file to | ||
| 1147 | * finish only after the whole operation is complete. | ||
| 1148 | */ | ||
| 1149 | bool kernfs_remove_self(struct kernfs_node *kn) | ||
| 1150 | { | ||
| 1151 | bool ret; | ||
| 1152 | |||
| 1153 | mutex_lock(&kernfs_mutex); | ||
| 1154 | kernfs_break_active_protection(kn); | ||
| 1155 | |||
| 1156 | /* | ||
| 1157 | * SUICIDAL is used to arbitrate among competing invocations. Only | ||
| 1158 | * the first one will actually perform removal. When the removal | ||
| 1159 | * is complete, SUICIDED is set and the active ref is restored | ||
| 1160 | * while holding kernfs_mutex. The ones which lost arbitration | ||
| 1161 | * wait for SUICIDED && drained which can happen only after the | ||
| 1162 | * enclosing kernfs operation which executed the winning instance | ||
| 1163 | * of kernfs_remove_self() finished. | ||
| 1164 | */ | ||
| 1165 | if (!(kn->flags & KERNFS_SUICIDAL)) { | ||
| 1166 | kn->flags |= KERNFS_SUICIDAL; | ||
| 1167 | __kernfs_remove(kn); | ||
| 1168 | kn->flags |= KERNFS_SUICIDED; | ||
| 1169 | ret = true; | ||
| 1170 | } else { | ||
| 1171 | wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; | ||
| 1172 | DEFINE_WAIT(wait); | ||
| 1173 | |||
| 1174 | while (true) { | ||
| 1175 | prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); | ||
| 1176 | |||
| 1177 | if ((kn->flags & KERNFS_SUICIDED) && | ||
| 1178 | atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) | ||
| 1179 | break; | ||
| 1180 | |||
| 1181 | mutex_unlock(&kernfs_mutex); | ||
| 1182 | schedule(); | ||
| 1183 | mutex_lock(&kernfs_mutex); | ||
| 1184 | } | ||
| 1185 | finish_wait(waitq, &wait); | ||
| 1186 | WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); | ||
| 1187 | ret = false; | ||
| 1188 | } | ||
| 1189 | |||
| 1190 | /* | ||
| 1191 | * This must be done while holding kernfs_mutex; otherwise, waiting | ||
| 1192 | * for SUICIDED && deactivated could finish prematurely. | ||
| 1193 | */ | ||
| 1194 | kernfs_unbreak_active_protection(kn); | ||
| 1195 | |||
| 1196 | mutex_unlock(&kernfs_mutex); | ||
| 1197 | return ret; | ||
| 865 | } | 1198 | } |
| 866 | 1199 | ||
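The self-deleting attribute described above can be sketched as follows; the write signature matches kernfs_ops in this series, while my_delete_write and my_teardown_object are hypothetical:

    static ssize_t my_delete_write(struct kernfs_open_file *of, char *buf,
                                   size_t bytes, loff_t off)
    {
            /*
             * Only one concurrent writer wins; losers block here until
             * the winning write(2) has fully returned, then see false.
             */
            if (!kernfs_remove_self(of->kn))
                    return bytes;

            my_teardown_object(of->kn->priv);   /* winner cleans up */
            return bytes;
    }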
| 867 | /** | 1200 | /** |
| @@ -876,7 +1209,6 @@ void kernfs_remove(struct kernfs_node *kn) | |||
| 876 | int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | 1209 | int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, |
| 877 | const void *ns) | 1210 | const void *ns) |
| 878 | { | 1211 | { |
| 879 | struct kernfs_addrm_cxt acxt; | ||
| 880 | struct kernfs_node *kn; | 1212 | struct kernfs_node *kn; |
| 881 | 1213 | ||
| 882 | if (!parent) { | 1214 | if (!parent) { |
| @@ -885,13 +1217,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | |||
| 885 | return -ENOENT; | 1217 | return -ENOENT; |
| 886 | } | 1218 | } |
| 887 | 1219 | ||
| 888 | kernfs_addrm_start(&acxt); | 1220 | mutex_lock(&kernfs_mutex); |
| 889 | 1221 | ||
| 890 | kn = kernfs_find_ns(parent, name, ns); | 1222 | kn = kernfs_find_ns(parent, name, ns); |
| 891 | if (kn) | 1223 | if (kn) |
| 892 | __kernfs_remove(&acxt, kn); | 1224 | __kernfs_remove(kn); |
| 893 | 1225 | ||
| 894 | kernfs_addrm_finish(&acxt); | 1226 | mutex_unlock(&kernfs_mutex); |
| 895 | 1227 | ||
| 896 | if (kn) | 1228 | if (kn) |
| 897 | return 0; | 1229 | return 0; |
| @@ -909,12 +1241,18 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | |||
| 909 | int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | 1241 | int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, |
| 910 | const char *new_name, const void *new_ns) | 1242 | const char *new_name, const void *new_ns) |
| 911 | { | 1243 | { |
| 1244 | struct kernfs_node *old_parent; | ||
| 1245 | const char *old_name = NULL; | ||
| 912 | int error; | 1246 | int error; |
| 913 | 1247 | ||
| 1248 | /* can't move or rename root */ | ||
| 1249 | if (!kn->parent) | ||
| 1250 | return -EINVAL; | ||
| 1251 | |||
| 914 | mutex_lock(&kernfs_mutex); | 1252 | mutex_lock(&kernfs_mutex); |
| 915 | 1253 | ||
| 916 | error = -ENOENT; | 1254 | error = -ENOENT; |
| 917 | if ((kn->flags | new_parent->flags) & KERNFS_REMOVED) | 1255 | if (!kernfs_active(kn) || !kernfs_active(new_parent)) |
| 918 | goto out; | 1256 | goto out; |
| 919 | 1257 | ||
| 920 | error = 0; | 1258 | error = 0; |
| @@ -932,13 +1270,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
| 932 | new_name = kstrdup(new_name, GFP_KERNEL); | 1270 | new_name = kstrdup(new_name, GFP_KERNEL); |
| 933 | if (!new_name) | 1271 | if (!new_name) |
| 934 | goto out; | 1272 | goto out; |
| 935 | 1273 | } else { | |
| 936 | if (kn->flags & KERNFS_STATIC_NAME) | 1274 | new_name = NULL; |
| 937 | kn->flags &= ~KERNFS_STATIC_NAME; | ||
| 938 | else | ||
| 939 | kfree(kn->name); | ||
| 940 | |||
| 941 | kn->name = new_name; | ||
| 942 | } | 1275 | } |
| 943 | 1276 | ||
| 944 | /* | 1277 | /* |
| @@ -946,12 +1279,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
| 946 | */ | 1279 | */ |
| 947 | kernfs_unlink_sibling(kn); | 1280 | kernfs_unlink_sibling(kn); |
| 948 | kernfs_get(new_parent); | 1281 | kernfs_get(new_parent); |
| 949 | kernfs_put(kn->parent); | 1282 | |
| 1283 | /* rename_lock protects ->parent and ->name accessors */ | ||
| 1284 | spin_lock_irq(&kernfs_rename_lock); | ||
| 1285 | |||
| 1286 | old_parent = kn->parent; | ||
| 1287 | kn->parent = new_parent; | ||
| 1288 | |||
| 950 | kn->ns = new_ns; | 1289 | kn->ns = new_ns; |
| 1290 | if (new_name) { | ||
| 1291 | if (!(kn->flags & KERNFS_STATIC_NAME)) | ||
| 1292 | old_name = kn->name; | ||
| 1293 | kn->flags &= ~KERNFS_STATIC_NAME; | ||
| 1294 | kn->name = new_name; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | spin_unlock_irq(&kernfs_rename_lock); | ||
| 1298 | |||
| 951 | kn->hash = kernfs_name_hash(kn->name, kn->ns); | 1299 | kn->hash = kernfs_name_hash(kn->name, kn->ns); |
| 952 | kn->parent = new_parent; | ||
| 953 | kernfs_link_sibling(kn); | 1300 | kernfs_link_sibling(kn); |
| 954 | 1301 | ||
| 1302 | kernfs_put(old_parent); | ||
| 1303 | kfree(old_name); | ||
| 1304 | |||
| 955 | error = 0; | 1305 | error = 0; |
| 956 | out: | 1306 | out: |
| 957 | mutex_unlock(&kernfs_mutex); | 1307 | mutex_unlock(&kernfs_mutex); |
| @@ -974,7 +1324,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, | |||
| 974 | struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) | 1324 | struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) |
| 975 | { | 1325 | { |
| 976 | if (pos) { | 1326 | if (pos) { |
| 977 | int valid = !(pos->flags & KERNFS_REMOVED) && | 1327 | int valid = kernfs_active(pos) && |
| 978 | pos->parent == parent && hash == pos->hash; | 1328 | pos->parent == parent && hash == pos->hash; |
| 979 | kernfs_put(pos); | 1329 | kernfs_put(pos); |
| 980 | if (!valid) | 1330 | if (!valid) |
| @@ -993,8 +1343,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, | |||
| 993 | break; | 1343 | break; |
| 994 | } | 1344 | } |
| 995 | } | 1345 | } |
| 996 | /* Skip over entries in the wrong namespace */ | 1346 | /* Skip over entries which are dying/dead or in the wrong namespace */ |
| 997 | while (pos && pos->ns != ns) { | 1347 | while (pos && (!kernfs_active(pos) || pos->ns != ns)) { |
| 998 | struct rb_node *node = rb_next(&pos->rb); | 1348 | struct rb_node *node = rb_next(&pos->rb); |
| 999 | if (!node) | 1349 | if (!node) |
| 1000 | pos = NULL; | 1350 | pos = NULL; |
| @@ -1008,14 +1358,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, | |||
| 1008 | struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) | 1358 | struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) |
| 1009 | { | 1359 | { |
| 1010 | pos = kernfs_dir_pos(ns, parent, ino, pos); | 1360 | pos = kernfs_dir_pos(ns, parent, ino, pos); |
| 1011 | if (pos) | 1361 | if (pos) { |
| 1012 | do { | 1362 | do { |
| 1013 | struct rb_node *node = rb_next(&pos->rb); | 1363 | struct rb_node *node = rb_next(&pos->rb); |
| 1014 | if (!node) | 1364 | if (!node) |
| 1015 | pos = NULL; | 1365 | pos = NULL; |
| 1016 | else | 1366 | else |
| 1017 | pos = rb_to_kn(node); | 1367 | pos = rb_to_kn(node); |
| 1018 | } while (pos && pos->ns != ns); | 1368 | } while (pos && (!kernfs_active(pos) || pos->ns != ns)); |
| 1369 | } | ||
| 1019 | return pos; | 1370 | return pos; |
| 1020 | } | 1371 | } |
| 1021 | 1372 | ||
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index dbf397bfdff2..8034706a7af8 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
| @@ -252,10 +252,18 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, | |||
| 252 | size_t count, loff_t *ppos) | 252 | size_t count, loff_t *ppos) |
| 253 | { | 253 | { |
| 254 | struct kernfs_open_file *of = kernfs_of(file); | 254 | struct kernfs_open_file *of = kernfs_of(file); |
| 255 | ssize_t len = min_t(size_t, count, PAGE_SIZE); | ||
| 256 | const struct kernfs_ops *ops; | 255 | const struct kernfs_ops *ops; |
| 256 | size_t len; | ||
| 257 | char *buf; | 257 | char *buf; |
| 258 | 258 | ||
| 259 | if (of->atomic_write_len) { | ||
| 260 | len = count; | ||
| 261 | if (len > of->atomic_write_len) | ||
| 262 | return -E2BIG; | ||
| 263 | } else { | ||
| 264 | len = min_t(size_t, count, PAGE_SIZE); | ||
| 265 | } | ||
| 266 | |||
| 259 | buf = kmalloc(len + 1, GFP_KERNEL); | 267 | buf = kmalloc(len + 1, GFP_KERNEL); |
| 260 | if (!buf) | 268 | if (!buf) |
| 261 | return -ENOMEM; | 269 | return -ENOMEM; |
| @@ -653,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) | |||
| 653 | of->file = file; | 661 | of->file = file; |
| 654 | 662 | ||
| 655 | /* | 663 | /* |
| 664 | * Write path needs to access atomic_write_len outside active reference. | ||
| 665 | * Cache it in open_file. See kernfs_fop_write() for details. | ||
| 666 | */ | ||
| 667 | of->atomic_write_len = ops->atomic_write_len; | ||
| 668 | |||
| 669 | /* | ||
| 656 | * Always instantiate seq_file even if read access doesn't use | 670 | * Always instantiate seq_file even if read access doesn't use |
| 657 | * seq_file or is not requested. This unifies private data access | 671 | * seq_file or is not requested. This unifies private data access |
| 658 | * and readable regular files are the vast majority anyway. | 672 | * and readable regular files are the vast majority anyway. |
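With the limit cached in kernfs_open_file, an op vector can opt into bounded single-copy writes larger than PAGE_SIZE. A hypothetical sketch (my_write is a placeholder; atomic_write_len is the new kernfs_ops field cached above):

    static const struct kernfs_ops my_ops = {
            .write            = my_write,
            /*
             * Writes longer than this fail with -E2BIG instead of being
             * clamped to PAGE_SIZE; 0 keeps the old PAGE_SIZE behavior.
             */
            .atomic_write_len = 64 * 1024,
    };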
| @@ -820,7 +834,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
| 820 | bool name_is_static, | 834 | bool name_is_static, |
| 821 | struct lock_class_key *key) | 835 | struct lock_class_key *key) |
| 822 | { | 836 | { |
| 823 | struct kernfs_addrm_cxt acxt; | ||
| 824 | struct kernfs_node *kn; | 837 | struct kernfs_node *kn; |
| 825 | unsigned flags; | 838 | unsigned flags; |
| 826 | int rc; | 839 | int rc; |
| @@ -855,10 +868,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
| 855 | if (ops->mmap) | 868 | if (ops->mmap) |
| 856 | kn->flags |= KERNFS_HAS_MMAP; | 869 | kn->flags |= KERNFS_HAS_MMAP; |
| 857 | 870 | ||
| 858 | kernfs_addrm_start(&acxt); | 871 | rc = kernfs_add_one(kn); |
| 859 | rc = kernfs_add_one(&acxt, kn); | ||
| 860 | kernfs_addrm_finish(&acxt); | ||
| 861 | |||
| 862 | if (rc) { | 872 | if (rc) { |
| 863 | kernfs_put(kn); | 873 | kernfs_put(kn); |
| 864 | return ERR_PTR(rc); | 874 | return ERR_PTR(rc); |
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index e55126f85bd2..abb0f1f53d93 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c | |||
| @@ -355,7 +355,7 @@ void kernfs_evict_inode(struct inode *inode) | |||
| 355 | { | 355 | { |
| 356 | struct kernfs_node *kn = inode->i_private; | 356 | struct kernfs_node *kn = inode->i_private; |
| 357 | 357 | ||
| 358 | truncate_inode_pages(&inode->i_data, 0); | 358 | truncate_inode_pages_final(&inode->i_data); |
| 359 | clear_inode(inode); | 359 | clear_inode(inode); |
| 360 | kernfs_put(kn); | 360 | kernfs_put(kn); |
| 361 | } | 361 | } |
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index eb536b76374a..8be13b2a079b 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h | |||
| @@ -26,7 +26,8 @@ struct kernfs_iattrs { | |||
| 26 | struct simple_xattrs xattrs; | 26 | struct simple_xattrs xattrs; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | #define KN_DEACTIVATED_BIAS INT_MIN | 29 | /* +1 to avoid triggering overflow warning when negating it */ |
| 30 | #define KN_DEACTIVATED_BIAS (INT_MIN + 1) | ||
| 30 | 31 | ||
| 31 | /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ | 32 | /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ |
| 32 | 33 | ||
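The bias makes kn->active double as a deactivated flag and a reference count. A simplified model of how fs/kernfs/dir.c uses it (the real kernfs_get_active()/kernfs_put_active() additionally handle lockdep and the deactivate waitqueue):

    /*
     * active >= 0: node is live; the value is the number of active refs
     * active <  0: node is deactivated; new refs are refused
     */
    static bool my_get_active(struct kernfs_node *kn)
    {
            return atomic_inc_unless_negative(&kn->active);
    }

    static void my_deactivate(struct kernfs_node *kn)
    {
            /*
             * Flip the count negative in one step. Starting the bias at
             * INT_MIN + 1 rather than INT_MIN means the value can be
             * negated or subtracted without signed overflow.
             */
            atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
    }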
| @@ -45,13 +46,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) | |||
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | /* | 48 | /* |
| 48 | * Context structure to be used while adding/removing nodes. | ||
| 49 | */ | ||
| 50 | struct kernfs_addrm_cxt { | ||
| 51 | struct kernfs_node *removed; | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * mount.c | 49 | * mount.c |
| 56 | */ | 50 | */ |
| 57 | struct kernfs_super_info { | 51 | struct kernfs_super_info { |
| @@ -71,6 +65,7 @@ struct kernfs_super_info { | |||
| 71 | }; | 65 | }; |
| 72 | #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) | 66 | #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) |
| 73 | 67 | ||
| 68 | extern const struct super_operations kernfs_sops; | ||
| 74 | extern struct kmem_cache *kernfs_node_cache; | 69 | extern struct kmem_cache *kernfs_node_cache; |
| 75 | 70 | ||
| 76 | /* | 71 | /* |
| @@ -100,9 +95,7 @@ extern const struct inode_operations kernfs_dir_iops; | |||
| 100 | 95 | ||
| 101 | struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); | 96 | struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); |
| 102 | void kernfs_put_active(struct kernfs_node *kn); | 97 | void kernfs_put_active(struct kernfs_node *kn); |
| 103 | void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt); | 98 | int kernfs_add_one(struct kernfs_node *kn); |
| 104 | int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn); | ||
| 105 | void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt); | ||
| 106 | struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, | 99 | struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, |
| 107 | const char *name, umode_t mode, | 100 | const char *name, umode_t mode, |
| 108 | unsigned flags); | 101 | unsigned flags); |
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0f4152defe7b..6a5f04ac8704 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c | |||
| @@ -19,12 +19,49 @@ | |||
| 19 | 19 | ||
| 20 | struct kmem_cache *kernfs_node_cache; | 20 | struct kmem_cache *kernfs_node_cache; |
| 21 | 21 | ||
| 22 | static const struct super_operations kernfs_sops = { | 22 | static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) |
| 23 | { | ||
| 24 | struct kernfs_root *root = kernfs_info(sb)->root; | ||
| 25 | struct kernfs_syscall_ops *scops = root->syscall_ops; | ||
| 26 | |||
| 27 | if (scops && scops->remount_fs) | ||
| 28 | return scops->remount_fs(root, flags, data); | ||
| 29 | return 0; | ||
| 30 | } | ||
| 31 | |||
| 32 | static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) | ||
| 33 | { | ||
| 34 | struct kernfs_root *root = kernfs_root(dentry->d_fsdata); | ||
| 35 | struct kernfs_syscall_ops *scops = root->syscall_ops; | ||
| 36 | |||
| 37 | if (scops && scops->show_options) | ||
| 38 | return scops->show_options(sf, root); | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | const struct super_operations kernfs_sops = { | ||
| 23 | .statfs = simple_statfs, | 43 | .statfs = simple_statfs, |
| 24 | .drop_inode = generic_delete_inode, | 44 | .drop_inode = generic_delete_inode, |
| 25 | .evict_inode = kernfs_evict_inode, | 45 | .evict_inode = kernfs_evict_inode, |
| 46 | |||
| 47 | .remount_fs = kernfs_sop_remount_fs, | ||
| 48 | .show_options = kernfs_sop_show_options, | ||
| 26 | }; | 49 | }; |
| 27 | 50 | ||
| 51 | /** | ||
| 52 | * kernfs_root_from_sb - determine kernfs_root associated with a super_block | ||
| 53 | * @sb: the super_block in question | ||
| 54 | * | ||
| 55 | * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, | ||
| 56 | * %NULL is returned. | ||
| 57 | */ | ||
| 58 | struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) | ||
| 59 | { | ||
| 60 | if (sb->s_op == &kernfs_sops) | ||
| 61 | return kernfs_info(sb)->root; | ||
| 62 | return NULL; | ||
| 63 | } | ||
| 64 | |||
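A hypothetical caller handed an arbitrary super_block can now detect kernfs mounts without poking at s_fs_info directly:

    static int my_check_sb(struct super_block *sb)
    {
            struct kernfs_root *root = kernfs_root_from_sb(sb);

            if (!root)
                    return -EINVAL;     /* not a kernfs mount */

            /* root->kn and the kernfs API are safe to use from here */
            return 0;
    }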
| 28 | static int kernfs_fill_super(struct super_block *sb) | 65 | static int kernfs_fill_super(struct super_block *sb) |
| 29 | { | 66 | { |
| 30 | struct kernfs_super_info *info = kernfs_info(sb); | 67 | struct kernfs_super_info *info = kernfs_info(sb); |
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 4d457055acb9..8a198898e39a 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c | |||
| @@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, | |||
| 27 | struct kernfs_node *target) | 27 | struct kernfs_node *target) |
| 28 | { | 28 | { |
| 29 | struct kernfs_node *kn; | 29 | struct kernfs_node *kn; |
| 30 | struct kernfs_addrm_cxt acxt; | ||
| 31 | int error; | 30 | int error; |
| 32 | 31 | ||
| 33 | kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); | 32 | kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); |
| @@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, | |||
| 39 | kn->symlink.target_kn = target; | 38 | kn->symlink.target_kn = target; |
| 40 | kernfs_get(target); /* ref owned by symlink */ | 39 | kernfs_get(target); /* ref owned by symlink */ |
| 41 | 40 | ||
| 42 | kernfs_addrm_start(&acxt); | 41 | error = kernfs_add_one(kn); |
| 43 | error = kernfs_add_one(&acxt, kn); | ||
| 44 | kernfs_addrm_finish(&acxt); | ||
| 45 | |||
| 46 | if (!error) | 42 | if (!error) |
| 47 | return kn; | 43 | return kn; |
| 48 | 44 | ||
diff --git a/fs/locks.c b/fs/locks.c index 92a0f0a52b06..13fc7a6d380a 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -135,6 +135,7 @@ | |||
| 135 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 135 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
| 136 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 136 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
| 137 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) | 137 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) |
| 138 | #define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) | ||
| 138 | 139 | ||
| 139 | static bool lease_breaking(struct file_lock *fl) | 140 | static bool lease_breaking(struct file_lock *fl) |
| 140 | { | 141 | { |
| @@ -344,48 +345,43 @@ static int assign_type(struct file_lock *fl, long type) | |||
| 344 | return 0; | 345 | return 0; |
| 345 | } | 346 | } |
| 346 | 347 | ||
| 347 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | 348 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, |
| 348 | * style lock. | 349 | struct flock64 *l) |
| 349 | */ | ||
| 350 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | ||
| 351 | struct flock *l) | ||
| 352 | { | 350 | { |
| 353 | off_t start, end; | ||
| 354 | |||
| 355 | switch (l->l_whence) { | 351 | switch (l->l_whence) { |
| 356 | case SEEK_SET: | 352 | case SEEK_SET: |
| 357 | start = 0; | 353 | fl->fl_start = 0; |
| 358 | break; | 354 | break; |
| 359 | case SEEK_CUR: | 355 | case SEEK_CUR: |
| 360 | start = filp->f_pos; | 356 | fl->fl_start = filp->f_pos; |
| 361 | break; | 357 | break; |
| 362 | case SEEK_END: | 358 | case SEEK_END: |
| 363 | start = i_size_read(file_inode(filp)); | 359 | fl->fl_start = i_size_read(file_inode(filp)); |
| 364 | break; | 360 | break; |
| 365 | default: | 361 | default: |
| 366 | return -EINVAL; | 362 | return -EINVAL; |
| 367 | } | 363 | } |
| 364 | if (l->l_start > OFFSET_MAX - fl->fl_start) | ||
| 365 | return -EOVERFLOW; | ||
| 366 | fl->fl_start += l->l_start; | ||
| 367 | if (fl->fl_start < 0) | ||
| 368 | return -EINVAL; | ||
| 368 | 369 | ||
| 369 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; | 370 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; |
| 370 | POSIX-2001 defines it. */ | 371 | POSIX-2001 defines it. */ |
| 371 | start += l->l_start; | ||
| 372 | if (start < 0) | ||
| 373 | return -EINVAL; | ||
| 374 | fl->fl_end = OFFSET_MAX; | ||
| 375 | if (l->l_len > 0) { | 372 | if (l->l_len > 0) { |
| 376 | end = start + l->l_len - 1; | 373 | if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) |
| 377 | fl->fl_end = end; | 374 | return -EOVERFLOW; |
| 375 | fl->fl_end = fl->fl_start + l->l_len - 1; | ||
| 376 | |||
| 378 | } else if (l->l_len < 0) { | 377 | } else if (l->l_len < 0) { |
| 379 | end = start - 1; | 378 | if (fl->fl_start + l->l_len < 0) |
| 380 | fl->fl_end = end; | ||
| 381 | start += l->l_len; | ||
| 382 | if (start < 0) | ||
| 383 | return -EINVAL; | 379 | return -EINVAL; |
| 384 | } | 380 | fl->fl_end = fl->fl_start - 1; |
| 385 | fl->fl_start = start; /* we record the absolute position */ | 381 | fl->fl_start += l->l_len; |
| 386 | if (fl->fl_end < fl->fl_start) | 382 | } else |
| 387 | return -EOVERFLOW; | 383 | fl->fl_end = OFFSET_MAX; |
| 388 | 384 | ||
| 389 | fl->fl_owner = current->files; | 385 | fl->fl_owner = current->files; |
| 390 | fl->fl_pid = current->tgid; | 386 | fl->fl_pid = current->tgid; |
| 391 | fl->fl_file = filp; | 387 | fl->fl_file = filp; |
| @@ -393,55 +389,36 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | |||
| 393 | fl->fl_ops = NULL; | 389 | fl->fl_ops = NULL; |
| 394 | fl->fl_lmops = NULL; | 390 | fl->fl_lmops = NULL; |
| 395 | 391 | ||
| 396 | return assign_type(fl, l->l_type); | 392 | /* Ensure that fl->fl_file has compatible f_mode */ |
| 397 | } | 393 | switch (l->l_type) { |
| 398 | 394 | case F_RDLCK: | |
| 399 | #if BITS_PER_LONG == 32 | 395 | if (!(filp->f_mode & FMODE_READ)) |
| 400 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, | 396 | return -EBADF; |
| 401 | struct flock64 *l) | ||
| 402 | { | ||
| 403 | loff_t start; | ||
| 404 | |||
| 405 | switch (l->l_whence) { | ||
| 406 | case SEEK_SET: | ||
| 407 | start = 0; | ||
| 408 | break; | ||
| 409 | case SEEK_CUR: | ||
| 410 | start = filp->f_pos; | ||
| 411 | break; | 397 | break; |
| 412 | case SEEK_END: | 398 | case F_WRLCK: |
| 413 | start = i_size_read(file_inode(filp)); | 399 | if (!(filp->f_mode & FMODE_WRITE)) |
| 400 | return -EBADF; | ||
| 414 | break; | 401 | break; |
| 415 | default: | ||
| 416 | return -EINVAL; | ||
| 417 | } | 402 | } |
| 418 | 403 | ||
| 419 | start += l->l_start; | ||
| 420 | if (start < 0) | ||
| 421 | return -EINVAL; | ||
| 422 | fl->fl_end = OFFSET_MAX; | ||
| 423 | if (l->l_len > 0) { | ||
| 424 | fl->fl_end = start + l->l_len - 1; | ||
| 425 | } else if (l->l_len < 0) { | ||
| 426 | fl->fl_end = start - 1; | ||
| 427 | start += l->l_len; | ||
| 428 | if (start < 0) | ||
| 429 | return -EINVAL; | ||
| 430 | } | ||
| 431 | fl->fl_start = start; /* we record the absolute position */ | ||
| 432 | if (fl->fl_end < fl->fl_start) | ||
| 433 | return -EOVERFLOW; | ||
| 434 | |||
| 435 | fl->fl_owner = current->files; | ||
| 436 | fl->fl_pid = current->tgid; | ||
| 437 | fl->fl_file = filp; | ||
| 438 | fl->fl_flags = FL_POSIX; | ||
| 439 | fl->fl_ops = NULL; | ||
| 440 | fl->fl_lmops = NULL; | ||
| 441 | |||
| 442 | return assign_type(fl, l->l_type); | 404 | return assign_type(fl, l->l_type); |
| 443 | } | 405 | } |
| 444 | #endif | 406 | |
| 407 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | ||
| 408 | * style lock. | ||
| 409 | */ | ||
| 410 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | ||
| 411 | struct flock *l) | ||
| 412 | { | ||
| 413 | struct flock64 ll = { | ||
| 414 | .l_type = l->l_type, | ||
| 415 | .l_whence = l->l_whence, | ||
| 416 | .l_start = l->l_start, | ||
| 417 | .l_len = l->l_len, | ||
| 418 | }; | ||
| 419 | |||
| 420 | return flock64_to_posix_lock(filp, fl, &ll); | ||
| 421 | } | ||
| 445 | 422 | ||
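The consolidated helper now rejects ranges that would wrap rather than recording a bogus start or end. A hypothetical userspace probe of the new behavior (path and error handling kept minimal):

    #include <fcntl.h>
    #include <limits.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/lock-test", O_RDWR | O_CREAT, 0600);
            struct flock fl = {
                    .l_type   = F_WRLCK,
                    .l_whence = SEEK_END,
                    .l_start  = LLONG_MAX,  /* size + l_start overflows */
                    .l_len    = 0,
            };

            if (fd < 0 || write(fd, "x", 1) != 1)  /* non-empty file */
                    return 1;
            if (fcntl(fd, F_SETLK, &fl) == -1)
                    perror("F_SETLK");      /* expected: EOVERFLOW */
            return 0;
    }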
| 446 | /* default lease lock manager operations */ | 423 | /* default lease lock manager operations */ |
| 447 | static void lease_break_callback(struct file_lock *fl) | 424 | static void lease_break_callback(struct file_lock *fl) |
| @@ -511,8 +488,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
| 511 | } | 488 | } |
| 512 | 489 | ||
| 513 | /* Must be called with the i_lock held! */ | 490 | /* Must be called with the i_lock held! */ |
| 514 | static inline void | 491 | static void locks_insert_global_locks(struct file_lock *fl) |
| 515 | locks_insert_global_locks(struct file_lock *fl) | ||
| 516 | { | 492 | { |
| 517 | lg_local_lock(&file_lock_lglock); | 493 | lg_local_lock(&file_lock_lglock); |
| 518 | fl->fl_link_cpu = smp_processor_id(); | 494 | fl->fl_link_cpu = smp_processor_id(); |
| @@ -521,8 +497,7 @@ locks_insert_global_locks(struct file_lock *fl) | |||
| 521 | } | 497 | } |
| 522 | 498 | ||
| 523 | /* Must be called with the i_lock held! */ | 499 | /* Must be called with the i_lock held! */ |
| 524 | static inline void | 500 | static void locks_delete_global_locks(struct file_lock *fl) |
| 525 | locks_delete_global_locks(struct file_lock *fl) | ||
| 526 | { | 501 | { |
| 527 | /* | 502 | /* |
| 528 | * Avoid taking lock if already unhashed. This is safe since this check | 503 | * Avoid taking lock if already unhashed. This is safe since this check |
| @@ -544,14 +519,12 @@ posix_owner_key(struct file_lock *fl) | |||
| 544 | return (unsigned long)fl->fl_owner; | 519 | return (unsigned long)fl->fl_owner; |
| 545 | } | 520 | } |
| 546 | 521 | ||
| 547 | static inline void | 522 | static void locks_insert_global_blocked(struct file_lock *waiter) |
| 548 | locks_insert_global_blocked(struct file_lock *waiter) | ||
| 549 | { | 523 | { |
| 550 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); | 524 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); |
| 551 | } | 525 | } |
| 552 | 526 | ||
| 553 | static inline void | 527 | static void locks_delete_global_blocked(struct file_lock *waiter) |
| 554 | locks_delete_global_blocked(struct file_lock *waiter) | ||
| 555 | { | 528 | { |
| 556 | hash_del(&waiter->fl_link); | 529 | hash_del(&waiter->fl_link); |
| 557 | } | 530 | } |
| @@ -581,7 +554,7 @@ static void locks_delete_block(struct file_lock *waiter) | |||
| 581 | * it seems like the reasonable thing to do. | 554 | * it seems like the reasonable thing to do. |
| 582 | * | 555 | * |
| 583 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | 556 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block |
| 584 | * list itself is protected by the file_lock_list, but by ensuring that the | 557 | * list itself is protected by the blocked_lock_lock, but by ensuring that the |
| 585 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | 558 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock |
| 586 | * in some cases when we see that the fl_block list is empty. | 559 | * in some cases when we see that the fl_block list is empty. |
| 587 | */ | 560 | */ |
| @@ -591,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, | |||
| 591 | BUG_ON(!list_empty(&waiter->fl_block)); | 564 | BUG_ON(!list_empty(&waiter->fl_block)); |
| 592 | waiter->fl_next = blocker; | 565 | waiter->fl_next = blocker; |
| 593 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | 566 | list_add_tail(&waiter->fl_block, &blocker->fl_block); |
| 594 | if (IS_POSIX(blocker)) | 567 | if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) |
| 595 | locks_insert_global_blocked(waiter); | 568 | locks_insert_global_blocked(waiter); |
| 596 | } | 569 | } |
| 597 | 570 | ||
| @@ -652,15 +625,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
| 652 | locks_insert_global_locks(fl); | 625 | locks_insert_global_locks(fl); |
| 653 | } | 626 | } |
| 654 | 627 | ||
| 655 | /* | 628 | /** |
| 656 | * Delete a lock and then free it. | 629 | * locks_delete_lock - Delete a lock and then free it. |
| 657 | * Wake up processes that are blocked waiting for this lock, | 630 | * @thisfl_p: pointer that points to the fl_next field of the previous |
| 658 | * notify the FS that the lock has been cleared and | 631 | * inode->i_flock list entry |
| 659 | * finally free the lock. | 632 | * |
| 633 | * Unlink a lock from all lists and free the namespace reference, but don't | ||
| 634 | * free it yet. Wake up processes that are blocked waiting for this lock and | ||
| 635 | * notify the FS that the lock has been cleared. | ||
| 660 | * | 636 | * |
| 661 | * Must be called with the i_lock held! | 637 | * Must be called with the i_lock held! |
| 662 | */ | 638 | */ |
| 663 | static void locks_delete_lock(struct file_lock **thisfl_p) | 639 | static void locks_unlink_lock(struct file_lock **thisfl_p) |
| 664 | { | 640 | { |
| 665 | struct file_lock *fl = *thisfl_p; | 641 | struct file_lock *fl = *thisfl_p; |
| 666 | 642 | ||
| @@ -675,6 +651,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p) | |||
| 675 | } | 651 | } |
| 676 | 652 | ||
| 677 | locks_wake_up_blocks(fl); | 653 | locks_wake_up_blocks(fl); |
| 654 | } | ||
| 655 | |||
| 656 | /* | ||
| 657 | * Unlink a lock from all lists and free it. | ||
| 658 | * | ||
| 659 | * Must be called with i_lock held! | ||
| 660 | */ | ||
| 661 | static void locks_delete_lock(struct file_lock **thisfl_p) | ||
| 662 | { | ||
| 663 | struct file_lock *fl = *thisfl_p; | ||
| 664 | |||
| 665 | locks_unlink_lock(thisfl_p); | ||
| 678 | locks_free_lock(fl); | 666 | locks_free_lock(fl); |
| 679 | } | 667 | } |
| 680 | 668 | ||
| @@ -769,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock); | |||
| 769 | * Note: the above assumption may not be true when handling lock | 757 | * Note: the above assumption may not be true when handling lock |
| 770 | * requests from a broken NFS client. It may also fail in the presence | 758 | * requests from a broken NFS client. It may also fail in the presence |
| 771 | * of tasks (such as posix threads) sharing the same open file table. | 759 | * of tasks (such as posix threads) sharing the same open file table. |
| 772 | * | ||
| 773 | * To handle those cases, we just bail out after a few iterations. | 760 | * To handle those cases, we just bail out after a few iterations. |
| 761 | * | ||
| 762 | * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. | ||
| 763 | * Because the owner is not even nominally tied to a thread of | ||
| 764 | * execution, the deadlock detection below can't reasonably work well. Just | ||
| 765 | * skip it for those. | ||
| 766 | * | ||
| 767 | * In principle, we could do a more limited deadlock detection on FL_FILE_PVT | ||
| 768 | * locks that just checks for the case where two tasks are attempting to | ||
| 769 | * upgrade from read to write locks on the same inode. | ||
| 774 | */ | 770 | */ |
| 775 | 771 | ||
| 776 | #define MAX_DEADLK_ITERATIONS 10 | 772 | #define MAX_DEADLK_ITERATIONS 10 |
| @@ -793,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, | |||
| 793 | { | 789 | { |
| 794 | int i = 0; | 790 | int i = 0; |
| 795 | 791 | ||
| 792 | /* | ||
| 793 | * This deadlock detector can't reasonably detect deadlocks with | ||
| 794 | * FL_FILE_PVT locks, since they aren't owned by a process, per-se. | ||
| 795 | */ | ||
| 796 | if (IS_FILE_PVT(caller_fl)) | ||
| 797 | return 0; | ||
| 798 | |||
| 796 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { | 799 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { |
| 797 | if (i++ > MAX_DEADLK_ITERATIONS) | 800 | if (i++ > MAX_DEADLK_ITERATIONS) |
| 798 | return 0; | 801 | return 0; |
| @@ -1152,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait); | |||
| 1152 | 1155 | ||
| 1153 | /** | 1156 | /** |
| 1154 | * locks_mandatory_locked - Check for an active lock | 1157 | * locks_mandatory_locked - Check for an active lock |
| 1155 | * @inode: the file to check | 1158 | * @file: the file to check |
| 1156 | * | 1159 | * |
| 1157 | * Searches the inode's list of locks to find any POSIX locks which conflict. | 1160 | * Searches the inode's list of locks to find any POSIX locks which conflict. |
| 1158 | * This function is called from locks_verify_locked() only. | 1161 | * This function is called from locks_verify_locked() only. |
| 1159 | */ | 1162 | */ |
| 1160 | int locks_mandatory_locked(struct inode *inode) | 1163 | int locks_mandatory_locked(struct file *file) |
| 1161 | { | 1164 | { |
| 1165 | struct inode *inode = file_inode(file); | ||
| 1162 | fl_owner_t owner = current->files; | 1166 | fl_owner_t owner = current->files; |
| 1163 | struct file_lock *fl; | 1167 | struct file_lock *fl; |
| 1164 | 1168 | ||
| @@ -1169,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode) | |||
| 1169 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1173 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
| 1170 | if (!IS_POSIX(fl)) | 1174 | if (!IS_POSIX(fl)) |
| 1171 | continue; | 1175 | continue; |
| 1172 | if (fl->fl_owner != owner) | 1176 | if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) |
| 1173 | break; | 1177 | break; |
| 1174 | } | 1178 | } |
| 1175 | spin_unlock(&inode->i_lock); | 1179 | spin_unlock(&inode->i_lock); |
| @@ -1195,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode, | |||
| 1195 | { | 1199 | { |
| 1196 | struct file_lock fl; | 1200 | struct file_lock fl; |
| 1197 | int error; | 1201 | int error; |
| 1202 | bool sleep = false; | ||
| 1198 | 1203 | ||
| 1199 | locks_init_lock(&fl); | 1204 | locks_init_lock(&fl); |
| 1200 | fl.fl_owner = current->files; | ||
| 1201 | fl.fl_pid = current->tgid; | 1205 | fl.fl_pid = current->tgid; |
| 1202 | fl.fl_file = filp; | 1206 | fl.fl_file = filp; |
| 1203 | fl.fl_flags = FL_POSIX | FL_ACCESS; | 1207 | fl.fl_flags = FL_POSIX | FL_ACCESS; |
| 1204 | if (filp && !(filp->f_flags & O_NONBLOCK)) | 1208 | if (filp && !(filp->f_flags & O_NONBLOCK)) |
| 1205 | fl.fl_flags |= FL_SLEEP; | 1209 | sleep = true; |
| 1206 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; | 1210 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; |
| 1207 | fl.fl_start = offset; | 1211 | fl.fl_start = offset; |
| 1208 | fl.fl_end = offset + count - 1; | 1212 | fl.fl_end = offset + count - 1; |
| 1209 | 1213 | ||
| 1210 | for (;;) { | 1214 | for (;;) { |
| 1215 | if (filp) { | ||
| 1216 | fl.fl_owner = (fl_owner_t)filp; | ||
| 1217 | fl.fl_flags &= ~FL_SLEEP; | ||
| 1218 | error = __posix_lock_file(inode, &fl, NULL); | ||
| 1219 | if (!error) | ||
| 1220 | break; | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | if (sleep) | ||
| 1224 | fl.fl_flags |= FL_SLEEP; | ||
| 1225 | fl.fl_owner = current->files; | ||
| 1211 | error = __posix_lock_file(inode, &fl, NULL); | 1226 | error = __posix_lock_file(inode, &fl, NULL); |
| 1212 | if (error != FILE_LOCK_DEFERRED) | 1227 | if (error != FILE_LOCK_DEFERRED) |
| 1213 | break; | 1228 | break; |
| @@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp) | |||
| 1472 | return type; | 1487 | return type; |
| 1473 | } | 1488 | } |
| 1474 | 1489 | ||
| 1490 | /** | ||
| 1491 | * check_conflicting_open - see if the given dentry points to a file that has | ||
| 1492 | * an existing open that would conflict with the | ||
| 1493 | * desired lease. | ||
| 1494 | * @dentry: dentry to check | ||
| 1495 | * @arg: type of lease that we're trying to acquire | ||
| 1496 | * | ||
| 1497 | * Check to see if there's an existing open fd on this file that would | ||
| 1498 | * conflict with the lease we're trying to set. | ||
| 1499 | */ | ||
| 1500 | static int | ||
| 1501 | check_conflicting_open(const struct dentry *dentry, const long arg) | ||
| 1502 | { | ||
| 1503 | int ret = 0; | ||
| 1504 | struct inode *inode = dentry->d_inode; | ||
| 1505 | |||
| 1506 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | ||
| 1507 | return -EAGAIN; | ||
| 1508 | |||
| 1509 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || | ||
| 1510 | (atomic_read(&inode->i_count) > 1))) | ||
| 1511 | ret = -EAGAIN; | ||
| 1512 | |||
| 1513 | return ret; | ||
| 1514 | } | ||
| 1515 | |||
| 1475 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | 1516 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
| 1476 | { | 1517 | { |
| 1477 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1518 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
| @@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp | |||
| 1499 | return -EINVAL; | 1540 | return -EINVAL; |
| 1500 | } | 1541 | } |
| 1501 | 1542 | ||
| 1502 | error = -EAGAIN; | 1543 | error = check_conflicting_open(dentry, arg); |
| 1503 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1544 | if (error) |
| 1504 | goto out; | ||
| 1505 | if ((arg == F_WRLCK) | ||
| 1506 | && ((d_count(dentry) > 1) | ||
| 1507 | || (atomic_read(&inode->i_count) > 1))) | ||
| 1508 | goto out; | 1545 | goto out; |
| 1509 | 1546 | ||
| 1510 | /* | 1547 | /* |
| @@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp | |||
| 1549 | goto out; | 1586 | goto out; |
| 1550 | 1587 | ||
| 1551 | locks_insert_lock(before, lease); | 1588 | locks_insert_lock(before, lease); |
| 1552 | error = 0; | 1589 | /* |
| 1590 | * The check in break_lease() is lockless. It's possible for another | ||
| 1591 | * open to race in after we did the earlier check for a conflicting | ||
| 1592 | * open but before the lease was inserted. Check again for a | ||
| 1593 | * conflicting open and cancel the lease if there is one. | ||
| 1594 | * | ||
| 1595 | * We also add a barrier here to ensure that the insertion of the lock | ||
| 1596 | * precedes these checks. | ||
| 1597 | */ | ||
| 1598 | smp_mb(); | ||
| 1599 | error = check_conflicting_open(dentry, arg); | ||
| 1600 | if (error) | ||
| 1601 | locks_unlink_lock(flp); | ||
| 1553 | out: | 1602 | out: |
| 1554 | if (is_deleg) | 1603 | if (is_deleg) |
| 1555 | mutex_unlock(&inode->i_mutex); | 1604 | mutex_unlock(&inode->i_mutex); |
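The insert-then-recheck sequence above is the interesting part of this hunk: break_lease() inspects the lock list locklessly, so a conflicting open can slip in between the first check_conflicting_open() call and locks_insert_lock(); inserting first and re-checking after a full barrier closes that window. A hedged, generic sketch of the idiom follows; demo_obj and the helper functions are illustrative stand-ins, not kernel APIs.

/*
 * Publish-then-recheck (illustrative sketch only): either this thread
 * observes the racing conflict on the second check, or the lockless
 * racer observes the published object and backs off.
 */
#include <asm/barrier.h>

struct demo_obj;
extern int check_conflict(struct demo_obj *obj);        /* hypothetical */
extern void publish(struct demo_obj *obj);              /* hypothetical */
extern void unpublish(struct demo_obj *obj);            /* hypothetical */

static int install_checked(struct demo_obj *obj)
{
        int err;

        err = check_conflict(obj);      /* cheap early rejection */
        if (err)
                return err;

        publish(obj);                   /* now visible to lockless readers */

        /* Order the publication before the re-check. */
        smp_mb();
        err = check_conflict(obj);
        if (err)
                unpublish(obj);         /* lost the race: cancel */
        return err;
}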
| @@ -1842,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); | |||
| 1842 | 1891 | ||
| 1843 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | 1892 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) |
| 1844 | { | 1893 | { |
| 1845 | flock->l_pid = fl->fl_pid; | 1894 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; |
| 1846 | #if BITS_PER_LONG == 32 | 1895 | #if BITS_PER_LONG == 32 |
| 1847 | /* | 1896 | /* |
| 1848 | * Make sure we can represent the posix lock via | 1897 | * Make sure we can represent the posix lock via |
| @@ -1864,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | |||
| 1864 | #if BITS_PER_LONG == 32 | 1913 | #if BITS_PER_LONG == 32 |
| 1865 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) | 1914 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) |
| 1866 | { | 1915 | { |
| 1867 | flock->l_pid = fl->fl_pid; | 1916 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; |
| 1868 | flock->l_start = fl->fl_start; | 1917 | flock->l_start = fl->fl_start; |
| 1869 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : | 1918 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : |
| 1870 | fl->fl_end - fl->fl_start + 1; | 1919 | fl->fl_end - fl->fl_start + 1; |
| @@ -1876,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) | |||
| 1876 | /* Report the first existing lock that would conflict with l. | 1925 | /* Report the first existing lock that would conflict with l. |
| 1877 | * This implements the F_GETLK command of fcntl(). | 1926 | * This implements the F_GETLK command of fcntl(). |
| 1878 | */ | 1927 | */ |
| 1879 | int fcntl_getlk(struct file *filp, struct flock __user *l) | 1928 | int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) |
| 1880 | { | 1929 | { |
| 1881 | struct file_lock file_lock; | 1930 | struct file_lock file_lock; |
| 1882 | struct flock flock; | 1931 | struct flock flock; |
| @@ -1893,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) | |||
| 1893 | if (error) | 1942 | if (error) |
| 1894 | goto out; | 1943 | goto out; |
| 1895 | 1944 | ||
| 1945 | if (cmd == F_GETLKP) { | ||
| 1946 | error = -EINVAL; | ||
| 1947 | if (flock.l_pid != 0) | ||
| 1948 | goto out; | ||
| 1949 | |||
| 1950 | cmd = F_GETLK; | ||
| 1951 | file_lock.fl_flags |= FL_FILE_PVT; | ||
| 1952 | file_lock.fl_owner = (fl_owner_t)filp; | ||
| 1953 | } | ||
| 1954 | |||
| 1896 | error = vfs_test_lock(filp, &file_lock); | 1955 | error = vfs_test_lock(filp, &file_lock); |
| 1897 | if (error) | 1956 | if (error) |
| 1898 | goto out; | 1957 | goto out; |
| @@ -2012,25 +2071,32 @@ again: | |||
| 2012 | error = flock_to_posix_lock(filp, file_lock, &flock); | 2071 | error = flock_to_posix_lock(filp, file_lock, &flock); |
| 2013 | if (error) | 2072 | if (error) |
| 2014 | goto out; | 2073 | goto out; |
| 2015 | if (cmd == F_SETLKW) { | 2074 | |
| 2016 | file_lock->fl_flags |= FL_SLEEP; | 2075 | /* |
| 2017 | } | 2076 | * If the cmd is requesting file-private locks, then set the |
| 2018 | 2077 | * FL_FILE_PVT flag and override the owner. | |
| 2019 | error = -EBADF; | 2078 | */ |
| 2020 | switch (flock.l_type) { | 2079 | switch (cmd) { |
| 2021 | case F_RDLCK: | 2080 | case F_SETLKP: |
| 2022 | if (!(filp->f_mode & FMODE_READ)) | 2081 | error = -EINVAL; |
| 2023 | goto out; | 2082 | if (flock.l_pid != 0) |
| 2024 | break; | ||
| 2025 | case F_WRLCK: | ||
| 2026 | if (!(filp->f_mode & FMODE_WRITE)) | ||
| 2027 | goto out; | 2083 | goto out; |
| 2084 | |||
| 2085 | cmd = F_SETLK; | ||
| 2086 | file_lock->fl_flags |= FL_FILE_PVT; | ||
| 2087 | file_lock->fl_owner = (fl_owner_t)filp; | ||
| 2028 | break; | 2088 | break; |
| 2029 | case F_UNLCK: | 2089 | case F_SETLKPW: |
| 2030 | break; | ||
| 2031 | default: | ||
| 2032 | error = -EINVAL; | 2090 | error = -EINVAL; |
| 2033 | goto out; | 2091 | if (flock.l_pid != 0) |
| 2092 | goto out; | ||
| 2093 | |||
| 2094 | cmd = F_SETLKW; | ||
| 2095 | file_lock->fl_flags |= FL_FILE_PVT; | ||
| 2096 | file_lock->fl_owner = (fl_owner_t)filp; | ||
| 2097 | /* Fallthrough */ | ||
| 2098 | case F_SETLKW: | ||
| 2099 | file_lock->fl_flags |= FL_SLEEP; | ||
| 2034 | } | 2100 | } |
| 2035 | 2101 | ||
| 2036 | error = do_lock_file_wait(filp, cmd, file_lock); | 2102 | error = do_lock_file_wait(filp, cmd, file_lock); |
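For context, here is a hedged userspace sketch of the new commands, assuming F_SETLKP (and F_GETLKP/F_SETLKPW) are exported through <fcntl.h> as this series intends. The rule enforced in the kernel code above is visible here: l_pid must be zero on input, and F_GETLKP reports a conflicting file-private lock with l_pid == -1, since no single process owns it.

/* Hypothetical userspace usage; the F_*LKP names are this series'
 * proposal, not a long-standing fcntl interface. */
#include <sys/types.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int try_wrlock(int fd, off_t start, off_t len)
{
        struct flock fl;

        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
        fl.l_start = start;
        fl.l_len = len;
        fl.l_pid = 0;                   /* must be 0 for the *LKP commands */

        return fcntl(fd, F_SETLKP, &fl);        /* non-blocking acquire */
}

Because ownership follows the open file description, locks obtained via two separate open() calls conflict even within one process, and they are dropped on the last close of that description (see the locks_remove_posix() call added to locks_remove_file() below) rather than on the first close by the owning process.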
| @@ -2061,7 +2127,7 @@ out: | |||
| 2061 | /* Report the first existing lock that would conflict with l. | 2127 | /* Report the first existing lock that would conflict with l. |
| 2062 | * This implements the F_GETLK command of fcntl(). | 2128 | * This implements the F_GETLK command of fcntl(). |
| 2063 | */ | 2129 | */ |
| 2064 | int fcntl_getlk64(struct file *filp, struct flock64 __user *l) | 2130 | int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) |
| 2065 | { | 2131 | { |
| 2066 | struct file_lock file_lock; | 2132 | struct file_lock file_lock; |
| 2067 | struct flock64 flock; | 2133 | struct flock64 flock; |
| @@ -2078,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) | |||
| 2078 | if (error) | 2144 | if (error) |
| 2079 | goto out; | 2145 | goto out; |
| 2080 | 2146 | ||
| 2147 | if (cmd == F_GETLKP) { | ||
| 2148 | error = -EINVAL; | ||
| 2149 | if (flock.l_pid != 0) | ||
| 2150 | goto out; | ||
| 2151 | |||
| 2152 | cmd = F_GETLK64; | ||
| 2153 | file_lock.fl_flags |= FL_FILE_PVT; | ||
| 2154 | file_lock.fl_owner = (fl_owner_t)filp; | ||
| 2155 | } | ||
| 2156 | |||
| 2081 | error = vfs_test_lock(filp, &file_lock); | 2157 | error = vfs_test_lock(filp, &file_lock); |
| 2082 | if (error) | 2158 | if (error) |
| 2083 | goto out; | 2159 | goto out; |
| @@ -2130,25 +2206,32 @@ again: | |||
| 2130 | error = flock64_to_posix_lock(filp, file_lock, &flock); | 2206 | error = flock64_to_posix_lock(filp, file_lock, &flock); |
| 2131 | if (error) | 2207 | if (error) |
| 2132 | goto out; | 2208 | goto out; |
| 2133 | if (cmd == F_SETLKW64) { | 2209 | |
| 2134 | file_lock->fl_flags |= FL_SLEEP; | 2210 | /* |
| 2135 | } | 2211 | * If the cmd is requesting file-private locks, then set the |
| 2136 | 2212 | * FL_FILE_PVT flag and override the owner. | |
| 2137 | error = -EBADF; | 2213 | */ |
| 2138 | switch (flock.l_type) { | 2214 | switch (cmd) { |
| 2139 | case F_RDLCK: | 2215 | case F_SETLKP: |
| 2140 | if (!(filp->f_mode & FMODE_READ)) | 2216 | error = -EINVAL; |
| 2141 | goto out; | 2217 | if (flock.l_pid != 0) |
| 2142 | break; | ||
| 2143 | case F_WRLCK: | ||
| 2144 | if (!(filp->f_mode & FMODE_WRITE)) | ||
| 2145 | goto out; | 2218 | goto out; |
| 2219 | |||
| 2220 | cmd = F_SETLK64; | ||
| 2221 | file_lock->fl_flags |= FL_FILE_PVT; | ||
| 2222 | file_lock->fl_owner = (fl_owner_t)filp; | ||
| 2146 | break; | 2223 | break; |
| 2147 | case F_UNLCK: | 2224 | case F_SETLKPW: |
| 2148 | break; | ||
| 2149 | default: | ||
| 2150 | error = -EINVAL; | 2225 | error = -EINVAL; |
| 2151 | goto out; | 2226 | if (flock.l_pid != 0) |
| 2227 | goto out; | ||
| 2228 | |||
| 2229 | cmd = F_SETLKW64; | ||
| 2230 | file_lock->fl_flags |= FL_FILE_PVT; | ||
| 2231 | file_lock->fl_owner = (fl_owner_t)filp; | ||
| 2232 | /* Fallthrough */ | ||
| 2233 | case F_SETLKW64: | ||
| 2234 | file_lock->fl_flags |= FL_SLEEP; | ||
| 2152 | } | 2235 | } |
| 2153 | 2236 | ||
| 2154 | error = do_lock_file_wait(filp, cmd, file_lock); | 2237 | error = do_lock_file_wait(filp, cmd, file_lock); |
| @@ -2209,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix); | |||
| 2209 | /* | 2292 | /* |
| 2210 | * This function is called on the last close of an open file. | 2293 | * This function is called on the last close of an open file. |
| 2211 | */ | 2294 | */ |
| 2212 | void locks_remove_flock(struct file *filp) | 2295 | void locks_remove_file(struct file *filp) |
| 2213 | { | 2296 | { |
| 2214 | struct inode * inode = file_inode(filp); | 2297 | struct inode * inode = file_inode(filp); |
| 2215 | struct file_lock *fl; | 2298 | struct file_lock *fl; |
| @@ -2218,6 +2301,8 @@ void locks_remove_flock(struct file *filp) | |||
| 2218 | if (!inode->i_flock) | 2301 | if (!inode->i_flock) |
| 2219 | return; | 2302 | return; |
| 2220 | 2303 | ||
| 2304 | locks_remove_posix(filp, (fl_owner_t)filp); | ||
| 2305 | |||
| 2221 | if (filp->f_op->flock) { | 2306 | if (filp->f_op->flock) { |
| 2222 | struct file_lock fl = { | 2307 | struct file_lock fl = { |
| 2223 | .fl_pid = current->tgid, | 2308 | .fl_pid = current->tgid, |
| @@ -2236,16 +2321,28 @@ void locks_remove_flock(struct file *filp) | |||
| 2236 | 2321 | ||
| 2237 | while ((fl = *before) != NULL) { | 2322 | while ((fl = *before) != NULL) { |
| 2238 | if (fl->fl_file == filp) { | 2323 | if (fl->fl_file == filp) { |
| 2239 | if (IS_FLOCK(fl)) { | ||
| 2240 | locks_delete_lock(before); | ||
| 2241 | continue; | ||
| 2242 | } | ||
| 2243 | if (IS_LEASE(fl)) { | 2324 | if (IS_LEASE(fl)) { |
| 2244 | lease_modify(before, F_UNLCK); | 2325 | lease_modify(before, F_UNLCK); |
| 2245 | continue; | 2326 | continue; |
| 2246 | } | 2327 | } |
| 2247 | /* What? */ | 2328 | |
| 2248 | BUG(); | 2329 | /* |
| 2330 | * There's a leftover lock on the list of a type that | ||
| 2331 | * we didn't expect to see. Most likely a classic | ||
| 2332 | * POSIX lock that ended up not getting released | ||
| 2333 | * properly, or that raced onto the list somehow. Log | ||
| 2334 | * some info about it and then just remove it from | ||
| 2335 | * the list. | ||
| 2336 | */ | ||
| 2337 | WARN(!IS_FLOCK(fl), | ||
| 2338 | "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", | ||
| 2339 | MAJOR(inode->i_sb->s_dev), | ||
| 2340 | MINOR(inode->i_sb->s_dev), inode->i_ino, | ||
| 2341 | fl->fl_type, fl->fl_flags, | ||
| 2342 | fl->fl_start, fl->fl_end); | ||
| 2343 | |||
| 2344 | locks_delete_lock(before); | ||
| 2345 | continue; | ||
| 2249 | } | 2346 | } |
| 2250 | before = &fl->fl_next; | 2347 | before = &fl->fl_next; |
| 2251 | } | 2348 | } |
| @@ -2314,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
| 2314 | 2411 | ||
| 2315 | seq_printf(f, "%lld:%s ", id, pfx); | 2412 | seq_printf(f, "%lld:%s ", id, pfx); |
| 2316 | if (IS_POSIX(fl)) { | 2413 | if (IS_POSIX(fl)) { |
| 2317 | seq_printf(f, "%6s %s ", | 2414 | if (fl->fl_flags & FL_ACCESS) |
| 2318 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", | 2415 | seq_printf(f, "ACCESS"); |
| 2416 | else if (IS_FILE_PVT(fl)) | ||
| 2417 | seq_printf(f, "FLPVT "); | ||
| 2418 | else | ||
| 2419 | seq_printf(f, "POSIX "); | ||
| 2420 | |||
| 2421 | seq_printf(f, " %s ", | ||
| 2319 | (inode == NULL) ? "*NOINODE*" : | 2422 | (inode == NULL) ? "*NOINODE*" : |
| 2320 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); | 2423 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); |
| 2321 | } else if (IS_FLOCK(fl)) { | 2424 | } else if (IS_FLOCK(fl)) { |
| @@ -2385,6 +2488,7 @@ static int locks_show(struct seq_file *f, void *v) | |||
| 2385 | } | 2488 | } |
| 2386 | 2489 | ||
| 2387 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2490 | static void *locks_start(struct seq_file *f, loff_t *pos) |
| 2491 | __acquires(&blocked_lock_lock) | ||
| 2388 | { | 2492 | { |
| 2389 | struct locks_iterator *iter = f->private; | 2493 | struct locks_iterator *iter = f->private; |
| 2390 | 2494 | ||
| @@ -2403,6 +2507,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | |||
| 2403 | } | 2507 | } |
| 2404 | 2508 | ||
| 2405 | static void locks_stop(struct seq_file *f, void *v) | 2509 | static void locks_stop(struct seq_file *f, void *v) |
| 2510 | __releases(&blocked_lock_lock) | ||
| 2406 | { | 2511 | { |
| 2407 | spin_unlock(&blocked_lock_lock); | 2512 | spin_unlock(&blocked_lock_lock); |
| 2408 | lg_global_unlock(&file_lock_lglock); | 2513 | lg_global_unlock(&file_lock_lglock); |
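The __acquires/__releases annotations added to locks_start() and locks_stop() are sparse context markers: they document, and let sparse check, that a lock taken in one function is released in a different one, which is exactly the seq_file start/stop pattern. A minimal illustrative pairing with demo names, not the locks.c code:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* Lock context is entered here... */
static void *demo_start(void)
        __acquires(&demo_lock)
{
        spin_lock(&demo_lock);
        return NULL;
}

/* ...and exited in a different function. */
static void demo_stop(void *v)
        __releases(&demo_lock)
{
        spin_unlock(&demo_lock);
}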
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 9a59cbade2fb..48140315f627 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
| @@ -2180,7 +2180,7 @@ void logfs_evict_inode(struct inode *inode) | |||
| 2180 | do_delete_inode(inode); | 2180 | do_delete_inode(inode); |
| 2181 | } | 2181 | } |
| 2182 | } | 2182 | } |
| 2183 | truncate_inode_pages(&inode->i_data, 0); | 2183 | truncate_inode_pages_final(&inode->i_data); |
| 2184 | clear_inode(inode); | 2184 | clear_inode(inode); |
| 2185 | 2185 | ||
| 2186 | /* Cheaper version of write_inode. All changes are concealed in | 2186 | /* Cheaper version of write_inode. All changes are concealed in |
diff --git a/fs/mbcache.c b/fs/mbcache.c index e519e45bf673..bf166e388f0d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -26,6 +26,41 @@ | |||
| 26 | * back on the lru list. | 26 | * back on the lru list. |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | /* | ||
| 30 | * Lock descriptions and usage: | ||
| 31 | * | ||
| 32 | * Each hash chain of both the block and index hash tables now contains | ||
| 33 | * a built-in lock used to serialize accesses to the hash chain. | ||
| 34 | * | ||
| 35 | * Accesses to global data structures mb_cache_list and mb_cache_lru_list | ||
| 36 | * are serialized via the global spinlock mb_cache_spinlock. | ||
| 37 | * | ||
| 38 | * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize | ||
| 39 | * accesses to its local data, such as e_used and e_queued. | ||
| 40 | * | ||
| 41 | * Lock ordering: | ||
| 42 | * | ||
| 43 | * Each block hash chain's lock has the highest lock order, followed by an | ||
| 44 | * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's | ||
| 45 | * lock), and mb_cache_spinlock, with the lowest order. While holding | ||
| 46 | * either a block or index hash chain lock, a thread can acquire an | ||
| 47 | * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. | ||
| 48 | * | ||
| 49 | * Synchronization: | ||
| 50 | * | ||
| 51 | * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and | ||
| 52 | * index hash chains, they need to lock the corresponding hash chain. For each | ||
| 53 | * mb_cache_entry within the chain, they also lock the mb_cache_entry itself to | ||
| 54 | * prevent any simultaneous release or free of the entry, and to | ||
| 55 | * serialize accesses to its e_used and e_queued members. | ||
| 56 | * | ||
| 57 | * To avoid having a dangling reference to an already freed | ||
| 58 | * mb_cache_entry, an mb_cache_entry is only freed when it is not on a | ||
| 59 | * block hash chain and also no longer being referenced, i.e. both e_used | ||
| 60 | * and e_queued are 0. When an mb_cache_entry is explicitly freed it is | ||
| 61 | * first removed from a block hash chain. | ||
| 62 | */ | ||
| 63 | |||
| 29 | #include <linux/kernel.h> | 64 | #include <linux/kernel.h> |
| 30 | #include <linux/module.h> | 65 | #include <linux/module.h> |
| 31 | 66 | ||
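A hedged restatement of that ordering rule in code form; this function is not part of the patch, it just spells out the acquisition order the comment prescribes, using identifiers the patch introduces below (they are file-static, so the sketch only makes sense inside mbcache.c):

/* Illustrative only: take every lock level in the documented order. */
static void demo_lock_in_order(struct mb_cache_entry *ce)
{
        hlist_bl_lock(ce->e_block_hash_p);      /* highest order */
        hlist_bl_lock(ce->e_index_hash_p);
        __spin_lock_mb_cache_entry(ce);         /* entry (bg) lock */
        spin_lock(&mb_cache_spinlock);          /* lowest order */

        /* ...touch hash chains, entry fields, and the LRU list... */

        spin_unlock(&mb_cache_spinlock);
        __spin_unlock_mb_cache_entry(ce);
        hlist_bl_unlock(ce->e_index_hash_p);
        hlist_bl_unlock(ce->e_block_hash_p);
}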
| @@ -34,9 +69,10 @@ | |||
| 34 | #include <linux/mm.h> | 69 | #include <linux/mm.h> |
| 35 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
| 36 | #include <linux/sched.h> | 71 | #include <linux/sched.h> |
| 37 | #include <linux/init.h> | 72 | #include <linux/list_bl.h> |
| 38 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
| 39 | 74 | #include <linux/init.h> | |
| 75 | #include <linux/blockgroup_lock.h> | ||
| 40 | 76 | ||
| 41 | #ifdef MB_CACHE_DEBUG | 77 | #ifdef MB_CACHE_DEBUG |
| 42 | # define mb_debug(f...) do { \ | 78 | # define mb_debug(f...) do { \ |
| @@ -57,8 +93,14 @@ | |||
| 57 | 93 | ||
| 58 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
| 59 | 95 | ||
| 96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | ||
| 97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | ||
| 98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | ||
| 99 | |||
| 60 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); | 100 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); |
| 61 | 101 | static struct blockgroup_lock *mb_cache_bg_lock; | |
| 102 | static struct kmem_cache *mb_cache_kmem_cache; | ||
| 103 | |||
| 62 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); | 104 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); |
| 63 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); | 105 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); |
| 64 | MODULE_LICENSE("GPL"); | 106 | MODULE_LICENSE("GPL"); |
| @@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list); | |||
| 86 | static LIST_HEAD(mb_cache_lru_list); | 128 | static LIST_HEAD(mb_cache_lru_list); |
| 87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 129 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
| 88 | 130 | ||
| 131 | static inline void | ||
| 132 | __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) | ||
| 133 | { | ||
| 134 | spin_lock(bgl_lock_ptr(mb_cache_bg_lock, | ||
| 135 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
| 136 | } | ||
| 137 | |||
| 138 | static inline void | ||
| 139 | __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) | ||
| 140 | { | ||
| 141 | spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, | ||
| 142 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
| 143 | } | ||
| 144 | |||
| 89 | static inline int | 145 | static inline int |
| 90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 146 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) |
| 91 | { | 147 | { |
| 92 | return !list_empty(&ce->e_block_list); | 148 | return !hlist_bl_unhashed(&ce->e_block_list); |
| 93 | } | 149 | } |
| 94 | 150 | ||
| 95 | 151 | ||
| 96 | static void | 152 | static inline void |
| 97 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 153 | __mb_cache_entry_unhash_block(struct mb_cache_entry *ce) |
| 98 | { | 154 | { |
| 99 | if (__mb_cache_entry_is_hashed(ce)) { | 155 | if (__mb_cache_entry_is_block_hashed(ce)) |
| 100 | list_del_init(&ce->e_block_list); | 156 | hlist_bl_del_init(&ce->e_block_list); |
| 101 | list_del(&ce->e_index.o_list); | ||
| 102 | } | ||
| 103 | } | 157 | } |
| 104 | 158 | ||
| 159 | static inline int | ||
| 160 | __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) | ||
| 161 | { | ||
| 162 | return !hlist_bl_unhashed(&ce->e_index.o_list); | ||
| 163 | } | ||
| 164 | |||
| 165 | static inline void | ||
| 166 | __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) | ||
| 167 | { | ||
| 168 | if (__mb_cache_entry_is_index_hashed(ce)) | ||
| 169 | hlist_bl_del_init(&ce->e_index.o_list); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * __mb_cache_entry_unhash_unlock() | ||
| 174 | * | ||
| 175 | * This function is called to unhash the entry from both the block and | ||
| 176 | * index hash chains. | ||
| 177 | * It assumes both hash chains are locked upon entry. | ||
| 178 | * It also unlocks both hash chains upon exit. | ||
| 179 | */ | ||
| 180 | static inline void | ||
| 181 | __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) | ||
| 182 | { | ||
| 183 | __mb_cache_entry_unhash_index(ce); | ||
| 184 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 185 | __mb_cache_entry_unhash_block(ce); | ||
| 186 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 187 | } | ||
| 105 | 188 | ||
| 106 | static void | 189 | static void |
| 107 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | 190 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) |
| 108 | { | 191 | { |
| 109 | struct mb_cache *cache = ce->e_cache; | 192 | struct mb_cache *cache = ce->e_cache; |
| 110 | 193 | ||
| 111 | mb_assert(!(ce->e_used || ce->e_queued)); | 194 | mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); |
| 112 | kmem_cache_free(cache->c_entry_cache, ce); | 195 | kmem_cache_free(cache->c_entry_cache, ce); |
| 113 | atomic_dec(&cache->c_entry_count); | 196 | atomic_dec(&cache->c_entry_count); |
| 114 | } | 197 | } |
| 115 | 198 | ||
| 116 | |||
| 117 | static void | 199 | static void |
| 118 | __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) | 200 | __mb_cache_entry_release(struct mb_cache_entry *ce) |
| 119 | __releases(mb_cache_spinlock) | ||
| 120 | { | 201 | { |
| 202 | /* First lock the entry to serialize access to its local data. */ | ||
| 203 | __spin_lock_mb_cache_entry(ce); | ||
| 121 | /* Wake up all processes queuing for this cache entry. */ | 204 | /* Wake up all processes queuing for this cache entry. */ |
| 122 | if (ce->e_queued) | 205 | if (ce->e_queued) |
| 123 | wake_up_all(&mb_cache_queue); | 206 | wake_up_all(&mb_cache_queue); |
| 124 | if (ce->e_used >= MB_CACHE_WRITER) | 207 | if (ce->e_used >= MB_CACHE_WRITER) |
| 125 | ce->e_used -= MB_CACHE_WRITER; | 208 | ce->e_used -= MB_CACHE_WRITER; |
| 209 | /* | ||
| 210 | * Make sure that all cache entries on lru_list have | ||
| 211 | * both e_used and e_queued equal to 0. | ||
| 212 | */ | ||
| 126 | ce->e_used--; | 213 | ce->e_used--; |
| 127 | if (!(ce->e_used || ce->e_queued)) { | 214 | if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { |
| 128 | if (!__mb_cache_entry_is_hashed(ce)) | 215 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
| 216 | __spin_unlock_mb_cache_entry(ce); | ||
| 129 | goto forget; | 217 | goto forget; |
| 130 | mb_assert(list_empty(&ce->e_lru_list)); | 218 | } |
| 131 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | 219 | /* |
| 220 | * Need access to lru list, first drop entry lock, | ||
| 221 | * then reacquire the lock in the proper order. | ||
| 222 | */ | ||
| 223 | spin_lock(&mb_cache_spinlock); | ||
| 224 | if (list_empty(&ce->e_lru_list)) | ||
| 225 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | ||
| 226 | spin_unlock(&mb_cache_spinlock); | ||
| 132 | } | 227 | } |
| 133 | spin_unlock(&mb_cache_spinlock); | 228 | __spin_unlock_mb_cache_entry(ce); |
| 134 | return; | 229 | return; |
| 135 | forget: | 230 | forget: |
| 136 | spin_unlock(&mb_cache_spinlock); | 231 | mb_assert(list_empty(&ce->e_lru_list)); |
| 137 | __mb_cache_entry_forget(ce, GFP_KERNEL); | 232 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
| 138 | } | 233 | } |
| 139 | 234 | ||
| 140 | |||
| 141 | /* | 235 | /* |
| 142 | * mb_cache_shrink_scan() memory pressure callback | 236 | * mb_cache_shrink_scan() memory pressure callback |
| 143 | * | 237 | * |
| @@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
| 160 | 254 | ||
| 161 | mb_debug("trying to free %d entries", nr_to_scan); | 255 | mb_debug("trying to free %d entries", nr_to_scan); |
| 162 | spin_lock(&mb_cache_spinlock); | 256 | spin_lock(&mb_cache_spinlock); |
| 163 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 257 | while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { |
| 164 | struct mb_cache_entry *ce = | 258 | struct mb_cache_entry *ce = |
| 165 | list_entry(mb_cache_lru_list.next, | 259 | list_entry(mb_cache_lru_list.next, |
| 166 | struct mb_cache_entry, e_lru_list); | 260 | struct mb_cache_entry, e_lru_list); |
| 167 | list_move_tail(&ce->e_lru_list, &free_list); | 261 | list_del_init(&ce->e_lru_list); |
| 168 | __mb_cache_entry_unhash(ce); | 262 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) |
| 169 | freed++; | 263 | continue; |
| 264 | spin_unlock(&mb_cache_spinlock); | ||
| 265 | /* Prevent any find or get operation on the entry */ | ||
| 266 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 267 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 268 | /* Ignore if it is touched by a find/get */ | ||
| 269 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || | ||
| 270 | !list_empty(&ce->e_lru_list)) { | ||
| 271 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 272 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 273 | spin_lock(&mb_cache_spinlock); | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | __mb_cache_entry_unhash_unlock(ce); | ||
| 277 | list_add_tail(&ce->e_lru_list, &free_list); | ||
| 278 | spin_lock(&mb_cache_spinlock); | ||
| 170 | } | 279 | } |
| 171 | spin_unlock(&mb_cache_spinlock); | 280 | spin_unlock(&mb_cache_spinlock); |
| 281 | |||
| 172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 282 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
| 173 | __mb_cache_entry_forget(entry, gfp_mask); | 283 | __mb_cache_entry_forget(entry, gfp_mask); |
| 284 | freed++; | ||
| 174 | } | 285 | } |
| 175 | return freed; | 286 | return freed; |
| 176 | } | 287 | } |
| @@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits) | |||
| 215 | int n, bucket_count = 1 << bucket_bits; | 326 | int n, bucket_count = 1 << bucket_bits; |
| 216 | struct mb_cache *cache = NULL; | 327 | struct mb_cache *cache = NULL; |
| 217 | 328 | ||
| 329 | if (!mb_cache_bg_lock) { | ||
| 330 | mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), | ||
| 331 | GFP_KERNEL); | ||
| 332 | if (!mb_cache_bg_lock) | ||
| 333 | return NULL; | ||
| 334 | bgl_lock_init(mb_cache_bg_lock); | ||
| 335 | } | ||
| 336 | |||
| 218 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); | 337 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
| 219 | if (!cache) | 338 | if (!cache) |
| 220 | return NULL; | 339 | return NULL; |
| 221 | cache->c_name = name; | 340 | cache->c_name = name; |
| 222 | atomic_set(&cache->c_entry_count, 0); | 341 | atomic_set(&cache->c_entry_count, 0); |
| 223 | cache->c_bucket_bits = bucket_bits; | 342 | cache->c_bucket_bits = bucket_bits; |
| 224 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), | 343 | cache->c_block_hash = kmalloc(bucket_count * |
| 225 | GFP_KERNEL); | 344 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
| 226 | if (!cache->c_block_hash) | 345 | if (!cache->c_block_hash) |
| 227 | goto fail; | 346 | goto fail; |
| 228 | for (n=0; n<bucket_count; n++) | 347 | for (n=0; n<bucket_count; n++) |
| 229 | INIT_LIST_HEAD(&cache->c_block_hash[n]); | 348 | INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); |
| 230 | cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), | 349 | cache->c_index_hash = kmalloc(bucket_count * |
| 231 | GFP_KERNEL); | 350 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
| 232 | if (!cache->c_index_hash) | 351 | if (!cache->c_index_hash) |
| 233 | goto fail; | 352 | goto fail; |
| 234 | for (n=0; n<bucket_count; n++) | 353 | for (n=0; n<bucket_count; n++) |
| 235 | INIT_LIST_HEAD(&cache->c_index_hash[n]); | 354 | INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); |
| 236 | cache->c_entry_cache = kmem_cache_create(name, | 355 | if (!mb_cache_kmem_cache) { |
| 237 | sizeof(struct mb_cache_entry), 0, | 356 | mb_cache_kmem_cache = kmem_cache_create(name, |
| 238 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 357 | sizeof(struct mb_cache_entry), 0, |
| 239 | if (!cache->c_entry_cache) | 358 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
| 240 | goto fail2; | 359 | if (!mb_cache_kmem_cache) |
| 360 | goto fail2; | ||
| 361 | } | ||
| 362 | cache->c_entry_cache = mb_cache_kmem_cache; | ||
| 241 | 363 | ||
| 242 | /* | 364 | /* |
| 243 | * Set an upper limit on the number of cache entries so that the hash | 365 | * Set an upper limit on the number of cache entries so that the hash |
| @@ -273,21 +395,47 @@ void | |||
| 273 | mb_cache_shrink(struct block_device *bdev) | 395 | mb_cache_shrink(struct block_device *bdev) |
| 274 | { | 396 | { |
| 275 | LIST_HEAD(free_list); | 397 | LIST_HEAD(free_list); |
| 276 | struct list_head *l, *ltmp; | 398 | struct list_head *l; |
| 399 | struct mb_cache_entry *ce, *tmp; | ||
| 277 | 400 | ||
| 401 | l = &mb_cache_lru_list; | ||
| 278 | spin_lock(&mb_cache_spinlock); | 402 | spin_lock(&mb_cache_spinlock); |
| 279 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 403 | while (!list_is_last(l, &mb_cache_lru_list)) { |
| 280 | struct mb_cache_entry *ce = | 404 | l = l->next; |
| 281 | list_entry(l, struct mb_cache_entry, e_lru_list); | 405 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
| 282 | if (ce->e_bdev == bdev) { | 406 | if (ce->e_bdev == bdev) { |
| 283 | list_move_tail(&ce->e_lru_list, &free_list); | 407 | list_del_init(&ce->e_lru_list); |
| 284 | __mb_cache_entry_unhash(ce); | 408 | if (ce->e_used || ce->e_queued || |
| 409 | atomic_read(&ce->e_refcnt)) | ||
| 410 | continue; | ||
| 411 | spin_unlock(&mb_cache_spinlock); | ||
| 412 | /* | ||
| 413 | * Prevent any find or get operation on the entry. | ||
| 414 | */ | ||
| 415 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 416 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 417 | /* Ignore if it is touched by a find/get */ | ||
| 418 | if (ce->e_used || ce->e_queued || | ||
| 419 | atomic_read(&ce->e_refcnt) || | ||
| 420 | !list_empty(&ce->e_lru_list)) { | ||
| 421 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 422 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 423 | l = &mb_cache_lru_list; | ||
| 424 | spin_lock(&mb_cache_spinlock); | ||
| 425 | continue; | ||
| 426 | } | ||
| 427 | __mb_cache_entry_unhash_unlock(ce); | ||
| 428 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 429 | atomic_read(&ce->e_refcnt))); | ||
| 430 | list_add_tail(&ce->e_lru_list, &free_list); | ||
| 431 | l = &mb_cache_lru_list; | ||
| 432 | spin_lock(&mb_cache_spinlock); | ||
| 285 | } | 433 | } |
| 286 | } | 434 | } |
| 287 | spin_unlock(&mb_cache_spinlock); | 435 | spin_unlock(&mb_cache_spinlock); |
| 288 | list_for_each_safe(l, ltmp, &free_list) { | 436 | |
| 289 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 437 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
| 290 | e_lru_list), GFP_KERNEL); | 438 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
| 291 | } | 439 | } |
| 292 | } | 440 | } |
| 293 | 441 | ||
| @@ -303,23 +451,27 @@ void | |||
| 303 | mb_cache_destroy(struct mb_cache *cache) | 451 | mb_cache_destroy(struct mb_cache *cache) |
| 304 | { | 452 | { |
| 305 | LIST_HEAD(free_list); | 453 | LIST_HEAD(free_list); |
| 306 | struct list_head *l, *ltmp; | 454 | struct mb_cache_entry *ce, *tmp; |
| 307 | 455 | ||
| 308 | spin_lock(&mb_cache_spinlock); | 456 | spin_lock(&mb_cache_spinlock); |
| 309 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 457 | list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { |
| 310 | struct mb_cache_entry *ce = | 458 | if (ce->e_cache == cache) |
| 311 | list_entry(l, struct mb_cache_entry, e_lru_list); | ||
| 312 | if (ce->e_cache == cache) { | ||
| 313 | list_move_tail(&ce->e_lru_list, &free_list); | 459 | list_move_tail(&ce->e_lru_list, &free_list); |
| 314 | __mb_cache_entry_unhash(ce); | ||
| 315 | } | ||
| 316 | } | 460 | } |
| 317 | list_del(&cache->c_cache_list); | 461 | list_del(&cache->c_cache_list); |
| 318 | spin_unlock(&mb_cache_spinlock); | 462 | spin_unlock(&mb_cache_spinlock); |
| 319 | 463 | ||
| 320 | list_for_each_safe(l, ltmp, &free_list) { | 464 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
| 321 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 465 | list_del_init(&ce->e_lru_list); |
| 322 | e_lru_list), GFP_KERNEL); | 466 | /* |
| 467 | * Prevent any find or get operation on the entry. | ||
| 468 | */ | ||
| 469 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 470 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 471 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 472 | atomic_read(&ce->e_refcnt))); | ||
| 473 | __mb_cache_entry_unhash_unlock(ce); | ||
| 474 | __mb_cache_entry_forget(ce, GFP_KERNEL); | ||
| 323 | } | 475 | } |
| 324 | 476 | ||
| 325 | if (atomic_read(&cache->c_entry_count) > 0) { | 477 | if (atomic_read(&cache->c_entry_count) > 0) { |
| @@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 328 | atomic_read(&cache->c_entry_count)); | 480 | atomic_read(&cache->c_entry_count)); |
| 329 | } | 481 | } |
| 330 | 482 | ||
| 331 | kmem_cache_destroy(cache->c_entry_cache); | 483 | if (list_empty(&mb_cache_list)) { |
| 332 | 484 | kmem_cache_destroy(mb_cache_kmem_cache); | |
| 485 | mb_cache_kmem_cache = NULL; | ||
| 486 | } | ||
| 333 | kfree(cache->c_index_hash); | 487 | kfree(cache->c_index_hash); |
| 334 | kfree(cache->c_block_hash); | 488 | kfree(cache->c_block_hash); |
| 335 | kfree(cache); | 489 | kfree(cache); |
| @@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 346 | struct mb_cache_entry * | 500 | struct mb_cache_entry * |
| 347 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 501 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
| 348 | { | 502 | { |
| 349 | struct mb_cache_entry *ce = NULL; | 503 | struct mb_cache_entry *ce; |
| 350 | 504 | ||
| 351 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { | 505 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
| 506 | struct list_head *l; | ||
| 507 | |||
| 508 | l = &mb_cache_lru_list; | ||
| 352 | spin_lock(&mb_cache_spinlock); | 509 | spin_lock(&mb_cache_spinlock); |
| 353 | if (!list_empty(&mb_cache_lru_list)) { | 510 | while (!list_is_last(l, &mb_cache_lru_list)) { |
| 354 | ce = list_entry(mb_cache_lru_list.next, | 511 | l = l->next; |
| 355 | struct mb_cache_entry, e_lru_list); | 512 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
| 356 | list_del_init(&ce->e_lru_list); | 513 | if (ce->e_cache == cache) { |
| 357 | __mb_cache_entry_unhash(ce); | 514 | list_del_init(&ce->e_lru_list); |
| 515 | if (ce->e_used || ce->e_queued || | ||
| 516 | atomic_read(&ce->e_refcnt)) | ||
| 517 | continue; | ||
| 518 | spin_unlock(&mb_cache_spinlock); | ||
| 519 | /* | ||
| 520 | * Prevent any find or get operation on the | ||
| 521 | * entry. | ||
| 522 | */ | ||
| 523 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 524 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 525 | /* Ignore if it is touched by a find/get */ | ||
| 526 | if (ce->e_used || ce->e_queued || | ||
| 527 | atomic_read(&ce->e_refcnt) || | ||
| 528 | !list_empty(&ce->e_lru_list)) { | ||
| 529 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 530 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 531 | l = &mb_cache_lru_list; | ||
| 532 | spin_lock(&mb_cache_spinlock); | ||
| 533 | continue; | ||
| 534 | } | ||
| 535 | mb_assert(list_empty(&ce->e_lru_list)); | ||
| 536 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 537 | atomic_read(&ce->e_refcnt))); | ||
| 538 | __mb_cache_entry_unhash_unlock(ce); | ||
| 539 | goto found; | ||
| 540 | } | ||
| 358 | } | 541 | } |
| 359 | spin_unlock(&mb_cache_spinlock); | 542 | spin_unlock(&mb_cache_spinlock); |
| 360 | } | 543 | } |
| 361 | if (!ce) { | 544 | |
| 362 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 545 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
| 363 | if (!ce) | 546 | if (!ce) |
| 364 | return NULL; | 547 | return NULL; |
| 365 | atomic_inc(&cache->c_entry_count); | 548 | atomic_inc(&cache->c_entry_count); |
| 366 | INIT_LIST_HEAD(&ce->e_lru_list); | 549 | INIT_LIST_HEAD(&ce->e_lru_list); |
| 367 | INIT_LIST_HEAD(&ce->e_block_list); | 550 | INIT_HLIST_BL_NODE(&ce->e_block_list); |
| 368 | ce->e_cache = cache; | 551 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); |
| 369 | ce->e_queued = 0; | 552 | ce->e_cache = cache; |
| 370 | } | 553 | ce->e_queued = 0; |
| 554 | atomic_set(&ce->e_refcnt, 0); | ||
| 555 | found: | ||
| 556 | ce->e_block_hash_p = &cache->c_block_hash[0]; | ||
| 557 | ce->e_index_hash_p = &cache->c_index_hash[0]; | ||
| 371 | ce->e_used = 1 + MB_CACHE_WRITER; | 558 | ce->e_used = 1 + MB_CACHE_WRITER; |
| 372 | return ce; | 559 | return ce; |
| 373 | } | 560 | } |
| @@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
| 393 | { | 580 | { |
| 394 | struct mb_cache *cache = ce->e_cache; | 581 | struct mb_cache *cache = ce->e_cache; |
| 395 | unsigned int bucket; | 582 | unsigned int bucket; |
| 396 | struct list_head *l; | 583 | struct hlist_bl_node *l; |
| 397 | int error = -EBUSY; | 584 | struct hlist_bl_head *block_hash_p; |
| 585 | struct hlist_bl_head *index_hash_p; | ||
| 586 | struct mb_cache_entry *lce; | ||
| 398 | 587 | ||
| 588 | mb_assert(ce); | ||
| 399 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 589 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
| 400 | cache->c_bucket_bits); | 590 | cache->c_bucket_bits); |
| 401 | spin_lock(&mb_cache_spinlock); | 591 | block_hash_p = &cache->c_block_hash[bucket]; |
| 402 | list_for_each_prev(l, &cache->c_block_hash[bucket]) { | 592 | hlist_bl_lock(block_hash_p); |
| 403 | struct mb_cache_entry *ce = | 593 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { |
| 404 | list_entry(l, struct mb_cache_entry, e_block_list); | 594 | if (lce->e_bdev == bdev && lce->e_block == block) { |
| 405 | if (ce->e_bdev == bdev && ce->e_block == block) | 595 | hlist_bl_unlock(block_hash_p); |
| 406 | goto out; | 596 | return -EBUSY; |
| 597 | } | ||
| 407 | } | 598 | } |
| 408 | __mb_cache_entry_unhash(ce); | 599 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); |
| 600 | __mb_cache_entry_unhash_block(ce); | ||
| 601 | __mb_cache_entry_unhash_index(ce); | ||
| 409 | ce->e_bdev = bdev; | 602 | ce->e_bdev = bdev; |
| 410 | ce->e_block = block; | 603 | ce->e_block = block; |
| 411 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); | 604 | ce->e_block_hash_p = block_hash_p; |
| 412 | ce->e_index.o_key = key; | 605 | ce->e_index.o_key = key; |
| 606 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | ||
| 607 | hlist_bl_unlock(block_hash_p); | ||
| 413 | bucket = hash_long(key, cache->c_bucket_bits); | 608 | bucket = hash_long(key, cache->c_bucket_bits); |
| 414 | list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); | 609 | index_hash_p = &cache->c_index_hash[bucket]; |
| 415 | error = 0; | 610 | hlist_bl_lock(index_hash_p); |
| 416 | out: | 611 | ce->e_index_hash_p = index_hash_p; |
| 417 | spin_unlock(&mb_cache_spinlock); | 612 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); |
| 418 | return error; | 613 | hlist_bl_unlock(index_hash_p); |
| 614 | return 0; | ||
| 419 | } | 615 | } |
| 420 | 616 | ||
| 421 | 617 | ||
| @@ -429,24 +625,26 @@ out: | |||
| 429 | void | 625 | void |
| 430 | mb_cache_entry_release(struct mb_cache_entry *ce) | 626 | mb_cache_entry_release(struct mb_cache_entry *ce) |
| 431 | { | 627 | { |
| 432 | spin_lock(&mb_cache_spinlock); | 628 | __mb_cache_entry_release(ce); |
| 433 | __mb_cache_entry_release_unlock(ce); | ||
| 434 | } | 629 | } |
| 435 | 630 | ||
| 436 | 631 | ||
| 437 | /* | 632 | /* |
| 438 | * mb_cache_entry_free() | 633 | * mb_cache_entry_free() |
| 439 | * | 634 | * |
| 440 | * This is equivalent to the sequence mb_cache_entry_takeout() -- | ||
| 441 | * mb_cache_entry_release(). | ||
| 442 | */ | 635 | */ |
| 443 | void | 636 | void |
| 444 | mb_cache_entry_free(struct mb_cache_entry *ce) | 637 | mb_cache_entry_free(struct mb_cache_entry *ce) |
| 445 | { | 638 | { |
| 446 | spin_lock(&mb_cache_spinlock); | 639 | mb_assert(ce); |
| 447 | mb_assert(list_empty(&ce->e_lru_list)); | 640 | mb_assert(list_empty(&ce->e_lru_list)); |
| 448 | __mb_cache_entry_unhash(ce); | 641 | hlist_bl_lock(ce->e_index_hash_p); |
| 449 | __mb_cache_entry_release_unlock(ce); | 642 | __mb_cache_entry_unhash_index(ce); |
| 643 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 644 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 645 | __mb_cache_entry_unhash_block(ce); | ||
| 646 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 647 | __mb_cache_entry_release(ce); | ||
| 450 | } | 648 | } |
| 451 | 649 | ||
| 452 | 650 | ||
| @@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, | |||
| 463 | sector_t block) | 661 | sector_t block) |
| 464 | { | 662 | { |
| 465 | unsigned int bucket; | 663 | unsigned int bucket; |
| 466 | struct list_head *l; | 664 | struct hlist_bl_node *l; |
| 467 | struct mb_cache_entry *ce; | 665 | struct mb_cache_entry *ce; |
| 666 | struct hlist_bl_head *block_hash_p; | ||
| 468 | 667 | ||
| 469 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 668 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
| 470 | cache->c_bucket_bits); | 669 | cache->c_bucket_bits); |
| 471 | spin_lock(&mb_cache_spinlock); | 670 | block_hash_p = &cache->c_block_hash[bucket]; |
| 472 | list_for_each(l, &cache->c_block_hash[bucket]) { | 671 | /* First serialize access to the corresponding block hash chain. */ |
| 473 | ce = list_entry(l, struct mb_cache_entry, e_block_list); | 672 | hlist_bl_lock(block_hash_p); |
| 673 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { | ||
| 674 | mb_assert(ce->e_block_hash_p == block_hash_p); | ||
| 474 | if (ce->e_bdev == bdev && ce->e_block == block) { | 675 | if (ce->e_bdev == bdev && ce->e_block == block) { |
| 475 | DEFINE_WAIT(wait); | 676 | /* |
| 677 | * Prevent a free from removing the entry. | ||
| 678 | */ | ||
| 679 | atomic_inc(&ce->e_refcnt); | ||
| 680 | hlist_bl_unlock(block_hash_p); | ||
| 681 | __spin_lock_mb_cache_entry(ce); | ||
| 682 | atomic_dec(&ce->e_refcnt); | ||
| 683 | if (ce->e_used > 0) { | ||
| 684 | DEFINE_WAIT(wait); | ||
| 685 | while (ce->e_used > 0) { | ||
| 686 | ce->e_queued++; | ||
| 687 | prepare_to_wait(&mb_cache_queue, &wait, | ||
| 688 | TASK_UNINTERRUPTIBLE); | ||
| 689 | __spin_unlock_mb_cache_entry(ce); | ||
| 690 | schedule(); | ||
| 691 | __spin_lock_mb_cache_entry(ce); | ||
| 692 | ce->e_queued--; | ||
| 693 | } | ||
| 694 | finish_wait(&mb_cache_queue, &wait); | ||
| 695 | } | ||
| 696 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
| 697 | __spin_unlock_mb_cache_entry(ce); | ||
| 476 | 698 | ||
| 477 | if (!list_empty(&ce->e_lru_list)) | 699 | if (!list_empty(&ce->e_lru_list)) { |
| 700 | spin_lock(&mb_cache_spinlock); | ||
| 478 | list_del_init(&ce->e_lru_list); | 701 | list_del_init(&ce->e_lru_list); |
| 479 | |||
| 480 | while (ce->e_used > 0) { | ||
| 481 | ce->e_queued++; | ||
| 482 | prepare_to_wait(&mb_cache_queue, &wait, | ||
| 483 | TASK_UNINTERRUPTIBLE); | ||
| 484 | spin_unlock(&mb_cache_spinlock); | 702 | spin_unlock(&mb_cache_spinlock); |
| 485 | schedule(); | ||
| 486 | spin_lock(&mb_cache_spinlock); | ||
| 487 | ce->e_queued--; | ||
| 488 | } | 703 | } |
| 489 | finish_wait(&mb_cache_queue, &wait); | 704 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
| 490 | ce->e_used += 1 + MB_CACHE_WRITER; | 705 | __mb_cache_entry_release(ce); |
| 491 | |||
| 492 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
| 493 | __mb_cache_entry_release_unlock(ce); | ||
| 494 | return NULL; | 706 | return NULL; |
| 495 | } | 707 | } |
| 496 | goto cleanup; | 708 | return ce; |
| 497 | } | 709 | } |
| 498 | } | 710 | } |
| 499 | ce = NULL; | 711 | hlist_bl_unlock(block_hash_p); |
| 500 | 712 | return NULL; | |
| 501 | cleanup: | ||
| 502 | spin_unlock(&mb_cache_spinlock); | ||
| 503 | return ce; | ||
| 504 | } | 713 | } |
| 505 | 714 | ||
| 506 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) | 715 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) |
| 507 | 716 | ||
| 508 | static struct mb_cache_entry * | 717 | static struct mb_cache_entry * |
| 509 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, | 718 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, |
| 510 | struct block_device *bdev, unsigned int key) | 719 | struct block_device *bdev, unsigned int key) |
| 511 | { | 720 | { |
| 512 | while (l != head) { | 721 | |
| 722 | /* The index hash chain is already locked by the caller. */ | ||
| 723 | while (l != NULL) { | ||
| 513 | struct mb_cache_entry *ce = | 724 | struct mb_cache_entry *ce = |
| 514 | list_entry(l, struct mb_cache_entry, e_index.o_list); | 725 | hlist_bl_entry(l, struct mb_cache_entry, |
| 726 | e_index.o_list); | ||
| 727 | mb_assert(ce->e_index_hash_p == head); | ||
| 515 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { | 728 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
| 516 | DEFINE_WAIT(wait); | 729 | /* |
| 517 | 730 | * Prevent a free from removing the entry. | |
| 518 | if (!list_empty(&ce->e_lru_list)) | 731 | */ |
| 519 | list_del_init(&ce->e_lru_list); | 732 | atomic_inc(&ce->e_refcnt); |
| 520 | 733 | hlist_bl_unlock(head); | |
| 734 | __spin_lock_mb_cache_entry(ce); | ||
| 735 | atomic_dec(&ce->e_refcnt); | ||
| 736 | ce->e_used++; | ||
| 521 | /* Incrementing before holding the lock gives readers | 737 | /* Incrementing before holding the lock gives readers |
| 522 | priority over writers. */ | 738 | priority over writers. */ |
| 523 | ce->e_used++; | 739 | if (ce->e_used >= MB_CACHE_WRITER) { |
| 524 | while (ce->e_used >= MB_CACHE_WRITER) { | 740 | DEFINE_WAIT(wait); |
| 525 | ce->e_queued++; | 741 | |
| 526 | prepare_to_wait(&mb_cache_queue, &wait, | 742 | while (ce->e_used >= MB_CACHE_WRITER) { |
| 527 | TASK_UNINTERRUPTIBLE); | 743 | ce->e_queued++; |
| 528 | spin_unlock(&mb_cache_spinlock); | 744 | prepare_to_wait(&mb_cache_queue, &wait, |
| 529 | schedule(); | 745 | TASK_UNINTERRUPTIBLE); |
| 530 | spin_lock(&mb_cache_spinlock); | 746 | __spin_unlock_mb_cache_entry(ce); |
| 531 | ce->e_queued--; | 747 | schedule(); |
| 748 | __spin_lock_mb_cache_entry(ce); | ||
| 749 | ce->e_queued--; | ||
| 750 | } | ||
| 751 | finish_wait(&mb_cache_queue, &wait); | ||
| 532 | } | 752 | } |
| 533 | finish_wait(&mb_cache_queue, &wait); | 753 | __spin_unlock_mb_cache_entry(ce); |
| 534 | 754 | if (!list_empty(&ce->e_lru_list)) { | |
| 535 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
| 536 | __mb_cache_entry_release_unlock(ce); | ||
| 537 | spin_lock(&mb_cache_spinlock); | 755 | spin_lock(&mb_cache_spinlock); |
| 756 | list_del_init(&ce->e_lru_list); | ||
| 757 | spin_unlock(&mb_cache_spinlock); | ||
| 758 | } | ||
| 759 | if (!__mb_cache_entry_is_block_hashed(ce)) { | ||
| 760 | __mb_cache_entry_release(ce); | ||
| 538 | return ERR_PTR(-EAGAIN); | 761 | return ERR_PTR(-EAGAIN); |
| 539 | } | 762 | } |
| 540 | return ce; | 763 | return ce; |
| 541 | } | 764 | } |
| 542 | l = l->next; | 765 | l = l->next; |
| 543 | } | 766 | } |
| 767 | hlist_bl_unlock(head); | ||
| 544 | return NULL; | 768 | return NULL; |
| 545 | } | 769 | } |
| 546 | 770 | ||
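Both the get and find paths above use the same wait-queue idiom: bump e_queued, prepare_to_wait(), drop the entry lock, schedule(), retake the lock and re-test the condition. A self-contained generic sketch of that idiom, with demo names rather than the mbcache code:

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_queue);
static DEFINE_SPINLOCK(demo_lock);
static int demo_busy;

/* Sleep until demo_busy clears, releasing demo_lock while asleep. */
static void demo_wait_idle(void)
{
        DEFINE_WAIT(wait);

        spin_lock(&demo_lock);
        while (demo_busy) {
                prepare_to_wait(&demo_queue, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock(&demo_lock);        /* never sleep holding the lock */
                schedule();
                spin_lock(&demo_lock);
        }
        finish_wait(&demo_queue, &wait);
        spin_unlock(&demo_lock);
}

A waker would clear demo_busy under demo_lock and call wake_up_all(&demo_queue), mirroring what __mb_cache_entry_release() does for e_queued waiters.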
| @@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, | |||
| 562 | unsigned int key) | 786 | unsigned int key) |
| 563 | { | 787 | { |
| 564 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 788 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
| 565 | struct list_head *l; | 789 | struct hlist_bl_node *l; |
| 566 | struct mb_cache_entry *ce; | 790 | struct mb_cache_entry *ce = NULL; |
| 567 | 791 | struct hlist_bl_head *index_hash_p; | |
| 568 | spin_lock(&mb_cache_spinlock); | 792 | |
| 569 | l = cache->c_index_hash[bucket].next; | 793 | index_hash_p = &cache->c_index_hash[bucket]; |
| 570 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 794 | hlist_bl_lock(index_hash_p); |
| 571 | spin_unlock(&mb_cache_spinlock); | 795 | if (!hlist_bl_empty(index_hash_p)) { |
| 796 | l = hlist_bl_first(index_hash_p); | ||
| 797 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | ||
| 798 | } else | ||
| 799 | hlist_bl_unlock(index_hash_p); | ||
| 572 | return ce; | 800 | return ce; |
| 573 | } | 801 | } |
| 574 | 802 | ||
| @@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, | |||
| 597 | { | 825 | { |
| 598 | struct mb_cache *cache = prev->e_cache; | 826 | struct mb_cache *cache = prev->e_cache; |
| 599 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 827 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
| 600 | struct list_head *l; | 828 | struct hlist_bl_node *l; |
| 601 | struct mb_cache_entry *ce; | 829 | struct mb_cache_entry *ce; |
| 830 | struct hlist_bl_head *index_hash_p; | ||
| 602 | 831 | ||
| 603 | spin_lock(&mb_cache_spinlock); | 832 | index_hash_p = &cache->c_index_hash[bucket]; |
| 833 | mb_assert(prev->e_index_hash_p == index_hash_p); | ||
| 834 | hlist_bl_lock(index_hash_p); | ||
| 835 | mb_assert(!hlist_bl_empty(index_hash_p)); | ||
| 604 | l = prev->e_index.o_list.next; | 836 | l = prev->e_index.o_list.next; |
| 605 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 837 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
| 606 | __mb_cache_entry_release_unlock(prev); | 838 | __mb_cache_entry_release(prev); |
| 607 | return ce; | 839 | return ce; |
| 608 | } | 840 | } |
| 609 | 841 | ||
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 0332109162a5..f007a3355570 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
| @@ -26,7 +26,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data); | |||
| 26 | 26 | ||
| 27 | static void minix_evict_inode(struct inode *inode) | 27 | static void minix_evict_inode(struct inode *inode) |
| 28 | { | 28 | { |
| 29 | truncate_inode_pages(&inode->i_data, 0); | 29 | truncate_inode_pages_final(&inode->i_data); |
| 30 | if (!inode->i_nlink) { | 30 | if (!inode->i_nlink) { |
| 31 | inode->i_size = 0; | 31 | inode->i_size = 0; |
| 32 | minix_truncate(inode); | 32 | minix_truncate(inode); |
| @@ -86,7 +86,7 @@ static void init_once(void *foo) | |||
| 86 | inode_init_once(&ei->vfs_inode); | 86 | inode_init_once(&ei->vfs_inode); |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | static int init_inodecache(void) | 89 | static int __init init_inodecache(void) |
| 90 | { | 90 | { |
| 91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", | 91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", |
| 92 | sizeof(struct minix_inode_info), | 92 | sizeof(struct minix_inode_info), |
| @@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) | |||
| 123 | struct minix_sb_info * sbi = minix_sb(sb); | 123 | struct minix_sb_info * sbi = minix_sb(sb); |
| 124 | struct minix_super_block * ms; | 124 | struct minix_super_block * ms; |
| 125 | 125 | ||
| 126 | sync_filesystem(sb); | ||
| 126 | ms = sbi->s_ms; | 127 | ms = sbi->s_ms; |
| 127 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 128 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 128 | return 0; | 129 | return 0; |
diff --git a/fs/mount.h b/fs/mount.h index a17458ca6f29..b29e42f05f34 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
| @@ -19,13 +19,13 @@ struct mnt_pcp { | |||
| 19 | }; | 19 | }; |
| 20 | 20 | ||
| 21 | struct mountpoint { | 21 | struct mountpoint { |
| 22 | struct list_head m_hash; | 22 | struct hlist_node m_hash; |
| 23 | struct dentry *m_dentry; | 23 | struct dentry *m_dentry; |
| 24 | int m_count; | 24 | int m_count; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | struct mount { | 27 | struct mount { |
| 28 | struct list_head mnt_hash; | 28 | struct hlist_node mnt_hash; |
| 29 | struct mount *mnt_parent; | 29 | struct mount *mnt_parent; |
| 30 | struct dentry *mnt_mountpoint; | 30 | struct dentry *mnt_mountpoint; |
| 31 | struct vfsmount mnt; | 31 | struct vfsmount mnt; |
diff --git a/fs/namei.c b/fs/namei.c index 385f7817bfcc..88339f59efb5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
| 1109 | return false; | 1109 | return false; |
| 1110 | 1110 | ||
| 1111 | if (!d_mountpoint(path->dentry)) | 1111 | if (!d_mountpoint(path->dentry)) |
| 1112 | break; | 1112 | return true; |
| 1113 | 1113 | ||
| 1114 | mounted = __lookup_mnt(path->mnt, path->dentry); | 1114 | mounted = __lookup_mnt(path->mnt, path->dentry); |
| 1115 | if (!mounted) | 1115 | if (!mounted) |
| @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
| 1125 | */ | 1125 | */ |
| 1126 | *inode = path->dentry->d_inode; | 1126 | *inode = path->dentry->d_inode; |
| 1127 | } | 1127 | } |
| 1128 | return true; | 1128 | return read_seqretry(&mount_lock, nd->m_seq); |
| 1129 | } | ||
| 1130 | |||
| 1131 | static void follow_mount_rcu(struct nameidata *nd) | ||
| 1132 | { | ||
| 1133 | while (d_mountpoint(nd->path.dentry)) { | ||
| 1134 | struct mount *mounted; | ||
| 1135 | mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); | ||
| 1136 | if (!mounted) | ||
| 1137 | break; | ||
| 1138 | nd->path.mnt = &mounted->mnt; | ||
| 1139 | nd->path.dentry = mounted->mnt.mnt_root; | ||
| 1140 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
| 1141 | } | ||
| 1142 | } | 1129 | } |
| 1143 | 1130 | ||
| 1144 | static int follow_dotdot_rcu(struct nameidata *nd) | 1131 | static int follow_dotdot_rcu(struct nameidata *nd) |
| @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
| 1166 | break; | 1153 | break; |
| 1167 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1154 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); |
| 1168 | } | 1155 | } |
| 1169 | follow_mount_rcu(nd); | 1156 | while (d_mountpoint(nd->path.dentry)) { |
| 1157 | struct mount *mounted; | ||
| 1158 | mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); | ||
| 1159 | if (!mounted) | ||
| 1160 | break; | ||
| 1161 | nd->path.mnt = &mounted->mnt; | ||
| 1162 | nd->path.dentry = mounted->mnt.mnt_root; | ||
| 1163 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
| 1164 | if (!read_seqretry(&mount_lock, nd->m_seq)) | ||
| 1165 | goto failed; | ||
| 1166 | } | ||
| 1170 | nd->inode = nd->path.dentry->d_inode; | 1167 | nd->inode = nd->path.dentry->d_inode; |
| 1171 | return 0; | 1168 | return 0; |
| 1172 | 1169 | ||
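The added read_seqretry(&mount_lock, nd->m_seq) checks make the RCU-walk mountpoint crossings self-validating: the walker revalidates its mount_lock sequence number after each crossing and abandons RCU-walk (falling back to ref-walk) when validation fails. The underlying seqlock reader pattern, as a self-contained sketch with demo names (mount_lock itself is a seqlock in this tree):

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(demo_seqlock);
static int demo_shared;

/* Lockless read that retries whenever a writer raced with us. */
static int demo_read(void)
{
        unsigned int seq;
        int val;

        do {
                seq = read_seqbegin(&demo_seqlock);
                val = demo_shared;              /* unlocked snapshot */
        } while (read_seqretry(&demo_seqlock, seq));

        return val;
}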
| @@ -1799,7 +1796,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
| 1799 | if (err) | 1796 | if (err) |
| 1800 | return err; | 1797 | return err; |
| 1801 | } | 1798 | } |
| 1802 | if (!d_is_directory(nd->path.dentry)) { | 1799 | if (!d_can_lookup(nd->path.dentry)) { |
| 1803 | err = -ENOTDIR; | 1800 | err = -ENOTDIR; |
| 1804 | break; | 1801 | break; |
| 1805 | } | 1802 | } |
| @@ -1820,7 +1817,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1820 | struct dentry *root = nd->root.dentry; | 1817 | struct dentry *root = nd->root.dentry; |
| 1821 | struct inode *inode = root->d_inode; | 1818 | struct inode *inode = root->d_inode; |
| 1822 | if (*name) { | 1819 | if (*name) { |
| 1823 | if (!d_is_directory(root)) | 1820 | if (!d_can_lookup(root)) |
| 1824 | return -ENOTDIR; | 1821 | return -ENOTDIR; |
| 1825 | retval = inode_permission(inode, MAY_EXEC); | 1822 | retval = inode_permission(inode, MAY_EXEC); |
| 1826 | if (retval) | 1823 | if (retval) |
| @@ -1876,7 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1876 | dentry = f.file->f_path.dentry; | 1873 | dentry = f.file->f_path.dentry; |
| 1877 | 1874 | ||
| 1878 | if (*name) { | 1875 | if (*name) { |
| 1879 | if (!d_is_directory(dentry)) { | 1876 | if (!d_can_lookup(dentry)) { |
| 1880 | fdput(f); | 1877 | fdput(f); |
| 1881 | return -ENOTDIR; | 1878 | return -ENOTDIR; |
| 1882 | } | 1879 | } |
| @@ -1884,7 +1881,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1884 | 1881 | ||
| 1885 | nd->path = f.file->f_path; | 1882 | nd->path = f.file->f_path; |
| 1886 | if (flags & LOOKUP_RCU) { | 1883 | if (flags & LOOKUP_RCU) { |
| 1887 | if (f.need_put) | 1884 | if (f.flags & FDPUT_FPUT) |
| 1888 | *fp = f.file; | 1885 | *fp = f.file; |
| 1889 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1886 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
| 1890 | rcu_read_lock(); | 1887 | rcu_read_lock(); |
| @@ -1958,7 +1955,7 @@ static int path_lookupat(int dfd, const char *name, | |||
| 1958 | err = complete_walk(nd); | 1955 | err = complete_walk(nd); |
| 1959 | 1956 | ||
| 1960 | if (!err && nd->flags & LOOKUP_DIRECTORY) { | 1957 | if (!err && nd->flags & LOOKUP_DIRECTORY) { |
| 1961 | if (!d_is_directory(nd->path.dentry)) { | 1958 | if (!d_can_lookup(nd->path.dentry)) { |
| 1962 | path_put(&nd->path); | 1959 | path_put(&nd->path); |
| 1963 | err = -ENOTDIR; | 1960 | err = -ENOTDIR; |
| 1964 | } | 1961 | } |
| @@ -2417,11 +2414,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) | |||
| 2417 | IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) | 2414 | IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) |
| 2418 | return -EPERM; | 2415 | return -EPERM; |
| 2419 | if (isdir) { | 2416 | if (isdir) { |
| 2420 | if (!d_is_directory(victim) && !d_is_autodir(victim)) | 2417 | if (!d_is_dir(victim)) |
| 2421 | return -ENOTDIR; | 2418 | return -ENOTDIR; |
| 2422 | if (IS_ROOT(victim)) | 2419 | if (IS_ROOT(victim)) |
| 2423 | return -EBUSY; | 2420 | return -EBUSY; |
| 2424 | } else if (d_is_directory(victim) || d_is_autodir(victim)) | 2421 | } else if (d_is_dir(victim)) |
| 2425 | return -EISDIR; | 2422 | return -EISDIR; |
| 2426 | if (IS_DEADDIR(dir)) | 2423 | if (IS_DEADDIR(dir)) |
| 2427 | return -ENOENT; | 2424 | return -ENOENT; |
| @@ -2572,7 +2569,7 @@ static int handle_truncate(struct file *filp) | |||
| 2572 | /* | 2569 | /* |
| 2573 | * Refuse to truncate files with mandatory locks held on them. | 2570 | * Refuse to truncate files with mandatory locks held on them. |
| 2574 | */ | 2571 | */ |
| 2575 | error = locks_verify_locked(inode); | 2572 | error = locks_verify_locked(filp); |
| 2576 | if (!error) | 2573 | if (!error) |
| 2577 | error = security_path_truncate(path); | 2574 | error = security_path_truncate(path); |
| 2578 | if (!error) { | 2575 | if (!error) { |
| @@ -3019,11 +3016,10 @@ finish_open: | |||
| 3019 | } | 3016 | } |
| 3020 | audit_inode(name, nd->path.dentry, 0); | 3017 | audit_inode(name, nd->path.dentry, 0); |
| 3021 | error = -EISDIR; | 3018 | error = -EISDIR; |
| 3022 | if ((open_flag & O_CREAT) && | 3019 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) |
| 3023 | (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) | ||
| 3024 | goto out; | 3020 | goto out; |
| 3025 | error = -ENOTDIR; | 3021 | error = -ENOTDIR; |
| 3026 | if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) | 3022 | if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) |
| 3027 | goto out; | 3023 | goto out; |
| 3028 | if (!S_ISREG(nd->inode->i_mode)) | 3024 | if (!S_ISREG(nd->inode->i_mode)) |
| 3029 | will_truncate = false; | 3025 | will_truncate = false; |
| @@ -3747,7 +3743,7 @@ exit1: | |||
| 3747 | slashes: | 3743 | slashes: |
| 3748 | if (d_is_negative(dentry)) | 3744 | if (d_is_negative(dentry)) |
| 3749 | error = -ENOENT; | 3745 | error = -ENOENT; |
| 3750 | else if (d_is_directory(dentry) || d_is_autodir(dentry)) | 3746 | else if (d_is_dir(dentry)) |
| 3751 | error = -EISDIR; | 3747 | error = -EISDIR; |
| 3752 | else | 3748 | else |
| 3753 | error = -ENOTDIR; | 3749 | error = -ENOTDIR; |
| @@ -3977,7 +3973,28 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
| 3977 | return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 3973 | return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
| 3978 | } | 3974 | } |
| 3979 | 3975 | ||
| 3980 | /* | 3976 | /** |
| 3977 | * vfs_rename - rename a filesystem object | ||
| 3978 | * @old_dir: parent of source | ||
| 3979 | * @old_dentry: source | ||
| 3980 | * @new_dir: parent of destination | ||
| 3981 | * @new_dentry: destination | ||
| 3982 | * @delegated_inode: returns an inode needing a delegation break | ||
| 3983 | * @flags: rename flags | ||
| 3984 | * | ||
| 3985 | * The caller must hold multiple mutexes--see lock_rename(). | ||
| 3986 | * | ||
| 3987 | * If vfs_rename discovers a delegation in need of breaking at either | ||
| 3988 | * the source or destination, it will return -EWOULDBLOCK and return a | ||
| 3989 | * reference to the inode in delegated_inode. The caller should then | ||
| 3990 | * break the delegation and retry. Because breaking a delegation may | ||
| 3991 | * take a long time, the caller should drop all locks before doing | ||
| 3992 | * so. | ||
| 3993 | * | ||
| 3994 | * Alternatively, a caller may pass NULL for delegated_inode. This may | ||
| 3995 | * be appropriate for callers that expect the underlying filesystem not | ||
| 3996 | * to be NFS exported. | ||
| 3997 | * | ||
| 3981 | * The worst of all namespace operations - renaming directory. "Perverted" | 3998 | * The worst of all namespace operations - renaming directory. "Perverted" |
| 3982 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... | 3999 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... |
| 3983 | * Problems: | 4000 | * Problems: |
| @@ -4005,163 +4022,139 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
| 4005 | * ->i_mutex on parents, which works but leads to some truly excessive | 4022 | * ->i_mutex on parents, which works but leads to some truly excessive |
| 4006 | * locking]. | 4023 | * locking]. |
| 4007 | */ | 4024 | */ |
| 4008 | static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | 4025 | int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 4009 | struct inode *new_dir, struct dentry *new_dentry) | 4026 | struct inode *new_dir, struct dentry *new_dentry, |
| 4027 | struct inode **delegated_inode, unsigned int flags) | ||
| 4010 | { | 4028 | { |
| 4011 | int error = 0; | 4029 | int error; |
| 4030 | bool is_dir = d_is_dir(old_dentry); | ||
| 4031 | const unsigned char *old_name; | ||
| 4032 | struct inode *source = old_dentry->d_inode; | ||
| 4012 | struct inode *target = new_dentry->d_inode; | 4033 | struct inode *target = new_dentry->d_inode; |
| 4034 | bool new_is_dir = false; | ||
| 4013 | unsigned max_links = new_dir->i_sb->s_max_links; | 4035 | unsigned max_links = new_dir->i_sb->s_max_links; |
| 4014 | 4036 | ||
| 4037 | if (source == target) | ||
| 4038 | return 0; | ||
| 4039 | |||
| 4040 | error = may_delete(old_dir, old_dentry, is_dir); | ||
| 4041 | if (error) | ||
| 4042 | return error; | ||
| 4043 | |||
| 4044 | if (!target) { | ||
| 4045 | error = may_create(new_dir, new_dentry); | ||
| 4046 | } else { | ||
| 4047 | new_is_dir = d_is_dir(new_dentry); | ||
| 4048 | |||
| 4049 | if (!(flags & RENAME_EXCHANGE)) | ||
| 4050 | error = may_delete(new_dir, new_dentry, is_dir); | ||
| 4051 | else | ||
| 4052 | error = may_delete(new_dir, new_dentry, new_is_dir); | ||
| 4053 | } | ||
| 4054 | if (error) | ||
| 4055 | return error; | ||
| 4056 | |||
| 4057 | if (!old_dir->i_op->rename) | ||
| 4058 | return -EPERM; | ||
| 4059 | |||
| 4060 | if (flags && !old_dir->i_op->rename2) | ||
| 4061 | return -EINVAL; | ||
| 4062 | |||
| 4015 | /* | 4063 | /* |
| 4016 | * If we are going to change the parent - check write permissions, | 4064 | * If we are going to change the parent - check write permissions, |
| 4017 | * we'll need to flip '..'. | 4065 | * we'll need to flip '..'. |
| 4018 | */ | 4066 | */ |
| 4019 | if (new_dir != old_dir) { | 4067 | if (new_dir != old_dir) { |
| 4020 | error = inode_permission(old_dentry->d_inode, MAY_WRITE); | 4068 | if (is_dir) { |
| 4021 | if (error) | 4069 | error = inode_permission(source, MAY_WRITE); |
| 4022 | return error; | 4070 | if (error) |
| 4071 | return error; | ||
| 4072 | } | ||
| 4073 | if ((flags & RENAME_EXCHANGE) && new_is_dir) { | ||
| 4074 | error = inode_permission(target, MAY_WRITE); | ||
| 4075 | if (error) | ||
| 4076 | return error; | ||
| 4077 | } | ||
| 4023 | } | 4078 | } |
| 4024 | 4079 | ||
| 4025 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | 4080 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, |
| 4081 | flags); | ||
| 4026 | if (error) | 4082 | if (error) |
| 4027 | return error; | 4083 | return error; |
| 4028 | 4084 | ||
| 4085 | old_name = fsnotify_oldname_init(old_dentry->d_name.name); | ||
| 4029 | dget(new_dentry); | 4086 | dget(new_dentry); |
| 4030 | if (target) | 4087 | if (!is_dir || (flags & RENAME_EXCHANGE)) |
| 4088 | lock_two_nondirectories(source, target); | ||
| 4089 | else if (target) | ||
| 4031 | mutex_lock(&target->i_mutex); | 4090 | mutex_lock(&target->i_mutex); |
| 4032 | 4091 | ||
| 4033 | error = -EBUSY; | 4092 | error = -EBUSY; |
| 4034 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) | 4093 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) |
| 4035 | goto out; | 4094 | goto out; |
| 4036 | 4095 | ||
| 4037 | error = -EMLINK; | 4096 | if (max_links && new_dir != old_dir) { |
| 4038 | if (max_links && !target && new_dir != old_dir && | 4097 | error = -EMLINK; |
| 4039 | new_dir->i_nlink >= max_links) | 4098 | if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) |
| 4040 | goto out; | 4099 | goto out; |
| 4041 | 4100 | if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && | |
| 4042 | if (target) | 4101 | old_dir->i_nlink >= max_links) |
| 4102 | goto out; | ||
| 4103 | } | ||
| 4104 | if (is_dir && !(flags & RENAME_EXCHANGE) && target) | ||
| 4043 | shrink_dcache_parent(new_dentry); | 4105 | shrink_dcache_parent(new_dentry); |
| 4044 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 4106 | if (!is_dir) { |
| 4045 | if (error) | 4107 | error = try_break_deleg(source, delegated_inode); |
| 4046 | goto out; | 4108 | if (error) |
| 4047 | 4109 | goto out; | |
| 4048 | if (target) { | ||
| 4049 | target->i_flags |= S_DEAD; | ||
| 4050 | dont_mount(new_dentry); | ||
| 4051 | } | 4110 | } |
| 4052 | out: | 4111 | if (target && !new_is_dir) { |
| 4053 | if (target) | ||
| 4054 | mutex_unlock(&target->i_mutex); | ||
| 4055 | dput(new_dentry); | ||
| 4056 | if (!error) | ||
| 4057 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | ||
| 4058 | d_move(old_dentry,new_dentry); | ||
| 4059 | return error; | ||
| 4060 | } | ||
| 4061 | |||
| 4062 | static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | ||
| 4063 | struct inode *new_dir, struct dentry *new_dentry, | ||
| 4064 | struct inode **delegated_inode) | ||
| 4065 | { | ||
| 4066 | struct inode *target = new_dentry->d_inode; | ||
| 4067 | struct inode *source = old_dentry->d_inode; | ||
| 4068 | int error; | ||
| 4069 | |||
| 4070 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
| 4071 | if (error) | ||
| 4072 | return error; | ||
| 4073 | |||
| 4074 | dget(new_dentry); | ||
| 4075 | lock_two_nondirectories(source, target); | ||
| 4076 | |||
| 4077 | error = -EBUSY; | ||
| 4078 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | ||
| 4079 | goto out; | ||
| 4080 | |||
| 4081 | error = try_break_deleg(source, delegated_inode); | ||
| 4082 | if (error) | ||
| 4083 | goto out; | ||
| 4084 | if (target) { | ||
| 4085 | error = try_break_deleg(target, delegated_inode); | 4112 | error = try_break_deleg(target, delegated_inode); |
| 4086 | if (error) | 4113 | if (error) |
| 4087 | goto out; | 4114 | goto out; |
| 4088 | } | 4115 | } |
| 4089 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 4116 | if (!flags) { |
| 4117 | error = old_dir->i_op->rename(old_dir, old_dentry, | ||
| 4118 | new_dir, new_dentry); | ||
| 4119 | } else { | ||
| 4120 | error = old_dir->i_op->rename2(old_dir, old_dentry, | ||
| 4121 | new_dir, new_dentry, flags); | ||
| 4122 | } | ||
| 4090 | if (error) | 4123 | if (error) |
| 4091 | goto out; | 4124 | goto out; |
| 4092 | 4125 | ||
| 4093 | if (target) | 4126 | if (!(flags & RENAME_EXCHANGE) && target) { |
| 4127 | if (is_dir) | ||
| 4128 | target->i_flags |= S_DEAD; | ||
| 4094 | dont_mount(new_dentry); | 4129 | dont_mount(new_dentry); |
| 4095 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 4130 | } |
| 4096 | d_move(old_dentry, new_dentry); | 4131 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { |
| 4132 | if (!(flags & RENAME_EXCHANGE)) | ||
| 4133 | d_move(old_dentry, new_dentry); | ||
| 4134 | else | ||
| 4135 | d_exchange(old_dentry, new_dentry); | ||
| 4136 | } | ||
| 4097 | out: | 4137 | out: |
| 4098 | unlock_two_nondirectories(source, target); | 4138 | if (!is_dir || (flags & RENAME_EXCHANGE)) |
| 4139 | unlock_two_nondirectories(source, target); | ||
| 4140 | else if (target) | ||
| 4141 | mutex_unlock(&target->i_mutex); | ||
| 4099 | dput(new_dentry); | 4142 | dput(new_dentry); |
| 4100 | return error; | 4143 | if (!error) { |
| 4101 | } | ||
| 4102 | |||
| 4103 | /** | ||
| 4104 | * vfs_rename - rename a filesystem object | ||
| 4105 | * @old_dir: parent of source | ||
| 4106 | * @old_dentry: source | ||
| 4107 | * @new_dir: parent of destination | ||
| 4108 | * @new_dentry: destination | ||
| 4109 | * @delegated_inode: returns an inode needing a delegation break | ||
| 4110 | * | ||
| 4111 | * The caller must hold multiple mutexes--see lock_rename()). | ||
| 4112 | * | ||
| 4113 | * If vfs_rename discovers a delegation in need of breaking at either | ||
| 4114 | * the source or destination, it will return -EWOULDBLOCK and return a | ||
| 4115 | * reference to the inode in delegated_inode. The caller should then | ||
| 4116 | * break the delegation and retry. Because breaking a delegation may | ||
| 4117 | * take a long time, the caller should drop all locks before doing | ||
| 4118 | * so. | ||
| 4119 | * | ||
| 4120 | * Alternatively, a caller may pass NULL for delegated_inode. This may | ||
| 4121 | * be appropriate for callers that expect the underlying filesystem not | ||
| 4122 | * to be NFS exported. | ||
| 4123 | */ | ||
| 4124 | int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
| 4125 | struct inode *new_dir, struct dentry *new_dentry, | ||
| 4126 | struct inode **delegated_inode) | ||
| 4127 | { | ||
| 4128 | int error; | ||
| 4129 | int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); | ||
| 4130 | const unsigned char *old_name; | ||
| 4131 | |||
| 4132 | if (old_dentry->d_inode == new_dentry->d_inode) | ||
| 4133 | return 0; | ||
| 4134 | |||
| 4135 | error = may_delete(old_dir, old_dentry, is_dir); | ||
| 4136 | if (error) | ||
| 4137 | return error; | ||
| 4138 | |||
| 4139 | if (!new_dentry->d_inode) | ||
| 4140 | error = may_create(new_dir, new_dentry); | ||
| 4141 | else | ||
| 4142 | error = may_delete(new_dir, new_dentry, is_dir); | ||
| 4143 | if (error) | ||
| 4144 | return error; | ||
| 4145 | |||
| 4146 | if (!old_dir->i_op->rename) | ||
| 4147 | return -EPERM; | ||
| 4148 | |||
| 4149 | old_name = fsnotify_oldname_init(old_dentry->d_name.name); | ||
| 4150 | |||
| 4151 | if (is_dir) | ||
| 4152 | error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); | ||
| 4153 | else | ||
| 4154 | error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); | ||
| 4155 | if (!error) | ||
| 4156 | fsnotify_move(old_dir, new_dir, old_name, is_dir, | 4144 | fsnotify_move(old_dir, new_dir, old_name, is_dir, |
| 4157 | new_dentry->d_inode, old_dentry); | 4145 | !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); |
| 4146 | if (flags & RENAME_EXCHANGE) { | ||
| 4147 | fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, | ||
| 4148 | new_is_dir, NULL, new_dentry); | ||
| 4149 | } | ||
| 4150 | } | ||
| 4158 | fsnotify_oldname_free(old_name); | 4151 | fsnotify_oldname_free(old_name); |
| 4159 | 4152 | ||
| 4160 | return error; | 4153 | return error; |
| 4161 | } | 4154 | } |
| 4162 | 4155 | ||
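A note on the caller side of the delegation contract documented above: when vfs_rename() returns -EWOULDBLOCK with *delegated_inode set, the caller is expected to drop its locks, break the delegation, and retry. Below is a minimal sketch of that loop, patterned on the renameat2 caller that follows; break_deleg_wait() is the in-tree helper of this era, and declarations, lookups, and error handling are elided:

	struct inode *delegated_inode = NULL;
	int error;

retry_deleg:
	trap = lock_rename(new_dir, old_dir);
	/* re-look-up old_dentry and new_dentry under the parents' locks */
	error = vfs_rename(old_dir->d_inode, old_dentry,
			   new_dir->d_inode, new_dentry,
			   &delegated_inode, flags);
	/* dput() the dentries */
	unlock_rename(new_dir, old_dir);
	if (delegated_inode) {
		/* waits for the delegation to go away, then loops */
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry_deleg;
	}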
| 4163 | SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | 4156 | SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, |
| 4164 | int, newdfd, const char __user *, newname) | 4157 | int, newdfd, const char __user *, newname, unsigned int, flags) |
| 4165 | { | 4158 | { |
| 4166 | struct dentry *old_dir, *new_dir; | 4159 | struct dentry *old_dir, *new_dir; |
| 4167 | struct dentry *old_dentry, *new_dentry; | 4160 | struct dentry *old_dentry, *new_dentry; |
| @@ -4173,6 +4166,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | |||
| 4173 | unsigned int lookup_flags = 0; | 4166 | unsigned int lookup_flags = 0; |
| 4174 | bool should_retry = false; | 4167 | bool should_retry = false; |
| 4175 | int error; | 4168 | int error; |
| 4169 | |||
| 4170 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | ||
| 4171 | return -EINVAL; | ||
| 4172 | |||
| 4173 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | ||
| 4174 | return -EINVAL; | ||
| 4175 | |||
| 4176 | retry: | 4176 | retry: |
| 4177 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4177 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
| 4178 | if (IS_ERR(from)) { | 4178 | if (IS_ERR(from)) { |
| @@ -4196,6 +4196,8 @@ retry: | |||
| 4196 | goto exit2; | 4196 | goto exit2; |
| 4197 | 4197 | ||
| 4198 | new_dir = newnd.path.dentry; | 4198 | new_dir = newnd.path.dentry; |
| 4199 | if (flags & RENAME_NOREPLACE) | ||
| 4200 | error = -EEXIST; | ||
| 4199 | if (newnd.last_type != LAST_NORM) | 4201 | if (newnd.last_type != LAST_NORM) |
| 4200 | goto exit2; | 4202 | goto exit2; |
| 4201 | 4203 | ||
| @@ -4205,7 +4207,8 @@ retry: | |||
| 4205 | 4207 | ||
| 4206 | oldnd.flags &= ~LOOKUP_PARENT; | 4208 | oldnd.flags &= ~LOOKUP_PARENT; |
| 4207 | newnd.flags &= ~LOOKUP_PARENT; | 4209 | newnd.flags &= ~LOOKUP_PARENT; |
| 4208 | newnd.flags |= LOOKUP_RENAME_TARGET; | 4210 | if (!(flags & RENAME_EXCHANGE)) |
| 4211 | newnd.flags |= LOOKUP_RENAME_TARGET; | ||
| 4209 | 4212 | ||
| 4210 | retry_deleg: | 4213 | retry_deleg: |
| 4211 | trap = lock_rename(new_dir, old_dir); | 4214 | trap = lock_rename(new_dir, old_dir); |
| @@ -4218,34 +4221,49 @@ retry_deleg: | |||
| 4218 | error = -ENOENT; | 4221 | error = -ENOENT; |
| 4219 | if (d_is_negative(old_dentry)) | 4222 | if (d_is_negative(old_dentry)) |
| 4220 | goto exit4; | 4223 | goto exit4; |
| 4224 | new_dentry = lookup_hash(&newnd); | ||
| 4225 | error = PTR_ERR(new_dentry); | ||
| 4226 | if (IS_ERR(new_dentry)) | ||
| 4227 | goto exit4; | ||
| 4228 | error = -EEXIST; | ||
| 4229 | if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) | ||
| 4230 | goto exit5; | ||
| 4231 | if (flags & RENAME_EXCHANGE) { | ||
| 4232 | error = -ENOENT; | ||
| 4233 | if (d_is_negative(new_dentry)) | ||
| 4234 | goto exit5; | ||
| 4235 | |||
| 4236 | if (!d_is_dir(new_dentry)) { | ||
| 4237 | error = -ENOTDIR; | ||
| 4238 | if (newnd.last.name[newnd.last.len]) | ||
| 4239 | goto exit5; | ||
| 4240 | } | ||
| 4241 | } | ||
| 4221 | /* unless the source is a directory trailing slashes give -ENOTDIR */ | 4242 | /* unless the source is a directory trailing slashes give -ENOTDIR */ |
| 4222 | if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { | 4243 | if (!d_is_dir(old_dentry)) { |
| 4223 | error = -ENOTDIR; | 4244 | error = -ENOTDIR; |
| 4224 | if (oldnd.last.name[oldnd.last.len]) | 4245 | if (oldnd.last.name[oldnd.last.len]) |
| 4225 | goto exit4; | 4246 | goto exit5; |
| 4226 | if (newnd.last.name[newnd.last.len]) | 4247 | if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) |
| 4227 | goto exit4; | 4248 | goto exit5; |
| 4228 | } | 4249 | } |
| 4229 | /* source should not be ancestor of target */ | 4250 | /* source should not be ancestor of target */ |
| 4230 | error = -EINVAL; | 4251 | error = -EINVAL; |
| 4231 | if (old_dentry == trap) | 4252 | if (old_dentry == trap) |
| 4232 | goto exit4; | 4253 | goto exit5; |
| 4233 | new_dentry = lookup_hash(&newnd); | ||
| 4234 | error = PTR_ERR(new_dentry); | ||
| 4235 | if (IS_ERR(new_dentry)) | ||
| 4236 | goto exit4; | ||
| 4237 | /* target should not be an ancestor of source */ | 4254 | /* target should not be an ancestor of source */ |
| 4238 | error = -ENOTEMPTY; | 4255 | if (!(flags & RENAME_EXCHANGE)) |
| 4256 | error = -ENOTEMPTY; | ||
| 4239 | if (new_dentry == trap) | 4257 | if (new_dentry == trap) |
| 4240 | goto exit5; | 4258 | goto exit5; |
| 4241 | 4259 | ||
| 4242 | error = security_path_rename(&oldnd.path, old_dentry, | 4260 | error = security_path_rename(&oldnd.path, old_dentry, |
| 4243 | &newnd.path, new_dentry); | 4261 | &newnd.path, new_dentry, flags); |
| 4244 | if (error) | 4262 | if (error) |
| 4245 | goto exit5; | 4263 | goto exit5; |
| 4246 | error = vfs_rename(old_dir->d_inode, old_dentry, | 4264 | error = vfs_rename(old_dir->d_inode, old_dentry, |
| 4247 | new_dir->d_inode, new_dentry, | 4265 | new_dir->d_inode, new_dentry, |
| 4248 | &delegated_inode); | 4266 | &delegated_inode, flags); |
| 4249 | exit5: | 4267 | exit5: |
| 4250 | dput(new_dentry); | 4268 | dput(new_dentry); |
| 4251 | exit4: | 4269 | exit4: |
| @@ -4275,9 +4293,15 @@ exit: | |||
| 4275 | return error; | 4293 | return error; |
| 4276 | } | 4294 | } |
| 4277 | 4295 | ||
| 4296 | SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | ||
| 4297 | int, newdfd, const char __user *, newname) | ||
| 4298 | { | ||
| 4299 | return sys_renameat2(olddfd, oldname, newdfd, newname, 0); | ||
| 4300 | } | ||
| 4301 | |||
| 4278 | SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) | 4302 | SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) |
| 4279 | { | 4303 | { |
| 4280 | return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); | 4304 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
| 4281 | } | 4305 | } |
| 4282 | 4306 | ||
| 4283 | int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) | 4307 | int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) |
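For completeness, the user-visible result of the fs/namei.c changes above is the new renameat2() system call: renameat() semantics plus a flags word, where RENAME_NOREPLACE fails with EEXIST rather than replacing an existing target, RENAME_EXCHANGE atomically swaps the two paths, and the two flags are rejected in combination. A hedged userspace sketch follows; at this point there is no libc wrapper, so it assumes raw syscall(2) and that the installed headers export SYS_renameat2, and the flag values mirror include/uapi/linux/fs.h:

	#define _GNU_SOURCE
	#include <fcntl.h>		/* AT_FDCWD */
	#include <stdio.h>
	#include <sys/syscall.h>	/* SYS_renameat2, if new enough */
	#include <unistd.h>

	#ifndef RENAME_NOREPLACE
	#define RENAME_NOREPLACE	(1 << 0)	/* don't overwrite target */
	#endif
	#ifndef RENAME_EXCHANGE
	#define RENAME_EXCHANGE		(1 << 1)	/* swap source and target */
	#endif

	int main(void)
	{
		/* Atomically swap "a" and "b"; both must already exist. */
		if (syscall(SYS_renameat2, AT_FDCWD, "a", AT_FDCWD, "b",
			    RENAME_EXCHANGE) < 0)
			perror("renameat2");
		return 0;
	}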
diff --git a/fs/namespace.c b/fs/namespace.c index 22e536705c45..2ffc5a2905d4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -23,11 +23,34 @@ | |||
| 23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
| 24 | #include <linux/proc_ns.h> | 24 | #include <linux/proc_ns.h> |
| 25 | #include <linux/magic.h> | 25 | #include <linux/magic.h> |
| 26 | #include <linux/bootmem.h> | ||
| 26 | #include "pnode.h" | 27 | #include "pnode.h" |
| 27 | #include "internal.h" | 28 | #include "internal.h" |
| 28 | 29 | ||
| 29 | #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) | 30 | static unsigned int m_hash_mask __read_mostly; |
| 30 | #define HASH_SIZE (1UL << HASH_SHIFT) | 31 | static unsigned int m_hash_shift __read_mostly; |
| 32 | static unsigned int mp_hash_mask __read_mostly; | ||
| 33 | static unsigned int mp_hash_shift __read_mostly; | ||
| 34 | |||
| 35 | static __initdata unsigned long mhash_entries; | ||
| 36 | static int __init set_mhash_entries(char *str) | ||
| 37 | { | ||
| 38 | if (!str) | ||
| 39 | return 0; | ||
| 40 | mhash_entries = simple_strtoul(str, &str, 0); | ||
| 41 | return 1; | ||
| 42 | } | ||
| 43 | __setup("mhash_entries=", set_mhash_entries); | ||
| 44 | |||
| 45 | static __initdata unsigned long mphash_entries; | ||
| 46 | static int __init set_mphash_entries(char *str) | ||
| 47 | { | ||
| 48 | if (!str) | ||
| 49 | return 0; | ||
| 50 | mphash_entries = simple_strtoul(str, &str, 0); | ||
| 51 | return 1; | ||
| 52 | } | ||
| 53 | __setup("mphash_entries=", set_mphash_entries); | ||
| 31 | 54 | ||
| 32 | static int event; | 55 | static int event; |
| 33 | static DEFINE_IDA(mnt_id_ida); | 56 | static DEFINE_IDA(mnt_id_ida); |
| @@ -36,8 +59,8 @@ static DEFINE_SPINLOCK(mnt_id_lock); | |||
| 36 | static int mnt_id_start = 0; | 59 | static int mnt_id_start = 0; |
| 37 | static int mnt_group_start = 1; | 60 | static int mnt_group_start = 1; |
| 38 | 61 | ||
| 39 | static struct list_head *mount_hashtable __read_mostly; | 62 | static struct hlist_head *mount_hashtable __read_mostly; |
| 40 | static struct list_head *mountpoint_hashtable __read_mostly; | 63 | static struct hlist_head *mountpoint_hashtable __read_mostly; |
| 41 | static struct kmem_cache *mnt_cache __read_mostly; | 64 | static struct kmem_cache *mnt_cache __read_mostly; |
| 42 | static DECLARE_RWSEM(namespace_sem); | 65 | static DECLARE_RWSEM(namespace_sem); |
| 43 | 66 | ||
| @@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj); | |||
| 55 | */ | 78 | */ |
| 56 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); | 79 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); |
| 57 | 80 | ||
| 58 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 81 | static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) |
| 59 | { | 82 | { |
| 60 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); | 83 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); |
| 61 | tmp += ((unsigned long)dentry / L1_CACHE_BYTES); | 84 | tmp += ((unsigned long)dentry / L1_CACHE_BYTES); |
| 62 | tmp = tmp + (tmp >> HASH_SHIFT); | 85 | tmp = tmp + (tmp >> m_hash_shift); |
| 63 | return tmp & (HASH_SIZE - 1); | 86 | return &mount_hashtable[tmp & m_hash_mask]; |
| 87 | } | ||
| 88 | |||
| 89 | static inline struct hlist_head *mp_hash(struct dentry *dentry) | ||
| 90 | { | ||
| 91 | unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); | ||
| 92 | tmp = tmp + (tmp >> mp_hash_shift); | ||
| 93 | return &mountpoint_hashtable[tmp & mp_hash_mask]; | ||
| 64 | } | 94 | } |
| 65 | 95 | ||
| 66 | /* | 96 | /* |
| @@ -187,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name) | |||
| 187 | mnt->mnt_writers = 0; | 217 | mnt->mnt_writers = 0; |
| 188 | #endif | 218 | #endif |
| 189 | 219 | ||
| 190 | INIT_LIST_HEAD(&mnt->mnt_hash); | 220 | INIT_HLIST_NODE(&mnt->mnt_hash); |
| 191 | INIT_LIST_HEAD(&mnt->mnt_child); | 221 | INIT_LIST_HEAD(&mnt->mnt_child); |
| 192 | INIT_LIST_HEAD(&mnt->mnt_mounts); | 222 | INIT_LIST_HEAD(&mnt->mnt_mounts); |
| 193 | INIT_LIST_HEAD(&mnt->mnt_list); | 223 | INIT_LIST_HEAD(&mnt->mnt_list); |
| @@ -575,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) | |||
| 575 | */ | 605 | */ |
| 576 | struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | 606 | struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) |
| 577 | { | 607 | { |
| 578 | struct list_head *head = mount_hashtable + hash(mnt, dentry); | 608 | struct hlist_head *head = m_hash(mnt, dentry); |
| 579 | struct mount *p; | 609 | struct mount *p; |
| 580 | 610 | ||
| 581 | list_for_each_entry_rcu(p, head, mnt_hash) | 611 | hlist_for_each_entry_rcu(p, head, mnt_hash) |
| 582 | if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) | 612 | if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) |
| 583 | return p; | 613 | return p; |
| 584 | return NULL; | 614 | return NULL; |
| @@ -590,13 +620,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | |||
| 590 | */ | 620 | */ |
| 591 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) | 621 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) |
| 592 | { | 622 | { |
| 593 | struct list_head *head = mount_hashtable + hash(mnt, dentry); | 623 | struct mount *p, *res; |
| 594 | struct mount *p; | 624 | res = p = __lookup_mnt(mnt, dentry); |
| 595 | 625 | if (!p) | |
| 596 | list_for_each_entry_reverse(p, head, mnt_hash) | 626 | goto out; |
| 597 | if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) | 627 | hlist_for_each_entry_continue(p, mnt_hash) { |
| 598 | return p; | 628 | if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) |
| 599 | return NULL; | 629 | break; |
| 630 | res = p; | ||
| 631 | } | ||
| 632 | out: | ||
| 633 | return res; | ||
| 600 | } | 634 | } |
| 601 | 635 | ||
| 602 | /* | 636 | /* |
| @@ -633,11 +667,11 @@ struct vfsmount *lookup_mnt(struct path *path) | |||
| 633 | 667 | ||
| 634 | static struct mountpoint *new_mountpoint(struct dentry *dentry) | 668 | static struct mountpoint *new_mountpoint(struct dentry *dentry) |
| 635 | { | 669 | { |
| 636 | struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); | 670 | struct hlist_head *chain = mp_hash(dentry); |
| 637 | struct mountpoint *mp; | 671 | struct mountpoint *mp; |
| 638 | int ret; | 672 | int ret; |
| 639 | 673 | ||
| 640 | list_for_each_entry(mp, chain, m_hash) { | 674 | hlist_for_each_entry(mp, chain, m_hash) { |
| 641 | if (mp->m_dentry == dentry) { | 675 | if (mp->m_dentry == dentry) { |
| 642 | /* might be worth a WARN_ON() */ | 676 | /* might be worth a WARN_ON() */ |
| 643 | if (d_unlinked(dentry)) | 677 | if (d_unlinked(dentry)) |
| @@ -659,7 +693,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) | |||
| 659 | 693 | ||
| 660 | mp->m_dentry = dentry; | 694 | mp->m_dentry = dentry; |
| 661 | mp->m_count = 1; | 695 | mp->m_count = 1; |
| 662 | list_add(&mp->m_hash, chain); | 696 | hlist_add_head(&mp->m_hash, chain); |
| 663 | return mp; | 697 | return mp; |
| 664 | } | 698 | } |
| 665 | 699 | ||
| @@ -670,7 +704,7 @@ static void put_mountpoint(struct mountpoint *mp) | |||
| 670 | spin_lock(&dentry->d_lock); | 704 | spin_lock(&dentry->d_lock); |
| 671 | dentry->d_flags &= ~DCACHE_MOUNTED; | 705 | dentry->d_flags &= ~DCACHE_MOUNTED; |
| 672 | spin_unlock(&dentry->d_lock); | 706 | spin_unlock(&dentry->d_lock); |
| 673 | list_del(&mp->m_hash); | 707 | hlist_del(&mp->m_hash); |
| 674 | kfree(mp); | 708 | kfree(mp); |
| 675 | } | 709 | } |
| 676 | } | 710 | } |
| @@ -712,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) | |||
| 712 | mnt->mnt_parent = mnt; | 746 | mnt->mnt_parent = mnt; |
| 713 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 747 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
| 714 | list_del_init(&mnt->mnt_child); | 748 | list_del_init(&mnt->mnt_child); |
| 715 | list_del_init(&mnt->mnt_hash); | 749 | hlist_del_init_rcu(&mnt->mnt_hash); |
| 716 | put_mountpoint(mnt->mnt_mp); | 750 | put_mountpoint(mnt->mnt_mp); |
| 717 | mnt->mnt_mp = NULL; | 751 | mnt->mnt_mp = NULL; |
| 718 | } | 752 | } |
| @@ -739,15 +773,14 @@ static void attach_mnt(struct mount *mnt, | |||
| 739 | struct mountpoint *mp) | 773 | struct mountpoint *mp) |
| 740 | { | 774 | { |
| 741 | mnt_set_mountpoint(parent, mp, mnt); | 775 | mnt_set_mountpoint(parent, mp, mnt); |
| 742 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 776 | hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); |
| 743 | hash(&parent->mnt, mp->m_dentry)); | ||
| 744 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | 777 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
| 745 | } | 778 | } |
| 746 | 779 | ||
| 747 | /* | 780 | /* |
| 748 | * vfsmount lock must be held for write | 781 | * vfsmount lock must be held for write |
| 749 | */ | 782 | */ |
| 750 | static void commit_tree(struct mount *mnt) | 783 | static void commit_tree(struct mount *mnt, struct mount *shadows) |
| 751 | { | 784 | { |
| 752 | struct mount *parent = mnt->mnt_parent; | 785 | struct mount *parent = mnt->mnt_parent; |
| 753 | struct mount *m; | 786 | struct mount *m; |
| @@ -762,8 +795,11 @@ static void commit_tree(struct mount *mnt) | |||
| 762 | 795 | ||
| 763 | list_splice(&head, n->list.prev); | 796 | list_splice(&head, n->list.prev); |
| 764 | 797 | ||
| 765 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 798 | if (shadows) |
| 766 | hash(&parent->mnt, mnt->mnt_mountpoint)); | 799 | hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); |
| 800 | else | ||
| 801 | hlist_add_head_rcu(&mnt->mnt_hash, | ||
| 802 | m_hash(&parent->mnt, mnt->mnt_mountpoint)); | ||
| 767 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | 803 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
| 768 | touch_mnt_namespace(n); | 804 | touch_mnt_namespace(n); |
| 769 | } | 805 | } |
| @@ -1153,26 +1189,28 @@ int may_umount(struct vfsmount *mnt) | |||
| 1153 | 1189 | ||
| 1154 | EXPORT_SYMBOL(may_umount); | 1190 | EXPORT_SYMBOL(may_umount); |
| 1155 | 1191 | ||
| 1156 | static LIST_HEAD(unmounted); /* protected by namespace_sem */ | 1192 | static HLIST_HEAD(unmounted); /* protected by namespace_sem */ |
| 1157 | 1193 | ||
| 1158 | static void namespace_unlock(void) | 1194 | static void namespace_unlock(void) |
| 1159 | { | 1195 | { |
| 1160 | struct mount *mnt; | 1196 | struct mount *mnt; |
| 1161 | LIST_HEAD(head); | 1197 | struct hlist_head head = unmounted; |
| 1162 | 1198 | ||
| 1163 | if (likely(list_empty(&unmounted))) { | 1199 | if (likely(hlist_empty(&head))) { |
| 1164 | up_write(&namespace_sem); | 1200 | up_write(&namespace_sem); |
| 1165 | return; | 1201 | return; |
| 1166 | } | 1202 | } |
| 1167 | 1203 | ||
| 1168 | list_splice_init(&unmounted, &head); | 1204 | head.first->pprev = &head.first; |
| 1205 | INIT_HLIST_HEAD(&unmounted); | ||
| 1206 | |||
| 1169 | up_write(&namespace_sem); | 1207 | up_write(&namespace_sem); |
| 1170 | 1208 | ||
| 1171 | synchronize_rcu(); | 1209 | synchronize_rcu(); |
| 1172 | 1210 | ||
| 1173 | while (!list_empty(&head)) { | 1211 | while (!hlist_empty(&head)) { |
| 1174 | mnt = list_first_entry(&head, struct mount, mnt_hash); | 1212 | mnt = hlist_entry(head.first, struct mount, mnt_hash); |
| 1175 | list_del_init(&mnt->mnt_hash); | 1213 | hlist_del_init(&mnt->mnt_hash); |
| 1176 | if (mnt->mnt_ex_mountpoint.mnt) | 1214 | if (mnt->mnt_ex_mountpoint.mnt) |
| 1177 | path_put(&mnt->mnt_ex_mountpoint); | 1215 | path_put(&mnt->mnt_ex_mountpoint); |
| 1178 | mntput(&mnt->mnt); | 1216 | mntput(&mnt->mnt); |
| @@ -1193,16 +1231,19 @@ static inline void namespace_lock(void) | |||
| 1193 | */ | 1231 | */ |
| 1194 | void umount_tree(struct mount *mnt, int how) | 1232 | void umount_tree(struct mount *mnt, int how) |
| 1195 | { | 1233 | { |
| 1196 | LIST_HEAD(tmp_list); | 1234 | HLIST_HEAD(tmp_list); |
| 1197 | struct mount *p; | 1235 | struct mount *p; |
| 1236 | struct mount *last = NULL; | ||
| 1198 | 1237 | ||
| 1199 | for (p = mnt; p; p = next_mnt(p, mnt)) | 1238 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
| 1200 | list_move(&p->mnt_hash, &tmp_list); | 1239 | hlist_del_init_rcu(&p->mnt_hash); |
| 1240 | hlist_add_head(&p->mnt_hash, &tmp_list); | ||
| 1241 | } | ||
| 1201 | 1242 | ||
| 1202 | if (how) | 1243 | if (how) |
| 1203 | propagate_umount(&tmp_list); | 1244 | propagate_umount(&tmp_list); |
| 1204 | 1245 | ||
| 1205 | list_for_each_entry(p, &tmp_list, mnt_hash) { | 1246 | hlist_for_each_entry(p, &tmp_list, mnt_hash) { |
| 1206 | list_del_init(&p->mnt_expire); | 1247 | list_del_init(&p->mnt_expire); |
| 1207 | list_del_init(&p->mnt_list); | 1248 | list_del_init(&p->mnt_list); |
| 1208 | __touch_mnt_namespace(p->mnt_ns); | 1249 | __touch_mnt_namespace(p->mnt_ns); |
| @@ -1220,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how) | |||
| 1220 | p->mnt_mp = NULL; | 1261 | p->mnt_mp = NULL; |
| 1221 | } | 1262 | } |
| 1222 | change_mnt_propagation(p, MS_PRIVATE); | 1263 | change_mnt_propagation(p, MS_PRIVATE); |
| 1264 | last = p; | ||
| 1265 | } | ||
| 1266 | if (last) { | ||
| 1267 | last->mnt_hash.next = unmounted.first; | ||
| 1268 | unmounted.first = tmp_list.first; | ||
| 1269 | unmounted.first->pprev = &unmounted.first; | ||
| 1223 | } | 1270 | } |
| 1224 | list_splice(&tmp_list, &unmounted); | ||
| 1225 | } | 1271 | } |
| 1226 | 1272 | ||
| 1227 | static void shrink_submounts(struct mount *mnt); | 1273 | static void shrink_submounts(struct mount *mnt); |
| @@ -1605,24 +1651,23 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
| 1605 | struct mountpoint *dest_mp, | 1651 | struct mountpoint *dest_mp, |
| 1606 | struct path *parent_path) | 1652 | struct path *parent_path) |
| 1607 | { | 1653 | { |
| 1608 | LIST_HEAD(tree_list); | 1654 | HLIST_HEAD(tree_list); |
| 1609 | struct mount *child, *p; | 1655 | struct mount *child, *p; |
| 1656 | struct hlist_node *n; | ||
| 1610 | int err; | 1657 | int err; |
| 1611 | 1658 | ||
| 1612 | if (IS_MNT_SHARED(dest_mnt)) { | 1659 | if (IS_MNT_SHARED(dest_mnt)) { |
| 1613 | err = invent_group_ids(source_mnt, true); | 1660 | err = invent_group_ids(source_mnt, true); |
| 1614 | if (err) | 1661 | if (err) |
| 1615 | goto out; | 1662 | goto out; |
| 1616 | } | 1663 | err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); |
| 1617 | err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); | 1664 | if (err) |
| 1618 | if (err) | 1665 | goto out_cleanup_ids; |
| 1619 | goto out_cleanup_ids; | 1666 | lock_mount_hash(); |
| 1620 | |||
| 1621 | lock_mount_hash(); | ||
| 1622 | |||
| 1623 | if (IS_MNT_SHARED(dest_mnt)) { | ||
| 1624 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | 1667 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
| 1625 | set_mnt_shared(p); | 1668 | set_mnt_shared(p); |
| 1669 | } else { | ||
| 1670 | lock_mount_hash(); | ||
| 1626 | } | 1671 | } |
| 1627 | if (parent_path) { | 1672 | if (parent_path) { |
| 1628 | detach_mnt(source_mnt, parent_path); | 1673 | detach_mnt(source_mnt, parent_path); |
| @@ -1630,20 +1675,22 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
| 1630 | touch_mnt_namespace(source_mnt->mnt_ns); | 1675 | touch_mnt_namespace(source_mnt->mnt_ns); |
| 1631 | } else { | 1676 | } else { |
| 1632 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); | 1677 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
| 1633 | commit_tree(source_mnt); | 1678 | commit_tree(source_mnt, NULL); |
| 1634 | } | 1679 | } |
| 1635 | 1680 | ||
| 1636 | list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { | 1681 | hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { |
| 1637 | list_del_init(&child->mnt_hash); | 1682 | struct mount *q; |
| 1638 | commit_tree(child); | 1683 | hlist_del_init(&child->mnt_hash); |
| 1684 | q = __lookup_mnt_last(&child->mnt_parent->mnt, | ||
| 1685 | child->mnt_mountpoint); | ||
| 1686 | commit_tree(child, q); | ||
| 1639 | } | 1687 | } |
| 1640 | unlock_mount_hash(); | 1688 | unlock_mount_hash(); |
| 1641 | 1689 | ||
| 1642 | return 0; | 1690 | return 0; |
| 1643 | 1691 | ||
| 1644 | out_cleanup_ids: | 1692 | out_cleanup_ids: |
| 1645 | if (IS_MNT_SHARED(dest_mnt)) | 1693 | cleanup_group_ids(source_mnt, NULL); |
| 1646 | cleanup_group_ids(source_mnt, NULL); | ||
| 1647 | out: | 1694 | out: |
| 1648 | return err; | 1695 | return err; |
| 1649 | } | 1696 | } |
| @@ -2777,18 +2824,24 @@ void __init mnt_init(void) | |||
| 2777 | mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), | 2824 | mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), |
| 2778 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 2825 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
| 2779 | 2826 | ||
| 2780 | mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | 2827 | mount_hashtable = alloc_large_system_hash("Mount-cache", |
| 2781 | mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | 2828 | sizeof(struct hlist_head), |
| 2829 | mhash_entries, 19, | ||
| 2830 | 0, | ||
| 2831 | &m_hash_shift, &m_hash_mask, 0, 0); | ||
| 2832 | mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", | ||
| 2833 | sizeof(struct hlist_head), | ||
| 2834 | mphash_entries, 19, | ||
| 2835 | 0, | ||
| 2836 | &mp_hash_shift, &mp_hash_mask, 0, 0); | ||
| 2782 | 2837 | ||
| 2783 | if (!mount_hashtable || !mountpoint_hashtable) | 2838 | if (!mount_hashtable || !mountpoint_hashtable) |
| 2784 | panic("Failed to allocate mount hash table\n"); | 2839 | panic("Failed to allocate mount hash table\n"); |
| 2785 | 2840 | ||
| 2786 | printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); | 2841 | for (u = 0; u <= m_hash_mask; u++) |
| 2787 | 2842 | INIT_HLIST_HEAD(&mount_hashtable[u]); | |
| 2788 | for (u = 0; u < HASH_SIZE; u++) | 2843 | for (u = 0; u <= mp_hash_mask; u++) |
| 2789 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2844 | INIT_HLIST_HEAD(&mountpoint_hashtable[u]); |
| 2790 | for (u = 0; u < HASH_SIZE; u++) | ||
| 2791 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); | ||
| 2792 | 2845 | ||
| 2793 | kernfs_init(); | 2846 | kernfs_init(); |
| 2794 | 2847 | ||
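The practical upshot of this hunk: the mount and mountpoint hash tables are now sized by alloc_large_system_hash() in proportion to system memory instead of being capped at a single page, and the mhash_entries= and mphash_entries= boot parameters added at the top of this file override the sizing, e.g. on the kernel command line:

	mhash_entries=131072 mphash_entries=8192

Both are parsed with simple_strtoul(..., 0), so decimal or 0x-prefixed hex is accepted; alloc_large_system_hash() settles on a power-of-two bucket count and hands the resulting shift and mask back through m_hash_shift/m_hash_mask and mp_hash_shift/mp_hash_mask for use by m_hash() and mp_hash().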
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2cf2ebecb55f..647d86d2db39 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
| @@ -99,6 +99,7 @@ static void destroy_inodecache(void) | |||
| 99 | 99 | ||
| 100 | static int ncp_remount(struct super_block *sb, int *flags, char* data) | 100 | static int ncp_remount(struct super_block *sb, int *flags, char* data) |
| 101 | { | 101 | { |
| 102 | sync_filesystem(sb); | ||
| 102 | *flags |= MS_NODIRATIME; | 103 | *flags |= MS_NODIRATIME; |
| 103 | return 0; | 104 | return 0; |
| 104 | } | 105 | } |
| @@ -296,7 +297,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
| 296 | static void | 297 | static void |
| 297 | ncp_evict_inode(struct inode *inode) | 298 | ncp_evict_inode(struct inode *inode) |
| 298 | { | 299 | { |
| 299 | truncate_inode_pages(&inode->i_data, 0); | 300 | truncate_inode_pages_final(&inode->i_data); |
| 300 | clear_inode(inode); | 301 | clear_inode(inode); |
| 301 | 302 | ||
| 302 | if (S_ISDIR(inode->i_mode)) { | 303 | if (S_ISDIR(inode->i_mode)) { |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 56ff823ca82e..65d849bdf77a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
| @@ -1213,7 +1213,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | |||
| 1213 | end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); | 1213 | end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); |
| 1214 | if (end != NFS_I(inode)->npages) { | 1214 | if (end != NFS_I(inode)->npages) { |
| 1215 | rcu_read_lock(); | 1215 | rcu_read_lock(); |
| 1216 | end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); | 1216 | end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); |
| 1217 | rcu_read_unlock(); | 1217 | rcu_read_unlock(); |
| 1218 | } | 1218 | } |
| 1219 | 1219 | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ae2e87b95453..41db5258e7a7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
| @@ -112,7 +112,8 @@ out: | |||
| 112 | * TODO: keep track of all layouts (and delegations) in a hash table | 112 | * TODO: keep track of all layouts (and delegations) in a hash table |
| 113 | * hashed by filehandle. | 113 | * hashed by filehandle. |
| 114 | */ | 114 | */ |
| 115 | static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) | 115 | static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, |
| 116 | struct nfs_fh *fh, nfs4_stateid *stateid) | ||
| 116 | { | 117 | { |
| 117 | struct nfs_server *server; | 118 | struct nfs_server *server; |
| 118 | struct inode *ino; | 119 | struct inode *ino; |
| @@ -120,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, | |||
| 120 | 121 | ||
| 121 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 122 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
| 122 | list_for_each_entry(lo, &server->layouts, plh_layouts) { | 123 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
| 124 | if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid)) | ||
| 125 | continue; | ||
| 123 | if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) | 126 | if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) |
| 124 | continue; | 127 | continue; |
| 125 | ino = igrab(lo->plh_inode); | 128 | ino = igrab(lo->plh_inode); |
| 126 | if (!ino) | 129 | if (!ino) |
| 127 | continue; | 130 | break; |
| 128 | spin_lock(&ino->i_lock); | 131 | spin_lock(&ino->i_lock); |
| 129 | /* Is this layout in the process of being freed? */ | 132 | /* Is this layout in the process of being freed? */ |
| 130 | if (NFS_I(ino)->layout != lo) { | 133 | if (NFS_I(ino)->layout != lo) { |
| 131 | spin_unlock(&ino->i_lock); | 134 | spin_unlock(&ino->i_lock); |
| 132 | iput(ino); | 135 | iput(ino); |
| 133 | continue; | 136 | break; |
| 134 | } | 137 | } |
| 135 | pnfs_get_layout_hdr(lo); | 138 | pnfs_get_layout_hdr(lo); |
| 136 | spin_unlock(&ino->i_lock); | 139 | spin_unlock(&ino->i_lock); |
| @@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, | |||
| 141 | return NULL; | 144 | return NULL; |
| 142 | } | 145 | } |
| 143 | 146 | ||
| 144 | static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) | 147 | static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, |
| 148 | struct nfs_fh *fh, nfs4_stateid *stateid) | ||
| 145 | { | 149 | { |
| 146 | struct pnfs_layout_hdr *lo; | 150 | struct pnfs_layout_hdr *lo; |
| 147 | 151 | ||
| 148 | spin_lock(&clp->cl_lock); | 152 | spin_lock(&clp->cl_lock); |
| 149 | rcu_read_lock(); | 153 | rcu_read_lock(); |
| 150 | lo = get_layout_by_fh_locked(clp, fh); | 154 | lo = get_layout_by_fh_locked(clp, fh, stateid); |
| 151 | rcu_read_unlock(); | 155 | rcu_read_unlock(); |
| 152 | spin_unlock(&clp->cl_lock); | 156 | spin_unlock(&clp->cl_lock); |
| 153 | 157 | ||
| @@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
| 162 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | 166 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; |
| 163 | LIST_HEAD(free_me_list); | 167 | LIST_HEAD(free_me_list); |
| 164 | 168 | ||
| 165 | lo = get_layout_by_fh(clp, &args->cbl_fh); | 169 | lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); |
| 166 | if (!lo) | 170 | if (!lo) |
| 167 | return NFS4ERR_NOMATCHING_LAYOUT; | 171 | goto out; |
| 168 | 172 | ||
| 169 | ino = lo->plh_inode; | 173 | ino = lo->plh_inode; |
| 170 | spin_lock(&ino->i_lock); | 174 | spin_lock(&ino->i_lock); |
| @@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
| 179 | pnfs_free_lseg_list(&free_me_list); | 183 | pnfs_free_lseg_list(&free_me_list); |
| 180 | pnfs_put_layout_hdr(lo); | 184 | pnfs_put_layout_hdr(lo); |
| 181 | iput(ino); | 185 | iput(ino); |
| 186 | out: | ||
| 182 | return rv; | 187 | return rv; |
| 183 | } | 188 | } |
| 184 | 189 | ||
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ef792f29f831..5d8ccecf5f5c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
| @@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, | |||
| 659 | 659 | ||
| 660 | rcu_read_lock(); | 660 | rcu_read_lock(); |
| 661 | delegation = rcu_dereference(NFS_I(inode)->delegation); | 661 | delegation = rcu_dereference(NFS_I(inode)->delegation); |
| 662 | if (delegation == NULL) | ||
| 663 | goto out_enoent; | ||
| 662 | 664 | ||
| 663 | if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { | 665 | if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) |
| 664 | rcu_read_unlock(); | 666 | goto out_enoent; |
| 665 | return -ENOENT; | ||
| 666 | } | ||
| 667 | nfs_mark_return_delegation(server, delegation); | 667 | nfs_mark_return_delegation(server, delegation); |
| 668 | rcu_read_unlock(); | 668 | rcu_read_unlock(); |
| 669 | 669 | ||
| 670 | nfs_delegation_run_state_manager(clp); | 670 | nfs_delegation_run_state_manager(clp); |
| 671 | return 0; | 671 | return 0; |
| 672 | out_enoent: | ||
| 673 | rcu_read_unlock(); | ||
| 674 | return -ENOENT; | ||
| 672 | } | 675 | } |
| 673 | 676 | ||
| 674 | static struct inode * | 677 | static struct inode * |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a48fe4b84b6..d9f3d067cd15 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = { | |||
| 69 | 69 | ||
| 70 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) | 70 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) |
| 71 | { | 71 | { |
| 72 | struct nfs_inode *nfsi = NFS_I(dir); | ||
| 72 | struct nfs_open_dir_context *ctx; | 73 | struct nfs_open_dir_context *ctx; |
| 73 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 74 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
| 74 | if (ctx != NULL) { | 75 | if (ctx != NULL) { |
| 75 | ctx->duped = 0; | 76 | ctx->duped = 0; |
| 76 | ctx->attr_gencount = NFS_I(dir)->attr_gencount; | 77 | ctx->attr_gencount = nfsi->attr_gencount; |
| 77 | ctx->dir_cookie = 0; | 78 | ctx->dir_cookie = 0; |
| 78 | ctx->dup_cookie = 0; | 79 | ctx->dup_cookie = 0; |
| 79 | ctx->cred = get_rpccred(cred); | 80 | ctx->cred = get_rpccred(cred); |
| 81 | spin_lock(&dir->i_lock); | ||
| 82 | list_add(&ctx->list, &nfsi->open_files); | ||
| 83 | spin_unlock(&dir->i_lock); | ||
| 80 | return ctx; | 84 | return ctx; |
| 81 | } | 85 | } |
| 82 | return ERR_PTR(-ENOMEM); | 86 | return ERR_PTR(-ENOMEM); |
| 83 | } | 87 | } |
| 84 | 88 | ||
| 85 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | 89 | static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) |
| 86 | { | 90 | { |
| 91 | spin_lock(&dir->i_lock); | ||
| 92 | list_del(&ctx->list); | ||
| 93 | spin_unlock(&dir->i_lock); | ||
| 87 | put_rpccred(ctx->cred); | 94 | put_rpccred(ctx->cred); |
| 88 | kfree(ctx); | 95 | kfree(ctx); |
| 89 | } | 96 | } |
| @@ -126,7 +133,7 @@ out: | |||
| 126 | static int | 133 | static int |
| 127 | nfs_closedir(struct inode *inode, struct file *filp) | 134 | nfs_closedir(struct inode *inode, struct file *filp) |
| 128 | { | 135 | { |
| 129 | put_nfs_open_dir_context(filp->private_data); | 136 | put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data); |
| 130 | return 0; | 137 | return 0; |
| 131 | } | 138 | } |
| 132 | 139 | ||
| @@ -306,10 +313,9 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
| 306 | if (printk_ratelimit()) { | 313 | if (printk_ratelimit()) { |
| 307 | pr_notice("NFS: directory %pD2 contains a readdir loop. " | 314 | pr_notice("NFS: directory %pD2 contains a readdir loop. " |
| 308 | "Please contact your server vendor. " | 315 | "Please contact your server vendor. " |
| 309 | "The file: %s has duplicate cookie %llu\n", | 316 | "The file: %.*s has duplicate cookie %llu\n", |
| 310 | desc->file, | 317 | desc->file, array->array[i].string.len, |
| 311 | array->array[i].string.name, | 318 | array->array[i].string.name, *desc->dir_cookie); |
| 312 | *desc->dir_cookie); | ||
| 313 | } | 319 | } |
| 314 | status = -ELOOP; | 320 | status = -ELOOP; |
| 315 | goto out; | 321 | goto out; |
| @@ -437,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir) | |||
| 437 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); | 443 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); |
| 438 | } | 444 | } |
| 439 | 445 | ||
| 446 | /* | ||
| 447 | * This function is mainly for use by nfs_getattr(). | ||
| 448 | * | ||
| 449 | * If this is an 'ls -l', we want to force use of readdirplus. | ||
| 450 | * Do this by checking if there is an active file descriptor | ||
| 451 | * and calling nfs_advise_use_readdirplus, then forcing a | ||
| 452 | * cache flush. | ||
| 453 | */ | ||
| 454 | void nfs_force_use_readdirplus(struct inode *dir) | ||
| 455 | { | ||
| 456 | if (!list_empty(&NFS_I(dir)->open_files)) { | ||
| 457 | nfs_advise_use_readdirplus(dir); | ||
| 458 | nfs_zap_mapping(dir, dir->i_mapping); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
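Taken together with the nfs_getattr() hook added in fs/nfs/inode.c further down, the intended flow for an 'ls -l' style workload is roughly the following call chain (a sketch using the names introduced in this series, applicable when the directory has an open file descriptor, i.e. a readdir in flight):

	stat(child)
	  -> nfs_getattr()
	       -> nfs_request_parent_use_readdirplus(dentry)
	            -> nfs_force_use_readdirplus(parent->d_inode)
	                 -> nfs_advise_use_readdirplus() + nfs_zap_mapping()

so the next readdir of the parent refetches the directory via READDIRPLUS and picks up child attributes in bulk instead of issuing one GETATTR per entry.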
| 440 | static | 462 | static |
| 441 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | 463 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) |
| 442 | { | 464 | { |
| @@ -815,6 +837,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) | |||
| 815 | goto out; | 837 | goto out; |
| 816 | } | 838 | } |
| 817 | 839 | ||
| 840 | static bool nfs_dir_mapping_need_revalidate(struct inode *dir) | ||
| 841 | { | ||
| 842 | struct nfs_inode *nfsi = NFS_I(dir); | ||
| 843 | |||
| 844 | if (nfs_attribute_cache_expired(dir)) | ||
| 845 | return true; | ||
| 846 | if (nfsi->cache_validity & NFS_INO_INVALID_DATA) | ||
| 847 | return true; | ||
| 848 | return false; | ||
| 849 | } | ||
| 850 | |||
| 818 | /* The file offset position represents the dirent entry number. A | 851 | /* The file offset position represents the dirent entry number. A |
| 819 | last cookie cache takes care of the common case of reading the | 852 | last cookie cache takes care of the common case of reading the |
| 820 | whole directory. | 853 | whole directory. |
| @@ -847,7 +880,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) | |||
| 847 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; | 880 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; |
| 848 | 881 | ||
| 849 | nfs_block_sillyrename(dentry); | 882 | nfs_block_sillyrename(dentry); |
| 850 | if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) | 883 | if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) |
| 851 | res = nfs_revalidate_mapping(inode, file->f_mapping); | 884 | res = nfs_revalidate_mapping(inode, file->f_mapping); |
| 852 | if (res < 0) | 885 | if (res < 0) |
| 853 | goto out; | 886 | goto out; |
| @@ -1911,6 +1944,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1911 | struct inode *old_inode = old_dentry->d_inode; | 1944 | struct inode *old_inode = old_dentry->d_inode; |
| 1912 | struct inode *new_inode = new_dentry->d_inode; | 1945 | struct inode *new_inode = new_dentry->d_inode; |
| 1913 | struct dentry *dentry = NULL, *rehash = NULL; | 1946 | struct dentry *dentry = NULL, *rehash = NULL; |
| 1947 | struct rpc_task *task; | ||
| 1914 | int error = -EBUSY; | 1948 | int error = -EBUSY; |
| 1915 | 1949 | ||
| 1916 | dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", | 1950 | dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", |
| @@ -1958,8 +1992,16 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1958 | if (new_inode != NULL) | 1992 | if (new_inode != NULL) |
| 1959 | NFS_PROTO(new_inode)->return_delegation(new_inode); | 1993 | NFS_PROTO(new_inode)->return_delegation(new_inode); |
| 1960 | 1994 | ||
| 1961 | error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, | 1995 | task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); |
| 1962 | new_dir, &new_dentry->d_name); | 1996 | if (IS_ERR(task)) { |
| 1997 | error = PTR_ERR(task); | ||
| 1998 | goto out; | ||
| 1999 | } | ||
| 2000 | |||
| 2001 | error = rpc_wait_for_completion_task(task); | ||
| 2002 | if (error == 0) | ||
| 2003 | error = task->tk_status; | ||
| 2004 | rpc_put_task(task); | ||
| 1963 | nfs_mark_for_revalidate(old_inode); | 2005 | nfs_mark_for_revalidate(old_inode); |
| 1964 | out: | 2006 | out: |
| 1965 | if (rehash) | 2007 | if (rehash) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 360114ae8b82..0c438973f3c8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -128,7 +128,7 @@ EXPORT_SYMBOL_GPL(nfs_clear_inode); | |||
| 128 | 128 | ||
| 129 | void nfs_evict_inode(struct inode *inode) | 129 | void nfs_evict_inode(struct inode *inode) |
| 130 | { | 130 | { |
| 131 | truncate_inode_pages(&inode->i_data, 0); | 131 | truncate_inode_pages_final(&inode->i_data); |
| 132 | clear_inode(inode); | 132 | clear_inode(inode); |
| 133 | nfs_clear_inode(inode); | 133 | nfs_clear_inode(inode); |
| 134 | } | 134 | } |
| @@ -588,6 +588,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 588 | } | 588 | } |
| 589 | EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); | 589 | EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); |
| 590 | 590 | ||
| 591 | static void nfs_request_parent_use_readdirplus(struct dentry *dentry) | ||
| 592 | { | ||
| 593 | struct dentry *parent; | ||
| 594 | |||
| 595 | parent = dget_parent(dentry); | ||
| 596 | nfs_force_use_readdirplus(parent->d_inode); | ||
| 597 | dput(parent); | ||
| 598 | } | ||
| 599 | |||
| 600 | static bool nfs_need_revalidate_inode(struct inode *inode) | ||
| 601 | { | ||
| 602 | if (NFS_I(inode)->cache_validity & | ||
| 603 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
| 604 | return true; | ||
| 605 | if (nfs_attribute_cache_expired(inode)) | ||
| 606 | return true; | ||
| 607 | return false; | ||
| 608 | } | ||
| 609 | |||
| 591 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 610 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
| 592 | { | 611 | { |
| 593 | struct inode *inode = dentry->d_inode; | 612 | struct inode *inode = dentry->d_inode; |
| @@ -616,10 +635,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
| 616 | ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) | 635 | ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) |
| 617 | need_atime = 0; | 636 | need_atime = 0; |
| 618 | 637 | ||
| 619 | if (need_atime) | 638 | if (need_atime || nfs_need_revalidate_inode(inode)) { |
| 620 | err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); | 639 | struct nfs_server *server = NFS_SERVER(inode); |
| 621 | else | 640 | |
| 622 | err = nfs_revalidate_inode(NFS_SERVER(inode), inode); | 641 | if (server->caps & NFS_CAP_READDIRPLUS) |
| 642 | nfs_request_parent_use_readdirplus(dentry); | ||
| 643 | err = __nfs_revalidate_inode(server, inode); | ||
| 644 | } | ||
| 623 | if (!err) { | 645 | if (!err) { |
| 624 | generic_fillattr(inode, stat); | 646 | generic_fillattr(inode, stat); |
| 625 | stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); | 647 | stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); |
| @@ -961,9 +983,7 @@ int nfs_attribute_cache_expired(struct inode *inode) | |||
| 961 | */ | 983 | */ |
| 962 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 984 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
| 963 | { | 985 | { |
| 964 | if (!(NFS_I(inode)->cache_validity & | 986 | if (!nfs_need_revalidate_inode(inode)) |
| 965 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
| 966 | && !nfs_attribute_cache_expired(inode)) | ||
| 967 | return NFS_STALE(inode) ? -ESTALE : 0; | 987 | return NFS_STALE(inode) ? -ESTALE : 0; |
| 968 | return __nfs_revalidate_inode(server, inode); | 988 | return __nfs_revalidate_inode(server, inode); |
| 969 | } | 989 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b46cf5a67329..dd8bfc2e2464 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -301,6 +301,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, | |||
| 301 | const char *ip_addr); | 301 | const char *ip_addr); |
| 302 | 302 | ||
| 303 | /* dir.c */ | 303 | /* dir.c */ |
| 304 | extern void nfs_force_use_readdirplus(struct inode *dir); | ||
| 304 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, | 305 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, |
| 305 | struct shrink_control *sc); | 306 | struct shrink_control *sc); |
| 306 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, | 307 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, |
| @@ -474,6 +475,13 @@ extern int nfs_migrate_page(struct address_space *, | |||
| 474 | #define nfs_migrate_page NULL | 475 | #define nfs_migrate_page NULL |
| 475 | #endif | 476 | #endif |
| 476 | 477 | ||
| 478 | /* unlink.c */ | ||
| 479 | extern struct rpc_task * | ||
| 480 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | ||
| 481 | struct dentry *old_dentry, struct dentry *new_dentry, | ||
| 482 | void (*complete)(struct rpc_task *, struct nfs_renamedata *)); | ||
| 483 | extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); | ||
| 484 | |||
| 477 | /* direct.c */ | 485 | /* direct.c */ |
| 478 | void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, | 486 | void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, |
| 479 | struct nfs_direct_req *dreq); | 487 | struct nfs_direct_req *dreq); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a462ef0fb5d6..db60149c4579 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -479,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
| 479 | } | 479 | } |
| 480 | 480 | ||
| 481 | static int | 481 | static int |
| 482 | nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
| 483 | struct inode *new_dir, struct qstr *new_name) | ||
| 484 | { | ||
| 485 | struct nfs_renameargs arg = { | ||
| 486 | .old_dir = NFS_FH(old_dir), | ||
| 487 | .old_name = old_name, | ||
| 488 | .new_dir = NFS_FH(new_dir), | ||
| 489 | .new_name = new_name, | ||
| 490 | }; | ||
| 491 | struct nfs_renameres res; | ||
| 492 | struct rpc_message msg = { | ||
| 493 | .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], | ||
| 494 | .rpc_argp = &arg, | ||
| 495 | .rpc_resp = &res, | ||
| 496 | }; | ||
| 497 | int status = -ENOMEM; | ||
| 498 | |||
| 499 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); | ||
| 500 | |||
| 501 | res.old_fattr = nfs_alloc_fattr(); | ||
| 502 | res.new_fattr = nfs_alloc_fattr(); | ||
| 503 | if (res.old_fattr == NULL || res.new_fattr == NULL) | ||
| 504 | goto out; | ||
| 505 | |||
| 506 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); | ||
| 507 | nfs_post_op_update_inode(old_dir, res.old_fattr); | ||
| 508 | nfs_post_op_update_inode(new_dir, res.new_fattr); | ||
| 509 | out: | ||
| 510 | nfs_free_fattr(res.old_fattr); | ||
| 511 | nfs_free_fattr(res.new_fattr); | ||
| 512 | dprintk("NFS reply rename: %d\n", status); | ||
| 513 | return status; | ||
| 514 | } | ||
| 515 | |||
| 516 | static int | ||
| 517 | nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 482 | nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
| 518 | { | 483 | { |
| 519 | struct nfs3_linkargs arg = { | 484 | struct nfs3_linkargs arg = { |
| @@ -968,7 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
| 968 | .unlink_setup = nfs3_proc_unlink_setup, | 933 | .unlink_setup = nfs3_proc_unlink_setup, |
| 969 | .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, | 934 | .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, |
| 970 | .unlink_done = nfs3_proc_unlink_done, | 935 | .unlink_done = nfs3_proc_unlink_done, |
| 971 | .rename = nfs3_proc_rename, | ||
| 972 | .rename_setup = nfs3_proc_rename_setup, | 936 | .rename_setup = nfs3_proc_rename_setup, |
| 973 | .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, | 937 | .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, |
| 974 | .rename_done = nfs3_proc_rename_done, | 938 | .rename_done = nfs3_proc_rename_done, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a5b27c2d9689..e1d1badbe53c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
| @@ -427,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); | |||
| 427 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 427 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
| 428 | extern void nfs_inode_find_state_and_recover(struct inode *inode, | 428 | extern void nfs_inode_find_state_and_recover(struct inode *inode, |
| 429 | const nfs4_stateid *stateid); | 429 | const nfs4_stateid *stateid); |
| 430 | extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *); | ||
| 430 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 431 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
| 431 | extern int nfs4_wait_clnt_recover(struct nfs_client *clp); | 432 | extern int nfs4_wait_clnt_recover(struct nfs_client *clp); |
| 432 | extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); | 433 | extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); |
| @@ -500,6 +501,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei | |||
| 500 | return memcmp(dst, src, sizeof(*dst)) == 0; | 501 | return memcmp(dst, src, sizeof(*dst)) == 0; |
| 501 | } | 502 | } |
| 502 | 503 | ||
| 504 | static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src) | ||
| 505 | { | ||
| 506 | return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0; | ||
| 507 | } | ||
| 508 | |||
| 509 | static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2) | ||
| 510 | { | ||
| 511 | return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; | ||
| 512 | } | ||
| 513 | |||
| 503 | static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) | 514 | static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) |
| 504 | { | 515 | { |
| 505 | return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; | 516 | return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; |
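The two helpers added to nfs4_fs.h split stateid comparison into identity and freshness: nfs4_stateid_match_other compares only the "other" field and ignores the sequence counter, while nfs4_stateid_is_newer compares the big-endian seqids with serial-number arithmetic so the test stays correct across wraparound. Below is a minimal user-space sketch of the same semantics; the 4+12-byte layout mirrors the kernel's stateid (NFS4_STATEID_OTHER_SIZE is 12), but the type and helper names here are illustrative, not the kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>   /* ntohl()/htonl(), standing in for be32_to_cpu() */

#define OTHER_SIZE 12    /* mirrors NFS4_STATEID_OTHER_SIZE */

struct stateid {
	uint32_t seqid;          /* big-endian on the wire */
	char other[OTHER_SIZE];  /* fixed for the lifetime of the state */
};

static bool match_other(const struct stateid *a, const struct stateid *b)
{
	/* identity: same state, regardless of how many times it was updated */
	return memcmp(a->other, b->other, OTHER_SIZE) == 0;
}

static bool is_newer(const struct stateid *a, const struct stateid *b)
{
	/* serial-number arithmetic: correct even when seqid wraps */
	return (int32_t)(ntohl(a->seqid) - ntohl(b->seqid)) > 0;
}

int main(void)
{
	struct stateid s1 = { htonl(0xfffffffeu), "same-other" };
	struct stateid s2 = { htonl(0x00000001u), "same-other" };

	/* seqid 0x00000001 is "newer" than 0xfffffffe despite the wrap */
	printf("match_other: %d, s2 newer: %d\n",
	       match_other(&s1, &s2), is_newer(&s2, &s1));
	return 0;
}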
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0e46d3d1b6cc..aa9ef4876046 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
| @@ -531,6 +531,13 @@ int nfs40_walk_client_list(struct nfs_client *new, | |||
| 531 | *result = pos; | 531 | *result = pos; |
| 532 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", | 532 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", |
| 533 | __func__, pos, atomic_read(&pos->cl_count)); | 533 | __func__, pos, atomic_read(&pos->cl_count)); |
| 534 | goto out; | ||
| 535 | case -ERESTARTSYS: | ||
| 536 | case -ETIMEDOUT: | ||
| 537 | /* The callback path may have been inadvertently | ||
| 538 | * changed. Schedule recovery! | ||
| 539 | */ | ||
| 540 | nfs4_schedule_path_down_recovery(pos); | ||
| 534 | default: | 541 | default: |
| 535 | goto out; | 542 | goto out; |
| 536 | } | 543 | } |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 12c8132ad408..b9a35c05b60f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
| @@ -324,8 +324,9 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) | |||
| 324 | &rdata->res.seq_res, | 324 | &rdata->res.seq_res, |
| 325 | task)) | 325 | task)) |
| 326 | return; | 326 | return; |
| 327 | nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, | 327 | if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, |
| 328 | rdata->args.lock_context, FMODE_READ); | 328 | rdata->args.lock_context, FMODE_READ) == -EIO) |
| 329 | rpc_exit(task, -EIO); /* lost lock, terminate I/O */ | ||
| 329 | } | 330 | } |
| 330 | 331 | ||
| 331 | static void filelayout_read_call_done(struct rpc_task *task, void *data) | 332 | static void filelayout_read_call_done(struct rpc_task *task, void *data) |
| @@ -435,8 +436,9 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) | |||
| 435 | &wdata->res.seq_res, | 436 | &wdata->res.seq_res, |
| 436 | task)) | 437 | task)) |
| 437 | return; | 438 | return; |
| 438 | nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, | 439 | if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, |
| 439 | wdata->args.lock_context, FMODE_WRITE); | 440 | wdata->args.lock_context, FMODE_WRITE) == -EIO) |
| 441 | rpc_exit(task, -EIO); /* lost lock, terminate I/O */ | ||
| 440 | } | 442 | } |
| 441 | 443 | ||
| 442 | static void filelayout_write_call_done(struct rpc_task *task, void *data) | 444 | static void filelayout_write_call_done(struct rpc_task *task, void *data) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2da6a698b8f7..397be39c6dc8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -1068,6 +1068,7 @@ static void nfs4_opendata_free(struct kref *kref) | |||
| 1068 | dput(p->dentry); | 1068 | dput(p->dentry); |
| 1069 | nfs_sb_deactive(sb); | 1069 | nfs_sb_deactive(sb); |
| 1070 | nfs_fattr_free_names(&p->f_attr); | 1070 | nfs_fattr_free_names(&p->f_attr); |
| 1071 | kfree(p->f_attr.mdsthreshold); | ||
| 1071 | kfree(p); | 1072 | kfree(p); |
| 1072 | } | 1073 | } |
| 1073 | 1074 | ||
| @@ -1137,12 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) | |||
| 1137 | nfs4_state_set_mode_locked(state, state->state | fmode); | 1138 | nfs4_state_set_mode_locked(state, state->state | fmode); |
| 1138 | } | 1139 | } |
| 1139 | 1140 | ||
| 1140 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | 1141 | static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) |
| 1142 | { | ||
| 1143 | struct nfs_client *clp = state->owner->so_server->nfs_client; | ||
| 1144 | bool need_recover = false; | ||
| 1145 | |||
| 1146 | if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly) | ||
| 1147 | need_recover = true; | ||
| 1148 | if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly) | ||
| 1149 | need_recover = true; | ||
| 1150 | if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr) | ||
| 1151 | need_recover = true; | ||
| 1152 | if (need_recover) | ||
| 1153 | nfs4_state_mark_reclaim_nograce(clp, state); | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | static bool nfs_need_update_open_stateid(struct nfs4_state *state, | ||
| 1157 | nfs4_stateid *stateid) | ||
| 1141 | { | 1158 | { |
| 1159 | if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) | ||
| 1160 | return true; | ||
| 1161 | if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { | ||
| 1162 | nfs_test_and_clear_all_open_stateid(state); | ||
| 1163 | return true; | ||
| 1164 | } | ||
| 1165 | if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) | ||
| 1166 | return true; | ||
| 1167 | return false; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | static void nfs_clear_open_stateid_locked(struct nfs4_state *state, | ||
| 1171 | nfs4_stateid *stateid, fmode_t fmode) | ||
| 1172 | { | ||
| 1173 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
| 1174 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | ||
| 1175 | case FMODE_WRITE: | ||
| 1176 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 1177 | break; | ||
| 1178 | case FMODE_READ: | ||
| 1179 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 1180 | break; | ||
| 1181 | case 0: | ||
| 1182 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 1183 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 1184 | clear_bit(NFS_OPEN_STATE, &state->flags); | ||
| 1185 | } | ||
| 1186 | if (stateid == NULL) | ||
| 1187 | return; | ||
| 1188 | if (!nfs_need_update_open_stateid(state, stateid)) | ||
| 1189 | return; | ||
| 1142 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1190 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
| 1143 | nfs4_stateid_copy(&state->stateid, stateid); | 1191 | nfs4_stateid_copy(&state->stateid, stateid); |
| 1144 | nfs4_stateid_copy(&state->open_stateid, stateid); | 1192 | nfs4_stateid_copy(&state->open_stateid, stateid); |
| 1145 | set_bit(NFS_OPEN_STATE, &state->flags); | 1193 | } |
| 1194 | |||
| 1195 | static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | ||
| 1196 | { | ||
| 1197 | write_seqlock(&state->seqlock); | ||
| 1198 | nfs_clear_open_stateid_locked(state, stateid, fmode); | ||
| 1199 | write_sequnlock(&state->seqlock); | ||
| 1200 | if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) | ||
| 1201 | nfs4_schedule_state_manager(state->owner->so_server->nfs_client); | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | ||
| 1205 | { | ||
| 1146 | switch (fmode) { | 1206 | switch (fmode) { |
| 1147 | case FMODE_READ: | 1207 | case FMODE_READ: |
| 1148 | set_bit(NFS_O_RDONLY_STATE, &state->flags); | 1208 | set_bit(NFS_O_RDONLY_STATE, &state->flags); |
| @@ -1153,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * | |||
| 1153 | case FMODE_READ|FMODE_WRITE: | 1213 | case FMODE_READ|FMODE_WRITE: |
| 1154 | set_bit(NFS_O_RDWR_STATE, &state->flags); | 1214 | set_bit(NFS_O_RDWR_STATE, &state->flags); |
| 1155 | } | 1215 | } |
| 1156 | } | 1216 | if (!nfs_need_update_open_stateid(state, stateid)) |
| 1157 | 1217 | return; | |
| 1158 | static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | 1218 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
| 1159 | { | 1219 | nfs4_stateid_copy(&state->stateid, stateid); |
| 1160 | write_seqlock(&state->seqlock); | 1220 | nfs4_stateid_copy(&state->open_stateid, stateid); |
| 1161 | nfs_set_open_stateid_locked(state, stateid, fmode); | ||
| 1162 | write_sequnlock(&state->seqlock); | ||
| 1163 | } | 1221 | } |
| 1164 | 1222 | ||
| 1165 | static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) | 1223 | static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) |
| @@ -1217,6 +1275,8 @@ no_delegation: | |||
| 1217 | __update_open_stateid(state, open_stateid, NULL, fmode); | 1275 | __update_open_stateid(state, open_stateid, NULL, fmode); |
| 1218 | ret = 1; | 1276 | ret = 1; |
| 1219 | } | 1277 | } |
| 1278 | if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) | ||
| 1279 | nfs4_schedule_state_manager(state->owner->so_server->nfs_client); | ||
| 1220 | 1280 | ||
| 1221 | return ret; | 1281 | return ret; |
| 1222 | } | 1282 | } |
| @@ -1450,12 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
| 1450 | struct nfs4_state *newstate; | 1510 | struct nfs4_state *newstate; |
| 1451 | int ret; | 1511 | int ret; |
| 1452 | 1512 | ||
| 1513 | /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ | ||
| 1514 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
| 1515 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 1516 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 1453 | /* memory barrier prior to reading state->n_* */ | 1517 | /* memory barrier prior to reading state->n_* */ |
| 1454 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1518 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
| 1455 | clear_bit(NFS_OPEN_STATE, &state->flags); | 1519 | clear_bit(NFS_OPEN_STATE, &state->flags); |
| 1456 | smp_rmb(); | 1520 | smp_rmb(); |
| 1457 | if (state->n_rdwr != 0) { | 1521 | if (state->n_rdwr != 0) { |
| 1458 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
| 1459 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); | 1522 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); |
| 1460 | if (ret != 0) | 1523 | if (ret != 0) |
| 1461 | return ret; | 1524 | return ret; |
| @@ -1463,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
| 1463 | return -ESTALE; | 1526 | return -ESTALE; |
| 1464 | } | 1527 | } |
| 1465 | if (state->n_wronly != 0) { | 1528 | if (state->n_wronly != 0) { |
| 1466 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 1467 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); | 1529 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); |
| 1468 | if (ret != 0) | 1530 | if (ret != 0) |
| 1469 | return ret; | 1531 | return ret; |
| @@ -1471,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
| 1471 | return -ESTALE; | 1533 | return -ESTALE; |
| 1472 | } | 1534 | } |
| 1473 | if (state->n_rdonly != 0) { | 1535 | if (state->n_rdonly != 0) { |
| 1474 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 1475 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); | 1536 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); |
| 1476 | if (ret != 0) | 1537 | if (ret != 0) |
| 1477 | return ret; | 1538 | return ret; |
| @@ -2244,10 +2305,12 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2244 | } | 2305 | } |
| 2245 | } | 2306 | } |
| 2246 | 2307 | ||
| 2247 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { | 2308 | if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { |
| 2248 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); | 2309 | if (!opendata->f_attr.mdsthreshold) { |
| 2249 | if (!opendata->f_attr.mdsthreshold) | 2310 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); |
| 2250 | goto err_free_label; | 2311 | if (!opendata->f_attr.mdsthreshold) |
| 2312 | goto err_free_label; | ||
| 2313 | } | ||
| 2251 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; | 2314 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; |
| 2252 | } | 2315 | } |
| 2253 | if (dentry->d_inode != NULL) | 2316 | if (dentry->d_inode != NULL) |
| @@ -2275,11 +2338,10 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2275 | if (opendata->file_created) | 2338 | if (opendata->file_created) |
| 2276 | *opened |= FILE_CREATED; | 2339 | *opened |= FILE_CREATED; |
| 2277 | 2340 | ||
| 2278 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) | 2341 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { |
| 2279 | *ctx_th = opendata->f_attr.mdsthreshold; | 2342 | *ctx_th = opendata->f_attr.mdsthreshold; |
| 2280 | else | 2343 | opendata->f_attr.mdsthreshold = NULL; |
| 2281 | kfree(opendata->f_attr.mdsthreshold); | 2344 | } |
| 2282 | opendata->f_attr.mdsthreshold = NULL; | ||
| 2283 | 2345 | ||
| 2284 | nfs4_label_free(olabel); | 2346 | nfs4_label_free(olabel); |
| 2285 | 2347 | ||
| @@ -2289,7 +2351,6 @@ static int _nfs4_do_open(struct inode *dir, | |||
| 2289 | err_free_label: | 2351 | err_free_label: |
| 2290 | nfs4_label_free(olabel); | 2352 | nfs4_label_free(olabel); |
| 2291 | err_opendata_put: | 2353 | err_opendata_put: |
| 2292 | kfree(opendata->f_attr.mdsthreshold); | ||
| 2293 | nfs4_opendata_put(opendata); | 2354 | nfs4_opendata_put(opendata); |
| 2294 | err_put_state_owner: | 2355 | err_put_state_owner: |
| 2295 | nfs4_put_state_owner(sp); | 2356 | nfs4_put_state_owner(sp); |
| @@ -2398,13 +2459,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
| 2398 | 2459 | ||
| 2399 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { | 2460 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { |
| 2400 | /* Use that stateid */ | 2461 | /* Use that stateid */ |
| 2401 | } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { | 2462 | } else if (truncate && state != NULL) { |
| 2402 | struct nfs_lockowner lockowner = { | 2463 | struct nfs_lockowner lockowner = { |
| 2403 | .l_owner = current->files, | 2464 | .l_owner = current->files, |
| 2404 | .l_pid = current->tgid, | 2465 | .l_pid = current->tgid, |
| 2405 | }; | 2466 | }; |
| 2406 | nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, | 2467 | if (!nfs4_valid_open_stateid(state)) |
| 2407 | &lockowner); | 2468 | return -EBADF; |
| 2469 | if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, | ||
| 2470 | &lockowner) == -EIO) | ||
| 2471 | return -EBADF; | ||
| 2408 | } else | 2472 | } else |
| 2409 | nfs4_stateid_copy(&arg.stateid, &zero_stateid); | 2473 | nfs4_stateid_copy(&arg.stateid, &zero_stateid); |
| 2410 | 2474 | ||
| @@ -2476,26 +2540,6 @@ static void nfs4_free_closedata(void *data) | |||
| 2476 | kfree(calldata); | 2540 | kfree(calldata); |
| 2477 | } | 2541 | } |
| 2478 | 2542 | ||
| 2479 | static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, | ||
| 2480 | fmode_t fmode) | ||
| 2481 | { | ||
| 2482 | spin_lock(&state->owner->so_lock); | ||
| 2483 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
| 2484 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | ||
| 2485 | case FMODE_WRITE: | ||
| 2486 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 2487 | break; | ||
| 2488 | case FMODE_READ: | ||
| 2489 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 2490 | break; | ||
| 2491 | case 0: | ||
| 2492 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
| 2493 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
| 2494 | clear_bit(NFS_OPEN_STATE, &state->flags); | ||
| 2495 | } | ||
| 2496 | spin_unlock(&state->owner->so_lock); | ||
| 2497 | } | ||
| 2498 | |||
| 2499 | static void nfs4_close_done(struct rpc_task *task, void *data) | 2543 | static void nfs4_close_done(struct rpc_task *task, void *data) |
| 2500 | { | 2544 | { |
| 2501 | struct nfs4_closedata *calldata = data; | 2545 | struct nfs4_closedata *calldata = data; |
| @@ -2514,9 +2558,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
| 2514 | if (calldata->roc) | 2558 | if (calldata->roc) |
| 2515 | pnfs_roc_set_barrier(state->inode, | 2559 | pnfs_roc_set_barrier(state->inode, |
| 2516 | calldata->roc_barrier); | 2560 | calldata->roc_barrier); |
| 2517 | nfs_set_open_stateid(state, &calldata->res.stateid, 0); | 2561 | nfs_clear_open_stateid(state, &calldata->res.stateid, 0); |
| 2518 | renew_lease(server, calldata->timestamp); | 2562 | renew_lease(server, calldata->timestamp); |
| 2519 | break; | 2563 | goto out_release; |
| 2520 | case -NFS4ERR_ADMIN_REVOKED: | 2564 | case -NFS4ERR_ADMIN_REVOKED: |
| 2521 | case -NFS4ERR_STALE_STATEID: | 2565 | case -NFS4ERR_STALE_STATEID: |
| 2522 | case -NFS4ERR_OLD_STATEID: | 2566 | case -NFS4ERR_OLD_STATEID: |
| @@ -2530,7 +2574,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
| 2530 | goto out_release; | 2574 | goto out_release; |
| 2531 | } | 2575 | } |
| 2532 | } | 2576 | } |
| 2533 | nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); | 2577 | nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); |
| 2534 | out_release: | 2578 | out_release: |
| 2535 | nfs_release_seqid(calldata->arg.seqid); | 2579 | nfs_release_seqid(calldata->arg.seqid); |
| 2536 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); | 2580 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); |
| @@ -3504,49 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
| 3504 | return 1; | 3548 | return 1; |
| 3505 | } | 3549 | } |
| 3506 | 3550 | ||
| 3507 | static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
| 3508 | struct inode *new_dir, struct qstr *new_name) | ||
| 3509 | { | ||
| 3510 | struct nfs_server *server = NFS_SERVER(old_dir); | ||
| 3511 | struct nfs_renameargs arg = { | ||
| 3512 | .old_dir = NFS_FH(old_dir), | ||
| 3513 | .new_dir = NFS_FH(new_dir), | ||
| 3514 | .old_name = old_name, | ||
| 3515 | .new_name = new_name, | ||
| 3516 | }; | ||
| 3517 | struct nfs_renameres res = { | ||
| 3518 | .server = server, | ||
| 3519 | }; | ||
| 3520 | struct rpc_message msg = { | ||
| 3521 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], | ||
| 3522 | .rpc_argp = &arg, | ||
| 3523 | .rpc_resp = &res, | ||
| 3524 | }; | ||
| 3525 | int status = -ENOMEM; | ||
| 3526 | |||
| 3527 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | ||
| 3528 | if (!status) { | ||
| 3529 | update_changeattr(old_dir, &res.old_cinfo); | ||
| 3530 | update_changeattr(new_dir, &res.new_cinfo); | ||
| 3531 | } | ||
| 3532 | return status; | ||
| 3533 | } | ||
| 3534 | |||
| 3535 | static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
| 3536 | struct inode *new_dir, struct qstr *new_name) | ||
| 3537 | { | ||
| 3538 | struct nfs4_exception exception = { }; | ||
| 3539 | int err; | ||
| 3540 | do { | ||
| 3541 | err = _nfs4_proc_rename(old_dir, old_name, | ||
| 3542 | new_dir, new_name); | ||
| 3543 | trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err); | ||
| 3544 | err = nfs4_handle_exception(NFS_SERVER(old_dir), err, | ||
| 3545 | &exception); | ||
| 3546 | } while (exception.retry); | ||
| 3547 | return err; | ||
| 3548 | } | ||
| 3549 | |||
| 3550 | static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 3551 | static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
| 3551 | { | 3552 | { |
| 3552 | struct nfs_server *server = NFS_SERVER(inode); | 3553 | struct nfs_server *server = NFS_SERVER(inode); |
| @@ -4011,8 +4012,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, | |||
| 4011 | { | 4012 | { |
| 4012 | nfs4_stateid current_stateid; | 4013 | nfs4_stateid current_stateid; |
| 4013 | 4014 | ||
| 4014 | if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) | 4015 | /* If the current stateid represents a lost lock, then exit */ |
| 4015 | return false; | 4016 | if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) |
| 4017 | return true; | ||
| 4016 | return nfs4_stateid_match(stateid, ¤t_stateid); | 4018 | return nfs4_stateid_match(stateid, ¤t_stateid); |
| 4017 | } | 4019 | } |
| 4018 | 4020 | ||
| @@ -4880,6 +4882,20 @@ nfs4_init_uniform_client_string(const struct nfs_client *clp, | |||
| 4880 | nodename); | 4882 | nodename); |
| 4881 | } | 4883 | } |
| 4882 | 4884 | ||
| 4885 | /* | ||
| 4886 | * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback | ||
| 4887 | * services. Advertise one based on the address family of the | ||
| 4888 | * clientaddr. | ||
| 4889 | */ | ||
| 4890 | static unsigned int | ||
| 4891 | nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) | ||
| 4892 | { | ||
| 4893 | if (strchr(clp->cl_ipaddr, ':') != NULL) | ||
| 4894 | return scnprintf(buf, len, "tcp6"); | ||
| 4895 | else | ||
| 4896 | return scnprintf(buf, len, "tcp"); | ||
| 4897 | } | ||
| 4898 | |||
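Rather than echoing whatever transport the client happened to use to reach the server, the advertised callback netid is now derived from the client's own address, because the callback service only comes up as "tcp" or "tcp6"; the heuristic is simply that a colon marks an IPv6 presentation address. A stand-alone sketch of the same selection, with an illustrative function name:

#include <stdio.h>
#include <string.h>

/* Pick the callback netid from the presentation-format client address. */
static const char *callback_netid(const char *ipaddr)
{
	return strchr(ipaddr, ':') ? "tcp6" : "tcp";
}

int main(void)
{
	printf("%s -> %s\n", "192.0.2.7", callback_netid("192.0.2.7"));
	printf("%s -> %s\n", "2001:db8::1", callback_netid("2001:db8::1"));
	return 0;
}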
| 4883 | /** | 4899 | /** |
| 4884 | * nfs4_proc_setclientid - Negotiate client ID | 4900 | * nfs4_proc_setclientid - Negotiate client ID |
| 4885 | * @clp: state data structure | 4901 | * @clp: state data structure |
| @@ -4921,12 +4937,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
| 4921 | setclientid.sc_name, | 4937 | setclientid.sc_name, |
| 4922 | sizeof(setclientid.sc_name)); | 4938 | sizeof(setclientid.sc_name)); |
| 4923 | /* cb_client4 */ | 4939 | /* cb_client4 */ |
| 4924 | rcu_read_lock(); | 4940 | setclientid.sc_netid_len = |
| 4925 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 4941 | nfs4_init_callback_netid(clp, |
| 4926 | sizeof(setclientid.sc_netid), "%s", | 4942 | setclientid.sc_netid, |
| 4927 | rpc_peeraddr2str(clp->cl_rpcclient, | 4943 | sizeof(setclientid.sc_netid)); |
| 4928 | RPC_DISPLAY_NETID)); | ||
| 4929 | rcu_read_unlock(); | ||
| 4930 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, | 4944 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, |
| 4931 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", | 4945 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", |
| 4932 | clp->cl_ipaddr, port >> 8, port & 255); | 4946 | clp->cl_ipaddr, port >> 8, port & 255); |
| @@ -5828,8 +5842,7 @@ struct nfs_release_lockowner_data { | |||
| 5828 | struct nfs4_lock_state *lsp; | 5842 | struct nfs4_lock_state *lsp; |
| 5829 | struct nfs_server *server; | 5843 | struct nfs_server *server; |
| 5830 | struct nfs_release_lockowner_args args; | 5844 | struct nfs_release_lockowner_args args; |
| 5831 | struct nfs4_sequence_args seq_args; | 5845 | struct nfs_release_lockowner_res res; |
| 5832 | struct nfs4_sequence_res seq_res; | ||
| 5833 | unsigned long timestamp; | 5846 | unsigned long timestamp; |
| 5834 | }; | 5847 | }; |
| 5835 | 5848 | ||
| @@ -5837,7 +5850,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata | |||
| 5837 | { | 5850 | { |
| 5838 | struct nfs_release_lockowner_data *data = calldata; | 5851 | struct nfs_release_lockowner_data *data = calldata; |
| 5839 | nfs40_setup_sequence(data->server, | 5852 | nfs40_setup_sequence(data->server, |
| 5840 | &data->seq_args, &data->seq_res, task); | 5853 | &data->args.seq_args, &data->res.seq_res, task); |
| 5841 | data->timestamp = jiffies; | 5854 | data->timestamp = jiffies; |
| 5842 | } | 5855 | } |
| 5843 | 5856 | ||
| @@ -5846,7 +5859,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) | |||
| 5846 | struct nfs_release_lockowner_data *data = calldata; | 5859 | struct nfs_release_lockowner_data *data = calldata; |
| 5847 | struct nfs_server *server = data->server; | 5860 | struct nfs_server *server = data->server; |
| 5848 | 5861 | ||
| 5849 | nfs40_sequence_done(task, &data->seq_res); | 5862 | nfs40_sequence_done(task, &data->res.seq_res); |
| 5850 | 5863 | ||
| 5851 | switch (task->tk_status) { | 5864 | switch (task->tk_status) { |
| 5852 | case 0: | 5865 | case 0: |
| @@ -5887,7 +5900,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st | |||
| 5887 | data = kmalloc(sizeof(*data), GFP_NOFS); | 5900 | data = kmalloc(sizeof(*data), GFP_NOFS); |
| 5888 | if (!data) | 5901 | if (!data) |
| 5889 | return -ENOMEM; | 5902 | return -ENOMEM; |
| 5890 | nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); | ||
| 5891 | data->lsp = lsp; | 5903 | data->lsp = lsp; |
| 5892 | data->server = server; | 5904 | data->server = server; |
| 5893 | data->args.lock_owner.clientid = server->nfs_client->cl_clientid; | 5905 | data->args.lock_owner.clientid = server->nfs_client->cl_clientid; |
| @@ -5895,6 +5907,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st | |||
| 5895 | data->args.lock_owner.s_dev = server->s_dev; | 5907 | data->args.lock_owner.s_dev = server->s_dev; |
| 5896 | 5908 | ||
| 5897 | msg.rpc_argp = &data->args; | 5909 | msg.rpc_argp = &data->args; |
| 5910 | msg.rpc_resp = &data->res; | ||
| 5911 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | ||
| 5898 | rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); | 5912 | rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); |
| 5899 | return 0; | 5913 | return 0; |
| 5900 | } | 5914 | } |
| @@ -8404,7 +8418,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
| 8404 | .unlink_setup = nfs4_proc_unlink_setup, | 8418 | .unlink_setup = nfs4_proc_unlink_setup, |
| 8405 | .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, | 8419 | .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, |
| 8406 | .unlink_done = nfs4_proc_unlink_done, | 8420 | .unlink_done = nfs4_proc_unlink_done, |
| 8407 | .rename = nfs4_proc_rename, | ||
| 8408 | .rename_setup = nfs4_proc_rename_setup, | 8421 | .rename_setup = nfs4_proc_rename_setup, |
| 8409 | .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, | 8422 | .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, |
| 8410 | .rename_done = nfs4_proc_rename_done, | 8423 | .rename_done = nfs4_proc_rename_done, |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e1a47217c05e..2349518eef2c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
| @@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, | |||
| 974 | else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { | 974 | else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { |
| 975 | nfs4_stateid_copy(dst, &lsp->ls_stateid); | 975 | nfs4_stateid_copy(dst, &lsp->ls_stateid); |
| 976 | ret = 0; | 976 | ret = 0; |
| 977 | smp_rmb(); | ||
| 978 | if (!list_empty(&lsp->ls_seqid.list)) | ||
| 979 | ret = -EWOULDBLOCK; | ||
| 980 | } | 977 | } |
| 981 | spin_unlock(&state->state_lock); | 978 | spin_unlock(&state->state_lock); |
| 982 | nfs4_put_lock_state(lsp); | 979 | nfs4_put_lock_state(lsp); |
| @@ -984,10 +981,9 @@ out: | |||
| 984 | return ret; | 981 | return ret; |
| 985 | } | 982 | } |
| 986 | 983 | ||
| 987 | static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | 984 | static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) |
| 988 | { | 985 | { |
| 989 | const nfs4_stateid *src; | 986 | const nfs4_stateid *src; |
| 990 | int ret; | ||
| 991 | int seq; | 987 | int seq; |
| 992 | 988 | ||
| 993 | do { | 989 | do { |
| @@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | |||
| 996 | if (test_bit(NFS_OPEN_STATE, &state->flags)) | 992 | if (test_bit(NFS_OPEN_STATE, &state->flags)) |
| 997 | src = &state->open_stateid; | 993 | src = &state->open_stateid; |
| 998 | nfs4_stateid_copy(dst, src); | 994 | nfs4_stateid_copy(dst, src); |
| 999 | ret = 0; | ||
| 1000 | smp_rmb(); | ||
| 1001 | if (!list_empty(&state->owner->so_seqid.list)) | ||
| 1002 | ret = -EWOULDBLOCK; | ||
| 1003 | } while (read_seqretry(&state->seqlock, seq)); | 995 | } while (read_seqretry(&state->seqlock, seq)); |
| 1004 | return ret; | ||
| 1005 | } | 996 | } |
| 1006 | 997 | ||
| 1007 | /* | 998 | /* |
| @@ -1026,7 +1017,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, | |||
| 1026 | * choose to use. | 1017 | * choose to use. |
| 1027 | */ | 1018 | */ |
| 1028 | goto out; | 1019 | goto out; |
| 1029 | ret = nfs4_copy_open_stateid(dst, state); | 1020 | nfs4_copy_open_stateid(dst, state); |
| 1021 | ret = 0; | ||
| 1030 | out: | 1022 | out: |
| 1031 | if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) | 1023 | if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) |
| 1032 | dst->seqid = 0; | 1024 | dst->seqid = 0; |
| @@ -1324,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st | |||
| 1324 | return 1; | 1316 | return 1; |
| 1325 | } | 1317 | } |
| 1326 | 1318 | ||
| 1327 | static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) | 1319 | int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) |
| 1328 | { | 1320 | { |
| 1329 | set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); | 1321 | set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); |
| 1330 | clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); | 1322 | clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); |
| @@ -2083,8 +2075,10 @@ again: | |||
| 2083 | switch (status) { | 2075 | switch (status) { |
| 2084 | case 0: | 2076 | case 0: |
| 2085 | break; | 2077 | break; |
| 2086 | case -NFS4ERR_DELAY: | ||
| 2087 | case -ETIMEDOUT: | 2078 | case -ETIMEDOUT: |
| 2079 | if (clnt->cl_softrtry) | ||
| 2080 | break; | ||
| 2081 | case -NFS4ERR_DELAY: | ||
| 2088 | case -EAGAIN: | 2082 | case -EAGAIN: |
| 2089 | ssleep(1); | 2083 | ssleep(1); |
| 2090 | case -NFS4ERR_STALE_CLIENTID: | 2084 | case -NFS4ERR_STALE_CLIENTID: |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 808f29574412..6f340f02f2ba 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
| @@ -90,7 +90,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 90 | */ | 90 | */ |
| 91 | static void nfs4_evict_inode(struct inode *inode) | 91 | static void nfs4_evict_inode(struct inode *inode) |
| 92 | { | 92 | { |
| 93 | truncate_inode_pages(&inode->i_data, 0); | 93 | truncate_inode_pages_final(&inode->i_data); |
| 94 | clear_inode(inode); | 94 | clear_inode(inode); |
| 95 | pnfs_return_layout(inode); | 95 | pnfs_return_layout(inode); |
| 96 | pnfs_destroy_layout(NFS_I(inode)); | 96 | pnfs_destroy_layout(NFS_I(inode)); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 72f3bf1754ef..73ce8d4fe2c8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -203,8 +203,7 @@ static int nfs4_stat_to_errno(int); | |||
| 203 | 2 + encode_verifier_maxsz + 5 + \ | 203 | 2 + encode_verifier_maxsz + 5 + \ |
| 204 | nfs4_label_maxsz) | 204 | nfs4_label_maxsz) |
| 205 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ | 205 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ |
| 206 | decode_verifier_maxsz + \ | 206 | decode_verifier_maxsz) |
| 207 | nfs4_label_maxsz + nfs4_fattr_maxsz) | ||
| 208 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) | 207 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) |
| 209 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) | 208 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) |
| 210 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ | 209 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4755858e37a0..cb53d450ae32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -662,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
| 662 | */ | 662 | */ |
| 663 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | 663 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) |
| 664 | { | 664 | { |
| 665 | return (s32)s1 - (s32)s2 > 0; | 665 | return (s32)(s1 - s2) > 0; |
| 666 | } | ||
| 667 | |||
| 668 | static void | ||
| 669 | pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, | ||
| 670 | const nfs4_stateid *new, | ||
| 671 | struct list_head *free_me_list) | ||
| 672 | { | ||
| 673 | if (nfs4_stateid_match_other(&lo->plh_stateid, new)) | ||
| 674 | return; | ||
| 675 | /* Layout is new! Kill existing layout segments */ | ||
| 676 | pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); | ||
| 666 | } | 677 | } |
| 667 | 678 | ||
| 668 | /* update lo->plh_stateid with new if is more recent */ | 679 | /* update lo->plh_stateid with new if is more recent */ |
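The one-character change to pnfs_seqid_is_newer is not cosmetic: (s32)s1 - (s32)s2 subtracts two signed values, which is undefined behaviour in C when the difference overflows, whereas (s32)(s1 - s2) subtracts in well-defined modular u32 arithmetic and only then reinterprets the delta. A small sketch of the fixed form (the values are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Well-defined: unsigned subtraction wraps, the cast reinterprets the delta. */
static int newer_fixed(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
	/* around the wrap point: 3 really is newer than 0xfffffffe */
	printf("%d\n", newer_fixed(3u, 0xfffffffeu));          /* prints 1 */
	/* the old form, (int32_t)s1 - (int32_t)s2, overflows int for
	 * s1 = 0x7fffffff, s2 = 0x80000000 and is therefore undefined;
	 * the fixed form gives the well-defined answer below */
	printf("%d\n", newer_fixed(0x7fffffffu, 0x80000000u)); /* prints 0 */
	return 0;
}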
| @@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
| 1315 | struct nfs4_layoutget_res *res = &lgp->res; | 1326 | struct nfs4_layoutget_res *res = &lgp->res; |
| 1316 | struct pnfs_layout_segment *lseg; | 1327 | struct pnfs_layout_segment *lseg; |
| 1317 | struct inode *ino = lo->plh_inode; | 1328 | struct inode *ino = lo->plh_inode; |
| 1329 | LIST_HEAD(free_me); | ||
| 1318 | int status = 0; | 1330 | int status = 0; |
| 1319 | 1331 | ||
| 1320 | /* Inject layout blob into I/O device driver */ | 1332 | /* Inject layout blob into I/O device driver */ |
| @@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
| 1341 | goto out_forget_reply; | 1353 | goto out_forget_reply; |
| 1342 | } | 1354 | } |
| 1343 | 1355 | ||
| 1356 | /* Check that the new stateid matches the old stateid */ | ||
| 1357 | pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); | ||
| 1344 | /* Done processing layoutget. Set the layout stateid */ | 1358 | /* Done processing layoutget. Set the layout stateid */ |
| 1345 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1359 | pnfs_set_layout_stateid(lo, &res->stateid, false); |
| 1346 | 1360 | ||
| @@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
| 1355 | } | 1369 | } |
| 1356 | 1370 | ||
| 1357 | spin_unlock(&ino->i_lock); | 1371 | spin_unlock(&ino->i_lock); |
| 1372 | pnfs_free_lseg_list(&free_me); | ||
| 1358 | return lseg; | 1373 | return lseg; |
| 1359 | out: | 1374 | out: |
| 1360 | return ERR_PTR(status); | 1375 | return ERR_PTR(status); |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fddbba2d9eff..e55ce9e8b034 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -357,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
| 357 | } | 357 | } |
| 358 | 358 | ||
| 359 | static int | 359 | static int |
| 360 | nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
| 361 | struct inode *new_dir, struct qstr *new_name) | ||
| 362 | { | ||
| 363 | struct nfs_renameargs arg = { | ||
| 364 | .old_dir = NFS_FH(old_dir), | ||
| 365 | .old_name = old_name, | ||
| 366 | .new_dir = NFS_FH(new_dir), | ||
| 367 | .new_name = new_name, | ||
| 368 | }; | ||
| 369 | struct rpc_message msg = { | ||
| 370 | .rpc_proc = &nfs_procedures[NFSPROC_RENAME], | ||
| 371 | .rpc_argp = &arg, | ||
| 372 | }; | ||
| 373 | int status; | ||
| 374 | |||
| 375 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); | ||
| 376 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); | ||
| 377 | nfs_mark_for_revalidate(old_dir); | ||
| 378 | nfs_mark_for_revalidate(new_dir); | ||
| 379 | dprintk("NFS reply rename: %d\n", status); | ||
| 380 | return status; | ||
| 381 | } | ||
| 382 | |||
| 383 | static int | ||
| 384 | nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 360 | nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
| 385 | { | 361 | { |
| 386 | struct nfs_linkargs arg = { | 362 | struct nfs_linkargs arg = { |
| @@ -745,7 +721,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
| 745 | .unlink_setup = nfs_proc_unlink_setup, | 721 | .unlink_setup = nfs_proc_unlink_setup, |
| 746 | .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, | 722 | .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, |
| 747 | .unlink_done = nfs_proc_unlink_done, | 723 | .unlink_done = nfs_proc_unlink_done, |
| 748 | .rename = nfs_proc_rename, | ||
| 749 | .rename_setup = nfs_proc_rename_setup, | 724 | .rename_setup = nfs_proc_rename_setup, |
| 750 | .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, | 725 | .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, |
| 751 | .rename_done = nfs_proc_rename_done, | 726 | .rename_done = nfs_proc_rename_done, |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 910ed906eb82..2cb56943e232 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -2215,6 +2215,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
| 2215 | struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; | 2215 | struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; |
| 2216 | u32 nfsvers = nfss->nfs_client->rpc_ops->version; | 2216 | u32 nfsvers = nfss->nfs_client->rpc_ops->version; |
| 2217 | 2217 | ||
| 2218 | sync_filesystem(sb); | ||
| 2219 | |||
| 2218 | /* | 2220 | /* |
| 2219 | * Userspace mount programs that send binary options generally send | 2221 | * Userspace mount programs that send binary options generally send |
| 2220 | * them populated with default values. We have no way to know which | 2222 | * them populated with default values. We have no way to know which |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 11d78944de79..de54129336c6 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
| 15 | #include <linux/wait.h> | 15 | #include <linux/wait.h> |
| 16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
| 17 | #include <linux/fsnotify.h> | ||
| 17 | 18 | ||
| 18 | #include "internal.h" | 19 | #include "internal.h" |
| 19 | #include "nfs4_fs.h" | 20 | #include "nfs4_fs.h" |
| @@ -353,8 +354,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
| 353 | return; | 354 | return; |
| 354 | } | 355 | } |
| 355 | 356 | ||
| 356 | if (task->tk_status != 0) | 357 | if (data->complete) |
| 357 | nfs_cancel_async_unlink(old_dentry); | 358 | data->complete(task, data); |
| 358 | } | 359 | } |
| 359 | 360 | ||
| 360 | /** | 361 | /** |
| @@ -399,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = { | |||
| 399 | * | 400 | * |
| 400 | * It's expected that valid references to the dentries and inodes are held | 401 | * It's expected that valid references to the dentries and inodes are held |
| 401 | */ | 402 | */ |
| 402 | static struct rpc_task * | 403 | struct rpc_task * |
| 403 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | 404 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, |
| 404 | struct dentry *old_dentry, struct dentry *new_dentry) | 405 | struct dentry *old_dentry, struct dentry *new_dentry, |
| 406 | void (*complete)(struct rpc_task *, struct nfs_renamedata *)) | ||
| 405 | { | 407 | { |
| 406 | struct nfs_renamedata *data; | 408 | struct nfs_renamedata *data; |
| 407 | struct rpc_message msg = { }; | 409 | struct rpc_message msg = { }; |
| @@ -438,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
| 438 | data->new_dentry = dget(new_dentry); | 440 | data->new_dentry = dget(new_dentry); |
| 439 | nfs_fattr_init(&data->old_fattr); | 441 | nfs_fattr_init(&data->old_fattr); |
| 440 | nfs_fattr_init(&data->new_fattr); | 442 | nfs_fattr_init(&data->new_fattr); |
| 443 | data->complete = complete; | ||
| 441 | 444 | ||
| 442 | /* set up nfs_renameargs */ | 445 | /* set up nfs_renameargs */ |
| 443 | data->args.old_dir = NFS_FH(old_dir); | 446 | data->args.old_dir = NFS_FH(old_dir); |
| @@ -456,6 +459,27 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
| 456 | return rpc_run_task(&task_setup_data); | 459 | return rpc_run_task(&task_setup_data); |
| 457 | } | 460 | } |
| 458 | 461 | ||
| 462 | /* | ||
| 463 | * Perform tasks needed when a sillyrename is done, such as cancelling the | ||
| 464 | * queued async unlink if it failed. | ||
| 465 | */ | ||
| 466 | static void | ||
| 467 | nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) | ||
| 468 | { | ||
| 469 | struct dentry *dentry = data->old_dentry; | ||
| 470 | |||
| 471 | if (task->tk_status != 0) { | ||
| 472 | nfs_cancel_async_unlink(dentry); | ||
| 473 | return; | ||
| 474 | } | ||
| 475 | |||
| 476 | /* | ||
| 477 | * vfs_unlink and the like do not issue this when a file is | ||
| 478 | * sillyrenamed, so do it here. | ||
| 479 | */ | ||
| 480 | fsnotify_nameremove(dentry, 0); | ||
| 481 | } | ||
| 482 | |||
| 459 | #define SILLYNAME_PREFIX ".nfs" | 483 | #define SILLYNAME_PREFIX ".nfs" |
| 460 | #define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) | 484 | #define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) |
| 461 | #define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) | 485 | #define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) |
| @@ -548,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
| 548 | } | 572 | } |
| 549 | 573 | ||
| 550 | /* run the rename task, undo unlink if it fails */ | 574 | /* run the rename task, undo unlink if it fails */ |
| 551 | task = nfs_async_rename(dir, dir, dentry, sdentry); | 575 | task = nfs_async_rename(dir, dir, dentry, sdentry, |
| 576 | nfs_complete_sillyrename); | ||
| 552 | if (IS_ERR(task)) { | 577 | if (IS_ERR(task)) { |
| 553 | error = -EBUSY; | 578 | error = -EBUSY; |
| 554 | nfs_cancel_async_unlink(dentry); | 579 | nfs_cancel_async_unlink(dentry); |
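The unlink.c change is a small inversion of control: the generic rename-done handler no longer hard-codes sillyrename cleanup, and each caller of nfs_async_rename now supplies its own completion hook; sillyrename passes nfs_complete_sillyrename, which also emits the fsnotify removal event that vfs_unlink would otherwise have sent. A minimal sketch of the pattern with invented stand-in types, not the kernel structures:

#include <stdio.h>

/* Invented stand-ins for struct rpc_task / struct nfs_renamedata. */
struct task { int status; };
struct renamedata {
	const char *old_name;
	/* caller-supplied completion, may be NULL */
	void (*complete)(struct task *, struct renamedata *);
};

/* Generic done handler: defers caller-specific cleanup to the hook. */
static void rename_done(struct task *t, struct renamedata *d)
{
	if (d->complete)
		d->complete(t, d);
}

/* One caller's policy, analogous to nfs_complete_sillyrename(). */
static void sillyrename_complete(struct task *t, struct renamedata *d)
{
	if (t->status != 0)
		printf("cancel queued unlink of %s\n", d->old_name);
	else
		printf("notify: %s removed\n", d->old_name);
}

int main(void)
{
	struct task failed = { .status = -1 };
	struct renamedata d = { ".nfs0001", sillyrename_complete };

	rename_done(&failed, &d);
	return 0;
}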
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 06cddd572264..2645be435e75 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
| @@ -71,10 +71,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
| 71 | if (gid_eq(new->fsgid, INVALID_GID)) | 71 | if (gid_eq(new->fsgid, INVALID_GID)) |
| 72 | new->fsgid = exp->ex_anon_gid; | 72 | new->fsgid = exp->ex_anon_gid; |
| 73 | 73 | ||
| 74 | ret = set_groups(new, gi); | 74 | set_groups(new, gi); |
| 75 | put_group_info(gi); | 75 | put_group_info(gi); |
| 76 | if (ret < 0) | ||
| 77 | goto error; | ||
| 78 | 76 | ||
| 79 | if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) | 77 | if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) |
| 80 | new->cap_effective = cap_drop_nfsd_set(new->cap_effective); | 78 | new->cap_effective = cap_drop_nfsd_set(new->cap_effective); |
| @@ -89,7 +87,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
| 89 | 87 | ||
| 90 | oom: | 88 | oom: |
| 91 | ret = -ENOMEM; | 89 | ret = -ENOMEM; |
| 92 | error: | ||
| 93 | abort_creds(new); | 90 | abort_creds(new); |
| 94 | return ret; | 91 | return ret; |
| 95 | } | 92 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 017d3cb5e99b..915808b36df7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
| 449 | fh_lock(fhp); | 449 | fh_lock(fhp); |
| 450 | host_err = notify_change(dentry, iap, NULL); | 450 | host_err = notify_change(dentry, iap, NULL); |
| 451 | fh_unlock(fhp); | 451 | fh_unlock(fhp); |
| 452 | err = nfserrno(host_err); | ||
| 452 | 453 | ||
| 453 | out_put_write_access: | 454 | out_put_write_access: |
| 454 | if (size_change) | 455 | if (size_change) |
| @@ -1693,7 +1694,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
| 1693 | if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) | 1694 | if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) |
| 1694 | goto out_dput_new; | 1695 | goto out_dput_new; |
| 1695 | 1696 | ||
| 1696 | host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); | 1697 | host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); |
| 1697 | if (!host_err) { | 1698 | if (!host_err) { |
| 1698 | host_err = commit_metadata(tfhp); | 1699 | host_err = commit_metadata(tfhp); |
| 1699 | if (!host_err) | 1700 | if (!host_err) |
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index deaa3d33a0aa..0d58075f34e2 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c | |||
| @@ -942,6 +942,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, | |||
| 942 | struct inode *cpfile; | 942 | struct inode *cpfile; |
| 943 | int err; | 943 | int err; |
| 944 | 944 | ||
| 945 | if (cpsize > sb->s_blocksize) { | ||
| 946 | printk(KERN_ERR | ||
| 947 | "NILFS: too large checkpoint size: %zu bytes.\n", | ||
| 948 | cpsize); | ||
| 949 | return -EINVAL; | ||
| 950 | } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { | ||
| 951 | printk(KERN_ERR | ||
| 952 | "NILFS: too small checkpoint size: %zu bytes.\n", | ||
| 953 | cpsize); | ||
| 954 | return -EINVAL; | ||
| 955 | } | ||
| 956 | |||
| 945 | cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); | 957 | cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); |
| 946 | if (unlikely(!cpfile)) | 958 | if (unlikely(!cpfile)) |
| 947 | return -ENOMEM; | 959 | return -ENOMEM; |
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fa0f80308c2d..0d5fada91191 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c | |||
| @@ -484,6 +484,18 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, | |||
| 484 | struct nilfs_dat_info *di; | 484 | struct nilfs_dat_info *di; |
| 485 | int err; | 485 | int err; |
| 486 | 486 | ||
| 487 | if (entry_size > sb->s_blocksize) { | ||
| 488 | printk(KERN_ERR | ||
| 489 | "NILFS: too large DAT entry size: %zu bytes.\n", | ||
| 490 | entry_size); | ||
| 491 | return -EINVAL; | ||
| 492 | } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { | ||
| 493 | printk(KERN_ERR | ||
| 494 | "NILFS: too small DAT entry size: %zu bytes.\n", | ||
| 495 | entry_size); | ||
| 496 | return -EINVAL; | ||
| 497 | } | ||
| 498 | |||
| 487 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); | 499 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); |
| 488 | if (unlikely(!dat)) | 500 | if (unlikely(!dat)) |
| 489 | return -ENOMEM; | 501 | return -ENOMEM; |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7e350c562e0e..b9c5726120e3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
| @@ -783,16 +783,14 @@ void nilfs_evict_inode(struct inode *inode) | |||
| 783 | int ret; | 783 | int ret; |
| 784 | 784 | ||
| 785 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { | 785 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { |
| 786 | if (inode->i_data.nrpages) | 786 | truncate_inode_pages_final(&inode->i_data); |
| 787 | truncate_inode_pages(&inode->i_data, 0); | ||
| 788 | clear_inode(inode); | 787 | clear_inode(inode); |
| 789 | nilfs_clear_inode(inode); | 788 | nilfs_clear_inode(inode); |
| 790 | return; | 789 | return; |
| 791 | } | 790 | } |
| 792 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ | 791 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
| 793 | 792 | ||
| 794 | if (inode->i_data.nrpages) | 793 | truncate_inode_pages_final(&inode->i_data); |
| 795 | truncate_inode_pages(&inode->i_data, 0); | ||
| 796 | 794 | ||
| 797 | /* TODO: some of the following operations may fail. */ | 795 | /* TODO: some of the following operations may fail. */ |
| 798 | nilfs_truncate_bmap(ii, 0); | 796 | nilfs_truncate_bmap(ii, 0); |
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 2b34021948e4..422fb54b7377 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
| @@ -1072,6 +1072,48 @@ out: | |||
| 1072 | } | 1072 | } |
| 1073 | 1073 | ||
| 1074 | /** | 1074 | /** |
| 1075 | * nilfs_ioctl_trim_fs() - trim ioctl handler | ||
| 1076 | * @inode: inode object | ||
| 1077 | * @argp: pointer on argument from userspace | ||
| 1078 | * | ||
| 1079 | * Description: nilfs_ioctl_trim_fs is the FITRIM ioctl handler. It | ||
| 1080 | * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which | ||
| 1081 | * performs the actual trim operation. | ||
| 1082 | * | ||
| 1083 | * Return Value: On success, 0 is returned. On error, a negative error code is returned. | ||
| 1084 | */ | ||
| 1085 | static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) | ||
| 1086 | { | ||
| 1087 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | ||
| 1088 | struct request_queue *q = bdev_get_queue(nilfs->ns_bdev); | ||
| 1089 | struct fstrim_range range; | ||
| 1090 | int ret; | ||
| 1091 | |||
| 1092 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1093 | return -EPERM; | ||
| 1094 | |||
| 1095 | if (!blk_queue_discard(q)) | ||
| 1096 | return -EOPNOTSUPP; | ||
| 1097 | |||
| 1098 | if (copy_from_user(&range, argp, sizeof(range))) | ||
| 1099 | return -EFAULT; | ||
| 1100 | |||
| 1101 | range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity); | ||
| 1102 | |||
| 1103 | down_read(&nilfs->ns_segctor_sem); | ||
| 1104 | ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); | ||
| 1105 | up_read(&nilfs->ns_segctor_sem); | ||
| 1106 | |||
| 1107 | if (ret < 0) | ||
| 1108 | return ret; | ||
| 1109 | |||
| 1110 | if (copy_to_user(argp, &range, sizeof(range))) | ||
| 1111 | return -EFAULT; | ||
| 1112 | |||
| 1113 | return 0; | ||
| 1114 | } | ||
| 1115 | |||
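With this handler in place, a NILFS2 filesystem can be trimmed through the same generic FITRIM ioctl that fstrim(8) uses. A minimal user-space invocation (FITRIM and struct fstrim_range are the standard API from linux/fs.h; the mount point path is illustrative):

#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>   /* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range = {
		.start  = 0,
		.len    = ULLONG_MAX,  /* whole filesystem */
		.minlen = 0,           /* kernel raises this to the discard granularity */
	};
	int fd = open("/mnt/nilfs", O_RDONLY);  /* illustrative mount point */

	if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
		perror("FITRIM");
		return 1;
	}
	/* on return, range.len holds the number of bytes actually trimmed */
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}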
| 1116 | /** | ||
| 1075 | * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated | 1117 | * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated |
| 1076 | * @inode: inode object | 1118 | * @inode: inode object |
| 1077 | * @argp: pointer on argument from userspace | 1119 | * @argp: pointer on argument from userspace |
| @@ -1163,6 +1205,95 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, | |||
| 1163 | return ret; | 1205 | return ret; |
| 1164 | } | 1206 | } |
| 1165 | 1207 | ||
| 1208 | /** | ||
| 1209 | * nilfs_ioctl_set_suinfo - set segment usage info | ||
| 1210 | * @inode: inode object | ||
| 1211 | * @filp: file object | ||
| 1212 | * @cmd: ioctl's request code | ||
| 1213 | * @argp: pointer on argument from userspace | ||
| 1214 | * | ||
| 1215 | * Description: Expects an array of nilfs_suinfo_update structures | ||
| 1216 | * encapsulated in nilfs_argv and updates the segment usage info | ||
| 1217 | * according to the flags in nilfs_suinfo_update. | ||
| 1218 | * | ||
| 1219 | * Return Value: On success, 0 is returned. On error, one of the | ||
| 1220 | * following negative error codes is returned. | ||
| 1221 | * | ||
| 1222 | * %-EPERM - Not enough permissions | ||
| 1223 | * | ||
| 1224 | * %-EFAULT - Error copying input data | ||
| 1225 | * | ||
| 1226 | * %-EIO - I/O error. | ||
| 1227 | * | ||
| 1228 | * %-ENOMEM - Insufficient amount of memory available. | ||
| 1229 | * | ||
| 1230 | * %-EINVAL - Invalid values in input (segment number, flags or nblocks) | ||
| 1231 | */ | ||
| 1232 | static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, | ||
| 1233 | unsigned int cmd, void __user *argp) | ||
| 1234 | { | ||
| 1235 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | ||
| 1236 | struct nilfs_transaction_info ti; | ||
| 1237 | struct nilfs_argv argv; | ||
| 1238 | size_t len; | ||
| 1239 | void __user *base; | ||
| 1240 | void *kbuf; | ||
| 1241 | int ret; | ||
| 1242 | |||
| 1243 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1244 | return -EPERM; | ||
| 1245 | |||
| 1246 | ret = mnt_want_write_file(filp); | ||
| 1247 | if (ret) | ||
| 1248 | return ret; | ||
| 1249 | |||
| 1250 | ret = -EFAULT; | ||
| 1251 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
| 1252 | goto out; | ||
| 1253 | |||
| 1254 | ret = -EINVAL; | ||
| 1255 | if (argv.v_size < sizeof(struct nilfs_suinfo_update)) | ||
| 1256 | goto out; | ||
| 1257 | |||
| 1258 | if (argv.v_nmembs > nilfs->ns_nsegments) | ||
| 1259 | goto out; | ||
| 1260 | |||
| 1261 | if (argv.v_nmembs >= UINT_MAX / argv.v_size) | ||
| 1262 | goto out; | ||
| 1263 | |||
| 1264 | len = argv.v_size * argv.v_nmembs; | ||
| 1265 | if (!len) { | ||
| 1266 | ret = 0; | ||
| 1267 | goto out; | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | base = (void __user *)(unsigned long)argv.v_base; | ||
| 1271 | kbuf = vmalloc(len); | ||
| 1272 | if (!kbuf) { | ||
| 1273 | ret = -ENOMEM; | ||
| 1274 | goto out; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | if (copy_from_user(kbuf, base, len)) { | ||
| 1278 | ret = -EFAULT; | ||
| 1279 | goto out_free; | ||
| 1280 | } | ||
| 1281 | |||
| 1282 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
| 1283 | ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, | ||
| 1284 | argv.v_nmembs); | ||
| 1285 | if (unlikely(ret < 0)) | ||
| 1286 | nilfs_transaction_abort(inode->i_sb); | ||
| 1287 | else | ||
| 1288 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
| 1289 | |||
| 1290 | out_free: | ||
| 1291 | vfree(kbuf); | ||
| 1292 | out: | ||
| 1293 | mnt_drop_write_file(filp); | ||
| 1294 | return ret; | ||
| 1295 | } | ||
| 1296 | |||
| 1166 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 1297 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 1167 | { | 1298 | { |
| 1168 | struct inode *inode = file_inode(filp); | 1299 | struct inode *inode = file_inode(filp); |
| @@ -1189,6 +1320,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 1189 | return nilfs_ioctl_get_info(inode, filp, cmd, argp, | 1320 | return nilfs_ioctl_get_info(inode, filp, cmd, argp, |
| 1190 | sizeof(struct nilfs_suinfo), | 1321 | sizeof(struct nilfs_suinfo), |
| 1191 | nilfs_ioctl_do_get_suinfo); | 1322 | nilfs_ioctl_do_get_suinfo); |
| 1323 | case NILFS_IOCTL_SET_SUINFO: | ||
| 1324 | return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); | ||
| 1192 | case NILFS_IOCTL_GET_SUSTAT: | 1325 | case NILFS_IOCTL_GET_SUSTAT: |
| 1193 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); | 1326 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); |
| 1194 | case NILFS_IOCTL_GET_VINFO: | 1327 | case NILFS_IOCTL_GET_VINFO: |
| @@ -1205,6 +1338,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 1205 | return nilfs_ioctl_resize(inode, filp, argp); | 1338 | return nilfs_ioctl_resize(inode, filp, argp); |
| 1206 | case NILFS_IOCTL_SET_ALLOC_RANGE: | 1339 | case NILFS_IOCTL_SET_ALLOC_RANGE: |
| 1207 | return nilfs_ioctl_set_alloc_range(inode, argp); | 1340 | return nilfs_ioctl_set_alloc_range(inode, argp); |
| 1341 | case FITRIM: | ||
| 1342 | return nilfs_ioctl_trim_fs(inode, argp); | ||
| 1208 | default: | 1343 | default: |
| 1209 | return -ENOTTY; | 1344 | return -ENOTTY; |
| 1210 | } | 1345 | } |
| @@ -1228,6 +1363,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 1228 | case NILFS_IOCTL_GET_CPINFO: | 1363 | case NILFS_IOCTL_GET_CPINFO: |
| 1229 | case NILFS_IOCTL_GET_CPSTAT: | 1364 | case NILFS_IOCTL_GET_CPSTAT: |
| 1230 | case NILFS_IOCTL_GET_SUINFO: | 1365 | case NILFS_IOCTL_GET_SUINFO: |
| 1366 | case NILFS_IOCTL_SET_SUINFO: | ||
| 1231 | case NILFS_IOCTL_GET_SUSTAT: | 1367 | case NILFS_IOCTL_GET_SUSTAT: |
| 1232 | case NILFS_IOCTL_GET_VINFO: | 1368 | case NILFS_IOCTL_GET_VINFO: |
| 1233 | case NILFS_IOCTL_GET_BDESCS: | 1369 | case NILFS_IOCTL_GET_BDESCS: |
| @@ -1235,6 +1371,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 1235 | case NILFS_IOCTL_SYNC: | 1371 | case NILFS_IOCTL_SYNC: |
| 1236 | case NILFS_IOCTL_RESIZE: | 1372 | case NILFS_IOCTL_RESIZE: |
| 1237 | case NILFS_IOCTL_SET_ALLOC_RANGE: | 1373 | case NILFS_IOCTL_SET_ALLOC_RANGE: |
| 1374 | case FITRIM: | ||
| 1238 | break; | 1375 | break; |
| 1239 | default: | 1376 | default: |
| 1240 | return -ENOIOCTLCMD; | 1377 | return -ENOIOCTLCMD; |
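With FITRIM wired into both nilfs_ioctl() and nilfs_compat_ioctl() above, userspace can trim a mounted nilfs2 volume through the generic interface. A minimal sketch — the mount point is a placeholder and error handling is omitted; note that on return the kernel rewrites range.len to the number of bytes actually discarded (ndiscarded << blocksize bits in the sufile code further down):

    #include <stdio.h>
    #include <string.h>
    #include <limits.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>        /* FITRIM, struct fstrim_range */

    int main(void)
    {
            struct fstrim_range range;
            int fd = open("/mnt/nilfs", O_RDONLY);  /* placeholder mount */

            memset(&range, 0, sizeof(range));
            range.len = ULLONG_MAX;  /* trim the whole filesystem */
            range.minlen = 0;        /* raised to the discard granularity */

            if (ioctl(fd, FITRIM, &range) == 0)
                    printf("discarded %llu bytes\n",
                           (unsigned long long)range.len);
            close(fd);
            return 0;
    }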
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3127e9f438a7..2a869c35c362 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c | |||
| @@ -870,6 +870,289 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, | |||
| 870 | } | 870 | } |
| 871 | 871 | ||
| 872 | /** | 872 | /** |
| 873 | * nilfs_sufile_set_suinfo - set segment usage info | ||
| 874 | * @sufile: inode of segment usage file | ||
| 875 | * @buf: array of suinfo_update | ||
| 876 | * @supsz: byte size of one suinfo_update entry | ||
| 877 | * @nsup: number of entries in the suinfo_update array | ||
| 878 | * | ||
| 879 | * Description: Takes an array of nilfs_suinfo_update structs and updates | ||
| 880 | * segment usage accordingly. Only the fields indicated by the sup_flags | ||
| 881 | * are updated. | ||
| 882 | * | ||
| 883 | * Return Value: On success, 0 is returned. On error, one of the | ||
| 884 | * following negative error codes is returned. | ||
| 885 | * | ||
| 886 | * %-EIO - I/O error. | ||
| 887 | * | ||
| 888 | * %-ENOMEM - Insufficient amount of memory available. | ||
| 889 | * | ||
| 890 | * %-EINVAL - Invalid values in input (segment number, flags or nblocks) | ||
| 891 | */ | ||
| 892 | ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, | ||
| 893 | unsigned supsz, size_t nsup) | ||
| 894 | { | ||
| 895 | struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; | ||
| 896 | struct buffer_head *header_bh, *bh; | ||
| 897 | struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; | ||
| 898 | struct nilfs_segment_usage *su; | ||
| 899 | void *kaddr; | ||
| 900 | unsigned long blkoff, prev_blkoff; | ||
| 901 | int cleansi, cleansu, dirtysi, dirtysu; | ||
| 902 | long ncleaned = 0, ndirtied = 0; | ||
| 903 | int ret = 0; | ||
| 904 | |||
| 905 | if (unlikely(nsup == 0)) | ||
| 906 | return ret; | ||
| 907 | |||
| 908 | for (sup = buf; sup < supend; sup = (void *)sup + supsz) { | ||
| 909 | if (sup->sup_segnum >= nilfs->ns_nsegments | ||
| 910 | || (sup->sup_flags & | ||
| 911 | (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) | ||
| 912 | || (nilfs_suinfo_update_nblocks(sup) && | ||
| 913 | sup->sup_sui.sui_nblocks > | ||
| 914 | nilfs->ns_blocks_per_segment)) | ||
| 915 | return -EINVAL; | ||
| 916 | } | ||
| 917 | |||
| 918 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
| 919 | |||
| 920 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
| 921 | if (ret < 0) | ||
| 922 | goto out_sem; | ||
| 923 | |||
| 924 | sup = buf; | ||
| 925 | blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); | ||
| 926 | ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); | ||
| 927 | if (ret < 0) | ||
| 928 | goto out_header; | ||
| 929 | |||
| 930 | for (;;) { | ||
| 931 | kaddr = kmap_atomic(bh->b_page); | ||
| 932 | su = nilfs_sufile_block_get_segment_usage( | ||
| 933 | sufile, sup->sup_segnum, bh, kaddr); | ||
| 934 | |||
| 935 | if (nilfs_suinfo_update_lastmod(sup)) | ||
| 936 | su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); | ||
| 937 | |||
| 938 | if (nilfs_suinfo_update_nblocks(sup)) | ||
| 939 | su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); | ||
| 940 | |||
| 941 | if (nilfs_suinfo_update_flags(sup)) { | ||
| 942 | /* | ||
| 943 | * The active flag is a virtual flag projected by the running | ||
| 944 | * nilfs kernel code - drop it so that it is not written to | ||
| 945 | * disk. | ||
| 946 | */ | ||
| 947 | sup->sup_sui.sui_flags &= | ||
| 948 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
| 949 | |||
| 950 | cleansi = nilfs_suinfo_clean(&sup->sup_sui); | ||
| 951 | cleansu = nilfs_segment_usage_clean(su); | ||
| 952 | dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); | ||
| 953 | dirtysu = nilfs_segment_usage_dirty(su); | ||
| 954 | |||
| 955 | if (cleansi && !cleansu) | ||
| 956 | ++ncleaned; | ||
| 957 | else if (!cleansi && cleansu) | ||
| 958 | --ncleaned; | ||
| 959 | |||
| 960 | if (dirtysi && !dirtysu) | ||
| 961 | ++ndirtied; | ||
| 962 | else if (!dirtysi && dirtysu) | ||
| 963 | --ndirtied; | ||
| 964 | |||
| 965 | su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); | ||
| 966 | } | ||
| 967 | |||
| 968 | kunmap_atomic(kaddr); | ||
| 969 | |||
| 970 | sup = (void *)sup + supsz; | ||
| 971 | if (sup >= supend) | ||
| 972 | break; | ||
| 973 | |||
| 974 | prev_blkoff = blkoff; | ||
| 975 | blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); | ||
| 976 | if (blkoff == prev_blkoff) | ||
| 977 | continue; | ||
| 978 | |||
| 979 | /* get different block */ | ||
| 980 | mark_buffer_dirty(bh); | ||
| 981 | put_bh(bh); | ||
| 982 | ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); | ||
| 983 | if (unlikely(ret < 0)) | ||
| 984 | goto out_mark; | ||
| 985 | } | ||
| 986 | mark_buffer_dirty(bh); | ||
| 987 | put_bh(bh); | ||
| 988 | |||
| 989 | out_mark: | ||
| 990 | if (ncleaned || ndirtied) { | ||
| 991 | nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, | ||
| 992 | (u64)ndirtied); | ||
| 993 | NILFS_SUI(sufile)->ncleansegs += ncleaned; | ||
| 994 | } | ||
| 995 | nilfs_mdt_mark_dirty(sufile); | ||
| 996 | out_header: | ||
| 997 | put_bh(header_bh); | ||
| 998 | out_sem: | ||
| 999 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
| 1000 | return ret; | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | /** | ||
| 1004 | * nilfs_sufile_trim_fs() - trim ioctl handler | ||
| 1005 | * @sufile: inode of segment usage file | ||
| 1006 | * @range: fstrim_range structure | ||
| 1007 | * | ||
| 1008 | * start: first byte to trim | ||
| 1009 | * len: number of bytes to trim from start | ||
| 1010 | * minlen: minimum extent length in bytes | ||
| 1011 | * | ||
| 1012 | * Description: nilfs_sufile_trim_fs goes through all segments containing bytes | ||
| 1013 | * from start to start+len. start is rounded up to the next block boundary | ||
| 1014 | * and start+len is rounded down. For each clean segment, the | ||
| 1015 | * blkdev_issue_discard function is invoked. | ||
| 1016 | * | ||
| 1017 | * Return Value: On success, 0 is returned; otherwise, a negative error code. | ||
| 1018 | */ | ||
| 1019 | int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) | ||
| 1020 | { | ||
| 1021 | struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; | ||
| 1022 | struct buffer_head *su_bh; | ||
| 1023 | struct nilfs_segment_usage *su; | ||
| 1024 | void *kaddr; | ||
| 1025 | size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; | ||
| 1026 | sector_t seg_start, seg_end, start_block, end_block; | ||
| 1027 | sector_t start = 0, nblocks = 0; | ||
| 1028 | u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; | ||
| 1029 | int ret = 0; | ||
| 1030 | unsigned int sects_per_block; | ||
| 1031 | |||
| 1032 | sects_per_block = (1 << nilfs->ns_blocksize_bits) / | ||
| 1033 | bdev_logical_block_size(nilfs->ns_bdev); | ||
| 1034 | len = range->len >> nilfs->ns_blocksize_bits; | ||
| 1035 | minlen = range->minlen >> nilfs->ns_blocksize_bits; | ||
| 1036 | max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); | ||
| 1037 | |||
| 1038 | if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) | ||
| 1039 | return -EINVAL; | ||
| 1040 | |||
| 1041 | start_block = (range->start + nilfs->ns_blocksize - 1) >> | ||
| 1042 | nilfs->ns_blocksize_bits; | ||
| 1043 | |||
| 1044 | /* | ||
| 1045 | * range->len can be very large (actually, it is set to | ||
| 1046 | * ULLONG_MAX by default) - truncate upper end of the range | ||
| 1047 | * carefully so as not to overflow. | ||
| 1048 | */ | ||
| 1049 | if (max_blocks - start_block < len) | ||
| 1050 | end_block = max_blocks - 1; | ||
| 1051 | else | ||
| 1052 | end_block = start_block + len - 1; | ||
| 1053 | |||
| 1054 | segnum = nilfs_get_segnum_of_block(nilfs, start_block); | ||
| 1055 | segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); | ||
| 1056 | |||
| 1057 | down_read(&NILFS_MDT(sufile)->mi_sem); | ||
| 1058 | |||
| 1059 | while (segnum <= segnum_end) { | ||
| 1060 | n = nilfs_sufile_segment_usages_in_block(sufile, segnum, | ||
| 1061 | segnum_end); | ||
| 1062 | |||
| 1063 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, | ||
| 1064 | &su_bh); | ||
| 1065 | if (ret < 0) { | ||
| 1066 | if (ret != -ENOENT) | ||
| 1067 | goto out_sem; | ||
| 1068 | /* hole */ | ||
| 1069 | segnum += n; | ||
| 1070 | continue; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | kaddr = kmap_atomic(su_bh->b_page); | ||
| 1074 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, | ||
| 1075 | su_bh, kaddr); | ||
| 1076 | for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { | ||
| 1077 | if (!nilfs_segment_usage_clean(su)) | ||
| 1078 | continue; | ||
| 1079 | |||
| 1080 | nilfs_get_segment_range(nilfs, segnum, &seg_start, | ||
| 1081 | &seg_end); | ||
| 1082 | |||
| 1083 | if (!nblocks) { | ||
| 1084 | /* start new extent */ | ||
| 1085 | start = seg_start; | ||
| 1086 | nblocks = seg_end - seg_start + 1; | ||
| 1087 | continue; | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | if (start + nblocks == seg_start) { | ||
| 1091 | /* add to previous extent */ | ||
| 1092 | nblocks += seg_end - seg_start + 1; | ||
| 1093 | continue; | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | /* discard previous extent */ | ||
| 1097 | if (start < start_block) { | ||
| 1098 | nblocks -= start_block - start; | ||
| 1099 | start = start_block; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | if (nblocks >= minlen) { | ||
| 1103 | kunmap_atomic(kaddr); | ||
| 1104 | |||
| 1105 | ret = blkdev_issue_discard(nilfs->ns_bdev, | ||
| 1106 | start * sects_per_block, | ||
| 1107 | nblocks * sects_per_block, | ||
| 1108 | GFP_NOFS, 0); | ||
| 1109 | if (ret < 0) { | ||
| 1110 | put_bh(su_bh); | ||
| 1111 | goto out_sem; | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | ndiscarded += nblocks; | ||
| 1115 | kaddr = kmap_atomic(su_bh->b_page); | ||
| 1116 | su = nilfs_sufile_block_get_segment_usage( | ||
| 1117 | sufile, segnum, su_bh, kaddr); | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | /* start new extent */ | ||
| 1121 | start = seg_start; | ||
| 1122 | nblocks = seg_end - seg_start + 1; | ||
| 1123 | } | ||
| 1124 | kunmap_atomic(kaddr); | ||
| 1125 | put_bh(su_bh); | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | |||
| 1129 | if (nblocks) { | ||
| 1130 | /* discard last extent */ | ||
| 1131 | if (start < start_block) { | ||
| 1132 | nblocks -= start_block - start; | ||
| 1133 | start = start_block; | ||
| 1134 | } | ||
| 1135 | if (start + nblocks > end_block + 1) | ||
| 1136 | nblocks = end_block - start + 1; | ||
| 1137 | |||
| 1138 | if (nblocks >= minlen) { | ||
| 1139 | ret = blkdev_issue_discard(nilfs->ns_bdev, | ||
| 1140 | start * sects_per_block, | ||
| 1141 | nblocks * sects_per_block, | ||
| 1142 | GFP_NOFS, 0); | ||
| 1143 | if (!ret) | ||
| 1144 | ndiscarded += nblocks; | ||
| 1145 | } | ||
| 1146 | } | ||
| 1147 | |||
| 1148 | out_sem: | ||
| 1149 | up_read(&NILFS_MDT(sufile)->mi_sem); | ||
| 1150 | |||
| 1151 | range->len = ndiscarded << nilfs->ns_blocksize_bits; | ||
| 1152 | return ret; | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | /** | ||
| 873 | * nilfs_sufile_read - read or get sufile inode | 1156 | * nilfs_sufile_read - read or get sufile inode |
| 874 | * @sb: super block instance | 1157 | * @sb: super block instance |
| 875 | * @susize: size of a segment usage entry | 1158 | * @susize: size of a segment usage entry |
| @@ -886,6 +1169,18 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, | |||
| 886 | void *kaddr; | 1169 | void *kaddr; |
| 887 | int err; | 1170 | int err; |
| 888 | 1171 | ||
| 1172 | if (susize > sb->s_blocksize) { | ||
| 1173 | printk(KERN_ERR | ||
| 1174 | "NILFS: too large segment usage size: %zu bytes.\n", | ||
| 1175 | susize); | ||
| 1176 | return -EINVAL; | ||
| 1177 | } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { | ||
| 1178 | printk(KERN_ERR | ||
| 1179 | "NILFS: too small segment usage size: %zu bytes.\n", | ||
| 1180 | susize); | ||
| 1181 | return -EINVAL; | ||
| 1182 | } | ||
| 1183 | |||
| 889 | sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); | 1184 | sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); |
| 890 | if (unlikely(!sufile)) | 1185 | if (unlikely(!sufile)) |
| 891 | return -ENOMEM; | 1186 | return -ENOMEM; |
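The loop in nilfs_sufile_trim_fs() above coalesces runs of contiguous clean segments into one extent before each blkdev_issue_discard() call. A simplified standalone model of just that coalescing — seg_is_clean(), seg_range() and discard() are hypothetical stand-ins for the sufile lookups and the block-layer call, and the clamping against start_block/end_block is left out:

    typedef unsigned long long u64;

    /* Hypothetical helpers standing in for the sufile/block-layer calls. */
    int seg_is_clean(u64 segnum);
    void seg_range(u64 segnum, u64 *start, u64 *end);
    void discard(u64 start, u64 nblocks);

    void trim_model(u64 first_seg, u64 last_seg, u64 minlen)
    {
            u64 start = 0, nblocks = 0, s, e, segnum;

            for (segnum = first_seg; segnum <= last_seg; segnum++) {
                    if (!seg_is_clean(segnum))
                            continue;
                    seg_range(segnum, &s, &e);
                    if (!nblocks) {
                            start = s;                 /* start a new extent */
                            nblocks = e - s + 1;
                    } else if (start + nblocks == s) {
                            nblocks += e - s + 1;      /* extend the extent */
                    } else {
                            if (nblocks >= minlen)     /* gap: flush extent */
                                    discard(start, nblocks);
                            start = s;
                            nblocks = e - s + 1;
                    }
            }
            if (nblocks >= minlen)                     /* flush the last extent */
                    discard(start, nblocks);
    }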
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index e84bc5b51fc1..b8afd72f2379 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h | |||
| @@ -44,6 +44,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, | |||
| 44 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); | 44 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); |
| 45 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, | 45 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, |
| 46 | size_t); | 46 | size_t); |
| 47 | ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned, size_t); | ||
| 47 | 48 | ||
| 48 | int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, | 49 | int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, |
| 49 | void (*dofunc)(struct inode *, __u64, | 50 | void (*dofunc)(struct inode *, __u64, |
| @@ -65,6 +66,7 @@ void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, | |||
| 65 | int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); | 66 | int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); |
| 66 | int nilfs_sufile_read(struct super_block *sb, size_t susize, | 67 | int nilfs_sufile_read(struct super_block *sb, size_t susize, |
| 67 | struct nilfs_inode *raw_inode, struct inode **inodep); | 68 | struct nilfs_inode *raw_inode, struct inode **inodep); |
| 69 | int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); | ||
| 68 | 70 | ||
| 69 | /** | 71 | /** |
| 70 | * nilfs_sufile_scrap - make a segment garbage | 72 | * nilfs_sufile_scrap - make a segment garbage |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 7ac2a122ca1d..8c532b2ca3ab 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
| @@ -1129,6 +1129,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1129 | unsigned long old_mount_opt; | 1129 | unsigned long old_mount_opt; |
| 1130 | int err; | 1130 | int err; |
| 1131 | 1131 | ||
| 1132 | sync_filesystem(sb); | ||
| 1132 | old_sb_flags = sb->s_flags; | 1133 | old_sb_flags = sb->s_flags; |
| 1133 | old_mount_opt = nilfs->ns_mount_opt; | 1134 | old_mount_opt = nilfs->ns_mount_opt; |
| 1134 | 1135 | ||
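The one-line nilfs_remount() change above recurs across this series (adfs, affs, ntfs, and others): each ->remount_fs gains an explicit sync_filesystem(sb) because the VFS no longer syncs on the filesystem's behalf before a remount. A sketch of the resulting shape:

    static int example_remount(struct super_block *sb, int *flags, char *data)
    {
            sync_filesystem(sb);  /* flush dirty state before flags change */
            /* ... parse options and adjust *flags as before ... */
            return 0;
    }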
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 94c451ce6d24..8ba8229ba076 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
| @@ -399,6 +399,16 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, | |||
| 399 | return -EINVAL; | 399 | return -EINVAL; |
| 400 | 400 | ||
| 401 | nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); | 401 | nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); |
| 402 | if (nilfs->ns_inode_size > nilfs->ns_blocksize) { | ||
| 403 | printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n", | ||
| 404 | nilfs->ns_inode_size); | ||
| 405 | return -EINVAL; | ||
| 406 | } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { | ||
| 407 | printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n", | ||
| 408 | nilfs->ns_inode_size); | ||
| 409 | return -EINVAL; | ||
| 410 | } | ||
| 411 | |||
| 402 | nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); | 412 | nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); |
| 403 | 413 | ||
| 404 | nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); | 414 | nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index dc638f786d5c..ee9cb3795c2b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
| @@ -60,8 +60,8 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) | |||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 62 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 63 | static int fanotify_get_response_from_access(struct fsnotify_group *group, | 63 | static int fanotify_get_response(struct fsnotify_group *group, |
| 64 | struct fanotify_event_info *event) | 64 | struct fanotify_perm_event_info *event) |
| 65 | { | 65 | { |
| 66 | int ret; | 66 | int ret; |
| 67 | 67 | ||
| @@ -142,6 +142,40 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, | |||
| 142 | return false; | 142 | return false; |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, | ||
| 146 | struct path *path) | ||
| 147 | { | ||
| 148 | struct fanotify_event_info *event; | ||
| 149 | |||
| 150 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
| 151 | if (mask & FAN_ALL_PERM_EVENTS) { | ||
| 152 | struct fanotify_perm_event_info *pevent; | ||
| 153 | |||
| 154 | pevent = kmem_cache_alloc(fanotify_perm_event_cachep, | ||
| 155 | GFP_KERNEL); | ||
| 156 | if (!pevent) | ||
| 157 | return NULL; | ||
| 158 | event = &pevent->fae; | ||
| 159 | pevent->response = 0; | ||
| 160 | goto init; | ||
| 161 | } | ||
| 162 | #endif | ||
| 163 | event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | ||
| 164 | if (!event) | ||
| 165 | return NULL; | ||
| 166 | init: __maybe_unused | ||
| 167 | fsnotify_init_event(&event->fse, inode, mask); | ||
| 168 | event->tgid = get_pid(task_tgid(current)); | ||
| 169 | if (path) { | ||
| 170 | event->path = *path; | ||
| 171 | path_get(&event->path); | ||
| 172 | } else { | ||
| 173 | event->path.mnt = NULL; | ||
| 174 | event->path.dentry = NULL; | ||
| 175 | } | ||
| 176 | return event; | ||
| 177 | } | ||
| 178 | |||
| 145 | static int fanotify_handle_event(struct fsnotify_group *group, | 179 | static int fanotify_handle_event(struct fsnotify_group *group, |
| 146 | struct inode *inode, | 180 | struct inode *inode, |
| 147 | struct fsnotify_mark *inode_mark, | 181 | struct fsnotify_mark *inode_mark, |
| @@ -171,25 +205,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, | |||
| 171 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, | 205 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, |
| 172 | mask); | 206 | mask); |
| 173 | 207 | ||
| 174 | event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | 208 | event = fanotify_alloc_event(inode, mask, data); |
| 175 | if (unlikely(!event)) | 209 | if (unlikely(!event)) |
| 176 | return -ENOMEM; | 210 | return -ENOMEM; |
| 177 | 211 | ||
| 178 | fsn_event = &event->fse; | 212 | fsn_event = &event->fse; |
| 179 | fsnotify_init_event(fsn_event, inode, mask); | ||
| 180 | event->tgid = get_pid(task_tgid(current)); | ||
| 181 | if (data_type == FSNOTIFY_EVENT_PATH) { | ||
| 182 | struct path *path = data; | ||
| 183 | event->path = *path; | ||
| 184 | path_get(&event->path); | ||
| 185 | } else { | ||
| 186 | event->path.mnt = NULL; | ||
| 187 | event->path.dentry = NULL; | ||
| 188 | } | ||
| 189 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
| 190 | event->response = 0; | ||
| 191 | #endif | ||
| 192 | |||
| 193 | ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); | 213 | ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); |
| 194 | if (ret) { | 214 | if (ret) { |
| 195 | /* Permission events shouldn't be merged */ | 215 | /* Permission events shouldn't be merged */ |
| @@ -202,7 +222,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, | |||
| 202 | 222 | ||
| 203 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 223 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 204 | if (mask & FAN_ALL_PERM_EVENTS) { | 224 | if (mask & FAN_ALL_PERM_EVENTS) { |
| 205 | ret = fanotify_get_response_from_access(group, event); | 225 | ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); |
| 206 | fsnotify_destroy_event(group, fsn_event); | 226 | fsnotify_destroy_event(group, fsn_event); |
| 207 | } | 227 | } |
| 208 | #endif | 228 | #endif |
| @@ -225,6 +245,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) | |||
| 225 | event = FANOTIFY_E(fsn_event); | 245 | event = FANOTIFY_E(fsn_event); |
| 226 | path_put(&event->path); | 246 | path_put(&event->path); |
| 227 | put_pid(event->tgid); | 247 | put_pid(event->tgid); |
| 248 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
| 249 | if (fsn_event->mask & FAN_ALL_PERM_EVENTS) { | ||
| 250 | kmem_cache_free(fanotify_perm_event_cachep, | ||
| 251 | FANOTIFY_PE(fsn_event)); | ||
| 252 | return; | ||
| 253 | } | ||
| 254 | #endif | ||
| 228 | kmem_cache_free(fanotify_event_cachep, event); | 255 | kmem_cache_free(fanotify_event_cachep, event); |
| 229 | } | 256 | } |
| 230 | 257 | ||
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 32a2f034fb94..2a5fb14115df 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h | |||
| @@ -3,13 +3,12 @@ | |||
| 3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
| 4 | 4 | ||
| 5 | extern struct kmem_cache *fanotify_event_cachep; | 5 | extern struct kmem_cache *fanotify_event_cachep; |
| 6 | extern struct kmem_cache *fanotify_perm_event_cachep; | ||
| 6 | 7 | ||
| 7 | /* | 8 | /* |
| 8 | * Lifetime of the structure differs for normal and permission events. In both | 9 | * Structure for normal fanotify events. It gets allocated in |
| 9 | * cases the structure is allocated in fanotify_handle_event(). For normal | 10 | * fanotify_handle_event() and freed when the information is retrieved by |
| 10 | * events the structure is freed immediately after reporting it to userspace. | 11 | * userspace |
| 11 | * For permission events we free it only after we receive response from | ||
| 12 | * userspace. | ||
| 13 | */ | 12 | */ |
| 14 | struct fanotify_event_info { | 13 | struct fanotify_event_info { |
| 15 | struct fsnotify_event fse; | 14 | struct fsnotify_event fse; |
| @@ -19,12 +18,33 @@ struct fanotify_event_info { | |||
| 19 | */ | 18 | */ |
| 20 | struct path path; | 19 | struct path path; |
| 21 | struct pid *tgid; | 20 | struct pid *tgid; |
| 21 | }; | ||
| 22 | |||
| 22 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 23 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 23 | u32 response; /* userspace answer to question */ | 24 | /* |
| 24 | #endif | 25 | * Structure for permission fanotify events. It gets allocated and freed in |
| 26 | * fanotify_handle_event() since we wait there for user response. When the | ||
| 27 | * information is retrieved by userspace the structure is moved from | ||
| 28 | * group->notification_list to group->fanotify_data.access_list to wait for | ||
| 29 | * user response. | ||
| 30 | */ | ||
| 31 | struct fanotify_perm_event_info { | ||
| 32 | struct fanotify_event_info fae; | ||
| 33 | int response; /* userspace answer to question */ | ||
| 34 | int fd; /* fd we passed to userspace for this event */ | ||
| 25 | }; | 35 | }; |
| 26 | 36 | ||
| 37 | static inline struct fanotify_perm_event_info * | ||
| 38 | FANOTIFY_PE(struct fsnotify_event *fse) | ||
| 39 | { | ||
| 40 | return container_of(fse, struct fanotify_perm_event_info, fae.fse); | ||
| 41 | } | ||
| 42 | #endif | ||
| 43 | |||
| 27 | static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) | 44 | static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) |
| 28 | { | 45 | { |
| 29 | return container_of(fse, struct fanotify_event_info, fse); | 46 | return container_of(fse, struct fanotify_event_info, fse); |
| 30 | } | 47 | } |
| 48 | |||
| 49 | struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, | ||
| 50 | struct path *path); | ||
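The FANOTIFY_PE() helper above works because fanotify_perm_event_info embeds fanotify_event_info (and, through it, fsnotify_event), so a pointer to the inner fse member can be walked back out with container_of(). A freestanding illustration of that idiom, with the structs reduced to stubs:

    #include <stddef.h>

    /* Minimal container_of; the kernel's version lives in <linux/kernel.h>. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct fsnotify_event { int mask; };
    struct fanotify_event_info { struct fsnotify_event fse; };
    struct fanotify_perm_event_info {
            struct fanotify_event_info fae;  /* embedded common part */
            int response;
            int fd;
    };

    /* Same shape as FANOTIFY_PE(): offset-based downcast from &...->fse;
     * the embedded member need not be first for this to work. */
    static struct fanotify_perm_event_info *
    to_perm_event(struct fsnotify_event *fse)
    {
            return container_of(fse, struct fanotify_perm_event_info, fae.fse);
    }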
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 287a22c04149..4e565c814309 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
| @@ -28,14 +28,8 @@ | |||
| 28 | extern const struct fsnotify_ops fanotify_fsnotify_ops; | 28 | extern const struct fsnotify_ops fanotify_fsnotify_ops; |
| 29 | 29 | ||
| 30 | static struct kmem_cache *fanotify_mark_cache __read_mostly; | 30 | static struct kmem_cache *fanotify_mark_cache __read_mostly; |
| 31 | static struct kmem_cache *fanotify_response_event_cache __read_mostly; | ||
| 32 | struct kmem_cache *fanotify_event_cachep __read_mostly; | 31 | struct kmem_cache *fanotify_event_cachep __read_mostly; |
| 33 | 32 | struct kmem_cache *fanotify_perm_event_cachep __read_mostly; | |
| 34 | struct fanotify_response_event { | ||
| 35 | struct list_head list; | ||
| 36 | __s32 fd; | ||
| 37 | struct fanotify_event_info *event; | ||
| 38 | }; | ||
| 39 | 33 | ||
| 40 | /* | 34 | /* |
| 41 | * Get an fsnotify notification event if one exists and is small | 35 | * Get an fsnotify notification event if one exists and is small |
| @@ -135,33 +129,34 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
| 135 | } | 129 | } |
| 136 | 130 | ||
| 137 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 131 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 138 | static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group, | 132 | static struct fanotify_perm_event_info *dequeue_event( |
| 139 | __s32 fd) | 133 | struct fsnotify_group *group, int fd) |
| 140 | { | 134 | { |
| 141 | struct fanotify_response_event *re, *return_re = NULL; | 135 | struct fanotify_perm_event_info *event, *return_e = NULL; |
| 142 | 136 | ||
| 143 | mutex_lock(&group->fanotify_data.access_mutex); | 137 | spin_lock(&group->fanotify_data.access_lock); |
| 144 | list_for_each_entry(re, &group->fanotify_data.access_list, list) { | 138 | list_for_each_entry(event, &group->fanotify_data.access_list, |
| 145 | if (re->fd != fd) | 139 | fae.fse.list) { |
| 140 | if (event->fd != fd) | ||
| 146 | continue; | 141 | continue; |
| 147 | 142 | ||
| 148 | list_del_init(&re->list); | 143 | list_del_init(&event->fae.fse.list); |
| 149 | return_re = re; | 144 | return_e = event; |
| 150 | break; | 145 | break; |
| 151 | } | 146 | } |
| 152 | mutex_unlock(&group->fanotify_data.access_mutex); | 147 | spin_unlock(&group->fanotify_data.access_lock); |
| 153 | 148 | ||
| 154 | pr_debug("%s: found return_re=%p\n", __func__, return_re); | 149 | pr_debug("%s: found return_re=%p\n", __func__, return_e); |
| 155 | 150 | ||
| 156 | return return_re; | 151 | return return_e; |
| 157 | } | 152 | } |
| 158 | 153 | ||
| 159 | static int process_access_response(struct fsnotify_group *group, | 154 | static int process_access_response(struct fsnotify_group *group, |
| 160 | struct fanotify_response *response_struct) | 155 | struct fanotify_response *response_struct) |
| 161 | { | 156 | { |
| 162 | struct fanotify_response_event *re; | 157 | struct fanotify_perm_event_info *event; |
| 163 | __s32 fd = response_struct->fd; | 158 | int fd = response_struct->fd; |
| 164 | __u32 response = response_struct->response; | 159 | int response = response_struct->response; |
| 165 | 160 | ||
| 166 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, | 161 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, |
| 167 | fd, response); | 162 | fd, response); |
| @@ -181,58 +176,15 @@ static int process_access_response(struct fsnotify_group *group, | |||
| 181 | if (fd < 0) | 176 | if (fd < 0) |
| 182 | return -EINVAL; | 177 | return -EINVAL; |
| 183 | 178 | ||
| 184 | re = dequeue_re(group, fd); | 179 | event = dequeue_event(group, fd); |
| 185 | if (!re) | 180 | if (!event) |
| 186 | return -ENOENT; | 181 | return -ENOENT; |
| 187 | 182 | ||
| 188 | re->event->response = response; | 183 | event->response = response; |
| 189 | |||
| 190 | wake_up(&group->fanotify_data.access_waitq); | 184 | wake_up(&group->fanotify_data.access_waitq); |
| 191 | 185 | ||
| 192 | kmem_cache_free(fanotify_response_event_cache, re); | ||
| 193 | |||
| 194 | return 0; | ||
| 195 | } | ||
| 196 | |||
| 197 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
| 198 | struct fsnotify_event *event, | ||
| 199 | __s32 fd) | ||
| 200 | { | ||
| 201 | struct fanotify_response_event *re; | ||
| 202 | |||
| 203 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
| 204 | return 0; | ||
| 205 | |||
| 206 | re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL); | ||
| 207 | if (!re) | ||
| 208 | return -ENOMEM; | ||
| 209 | |||
| 210 | re->event = FANOTIFY_E(event); | ||
| 211 | re->fd = fd; | ||
| 212 | |||
| 213 | mutex_lock(&group->fanotify_data.access_mutex); | ||
| 214 | |||
| 215 | if (atomic_read(&group->fanotify_data.bypass_perm)) { | ||
| 216 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
| 217 | kmem_cache_free(fanotify_response_event_cache, re); | ||
| 218 | FANOTIFY_E(event)->response = FAN_ALLOW; | ||
| 219 | return 0; | ||
| 220 | } | ||
| 221 | |||
| 222 | list_add_tail(&re->list, &group->fanotify_data.access_list); | ||
| 223 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
| 224 | |||
| 225 | return 0; | ||
| 226 | } | ||
| 227 | |||
| 228 | #else | ||
| 229 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
| 230 | struct fsnotify_event *event, | ||
| 231 | __s32 fd) | ||
| 232 | { | ||
| 233 | return 0; | 186 | return 0; |
| 234 | } | 187 | } |
| 235 | |||
| 236 | #endif | 188 | #endif |
| 237 | 189 | ||
| 238 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | 190 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
| @@ -247,7 +199,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
| 247 | 199 | ||
| 248 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); | 200 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); |
| 249 | if (ret < 0) | 201 | if (ret < 0) |
| 250 | goto out; | 202 | return ret; |
| 251 | 203 | ||
| 252 | fd = fanotify_event_metadata.fd; | 204 | fd = fanotify_event_metadata.fd; |
| 253 | ret = -EFAULT; | 205 | ret = -EFAULT; |
| @@ -255,9 +207,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
| 255 | fanotify_event_metadata.event_len)) | 207 | fanotify_event_metadata.event_len)) |
| 256 | goto out_close_fd; | 208 | goto out_close_fd; |
| 257 | 209 | ||
| 258 | ret = prepare_for_access_response(group, event, fd); | 210 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 259 | if (ret) | 211 | if (event->mask & FAN_ALL_PERM_EVENTS) |
| 260 | goto out_close_fd; | 212 | FANOTIFY_PE(event)->fd = fd; |
| 213 | #endif | ||
| 261 | 214 | ||
| 262 | if (fd != FAN_NOFD) | 215 | if (fd != FAN_NOFD) |
| 263 | fd_install(fd, f); | 216 | fd_install(fd, f); |
| @@ -268,13 +221,6 @@ out_close_fd: | |||
| 268 | put_unused_fd(fd); | 221 | put_unused_fd(fd); |
| 269 | fput(f); | 222 | fput(f); |
| 270 | } | 223 | } |
| 271 | out: | ||
| 272 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
| 273 | if (event->mask & FAN_ALL_PERM_EVENTS) { | ||
| 274 | FANOTIFY_E(event)->response = FAN_DENY; | ||
| 275 | wake_up(&group->fanotify_data.access_waitq); | ||
| 276 | } | ||
| 277 | #endif | ||
| 278 | return ret; | 224 | return ret; |
| 279 | } | 225 | } |
| 280 | 226 | ||
| @@ -314,35 +260,50 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, | |||
| 314 | kevent = get_one_event(group, count); | 260 | kevent = get_one_event(group, count); |
| 315 | mutex_unlock(&group->notification_mutex); | 261 | mutex_unlock(&group->notification_mutex); |
| 316 | 262 | ||
| 317 | if (kevent) { | 263 | if (IS_ERR(kevent)) { |
| 318 | ret = PTR_ERR(kevent); | 264 | ret = PTR_ERR(kevent); |
| 319 | if (IS_ERR(kevent)) | 265 | break; |
| 266 | } | ||
| 267 | |||
| 268 | if (!kevent) { | ||
| 269 | ret = -EAGAIN; | ||
| 270 | if (file->f_flags & O_NONBLOCK) | ||
| 320 | break; | 271 | break; |
| 321 | ret = copy_event_to_user(group, kevent, buf); | 272 | |
| 322 | /* | 273 | ret = -ERESTARTSYS; |
| 323 | * Permission events get destroyed after we | 274 | if (signal_pending(current)) |
| 324 | * receive response | 275 | break; |
| 325 | */ | 276 | |
| 326 | if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) | 277 | if (start != buf) |
| 327 | fsnotify_destroy_event(group, kevent); | ||
| 328 | if (ret < 0) | ||
| 329 | break; | 278 | break; |
| 330 | buf += ret; | 279 | schedule(); |
| 331 | count -= ret; | ||
| 332 | continue; | 280 | continue; |
| 333 | } | 281 | } |
| 334 | 282 | ||
| 335 | ret = -EAGAIN; | 283 | ret = copy_event_to_user(group, kevent, buf); |
| 336 | if (file->f_flags & O_NONBLOCK) | 284 | /* |
| 337 | break; | 285 | * Permission events get queued to wait for response. Other |
| 338 | ret = -ERESTARTSYS; | 286 | * events can be destroyed now. |
| 339 | if (signal_pending(current)) | 287 | */ |
| 340 | break; | 288 | if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { |
| 341 | 289 | fsnotify_destroy_event(group, kevent); | |
| 342 | if (start != buf) | 290 | if (ret < 0) |
| 343 | break; | 291 | break; |
| 344 | 292 | } else { | |
| 345 | schedule(); | 293 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 294 | if (ret < 0) { | ||
| 295 | FANOTIFY_PE(kevent)->response = FAN_DENY; | ||
| 296 | wake_up(&group->fanotify_data.access_waitq); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | spin_lock(&group->fanotify_data.access_lock); | ||
| 300 | list_add_tail(&kevent->list, | ||
| 301 | &group->fanotify_data.access_list); | ||
| 302 | spin_unlock(&group->fanotify_data.access_lock); | ||
| 303 | #endif | ||
| 304 | } | ||
| 305 | buf += ret; | ||
| 306 | count -= ret; | ||
| 346 | } | 307 | } |
| 347 | 308 | ||
| 348 | finish_wait(&group->notification_waitq, &wait); | 309 | finish_wait(&group->notification_waitq, &wait); |
| @@ -383,22 +344,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
| 383 | struct fsnotify_group *group = file->private_data; | 344 | struct fsnotify_group *group = file->private_data; |
| 384 | 345 | ||
| 385 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 346 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 386 | struct fanotify_response_event *re, *lre; | 347 | struct fanotify_perm_event_info *event, *next; |
| 387 | 348 | ||
| 388 | mutex_lock(&group->fanotify_data.access_mutex); | 349 | spin_lock(&group->fanotify_data.access_lock); |
| 389 | 350 | ||
| 390 | atomic_inc(&group->fanotify_data.bypass_perm); | 351 | atomic_inc(&group->fanotify_data.bypass_perm); |
| 391 | 352 | ||
| 392 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { | 353 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, |
| 393 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, | 354 | fae.fse.list) { |
| 394 | re, re->event); | 355 | pr_debug("%s: found group=%p event=%p\n", __func__, group, |
| 356 | event); | ||
| 395 | 357 | ||
| 396 | list_del_init(&re->list); | 358 | list_del_init(&event->fae.fse.list); |
| 397 | re->event->response = FAN_ALLOW; | 359 | event->response = FAN_ALLOW; |
| 398 | |||
| 399 | kmem_cache_free(fanotify_response_event_cache, re); | ||
| 400 | } | 360 | } |
| 401 | mutex_unlock(&group->fanotify_data.access_mutex); | 361 | spin_unlock(&group->fanotify_data.access_lock); |
| 402 | 362 | ||
| 403 | wake_up(&group->fanotify_data.access_waitq); | 363 | wake_up(&group->fanotify_data.access_waitq); |
| 404 | #endif | 364 | #endif |
| @@ -731,21 +691,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
| 731 | group->fanotify_data.user = user; | 691 | group->fanotify_data.user = user; |
| 732 | atomic_inc(&user->fanotify_listeners); | 692 | atomic_inc(&user->fanotify_listeners); |
| 733 | 693 | ||
| 734 | oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | 694 | oevent = fanotify_alloc_event(NULL, FS_Q_OVERFLOW, NULL); |
| 735 | if (unlikely(!oevent)) { | 695 | if (unlikely(!oevent)) { |
| 736 | fd = -ENOMEM; | 696 | fd = -ENOMEM; |
| 737 | goto out_destroy_group; | 697 | goto out_destroy_group; |
| 738 | } | 698 | } |
| 739 | group->overflow_event = &oevent->fse; | 699 | group->overflow_event = &oevent->fse; |
| 740 | fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); | ||
| 741 | oevent->tgid = get_pid(task_tgid(current)); | ||
| 742 | oevent->path.mnt = NULL; | ||
| 743 | oevent->path.dentry = NULL; | ||
| 744 | 700 | ||
| 745 | group->fanotify_data.f_flags = event_f_flags; | 701 | group->fanotify_data.f_flags = event_f_flags; |
| 746 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 702 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
| 747 | oevent->response = 0; | 703 | spin_lock_init(&group->fanotify_data.access_lock); |
| 748 | mutex_init(&group->fanotify_data.access_mutex); | ||
| 749 | init_waitqueue_head(&group->fanotify_data.access_waitq); | 704 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
| 750 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | 705 | INIT_LIST_HEAD(&group->fanotify_data.access_list); |
| 751 | atomic_set(&group->fanotify_data.bypass_perm, 0); | 706 | atomic_set(&group->fanotify_data.bypass_perm, 0); |
| @@ -920,9 +875,11 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, | |||
| 920 | static int __init fanotify_user_setup(void) | 875 | static int __init fanotify_user_setup(void) |
| 921 | { | 876 | { |
| 922 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); | 877 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); |
| 923 | fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, | ||
| 924 | SLAB_PANIC); | ||
| 925 | fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); | 878 | fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); |
| 879 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
| 880 | fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event_info, | ||
| 881 | SLAB_PANIC); | ||
| 882 | #endif | ||
| 926 | 883 | ||
| 927 | return 0; | 884 | return 0; |
| 928 | } | 885 | } |
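The fanotify_user.c rework above changes only the kernel-side bookkeeping (a dedicated slab cache and the event's own list linkage replace the separate fanotify_response_event); the userspace protocol is unchanged. A hedged sketch of a permission-event listener — the mount path and allow-everything policy are placeholders, and error handling is omitted:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/fanotify.h>

    int main(void)
    {
            struct fanotify_event_metadata ev;
            struct fanotify_response resp;
            /* FAN_CLASS_CONTENT is required for permission events. */
            int fd = fanotify_init(FAN_CLASS_CONTENT, O_RDONLY);

            /* "/mnt" is a placeholder; watch file opens on that mount. */
            fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
                          FAN_OPEN_PERM, AT_FDCWD, "/mnt");

            while (read(fd, &ev, sizeof(ev)) > 0) {
                    if (ev.mask & FAN_OPEN_PERM) {
                            resp.fd = ev.fd;           /* fd the kernel stashed */
                            resp.response = FAN_ALLOW; /* or FAN_DENY */
                            write(fd, &resp, sizeof(resp));
                    }
                    close(ev.fd);
            }
            return 0;
    }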
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index ffb9b3675736..9d8153ebacfb 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
| @@ -2259,7 +2259,7 @@ void ntfs_evict_big_inode(struct inode *vi) | |||
| 2259 | { | 2259 | { |
| 2260 | ntfs_inode *ni = NTFS_I(vi); | 2260 | ntfs_inode *ni = NTFS_I(vi); |
| 2261 | 2261 | ||
| 2262 | truncate_inode_pages(&vi->i_data, 0); | 2262 | truncate_inode_pages_final(&vi->i_data); |
| 2263 | clear_inode(vi); | 2263 | clear_inode(vi); |
| 2264 | 2264 | ||
| 2265 | #ifdef NTFS_RW | 2265 | #ifdef NTFS_RW |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 82650d52d916..bd5610d48242 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
| @@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
| 468 | 468 | ||
| 469 | ntfs_debug("Entering with remount options string: %s", opt); | 469 | ntfs_debug("Entering with remount options string: %s", opt); |
| 470 | 470 | ||
| 471 | sync_filesystem(sb); | ||
| 472 | |||
| 471 | #ifndef NTFS_RW | 473 | #ifndef NTFS_RW |
| 472 | /* For read-only compiled driver, enforce read-only flag. */ | 474 | /* For read-only compiled driver, enforce read-only flag. */ |
| 473 | *flags |= MS_RDONLY; | 475 | *flags |= MS_RDONLY; |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 555f4cddefe3..7e8282dcea2a 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
| @@ -205,6 +205,7 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | |||
| 205 | di->i_mode = cpu_to_le16(inode->i_mode); | 205 | di->i_mode = cpu_to_le16(inode->i_mode); |
| 206 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 206 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
| 207 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 207 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
| 208 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 208 | 209 | ||
| 209 | ocfs2_journal_dirty(handle, di_bh); | 210 | ocfs2_journal_dirty(handle, di_bh); |
| 210 | 211 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index e2edff38be52..b4deb5f750d9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -5728,6 +5728,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
| 5728 | } | 5728 | } |
| 5729 | 5729 | ||
| 5730 | ocfs2_et_update_clusters(et, -len); | 5730 | ocfs2_et_update_clusters(et, -len); |
| 5731 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 5731 | 5732 | ||
| 5732 | ocfs2_journal_dirty(handle, et->et_root_bh); | 5733 | ocfs2_journal_dirty(handle, et->et_root_bh); |
| 5733 | 5734 | ||
| @@ -6932,6 +6933,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6932 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | 6933 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); |
| 6933 | spin_unlock(&oi->ip_lock); | 6934 | spin_unlock(&oi->ip_lock); |
| 6934 | 6935 | ||
| 6936 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 6935 | ocfs2_dinode_new_extent_list(inode, di); | 6937 | ocfs2_dinode_new_extent_list(inode, di); |
| 6936 | 6938 | ||
| 6937 | ocfs2_journal_dirty(handle, di_bh); | 6939 | ocfs2_journal_dirty(handle, di_bh); |
| @@ -7208,6 +7210,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, | |||
| 7208 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); | 7210 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); |
| 7209 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 7211 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
| 7210 | 7212 | ||
| 7213 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 7211 | ocfs2_journal_dirty(handle, di_bh); | 7214 | ocfs2_journal_dirty(handle, di_bh); |
| 7212 | 7215 | ||
| 7213 | out_commit: | 7216 | out_commit: |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index aeb44e879c51..d310d12a9adc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -571,7 +571,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
| 571 | { | 571 | { |
| 572 | struct inode *inode = file_inode(iocb->ki_filp); | 572 | struct inode *inode = file_inode(iocb->ki_filp); |
| 573 | int level; | 573 | int level; |
| 574 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
| 575 | 574 | ||
| 576 | /* this io's submitter should not have unlocked this before we could */ | 575 | /* this io's submitter should not have unlocked this before we could */ |
| 577 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 576 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
| @@ -582,10 +581,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
| 582 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { | 581 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { |
| 583 | ocfs2_iocb_clear_unaligned_aio(iocb); | 582 | ocfs2_iocb_clear_unaligned_aio(iocb); |
| 584 | 583 | ||
| 585 | if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) && | 584 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); |
| 586 | waitqueue_active(wq)) { | ||
| 587 | wake_up_all(wq); | ||
| 588 | } | ||
| 589 | } | 585 | } |
| 590 | 586 | ||
| 591 | ocfs2_iocb_clear_rw_locked(iocb); | 587 | ocfs2_iocb_clear_rw_locked(iocb); |
| @@ -2043,6 +2039,7 @@ out_write_size: | |||
| 2043 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2039 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 2044 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 2040 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
| 2045 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 2041 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
| 2042 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 2046 | ocfs2_journal_dirty(handle, wc->w_di_bh); | 2043 | ocfs2_journal_dirty(handle, wc->w_di_bh); |
| 2047 | 2044 | ||
| 2048 | ocfs2_commit_trans(osb, handle); | 2045 | ocfs2_commit_trans(osb, handle); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index f671e49beb34..6cae155d54df 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -102,9 +102,4 @@ enum ocfs2_iocb_lock_bits { | |||
| 102 | #define ocfs2_iocb_is_unaligned_aio(iocb) \ | 102 | #define ocfs2_iocb_is_unaligned_aio(iocb) \ |
| 103 | test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) | 103 | test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) |
| 104 | 104 | ||
| 105 | #define OCFS2_IOEND_WQ_HASH_SZ 37 | ||
| 106 | #define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\ | ||
| 107 | OCFS2_IOEND_WQ_HASH_SZ]) | ||
| 108 | extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
| 109 | |||
| 110 | #endif /* OCFS2_FILE_H */ | 105 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5b704c63a103..1edcb141f639 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
| @@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
| 90 | * information for this bh as it's not marked locally | 90 | * information for this bh as it's not marked locally |
| 91 | * uptodate. */ | 91 | * uptodate. */ |
| 92 | ret = -EIO; | 92 | ret = -EIO; |
| 93 | put_bh(bh); | ||
| 94 | mlog_errno(ret); | 93 | mlog_errno(ret); |
| 95 | } | 94 | } |
| 96 | 95 | ||
| @@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
| 420 | 419 | ||
| 421 | if (!buffer_uptodate(bh)) { | 420 | if (!buffer_uptodate(bh)) { |
| 422 | ret = -EIO; | 421 | ret = -EIO; |
| 423 | put_bh(bh); | ||
| 424 | mlog_errno(ret); | 422 | mlog_errno(ret); |
| 425 | } | 423 | } |
| 426 | 424 | ||
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index a4b07730b2e1..b7f57271d49c 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
| @@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); | 41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); |
| 42 | } | 42 | } |
| 43 | static struct kobj_attribute attr_version = | 43 | static struct kobj_attribute attr_version = |
| 44 | __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); | 44 | __ATTR(interface_revision, S_IRUGO, version_show, NULL); |
| 45 | 45 | ||
| 46 | static struct attribute *o2cb_attrs[] = { | 46 | static struct attribute *o2cb_attrs[] = { |
| 47 | &attr_version.attr, | 47 | &attr_version.attr, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2cd2406b4140..eb649d23a4de 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -262,17 +262,17 @@ static void o2net_update_recv_stats(struct o2net_sock_container *sc) | |||
| 262 | 262 | ||
| 263 | #endif /* CONFIG_OCFS2_FS_STATS */ | 263 | #endif /* CONFIG_OCFS2_FS_STATS */ |
| 264 | 264 | ||
| 265 | static inline int o2net_reconnect_delay(void) | 265 | static inline unsigned int o2net_reconnect_delay(void) |
| 266 | { | 266 | { |
| 267 | return o2nm_single_cluster->cl_reconnect_delay_ms; | 267 | return o2nm_single_cluster->cl_reconnect_delay_ms; |
| 268 | } | 268 | } |
| 269 | 269 | ||
| 270 | static inline int o2net_keepalive_delay(void) | 270 | static inline unsigned int o2net_keepalive_delay(void) |
| 271 | { | 271 | { |
| 272 | return o2nm_single_cluster->cl_keepalive_delay_ms; | 272 | return o2nm_single_cluster->cl_keepalive_delay_ms; |
| 273 | } | 273 | } |
| 274 | 274 | ||
| 275 | static inline int o2net_idle_timeout(void) | 275 | static inline unsigned int o2net_idle_timeout(void) |
| 276 | { | 276 | { |
| 277 | return o2nm_single_cluster->cl_idle_timeout_ms; | 277 | return o2nm_single_cluster->cl_idle_timeout_ms; |
| 278 | } | 278 | } |
| @@ -1964,18 +1964,30 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes) | |||
| 1964 | goto out; | 1964 | goto out; |
| 1965 | } | 1965 | } |
| 1966 | 1966 | ||
| 1967 | /* ->sk_data_ready is also called for a newly established child socket | 1967 | /* This callback may called twice when a new connection |
| 1968 | * before it has been accepted and the acceptor has set up their | 1968 | * is being established as a child socket inherits everything |
| 1969 | * data_ready.. we only want to queue listen work for our listening | 1969 | * from a parent LISTEN socket, including the data_ready cb of |
| 1970 | * socket */ | 1970 | * the parent. This leads to a hazard. In o2net_accept_one() |
| 1971 | * we are still initializing the child socket but have not | ||
| 1972 | * changed the inherited data_ready callback yet when | ||
| 1973 | * data starts arriving. | ||
| 1974 | * We avoid this hazard by checking the state. | ||
| 1975 | * For the listening socket, the state will be TCP_LISTEN; for the new | ||
| 1976 | * socket, it will be TCP_ESTABLISHED. Also, in this case, | ||
| 1977 | * sk->sk_user_data is not a valid function pointer. | ||
| 1978 | */ | ||
| 1979 | |||
| 1971 | if (sk->sk_state == TCP_LISTEN) { | 1980 | if (sk->sk_state == TCP_LISTEN) { |
| 1972 | mlog(ML_TCP, "bytes: %d\n", bytes); | 1981 | mlog(ML_TCP, "bytes: %d\n", bytes); |
| 1973 | queue_work(o2net_wq, &o2net_listen_work); | 1982 | queue_work(o2net_wq, &o2net_listen_work); |
| 1983 | } else { | ||
| 1984 | ready = NULL; | ||
| 1974 | } | 1985 | } |
| 1975 | 1986 | ||
| 1976 | out: | 1987 | out: |
| 1977 | read_unlock(&sk->sk_callback_lock); | 1988 | read_unlock(&sk->sk_callback_lock); |
| 1978 | ready(sk, bytes); | 1989 | if (ready != NULL) |
| 1990 | ready(sk, bytes); | ||
| 1979 | } | 1991 | } |
| 1980 | 1992 | ||
| 1981 | static int o2net_open_listening_sock(__be32 addr, __be16 port) | 1993 | static int o2net_open_listening_sock(__be32 addr, __be16 port) |
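The o2net fix above is an instance of a common pattern for overriding ->sk_data_ready: take sk_callback_lock, decide whether this socket is really the instrumented one (the freshly inherited child is not), and invoke any forwarded callback only after dropping the lock. Sketched in isolation — details such as the teardown check on sk_user_data differ in the real function:

    static void guarded_data_ready(struct sock *sk, int bytes)
    {
            void (*ready)(struct sock *sk, int bytes) = NULL;

            read_lock(&sk->sk_callback_lock);
            if (sk->sk_state == TCP_LISTEN) {
                    /* our listening socket: hand off to the worker */
                    queue_work(o2net_wq, &o2net_listen_work);
                    ready = sk->sk_user_data;  /* saved original callback */
            }
            /* TCP_ESTABLISHED here means an inherited child socket whose
             * data_ready has not been re-pointed yet: do nothing. */
            read_unlock(&sk->sk_callback_lock);

            if (ready)
                    ready(sk, bytes);  /* never call it under the lock */
    }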
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 0d3a97d2d5f6..e2e05a106beb 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include "dlmglue.h" | 37 | #include "dlmglue.h" |
| 38 | #include "file.h" | 38 | #include "file.h" |
| 39 | #include "inode.h" | 39 | #include "inode.h" |
| 40 | #include "super.h" | ||
| 41 | #include "ocfs2_trace.h" | 40 | #include "ocfs2_trace.h" |
| 42 | 41 | ||
| 43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | 42 | void ocfs2_dentry_attach_gen(struct dentry *dentry) |
| @@ -346,52 +345,6 @@ out_attach: | |||
| 346 | return ret; | 345 | return ret; |
| 347 | } | 346 | } |
| 348 | 347 | ||
| 349 | DEFINE_SPINLOCK(dentry_list_lock); | ||
| 350 | |||
| 351 | /* We limit the number of dentry locks to drop in one go. We have | ||
| 352 | * this limit so that we don't starve other users of ocfs2_wq. */ | ||
| 353 | #define DL_INODE_DROP_COUNT 64 | ||
| 354 | |||
| 355 | /* Drop inode references from dentry locks */ | ||
| 356 | static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) | ||
| 357 | { | ||
| 358 | struct ocfs2_dentry_lock *dl; | ||
| 359 | |||
| 360 | spin_lock(&dentry_list_lock); | ||
| 361 | while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { | ||
| 362 | dl = osb->dentry_lock_list; | ||
| 363 | osb->dentry_lock_list = dl->dl_next; | ||
| 364 | spin_unlock(&dentry_list_lock); | ||
| 365 | iput(dl->dl_inode); | ||
| 366 | kfree(dl); | ||
| 367 | spin_lock(&dentry_list_lock); | ||
| 368 | } | ||
| 369 | spin_unlock(&dentry_list_lock); | ||
| 370 | } | ||
| 371 | |||
| 372 | void ocfs2_drop_dl_inodes(struct work_struct *work) | ||
| 373 | { | ||
| 374 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
| 375 | dentry_lock_work); | ||
| 376 | |||
| 377 | __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); | ||
| 378 | /* | ||
| 379 | * Don't queue dropping if umount is in progress. We flush the | ||
| 380 | * list in ocfs2_dismount_volume | ||
| 381 | */ | ||
| 382 | spin_lock(&dentry_list_lock); | ||
| 383 | if (osb->dentry_lock_list && | ||
| 384 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
| 385 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | ||
| 386 | spin_unlock(&dentry_list_lock); | ||
| 387 | } | ||
| 388 | |||
| 389 | /* Flush the whole work queue */ | ||
| 390 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) | ||
| 391 | { | ||
| 392 | __ocfs2_drop_dl_inodes(osb, -1); | ||
| 393 | } | ||
| 394 | |||
| 395 | /* | 348 | /* |
| 396 | * ocfs2_dentry_iput() and friends. | 349 | * ocfs2_dentry_iput() and friends. |
| 397 | * | 350 | * |
| @@ -416,24 +369,16 @@ void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) | |||
| 416 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, | 369 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, |
| 417 | struct ocfs2_dentry_lock *dl) | 370 | struct ocfs2_dentry_lock *dl) |
| 418 | { | 371 | { |
| 372 | iput(dl->dl_inode); | ||
| 419 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); | 373 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); |
| 420 | ocfs2_lock_res_free(&dl->dl_lockres); | 374 | ocfs2_lock_res_free(&dl->dl_lockres); |
| 421 | 375 | kfree(dl); | |
| 422 | /* We leave dropping of inode reference to ocfs2_wq as that can | ||
| 423 | * possibly lead to inode deletion which gets tricky */ | ||
| 424 | spin_lock(&dentry_list_lock); | ||
| 425 | if (!osb->dentry_lock_list && | ||
| 426 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
| 427 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | ||
| 428 | dl->dl_next = osb->dentry_lock_list; | ||
| 429 | osb->dentry_lock_list = dl; | ||
| 430 | spin_unlock(&dentry_list_lock); | ||
| 431 | } | 376 | } |
| 432 | 377 | ||
| 433 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | 378 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, |
| 434 | struct ocfs2_dentry_lock *dl) | 379 | struct ocfs2_dentry_lock *dl) |
| 435 | { | 380 | { |
| 436 | int unlock; | 381 | int unlock = 0; |
| 437 | 382 | ||
| 438 | BUG_ON(dl->dl_count == 0); | 383 | BUG_ON(dl->dl_count == 0); |
| 439 | 384 | ||
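With the deferred list gone, the whole drop path collapses to a direct release. A sketch of the resulting function, stitched together from the hunk for readability (not a verbatim copy):

	static void drop_dentry_lock(struct ocfs2_super *osb,
				     struct ocfs2_dentry_lock *dl)
	{
		/* iput() can now run here even though it may trigger inode
		 * deletion; the downconvert-thread deadlock that the old
		 * workqueue deferral avoided is handled inside
		 * ocfs2_mark_lockres_freeing() instead (see the dlmglue.c
		 * hunk further down). */
		iput(dl->dl_inode);
		ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
		ocfs2_lock_res_free(&dl->dl_lockres);
		kfree(dl);
	}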
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index b79eff709958..55f58892b153 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
| @@ -29,13 +29,8 @@ | |||
| 29 | extern const struct dentry_operations ocfs2_dentry_ops; | 29 | extern const struct dentry_operations ocfs2_dentry_ops; |
| 30 | 30 | ||
| 31 | struct ocfs2_dentry_lock { | 31 | struct ocfs2_dentry_lock { |
| 32 | /* Use count of dentry lock */ | ||
| 33 | unsigned int dl_count; | 32 | unsigned int dl_count; |
| 34 | union { | 33 | u64 dl_parent_blkno; |
| 35 | /* Linked list of dentry locks to release */ | ||
| 36 | struct ocfs2_dentry_lock *dl_next; | ||
| 37 | u64 dl_parent_blkno; | ||
| 38 | }; | ||
| 39 | 34 | ||
| 40 | /* | 35 | /* |
| 41 | * The ocfs2_dentry_lock keeps an inode reference until | 36 | * The ocfs2_dentry_lock keeps an inode reference until |
| @@ -49,14 +44,9 @@ struct ocfs2_dentry_lock { | |||
| 49 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, | 44 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, |
| 50 | u64 parent_blkno); | 45 | u64 parent_blkno); |
| 51 | 46 | ||
| 52 | extern spinlock_t dentry_list_lock; | ||
| 53 | |||
| 54 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | 47 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, |
| 55 | struct ocfs2_dentry_lock *dl); | 48 | struct ocfs2_dentry_lock *dl); |
| 56 | 49 | ||
| 57 | void ocfs2_drop_dl_inodes(struct work_struct *work); | ||
| 58 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb); | ||
| 59 | |||
| 60 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, | 50 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, |
| 61 | int skip_unhashed); | 51 | int skip_unhashed); |
| 62 | 52 | ||
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 91a7e85ac8fd..0717662b4aef 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -2957,6 +2957,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 2957 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); | 2957 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); |
| 2958 | } | 2958 | } |
| 2959 | 2959 | ||
| 2960 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 2960 | ocfs2_journal_dirty(handle, dirdata_bh); | 2961 | ocfs2_journal_dirty(handle, dirdata_bh); |
| 2961 | 2962 | ||
| 2962 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | 2963 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { |
| @@ -3005,6 +3006,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 3005 | di->i_size = cpu_to_le64(sb->s_blocksize); | 3006 | di->i_size = cpu_to_le64(sb->s_blocksize); |
| 3006 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 3007 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
| 3007 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 3008 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
| 3009 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 3008 | 3010 | ||
| 3009 | /* | 3011 | /* |
| 3010 | * This should never fail as our extent list is empty and all | 3012 | * This should never fail as our extent list is empty and all |
| @@ -3338,6 +3340,7 @@ do_extend: | |||
| 3338 | } else { | 3340 | } else { |
| 3339 | de->rec_len = cpu_to_le16(sb->s_blocksize); | 3341 | de->rec_len = cpu_to_le16(sb->s_blocksize); |
| 3340 | } | 3342 | } |
| 3343 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 3341 | ocfs2_journal_dirty(handle, new_bh); | 3344 | ocfs2_journal_dirty(handle, new_bh); |
| 3342 | 3345 | ||
| 3343 | dir_i_size += dir->i_sb->s_blocksize; | 3346 | dir_i_size += dir->i_sb->s_blocksize; |
| @@ -3896,6 +3899,7 @@ out_commit: | |||
| 3896 | dquot_free_space_nodirty(dir, | 3899 | dquot_free_space_nodirty(dir, |
| 3897 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | 3900 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); |
| 3898 | 3901 | ||
| 3902 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 3899 | ocfs2_commit_trans(osb, handle); | 3903 | ocfs2_commit_trans(osb, handle); |
| 3900 | 3904 | ||
| 3901 | out: | 3905 | out: |
| @@ -4134,6 +4138,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
| 4134 | mlog_errno(ret); | 4138 | mlog_errno(ret); |
| 4135 | did_quota = 0; | 4139 | did_quota = 0; |
| 4136 | 4140 | ||
| 4141 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 4137 | ocfs2_journal_dirty(handle, dx_root_bh); | 4142 | ocfs2_journal_dirty(handle, dx_root_bh); |
| 4138 | 4143 | ||
| 4139 | out_commit: | 4144 | out_commit: |
| @@ -4401,6 +4406,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir, | |||
| 4401 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | 4406 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); |
| 4402 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 4407 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
| 4403 | di->i_dx_root = cpu_to_le64(0ULL); | 4408 | di->i_dx_root = cpu_to_le64(0ULL); |
| 4409 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
| 4404 | 4410 | ||
| 4405 | ocfs2_journal_dirty(handle, di_bh); | 4411 | ocfs2_journal_dirty(handle, di_bh); |
| 4406 | 4412 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 33660a4a52fa..c973690dc0bc 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -1123,7 +1123,6 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
| 1123 | struct dlm_ctxt *dlm = NULL; | 1123 | struct dlm_ctxt *dlm = NULL; |
| 1124 | char *local = NULL; | 1124 | char *local = NULL; |
| 1125 | int status = 0; | 1125 | int status = 0; |
| 1126 | int locked = 0; | ||
| 1127 | 1126 | ||
| 1128 | qr = (struct dlm_query_region *) msg->buf; | 1127 | qr = (struct dlm_query_region *) msg->buf; |
| 1129 | 1128 | ||
| @@ -1132,10 +1131,8 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
| 1132 | 1131 | ||
| 1133 | /* buffer used in dlm_mast_regions() */ | 1132 | /* buffer used in dlm_mast_regions() */ |
| 1134 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | 1133 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); |
| 1135 | if (!local) { | 1134 | if (!local) |
| 1136 | status = -ENOMEM; | 1135 | return -ENOMEM; |
| 1137 | goto bail; | ||
| 1138 | } | ||
| 1139 | 1136 | ||
| 1140 | status = -EINVAL; | 1137 | status = -EINVAL; |
| 1141 | 1138 | ||
| @@ -1144,16 +1141,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
| 1144 | if (!dlm) { | 1141 | if (!dlm) { |
| 1145 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | 1142 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
| 1146 | "before join domain\n", qr->qr_node, qr->qr_domain); | 1143 | "before join domain\n", qr->qr_node, qr->qr_domain); |
| 1147 | goto bail; | 1144 | goto out_domain_lock; |
| 1148 | } | 1145 | } |
| 1149 | 1146 | ||
| 1150 | spin_lock(&dlm->spinlock); | 1147 | spin_lock(&dlm->spinlock); |
| 1151 | locked = 1; | ||
| 1152 | if (dlm->joining_node != qr->qr_node) { | 1148 | if (dlm->joining_node != qr->qr_node) { |
| 1153 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | 1149 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
| 1154 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | 1150 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, |
| 1155 | dlm->joining_node); | 1151 | dlm->joining_node); |
| 1156 | goto bail; | 1152 | goto out_dlm_lock; |
| 1157 | } | 1153 | } |
| 1158 | 1154 | ||
| 1159 | /* Support for global heartbeat was added in 1.1 */ | 1155 | /* Support for global heartbeat was added in 1.1 */ |
| @@ -1163,14 +1159,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
| 1163 | "but active dlm protocol is %d.%d\n", qr->qr_node, | 1159 | "but active dlm protocol is %d.%d\n", qr->qr_node, |
| 1164 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | 1160 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, |
| 1165 | dlm->dlm_locking_proto.pv_minor); | 1161 | dlm->dlm_locking_proto.pv_minor); |
| 1166 | goto bail; | 1162 | goto out_dlm_lock; |
| 1167 | } | 1163 | } |
| 1168 | 1164 | ||
| 1169 | status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); | 1165 | status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); |
| 1170 | 1166 | ||
| 1171 | bail: | 1167 | out_dlm_lock: |
| 1172 | if (locked) | 1168 | spin_unlock(&dlm->spinlock); |
| 1173 | spin_unlock(&dlm->spinlock); | 1169 | |
| 1170 | out_domain_lock: | ||
| 1174 | spin_unlock(&dlm_domain_lock); | 1171 | spin_unlock(&dlm_domain_lock); |
| 1175 | 1172 | ||
| 1176 | kfree(local); | 1173 | kfree(local); |
| @@ -1877,19 +1874,19 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
| 1877 | goto bail; | 1874 | goto bail; |
| 1878 | } | 1875 | } |
| 1879 | 1876 | ||
| 1880 | status = dlm_debug_init(dlm); | 1877 | status = dlm_launch_thread(dlm); |
| 1881 | if (status < 0) { | 1878 | if (status < 0) { |
| 1882 | mlog_errno(status); | 1879 | mlog_errno(status); |
| 1883 | goto bail; | 1880 | goto bail; |
| 1884 | } | 1881 | } |
| 1885 | 1882 | ||
| 1886 | status = dlm_launch_thread(dlm); | 1883 | status = dlm_launch_recovery_thread(dlm); |
| 1887 | if (status < 0) { | 1884 | if (status < 0) { |
| 1888 | mlog_errno(status); | 1885 | mlog_errno(status); |
| 1889 | goto bail; | 1886 | goto bail; |
| 1890 | } | 1887 | } |
| 1891 | 1888 | ||
| 1892 | status = dlm_launch_recovery_thread(dlm); | 1889 | status = dlm_debug_init(dlm); |
| 1893 | if (status < 0) { | 1890 | if (status < 0) { |
| 1894 | mlog_errno(status); | 1891 | mlog_errno(status); |
| 1895 | goto bail; | 1892 | goto bail; |
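The dlm_query_region_handler() cleanup above replaces the "locked" bookkeeping flag with the usual one-unwind-label-per-lock idiom. The idiom in isolation, with all names hypothetical:

	static int query_handler(void)
	{
		int status;
		char *buf = kmalloc(BUF_SZ, GFP_KERNEL);	/* hypothetical size */

		if (!buf)
			return -ENOMEM;		/* nothing taken yet, plain return */

		status = -EINVAL;
		spin_lock(&domain_lock);
		if (!domain_ready())
			goto out_domain_lock;	/* only the outer lock held */

		spin_lock(&ctxt_lock);
		if (!node_matches())
			goto out_ctxt_lock;	/* both locks held */

		status = do_match(buf);

	out_ctxt_lock:
		spin_unlock(&ctxt_lock);
	out_domain_lock:
		spin_unlock(&domain_lock);
		kfree(buf);
		return status;
	}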
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7035af09cc03..fe29f7978f81 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -537,7 +537,10 @@ master_here: | |||
| 537 | /* success! see if any other nodes need recovery */ | 537 | /* success! see if any other nodes need recovery */ |
| 538 | mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", | 538 | mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", |
| 539 | dlm->name, dlm->reco.dead_node, dlm->node_num); | 539 | dlm->name, dlm->reco.dead_node, dlm->node_num); |
| 540 | dlm_reset_recovery(dlm); | 540 | spin_lock(&dlm->spinlock); |
| 541 | __dlm_reset_recovery(dlm); | ||
| 542 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | ||
| 543 | spin_unlock(&dlm->spinlock); | ||
| 541 | } | 544 | } |
| 542 | dlm_end_recovery(dlm); | 545 | dlm_end_recovery(dlm); |
| 543 | 546 | ||
| @@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 695 | if (all_nodes_done) { | 698 | if (all_nodes_done) { |
| 696 | int ret; | 699 | int ret; |
| 697 | 700 | ||
| 701 | /* Set this flag on the recovery master to avoid | ||
| 702 | * starting a new recovery for another dead node | ||
| 703 | * before the current recovery is done. That could | ||
| 704 | * cause recovery to hang. */ | ||
| 705 | spin_lock(&dlm->spinlock); | ||
| 706 | dlm->reco.state |= DLM_RECO_STATE_FINALIZE; | ||
| 707 | spin_unlock(&dlm->spinlock); | ||
| 708 | |||
| 698 | /* all nodes are now in DLM_RECO_NODE_DATA_DONE state | 709 | /* all nodes are now in DLM_RECO_NODE_DATA_DONE state |
| 699 | * just send a finalize message to everyone and | 710 | * just send a finalize message to everyone and |
| 700 | * clean up */ | 711 | * clean up */ |
| @@ -1750,13 +1761,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
| 1750 | struct dlm_migratable_lockres *mres) | 1761 | struct dlm_migratable_lockres *mres) |
| 1751 | { | 1762 | { |
| 1752 | struct dlm_migratable_lock *ml; | 1763 | struct dlm_migratable_lock *ml; |
| 1753 | struct list_head *queue; | 1764 | struct list_head *queue, *iter; |
| 1754 | struct list_head *tmpq = NULL; | 1765 | struct list_head *tmpq = NULL; |
| 1755 | struct dlm_lock *newlock = NULL; | 1766 | struct dlm_lock *newlock = NULL; |
| 1756 | struct dlm_lockstatus *lksb = NULL; | 1767 | struct dlm_lockstatus *lksb = NULL; |
| 1757 | int ret = 0; | 1768 | int ret = 0; |
| 1758 | int i, j, bad; | 1769 | int i, j, bad; |
| 1759 | struct dlm_lock *lock = NULL; | 1770 | struct dlm_lock *lock; |
| 1760 | u8 from = O2NM_MAX_NODES; | 1771 | u8 from = O2NM_MAX_NODES; |
| 1761 | unsigned int added = 0; | 1772 | unsigned int added = 0; |
| 1762 | __be64 c; | 1773 | __be64 c; |
| @@ -1791,14 +1802,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
| 1791 | /* MIGRATION ONLY! */ | 1802 | /* MIGRATION ONLY! */ |
| 1792 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); | 1803 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); |
| 1793 | 1804 | ||
| 1805 | lock = NULL; | ||
| 1794 | spin_lock(&res->spinlock); | 1806 | spin_lock(&res->spinlock); |
| 1795 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { | 1807 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
| 1796 | tmpq = dlm_list_idx_to_ptr(res, j); | 1808 | tmpq = dlm_list_idx_to_ptr(res, j); |
| 1797 | list_for_each_entry(lock, tmpq, list) { | 1809 | list_for_each(iter, tmpq) { |
| 1798 | if (lock->ml.cookie != ml->cookie) | 1810 | lock = list_entry(iter, |
| 1799 | lock = NULL; | 1811 | struct dlm_lock, list); |
| 1800 | else | 1812 | if (lock->ml.cookie == ml->cookie) |
| 1801 | break; | 1813 | break; |
| 1814 | lock = NULL; | ||
| 1802 | } | 1815 | } |
| 1803 | if (lock) | 1816 | if (lock) |
| 1804 | break; | 1817 | break; |
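The iterator change above is subtle: when list_for_each_entry() finishes without a break, its cursor points at a container_of() of the list head rather than NULL, so resetting the cursor inside the loop body was a fragile way to signal "not found" across the two nested loops. The plain list_for_each() form keeps NULL as a trustworthy sentinel. The search in isolation, a sketch using the same fields as the hunk:

	struct dlm_lock *found = NULL;
	struct list_head *iter;

	list_for_each(iter, tmpq) {
		struct dlm_lock *lock = list_entry(iter, struct dlm_lock, list);

		if (lock->ml.cookie == ml->cookie) {
			found = lock;
			break;
		}
	}
	/* found == NULL here reliably means "no matching cookie" */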
| @@ -2882,8 +2895,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 2882 | BUG(); | 2895 | BUG(); |
| 2883 | } | 2896 | } |
| 2884 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | 2897 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; |
| 2898 | __dlm_reset_recovery(dlm); | ||
| 2885 | spin_unlock(&dlm->spinlock); | 2899 | spin_unlock(&dlm->spinlock); |
| 2886 | dlm_reset_recovery(dlm); | ||
| 2887 | dlm_kick_recovery_thread(dlm); | 2900 | dlm_kick_recovery_thread(dlm); |
| 2888 | break; | 2901 | break; |
| 2889 | default: | 2902 | default: |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 19986959d149..6bd690b5a061 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -3144,22 +3144,60 @@ out: | |||
| 3144 | return 0; | 3144 | return 0; |
| 3145 | } | 3145 | } |
| 3146 | 3146 | ||
| 3147 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | ||
| 3148 | struct ocfs2_lock_res *lockres); | ||
| 3149 | |||
| 3147 | /* Mark the lockres as being dropped. It will no longer be | 3150 | /* Mark the lockres as being dropped. It will no longer be |
| 3148 | * queued if blocking, but we still may have to wait on it | 3151 | * queued if blocking, but we still may have to wait on it |
| 3149 | * being dequeued from the downconvert thread before we can consider | 3152 | * being dequeued from the downconvert thread before we can consider |
| 3150 | * it safe to drop. | 3153 | * it safe to drop. |
| 3151 | * | 3154 | * |
| 3152 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 3155 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
| 3153 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 3156 | void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, |
| 3157 | struct ocfs2_lock_res *lockres) | ||
| 3154 | { | 3158 | { |
| 3155 | int status; | 3159 | int status; |
| 3156 | struct ocfs2_mask_waiter mw; | 3160 | struct ocfs2_mask_waiter mw; |
| 3157 | unsigned long flags; | 3161 | unsigned long flags, flags2; |
| 3158 | 3162 | ||
| 3159 | ocfs2_init_mask_waiter(&mw); | 3163 | ocfs2_init_mask_waiter(&mw); |
| 3160 | 3164 | ||
| 3161 | spin_lock_irqsave(&lockres->l_lock, flags); | 3165 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 3162 | lockres->l_flags |= OCFS2_LOCK_FREEING; | 3166 | lockres->l_flags |= OCFS2_LOCK_FREEING; |
| 3167 | if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) { | ||
| 3168 | /* | ||
| 3169 | * We know the downconvert is queued but not in progress | ||
| 3170 | * because we are the downconvert thread and processing | ||
| 3171 | * different lock. So we can just remove the lock from the | ||
| 3172 | * queue. This is not only an optimization but also a way | ||
| 3173 | * to avoid the following deadlock: | ||
| 3174 | * ocfs2_dentry_post_unlock() | ||
| 3175 | * ocfs2_dentry_lock_put() | ||
| 3176 | * ocfs2_drop_dentry_lock() | ||
| 3177 | * iput() | ||
| 3178 | * ocfs2_evict_inode() | ||
| 3179 | * ocfs2_clear_inode() | ||
| 3180 | * ocfs2_mark_lockres_freeing() | ||
| 3181 | * ... blocks waiting for OCFS2_LOCK_QUEUED | ||
| 3182 | * since we are the downconvert thread which | ||
| 3183 | * should clear the flag. | ||
| 3184 | */ | ||
| 3185 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 3186 | spin_lock_irqsave(&osb->dc_task_lock, flags2); | ||
| 3187 | list_del_init(&lockres->l_blocked_list); | ||
| 3188 | osb->blocked_lock_count--; | ||
| 3189 | spin_unlock_irqrestore(&osb->dc_task_lock, flags2); | ||
| 3190 | /* | ||
| 3191 | * Warn if we recurse into another post_unlock call. Strictly | ||
| 3192 | * speaking it isn't a problem but we need to be careful if | ||
| 3193 | * that happens (stack overflow, deadlocks, ...) so warn if | ||
| 3194 | * ocfs2 grows a path for which this can happen. | ||
| 3195 | */ | ||
| 3196 | WARN_ON_ONCE(lockres->l_ops->post_unlock); | ||
| 3197 | /* Since the lock is freeing we don't do much in the fn below */ | ||
| 3198 | ocfs2_process_blocked_lock(osb, lockres); | ||
| 3199 | return; | ||
| 3200 | } | ||
| 3163 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 3201 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { |
| 3164 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 3202 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); |
| 3165 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3203 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| @@ -3180,7 +3218,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | |||
| 3180 | { | 3218 | { |
| 3181 | int ret; | 3219 | int ret; |
| 3182 | 3220 | ||
| 3183 | ocfs2_mark_lockres_freeing(lockres); | 3221 | ocfs2_mark_lockres_freeing(osb, lockres); |
| 3184 | ret = ocfs2_drop_lock(osb, lockres); | 3222 | ret = ocfs2_drop_lock(osb, lockres); |
| 3185 | if (ret) | 3223 | if (ret) |
| 3186 | mlog_errno(ret); | 3224 | mlog_errno(ret); |
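The new branch in ocfs2_mark_lockres_freeing() applies a general rule: a thread that services a queue must never sleep waiting for its own queue to drain. A condensed sketch of the decision (the list surgery under dc_task_lock is elided; fields as in the hunk):

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres->l_flags |= OCFS2_LOCK_FREEING;
	if ((lockres->l_flags & OCFS2_LOCK_QUEUED) && current == osb->dc_task) {
		/* we ARE the downconvert thread; nobody else will clear
		 * OCFS2_LOCK_QUEUED, so dequeue and process it inline */
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		ocfs2_process_blocked_lock(osb, lockres); /* near no-op: FREEING set */
		return;
	}
	/* any other task can safely wait for the downconvert thread */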
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 1d596d8c4a4a..d293a22c32c5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
| @@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex); | |||
| 157 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); | 157 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); |
| 158 | 158 | ||
| 159 | 159 | ||
| 160 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 160 | void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, |
| 161 | struct ocfs2_lock_res *lockres); | ||
| 161 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 162 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
| 162 | struct ocfs2_lock_res *lockres); | 163 | struct ocfs2_lock_res *lockres); |
| 163 | 164 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8450262bcf2a..ff33c5ef87f2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -175,9 +175,13 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, | |||
| 175 | int datasync) | 175 | int datasync) |
| 176 | { | 176 | { |
| 177 | int err = 0; | 177 | int err = 0; |
| 178 | journal_t *journal; | ||
| 179 | struct inode *inode = file->f_mapping->host; | 178 | struct inode *inode = file->f_mapping->host; |
| 180 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 179 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 180 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 181 | journal_t *journal = osb->journal->j_journal; | ||
| 182 | int ret; | ||
| 183 | tid_t commit_tid; | ||
| 184 | bool needs_barrier = false; | ||
| 181 | 185 | ||
| 182 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, | 186 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, |
| 183 | OCFS2_I(inode)->ip_blkno, | 187 | OCFS2_I(inode)->ip_blkno, |
| @@ -192,29 +196,19 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, | |||
| 192 | if (err) | 196 | if (err) |
| 193 | return err; | 197 | return err; |
| 194 | 198 | ||
| 195 | /* | 199 | commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid; |
| 196 | * Probably don't need the i_mutex at all in here, just putting it here | 200 | if (journal->j_flags & JBD2_BARRIER && |
| 197 | * to be consistent with how fsync used to be called, someone more | 201 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
| 198 | * familiar with the fs could possibly remove it. | 202 | needs_barrier = true; |
| 199 | */ | 203 | err = jbd2_complete_transaction(journal, commit_tid); |
| 200 | mutex_lock(&inode->i_mutex); | 204 | if (needs_barrier) { |
| 201 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { | 205 | ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
| 202 | /* | 206 | if (!err) |
| 203 | * We still have to flush drive's caches to get data to the | 207 | err = ret; |
| 204 | * platter | ||
| 205 | */ | ||
| 206 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | ||
| 207 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | ||
| 208 | goto bail; | ||
| 209 | } | 208 | } |
| 210 | 209 | ||
| 211 | journal = osb->journal->j_journal; | ||
| 212 | err = jbd2_journal_force_commit(journal); | ||
| 213 | |||
| 214 | bail: | ||
| 215 | if (err) | 210 | if (err) |
| 216 | mlog_errno(err); | 211 | mlog_errno(err); |
| 217 | mutex_unlock(&inode->i_mutex); | ||
| 218 | 212 | ||
| 219 | return (err < 0) ? -EIO : 0; | 213 | return (err < 0) ? -EIO : 0; |
| 220 | } | 214 | } |
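The rewritten ocfs2_sync_file() no longer forces a commit of whatever transaction happens to be running; it waits only for the transaction recorded on the inode and issues a disk cache flush only when that commit will not send one itself. The core decision, reduced to a few lines as a sketch (jbd2 interfaces as used in the hunk):

	tid_t tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
	bool needs_barrier = (journal->j_flags & JBD2_BARRIER) &&
			     !jbd2_trans_will_send_data_barrier(journal, tid);

	err = jbd2_complete_transaction(journal, tid);	/* waits; commits if needed */
	if (needs_barrier) {
		ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
		if (!err)
			err = ret;	/* keep the first error */
	}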
| @@ -292,6 +286,7 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
| 292 | inode->i_atime = CURRENT_TIME; | 286 | inode->i_atime = CURRENT_TIME; |
| 293 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | 287 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); |
| 294 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); | 288 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); |
| 289 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 295 | ocfs2_journal_dirty(handle, bh); | 290 | ocfs2_journal_dirty(handle, bh); |
| 296 | 291 | ||
| 297 | out_commit: | 292 | out_commit: |
| @@ -341,6 +336,7 @@ int ocfs2_simple_size_update(struct inode *inode, | |||
| 341 | if (ret < 0) | 336 | if (ret < 0) |
| 342 | mlog_errno(ret); | 337 | mlog_errno(ret); |
| 343 | 338 | ||
| 339 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 344 | ocfs2_commit_trans(osb, handle); | 340 | ocfs2_commit_trans(osb, handle); |
| 345 | out: | 341 | out: |
| 346 | return ret; | 342 | return ret; |
| @@ -435,6 +431,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
| 435 | di->i_size = cpu_to_le64(new_i_size); | 431 | di->i_size = cpu_to_le64(new_i_size); |
| 436 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); | 432 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); |
| 437 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 433 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
| 434 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 438 | 435 | ||
| 439 | ocfs2_journal_dirty(handle, fe_bh); | 436 | ocfs2_journal_dirty(handle, fe_bh); |
| 440 | 437 | ||
| @@ -650,7 +647,7 @@ restarted_transaction: | |||
| 650 | mlog_errno(status); | 647 | mlog_errno(status); |
| 651 | goto leave; | 648 | goto leave; |
| 652 | } | 649 | } |
| 653 | 650 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | |
| 654 | ocfs2_journal_dirty(handle, bh); | 651 | ocfs2_journal_dirty(handle, bh); |
| 655 | 652 | ||
| 656 | spin_lock(&OCFS2_I(inode)->ip_lock); | 653 | spin_lock(&OCFS2_I(inode)->ip_lock); |
| @@ -743,6 +740,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, | |||
| 743 | OCFS2_JOURNAL_ACCESS_WRITE); | 740 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 744 | if (ret) | 741 | if (ret) |
| 745 | mlog_errno(ret); | 742 | mlog_errno(ret); |
| 743 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 746 | 744 | ||
| 747 | out: | 745 | out: |
| 748 | if (ret) { | 746 | if (ret) { |
| @@ -840,6 +838,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
| 840 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 838 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
| 841 | di->i_mtime_nsec = di->i_ctime_nsec; | 839 | di->i_mtime_nsec = di->i_ctime_nsec; |
| 842 | ocfs2_journal_dirty(handle, di_bh); | 840 | ocfs2_journal_dirty(handle, di_bh); |
| 841 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 843 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 842 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
| 844 | } | 843 | } |
| 845 | 844 | ||
| @@ -1344,6 +1343,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode, | |||
| 1344 | 1343 | ||
| 1345 | di = (struct ocfs2_dinode *) bh->b_data; | 1344 | di = (struct ocfs2_dinode *) bh->b_data; |
| 1346 | di->i_mode = cpu_to_le16(inode->i_mode); | 1345 | di->i_mode = cpu_to_le16(inode->i_mode); |
| 1346 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 1347 | 1347 | ||
| 1348 | ocfs2_journal_dirty(handle, bh); | 1348 | ocfs2_journal_dirty(handle, bh); |
| 1349 | 1349 | ||
| @@ -1576,6 +1576,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, | |||
| 1576 | if (ret) | 1576 | if (ret) |
| 1577 | mlog_errno(ret); | 1577 | mlog_errno(ret); |
| 1578 | } | 1578 | } |
| 1579 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 1579 | 1580 | ||
| 1580 | ocfs2_commit_trans(osb, handle); | 1581 | ocfs2_commit_trans(osb, handle); |
| 1581 | out: | 1582 | out: |
| @@ -2061,13 +2062,6 @@ out: | |||
| 2061 | return ret; | 2062 | return ret; |
| 2062 | } | 2063 | } |
| 2063 | 2064 | ||
| 2064 | static void ocfs2_aiodio_wait(struct inode *inode) | ||
| 2065 | { | ||
| 2066 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
| 2067 | |||
| 2068 | wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0)); | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) | 2065 | static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) |
| 2072 | { | 2066 | { |
| 2073 | int blockmask = inode->i_sb->s_blocksize - 1; | 2067 | int blockmask = inode->i_sb->s_blocksize - 1; |
| @@ -2345,10 +2339,8 @@ relock: | |||
| 2345 | * Wait on previous unaligned aio to complete before | 2339 | * Wait on previous unaligned aio to complete before |
| 2346 | * proceeding. | 2340 | * proceeding. |
| 2347 | */ | 2341 | */ |
| 2348 | ocfs2_aiodio_wait(inode); | 2342 | mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); |
| 2349 | 2343 | /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ | |
| 2350 | /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */ | ||
| 2351 | atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio); | ||
| 2352 | ocfs2_iocb_set_unaligned_aio(iocb); | 2344 | ocfs2_iocb_set_unaligned_aio(iocb); |
| 2353 | } | 2345 | } |
| 2354 | 2346 | ||
| @@ -2393,8 +2385,8 @@ out_dio: | |||
| 2393 | 2385 | ||
| 2394 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2386 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
| 2395 | ((file->f_flags & O_DIRECT) && !direct_io)) { | 2387 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
| 2396 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2388 | ret = filemap_fdatawrite_range(file->f_mapping, *ppos, |
| 2397 | pos + count - 1); | 2389 | *ppos + count - 1); |
| 2398 | if (ret < 0) | 2390 | if (ret < 0) |
| 2399 | written = ret; | 2391 | written = ret; |
| 2400 | 2392 | ||
| @@ -2407,8 +2399,8 @@ out_dio: | |||
| 2407 | } | 2399 | } |
| 2408 | 2400 | ||
| 2409 | if (!ret) | 2401 | if (!ret) |
| 2410 | ret = filemap_fdatawait_range(file->f_mapping, pos, | 2402 | ret = filemap_fdatawait_range(file->f_mapping, *ppos, |
| 2411 | pos + count - 1); | 2403 | *ppos + count - 1); |
| 2412 | } | 2404 | } |
| 2413 | 2405 | ||
| 2414 | /* | 2406 | /* |
| @@ -2428,7 +2420,7 @@ out_dio: | |||
| 2428 | 2420 | ||
| 2429 | if (unaligned_dio) { | 2421 | if (unaligned_dio) { |
| 2430 | ocfs2_iocb_clear_unaligned_aio(iocb); | 2422 | ocfs2_iocb_clear_unaligned_aio(iocb); |
| 2431 | atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); | 2423 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); |
| 2432 | } | 2424 | } |
| 2433 | 2425 | ||
| 2434 | out: | 2426 | out: |
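Note the companion change in inode.h below: ip_unaligned_aio turns from an atomic counter plus wait queue into a mutex held for the duration of an unaligned direct I/O. The intended protocol, sketched; the is/clear helper names follow the set/clear calls in the hunk and are otherwise assumed:

	/* submission side (ocfs2_file_aio_write) */
	mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
	ocfs2_iocb_set_unaligned_aio(iocb);	/* tells the end_io path to unlock */

	/* completion side, once the dio has finished */
	if (ocfs2_iocb_is_unaligned_aio(iocb)) {
		ocfs2_iocb_clear_unaligned_aio(iocb);
		mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
	}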
| @@ -2645,7 +2637,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) | |||
| 2645 | case SEEK_SET: | 2637 | case SEEK_SET: |
| 2646 | break; | 2638 | break; |
| 2647 | case SEEK_END: | 2639 | case SEEK_END: |
| 2648 | offset += inode->i_size; | 2640 | /* SEEK_END requires the OCFS2 inode lock for the file |
| 2641 | * because it references the file's size. | ||
| 2642 | */ | ||
| 2643 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
| 2644 | if (ret < 0) { | ||
| 2645 | mlog_errno(ret); | ||
| 2646 | goto out; | ||
| 2647 | } | ||
| 2648 | offset += i_size_read(inode); | ||
| 2649 | ocfs2_inode_unlock(inode, 0); | ||
| 2649 | break; | 2650 | break; |
| 2650 | case SEEK_CUR: | 2651 | case SEEK_CUR: |
| 2651 | if (offset == 0) { | 2652 | if (offset == 0) { |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index f29a90fde619..437de7f768c6 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | |||
| 130 | struct inode *inode = NULL; | 130 | struct inode *inode = NULL; |
| 131 | struct super_block *sb = osb->sb; | 131 | struct super_block *sb = osb->sb; |
| 132 | struct ocfs2_find_inode_args args; | 132 | struct ocfs2_find_inode_args args; |
| 133 | journal_t *journal = OCFS2_SB(sb)->journal->j_journal; | ||
| 133 | 134 | ||
| 134 | trace_ocfs2_iget_begin((unsigned long long)blkno, flags, | 135 | trace_ocfs2_iget_begin((unsigned long long)blkno, flags, |
| 135 | sysfile_type); | 136 | sysfile_type); |
| @@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | |||
| 169 | goto bail; | 170 | goto bail; |
| 170 | } | 171 | } |
| 171 | 172 | ||
| 173 | /* | ||
| 174 | * Set the ids of the transactions that have to be committed | ||
| 175 | * to finish f[data]sync. We set them to the currently running transaction | ||
| 176 | * as we cannot be sure that the inode or some of its metadata isn't | ||
| 177 | * part of the transaction - the inode could have been reclaimed and | ||
| 178 | * now it is reread from disk. | ||
| 179 | */ | ||
| 180 | if (journal) { | ||
| 181 | transaction_t *transaction; | ||
| 182 | tid_t tid; | ||
| 183 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 184 | |||
| 185 | read_lock(&journal->j_state_lock); | ||
| 186 | if (journal->j_running_transaction) | ||
| 187 | transaction = journal->j_running_transaction; | ||
| 188 | else | ||
| 189 | transaction = journal->j_committing_transaction; | ||
| 190 | if (transaction) | ||
| 191 | tid = transaction->t_tid; | ||
| 192 | else | ||
| 193 | tid = journal->j_commit_sequence; | ||
| 194 | read_unlock(&journal->j_state_lock); | ||
| 195 | oi->i_sync_tid = tid; | ||
| 196 | oi->i_datasync_tid = tid; | ||
| 197 | } | ||
| 198 | |||
| 172 | bail: | 199 | bail: |
| 173 | if (!IS_ERR(inode)) { | 200 | if (!IS_ERR(inode)) { |
| 174 | trace_ocfs2_iget_end(inode, | 201 | trace_ocfs2_iget_end(inode, |
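Why the fallback order above: a freshly re-read inode may still have metadata sitting in a not-yet-committed transaction, so the running transaction is the safe guess, then the committing one, then the last committed sequence. Reduced to its core as a sketch (fields as in the hunk):

	read_lock(&journal->j_state_lock);
	if (journal->j_running_transaction)
		tid = journal->j_running_transaction->t_tid;	/* most conservative */
	else if (journal->j_committing_transaction)
		tid = journal->j_committing_transaction->t_tid;
	else
		tid = journal->j_commit_sequence;	/* everything already on disk */
	read_unlock(&journal->j_state_lock);
	oi->i_sync_tid = oi->i_datasync_tid = tid;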
| @@ -804,11 +831,13 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
| 804 | goto bail; | 831 | goto bail; |
| 805 | } | 832 | } |
| 806 | 833 | ||
| 807 | /* If we're coming from downconvert_thread we can't go into our own | 834 | /* |
| 808 | * voting [hello, deadlock city!], so unfortunately we just | 835 | * If we're coming from downconvert_thread we can't go into our own |
| 809 | * have to skip deleting this guy. That's OK though because | 836 | * voting [hello, deadlock city!] so we cannot delete the inode. But |
| 810 | * the node who's doing the actual deleting should handle it | 837 | * since we dropped the last inode ref when downconverting the dentry lock, |
| 811 | * anyway. */ | 838 | * we cannot have the file open and thus the node doing unlink will |
| 839 | * take care of deleting the inode. | ||
| 840 | */ | ||
| 812 | if (current == osb->dc_task) | 841 | if (current == osb->dc_task) |
| 813 | goto bail; | 842 | goto bail; |
| 814 | 843 | ||
| @@ -822,12 +851,6 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
| 822 | goto bail_unlock; | 851 | goto bail_unlock; |
| 823 | } | 852 | } |
| 824 | 853 | ||
| 825 | /* If we have allowd wipe of this inode for another node, it | ||
| 826 | * will be marked here so we can safely skip it. Recovery will | ||
| 827 | * cleanup any inodes we might inadvertently skip here. */ | ||
| 828 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) | ||
| 829 | goto bail_unlock; | ||
| 830 | |||
| 831 | ret = 1; | 854 | ret = 1; |
| 832 | bail_unlock: | 855 | bail_unlock: |
| 833 | spin_unlock(&oi->ip_lock); | 856 | spin_unlock(&oi->ip_lock); |
| @@ -941,7 +964,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, | |||
| 941 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); | 964 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); |
| 942 | if (sync_data) | 965 | if (sync_data) |
| 943 | filemap_write_and_wait(inode->i_mapping); | 966 | filemap_write_and_wait(inode->i_mapping); |
| 944 | truncate_inode_pages(&inode->i_data, 0); | 967 | truncate_inode_pages_final(&inode->i_data); |
| 945 | } | 968 | } |
| 946 | 969 | ||
| 947 | static void ocfs2_delete_inode(struct inode *inode) | 970 | static void ocfs2_delete_inode(struct inode *inode) |
| @@ -960,8 +983,6 @@ static void ocfs2_delete_inode(struct inode *inode) | |||
| 960 | if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) | 983 | if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) |
| 961 | goto bail; | 984 | goto bail; |
| 962 | 985 | ||
| 963 | dquot_initialize(inode); | ||
| 964 | |||
| 965 | if (!ocfs2_inode_is_valid_to_delete(inode)) { | 986 | if (!ocfs2_inode_is_valid_to_delete(inode)) { |
| 966 | /* It's probably not necessary to truncate_inode_pages | 987 | /* It's probably not necessary to truncate_inode_pages |
| 967 | * here but we do it for safety anyway (it will most | 988 | * here but we do it for safety anyway (it will most |
| @@ -970,6 +991,8 @@ static void ocfs2_delete_inode(struct inode *inode) | |||
| 970 | goto bail; | 991 | goto bail; |
| 971 | } | 992 | } |
| 972 | 993 | ||
| 994 | dquot_initialize(inode); | ||
| 995 | |||
| 973 | /* We want to block signals in delete_inode as the lock and | 996 | /* We want to block signals in delete_inode as the lock and |
| 974 | * messaging paths may return us -ERESTARTSYS. Which would | 997 | * messaging paths may return us -ERESTARTSYS. Which would |
| 975 | * cause us to exit early, resulting in inodes being orphaned | 998 | * cause us to exit early, resulting in inodes being orphaned |
| @@ -1057,6 +1080,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
| 1057 | { | 1080 | { |
| 1058 | int status; | 1081 | int status; |
| 1059 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1082 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| 1083 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1060 | 1084 | ||
| 1061 | clear_inode(inode); | 1085 | clear_inode(inode); |
| 1062 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, | 1086 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, |
| @@ -1073,9 +1097,9 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
| 1073 | 1097 | ||
| 1074 | /* Do these before all the other work so that we don't bounce | 1098 | /* Do these before all the other work so that we don't bounce |
| 1075 | * the downconvert thread while waiting to destroy the locks. */ | 1099 | * the downconvert thread while waiting to destroy the locks. */ |
| 1076 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1100 | ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres); |
| 1077 | ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); | 1101 | ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); |
| 1078 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 1102 | ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); |
| 1079 | 1103 | ||
| 1080 | ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, | 1104 | ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, |
| 1081 | &oi->ip_la_data_resv); | 1105 | &oi->ip_la_data_resv); |
| @@ -1157,7 +1181,7 @@ void ocfs2_evict_inode(struct inode *inode) | |||
| 1157 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { | 1181 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { |
| 1158 | ocfs2_delete_inode(inode); | 1182 | ocfs2_delete_inode(inode); |
| 1159 | } else { | 1183 | } else { |
| 1160 | truncate_inode_pages(&inode->i_data, 0); | 1184 | truncate_inode_pages_final(&inode->i_data); |
| 1161 | } | 1185 | } |
| 1162 | ocfs2_clear_inode(inode); | 1186 | ocfs2_clear_inode(inode); |
| 1163 | } | 1187 | } |
| @@ -1260,6 +1284,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, | |||
| 1260 | fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1284 | fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
| 1261 | 1285 | ||
| 1262 | ocfs2_journal_dirty(handle, bh); | 1286 | ocfs2_journal_dirty(handle, bh); |
| 1287 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 1263 | leave: | 1288 | leave: |
| 1264 | return status; | 1289 | return status; |
| 1265 | } | 1290 | } |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 621fc73bf23d..a6c991c0fc98 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -44,7 +44,7 @@ struct ocfs2_inode_info | |||
| 44 | struct rw_semaphore ip_xattr_sem; | 44 | struct rw_semaphore ip_xattr_sem; |
| 45 | 45 | ||
| 46 | /* Number of outstanding AIO's which are not page aligned */ | 46 | /* Number of outstanding AIO's which are not page aligned */ |
| 47 | atomic_t ip_unaligned_aio; | 47 | struct mutex ip_unaligned_aio; |
| 48 | 48 | ||
| 49 | /* These fields are protected by ip_lock */ | 49 | /* These fields are protected by ip_lock */ |
| 50 | spinlock_t ip_lock; | 50 | spinlock_t ip_lock; |
| @@ -73,6 +73,13 @@ struct ocfs2_inode_info | |||
| 73 | u32 ip_dir_lock_gen; | 73 | u32 ip_dir_lock_gen; |
| 74 | 74 | ||
| 75 | struct ocfs2_alloc_reservation ip_la_data_resv; | 75 | struct ocfs2_alloc_reservation ip_la_data_resv; |
| 76 | |||
| 77 | /* | ||
| 78 | * Transactions that contain the inode's metadata needed to complete | ||
| 79 | * fsync and fdatasync, respectively. | ||
| 80 | */ | ||
| 81 | tid_t i_sync_tid; | ||
| 82 | tid_t i_datasync_tid; | ||
| 76 | }; | 83 | }; |
| 77 | 84 | ||
| 78 | /* | 85 | /* |
| @@ -84,8 +91,6 @@ struct ocfs2_inode_info | |||
| 84 | #define OCFS2_INODE_BITMAP 0x00000004 | 91 | #define OCFS2_INODE_BITMAP 0x00000004 |
| 85 | /* This inode has been wiped from disk */ | 92 | /* This inode has been wiped from disk */ |
| 86 | #define OCFS2_INODE_DELETED 0x00000008 | 93 | #define OCFS2_INODE_DELETED 0x00000008 |
| 87 | /* Another node is deleting, so our delete is a nop */ | ||
| 88 | #define OCFS2_INODE_SKIP_DELETE 0x00000010 | ||
| 89 | /* Has the inode been orphaned on another node? | 94 | /* Has the inode been orphaned on another node? |
| 90 | * | 95 | * |
| 91 | * This hints to ocfs2_drop_inode that it should clear i_nlink before | 96 | * This hints to ocfs2_drop_inode that it should clear i_nlink before |
| @@ -100,11 +105,11 @@ struct ocfs2_inode_info | |||
| 100 | * rely on ocfs2_delete_inode to sort things out under the proper | 105 | * rely on ocfs2_delete_inode to sort things out under the proper |
| 101 | * cluster locks. | 106 | * cluster locks. |
| 102 | */ | 107 | */ |
| 103 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 | 108 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000010 |
| 104 | /* Does someone have the file open O_DIRECT */ | 109 | /* Does someone have the file open O_DIRECT */ |
| 105 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 | 110 | #define OCFS2_INODE_OPEN_DIRECT 0x00000020 |
| 106 | /* Tell the inode wipe code it's not in orphan dir */ | 111 | /* Tell the inode wipe code it's not in orphan dir */ |
| 107 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 | 112 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000040 |
| 108 | 113 | ||
| 109 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | 114 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) |
| 110 | { | 115 | { |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 8ca3c29accbf..490229f43731 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -413,11 +413,12 @@ int ocfs2_info_handle_freeinode(struct inode *inode, | |||
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); | 415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); |
| 416 | if (status < 0) | ||
| 417 | goto bail; | ||
| 418 | 416 | ||
| 419 | iput(inode_alloc); | 417 | iput(inode_alloc); |
| 420 | inode_alloc = NULL; | 418 | inode_alloc = NULL; |
| 419 | |||
| 420 | if (status < 0) | ||
| 421 | goto bail; | ||
| 421 | } | 422 | } |
| 422 | 423 | ||
| 423 | o2info_set_request_filled(&oifi->ifi_req); | 424 | o2info_set_request_filled(&oifi->ifi_req); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 44fc3e530c3d..03ea9314fecd 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -2132,12 +2132,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 2132 | iter = oi->ip_next_orphan; | 2132 | iter = oi->ip_next_orphan; |
| 2133 | 2133 | ||
| 2134 | spin_lock(&oi->ip_lock); | 2134 | spin_lock(&oi->ip_lock); |
| 2135 | /* The remote delete code may have set these on the | ||
| 2136 | * assumption that the other node would wipe them | ||
| 2137 | * successfully. If they are still in the node's | ||
| 2138 | * orphan dir, we need to reset that state. */ | ||
| 2139 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | ||
| 2140 | |||
| 2141 | /* Set the proper information to get us going into | 2135 | /* Set the proper information to get us going into |
| 2142 | * ocfs2_delete_inode. */ | 2136 | * ocfs2_delete_inode. */ |
| 2143 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 2137 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 9ff4e8cf9d97..7f8cde94abfe 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -626,4 +626,15 @@ static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | |||
| 626 | new_size); | 626 | new_size); |
| 627 | } | 627 | } |
| 628 | 628 | ||
| 629 | static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, | ||
| 630 | struct inode *inode, | ||
| 631 | int datasync) | ||
| 632 | { | ||
| 633 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 634 | |||
| 635 | oi->i_sync_tid = handle->h_transaction->t_tid; | ||
| 636 | if (datasync) | ||
| 637 | oi->i_datasync_tid = handle->h_transaction->t_tid; | ||
| 638 | } | ||
| 639 | |||
| 629 | #endif /* OCFS2_JOURNAL_H */ | 640 | #endif /* OCFS2_JOURNAL_H */ |
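This helper is the producer side of the new fsync scheme: every transaction that dirties inode metadata records its tid before committing, and ocfs2_sync_file() later waits for exactly that tid. A typical call pattern, sketched (credits value and buffer names illustrative):

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* ... ocfs2_journal_access_di() and the actual metadata updates ... */

	ocfs2_update_inode_fsync_trans(handle, inode, 1); /* 1: fdatasync needs it too */
	ocfs2_journal_dirty(handle, di_bh);
	ocfs2_commit_trans(osb, handle);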
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index e57c804069ea..6b6d092b0998 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
| @@ -82,6 +82,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | ret = flock_lock_file_wait(file, fl); | 84 | ret = flock_lock_file_wait(file, fl); |
| 85 | if (ret) | ||
| 86 | ocfs2_file_unlock(file); | ||
| 85 | 87 | ||
| 86 | out: | 88 | out: |
| 87 | mutex_unlock(&fp->fp_mutex); | 89 | mutex_unlock(&fp->fp_mutex); |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 64c304d668f0..599eb4c4c8be 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
| @@ -151,6 +151,7 @@ static int __ocfs2_move_extent(handle_t *handle, | |||
| 151 | old_blkno, len); | 151 | old_blkno, len); |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 154 | out: | 155 | out: |
| 155 | ocfs2_free_path(path); | 156 | ocfs2_free_path(path); |
| 156 | return ret; | 157 | return ret; |
| @@ -690,8 +691,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | |||
| 690 | 691 | ||
| 691 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, | 692 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, |
| 692 | goal_bit, len); | 693 | goal_bit, len); |
| 693 | if (ret) | 694 | if (ret) { |
| 695 | ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, | ||
| 696 | le16_to_cpu(gd->bg_chain)); | ||
| 694 | mlog_errno(ret); | 697 | mlog_errno(ret); |
| 698 | } | ||
| 695 | 699 | ||
| 696 | /* | 700 | /* |
| 697 | * Here we should write the new page out first if we are | 701 | * Here we should write the new page out first if we are |
| @@ -957,6 +961,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | |||
| 957 | inode->i_ctime = CURRENT_TIME; | 961 | inode->i_ctime = CURRENT_TIME; |
| 958 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 962 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
| 959 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 963 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
| 964 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 960 | 965 | ||
| 961 | ocfs2_journal_dirty(handle, di_bh); | 966 | ocfs2_journal_dirty(handle, di_bh); |
| 962 | 967 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3683643f3f0e..2060fc398445 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -450,7 +450,6 @@ leave: | |||
| 450 | 450 | ||
| 451 | brelse(new_fe_bh); | 451 | brelse(new_fe_bh); |
| 452 | brelse(parent_fe_bh); | 452 | brelse(parent_fe_bh); |
| 453 | kfree(si.name); | ||
| 454 | kfree(si.value); | 453 | kfree(si.value); |
| 455 | 454 | ||
| 456 | ocfs2_free_dir_lookup_result(&lookup); | 455 | ocfs2_free_dir_lookup_result(&lookup); |
| @@ -495,6 +494,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
| 495 | struct ocfs2_dinode *fe = NULL; | 494 | struct ocfs2_dinode *fe = NULL; |
| 496 | struct ocfs2_extent_list *fel; | 495 | struct ocfs2_extent_list *fel; |
| 497 | u16 feat; | 496 | u16 feat; |
| 497 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 498 | 498 | ||
| 499 | *new_fe_bh = NULL; | 499 | *new_fe_bh = NULL; |
| 500 | 500 | ||
| @@ -576,8 +576,8 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
| 576 | mlog_errno(status); | 576 | mlog_errno(status); |
| 577 | } | 577 | } |
| 578 | 578 | ||
| 579 | status = 0; /* error in ocfs2_create_new_inode_locks is not | 579 | oi->i_sync_tid = handle->h_transaction->t_tid; |
| 580 | * critical */ | 580 | oi->i_datasync_tid = handle->h_transaction->t_tid; |
| 581 | 581 | ||
| 582 | leave: | 582 | leave: |
| 583 | if (status < 0) { | 583 | if (status < 0) { |
| @@ -1855,7 +1855,6 @@ bail: | |||
| 1855 | 1855 | ||
| 1856 | brelse(new_fe_bh); | 1856 | brelse(new_fe_bh); |
| 1857 | brelse(parent_fe_bh); | 1857 | brelse(parent_fe_bh); |
| 1858 | kfree(si.name); | ||
| 1859 | kfree(si.value); | 1858 | kfree(si.value); |
| 1860 | ocfs2_free_dir_lookup_result(&lookup); | 1859 | ocfs2_free_dir_lookup_result(&lookup); |
| 1861 | if (inode_ac) | 1860 | if (inode_ac) |
| @@ -2481,6 +2480,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
| 2481 | di->i_orphaned_slot = 0; | 2480 | di->i_orphaned_slot = 0; |
| 2482 | set_nlink(inode, 1); | 2481 | set_nlink(inode, 1); |
| 2483 | ocfs2_set_links_count(di, inode->i_nlink); | 2482 | ocfs2_set_links_count(di, inode->i_nlink); |
| 2483 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
| 2484 | ocfs2_journal_dirty(handle, di_bh); | 2484 | ocfs2_journal_dirty(handle, di_bh); |
| 2485 | 2485 | ||
| 2486 | status = ocfs2_add_entry(handle, dentry, inode, | 2486 | status = ocfs2_add_entry(handle, dentry, inode, |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 553f53cc73ae..8d64a97a9d5e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
| 31 | #include <linux/wait.h> | 31 | #include <linux/wait.h> |
| 32 | #include <linux/list.h> | 32 | #include <linux/list.h> |
| 33 | #include <linux/llist.h> | ||
| 33 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
| 34 | #include <linux/workqueue.h> | 35 | #include <linux/workqueue.h> |
| 35 | #include <linux/kref.h> | 36 | #include <linux/kref.h> |
| @@ -274,19 +275,16 @@ enum ocfs2_mount_options | |||
| 274 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | 275 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ |
| 275 | }; | 276 | }; |
| 276 | 277 | ||
| 277 | #define OCFS2_OSB_SOFT_RO 0x0001 | 278 | #define OCFS2_OSB_SOFT_RO 0x0001 |
| 278 | #define OCFS2_OSB_HARD_RO 0x0002 | 279 | #define OCFS2_OSB_HARD_RO 0x0002 |
| 279 | #define OCFS2_OSB_ERROR_FS 0x0004 | 280 | #define OCFS2_OSB_ERROR_FS 0x0004 |
| 280 | #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 | 281 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
| 281 | |||
| 282 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | ||
| 283 | 282 | ||
| 284 | struct ocfs2_journal; | 283 | struct ocfs2_journal; |
| 285 | struct ocfs2_slot_info; | 284 | struct ocfs2_slot_info; |
| 286 | struct ocfs2_recovery_map; | 285 | struct ocfs2_recovery_map; |
| 287 | struct ocfs2_replay_map; | 286 | struct ocfs2_replay_map; |
| 288 | struct ocfs2_quota_recovery; | 287 | struct ocfs2_quota_recovery; |
| 289 | struct ocfs2_dentry_lock; | ||
| 290 | struct ocfs2_super | 288 | struct ocfs2_super |
| 291 | { | 289 | { |
| 292 | struct task_struct *commit_task; | 290 | struct task_struct *commit_task; |
| @@ -414,10 +412,9 @@ struct ocfs2_super | |||
| 414 | struct list_head blocked_lock_list; | 412 | struct list_head blocked_lock_list; |
| 415 | unsigned long blocked_lock_count; | 413 | unsigned long blocked_lock_count; |
| 416 | 414 | ||
| 417 | /* List of dentry locks to release. Anyone can add locks to | 415 | /* List of dquot structures to drop last reference to */ |
| 418 | * the list, ocfs2_wq processes the list */ | 416 | struct llist_head dquot_drop_list; |
| 419 | struct ocfs2_dentry_lock *dentry_lock_list; | 417 | struct work_struct dquot_drop_work; |
| 420 | struct work_struct dentry_lock_work; | ||
| 421 | 418 | ||
| 422 | wait_queue_head_t osb_mount_event; | 419 | wait_queue_head_t osb_mount_event; |
| 423 | 420 | ||
| @@ -449,6 +446,8 @@ struct ocfs2_super | |||
| 449 | /* rb tree root for refcount lock. */ | 446 | /* rb tree root for refcount lock. */ |
| 450 | struct rb_root osb_rf_lock_tree; | 447 | struct rb_root osb_rf_lock_tree; |
| 451 | struct ocfs2_refcount_tree *osb_ref_tree_lru; | 448 | struct ocfs2_refcount_tree *osb_ref_tree_lru; |
| 449 | |||
| 450 | struct mutex system_file_mutex; | ||
| 452 | }; | 451 | }; |
| 453 | 452 | ||
| 454 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 453 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
| @@ -579,18 +578,6 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, | |||
| 579 | spin_unlock(&osb->osb_lock); | 578 | spin_unlock(&osb->osb_lock); |
| 580 | } | 579 | } |
| 581 | 580 | ||
| 582 | |||
| 583 | static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, | ||
| 584 | unsigned long flag) | ||
| 585 | { | ||
| 586 | unsigned long ret; | ||
| 587 | |||
| 588 | spin_lock(&osb->osb_lock); | ||
| 589 | ret = osb->osb_flags & flag; | ||
| 590 | spin_unlock(&osb->osb_lock); | ||
| 591 | return ret; | ||
| 592 | } | ||
| 593 | |||
| 594 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, | 581 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, |
| 595 | int hard) | 582 | int hard) |
| 596 | { | 583 | { |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index d5ab56cbe5c5..f266d67df3c6 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
| @@ -28,6 +28,7 @@ struct ocfs2_dquot { | |||
| 28 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ | 28 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ |
| 29 | s64 dq_origspace; /* Last globally synced space usage */ | 29 | s64 dq_origspace; /* Last globally synced space usage */ |
| 30 | s64 dq_originodes; /* Last globally synced inode usage */ | 30 | s64 dq_originodes; /* Last globally synced inode usage */ |
| 31 | struct llist_node list; /* Member of list of dquots to drop */ | ||
| 31 | }; | 32 | }; |
| 32 | 33 | ||
| 33 | /* Description of one chunk to recover in memory */ | 34 | /* Description of one chunk to recover in memory */ |
| @@ -110,6 +111,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, | |||
| 110 | int ocfs2_create_local_dquot(struct dquot *dquot); | 111 | int ocfs2_create_local_dquot(struct dquot *dquot); |
| 111 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); | 112 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); |
| 112 | int ocfs2_local_write_dquot(struct dquot *dquot); | 113 | int ocfs2_local_write_dquot(struct dquot *dquot); |
| 114 | void ocfs2_drop_dquot_refs(struct work_struct *work); | ||
| 113 | 115 | ||
| 114 | extern const struct dquot_operations ocfs2_quota_operations; | 116 | extern const struct dquot_operations ocfs2_quota_operations; |
| 115 | extern struct quota_format_type ocfs2_quota_format; | 117 | extern struct quota_format_type ocfs2_quota_format; |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index aaa50611ec66..b990a62cff50 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/jiffies.h> | 10 | #include <linux/jiffies.h> |
| 11 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
| 12 | #include <linux/workqueue.h> | 12 | #include <linux/workqueue.h> |
| 13 | #include <linux/llist.h> | ||
| 13 | 14 | ||
| 14 | #include <cluster/masklog.h> | 15 | #include <cluster/masklog.h> |
| 15 | 16 | ||
| @@ -679,6 +680,27 @@ static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) | |||
| 679 | OCFS2_INODE_UPDATE_CREDITS; | 680 | OCFS2_INODE_UPDATE_CREDITS; |
| 680 | } | 681 | } |
| 681 | 682 | ||
| 683 | void ocfs2_drop_dquot_refs(struct work_struct *work) | ||
| 684 | { | ||
| 685 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
| 686 | dquot_drop_work); | ||
| 687 | struct llist_node *list; | ||
| 688 | struct ocfs2_dquot *odquot, *next_odquot; | ||
| 689 | |||
| 690 | list = llist_del_all(&osb->dquot_drop_list); | ||
| 691 | llist_for_each_entry_safe(odquot, next_odquot, list, list) { | ||
| 692 | /* Drop the reference we acquired in ocfs2_dquot_release() */ | ||
| 693 | dqput(&odquot->dq_dquot); | ||
| 694 | } | ||
| 695 | } | ||
| 696 | |||
| 697 | /* | ||
| 698 | * Called when the last reference to dquot is dropped. If we are called from | ||
| 699 | * downconvert thread, we cannot do all the handling here because grabbing | ||
| 700 | * quota lock could deadlock (the node holding the quota lock could need some | ||
| 701 | * other cluster lock to proceed but with blocked downconvert thread we cannot | ||
| 702 | * release any lock). | ||
| 703 | */ | ||
| 682 | static int ocfs2_release_dquot(struct dquot *dquot) | 704 | static int ocfs2_release_dquot(struct dquot *dquot) |
| 683 | { | 705 | { |
| 684 | handle_t *handle; | 706 | handle_t *handle; |
| @@ -694,6 +716,19 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
| 694 | /* Check whether we are not racing with some other dqget() */ | 716 | /* Check whether we are not racing with some other dqget() */ |
| 695 | if (atomic_read(&dquot->dq_count) > 1) | 717 | if (atomic_read(&dquot->dq_count) > 1) |
| 696 | goto out; | 718 | goto out; |
| 719 | /* Running from downconvert thread? Postpone quota processing to wq */ | ||
| 720 | if (current == osb->dc_task) { | ||
| 721 | /* | ||
| 722 | * Grab our own reference to dquot and queue it for delayed | ||
| 723 | * dropping. Quota code rechecks after calling | ||
| 724 | * ->release_dquot() and won't free dquot structure. | ||
| 725 | */ | ||
| 726 | dqgrab(dquot); | ||
| 727 | /* First entry on list -> queue work */ | ||
| 728 | if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list)) | ||
| 729 | queue_work(ocfs2_wq, &osb->dquot_drop_work); | ||
| 730 | goto out; | ||
| 731 | } | ||
| 697 | status = ocfs2_lock_global_qf(oinfo, 1); | 732 | status = ocfs2_lock_global_qf(oinfo, 1); |
| 698 | if (status < 0) | 733 | if (status < 0) |
| 699 | goto out; | 734 | goto out; |
| @@ -717,6 +752,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
| 717 | */ | 752 | */ |
| 718 | if (status < 0) | 753 | if (status < 0) |
| 719 | mlog_errno(status); | 754 | mlog_errno(status); |
| 755 | /* | ||
| 756 | * Clear dq_off so that we search for the structure in quota file next | ||
| 757 | * time we acquire it. The structure might be deleted and reallocated | ||
| 758 | * elsewhere by another node while our dquot structure is on freelist. | ||
| 759 | */ | ||
| 760 | dquot->dq_off = 0; | ||
| 720 | clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); | 761 | clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); |
| 721 | out_trans: | 762 | out_trans: |
| 722 | ocfs2_commit_trans(osb, handle); | 763 | ocfs2_commit_trans(osb, handle); |
| @@ -756,16 +797,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) | |||
| 756 | status = ocfs2_lock_global_qf(info, 1); | 797 | status = ocfs2_lock_global_qf(info, 1); |
| 757 | if (status < 0) | 798 | if (status < 0) |
| 758 | goto out; | 799 | goto out; |
| 759 | if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { | 800 | status = ocfs2_qinfo_lock(info, 0); |
| 760 | status = ocfs2_qinfo_lock(info, 0); | 801 | if (status < 0) |
| 761 | if (status < 0) | 802 | goto out_dq; |
| 762 | goto out_dq; | 803 | /* |
| 763 | status = qtree_read_dquot(&info->dqi_gi, dquot); | 804 | * We always want to read dquot structure from disk because we don't |
| 764 | ocfs2_qinfo_unlock(info, 0); | 805 | * know what happened with it while it was on freelist. |
| 765 | if (status < 0) | 806 | */ |
| 766 | goto out_dq; | 807 | status = qtree_read_dquot(&info->dqi_gi, dquot); |
| 767 | } | 808 | ocfs2_qinfo_unlock(info, 0); |
| 768 | set_bit(DQ_READ_B, &dquot->dq_flags); | 809 | if (status < 0) |
| 810 | goto out_dq; | ||
| 769 | 811 | ||
| 770 | OCFS2_DQUOT(dquot)->dq_use_count++; | 812 | OCFS2_DQUOT(dquot)->dq_use_count++; |
| 771 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; | 813 | OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; |
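The quota_global.c hunks above implement a lock-free deferred-release pattern: ocfs2_release_dquot() may run in the downconvert thread, where taking the global quota lock could deadlock, so the last reference is parked on an llist and a workqueue item performs the final dqput(). A minimal sketch of the same handoff, assuming hypothetical names (struct deferred_item, defer_drop) in place of the ocfs2 structures:

    #include <linux/llist.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct deferred_item {
            struct llist_node node;
    };

    static LLIST_HEAD(drop_list);
    static struct work_struct drop_work;  /* INIT_WORK(&drop_work, drop_worker) at init */

    static void drop_worker(struct work_struct *work)
    {
            struct llist_node *list = llist_del_all(&drop_list);
            struct deferred_item *item, *next;

            /* _safe variant: each item may be freed inside the loop */
            llist_for_each_entry_safe(item, next, list, node)
                    kfree(item);
    }

    static void defer_drop(struct deferred_item *item)
    {
            /* llist_add() returns true only when the list was empty,
             * so the work is queued exactly once per batch */
            if (llist_add(&item->node, &drop_list))
                    schedule_work(&drop_work);
    }

The llist primitives tolerate concurrent producers without any lock, which is exactly what a context that must not block needs.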
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2e4344be3b96..2001862bf2b1 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
| @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) | |||
| 1303 | ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); | 1303 | ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); |
| 1304 | 1304 | ||
| 1305 | out: | 1305 | out: |
| 1306 | /* Clear the read bit so that next time someone uses this | ||
| 1307 | * dquot he reads fresh info from disk and allocates local | ||
| 1308 | * dquot structure */ | ||
| 1309 | clear_bit(DQ_READ_B, &dquot->dq_flags); | ||
| 1310 | return status; | 1306 | return status; |
| 1311 | } | 1307 | } |
| 1312 | 1308 | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e6600e57..83f1a665ae97 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
| @@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name, | |||
| 346 | 346 | ||
| 347 | strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); | 347 | strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); |
| 348 | new_conn->cc_namelen = grouplen; | 348 | new_conn->cc_namelen = grouplen; |
| 349 | strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); | 349 | if (cluster_name_len) |
| 350 | strlcpy(new_conn->cc_cluster_name, cluster_name, | ||
| 351 | CLUSTER_NAME_MAX + 1); | ||
| 350 | new_conn->cc_cluster_name_len = cluster_name_len; | 352 | new_conn->cc_cluster_name_len = cluster_name_len; |
| 351 | new_conn->cc_recovery_handler = recovery_handler; | 353 | new_conn->cc_recovery_handler = recovery_handler; |
| 352 | new_conn->cc_recovery_data = recovery_data; | 354 | new_conn->cc_recovery_data = recovery_data; |
| @@ -494,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, | |||
| 494 | } | 496 | } |
| 495 | 497 | ||
| 496 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = | 498 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = |
| 497 | __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, | 499 | __ATTR(max_locking_protocol, S_IRUGO, |
| 498 | ocfs2_max_locking_protocol_show, NULL); | 500 | ocfs2_max_locking_protocol_show, NULL); |
| 499 | 501 | ||
| 500 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | 502 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, |
| @@ -526,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | |||
| 526 | } | 528 | } |
| 527 | 529 | ||
| 528 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = | 530 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = |
| 529 | __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, | 531 | __ATTR(loaded_cluster_plugins, S_IRUGO, |
| 530 | ocfs2_loaded_cluster_plugins_show, NULL); | 532 | ocfs2_loaded_cluster_plugins_show, NULL); |
| 531 | 533 | ||
| 532 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | 534 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, |
| @@ -548,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | |||
| 548 | } | 550 | } |
| 549 | 551 | ||
| 550 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = | 552 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = |
| 551 | __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, | 553 | __ATTR(active_cluster_plugin, S_IRUGO, |
| 552 | ocfs2_active_cluster_plugin_show, NULL); | 554 | ocfs2_active_cluster_plugin_show, NULL); |
| 553 | 555 | ||
| 554 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, | 556 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, |
| @@ -597,15 +599,29 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, | |||
| 597 | 599 | ||
| 598 | 600 | ||
| 599 | static struct kobj_attribute ocfs2_attr_cluster_stack = | 601 | static struct kobj_attribute ocfs2_attr_cluster_stack = |
| 600 | __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, | 602 | __ATTR(cluster_stack, S_IRUGO | S_IWUSR, |
| 601 | ocfs2_cluster_stack_show, | 603 | ocfs2_cluster_stack_show, |
| 602 | ocfs2_cluster_stack_store); | 604 | ocfs2_cluster_stack_store); |
| 603 | 605 | ||
| 606 | |||
| 607 | |||
| 608 | static ssize_t ocfs2_dlm_recover_show(struct kobject *kobj, | ||
| 609 | struct kobj_attribute *attr, | ||
| 610 | char *buf) | ||
| 611 | { | ||
| 612 | return snprintf(buf, PAGE_SIZE, "1\n"); | ||
| 613 | } | ||
| 614 | |||
| 615 | static struct kobj_attribute ocfs2_attr_dlm_recover_support = | ||
| 616 | __ATTR(dlm_recover_callback_support, S_IRUGO, | ||
| 617 | ocfs2_dlm_recover_show, NULL); | ||
| 618 | |||
| 604 | static struct attribute *ocfs2_attrs[] = { | 619 | static struct attribute *ocfs2_attrs[] = { |
| 605 | &ocfs2_attr_max_locking_protocol.attr, | 620 | &ocfs2_attr_max_locking_protocol.attr, |
| 606 | &ocfs2_attr_loaded_cluster_plugins.attr, | 621 | &ocfs2_attr_loaded_cluster_plugins.attr, |
| 607 | &ocfs2_attr_active_cluster_plugin.attr, | 622 | &ocfs2_attr_active_cluster_plugin.attr, |
| 608 | &ocfs2_attr_cluster_stack.attr, | 623 | &ocfs2_attr_cluster_stack.attr, |
| 624 | &ocfs2_attr_dlm_recover_support.attr, | ||
| 609 | NULL, | 625 | NULL, |
| 610 | }; | 626 | }; |
| 611 | 627 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 47ae2663a6f5..0cb889a17ae1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -771,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
| 771 | spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); | 771 | spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); |
| 772 | i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); | 772 | i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); |
| 773 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); | 773 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); |
| 774 | ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); | ||
| 774 | 775 | ||
| 775 | status = 0; | 776 | status = 0; |
| 776 | 777 | ||
| @@ -1607,6 +1608,21 @@ out: | |||
| 1607 | return ret; | 1608 | return ret; |
| 1608 | } | 1609 | } |
| 1609 | 1610 | ||
| 1611 | void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, | ||
| 1612 | struct buffer_head *di_bh, | ||
| 1613 | u32 num_bits, | ||
| 1614 | u16 chain) | ||
| 1615 | { | ||
| 1616 | u32 tmp_used; | ||
| 1617 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
| 1618 | struct ocfs2_chain_list *cl; | ||
| 1619 | |||
| 1620 | cl = (struct ocfs2_chain_list *)&di->id2.i_chain; | ||
| 1621 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
| 1622 | di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits); | ||
| 1623 | le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits); | ||
| 1624 | } | ||
| 1625 | |||
| 1610 | static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, | 1626 | static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, |
| 1611 | struct ocfs2_extent_rec *rec, | 1627 | struct ocfs2_extent_rec *rec, |
| 1612 | struct ocfs2_chain_list *cl) | 1628 | struct ocfs2_chain_list *cl) |
| @@ -1707,8 +1723,12 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
| 1707 | 1723 | ||
| 1708 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, | 1724 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, |
| 1709 | res->sr_bit_offset, res->sr_bits); | 1725 | res->sr_bit_offset, res->sr_bits); |
| 1710 | if (ret < 0) | 1726 | if (ret < 0) { |
| 1727 | ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh, | ||
| 1728 | res->sr_bits, | ||
| 1729 | le16_to_cpu(gd->bg_chain)); | ||
| 1711 | mlog_errno(ret); | 1730 | mlog_errno(ret); |
| 1731 | } | ||
| 1712 | 1732 | ||
| 1713 | out_loc_only: | 1733 | out_loc_only: |
| 1714 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | 1734 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
| @@ -1838,6 +1858,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
| 1838 | res->sr_bit_offset, | 1858 | res->sr_bit_offset, |
| 1839 | res->sr_bits); | 1859 | res->sr_bits); |
| 1840 | if (status < 0) { | 1860 | if (status < 0) { |
| 1861 | ocfs2_rollback_alloc_dinode_counts(alloc_inode, | ||
| 1862 | ac->ac_bh, res->sr_bits, chain); | ||
| 1841 | mlog_errno(status); | 1863 | mlog_errno(status); |
| 1842 | goto bail; | 1864 | goto bail; |
| 1843 | } | 1865 | } |
| @@ -2091,7 +2113,7 @@ int ocfs2_find_new_inode_loc(struct inode *dir, | |||
| 2091 | 2113 | ||
| 2092 | ac->ac_find_loc_priv = res; | 2114 | ac->ac_find_loc_priv = res; |
| 2093 | *fe_blkno = res->sr_blkno; | 2115 | *fe_blkno = res->sr_blkno; |
| 2094 | 2116 | ocfs2_update_inode_fsync_trans(handle, dir, 0); | |
| 2095 | out: | 2117 | out: |
| 2096 | if (handle) | 2118 | if (handle) |
| 2097 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); | 2119 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); |
| @@ -2149,6 +2171,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle, | |||
| 2149 | res->sr_bit_offset, | 2171 | res->sr_bit_offset, |
| 2150 | res->sr_bits); | 2172 | res->sr_bits); |
| 2151 | if (ret < 0) { | 2173 | if (ret < 0) { |
| 2174 | ocfs2_rollback_alloc_dinode_counts(ac->ac_inode, | ||
| 2175 | ac->ac_bh, res->sr_bits, chain); | ||
| 2152 | mlog_errno(ret); | 2176 | mlog_errno(ret); |
| 2153 | goto out; | 2177 | goto out; |
| 2154 | } | 2178 | } |
| @@ -2870,6 +2894,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
| 2870 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); | 2894 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); |
| 2871 | if (status < 0) { | 2895 | if (status < 0) { |
| 2872 | mutex_unlock(&inode_alloc_inode->i_mutex); | 2896 | mutex_unlock(&inode_alloc_inode->i_mutex); |
| 2897 | iput(inode_alloc_inode); | ||
| 2873 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", | 2898 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", |
| 2874 | (u32)suballoc_slot, status); | 2899 | (u32)suballoc_slot, status); |
| 2875 | goto bail; | 2900 | goto bail; |
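The new ocfs2_rollback_alloc_dinode_counts() undoes what ocfs2_alloc_dinode_update_counts() did when the subsequent ocfs2_block_group_set_bits() fails, so the allocator dinode's used/free counters never drift from the bitmap. The shape is a plain compensating-update pattern; a generic sketch with a hypothetical counter struct and bitmap helper:

    #include <linux/types.h>

    /* illustrative only: optimistic counter update with rollback */
    struct alloc_counters {
            u32 used;
            u32 free;
    };

    static int claim_bits(struct alloc_counters *c, u32 bits)
    {
            int ret;

            c->used += bits;                /* update counters first */
            c->free -= bits;
            ret = set_bitmap_bits(bits);    /* hypothetical, may fail */
            if (ret < 0) {
                    c->used -= bits;        /* compensate on failure */
                    c->free += bits;
            }
            return ret;
    }

The iput() added in ocfs2_test_inode_bit() is a separate fix: that error path unlocked the mutex but leaked the inode reference taken just before.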
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 218d8036b3e7..2d2501767c0c 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -91,6 +91,10 @@ int ocfs2_alloc_dinode_update_counts(struct inode *inode, | |||
| 91 | struct buffer_head *di_bh, | 91 | struct buffer_head *di_bh, |
| 92 | u32 num_bits, | 92 | u32 num_bits, |
| 93 | u16 chain); | 93 | u16 chain); |
| 94 | void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, | ||
| 95 | struct buffer_head *di_bh, | ||
| 96 | u32 num_bits, | ||
| 97 | u16 chain); | ||
| 94 | int ocfs2_block_group_set_bits(handle_t *handle, | 98 | int ocfs2_block_group_set_bits(handle_t *handle, |
| 95 | struct inode *alloc_inode, | 99 | struct inode *alloc_inode, |
| 96 | struct ocfs2_group_desc *bg, | 100 | struct ocfs2_group_desc *bg, |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 49d84f80f36c..a7cdd56f4c79 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) | |||
| 561 | if (!oi) | 561 | if (!oi) |
| 562 | return NULL; | 562 | return NULL; |
| 563 | 563 | ||
| 564 | oi->i_sync_tid = 0; | ||
| 565 | oi->i_datasync_tid = 0; | ||
| 566 | |||
| 564 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); | 567 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); |
| 565 | return &oi->vfs_inode; | 568 | return &oi->vfs_inode; |
| 566 | } | 569 | } |
| @@ -631,6 +634,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
| 631 | struct ocfs2_super *osb = OCFS2_SB(sb); | 634 | struct ocfs2_super *osb = OCFS2_SB(sb); |
| 632 | u32 tmp; | 635 | u32 tmp; |
| 633 | 636 | ||
| 637 | sync_filesystem(sb); | ||
| 638 | |||
| 634 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || | 639 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || |
| 635 | !ocfs2_check_set_options(sb, &parsed_options)) { | 640 | !ocfs2_check_set_options(sb, &parsed_options)) { |
| 636 | ret = -EINVAL; | 641 | ret = -EINVAL; |
| @@ -1238,30 +1243,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type, | |||
| 1238 | return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); | 1243 | return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); |
| 1239 | } | 1244 | } |
| 1240 | 1245 | ||
| 1241 | static void ocfs2_kill_sb(struct super_block *sb) | ||
| 1242 | { | ||
| 1243 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
| 1244 | |||
| 1245 | /* Failed mount? */ | ||
| 1246 | if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) | ||
| 1247 | goto out; | ||
| 1248 | |||
| 1249 | /* Prevent further queueing of inode drop events */ | ||
| 1250 | spin_lock(&dentry_list_lock); | ||
| 1251 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); | ||
| 1252 | spin_unlock(&dentry_list_lock); | ||
| 1253 | /* Wait for work to finish and/or remove it */ | ||
| 1254 | cancel_work_sync(&osb->dentry_lock_work); | ||
| 1255 | out: | ||
| 1256 | kill_block_super(sb); | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | static struct file_system_type ocfs2_fs_type = { | 1246 | static struct file_system_type ocfs2_fs_type = { |
| 1260 | .owner = THIS_MODULE, | 1247 | .owner = THIS_MODULE, |
| 1261 | .name = "ocfs2", | 1248 | .name = "ocfs2", |
| 1262 | .mount = ocfs2_mount, | 1249 | .mount = ocfs2_mount, |
| 1263 | .kill_sb = ocfs2_kill_sb, | 1250 | .kill_sb = kill_block_super, |
| 1264 | |||
| 1265 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, | 1251 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, |
| 1266 | .next = NULL | 1252 | .next = NULL |
| 1267 | }; | 1253 | }; |
| @@ -1612,14 +1598,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) | |||
| 1612 | return 0; | 1598 | return 0; |
| 1613 | } | 1599 | } |
| 1614 | 1600 | ||
| 1615 | wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
| 1616 | |||
| 1617 | static int __init ocfs2_init(void) | 1601 | static int __init ocfs2_init(void) |
| 1618 | { | 1602 | { |
| 1619 | int status, i; | 1603 | int status; |
| 1620 | |||
| 1621 | for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) | ||
| 1622 | init_waitqueue_head(&ocfs2__ioend_wq[i]); | ||
| 1623 | 1604 | ||
| 1624 | status = init_ocfs2_uptodate_cache(); | 1605 | status = init_ocfs2_uptodate_cache(); |
| 1625 | if (status < 0) | 1606 | if (status < 0) |
| @@ -1761,7 +1742,7 @@ static void ocfs2_inode_init_once(void *data) | |||
| 1761 | ocfs2_extent_map_init(&oi->vfs_inode); | 1742 | ocfs2_extent_map_init(&oi->vfs_inode); |
| 1762 | INIT_LIST_HEAD(&oi->ip_io_markers); | 1743 | INIT_LIST_HEAD(&oi->ip_io_markers); |
| 1763 | oi->ip_dir_start_lookup = 0; | 1744 | oi->ip_dir_start_lookup = 0; |
| 1764 | atomic_set(&oi->ip_unaligned_aio, 0); | 1745 | mutex_init(&oi->ip_unaligned_aio); |
| 1765 | init_rwsem(&oi->ip_alloc_sem); | 1746 | init_rwsem(&oi->ip_alloc_sem); |
| 1766 | init_rwsem(&oi->ip_xattr_sem); | 1747 | init_rwsem(&oi->ip_xattr_sem); |
| 1767 | mutex_init(&oi->ip_io_mutex); | 1748 | mutex_init(&oi->ip_io_mutex); |
| @@ -1932,17 +1913,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1932 | 1913 | ||
| 1933 | debugfs_remove(osb->osb_ctxt); | 1914 | debugfs_remove(osb->osb_ctxt); |
| 1934 | 1915 | ||
| 1935 | /* | ||
| 1936 | * Flush inode dropping work queue so that deletes are | ||
| 1937 | * performed while the filesystem is still working | ||
| 1938 | */ | ||
| 1939 | ocfs2_drop_all_dl_inodes(osb); | ||
| 1940 | |||
| 1941 | /* Orphan scan should be stopped as early as possible */ | 1916 | /* Orphan scan should be stopped as early as possible */ |
| 1942 | ocfs2_orphan_scan_stop(osb); | 1917 | ocfs2_orphan_scan_stop(osb); |
| 1943 | 1918 | ||
| 1944 | ocfs2_disable_quotas(osb); | 1919 | ocfs2_disable_quotas(osb); |
| 1945 | 1920 | ||
| 1921 | /* All dquots should be freed by now */ | ||
| 1922 | WARN_ON(!llist_empty(&osb->dquot_drop_list)); | ||
| 1923 | /* Wait for worker to be done with the work structure in osb */ | ||
| 1924 | cancel_work_sync(&osb->dquot_drop_work); | ||
| 1925 | |||
| 1946 | ocfs2_shutdown_local_alloc(osb); | 1926 | ocfs2_shutdown_local_alloc(osb); |
| 1947 | 1927 | ||
| 1948 | /* This will disable recovery and flush any recovery work. */ | 1928 | /* This will disable recovery and flush any recovery work. */ |
| @@ -2077,7 +2057,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2077 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 2057 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; |
| 2078 | struct inode *inode = NULL; | 2058 | struct inode *inode = NULL; |
| 2079 | struct ocfs2_journal *journal; | 2059 | struct ocfs2_journal *journal; |
| 2080 | __le32 uuid_net_key; | ||
| 2081 | struct ocfs2_super *osb; | 2060 | struct ocfs2_super *osb; |
| 2082 | u64 total_blocks; | 2061 | u64 total_blocks; |
| 2083 | 2062 | ||
| @@ -2123,6 +2102,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2123 | spin_lock_init(&osb->osb_xattr_lock); | 2102 | spin_lock_init(&osb->osb_xattr_lock); |
| 2124 | ocfs2_init_steal_slots(osb); | 2103 | ocfs2_init_steal_slots(osb); |
| 2125 | 2104 | ||
| 2105 | mutex_init(&osb->system_file_mutex); | ||
| 2106 | |||
| 2126 | atomic_set(&osb->alloc_stats.moves, 0); | 2107 | atomic_set(&osb->alloc_stats.moves, 0); |
| 2127 | atomic_set(&osb->alloc_stats.local_data, 0); | 2108 | atomic_set(&osb->alloc_stats.local_data, 0); |
| 2128 | atomic_set(&osb->alloc_stats.bitmap_data, 0); | 2109 | atomic_set(&osb->alloc_stats.bitmap_data, 0); |
| @@ -2276,8 +2257,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2276 | INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); | 2257 | INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); |
| 2277 | journal->j_state = OCFS2_JOURNAL_FREE; | 2258 | journal->j_state = OCFS2_JOURNAL_FREE; |
| 2278 | 2259 | ||
| 2279 | INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); | 2260 | INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); |
| 2280 | osb->dentry_lock_list = NULL; | 2261 | init_llist_head(&osb->dquot_drop_list); |
| 2281 | 2262 | ||
| 2282 | /* get some pseudo constants for clustersize bits */ | 2263 | /* get some pseudo constants for clustersize bits */ |
| 2283 | osb->s_clustersize_bits = | 2264 | osb->s_clustersize_bits = |
| @@ -2311,8 +2292,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2311 | goto bail; | 2292 | goto bail; |
| 2312 | } | 2293 | } |
| 2313 | 2294 | ||
| 2314 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | ||
| 2315 | |||
| 2316 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 2295 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); |
| 2317 | osb->vol_label[63] = '\0'; | 2296 | osb->vol_label[63] = '\0'; |
| 2318 | osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); | 2297 | osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); |
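The sync_filesystem(sb) call added to ocfs2_remount() recurs across this whole series (adfs, affs, openpromfs, proc and pstore appear in this section alone): the VFS used to sync on behalf of every filesystem during remount, and that responsibility moves into each ->remount_fs so a filesystem can skip it when it has nothing to flush. The resulting minimal shape, assuming a hypothetical foo_parse_options():

    static int foo_remount(struct super_block *sb, int *flags, char *data)
    {
            /* flush dirty data before the mount options change */
            sync_filesystem(sb);
            return foo_parse_options(sb, data);
    }

The other super.c changes stand alone: ocfs2_kill_sb() collapses into plain kill_block_super because the dentry-lock dropping machinery it used to flush is gone, and the per-inode unaligned AIO throttle becomes a mutex instead of an atomic counter.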
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index f053688d22a3..af155c183123 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
| @@ -113,9 +113,11 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 113 | } else | 113 | } else |
| 114 | arr = get_local_system_inode(osb, type, slot); | 114 | arr = get_local_system_inode(osb, type, slot); |
| 115 | 115 | ||
| 116 | mutex_lock(&osb->system_file_mutex); | ||
| 116 | if (arr && ((inode = *arr) != NULL)) { | 117 | if (arr && ((inode = *arr) != NULL)) { |
| 117 | /* get a ref in addition to the array ref */ | 118 | /* get a ref in addition to the array ref */ |
| 118 | inode = igrab(inode); | 119 | inode = igrab(inode); |
| 120 | mutex_unlock(&osb->system_file_mutex); | ||
| 119 | BUG_ON(!inode); | 121 | BUG_ON(!inode); |
| 120 | 122 | ||
| 121 | return inode; | 123 | return inode; |
| @@ -129,6 +131,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 129 | *arr = igrab(inode); | 131 | *arr = igrab(inode); |
| 130 | BUG_ON(!*arr); | 132 | BUG_ON(!*arr); |
| 131 | } | 133 | } |
| 134 | mutex_unlock(&osb->system_file_mutex); | ||
| 132 | return inode; | 135 | return inode; |
| 133 | } | 136 | } |
| 134 | 137 | ||
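The new system_file_mutex closes a race in ocfs2_get_system_file_inode(): two tasks could both find *arr empty and both instantiate the system inode, leaking a reference. The lookup now follows the usual check-under-lock caching shape; a sketch with a hypothetical cache slot and constructor:

    static struct inode *get_cached_inode(struct mutex *lock,
                                          struct inode **slot)
    {
            struct inode *inode;

            mutex_lock(lock);
            inode = *slot;
            if (inode) {
                    /* caller's reference on top of the cache's own */
                    inode = igrab(inode);
                    mutex_unlock(lock);
                    return inode;
            }
            inode = make_new_inode();       /* hypothetical constructor */
            if (inode)
                    *slot = igrab(inode);   /* cache keeps its own ref */
            mutex_unlock(lock);
            return inode;
    }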
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 185fa3b7f962..016f01df3825 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -369,7 +369,7 @@ static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) | |||
| 369 | * them fully. | 369 | * them fully. |
| 370 | */ | 370 | */ |
| 371 | static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | 371 | static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, |
| 372 | u64 xb_blkno) | 372 | u64 xb_blkno, int new) |
| 373 | { | 373 | { |
| 374 | int i, rc = 0; | 374 | int i, rc = 0; |
| 375 | 375 | ||
| @@ -383,9 +383,16 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | |||
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | 385 | if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
| 386 | bucket->bu_bhs[i])) | 386 | bucket->bu_bhs[i])) { |
| 387 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | 387 | if (new) |
| 388 | bucket->bu_bhs[i]); | 388 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
| 389 | bucket->bu_bhs[i]); | ||
| 390 | else { | ||
| 391 | set_buffer_uptodate(bucket->bu_bhs[i]); | ||
| 392 | ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | ||
| 393 | bucket->bu_bhs[i]); | ||
| 394 | } | ||
| 395 | } | ||
| 389 | } | 396 | } |
| 390 | 397 | ||
| 391 | if (rc) | 398 | if (rc) |
| @@ -2602,6 +2609,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
| 2602 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); | 2609 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); |
| 2603 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | 2610 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); |
| 2604 | spin_unlock(&oi->ip_lock); | 2611 | spin_unlock(&oi->ip_lock); |
| 2612 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 2605 | 2613 | ||
| 2606 | ocfs2_journal_dirty(handle, di_bh); | 2614 | ocfs2_journal_dirty(handle, di_bh); |
| 2607 | out_commit: | 2615 | out_commit: |
| @@ -3200,8 +3208,15 @@ meta_guess: | |||
| 3200 | clusters_add += 1; | 3208 | clusters_add += 1; |
| 3201 | } | 3209 | } |
| 3202 | } else { | 3210 | } else { |
| 3203 | meta_add += 1; | ||
| 3204 | credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; | 3211 | credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; |
| 3212 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
| 3213 | struct ocfs2_extent_list *el = &def_xv.xv.xr_list; | ||
| 3214 | meta_add += ocfs2_extend_meta_needed(el); | ||
| 3215 | credits += ocfs2_calc_extend_credits(inode->i_sb, | ||
| 3216 | el); | ||
| 3217 | } else { | ||
| 3218 | meta_add += 1; | ||
| 3219 | } | ||
| 3205 | } | 3220 | } |
| 3206 | out: | 3221 | out: |
| 3207 | if (clusters_need) | 3222 | if (clusters_need) |
| @@ -3614,6 +3629,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
| 3614 | } | 3629 | } |
| 3615 | 3630 | ||
| 3616 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); | 3631 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); |
| 3632 | ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); | ||
| 3617 | 3633 | ||
| 3618 | ocfs2_commit_trans(osb, ctxt.handle); | 3634 | ocfs2_commit_trans(osb, ctxt.handle); |
| 3619 | 3635 | ||
| @@ -4294,7 +4310,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, | |||
| 4294 | 4310 | ||
| 4295 | trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); | 4311 | trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); |
| 4296 | 4312 | ||
| 4297 | ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); | 4313 | ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); |
| 4298 | if (ret) { | 4314 | if (ret) { |
| 4299 | mlog_errno(ret); | 4315 | mlog_errno(ret); |
| 4300 | goto out; | 4316 | goto out; |
| @@ -4638,7 +4654,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, | |||
| 4638 | * Even if !new_bucket_head, we're overwriting t_bucket. Thus, | 4654 | * Even if !new_bucket_head, we're overwriting t_bucket. Thus, |
| 4639 | * there's no need to read it. | 4655 | * there's no need to read it. |
| 4640 | */ | 4656 | */ |
| 4641 | ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); | 4657 | ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); |
| 4642 | if (ret) { | 4658 | if (ret) { |
| 4643 | mlog_errno(ret); | 4659 | mlog_errno(ret); |
| 4644 | goto out; | 4660 | goto out; |
| @@ -4804,7 +4820,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, | |||
| 4804 | * Even if !t_is_new, we're overwriting t_bucket. Thus, | 4820 | * Even if !t_is_new, we're overwriting t_bucket. Thus, |
| 4805 | * there's no need to read it. | 4821 | * there's no need to read it. |
| 4806 | */ | 4822 | */ |
| 4807 | ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); | 4823 | ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); |
| 4808 | if (ret) | 4824 | if (ret) |
| 4809 | goto out; | 4825 | goto out; |
| 4810 | 4826 | ||
| @@ -5476,6 +5492,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, | |||
| 5476 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); | 5492 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); |
| 5477 | if (ret) | 5493 | if (ret) |
| 5478 | mlog_errno(ret); | 5494 | mlog_errno(ret); |
| 5495 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
| 5479 | 5496 | ||
| 5480 | out_commit: | 5497 | out_commit: |
| 5481 | ocfs2_commit_trans(osb, handle); | 5498 | ocfs2_commit_trans(osb, handle); |
| @@ -6830,7 +6847,7 @@ static int ocfs2_reflink_xattr_bucket(handle_t *handle, | |||
| 6830 | break; | 6847 | break; |
| 6831 | } | 6848 | } |
| 6832 | 6849 | ||
| 6833 | ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); | 6850 | ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); |
| 6834 | if (ret) { | 6851 | if (ret) { |
| 6835 | mlog_errno(ret); | 6852 | mlog_errno(ret); |
| 6836 | break; | 6853 | break; |
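The new int argument to ocfs2_init_xattr_bucket() distinguishes truly new blocks from blocks that are merely being overwritten: only a brand-new block may use ocfs2_set_new_buffer_uptodate(), while an existing block that will be fully rewritten just needs its buffers marked uptodate so nothing stale is read back from disk. The underlying buffer-head idiom, sketched for a block about to be completely overwritten:

    #include <linux/buffer_head.h>

    /* Grab a block we will rewrite in full: no disk read needed,
     * just mark the buffer uptodate so later lookups trust it. */
    static struct buffer_head *getblk_for_overwrite(struct super_block *sb,
                                                    sector_t blkno)
    {
            struct buffer_head *bh = sb_getblk(sb, blkno);

            if (bh) {
                    lock_buffer(bh);
                    set_buffer_uptodate(bh);
                    unlock_buffer(bh);
            }
            return bh;
    }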
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d8b0afde2179..ec58c7659183 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
| @@ -183,7 +183,7 @@ int omfs_sync_inode(struct inode *inode) | |||
| 183 | */ | 183 | */ |
| 184 | static void omfs_evict_inode(struct inode *inode) | 184 | static void omfs_evict_inode(struct inode *inode) |
| 185 | { | 185 | { |
| 186 | truncate_inode_pages(&inode->i_data, 0); | 186 | truncate_inode_pages_final(&inode->i_data); |
| 187 | clear_inode(inode); | 187 | clear_inode(inode); |
| 188 | 188 | ||
| 189 | if (inode->i_nlink) | 189 | if (inode->i_nlink) |
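omfs gets the same truncate_inode_pages_final() conversion as 9p, affs and proc in this section: at eviction time the mapping is torn down for good, and the _final variant additionally tells the mm layer that no pages can be added to the mapping afterwards. The canonical shape, assuming no filesystem-specific teardown:

    static void foo_evict_inode(struct inode *inode)
    {
            /* drop every remaining page; the mapping is now dead */
            truncate_inode_pages_final(&inode->i_data);
            clear_inode(inode);
    }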
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
| @@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 231 | return -EINVAL; | 231 | return -EINVAL; |
| 232 | 232 | ||
| 233 | /* Return error if mode is not supported */ | 233 | /* Return error if mode is not supported */ |
| 234 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 234 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
| 235 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
| 236 | return -EOPNOTSUPP; | ||
| 237 | |||
| 238 | /* Punch hole and zero range are mutually exclusive */ | ||
| 239 | if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == | ||
| 240 | (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) | ||
| 235 | return -EOPNOTSUPP; | 241 | return -EOPNOTSUPP; |
| 236 | 242 | ||
| 237 | /* Punch hole must have keep size set */ | 243 | /* Punch hole must have keep size set */ |
| @@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 239 | !(mode & FALLOC_FL_KEEP_SIZE)) | 245 | !(mode & FALLOC_FL_KEEP_SIZE)) |
| 240 | return -EOPNOTSUPP; | 246 | return -EOPNOTSUPP; |
| 241 | 247 | ||
| 248 | /* Collapse range should only be used exclusively. */ | ||
| 249 | if ((mode & FALLOC_FL_COLLAPSE_RANGE) && | ||
| 250 | (mode & ~FALLOC_FL_COLLAPSE_RANGE)) | ||
| 251 | return -EINVAL; | ||
| 252 | |||
| 242 | if (!(file->f_mode & FMODE_WRITE)) | 253 | if (!(file->f_mode & FMODE_WRITE)) |
| 243 | return -EBADF; | 254 | return -EBADF; |
| 244 | 255 | ||
| 245 | /* It's not possible punch hole on append only file */ | 256 | /* |
| 246 | if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) | 257 | * It's not possible to punch hole or perform collapse range |
| 258 | * on append only file | ||
| 259 | */ | ||
| 260 | if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) | ||
| 261 | && IS_APPEND(inode)) | ||
| 247 | return -EPERM; | 262 | return -EPERM; |
| 248 | 263 | ||
| 249 | if (IS_IMMUTABLE(inode)) | 264 | if (IS_IMMUTABLE(inode)) |
| @@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 271 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) | 286 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) |
| 272 | return -EFBIG; | 287 | return -EFBIG; |
| 273 | 288 | ||
| 289 | /* | ||
| 290 | * There is no need to overlap collapse range with EOF, in which case | ||
| 291 | * it is effectively a truncate operation | ||
| 292 | */ | ||
| 293 | if ((mode & FALLOC_FL_COLLAPSE_RANGE) && | ||
| 294 | (offset + len >= i_size_read(inode))) | ||
| 295 | return -EINVAL; | ||
| 296 | |||
| 274 | if (!file->f_op->fallocate) | 297 | if (!file->f_op->fallocate) |
| 275 | return -EOPNOTSUPP; | 298 | return -EOPNOTSUPP; |
| 276 | 299 | ||
| @@ -705,6 +728,10 @@ static int do_dentry_open(struct file *f, | |||
| 705 | return 0; | 728 | return 0; |
| 706 | } | 729 | } |
| 707 | 730 | ||
| 731 | /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ | ||
| 732 | if (S_ISREG(inode->i_mode)) | ||
| 733 | f->f_mode |= FMODE_ATOMIC_POS; | ||
| 734 | |||
| 708 | f->f_op = fops_get(inode->i_fop); | 735 | f->f_op = fops_get(inode->i_fop); |
| 709 | if (unlikely(WARN_ON(!f->f_op))) { | 736 | if (unlikely(WARN_ON(!f->f_op))) { |
| 710 | error = -ENODEV; | 737 | error = -ENODEV; |
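These open.c hunks do two things. do_fallocate() now admits FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_ZERO_RANGE, rejects punch-hole combined with zero-range, requires collapse-range to be used with no other flag, and refuses a collapse whose range reaches EOF (that would just be a truncate). And do_dentry_open() sets FMODE_ATOMIC_POS on regular files so concurrent file-position updates are serialized per POSIX. From userspace, the new collapse operation looks like this (illustrative; error handling trimmed):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>

    /* Remove bytes [offset, offset+len) and shift the tail of the
     * file down; the range must end before EOF or EINVAL results. */
    int collapse_range(int fd, off_t offset, off_t len)
    {
            return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, len);
    }

Both offset and len generally must also be aligned to the filesystem block size; the filesystem's own ->fallocate enforces that.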
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 8c0ceb8dd1f7..15e4500cda3e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
| @@ -368,6 +368,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) | |||
| 368 | 368 | ||
| 369 | static int openprom_remount(struct super_block *sb, int *flags, char *data) | 369 | static int openprom_remount(struct super_block *sb, int *flags, char *data) |
| 370 | { | 370 | { |
| 371 | sync_filesystem(sb); | ||
| 371 | *flags |= MS_NOATIME; | 372 | *flags |= MS_NOATIME; |
| 372 | return 0; | 373 | return 0; |
| 373 | } | 374 | } |
diff --git a/fs/pnode.c b/fs/pnode.c index c7221bb19801..88396df725b4 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
| @@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest, | |||
| 220 | * @tree_list : list of heads of trees to be attached. | 220 | * @tree_list : list of heads of trees to be attached. |
| 221 | */ | 221 | */ |
| 222 | int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, | 222 | int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, |
| 223 | struct mount *source_mnt, struct list_head *tree_list) | 223 | struct mount *source_mnt, struct hlist_head *tree_list) |
| 224 | { | 224 | { |
| 225 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | 225 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
| 226 | struct mount *m, *child; | 226 | struct mount *m, *child; |
| 227 | int ret = 0; | 227 | int ret = 0; |
| 228 | struct mount *prev_dest_mnt = dest_mnt; | 228 | struct mount *prev_dest_mnt = dest_mnt; |
| 229 | struct mount *prev_src_mnt = source_mnt; | 229 | struct mount *prev_src_mnt = source_mnt; |
| 230 | LIST_HEAD(tmp_list); | 230 | HLIST_HEAD(tmp_list); |
| 231 | 231 | ||
| 232 | for (m = propagation_next(dest_mnt, dest_mnt); m; | 232 | for (m = propagation_next(dest_mnt, dest_mnt); m; |
| 233 | m = propagation_next(m, dest_mnt)) { | 233 | m = propagation_next(m, dest_mnt)) { |
| @@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, | |||
| 246 | child = copy_tree(source, source->mnt.mnt_root, type); | 246 | child = copy_tree(source, source->mnt.mnt_root, type); |
| 247 | if (IS_ERR(child)) { | 247 | if (IS_ERR(child)) { |
| 248 | ret = PTR_ERR(child); | 248 | ret = PTR_ERR(child); |
| 249 | list_splice(tree_list, tmp_list.prev); | 249 | tmp_list = *tree_list; |
| 250 | tmp_list.first->pprev = &tmp_list.first; | ||
| 251 | INIT_HLIST_HEAD(tree_list); | ||
| 250 | goto out; | 252 | goto out; |
| 251 | } | 253 | } |
| 252 | 254 | ||
| 253 | if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { | 255 | if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { |
| 254 | mnt_set_mountpoint(m, dest_mp, child); | 256 | mnt_set_mountpoint(m, dest_mp, child); |
| 255 | list_add_tail(&child->mnt_hash, tree_list); | 257 | hlist_add_head(&child->mnt_hash, tree_list); |
| 256 | } else { | 258 | } else { |
| 257 | /* | 259 | /* |
| 258 | * This can happen if the parent mount was bind mounted | 260 | * This can happen if the parent mount was bind mounted |
| 259 | * on some subdirectory of a shared/slave mount. | 261 | * on some subdirectory of a shared/slave mount. |
| 260 | */ | 262 | */ |
| 261 | list_add_tail(&child->mnt_hash, &tmp_list); | 263 | hlist_add_head(&child->mnt_hash, &tmp_list); |
| 262 | } | 264 | } |
| 263 | prev_dest_mnt = m; | 265 | prev_dest_mnt = m; |
| 264 | prev_src_mnt = child; | 266 | prev_src_mnt = child; |
| 265 | } | 267 | } |
| 266 | out: | 268 | out: |
| 267 | lock_mount_hash(); | 269 | lock_mount_hash(); |
| 268 | while (!list_empty(&tmp_list)) { | 270 | while (!hlist_empty(&tmp_list)) { |
| 269 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); | 271 | child = hlist_entry(tmp_list.first, struct mount, mnt_hash); |
| 270 | umount_tree(child, 0); | 272 | umount_tree(child, 0); |
| 271 | } | 273 | } |
| 272 | unlock_mount_hash(); | 274 | unlock_mount_hash(); |
| @@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt) | |||
| 338 | * umount the child only if the child has no | 340 | * umount the child only if the child has no |
| 339 | * other children | 341 | * other children |
| 340 | */ | 342 | */ |
| 341 | if (child && list_empty(&child->mnt_mounts)) | 343 | if (child && list_empty(&child->mnt_mounts)) { |
| 342 | list_move_tail(&child->mnt_hash, &mnt->mnt_hash); | 344 | hlist_del_init_rcu(&child->mnt_hash); |
| 345 | hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); | ||
| 346 | } | ||
| 343 | } | 347 | } |
| 344 | } | 348 | } |
| 345 | 349 | ||
| @@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt) | |||
| 350 | * | 354 | * |
| 351 | * vfsmount lock must be held for write | 355 | * vfsmount lock must be held for write |
| 352 | */ | 356 | */ |
| 353 | int propagate_umount(struct list_head *list) | 357 | int propagate_umount(struct hlist_head *list) |
| 354 | { | 358 | { |
| 355 | struct mount *mnt; | 359 | struct mount *mnt; |
| 356 | 360 | ||
| 357 | list_for_each_entry(mnt, list, mnt_hash) | 361 | hlist_for_each_entry(mnt, list, mnt_hash) |
| 358 | __propagate_umount(mnt); | 362 | __propagate_umount(mnt); |
| 359 | return 0; | 363 | return 0; |
| 360 | } | 364 | } |
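The pnode.c conversion moves mnt_hash from a list_head to an hlist_node: an hlist head is a single pointer rather than two, which halves the size of the (large) mount hash table at the cost of O(1) access to the head only. The operations the new code leans on map one-for-one onto the old list calls; a self-contained sketch with a hypothetical stand-in struct:

    #include <linux/list.h>

    struct mount_like {                     /* hypothetical stand-in */
            struct hlist_node hash;
    };

    static HLIST_HEAD(hash_head);

    static void demo(struct mount_like *m)
    {
            hlist_add_head(&m->hash, &hash_head);   /* insert at front */

            while (!hlist_empty(&hash_head)) {
                    struct mount_like *first =
                            hlist_entry(hash_head.first,
                                        struct mount_like, hash);
                    hlist_del_init(&first->hash);
            }
    }

Note the error path in propagate_mnt(): splicing one hlist into another has no list_splice() equivalent, hence the manual head copy plus the pprev fixup on tmp_list.first.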
diff --git a/fs/pnode.h b/fs/pnode.h index 59e7eda1851e..fc28a27fa892 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
| @@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt) | |||
| 36 | 36 | ||
| 37 | void change_mnt_propagation(struct mount *, int); | 37 | void change_mnt_propagation(struct mount *, int); |
| 38 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, | 38 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, |
| 39 | struct list_head *); | 39 | struct hlist_head *); |
| 40 | int propagate_umount(struct list_head *); | 40 | int propagate_umount(struct hlist_head *); |
| 41 | int propagate_mount_busy(struct mount *, int); | 41 | int propagate_mount_busy(struct mount *, int); |
| 42 | void mnt_release_group_id(struct mount *); | 42 | void mnt_release_group_id(struct mount *); |
| 43 | int get_dominating_id(struct mount *mnt, const struct path *root); | 43 | int get_dominating_id(struct mount *mnt, const struct path *root); |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 11c54fd51e16..9e363e41dacc 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
| @@ -723,7 +723,7 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, | |||
| 723 | void *buffer, size_t size) | 723 | void *buffer, size_t size) |
| 724 | { | 724 | { |
| 725 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; | 725 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; |
| 726 | posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; | 726 | posix_acl_xattr_entry *ext_entry; |
| 727 | int real_size, n; | 727 | int real_size, n; |
| 728 | 728 | ||
| 729 | real_size = posix_acl_xattr_size(acl->a_count); | 729 | real_size = posix_acl_xattr_size(acl->a_count); |
| @@ -731,7 +731,8 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, | |||
| 731 | return real_size; | 731 | return real_size; |
| 732 | if (real_size > size) | 732 | if (real_size > size) |
| 733 | return -ERANGE; | 733 | return -ERANGE; |
| 734 | 734 | ||
| 735 | ext_entry = ext_acl->a_entries; | ||
| 735 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); | 736 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); |
| 736 | 737 | ||
| 737 | for (n=0; n < acl->a_count; n++, ext_entry++) { | 738 | for (n=0; n < acl->a_count; n++, ext_entry++) { |
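The posix_acl.c change is subtle: callers of posix_acl_to_xattr() may pass buffer == NULL purely to learn the required size, and the old code computed ext_acl->a_entries -- a pointer offset into that NULL buffer -- before the size checks ran, which is undefined behavior even if never dereferenced. Moving the assignment below both early returns fixes it. The same "NULL buffer means size query" convention, sketched with a hypothetical serializer:

    #include <errno.h>
    #include <string.h>
    #include <sys/types.h>

    /* Serialize n integers; a NULL buf asks only for the size,
     * so buf must not be touched (not even pointer arithmetic). */
    static ssize_t serialize(const unsigned *items, size_t n,
                             void *buf, size_t size)
    {
            size_t need = n * sizeof(*items);

            if (!buf)
                    return (ssize_t)need;   /* size query */
            if (need > size)
                    return -ERANGE;
            memcpy(buf, items, need);       /* buf validated first */
            return (ssize_t)need;
    }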
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ab30716584f5..239493ec718e 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
| @@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | |||
| 27 | proc-$(CONFIG_NET) += proc_net.o | 27 | proc-$(CONFIG_NET) += proc_net.o |
| 28 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 28 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
| 29 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o | 29 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o |
| 30 | proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o | ||
| 31 | proc-$(CONFIG_PRINTK) += kmsg.o | 30 | proc-$(CONFIG_PRINTK) += kmsg.o |
| 32 | proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o | 31 | proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 51507065263b..b9760628e1fd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) | |||
| 1824 | if (rc) | 1824 | if (rc) |
| 1825 | goto out_mmput; | 1825 | goto out_mmput; |
| 1826 | 1826 | ||
| 1827 | rc = -ENOENT; | ||
| 1827 | down_read(&mm->mmap_sem); | 1828 | down_read(&mm->mmap_sem); |
| 1828 | vma = find_exact_vma(mm, vm_start, vm_end); | 1829 | vma = find_exact_vma(mm, vm_start, vm_end); |
| 1829 | if (vma && vma->vm_file) { | 1830 | if (vma && vma->vm_file) { |
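The added rc = -ENOENT presets the failure code before the vma lookup in proc_map_files_get_link(), so every path that falls through without finding a matching mapping reports a real error instead of whatever an earlier call left in rc. The idiom, as it plausibly reads in the patched function (a sketch, not the verbatim kernel source):

    /* preset the failure code; only the success path clears it */
    rc = -ENOENT;
    down_read(&mm->mmap_sem);
    vma = find_exact_vma(mm, vm_start, vm_end);
    if (vma && vma->vm_file) {
            *path = vma->vm_file->f_path;
            path_get(path);
            rc = 0;
    }
    up_read(&mm->mmap_sem);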
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 124fc43c7090..8f20e3404fd2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -35,7 +35,7 @@ static void proc_evict_inode(struct inode *inode) | |||
| 35 | const struct proc_ns_operations *ns_ops; | 35 | const struct proc_ns_operations *ns_ops; |
| 36 | void *ns; | 36 | void *ns; |
| 37 | 37 | ||
| 38 | truncate_inode_pages(&inode->i_data, 0); | 38 | truncate_inode_pages_final(&inode->i_data); |
| 39 | clear_inode(inode); | 39 | clear_inode(inode); |
| 40 | 40 | ||
| 41 | /* Stop tracking associated processes */ | 41 | /* Stop tracking associated processes */ |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 651d09a11dde..3ab6d14e71c5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -211,13 +211,6 @@ extern int proc_fill_super(struct super_block *); | |||
| 211 | extern void proc_entry_rundown(struct proc_dir_entry *); | 211 | extern void proc_entry_rundown(struct proc_dir_entry *); |
| 212 | 212 | ||
| 213 | /* | 213 | /* |
| 214 | * proc_devtree.c | ||
| 215 | */ | ||
| 216 | #ifdef CONFIG_PROC_DEVICETREE | ||
| 217 | extern void proc_device_tree_init(void); | ||
| 218 | #endif | ||
| 219 | |||
| 220 | /* | ||
| 221 | * proc_namespaces.c | 214 | * proc_namespaces.c |
| 222 | */ | 215 | */ |
| 223 | extern const struct inode_operations proc_ns_dir_inode_operations; | 216 | extern const struct inode_operations proc_ns_dir_inode_operations; |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 02174a610315..e647c55275d9 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
| @@ -121,9 +121,8 @@ u64 stable_page_flags(struct page *page) | |||
| 121 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon | 121 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon |
| 122 | * to make sure a given page is a thp, not a non-huge compound page. | 122 | * to make sure a given page is a thp, not a non-huge compound page. |
| 123 | */ | 123 | */ |
| 124 | else if (PageTransCompound(page) && | 124 | else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || |
| 125 | (PageLRU(compound_trans_head(page)) || | 125 | PageAnon(compound_head(page)))) |
| 126 | PageAnon(compound_trans_head(page)))) | ||
| 127 | u |= 1 << KPF_THP; | 126 | u |= 1 << KPF_THP; |
| 128 | 127 | ||
| 129 | /* | 128 | /* |
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c deleted file mode 100644 index c82dd5147845..000000000000 --- a/fs/proc/proc_devtree.c +++ /dev/null | |||
| @@ -1,241 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * proc_devtree.c - handles /proc/device-tree | ||
| 3 | * | ||
| 4 | * Copyright 1997 Paul Mackerras | ||
| 5 | */ | ||
| 6 | #include <linux/errno.h> | ||
| 7 | #include <linux/init.h> | ||
| 8 | #include <linux/time.h> | ||
| 9 | #include <linux/proc_fs.h> | ||
| 10 | #include <linux/seq_file.h> | ||
| 11 | #include <linux/printk.h> | ||
| 12 | #include <linux/stat.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/of.h> | ||
| 15 | #include <linux/export.h> | ||
| 16 | #include <linux/slab.h> | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | #include "internal.h" | ||
| 19 | |||
| 20 | static inline void set_node_proc_entry(struct device_node *np, | ||
| 21 | struct proc_dir_entry *de) | ||
| 22 | { | ||
| 23 | np->pde = de; | ||
| 24 | } | ||
| 25 | |||
| 26 | static struct proc_dir_entry *proc_device_tree; | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Supply data on a read from /proc/device-tree/node/property. | ||
| 30 | */ | ||
| 31 | static int property_proc_show(struct seq_file *m, void *v) | ||
| 32 | { | ||
| 33 | struct property *pp = m->private; | ||
| 34 | |||
| 35 | seq_write(m, pp->value, pp->length); | ||
| 36 | return 0; | ||
| 37 | } | ||
| 38 | |||
| 39 | static int property_proc_open(struct inode *inode, struct file *file) | ||
| 40 | { | ||
| 41 | return single_open(file, property_proc_show, __PDE_DATA(inode)); | ||
| 42 | } | ||
| 43 | |||
| 44 | static const struct file_operations property_proc_fops = { | ||
| 45 | .owner = THIS_MODULE, | ||
| 46 | .open = property_proc_open, | ||
| 47 | .read = seq_read, | ||
| 48 | .llseek = seq_lseek, | ||
| 49 | .release = single_release, | ||
| 50 | }; | ||
| 51 | |||
| 52 | /* | ||
| 53 | * For a node with a name like "gc@10", we make symlinks called "gc" | ||
| 54 | * and "@10" to it. | ||
| 55 | */ | ||
| 56 | |||
| 57 | /* | ||
| 58 | * Add a property to a node | ||
| 59 | */ | ||
| 60 | static struct proc_dir_entry * | ||
| 61 | __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, | ||
| 62 | const char *name) | ||
| 63 | { | ||
| 64 | struct proc_dir_entry *ent; | ||
| 65 | |||
| 66 | /* | ||
| 67 | * Unfortunately proc_register puts each new entry | ||
| 68 | * at the beginning of the list. So we rearrange them. | ||
| 69 | */ | ||
| 70 | ent = proc_create_data(name, | ||
| 71 | strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR, | ||
| 72 | de, &property_proc_fops, pp); | ||
| 73 | if (ent == NULL) | ||
| 74 | return NULL; | ||
| 75 | |||
| 76 | if (!strncmp(name, "security-", 9)) | ||
| 77 | proc_set_size(ent, 0); /* don't leak number of password chars */ | ||
| 78 | else | ||
| 79 | proc_set_size(ent, pp->length); | ||
| 80 | |||
| 81 | return ent; | ||
| 82 | } | ||
| 83 | |||
| 84 | |||
| 85 | void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) | ||
| 86 | { | ||
| 87 | __proc_device_tree_add_prop(pde, prop, prop->name); | ||
| 88 | } | ||
| 89 | |||
| 90 | void proc_device_tree_remove_prop(struct proc_dir_entry *pde, | ||
| 91 | struct property *prop) | ||
| 92 | { | ||
| 93 | remove_proc_entry(prop->name, pde); | ||
| 94 | } | ||
| 95 | |||
| 96 | void proc_device_tree_update_prop(struct proc_dir_entry *pde, | ||
| 97 | struct property *newprop, | ||
| 98 | struct property *oldprop) | ||
| 99 | { | ||
| 100 | struct proc_dir_entry *ent; | ||
| 101 | |||
| 102 | if (!oldprop) { | ||
| 103 | proc_device_tree_add_prop(pde, newprop); | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | |||
| 107 | for (ent = pde->subdir; ent != NULL; ent = ent->next) | ||
| 108 | if (ent->data == oldprop) | ||
| 109 | break; | ||
| 110 | if (ent == NULL) { | ||
| 111 | pr_warn("device-tree: property \"%s\" does not exist\n", | ||
| 112 | oldprop->name); | ||
| 113 | } else { | ||
| 114 | ent->data = newprop; | ||
| 115 | ent->size = newprop->length; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * Various dodgy firmware might give us nodes and/or properties with | ||
| 121 | * conflicting names. That's generally ok, except for exporting via /proc, | ||
| 122 | * so munge names here to ensure they're unique. | ||
| 123 | */ | ||
| 124 | |||
| 125 | static int duplicate_name(struct proc_dir_entry *de, const char *name) | ||
| 126 | { | ||
| 127 | struct proc_dir_entry *ent; | ||
| 128 | int found = 0; | ||
| 129 | |||
| 130 | spin_lock(&proc_subdir_lock); | ||
| 131 | |||
| 132 | for (ent = de->subdir; ent != NULL; ent = ent->next) { | ||
| 133 | if (strcmp(ent->name, name) == 0) { | ||
| 134 | found = 1; | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | spin_unlock(&proc_subdir_lock); | ||
| 140 | |||
| 141 | return found; | ||
| 142 | } | ||
| 143 | |||
| 144 | static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, | ||
| 145 | const char *name) | ||
| 146 | { | ||
| 147 | char *fixed_name; | ||
| 148 | int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */ | ||
| 149 | int i = 1, size; | ||
| 150 | |||
| 151 | realloc: | ||
| 152 | fixed_name = kmalloc(fixup_len, GFP_KERNEL); | ||
| 153 | if (fixed_name == NULL) { | ||
| 154 | pr_err("device-tree: Out of memory trying to fixup " | ||
| 155 | "name \"%s\"\n", name); | ||
| 156 | return name; | ||
| 157 | } | ||
| 158 | |||
| 159 | retry: | ||
| 160 | size = snprintf(fixed_name, fixup_len, "%s#%d", name, i); | ||
| 161 | size++; /* account for NULL */ | ||
| 162 | |||
| 163 | if (size > fixup_len) { | ||
| 164 | /* We ran out of space, free and reallocate. */ | ||
| 165 | kfree(fixed_name); | ||
| 166 | fixup_len = size; | ||
| 167 | goto realloc; | ||
| 168 | } | ||
| 169 | |||
| 170 | if (duplicate_name(de, fixed_name)) { | ||
| 171 | /* Multiple duplicates. Retry with a different offset. */ | ||
| 172 | i++; | ||
| 173 | goto retry; | ||
| 174 | } | ||
| 175 | |||
| 176 | pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", | ||
| 177 | np->full_name, fixed_name); | ||
| 178 | |||
| 179 | return fixed_name; | ||
| 180 | } | ||
| 181 | |||
| 182 | /* | ||
| 183 | * Process a node, adding entries for its children and its properties. | ||
| 184 | */ | ||
| 185 | void proc_device_tree_add_node(struct device_node *np, | ||
| 186 | struct proc_dir_entry *de) | ||
| 187 | { | ||
| 188 | struct property *pp; | ||
| 189 | struct proc_dir_entry *ent; | ||
| 190 | struct device_node *child; | ||
| 191 | const char *p; | ||
| 192 | |||
| 193 | set_node_proc_entry(np, de); | ||
| 194 | for (child = NULL; (child = of_get_next_child(np, child));) { | ||
| 195 | /* Use everything after the last slash, or the full name */ | ||
| 196 | p = kbasename(child->full_name); | ||
| 197 | |||
| 198 | if (duplicate_name(de, p)) | ||
| 199 | p = fixup_name(np, de, p); | ||
| 200 | |||
| 201 | ent = proc_mkdir(p, de); | ||
| 202 | if (ent == NULL) | ||
| 203 | break; | ||
| 204 | proc_device_tree_add_node(child, ent); | ||
| 205 | } | ||
| 206 | of_node_put(child); | ||
| 207 | |||
| 208 | for (pp = np->properties; pp != NULL; pp = pp->next) { | ||
| 209 | p = pp->name; | ||
| 210 | |||
| 211 | if (strchr(p, '/')) | ||
| 212 | continue; | ||
| 213 | |||
| 214 | if (duplicate_name(de, p)) | ||
| 215 | p = fixup_name(np, de, p); | ||
| 216 | |||
| 217 | ent = __proc_device_tree_add_prop(de, pp, p); | ||
| 218 | if (ent == NULL) | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Called on initialization to set up the /proc/device-tree subtree | ||
| 225 | */ | ||
| 226 | void __init proc_device_tree_init(void) | ||
| 227 | { | ||
| 228 | struct device_node *root; | ||
| 229 | |||
| 230 | proc_device_tree = proc_mkdir("device-tree", NULL); | ||
| 231 | if (proc_device_tree == NULL) | ||
| 232 | return; | ||
| 233 | root = of_find_node_by_path("/"); | ||
| 234 | if (root == NULL) { | ||
| 235 | remove_proc_entry("device-tree", NULL); | ||
| 236 | pr_debug("/proc/device-tree: can't find root\n"); | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | proc_device_tree_add_node(root, proc_device_tree); | ||
| 240 | of_node_put(root); | ||
| 241 | } | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 87dbcbef7fe4..5dbadecb234d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
| @@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) | |||
| 92 | int proc_remount(struct super_block *sb, int *flags, char *data) | 92 | int proc_remount(struct super_block *sb, int *flags, char *data) |
| 93 | { | 93 | { |
| 94 | struct pid_namespace *pid = sb->s_fs_info; | 94 | struct pid_namespace *pid = sb->s_fs_info; |
| 95 | |||
| 96 | sync_filesystem(sb); | ||
| 95 | return !proc_parse_options(data, pid); | 97 | return !proc_parse_options(data, pid); |
| 96 | } | 98 | } |
| 97 | 99 | ||
| @@ -183,9 +185,6 @@ void __init proc_root_init(void) | |||
| 183 | proc_mkdir("openprom", NULL); | 185 | proc_mkdir("openprom", NULL); |
| 184 | #endif | 186 | #endif |
| 185 | proc_tty_init(); | 187 | proc_tty_init(); |
| 186 | #ifdef CONFIG_PROC_DEVICETREE | ||
| 187 | proc_device_tree_init(); | ||
| 188 | #endif | ||
| 189 | proc_mkdir("bus", NULL); | 188 | proc_mkdir("bus", NULL); |
| 190 | proc_sys_init(); | 189 | proc_sys_init(); |
| 191 | } | 190 | } |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c62f0cc..9d231e9e5f0e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| 10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
| 11 | #include <linux/irqnr.h> | 11 | #include <linux/irqnr.h> |
| 12 | #include <asm/cputime.h> | 12 | #include <linux/cputime.h> |
| 13 | #include <linux/tick.h> | 13 | #include <linux/tick.h> |
| 14 | 14 | ||
| 15 | #ifndef arch_irq_stat_cpu | 15 | #ifndef arch_irq_stat_cpu |
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d0ca9e..33de567c25af 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #include <linux/seq_file.h> | 5 | #include <linux/seq_file.h> |
| 6 | #include <linux/time.h> | 6 | #include <linux/time.h> |
| 7 | #include <linux/kernel_stat.h> | 7 | #include <linux/kernel_stat.h> |
| 8 | #include <asm/cputime.h> | 8 | #include <linux/cputime.h> |
| 9 | 9 | ||
| 10 | static int uptime_proc_show(struct seq_file *m, void *v) | 10 | static int uptime_proc_show(struct seq_file *m, void *v) |
| 11 | { | 11 | { |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 12823845d324..192297b0090d 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
| @@ -249,6 +249,7 @@ static void parse_options(char *options) | |||
| 249 | 249 | ||
| 250 | static int pstore_remount(struct super_block *sb, int *flags, char *data) | 250 | static int pstore_remount(struct super_block *sb, int *flags, char *data) |
| 251 | { | 251 | { |
| 252 | sync_filesystem(sb); | ||
| 252 | parse_options(data); | 253 | parse_options(data); |
| 253 | 254 | ||
| 254 | return 0; | 255 | return 0; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 78c3c2097787..46d269e38706 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
| @@ -497,6 +497,7 @@ void pstore_get_records(int quiet) | |||
| 497 | big_oops_buf_sz); | 497 | big_oops_buf_sz); |
| 498 | 498 | ||
| 499 | if (unzipped_len > 0) { | 499 | if (unzipped_len > 0) { |
| 500 | kfree(buf); | ||
| 500 | buf = big_oops_buf; | 501 | buf = big_oops_buf; |
| 501 | size = unzipped_len; | 502 | size = unzipped_len; |
| 502 | compressed = false; | 503 | compressed = false; |
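
The kfree() added above fixes a memory leak in pstore_get_records(): when a record decompresses successfully, buf is repointed at big_oops_buf, and the buffer originally returned by psi->read() was previously dropped without being freed. A minimal sketch of the corrected flow, with names taken from the hunk and the surrounding read loop abbreviated:

	if (unzipped_len > 0) {
		kfree(buf);		/* the psi->read() allocation, leaked before this fix */
		buf = big_oops_buf;	/* decompressed copy lives here */
		size = unzipped_len;
		compressed = false;
	}
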
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index fa8cef2cca3a..3b5744306ed8 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
| @@ -86,6 +86,7 @@ struct ramoops_context { | |||
| 86 | struct persistent_ram_ecc_info ecc_info; | 86 | struct persistent_ram_ecc_info ecc_info; |
| 87 | unsigned int max_dump_cnt; | 87 | unsigned int max_dump_cnt; |
| 88 | unsigned int dump_write_cnt; | 88 | unsigned int dump_write_cnt; |
| 89 | /* *_read_cnt must be cleared in ramoops_pstore_open() */ | ||
| 89 | unsigned int dump_read_cnt; | 90 | unsigned int dump_read_cnt; |
| 90 | unsigned int console_read_cnt; | 91 | unsigned int console_read_cnt; |
| 91 | unsigned int ftrace_read_cnt; | 92 | unsigned int ftrace_read_cnt; |
| @@ -101,6 +102,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) | |||
| 101 | 102 | ||
| 102 | cxt->dump_read_cnt = 0; | 103 | cxt->dump_read_cnt = 0; |
| 103 | cxt->console_read_cnt = 0; | 104 | cxt->console_read_cnt = 0; |
| 105 | cxt->ftrace_read_cnt = 0; | ||
| 104 | return 0; | 106 | return 0; |
| 105 | } | 107 | } |
| 106 | 108 | ||
| @@ -117,13 +119,15 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, | |||
| 117 | return NULL; | 119 | return NULL; |
| 118 | 120 | ||
| 119 | prz = przs[i]; | 121 | prz = przs[i]; |
| 122 | if (!prz) | ||
| 123 | return NULL; | ||
| 120 | 124 | ||
| 121 | if (update) { | 125 | /* Update old/shadowed buffer. */ |
| 122 | /* Update old/shadowed buffer. */ | 126 | if (update) |
| 123 | persistent_ram_save_old(prz); | 127 | persistent_ram_save_old(prz); |
| 124 | if (!persistent_ram_old_size(prz)) | 128 | |
| 125 | return NULL; | 129 | if (!persistent_ram_old_size(prz)) |
| 126 | } | 130 | return NULL; |
| 127 | 131 | ||
| 128 | *typep = type; | 132 | *typep = type; |
| 129 | *id = i; | 133 | *id = i; |
| @@ -316,6 +320,7 @@ static void ramoops_free_przs(struct ramoops_context *cxt) | |||
| 316 | { | 320 | { |
| 317 | int i; | 321 | int i; |
| 318 | 322 | ||
| 323 | cxt->max_dump_cnt = 0; | ||
| 319 | if (!cxt->przs) | 324 | if (!cxt->przs) |
| 320 | return; | 325 | return; |
| 321 | 326 | ||
| @@ -346,7 +351,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, | |||
| 346 | GFP_KERNEL); | 351 | GFP_KERNEL); |
| 347 | if (!cxt->przs) { | 352 | if (!cxt->przs) { |
| 348 | dev_err(dev, "failed to initialize a prz array for dumps\n"); | 353 | dev_err(dev, "failed to initialize a prz array for dumps\n"); |
| 349 | return -ENOMEM; | 354 | goto fail_prz; |
| 350 | } | 355 | } |
| 351 | 356 | ||
| 352 | for (i = 0; i < cxt->max_dump_cnt; i++) { | 357 | for (i = 0; i < cxt->max_dump_cnt; i++) { |
| @@ -428,7 +433,6 @@ static int ramoops_probe(struct platform_device *pdev) | |||
| 428 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) | 433 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) |
| 429 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); | 434 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); |
| 430 | 435 | ||
| 431 | cxt->dump_read_cnt = 0; | ||
| 432 | cxt->size = pdata->mem_size; | 436 | cxt->size = pdata->mem_size; |
| 433 | cxt->phys_addr = pdata->mem_address; | 437 | cxt->phys_addr = pdata->mem_address; |
| 434 | cxt->record_size = pdata->record_size; | 438 | cxt->record_size = pdata->record_size; |
| @@ -505,7 +509,6 @@ fail_buf: | |||
| 505 | kfree(cxt->pstore.buf); | 509 | kfree(cxt->pstore.buf); |
| 506 | fail_clear: | 510 | fail_clear: |
| 507 | cxt->pstore.bufsize = 0; | 511 | cxt->pstore.bufsize = 0; |
| 508 | cxt->max_dump_cnt = 0; | ||
| 509 | fail_cnt: | 512 | fail_cnt: |
| 510 | kfree(cxt->fprz); | 513 | kfree(cxt->fprz); |
| 511 | fail_init_fprz: | 514 | fail_init_fprz: |
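
The ramoops fixes above follow from pstore's iterator contract: pstore_get_records() calls psi->open() once to reset the per-type read cursors, then calls psi->read() repeatedly until no records remain, so any *_read_cnt that ->open forgets to clear (here ftrace_read_cnt) makes those records invisible on every mount after the first. A hedged sketch of the calling side, simplified from fs/pstore/platform.c of this period (error handling and locking omitted):

	if (psi->open && psi->open(psi))	/* must reset all *_read_cnt cursors */
		goto out;

	while ((size = psi->read(&id, &type, &count, &time,
				 &buf, &compressed, psi)) > 0) {
		/* decompress if needed, then pstore_mkfile(...) */
	}
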
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index de272d426763..ff7e3d4df5a1 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c | |||
| @@ -54,7 +54,7 @@ static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a) | |||
| 54 | do { | 54 | do { |
| 55 | old = atomic_read(&prz->buffer->start); | 55 | old = atomic_read(&prz->buffer->start); |
| 56 | new = old + a; | 56 | new = old + a; |
| 57 | while (unlikely(new > prz->buffer_size)) | 57 | while (unlikely(new >= prz->buffer_size)) |
| 58 | new -= prz->buffer_size; | 58 | new -= prz->buffer_size; |
| 59 | } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old); | 59 | } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old); |
| 60 | 60 | ||
| @@ -91,7 +91,7 @@ static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a) | |||
| 91 | 91 | ||
| 92 | old = atomic_read(&prz->buffer->start); | 92 | old = atomic_read(&prz->buffer->start); |
| 93 | new = old + a; | 93 | new = old + a; |
| 94 | while (unlikely(new > prz->buffer_size)) | 94 | while (unlikely(new >= prz->buffer_size)) |
| 95 | new -= prz->buffer_size; | 95 | new -= prz->buffer_size; |
| 96 | atomic_set(&prz->buffer->start, new); | 96 | atomic_set(&prz->buffer->start, new); |
| 97 | 97 | ||
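
The two `>` to `>=` conversions above fix an off-by-one at the ring buffer's wrap point: buffer offsets are valid in [0, buffer_size), so a start position that lands exactly on buffer_size must wrap to 0. A worked example of the boundary case:

	/* buffer_size = 100, old start = 90, a = 10  =>  new = 100
	 *
	 *   old test: (new >  100) is false  ->  start left at 100, one past
	 *             the last valid byte; later writes run off the end
	 *   new test: (new >= 100) is true   ->  new -= 100, start wraps to 0
	 */
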
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 89558810381c..c4bcb778886e 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
| @@ -44,6 +44,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) | |||
| 44 | { | 44 | { |
| 45 | struct qnx4_sb_info *qs; | 45 | struct qnx4_sb_info *qs; |
| 46 | 46 | ||
| 47 | sync_filesystem(sb); | ||
| 47 | qs = qnx4_sb(sb); | 48 | qs = qnx4_sb(sb); |
| 48 | qs->Version = QNX4_VERSION; | 49 | qs->Version = QNX4_VERSION; |
| 49 | *flags |= MS_RDONLY; | 50 | *flags |= MS_RDONLY; |
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 8d941edfefa1..65cdaab3ed49 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c | |||
| @@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) | |||
| 55 | 55 | ||
| 56 | static int qnx6_remount(struct super_block *sb, int *flags, char *data) | 56 | static int qnx6_remount(struct super_block *sb, int *flags, char *data) |
| 57 | { | 57 | { |
| 58 | sync_filesystem(sb); | ||
| 58 | *flags |= MS_RDONLY; | 59 | *flags |= MS_RDONLY; |
| 59 | return 0; | 60 | return 0; |
| 60 | } | 61 | } |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cfc8dcc16043..9cd5f63715c0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
| @@ -528,7 +528,7 @@ restart: | |||
| 528 | if (atomic_read(&dquot->dq_count)) { | 528 | if (atomic_read(&dquot->dq_count)) { |
| 529 | DEFINE_WAIT(wait); | 529 | DEFINE_WAIT(wait); |
| 530 | 530 | ||
| 531 | atomic_inc(&dquot->dq_count); | 531 | dqgrab(dquot); |
| 532 | prepare_to_wait(&dquot->dq_wait_unused, &wait, | 532 | prepare_to_wait(&dquot->dq_wait_unused, &wait, |
| 533 | TASK_UNINTERRUPTIBLE); | 533 | TASK_UNINTERRUPTIBLE); |
| 534 | spin_unlock(&dq_list_lock); | 534 | spin_unlock(&dq_list_lock); |
| @@ -632,7 +632,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type) | |||
| 632 | /* Now we have active dquot from which someone is | 632 | /* Now we have active dquot from which someone is |
| 633 | * holding reference so we can safely just increase | 633 | * holding reference so we can safely just increase |
| 634 | * use count */ | 634 | * use count */ |
| 635 | atomic_inc(&dquot->dq_count); | 635 | dqgrab(dquot); |
| 636 | spin_unlock(&dq_list_lock); | 636 | spin_unlock(&dq_list_lock); |
| 637 | dqstats_inc(DQST_LOOKUPS); | 637 | dqstats_inc(DQST_LOOKUPS); |
| 638 | err = sb->dq_op->write_dquot(dquot); | 638 | err = sb->dq_op->write_dquot(dquot); |
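
Both quota hunks replace a bare atomic_inc() of dq_count with dqgrab(). The helper is presumably a thin wrapper around the same increment that also documents, and can assert, the precondition these call sites rely on: the caller already holds a reference, so the count cannot be zero. A hedged sketch of such a helper (the real definition lives in include/linux/quotaops.h):

	/* Grab an extra reference to a dquot the caller already holds. */
	static inline void dqgrab(struct dquot *dquot)
	{
		WARN_ON_ONCE(!atomic_read(&dquot->dq_count));
		atomic_inc(&dquot->dq_count);
	}
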
diff --git a/fs/read_write.c b/fs/read_write.c index edc5746a902a..31c6efa43183 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) | |||
| 264 | } | 264 | } |
| 265 | EXPORT_SYMBOL(vfs_llseek); | 265 | EXPORT_SYMBOL(vfs_llseek); |
| 266 | 266 | ||
| 267 | static inline struct fd fdget_pos(int fd) | ||
| 268 | { | ||
| 269 | return __to_fd(__fdget_pos(fd)); | ||
| 270 | } | ||
| 271 | |||
| 272 | static inline void fdput_pos(struct fd f) | ||
| 273 | { | ||
| 274 | if (f.flags & FDPUT_POS_UNLOCK) | ||
| 275 | mutex_unlock(&f.file->f_pos_lock); | ||
| 276 | fdput(f); | ||
| 277 | } | ||
| 278 | |||
| 267 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) | 279 | SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) |
| 268 | { | 280 | { |
| 269 | off_t retval; | 281 | off_t retval; |
| 270 | struct fd f = fdget(fd); | 282 | struct fd f = fdget_pos(fd); |
| 271 | if (!f.file) | 283 | if (!f.file) |
| 272 | return -EBADF; | 284 | return -EBADF; |
| 273 | 285 | ||
| @@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) | |||
| 278 | if (res != (loff_t)retval) | 290 | if (res != (loff_t)retval) |
| 279 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ | 291 | retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ |
| 280 | } | 292 | } |
| 281 | fdput(f); | 293 | fdput_pos(f); |
| 282 | return retval; | 294 | return retval; |
| 283 | } | 295 | } |
| 284 | 296 | ||
| @@ -295,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
| 295 | unsigned int, whence) | 307 | unsigned int, whence) |
| 296 | { | 308 | { |
| 297 | int retval; | 309 | int retval; |
| 298 | struct fd f = fdget(fd); | 310 | struct fd f = fdget_pos(fd); |
| 299 | loff_t offset; | 311 | loff_t offset; |
| 300 | 312 | ||
| 301 | if (!f.file) | 313 | if (!f.file) |
| @@ -315,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, | |||
| 315 | retval = 0; | 327 | retval = 0; |
| 316 | } | 328 | } |
| 317 | out_putf: | 329 | out_putf: |
| 318 | fdput(f); | 330 | fdput_pos(f); |
| 319 | return retval; | 331 | return retval; |
| 320 | } | 332 | } |
| 321 | #endif | 333 | #endif |
| @@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) | |||
| 498 | 510 | ||
| 499 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | 511 | SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) |
| 500 | { | 512 | { |
| 501 | struct fd f = fdget(fd); | 513 | struct fd f = fdget_pos(fd); |
| 502 | ssize_t ret = -EBADF; | 514 | ssize_t ret = -EBADF; |
| 503 | 515 | ||
| 504 | if (f.file) { | 516 | if (f.file) { |
| @@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | |||
| 506 | ret = vfs_read(f.file, buf, count, &pos); | 518 | ret = vfs_read(f.file, buf, count, &pos); |
| 507 | if (ret >= 0) | 519 | if (ret >= 0) |
| 508 | file_pos_write(f.file, pos); | 520 | file_pos_write(f.file, pos); |
| 509 | fdput(f); | 521 | fdput_pos(f); |
| 510 | } | 522 | } |
| 511 | return ret; | 523 | return ret; |
| 512 | } | 524 | } |
| @@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | |||
| 514 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | 526 | SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, |
| 515 | size_t, count) | 527 | size_t, count) |
| 516 | { | 528 | { |
| 517 | struct fd f = fdget(fd); | 529 | struct fd f = fdget_pos(fd); |
| 518 | ssize_t ret = -EBADF; | 530 | ssize_t ret = -EBADF; |
| 519 | 531 | ||
| 520 | if (f.file) { | 532 | if (f.file) { |
| @@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | |||
| 522 | ret = vfs_write(f.file, buf, count, &pos); | 534 | ret = vfs_write(f.file, buf, count, &pos); |
| 523 | if (ret >= 0) | 535 | if (ret >= 0) |
| 524 | file_pos_write(f.file, pos); | 536 | file_pos_write(f.file, pos); |
| 525 | fdput(f); | 537 | fdput_pos(f); |
| 526 | } | 538 | } |
| 527 | 539 | ||
| 528 | return ret; | 540 | return ret; |
| @@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev); | |||
| 797 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | 809 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, |
| 798 | unsigned long, vlen) | 810 | unsigned long, vlen) |
| 799 | { | 811 | { |
| 800 | struct fd f = fdget(fd); | 812 | struct fd f = fdget_pos(fd); |
| 801 | ssize_t ret = -EBADF; | 813 | ssize_t ret = -EBADF; |
| 802 | 814 | ||
| 803 | if (f.file) { | 815 | if (f.file) { |
| @@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
| 805 | ret = vfs_readv(f.file, vec, vlen, &pos); | 817 | ret = vfs_readv(f.file, vec, vlen, &pos); |
| 806 | if (ret >= 0) | 818 | if (ret >= 0) |
| 807 | file_pos_write(f.file, pos); | 819 | file_pos_write(f.file, pos); |
| 808 | fdput(f); | 820 | fdput_pos(f); |
| 809 | } | 821 | } |
| 810 | 822 | ||
| 811 | if (ret > 0) | 823 | if (ret > 0) |
| @@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
| 817 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | 829 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, |
| 818 | unsigned long, vlen) | 830 | unsigned long, vlen) |
| 819 | { | 831 | { |
| 820 | struct fd f = fdget(fd); | 832 | struct fd f = fdget_pos(fd); |
| 821 | ssize_t ret = -EBADF; | 833 | ssize_t ret = -EBADF; |
| 822 | 834 | ||
| 823 | if (f.file) { | 835 | if (f.file) { |
| @@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | |||
| 825 | ret = vfs_writev(f.file, vec, vlen, &pos); | 837 | ret = vfs_writev(f.file, vec, vlen, &pos); |
| 826 | if (ret >= 0) | 838 | if (ret >= 0) |
| 827 | file_pos_write(f.file, pos); | 839 | file_pos_write(f.file, pos); |
| 828 | fdput(f); | 840 | fdput_pos(f); |
| 829 | } | 841 | } |
| 830 | 842 | ||
| 831 | if (ret > 0) | 843 | if (ret > 0) |
| @@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, | |||
| 968 | const struct compat_iovec __user *,vec, | 980 | const struct compat_iovec __user *,vec, |
| 969 | compat_ulong_t, vlen) | 981 | compat_ulong_t, vlen) |
| 970 | { | 982 | { |
| 971 | struct fd f = fdget(fd); | 983 | struct fd f = fdget_pos(fd); |
| 972 | ssize_t ret; | 984 | ssize_t ret; |
| 973 | loff_t pos; | 985 | loff_t pos; |
| 974 | 986 | ||
| @@ -978,13 +990,13 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, | |||
| 978 | ret = compat_readv(f.file, vec, vlen, &pos); | 990 | ret = compat_readv(f.file, vec, vlen, &pos); |
| 979 | if (ret >= 0) | 991 | if (ret >= 0) |
| 980 | f.file->f_pos = pos; | 992 | f.file->f_pos = pos; |
| 981 | fdput(f); | 993 | fdput_pos(f); |
| 982 | return ret; | 994 | return ret; |
| 983 | } | 995 | } |
| 984 | 996 | ||
| 985 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | 997 | static long __compat_sys_preadv64(unsigned long fd, |
| 986 | const struct compat_iovec __user *,vec, | 998 | const struct compat_iovec __user *vec, |
| 987 | unsigned long, vlen, loff_t, pos) | 999 | unsigned long vlen, loff_t pos) |
| 988 | { | 1000 | { |
| 989 | struct fd f; | 1001 | struct fd f; |
| 990 | ssize_t ret; | 1002 | ssize_t ret; |
| @@ -1001,12 +1013,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | |||
| 1001 | return ret; | 1013 | return ret; |
| 1002 | } | 1014 | } |
| 1003 | 1015 | ||
| 1016 | #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 | ||
| 1017 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | ||
| 1018 | const struct compat_iovec __user *,vec, | ||
| 1019 | unsigned long, vlen, loff_t, pos) | ||
| 1020 | { | ||
| 1021 | return __compat_sys_preadv64(fd, vec, vlen, pos); | ||
| 1022 | } | ||
| 1023 | #endif | ||
| 1024 | |||
| 1004 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, | 1025 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, |
| 1005 | const struct compat_iovec __user *,vec, | 1026 | const struct compat_iovec __user *,vec, |
| 1006 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) | 1027 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
| 1007 | { | 1028 | { |
| 1008 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1029 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
| 1009 | return compat_sys_preadv64(fd, vec, vlen, pos); | 1030 | |
| 1031 | return __compat_sys_preadv64(fd, vec, vlen, pos); | ||
| 1010 | } | 1032 | } |
| 1011 | 1033 | ||
| 1012 | static size_t compat_writev(struct file *file, | 1034 | static size_t compat_writev(struct file *file, |
| @@ -1035,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, | |||
| 1035 | const struct compat_iovec __user *, vec, | 1057 | const struct compat_iovec __user *, vec, |
| 1036 | compat_ulong_t, vlen) | 1058 | compat_ulong_t, vlen) |
| 1037 | { | 1059 | { |
| 1038 | struct fd f = fdget(fd); | 1060 | struct fd f = fdget_pos(fd); |
| 1039 | ssize_t ret; | 1061 | ssize_t ret; |
| 1040 | loff_t pos; | 1062 | loff_t pos; |
| 1041 | 1063 | ||
| @@ -1045,13 +1067,13 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, | |||
| 1045 | ret = compat_writev(f.file, vec, vlen, &pos); | 1067 | ret = compat_writev(f.file, vec, vlen, &pos); |
| 1046 | if (ret >= 0) | 1068 | if (ret >= 0) |
| 1047 | f.file->f_pos = pos; | 1069 | f.file->f_pos = pos; |
| 1048 | fdput(f); | 1070 | fdput_pos(f); |
| 1049 | return ret; | 1071 | return ret; |
| 1050 | } | 1072 | } |
| 1051 | 1073 | ||
| 1052 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | 1074 | static long __compat_sys_pwritev64(unsigned long fd, |
| 1053 | const struct compat_iovec __user *,vec, | 1075 | const struct compat_iovec __user *vec, |
| 1054 | unsigned long, vlen, loff_t, pos) | 1076 | unsigned long vlen, loff_t pos) |
| 1055 | { | 1077 | { |
| 1056 | struct fd f; | 1078 | struct fd f; |
| 1057 | ssize_t ret; | 1079 | ssize_t ret; |
| @@ -1068,12 +1090,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | |||
| 1068 | return ret; | 1090 | return ret; |
| 1069 | } | 1091 | } |
| 1070 | 1092 | ||
| 1093 | #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 | ||
| 1094 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | ||
| 1095 | const struct compat_iovec __user *,vec, | ||
| 1096 | unsigned long, vlen, loff_t, pos) | ||
| 1097 | { | ||
| 1098 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | ||
| 1099 | } | ||
| 1100 | #endif | ||
| 1101 | |||
| 1071 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, | 1102 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, |
| 1072 | const struct compat_iovec __user *,vec, | 1103 | const struct compat_iovec __user *,vec, |
| 1073 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) | 1104 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
| 1074 | { | 1105 | { |
| 1075 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1106 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
| 1076 | return compat_sys_pwritev64(fd, vec, vlen, pos); | 1107 | |
| 1108 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | ||
| 1077 | } | 1109 | } |
| 1078 | #endif | 1110 | #endif |
| 1079 | 1111 | ||
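
The fdget_pos()/fdput_pos() conversions above give read/write/lseek and the vectored variants POSIX-atomic f_pos updates. The cost is only paid when it matters: __fdget_pos() is expected to take f_pos_lock only when the struct file is actually shared, recording that fact in the FDPUT_POS_UNLOCK flag so fdput_pos() knows whether to unlock. A hedged sketch of __fdget_pos() as introduced alongside this change in fs/file.c (FMODE_ATOMIC_POS marks the file types that need the serialization):

	unsigned long __fdget_pos(unsigned int fd)
	{
		unsigned long v = __fdget(fd);
		struct file *file = (struct file *)(v & ~3);

		if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
			if (file_count(file) > 1) {
				v |= FDPUT_POS_UNLOCK;	/* tell fdput_pos() to unlock */
				mutex_lock(&file->f_pos_lock);
			}
		}
		return v;
	}
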
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ad62bdbb451e..bc8b8009897d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -35,7 +35,7 @@ void reiserfs_evict_inode(struct inode *inode) | |||
| 35 | if (!inode->i_nlink && !is_bad_inode(inode)) | 35 | if (!inode->i_nlink && !is_bad_inode(inode)) |
| 36 | dquot_initialize(inode); | 36 | dquot_initialize(inode); |
| 37 | 37 | ||
| 38 | truncate_inode_pages(&inode->i_data, 0); | 38 | truncate_inode_pages_final(&inode->i_data); |
| 39 | if (inode->i_nlink) | 39 | if (inode->i_nlink) |
| 40 | goto no_delete; | 40 | goto no_delete; |
| 41 | 41 | ||
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 8d06adf89948..83d4eac8059a 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
| @@ -2831,6 +2831,7 @@ void reiserfs_init_alloc_options(struct super_block *s); | |||
| 2831 | */ | 2831 | */ |
| 2832 | __le32 reiserfs_choose_packing(struct inode *dir); | 2832 | __le32 reiserfs_choose_packing(struct inode *dir); |
| 2833 | 2833 | ||
| 2834 | void show_alloc_options(struct seq_file *seq, struct super_block *s); | ||
| 2834 | int reiserfs_init_bitmap_cache(struct super_block *sb); | 2835 | int reiserfs_init_bitmap_cache(struct super_block *sb); |
| 2835 | void reiserfs_free_bitmap_cache(struct super_block *sb); | 2836 | void reiserfs_free_bitmap_cache(struct super_block *sb); |
| 2836 | void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); | 2837 | void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2c803353f8ac..9fb20426005e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
| @@ -62,7 +62,6 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) | |||
| 62 | 62 | ||
| 63 | static int reiserfs_remount(struct super_block *s, int *flags, char *data); | 63 | static int reiserfs_remount(struct super_block *s, int *flags, char *data); |
| 64 | static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); | 64 | static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); |
| 65 | void show_alloc_options(struct seq_file *seq, struct super_block *s); | ||
| 66 | 65 | ||
| 67 | static int reiserfs_sync_fs(struct super_block *s, int wait) | 66 | static int reiserfs_sync_fs(struct super_block *s, int wait) |
| 68 | { | 67 | { |
| @@ -597,7 +596,7 @@ static void init_once(void *foo) | |||
| 597 | inode_init_once(&ei->vfs_inode); | 596 | inode_init_once(&ei->vfs_inode); |
| 598 | } | 597 | } |
| 599 | 598 | ||
| 600 | static int init_inodecache(void) | 599 | static int __init init_inodecache(void) |
| 601 | { | 600 | { |
| 602 | reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", | 601 | reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", |
| 603 | sizeof(struct | 602 | sizeof(struct |
| @@ -1319,6 +1318,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
| 1319 | int i; | 1318 | int i; |
| 1320 | #endif | 1319 | #endif |
| 1321 | 1320 | ||
| 1321 | sync_filesystem(s); | ||
| 1322 | reiserfs_write_lock(s); | 1322 | reiserfs_write_lock(s); |
| 1323 | 1323 | ||
| 1324 | #ifdef CONFIG_QUOTA | 1324 | #ifdef CONFIG_QUOTA |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index d8418782862b..ef90e8bca95a 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
| @@ -432,6 +432,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 432 | */ | 432 | */ |
| 433 | static int romfs_remount(struct super_block *sb, int *flags, char *data) | 433 | static int romfs_remount(struct super_block *sb, int *flags, char *data) |
| 434 | { | 434 | { |
| 435 | sync_filesystem(sb); | ||
| 435 | *flags |= MS_RDONLY; | 436 | *flags |= MS_RDONLY; |
| 436 | return 0; | 437 | return 0; |
| 437 | } | 438 | } |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 202df6312d4e..031c8d67fd51 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
| @@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 371 | 371 | ||
| 372 | static int squashfs_remount(struct super_block *sb, int *flags, char *data) | 372 | static int squashfs_remount(struct super_block *sb, int *flags, char *data) |
| 373 | { | 373 | { |
| 374 | sync_filesystem(sb); | ||
| 374 | *flags |= MS_RDONLY; | 375 | *flags |= MS_RDONLY; |
| 375 | return 0; | 376 | return 0; |
| 376 | } | 377 | } |
diff --git a/fs/super.c b/fs/super.c index 80d5cf2ca765..e9dc3c3fe159 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -719,8 +719,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
| 719 | } | 719 | } |
| 720 | } | 720 | } |
| 721 | 721 | ||
| 722 | sync_filesystem(sb); | ||
| 723 | |||
| 724 | if (sb->s_op->remount_fs) { | 722 | if (sb->s_op->remount_fs) { |
| 725 | retval = sb->s_op->remount_fs(sb, &flags, data); | 723 | retval = sb->s_op->remount_fs(sb, &flags, data); |
| 726 | if (retval) { | 724 | if (retval) { |
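
This do_remount_sb() hunk is the flip side of the sync_filesystem() calls sprinkled through the ->remount_fs implementations in this series (adfs, affs, proc, pstore, qnx4, qnx6, reiserfs, romfs, squashfs, sysv, ubifs, udf, ufs, ...): instead of the VFS syncing unconditionally, each filesystem now flushes its own dirty state before it starts parsing options or changing flags. The resulting per-filesystem shape, as a generic sketch rather than any one of the hunks:

	static int example_remount(struct super_block *sb, int *flags, char *data)
	{
		sync_filesystem(sb);	/* flush dirty state before reconfiguring */

		/* filesystem-specific option parsing / flag fixups follow */
		*flags |= MS_RDONLY;
		return 0;
	}
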
diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index 8c41feacbac5..b2756014508c 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | config SYSFS | 1 | config SYSFS |
| 2 | bool "sysfs file system support" if EXPERT | 2 | bool "sysfs file system support" if EXPERT |
| 3 | default y | 3 | default y |
| 4 | select KERNFS | ||
| 4 | help | 5 | help |
| 5 | The sysfs filesystem is a virtual filesystem that the kernel uses to | 6 | The sysfs filesystem is a virtual filesystem that the kernel uses to |
| 6 | export internal kernel objects, their attributes, and their | 7 | export internal kernel objects, their attributes, and their |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index ee0d761c3179..0b45ff42f374 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
| @@ -19,39 +19,18 @@ | |||
| 19 | 19 | ||
| 20 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); | 20 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); |
| 21 | 21 | ||
| 22 | /** | ||
| 23 | * sysfs_pathname - return full path to sysfs dirent | ||
| 24 | * @kn: kernfs_node whose path we want | ||
| 25 | * @path: caller allocated buffer of size PATH_MAX | ||
| 26 | * | ||
| 27 | * Gives the name "/" to the sysfs_root entry; any path returned | ||
| 28 | * is relative to wherever sysfs is mounted. | ||
| 29 | */ | ||
| 30 | static char *sysfs_pathname(struct kernfs_node *kn, char *path) | ||
| 31 | { | ||
| 32 | if (kn->parent) { | ||
| 33 | sysfs_pathname(kn->parent, path); | ||
| 34 | strlcat(path, "/", PATH_MAX); | ||
| 35 | } | ||
| 36 | strlcat(path, kn->name, PATH_MAX); | ||
| 37 | return path; | ||
| 38 | } | ||
| 39 | |||
| 40 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name) | 22 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name) |
| 41 | { | 23 | { |
| 42 | char *path; | 24 | char *buf, *path = NULL; |
| 43 | 25 | ||
| 44 | path = kzalloc(PATH_MAX, GFP_KERNEL); | 26 | buf = kzalloc(PATH_MAX, GFP_KERNEL); |
| 45 | if (path) { | 27 | if (buf) |
| 46 | sysfs_pathname(parent, path); | 28 | path = kernfs_path(parent, buf, PATH_MAX); |
| 47 | strlcat(path, "/", PATH_MAX); | ||
| 48 | strlcat(path, name, PATH_MAX); | ||
| 49 | } | ||
| 50 | 29 | ||
| 51 | WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", | 30 | WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", |
| 52 | path ? path : name); | 31 | path, name); |
| 53 | 32 | ||
| 54 | kfree(path); | 33 | kfree(buf); |
| 55 | } | 34 | } |
| 56 | 35 | ||
| 57 | /** | 36 | /** |
| @@ -122,9 +101,13 @@ void sysfs_remove_dir(struct kobject *kobj) | |||
| 122 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, | 101 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, |
| 123 | const void *new_ns) | 102 | const void *new_ns) |
| 124 | { | 103 | { |
| 125 | struct kernfs_node *parent = kobj->sd->parent; | 104 | struct kernfs_node *parent; |
| 105 | int ret; | ||
| 126 | 106 | ||
| 127 | return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); | 107 | parent = kernfs_get_parent(kobj->sd); |
| 108 | ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); | ||
| 109 | kernfs_put(parent); | ||
| 110 | return ret; | ||
| 128 | } | 111 | } |
| 129 | 112 | ||
| 130 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, | 113 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, |
| @@ -133,7 +116,6 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, | |||
| 133 | struct kernfs_node *kn = kobj->sd; | 116 | struct kernfs_node *kn = kobj->sd; |
| 134 | struct kernfs_node *new_parent; | 117 | struct kernfs_node *new_parent; |
| 135 | 118 | ||
| 136 | BUG_ON(!kn->parent); | ||
| 137 | new_parent = new_parent_kobj && new_parent_kobj->sd ? | 119 | new_parent = new_parent_kobj && new_parent_kobj->sd ? |
| 138 | new_parent_kobj->sd : sysfs_root_kn; | 120 | new_parent_kobj->sd : sysfs_root_kn; |
| 139 | 121 | ||
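
In the rename fix above, the parent is pinned with kernfs_get_parent() instead of being read straight from kobj->sd->parent, because under kernfs a concurrent rename can repoint ->parent while kernfs_rename_ns() runs. The helper presumably snapshots the pointer under kernfs's rename lock and takes a reference, along these lines (a hedged sketch of fs/kernfs/dir.c from this period):

	struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
	{
		struct kernfs_node *parent;
		unsigned long flags;

		spin_lock_irqsave(&kernfs_rename_lock, flags);
		parent = kn->parent;
		kernfs_get(parent);	/* pinned until the caller's kernfs_put() */
		spin_unlock_irqrestore(&kernfs_rename_lock, flags);
		return parent;
	}
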
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 810cf6e613e5..1b8b91b67fdb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
| @@ -372,6 +372,29 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, | |||
| 372 | } | 372 | } |
| 373 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); | 373 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); |
| 374 | 374 | ||
| 375 | /** | ||
| 376 | * sysfs_remove_file_self - remove an object attribute from its own method | ||
| 377 | * @kobj: object we're acting for | ||
| 378 | * @attr: attribute descriptor | ||
| 379 | * | ||
| 380 | * See kernfs_remove_self() for details. | ||
| 381 | */ | ||
| 382 | bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) | ||
| 383 | { | ||
| 384 | struct kernfs_node *parent = kobj->sd; | ||
| 385 | struct kernfs_node *kn; | ||
| 386 | bool ret; | ||
| 387 | |||
| 388 | kn = kernfs_find_and_get(parent, attr->name); | ||
| 389 | if (WARN_ON_ONCE(!kn)) | ||
| 390 | return false; | ||
| 391 | |||
| 392 | ret = kernfs_remove_self(kn); | ||
| 393 | |||
| 394 | kernfs_put(kn); | ||
| 395 | return ret; | ||
| 396 | } | ||
| 397 | |||
| 375 | void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) | 398 | void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) |
| 376 | { | 399 | { |
| 377 | int i; | 400 | int i; |
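
The usual consumer of sysfs_remove_file_self() is a "delete"-style attribute whose store() method has to remove its own sysfs file, something a plain sysfs_remove_file() would deadlock on, since the write that triggered it still holds the file active. A hedged usage sketch with a hypothetical attribute (the helper returns true only for the one caller that wins the removal):

	/* Hypothetical self-deleting attribute on some kobject. */
	static ssize_t delete_store(struct kobject *kobj, struct kobj_attribute *attr,
				    const char *buf, size_t count)
	{
		if (sysfs_remove_file_self(kobj, &attr->attr))
			kobject_put(kobj);	/* safe: our file is already gone */
		return count;
	}
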
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 6b579387c67a..aa0406895b53 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
| @@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, | |||
| 70 | if (grp->bin_attrs) { | 70 | if (grp->bin_attrs) { |
| 71 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { | 71 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { |
| 72 | if (update) | 72 | if (update) |
| 73 | sysfs_remove_bin_file(kobj, *bin_attr); | 73 | kernfs_remove_by_name(parent, |
| 74 | error = sysfs_create_bin_file(kobj, *bin_attr); | 74 | (*bin_attr)->attr.name); |
| 75 | error = sysfs_add_file_mode_ns(parent, | ||
| 76 | &(*bin_attr)->attr, true, | ||
| 77 | (*bin_attr)->attr.mode, NULL); | ||
| 75 | if (error) | 78 | if (error) |
| 76 | break; | 79 | break; |
| 77 | } | 80 | } |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 3eaf5c6622eb..a66ad6196f59 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
| @@ -63,7 +63,7 @@ int __init sysfs_init(void) | |||
| 63 | { | 63 | { |
| 64 | int err; | 64 | int err; |
| 65 | 65 | ||
| 66 | sysfs_root = kernfs_create_root(NULL, NULL); | 66 | sysfs_root = kernfs_create_root(NULL, 0, NULL); |
| 67 | if (IS_ERR(sysfs_root)) | 67 | if (IS_ERR(sysfs_root)) |
| 68 | return PTR_ERR(sysfs_root); | 68 | return PTR_ERR(sysfs_root); |
| 69 | 69 | ||
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c327d4ee1235..88956309cc86 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
| @@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) | |||
| 60 | { | 60 | { |
| 61 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 61 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
| 62 | 62 | ||
| 63 | sync_filesystem(sb); | ||
| 63 | if (sbi->s_forced_ro) | 64 | if (sbi->s_forced_ro) |
| 64 | *flags |= MS_RDONLY; | 65 | *flags |= MS_RDONLY; |
| 65 | return 0; | 66 | return 0; |
| @@ -295,7 +296,7 @@ int sysv_sync_inode(struct inode *inode) | |||
| 295 | 296 | ||
| 296 | static void sysv_evict_inode(struct inode *inode) | 297 | static void sysv_evict_inode(struct inode *inode) |
| 297 | { | 298 | { |
| 298 | truncate_inode_pages(&inode->i_data, 0); | 299 | truncate_inode_pages_final(&inode->i_data); |
| 299 | if (!inode->i_nlink) { | 300 | if (!inode->i_nlink) { |
| 300 | inode->i_size = 0; | 301 | inode->i_size = 0; |
| 301 | sysv_truncate(inode); | 302 | sysv_truncate(inode); |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 929312180dd0..0013142c0475 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
| @@ -317,6 +317,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
| 317 | (clockid != CLOCK_MONOTONIC && | 317 | (clockid != CLOCK_MONOTONIC && |
| 318 | clockid != CLOCK_REALTIME && | 318 | clockid != CLOCK_REALTIME && |
| 319 | clockid != CLOCK_REALTIME_ALARM && | 319 | clockid != CLOCK_REALTIME_ALARM && |
| 320 | clockid != CLOCK_BOOTTIME && | ||
| 320 | clockid != CLOCK_BOOTTIME_ALARM)) | 321 | clockid != CLOCK_BOOTTIME_ALARM)) |
| 321 | return -EINVAL; | 322 | return -EINVAL; |
| 322 | 323 | ||
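
With CLOCK_BOOTTIME accepted, a timerfd can measure time that keeps advancing across suspend without requesting the wakeup behavior (and CAP_WAKE_ALARM requirement) of CLOCK_BOOTTIME_ALARM. A minimal userspace sketch:

	#include <sys/timerfd.h>
	#include <stdint.h>
	#include <unistd.h>

	int main(void)
	{
		struct itimerspec its = { .it_value = { .tv_sec = 5 } };
		uint64_t expirations;
		int fd = timerfd_create(CLOCK_BOOTTIME, 0);	/* rejected before this patch */

		if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
			return 1;
		read(fd, &expirations, sizeof(expirations));	/* suspend time counts too */
		return 0;
	}
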
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5ded8490c0c6..a1266089eca1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -351,7 +351,7 @@ static void ubifs_evict_inode(struct inode *inode) | |||
| 351 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); | 351 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); |
| 352 | ubifs_assert(!atomic_read(&inode->i_count)); | 352 | ubifs_assert(!atomic_read(&inode->i_count)); |
| 353 | 353 | ||
| 354 | truncate_inode_pages(&inode->i_data, 0); | 354 | truncate_inode_pages_final(&inode->i_data); |
| 355 | 355 | ||
| 356 | if (inode->i_nlink) | 356 | if (inode->i_nlink) |
| 357 | goto done; | 357 | goto done; |
| @@ -1827,6 +1827,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 1827 | int err; | 1827 | int err; |
| 1828 | struct ubifs_info *c = sb->s_fs_info; | 1828 | struct ubifs_info *c = sb->s_fs_info; |
| 1829 | 1829 | ||
| 1830 | sync_filesystem(sb); | ||
| 1830 | dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); | 1831 | dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); |
| 1831 | 1832 | ||
| 1832 | err = ubifs_parse_options(c, data, 1); | 1833 | err = ubifs_parse_options(c, data, 1); |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 982ce05c87ed..5d643706212f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
| @@ -146,8 +146,8 @@ void udf_evict_inode(struct inode *inode) | |||
| 146 | want_delete = 1; | 146 | want_delete = 1; |
| 147 | udf_setsize(inode, 0); | 147 | udf_setsize(inode, 0); |
| 148 | udf_update_inode(inode, IS_SYNC(inode)); | 148 | udf_update_inode(inode, IS_SYNC(inode)); |
| 149 | } else | 149 | } |
| 150 | truncate_inode_pages(&inode->i_data, 0); | 150 | truncate_inode_pages_final(&inode->i_data); |
| 151 | invalidate_inode_buffers(inode); | 151 | invalidate_inode_buffers(inode); |
| 152 | clear_inode(inode); | 152 | clear_inode(inode); |
| 153 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && | 153 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 3306b9f69bed..64f2b7334d08 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
| @@ -646,6 +646,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
| 646 | int error = 0; | 646 | int error = 0; |
| 647 | struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); | 647 | struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); |
| 648 | 648 | ||
| 649 | sync_filesystem(sb); | ||
| 649 | if (lvidiu) { | 650 | if (lvidiu) { |
| 650 | int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); | 651 | int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); |
| 651 | if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) | 652 | if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index c8ca96086784..61e8a9b021dd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
| @@ -885,7 +885,7 @@ void ufs_evict_inode(struct inode * inode) | |||
| 885 | if (!inode->i_nlink && !is_bad_inode(inode)) | 885 | if (!inode->i_nlink && !is_bad_inode(inode)) |
| 886 | want_delete = 1; | 886 | want_delete = 1; |
| 887 | 887 | ||
| 888 | truncate_inode_pages(&inode->i_data, 0); | 888 | truncate_inode_pages_final(&inode->i_data); |
| 889 | if (want_delete) { | 889 | if (want_delete) { |
| 890 | loff_t old_i_size; | 890 | loff_t old_i_size; |
| 891 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ | 891 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 329f2f53b7ed..b8c6791f046f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
| @@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
| 1280 | unsigned new_mount_opt, ufstype; | 1280 | unsigned new_mount_opt, ufstype; |
| 1281 | unsigned flags; | 1281 | unsigned flags; |
| 1282 | 1282 | ||
| 1283 | sync_filesystem(sb); | ||
| 1283 | lock_ufs(sb); | 1284 | lock_ufs(sb); |
| 1284 | mutex_lock(&UFS_SB(sb)->s_lock); | 1285 | mutex_lock(&UFS_SB(sb)->s_lock); |
| 1285 | uspi = UFS_SB(sb)->s_uspi; | 1286 | uspi = UFS_SB(sb)->s_uspi; |
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 66a36befc5c0..844e288b9576 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
| @@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) | |||
| 65 | void * | 65 | void * |
| 66 | kmem_zalloc_large(size_t size, xfs_km_flags_t flags) | 66 | kmem_zalloc_large(size_t size, xfs_km_flags_t flags) |
| 67 | { | 67 | { |
| 68 | unsigned noio_flag = 0; | ||
| 68 | void *ptr; | 69 | void *ptr; |
| 70 | gfp_t lflags; | ||
| 69 | 71 | ||
| 70 | ptr = kmem_zalloc(size, flags | KM_MAYFAIL); | 72 | ptr = kmem_zalloc(size, flags | KM_MAYFAIL); |
| 71 | if (ptr) | 73 | if (ptr) |
| 72 | return ptr; | 74 | return ptr; |
| 73 | return vzalloc(size); | 75 | |
| 76 | /* | ||
| 77 | * __vmalloc() will allocate data pages and auxiliary structures (e.g. | ||
| 78 | * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context | ||
| 79 | * here. Hence we need to tell memory reclaim that we are in such a | ||
| 80 | * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering | ||
| 81 | * the filesystem here and potentially deadlocking. | ||
| 82 | */ | ||
| 83 | if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) | ||
| 84 | noio_flag = memalloc_noio_save(); | ||
| 85 | |||
| 86 | lflags = kmem_flags_convert(flags); | ||
| 87 | ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); | ||
| 88 | |||
| 89 | if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) | ||
| 90 | memalloc_noio_restore(noio_flag); | ||
| 91 | |||
| 92 | return ptr; | ||
| 74 | } | 93 | } |
| 75 | 94 | ||
| 76 | void | 95 | void |
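
The kmem_zalloc_large() change brackets __vmalloc() with memalloc_noio_save()/memalloc_noio_restore() because __vmalloc() allocates its page tables with GFP_KERNEL regardless of the gfp mask passed for the data pages; setting PF_MEMALLOC_NOIO on the task is how reclaim triggered by those nested allocations is kept out of filesystem and IO paths. The bracket reduced to its skeleton (in_nofs_context stands in for the PF_FSTRANS / KM_NOFS test above):

	static void *vmalloc_nofs(size_t size, bool in_nofs_context)
	{
		unsigned noio_flag = 0;
		void *ptr;

		if (in_nofs_context)
			noio_flag = memalloc_noio_save();	/* sets PF_MEMALLOC_NOIO */

		ptr = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
				PAGE_KERNEL);

		if (in_nofs_context)
			memalloc_noio_restore(noio_flag);
		return ptr;
	}
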
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 0ecec1896f25..6888ad886ff6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
| @@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 281 | if (!acl) | 281 | if (!acl) |
| 282 | goto set_acl; | 282 | goto set_acl; |
| 283 | 283 | ||
| 284 | error = -EINVAL; | 284 | error = -E2BIG; |
| 285 | if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) | 285 | if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) |
| 286 | return error; | 286 | return error; |
| 287 | 287 | ||
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 3fc109819c34..0fdd4109c624 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
| @@ -89,6 +89,8 @@ typedef struct xfs_agf { | |||
| 89 | /* structure must be padded to 64 bit alignment */ | 89 | /* structure must be padded to 64 bit alignment */ |
| 90 | } xfs_agf_t; | 90 | } xfs_agf_t; |
| 91 | 91 | ||
| 92 | #define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) | ||
| 93 | |||
| 92 | #define XFS_AGF_MAGICNUM 0x00000001 | 94 | #define XFS_AGF_MAGICNUM 0x00000001 |
| 93 | #define XFS_AGF_VERSIONNUM 0x00000002 | 95 | #define XFS_AGF_VERSIONNUM 0x00000002 |
| 94 | #define XFS_AGF_SEQNO 0x00000004 | 96 | #define XFS_AGF_SEQNO 0x00000004 |
| @@ -167,6 +169,8 @@ typedef struct xfs_agi { | |||
| 167 | /* structure must be padded to 64 bit alignment */ | 169 | /* structure must be padded to 64 bit alignment */ |
| 168 | } xfs_agi_t; | 170 | } xfs_agi_t; |
| 169 | 171 | ||
| 172 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) | ||
| 173 | |||
| 170 | #define XFS_AGI_MAGICNUM 0x00000001 | 174 | #define XFS_AGI_MAGICNUM 0x00000001 |
| 171 | #define XFS_AGI_VERSIONNUM 0x00000002 | 175 | #define XFS_AGI_VERSIONNUM 0x00000002 |
| 172 | #define XFS_AGI_SEQNO 0x00000004 | 176 | #define XFS_AGI_SEQNO 0x00000004 |
| @@ -222,6 +226,8 @@ typedef struct xfs_agfl { | |||
| 222 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ | 226 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ |
| 223 | } xfs_agfl_t; | 227 | } xfs_agfl_t; |
| 224 | 228 | ||
| 229 | #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) | ||
| 230 | |||
| 225 | /* | 231 | /* |
| 226 | * tags for inode radix tree | 232 | * tags for inode radix tree |
| 227 | */ | 233 | */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9eab2dfdcbb5..c1cf6a336a72 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
| @@ -474,7 +474,6 @@ xfs_agfl_read_verify( | |||
| 474 | struct xfs_buf *bp) | 474 | struct xfs_buf *bp) |
| 475 | { | 475 | { |
| 476 | struct xfs_mount *mp = bp->b_target->bt_mount; | 476 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 477 | int agfl_ok = 1; | ||
| 478 | 477 | ||
| 479 | /* | 478 | /* |
| 480 | * There is no verification of non-crc AGFLs because mkfs does not | 479 | * There is no verification of non-crc AGFLs because mkfs does not |
| @@ -485,15 +484,13 @@ xfs_agfl_read_verify( | |||
| 485 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 484 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
| 486 | return; | 485 | return; |
| 487 | 486 | ||
| 488 | agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 487 | if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) |
| 489 | offsetof(struct xfs_agfl, agfl_crc)); | 488 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 490 | 489 | else if (!xfs_agfl_verify(bp)) | |
| 491 | agfl_ok = agfl_ok && xfs_agfl_verify(bp); | ||
| 492 | |||
| 493 | if (!agfl_ok) { | ||
| 494 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 495 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 490 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 496 | } | 491 | |
| 492 | if (bp->b_error) | ||
| 493 | xfs_verifier_error(bp); | ||
| 497 | } | 494 | } |
| 498 | 495 | ||
| 499 | static void | 496 | static void |
| @@ -508,16 +505,15 @@ xfs_agfl_write_verify( | |||
| 508 | return; | 505 | return; |
| 509 | 506 | ||
| 510 | if (!xfs_agfl_verify(bp)) { | 507 | if (!xfs_agfl_verify(bp)) { |
| 511 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 512 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 508 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 509 | xfs_verifier_error(bp); | ||
| 513 | return; | 510 | return; |
| 514 | } | 511 | } |
| 515 | 512 | ||
| 516 | if (bip) | 513 | if (bip) |
| 517 | XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 514 | XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 518 | 515 | ||
| 519 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 516 | xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); |
| 520 | offsetof(struct xfs_agfl, agfl_crc)); | ||
| 521 | } | 517 | } |
| 522 | 518 | ||
| 523 | const struct xfs_buf_ops xfs_agfl_buf_ops = { | 519 | const struct xfs_buf_ops xfs_agfl_buf_ops = { |
| @@ -2238,19 +2234,17 @@ xfs_agf_read_verify( | |||
| 2238 | struct xfs_buf *bp) | 2234 | struct xfs_buf *bp) |
| 2239 | { | 2235 | { |
| 2240 | struct xfs_mount *mp = bp->b_target->bt_mount; | 2236 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 2241 | int agf_ok = 1; | ||
| 2242 | |||
| 2243 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
| 2244 | agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
| 2245 | offsetof(struct xfs_agf, agf_crc)); | ||
| 2246 | 2237 | ||
| 2247 | agf_ok = agf_ok && xfs_agf_verify(mp, bp); | 2238 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 2248 | 2239 | !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) | |
| 2249 | if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, | 2240 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 2250 | XFS_RANDOM_ALLOC_READ_AGF))) { | 2241 | else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, |
| 2251 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 2242 | XFS_ERRTAG_ALLOC_READ_AGF, |
| 2243 | XFS_RANDOM_ALLOC_READ_AGF)) | ||
| 2252 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 2244 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 2253 | } | 2245 | |
| 2246 | if (bp->b_error) | ||
| 2247 | xfs_verifier_error(bp); | ||
| 2254 | } | 2248 | } |
| 2255 | 2249 | ||
| 2256 | static void | 2250 | static void |
| @@ -2261,8 +2255,8 @@ xfs_agf_write_verify( | |||
| 2261 | struct xfs_buf_log_item *bip = bp->b_fspriv; | 2255 | struct xfs_buf_log_item *bip = bp->b_fspriv; |
| 2262 | 2256 | ||
| 2263 | if (!xfs_agf_verify(mp, bp)) { | 2257 | if (!xfs_agf_verify(mp, bp)) { |
| 2264 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 2265 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 2258 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 2259 | xfs_verifier_error(bp); | ||
| 2266 | return; | 2260 | return; |
| 2267 | } | 2261 | } |
| 2268 | 2262 | ||
| @@ -2272,8 +2266,7 @@ xfs_agf_write_verify( | |||
| 2272 | if (bip) | 2266 | if (bip) |
| 2273 | XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 2267 | XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 2274 | 2268 | ||
| 2275 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 2269 | xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); |
| 2276 | offsetof(struct xfs_agf, agf_crc)); | ||
| 2277 | } | 2270 | } |
| 2278 | 2271 | ||
| 2279 | const struct xfs_buf_ops xfs_agf_buf_ops = { | 2272 | const struct xfs_buf_ops xfs_agf_buf_ops = { |
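
The AGF/AGFL hunks above, and the btree, attr-leaf and attr-remote ones that follow, all converge on one read-verifier idiom: report a checksum mismatch as EFSBADCRC and a structural check failure as EFSCORRUPTED, stash the error on the buffer, and let a single xfs_verifier_error() call replace the scattered XFS_CORRUPTION_ERROR() logging. The canonical shape, written with hypothetical xfs_foo names rather than any one verifier:

	static void xfs_foo_read_verify(struct xfs_buf *bp)
	{
		struct xfs_mount *mp = bp->b_target->bt_mount;

		if (xfs_sb_version_hascrc(&mp->m_sb) &&
		    !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
			xfs_buf_ioerror(bp, EFSBADCRC);		/* media/CRC damage */
		else if (!xfs_foo_verify(bp))
			xfs_buf_ioerror(bp, EFSCORRUPTED);	/* structural damage */

		if (bp->b_error)
			xfs_verifier_error(bp);	/* one place to log and report */
	}
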
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 13085429e523..cc1eadcbb049 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
| @@ -355,12 +355,14 @@ static void | |||
| 355 | xfs_allocbt_read_verify( | 355 | xfs_allocbt_read_verify( |
| 356 | struct xfs_buf *bp) | 356 | struct xfs_buf *bp) |
| 357 | { | 357 | { |
| 358 | if (!(xfs_btree_sblock_verify_crc(bp) && | 358 | if (!xfs_btree_sblock_verify_crc(bp)) |
| 359 | xfs_allocbt_verify(bp))) { | 359 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 360 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 360 | else if (!xfs_allocbt_verify(bp)) |
| 361 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 362 | bp->b_target->bt_mount, bp->b_addr); | ||
| 363 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 361 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 362 | |||
| 363 | if (bp->b_error) { | ||
| 364 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
| 365 | xfs_verifier_error(bp); | ||
| 364 | } | 366 | } |
| 365 | } | 367 | } |
| 366 | 368 | ||
| @@ -370,9 +372,9 @@ xfs_allocbt_write_verify( | |||
| 370 | { | 372 | { |
| 371 | if (!xfs_allocbt_verify(bp)) { | 373 | if (!xfs_allocbt_verify(bp)) { |
| 372 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 374 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
| 373 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 374 | bp->b_target->bt_mount, bp->b_addr); | ||
| 375 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 375 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 376 | xfs_verifier_error(bp); | ||
| 377 | return; | ||
| 376 | } | 378 | } |
| 377 | xfs_btree_sblock_calc_crc(bp); | 379 | xfs_btree_sblock_calc_crc(bp); |
| 378 | 380 | ||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index db2cfb067d0b..75df77d09f75 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -632,38 +632,46 @@ xfs_map_at_offset( | |||
| 632 | } | 632 | } |
| 633 | 633 | ||
| 634 | /* | 634 | /* |
| 635 | * Test if a given page is suitable for writing as part of an unwritten | 635 | * Test if a given page contains at least one buffer of a given @type. |
| 636 | * or delayed allocate extent. | 636 | * If @check_all_buffers is true, then we walk all the buffers in the page to |
| 637 | * try to find one of the type passed in. If it is not set, then the caller only | ||
| 638 | * needs to check the first buffer on the page for a match. | ||
| 637 | */ | 639 | */ |
| 638 | STATIC int | 640 | STATIC bool |
| 639 | xfs_check_page_type( | 641 | xfs_check_page_type( |
| 640 | struct page *page, | 642 | struct page *page, |
| 641 | unsigned int type) | 643 | unsigned int type, |
| 644 | bool check_all_buffers) | ||
| 642 | { | 645 | { |
| 643 | if (PageWriteback(page)) | 646 | struct buffer_head *bh; |
| 644 | return 0; | 647 | struct buffer_head *head; |
| 645 | 648 | ||
| 646 | if (page->mapping && page_has_buffers(page)) { | 649 | if (PageWriteback(page)) |
| 647 | struct buffer_head *bh, *head; | 650 | return false; |
| 648 | int acceptable = 0; | 651 | if (!page->mapping) |
| 652 | return false; | ||
| 653 | if (!page_has_buffers(page)) | ||
| 654 | return false; | ||
| 649 | 655 | ||
| 650 | bh = head = page_buffers(page); | 656 | bh = head = page_buffers(page); |
| 651 | do { | 657 | do { |
| 652 | if (buffer_unwritten(bh)) | 658 | if (buffer_unwritten(bh)) { |
| 653 | acceptable += (type == XFS_IO_UNWRITTEN); | 659 | if (type == XFS_IO_UNWRITTEN) |
| 654 | else if (buffer_delay(bh)) | 660 | return true; |
| 655 | acceptable += (type == XFS_IO_DELALLOC); | 661 | } else if (buffer_delay(bh)) { |
| 656 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 662 | if (type == XFS_IO_DELALLOC) |
| 657 | acceptable += (type == XFS_IO_OVERWRITE); | 663 | return true; |
| 658 | else | 664 | } else if (buffer_dirty(bh) && buffer_mapped(bh)) { |
| 659 | break; | 665 | if (type == XFS_IO_OVERWRITE) |
| 660 | } while ((bh = bh->b_this_page) != head); | 666 | return true; |
| 667 | } | ||
| 661 | 668 | ||
| 662 | if (acceptable) | 669 | /* If we are only checking the first buffer, we are done now. */ |
| 663 | return 1; | 670 | if (!check_all_buffers) |
| 664 | } | 671 | break; |
| 672 | } while ((bh = bh->b_this_page) != head); | ||
| 665 | 673 | ||
| 666 | return 0; | 674 | return false; |
| 667 | } | 675 | } |
| 668 | 676 | ||
| 669 | /* | 677 | /* |
| @@ -697,7 +705,7 @@ xfs_convert_page( | |||
| 697 | goto fail_unlock_page; | 705 | goto fail_unlock_page; |
| 698 | if (page->mapping != inode->i_mapping) | 706 | if (page->mapping != inode->i_mapping) |
| 699 | goto fail_unlock_page; | 707 | goto fail_unlock_page; |
| 700 | if (!xfs_check_page_type(page, (*ioendp)->io_type)) | 708 | if (!xfs_check_page_type(page, (*ioendp)->io_type, false)) |
| 701 | goto fail_unlock_page; | 709 | goto fail_unlock_page; |
| 702 | 710 | ||
| 703 | /* | 711 | /* |
| @@ -742,6 +750,15 @@ xfs_convert_page( | |||
| 742 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | 750 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; |
| 743 | page_dirty = p_offset / len; | 751 | page_dirty = p_offset / len; |
| 744 | 752 | ||
| 753 | /* | ||
| 754 | * The moment we find a buffer that doesn't match our current type | ||
| 755 | * specification or can't be written, abort the loop and start | ||
| 756 | * writeback. As per the above xfs_imap_valid() check, only | ||
| 757 | * xfs_vm_writepage() can handle partial page writeback fully - we are | ||
| 758 | * limited here to the buffers that are contiguous with the current | ||
| 759 | * ioend, and hence a buffer we can't write breaks that contiguity and | ||
| 760 | * we have to defer the rest of the IO to xfs_vm_writepage(). | ||
| 761 | */ | ||
| 745 | bh = head = page_buffers(page); | 762 | bh = head = page_buffers(page); |
| 746 | do { | 763 | do { |
| 747 | if (offset >= end_offset) | 764 | if (offset >= end_offset) |
| @@ -750,7 +767,7 @@ xfs_convert_page( | |||
| 750 | uptodate = 0; | 767 | uptodate = 0; |
| 751 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { | 768 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { |
| 752 | done = 1; | 769 | done = 1; |
| 753 | continue; | 770 | break; |
| 754 | } | 771 | } |
| 755 | 772 | ||
| 756 | if (buffer_unwritten(bh) || buffer_delay(bh) || | 773 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
| @@ -762,10 +779,11 @@ xfs_convert_page( | |||
| 762 | else | 779 | else |
| 763 | type = XFS_IO_OVERWRITE; | 780 | type = XFS_IO_OVERWRITE; |
| 764 | 781 | ||
| 765 | if (!xfs_imap_valid(inode, imap, offset)) { | 782 | /* |
| 766 | done = 1; | 783 | * imap should always be valid because of the above |
| 767 | continue; | 784 | * partial page end_offset check on the imap. |
| 768 | } | 785 | */ |
| 786 | ASSERT(xfs_imap_valid(inode, imap, offset)); | ||
| 769 | 787 | ||
| 770 | lock_buffer(bh); | 788 | lock_buffer(bh); |
| 771 | if (type != XFS_IO_OVERWRITE) | 789 | if (type != XFS_IO_OVERWRITE) |
| @@ -777,6 +795,7 @@ xfs_convert_page( | |||
| 777 | count++; | 795 | count++; |
| 778 | } else { | 796 | } else { |
| 779 | done = 1; | 797 | done = 1; |
| 798 | break; | ||
| 780 | } | 799 | } |
| 781 | } while (offset += len, (bh = bh->b_this_page) != head); | 800 | } while (offset += len, (bh = bh->b_this_page) != head); |
| 782 | 801 | ||
| @@ -868,7 +887,7 @@ xfs_aops_discard_page( | |||
| 868 | struct buffer_head *bh, *head; | 887 | struct buffer_head *bh, *head; |
| 869 | loff_t offset = page_offset(page); | 888 | loff_t offset = page_offset(page); |
| 870 | 889 | ||
| 871 | if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) | 890 | if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) |
| 872 | goto out_invalidate; | 891 | goto out_invalidate; |
| 873 | 892 | ||
| 874 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 893 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| @@ -1441,7 +1460,8 @@ xfs_vm_direct_IO( | |||
| 1441 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1460 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
| 1442 | offset, nr_segs, | 1461 | offset, nr_segs, |
| 1443 | xfs_get_blocks_direct, | 1462 | xfs_get_blocks_direct, |
| 1444 | xfs_end_io_direct_write, NULL, 0); | 1463 | xfs_end_io_direct_write, NULL, |
| 1464 | DIO_ASYNC_EXTEND); | ||
| 1445 | if (ret != -EIOCBQUEUED && iocb->private) | 1465 | if (ret != -EIOCBQUEUED && iocb->private) |
| 1446 | goto out_destroy_ioend; | 1466 | goto out_destroy_ioend; |
| 1447 | } else { | 1467 | } else { |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 7b126f46a2f9..fe9587fab17a 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
| @@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify( | |||
| 213 | struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; | 213 | struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; |
| 214 | 214 | ||
| 215 | if (!xfs_attr3_leaf_verify(bp)) { | 215 | if (!xfs_attr3_leaf_verify(bp)) { |
| 216 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 217 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 216 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 217 | xfs_verifier_error(bp); | ||
| 218 | return; | 218 | return; |
| 219 | } | 219 | } |
| 220 | 220 | ||
| @@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify( | |||
| 224 | if (bip) | 224 | if (bip) |
| 225 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 225 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 226 | 226 | ||
| 227 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); | 227 | xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | /* | 230 | /* |
| @@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify( | |||
| 239 | { | 239 | { |
| 240 | struct xfs_mount *mp = bp->b_target->bt_mount; | 240 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 241 | 241 | ||
| 242 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 242 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 243 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 243 | !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) |
| 244 | XFS_ATTR3_LEAF_CRC_OFF)) || | 244 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 245 | !xfs_attr3_leaf_verify(bp)) { | 245 | else if (!xfs_attr3_leaf_verify(bp)) |
| 246 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 247 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 246 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 248 | } | 247 | |
| 248 | if (bp->b_error) | ||
| 249 | xfs_verifier_error(bp); | ||
| 249 | } | 250 | } |
| 250 | 251 | ||
| 251 | const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { | 252 | const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { |
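The attr leaf hunks above establish the read-verifier shape this series rolls out across the metadata buffer types: check the CRC first and flag EFSBADCRC, then check structure and flag EFSCORRUPTED, and report once through the new xfs_verifier_error() helper. A minimal sketch of that shape, with xfs_foo_verify() and XFS_FOO_CRC_OFF as placeholder names rather than real XFS symbols:

    static void
    xfs_foo_read_verify(
        struct xfs_buf      *bp)
    {
        struct xfs_mount    *mp = bp->b_target->bt_mount;

        /* A CRC mismatch and structural corruption get distinct errors. */
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
            xfs_buf_ioerror(bp, EFSBADCRC);
        else if (!xfs_foo_verify(bp))
            xfs_buf_ioerror(bp, EFSCORRUPTED);

        /* Single reporting point, whichever check failed. */
        if (bp->b_error)
            xfs_verifier_error(bp);
    }

Separating the two failure modes lets diagnostics distinguish media or torn-write damage (bad CRC) from software-written garbage (bad contents).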
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 5549d69ddb45..6e37823e2932 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c | |||
| @@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify( | |||
| 125 | struct xfs_mount *mp = bp->b_target->bt_mount; | 125 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 126 | char *ptr; | 126 | char *ptr; |
| 127 | int len; | 127 | int len; |
| 128 | bool corrupt = false; | ||
| 129 | xfs_daddr_t bno; | 128 | xfs_daddr_t bno; |
| 130 | 129 | ||
| 131 | /* no verification of non-crc buffers */ | 130 | /* no verification of non-crc buffers */ |
| @@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify( | |||
| 140 | while (len > 0) { | 139 | while (len > 0) { |
| 141 | if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), | 140 | if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), |
| 142 | XFS_ATTR3_RMT_CRC_OFF)) { | 141 | XFS_ATTR3_RMT_CRC_OFF)) { |
| 143 | corrupt = true; | 142 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 144 | break; | 143 | break; |
| 145 | } | 144 | } |
| 146 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { | 145 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { |
| 147 | corrupt = true; | 146 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 148 | break; | 147 | break; |
| 149 | } | 148 | } |
| 150 | len -= XFS_LBSIZE(mp); | 149 | len -= XFS_LBSIZE(mp); |
| @@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify( | |||
| 152 | bno += mp->m_bsize; | 151 | bno += mp->m_bsize; |
| 153 | } | 152 | } |
| 154 | 153 | ||
| 155 | if (corrupt) { | 154 | if (bp->b_error) |
| 156 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 155 | xfs_verifier_error(bp); |
| 157 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 156 | else |
| 158 | } else | ||
| 159 | ASSERT(len == 0); | 157 | ASSERT(len == 0); |
| 160 | } | 158 | } |
| 161 | 159 | ||
| @@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify( | |||
| 180 | 178 | ||
| 181 | while (len > 0) { | 179 | while (len > 0) { |
| 182 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { | 180 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { |
| 183 | XFS_CORRUPTION_ERROR(__func__, | ||
| 184 | XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 185 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 181 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 182 | xfs_verifier_error(bp); | ||
| 186 | return; | 183 | return; |
| 187 | } | 184 | } |
| 188 | if (bip) { | 185 | if (bip) { |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 152543c4ca70..5b6092ef51ef 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
| @@ -5378,3 +5378,196 @@ error0: | |||
| 5378 | } | 5378 | } |
| 5379 | return error; | 5379 | return error; |
| 5380 | } | 5380 | } |
| 5381 | |||
| 5382 | /* | ||
| 5383 | * Shift extent records to the left to cover a hole. | ||
| 5384 | * | ||
| 5385 | * The maximum number of extents to be shifted in a single operation | ||
| 5386 | * is @num_exts, and @current_ext keeps track of the current extent | ||
| 5387 | * index we have shifted. @offset_shift_fsb is the length by which each | ||
| 5388 | * extent is shifted. If there is no hole to shift the extents | ||
| 5389 | * into, this will be considered an invalid operation and we abort immediately. | ||
| 5390 | */ | ||
| 5391 | int | ||
| 5392 | xfs_bmap_shift_extents( | ||
| 5393 | struct xfs_trans *tp, | ||
| 5394 | struct xfs_inode *ip, | ||
| 5395 | int *done, | ||
| 5396 | xfs_fileoff_t start_fsb, | ||
| 5397 | xfs_fileoff_t offset_shift_fsb, | ||
| 5398 | xfs_extnum_t *current_ext, | ||
| 5399 | xfs_fsblock_t *firstblock, | ||
| 5400 | struct xfs_bmap_free *flist, | ||
| 5401 | int num_exts) | ||
| 5402 | { | ||
| 5403 | struct xfs_btree_cur *cur; | ||
| 5404 | struct xfs_bmbt_rec_host *gotp; | ||
| 5405 | struct xfs_bmbt_irec got; | ||
| 5406 | struct xfs_bmbt_irec left; | ||
| 5407 | struct xfs_mount *mp = ip->i_mount; | ||
| 5408 | struct xfs_ifork *ifp; | ||
| 5409 | xfs_extnum_t nexts = 0; | ||
| 5410 | xfs_fileoff_t startoff; | ||
| 5411 | int error = 0; | ||
| 5412 | int i; | ||
| 5413 | int whichfork = XFS_DATA_FORK; | ||
| 5414 | int logflags; | ||
| 5415 | xfs_filblks_t blockcount = 0; | ||
| 5416 | |||
| 5417 | if (unlikely(XFS_TEST_ERROR( | ||
| 5418 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
| 5419 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
| 5420 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
| 5421 | XFS_ERROR_REPORT("xfs_bmap_shift_extents", | ||
| 5422 | XFS_ERRLEVEL_LOW, mp); | ||
| 5423 | return XFS_ERROR(EFSCORRUPTED); | ||
| 5424 | } | ||
| 5425 | |||
| 5426 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 5427 | return XFS_ERROR(EIO); | ||
| 5428 | |||
| 5429 | ASSERT(current_ext != NULL); | ||
| 5430 | |||
| 5431 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
| 5432 | |||
| 5433 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
| 5434 | /* Read in all the extents */ | ||
| 5435 | error = xfs_iread_extents(tp, ip, whichfork); | ||
| 5436 | if (error) | ||
| 5437 | return error; | ||
| 5438 | } | ||
| 5439 | |||
| 5440 | /* | ||
| 5441 | * If *current_ext is 0, we need to look up the extent | ||
| 5442 | * from which we start shifting and store it in gotp. | ||
| 5443 | */ | ||
| 5444 | if (!*current_ext) { | ||
| 5445 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); | ||
| 5446 | /* | ||
| 5447 | * gotp can be null in 2 cases: 1) if there are no extents | ||
| 5448 | * or 2) start_fsb lies in a hole beyond which there are | ||
| 5449 | * no extents. Either way, we are done. | ||
| 5450 | */ | ||
| 5451 | if (!gotp) { | ||
| 5452 | *done = 1; | ||
| 5453 | return 0; | ||
| 5454 | } | ||
| 5455 | } | ||
| 5456 | |||
| 5457 | /* We are going to change the core inode */ | ||
| 5458 | logflags = XFS_ILOG_CORE; | ||
| 5459 | |||
| 5460 | if (ifp->if_flags & XFS_IFBROOT) { | ||
| 5461 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
| 5462 | cur->bc_private.b.firstblock = *firstblock; | ||
| 5463 | cur->bc_private.b.flist = flist; | ||
| 5464 | cur->bc_private.b.flags = 0; | ||
| 5465 | } else { | ||
| 5466 | cur = NULL; | ||
| 5467 | logflags |= XFS_ILOG_DEXT; | ||
| 5468 | } | ||
| 5469 | |||
| 5470 | while (nexts++ < num_exts && | ||
| 5471 | *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { | ||
| 5472 | |||
| 5473 | gotp = xfs_iext_get_ext(ifp, *current_ext); | ||
| 5474 | xfs_bmbt_get_all(gotp, &got); | ||
| 5475 | startoff = got.br_startoff - offset_shift_fsb; | ||
| 5476 | |||
| 5477 | /* | ||
| 5478 | * Before shifting an extent into the hole, make sure the hole | ||
| 5479 | * is large enough to accommodate the shift. | ||
| 5480 | */ | ||
| 5481 | if (*current_ext) { | ||
| 5482 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, | ||
| 5483 | *current_ext - 1), &left); | ||
| 5484 | |||
| 5485 | if (startoff < left.br_startoff + left.br_blockcount) | ||
| 5486 | error = XFS_ERROR(EINVAL); | ||
| 5487 | } else if (offset_shift_fsb > got.br_startoff) { | ||
| 5488 | /* | ||
| 5489 | * When the first extent is shifted, offset_shift_fsb | ||
| 5490 | * should be less than the starting offset of | ||
| 5491 | * the first extent. | ||
| 5492 | */ | ||
| 5493 | error = XFS_ERROR(EINVAL); | ||
| 5494 | } | ||
| 5495 | |||
| 5496 | if (error) | ||
| 5497 | goto del_cursor; | ||
| 5498 | |||
| 5499 | if (cur) { | ||
| 5500 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
| 5501 | got.br_startblock, | ||
| 5502 | got.br_blockcount, | ||
| 5503 | &i); | ||
| 5504 | if (error) | ||
| 5505 | goto del_cursor; | ||
| 5506 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
| 5507 | } | ||
| 5508 | |||
| 5509 | /* Check if we can merge 2 adjacent extents */ | ||
| 5510 | if (*current_ext && | ||
| 5511 | left.br_startoff + left.br_blockcount == startoff && | ||
| 5512 | left.br_startblock + left.br_blockcount == | ||
| 5513 | got.br_startblock && | ||
| 5514 | left.br_state == got.br_state && | ||
| 5515 | left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { | ||
| 5516 | blockcount = left.br_blockcount + | ||
| 5517 | got.br_blockcount; | ||
| 5518 | xfs_iext_remove(ip, *current_ext, 1, 0); | ||
| 5519 | if (cur) { | ||
| 5520 | error = xfs_btree_delete(cur, &i); | ||
| 5521 | if (error) | ||
| 5522 | goto del_cursor; | ||
| 5523 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
| 5524 | } | ||
| 5525 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
| 5526 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
| 5527 | gotp = xfs_iext_get_ext(ifp, --*current_ext); | ||
| 5528 | xfs_bmbt_get_all(gotp, &got); | ||
| 5529 | |||
| 5530 | /* Make cursor point to the extent we will update */ | ||
| 5531 | if (cur) { | ||
| 5532 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
| 5533 | got.br_startblock, | ||
| 5534 | got.br_blockcount, | ||
| 5535 | &i); | ||
| 5536 | if (error) | ||
| 5537 | goto del_cursor; | ||
| 5538 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
| 5539 | } | ||
| 5540 | |||
| 5541 | xfs_bmbt_set_blockcount(gotp, blockcount); | ||
| 5542 | got.br_blockcount = blockcount; | ||
| 5543 | } else { | ||
| 5544 | /* We have to update the startoff */ | ||
| 5545 | xfs_bmbt_set_startoff(gotp, startoff); | ||
| 5546 | got.br_startoff = startoff; | ||
| 5547 | } | ||
| 5548 | |||
| 5549 | if (cur) { | ||
| 5550 | error = xfs_bmbt_update(cur, got.br_startoff, | ||
| 5551 | got.br_startblock, | ||
| 5552 | got.br_blockcount, | ||
| 5553 | got.br_state); | ||
| 5554 | if (error) | ||
| 5555 | goto del_cursor; | ||
| 5556 | } | ||
| 5557 | |||
| 5558 | (*current_ext)++; | ||
| 5559 | } | ||
| 5560 | |||
| 5561 | /* Check if we are done */ | ||
| 5562 | if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) | ||
| 5563 | *done = 1; | ||
| 5564 | |||
| 5565 | del_cursor: | ||
| 5566 | if (cur) | ||
| 5567 | xfs_btree_del_cursor(cur, | ||
| 5568 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
| 5569 | |||
| 5570 | xfs_trans_log_inode(tp, ip, logflags); | ||
| 5571 | |||
| 5572 | return error; | ||
| 5573 | } | ||
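The loop above handles two cases per extent: merge into the left neighbour when the shifted extent becomes logically and physically contiguous with it (same state, combined length within MAXEXTLEN), otherwise simply move the start offset left. A standalone, in-memory model of that decision follows; the types and names are illustrative, not the XFS incore extent format:

    #include <stdint.h>

    #define MAXEXTLEN ((1ULL << 21) - 1)   /* XFS max extent length, in blocks */

    struct irec {
        uint64_t startoff;      /* file offset of extent, in fs blocks */
        uint64_t startblock;    /* disk block of extent start */
        uint64_t blockcount;    /* extent length, in fs blocks */
        int      state;         /* written vs. unwritten */
    };

    /*
     * Shift ext[i] left by 'shift' blocks. Returns the index of the extent
     * now covering the range (i - 1 if merged, in which case the caller
     * removes ext[i]), or -1 if the hole to the left is too small.
     */
    int shift_one(struct irec *ext, int i, uint64_t shift)
    {
        uint64_t newoff = ext[i].startoff - shift;

        if (i > 0) {
            struct irec *left = &ext[i - 1];

            /* The hole must be large enough to accommodate the shift. */
            if (newoff < left->startoff + left->blockcount)
                return -1;

            /* Contiguous in file and on disk, same state: merge. */
            if (left->startoff + left->blockcount == newoff &&
                left->startblock + left->blockcount == ext[i].startblock &&
                left->state == ext[i].state &&
                left->blockcount + ext[i].blockcount <= MAXEXTLEN) {
                left->blockcount += ext[i].blockcount;
                return i - 1;
            }
        } else if (shift > ext[i].startoff) {
            return -1;  /* would shift past file offset zero */
        }

        ext[i].startoff = newoff;
        return i;
    }

In the real function the same decision additionally drives the bmap btree updates under the transaction: a btree record delete on merge, a record update otherwise.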
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 33b41f351225..f84bd7af43be 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
| @@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) | |||
| 127 | { BMAP_RIGHT_FILLING, "RF" }, \ | 127 | { BMAP_RIGHT_FILLING, "RF" }, \ |
| 128 | { BMAP_ATTRFORK, "ATTR" } | 128 | { BMAP_ATTRFORK, "ATTR" } |
| 129 | 129 | ||
| 130 | |||
| 131 | /* | ||
| 132 | * This macro is used to determine how many extents will be shifted | ||
| 133 | * in one write transaction. A single shift can require two | ||
| 134 | * splits: an extent move on the first and an extent merge on | ||
| 135 | * the second, so it is proper that only one extent is shifted | ||
| 136 | * inside a write transaction at a time. | ||
| 137 | */ | ||
| 138 | #define XFS_BMAP_MAX_SHIFT_EXTENTS 1 | ||
| 139 | |||
| 130 | #ifdef DEBUG | 140 | #ifdef DEBUG |
| 131 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | 141 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, |
| 132 | int whichfork, unsigned long caller_ip); | 142 | int whichfork, unsigned long caller_ip); |
| @@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, | |||
| 169 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | 179 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, |
| 170 | xfs_extnum_t num); | 180 | xfs_extnum_t num); |
| 171 | uint xfs_default_attroffset(struct xfs_inode *ip); | 181 | uint xfs_default_attroffset(struct xfs_inode *ip); |
| 182 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, | ||
| 183 | int *done, xfs_fileoff_t start_fsb, | ||
| 184 | xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, | ||
| 185 | xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, | ||
| 186 | int num_exts); | ||
| 172 | 187 | ||
| 173 | #endif /* __XFS_BMAP_H__ */ | 188 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 706bc3f777cb..818d546664e7 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
| @@ -780,12 +780,14 @@ static void | |||
| 780 | xfs_bmbt_read_verify( | 780 | xfs_bmbt_read_verify( |
| 781 | struct xfs_buf *bp) | 781 | struct xfs_buf *bp) |
| 782 | { | 782 | { |
| 783 | if (!(xfs_btree_lblock_verify_crc(bp) && | 783 | if (!xfs_btree_lblock_verify_crc(bp)) |
| 784 | xfs_bmbt_verify(bp))) { | 784 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 785 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 785 | else if (!xfs_bmbt_verify(bp)) |
| 786 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 787 | bp->b_target->bt_mount, bp->b_addr); | ||
| 788 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 786 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 787 | |||
| 788 | if (bp->b_error) { | ||
| 789 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
| 790 | xfs_verifier_error(bp); | ||
| 789 | } | 791 | } |
| 790 | } | 792 | } |
| 791 | 793 | ||
| @@ -794,11 +796,9 @@ xfs_bmbt_write_verify( | |||
| 794 | struct xfs_buf *bp) | 796 | struct xfs_buf *bp) |
| 795 | { | 797 | { |
| 796 | if (!xfs_bmbt_verify(bp)) { | 798 | if (!xfs_bmbt_verify(bp)) { |
| 797 | xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); | ||
| 798 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 799 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
| 799 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 800 | bp->b_target->bt_mount, bp->b_addr); | ||
| 801 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 800 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 801 | xfs_verifier_error(bp); | ||
| 802 | return; | 802 | return; |
| 803 | } | 803 | } |
| 804 | xfs_btree_lblock_calc_crc(bp); | 804 | xfs_btree_lblock_calc_crc(bp); |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f264616080ca..01f6a646caa1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
| @@ -1349,7 +1349,6 @@ xfs_free_file_space( | |||
| 1349 | * the freeing of the space succeeds at ENOSPC. | 1349 | * the freeing of the space succeeds at ENOSPC. |
| 1350 | */ | 1350 | */ |
| 1351 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 1351 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
| 1352 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
| 1353 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); | 1352 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); |
| 1354 | 1353 | ||
| 1355 | /* | 1354 | /* |
| @@ -1468,6 +1467,102 @@ out: | |||
| 1468 | } | 1467 | } |
| 1469 | 1468 | ||
| 1470 | /* | 1469 | /* |
| 1470 | * xfs_collapse_file_space() | ||
| 1471 | * This routine frees disk space and shifts extents for the given file. | ||
| 1472 | * The first thing we do is free the data blocks in the specified range | ||
| 1473 | * by calling xfs_free_file_space(), which also syncs dirty data | ||
| 1474 | * and invalidates the page cache over the region the collapse range | ||
| 1475 | * is working on. Extent records are then shifted left to cover the hole. | ||
| 1476 | * RETURNS: | ||
| 1477 | * 0 on success | ||
| 1478 | * errno on error | ||
| 1479 | * | ||
| 1480 | */ | ||
| 1481 | int | ||
| 1482 | xfs_collapse_file_space( | ||
| 1483 | struct xfs_inode *ip, | ||
| 1484 | xfs_off_t offset, | ||
| 1485 | xfs_off_t len) | ||
| 1486 | { | ||
| 1487 | int done = 0; | ||
| 1488 | struct xfs_mount *mp = ip->i_mount; | ||
| 1489 | struct xfs_trans *tp; | ||
| 1490 | int error; | ||
| 1491 | xfs_extnum_t current_ext = 0; | ||
| 1492 | struct xfs_bmap_free free_list; | ||
| 1493 | xfs_fsblock_t first_block; | ||
| 1494 | int committed; | ||
| 1495 | xfs_fileoff_t start_fsb; | ||
| 1496 | xfs_fileoff_t shift_fsb; | ||
| 1497 | |||
| 1498 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
| 1499 | |||
| 1500 | trace_xfs_collapse_file_space(ip); | ||
| 1501 | |||
| 1502 | start_fsb = XFS_B_TO_FSB(mp, offset + len); | ||
| 1503 | shift_fsb = XFS_B_TO_FSB(mp, len); | ||
| 1504 | |||
| 1505 | error = xfs_free_file_space(ip, offset, len); | ||
| 1506 | if (error) | ||
| 1507 | return error; | ||
| 1508 | |||
| 1509 | while (!error && !done) { | ||
| 1510 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | ||
| 1511 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
| 1512 | /* | ||
| 1513 | * We need to reserve a permanent block for the transaction. | ||
| 1514 | * This comes into play when, after shifting an extent into a | ||
| 1515 | * hole, we find that adjacent extents can be merged, which | ||
| 1516 | * may lead to freeing of a block during the record update. | ||
| 1517 | */ | ||
| 1518 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, | ||
| 1519 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); | ||
| 1520 | if (error) { | ||
| 1521 | ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); | ||
| 1522 | xfs_trans_cancel(tp, 0); | ||
| 1523 | break; | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 1527 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, | ||
| 1528 | ip->i_gdquot, ip->i_pdquot, | ||
| 1529 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, | ||
| 1530 | XFS_QMOPT_RES_REGBLKS); | ||
| 1531 | if (error) | ||
| 1532 | goto out; | ||
| 1533 | |||
| 1534 | xfs_trans_ijoin(tp, ip, 0); | ||
| 1535 | |||
| 1536 | xfs_bmap_init(&free_list, &first_block); | ||
| 1537 | |||
| 1538 | /* | ||
| 1539 | * We are using the write transaction, in which a maximum | ||
| 1540 | * of two bmbt updates are allowed. | ||
| 1541 | */ | ||
| 1542 | error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, | ||
| 1543 | shift_fsb, ¤t_ext, | ||
| 1544 | &first_block, &free_list, | ||
| 1545 | XFS_BMAP_MAX_SHIFT_EXTENTS); | ||
| 1546 | if (error) | ||
| 1547 | goto out; | ||
| 1548 | |||
| 1549 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
| 1550 | if (error) | ||
| 1551 | goto out; | ||
| 1552 | |||
| 1553 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
| 1554 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | return error; | ||
| 1558 | |||
| 1559 | out: | ||
| 1560 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | ||
| 1561 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 1562 | return error; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | /* | ||
| 1471 | * We need to check that the format of the data fork in the temporary inode is | 1566 | * We need to check that the format of the data fork in the temporary inode is |
| 1472 | * valid for the target inode before doing the swap. This is not a problem with | 1567 | * valid for the target inode before doing the swap. This is not a problem with |
| 1473 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | 1568 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized |
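xfs_collapse_file_space() is the filesystem-side backend of the collapse-range fallocate operation. For context, a minimal userspace sketch of the entry point it serves, assuming a kernel and filesystem that support FALLOC_FL_COLLAPSE_RANGE and a block-aligned offset and length:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        off_t offset = 65536;   /* both must be multiples of the fs block size */
        off_t len = 65536;
        int fd;

        if (argc < 2) {
            fprintf(stderr, "usage: %s <file>\n", argv[0]);
            return 1;
        }
        fd = open(argv[1], O_RDWR);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* Remove [offset, offset + len) and shift the later data left. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, len) < 0) {
            perror("fallocate(FALLOC_FL_COLLAPSE_RANGE)");
            close(fd);
            return 1;
        }
        close(fd);
        return 0;
    }

The file shrinks by len bytes; unlike a plain hole punch, no gap remains in the file offset space.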
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 900747b25772..935ed2b24edf 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
| @@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, | |||
| 99 | xfs_off_t len); | 99 | xfs_off_t len); |
| 100 | int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, | 100 | int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, |
| 101 | xfs_off_t len); | 101 | xfs_off_t len); |
| 102 | int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, | ||
| 103 | xfs_off_t len); | ||
| 102 | 104 | ||
| 103 | /* EOF block manipulation functions */ | 105 | /* EOF block manipulation functions */ |
| 104 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | 106 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 9adaae4f3e2f..e80d59fdf89a 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
| @@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc( | |||
| 234 | return; | 234 | return; |
| 235 | if (bip) | 235 | if (bip) |
| 236 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 236 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 237 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 237 | xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
| 238 | XFS_BTREE_LBLOCK_CRC_OFF); | ||
| 239 | } | 238 | } |
| 240 | 239 | ||
| 241 | bool | 240 | bool |
| @@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc( | |||
| 243 | struct xfs_buf *bp) | 242 | struct xfs_buf *bp) |
| 244 | { | 243 | { |
| 245 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | 244 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) |
| 246 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 245 | return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
| 247 | XFS_BTREE_LBLOCK_CRC_OFF); | 246 | |
| 248 | return true; | 247 | return true; |
| 249 | } | 248 | } |
| 250 | 249 | ||
| @@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc( | |||
| 267 | return; | 266 | return; |
| 268 | if (bip) | 267 | if (bip) |
| 269 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 268 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 270 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 269 | xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
| 271 | XFS_BTREE_SBLOCK_CRC_OFF); | ||
| 272 | } | 270 | } |
| 273 | 271 | ||
| 274 | bool | 272 | bool |
| @@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc( | |||
| 276 | struct xfs_buf *bp) | 274 | struct xfs_buf *bp) |
| 277 | { | 275 | { |
| 278 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | 276 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) |
| 279 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 277 | return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
| 280 | XFS_BTREE_SBLOCK_CRC_OFF); | 278 | |
| 281 | return true; | 279 | return true; |
| 282 | } | 280 | } |
| 283 | 281 | ||
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9c061ef2b0d9..107f2fdfe41f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
| @@ -396,7 +396,17 @@ _xfs_buf_map_pages( | |||
| 396 | bp->b_addr = NULL; | 396 | bp->b_addr = NULL; |
| 397 | } else { | 397 | } else { |
| 398 | int retried = 0; | 398 | int retried = 0; |
| 399 | unsigned noio_flag; | ||
| 399 | 400 | ||
| 401 | /* | ||
| 402 | * vm_map_ram() will allocate auxiliary structures (e.g. | ||
| 403 | * pagetables) with GFP_KERNEL, yet we are likely to be under | ||
| 404 | * GFP_NOFS context here. Hence we need to tell memory reclaim | ||
| 405 | * that we are in such a context via PF_MEMALLOC_NOIO to prevent | ||
| 406 | * memory reclaim re-entering the filesystem here and | ||
| 407 | * potentially deadlocking. | ||
| 408 | */ | ||
| 409 | noio_flag = memalloc_noio_save(); | ||
| 400 | do { | 410 | do { |
| 401 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | 411 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, |
| 402 | -1, PAGE_KERNEL); | 412 | -1, PAGE_KERNEL); |
| @@ -404,6 +414,7 @@ _xfs_buf_map_pages( | |||
| 404 | break; | 414 | break; |
| 405 | vm_unmap_aliases(); | 415 | vm_unmap_aliases(); |
| 406 | } while (retried++ <= 1); | 416 | } while (retried++ <= 1); |
| 417 | memalloc_noio_restore(noio_flag); | ||
| 407 | 418 | ||
| 408 | if (!bp->b_addr) | 419 | if (!bp->b_addr) |
| 409 | return -ENOMEM; | 420 | return -ENOMEM; |
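The memalloc_noio_save()/memalloc_noio_restore() bracket added above is a general pattern for code that may be entered while holding locks that reclaim could need: every allocation inside the bracket behaves as if GFP_NOIO were in effect, regardless of the gfp flags the callee uses internally. Reduced to its shape (an illustrative wrapper, not an XFS function; the four-argument vm_map_ram() matches the kernel of this series):

    #include <linux/mm.h>
    #include <linux/sched.h>
    #include <linux/vmalloc.h>

    static void *
    map_pages_noio(struct page **pages, unsigned int page_count)
    {
        unsigned int noio_flag;
        void         *addr;

        noio_flag = memalloc_noio_save();   /* enter NOIO allocation context */
        addr = vm_map_ram(pages, page_count, -1, PAGE_KERNEL);
        memalloc_noio_restore(noio_flag);   /* restore the previous state */

        return addr;
    }

This is preferable to plumbing gfp flags into vm_map_ram(), which exposes no gfp argument for its internal page-table allocations.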
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 995339534db6..b8a3abf6cf47 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
| @@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) | |||
| 369 | xfs_buf_rele(bp); | 369 | xfs_buf_rele(bp); |
| 370 | } | 370 | } |
| 371 | 371 | ||
| 372 | static inline int | ||
| 373 | xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) | ||
| 374 | { | ||
| 375 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
| 376 | cksum_offset); | ||
| 377 | } | ||
| 378 | |||
| 379 | static inline void | ||
| 380 | xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) | ||
| 381 | { | ||
| 382 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
| 383 | cksum_offset); | ||
| 384 | } | ||
| 385 | |||
| 372 | /* | 386 | /* |
| 373 | * Handling of buftargs. | 387 | * Handling of buftargs. |
| 374 | */ | 388 | */ |
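With these wrappers available, write verifiers across the series settle into a uniform shape as well: verify the structure, bail out with EFSCORRUPTED on failure, stamp the LSN if the buffer is logged, and recompute the CRC last so it covers the final on-disk contents. A sketch with placeholder names (xfs_foo_verify(), XFS_FOO_CRC_OFF, and the header layout are stand-ins):

    static void
    xfs_foo_write_verify(
        struct xfs_buf          *bp)
    {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_buf_log_item *bip = bp->b_fspriv;
        struct xfs_foo_hdr      *hdr = bp->b_addr;

        if (!xfs_foo_verify(bp)) {
            xfs_buf_ioerror(bp, EFSCORRUPTED);
            xfs_verifier_error(bp);
            return;
        }

        if (!xfs_sb_version_hascrc(&mp->m_sb))
            return;

        if (bip)
            hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);

        /* The CRC goes last so it covers the LSN just stamped. */
        xfs_buf_update_cksum(bp, XFS_FOO_CRC_OFF);
    }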
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 33149113e333..8752821443be 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -796,20 +796,6 @@ xfs_buf_item_init( | |||
| 796 | bip->bli_formats[i].blf_map_size = map_size; | 796 | bip->bli_formats[i].blf_map_size = map_size; |
| 797 | } | 797 | } |
| 798 | 798 | ||
| 799 | #ifdef XFS_TRANS_DEBUG | ||
| 800 | /* | ||
| 801 | * Allocate the arrays for tracking what needs to be logged | ||
| 802 | * and what our callers request to be logged. bli_orig | ||
| 803 | * holds a copy of the original, clean buffer for comparison | ||
| 804 | * against, and bli_logged keeps a 1 bit flag per byte in | ||
| 805 | * the buffer to indicate which bytes the callers have asked | ||
| 806 | * to have logged. | ||
| 807 | */ | ||
| 808 | bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); | ||
| 809 | memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); | ||
| 810 | bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); | ||
| 811 | #endif | ||
| 812 | |||
| 813 | /* | 799 | /* |
| 814 | * Put the buf item into the list of items attached to the | 800 | * Put the buf item into the list of items attached to the |
| 815 | * buffer at the front. | 801 | * buffer at the front. |
| @@ -957,11 +943,6 @@ STATIC void | |||
| 957 | xfs_buf_item_free( | 943 | xfs_buf_item_free( |
| 958 | xfs_buf_log_item_t *bip) | 944 | xfs_buf_log_item_t *bip) |
| 959 | { | 945 | { |
| 960 | #ifdef XFS_TRANS_DEBUG | ||
| 961 | kmem_free(bip->bli_orig); | ||
| 962 | kmem_free(bip->bli_logged); | ||
| 963 | #endif /* XFS_TRANS_DEBUG */ | ||
| 964 | |||
| 965 | xfs_buf_item_free_format(bip); | 946 | xfs_buf_item_free_format(bip); |
| 966 | kmem_zone_free(xfs_buf_item_zone, bip); | 947 | kmem_zone_free(xfs_buf_item_zone, bip); |
| 967 | } | 948 | } |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 796272a2e129..6cc5f6785a77 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
| @@ -185,8 +185,8 @@ xfs_da3_node_write_verify( | |||
| 185 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | 185 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; |
| 186 | 186 | ||
| 187 | if (!xfs_da3_node_verify(bp)) { | 187 | if (!xfs_da3_node_verify(bp)) { |
| 188 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 189 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 188 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 189 | xfs_verifier_error(bp); | ||
| 190 | return; | 190 | return; |
| 191 | } | 191 | } |
| 192 | 192 | ||
| @@ -196,7 +196,7 @@ xfs_da3_node_write_verify( | |||
| 196 | if (bip) | 196 | if (bip) |
| 197 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 197 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 198 | 198 | ||
| 199 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); | 199 | xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | /* | 202 | /* |
| @@ -209,18 +209,20 @@ static void | |||
| 209 | xfs_da3_node_read_verify( | 209 | xfs_da3_node_read_verify( |
| 210 | struct xfs_buf *bp) | 210 | struct xfs_buf *bp) |
| 211 | { | 211 | { |
| 212 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
| 213 | struct xfs_da_blkinfo *info = bp->b_addr; | 212 | struct xfs_da_blkinfo *info = bp->b_addr; |
| 214 | 213 | ||
| 215 | switch (be16_to_cpu(info->magic)) { | 214 | switch (be16_to_cpu(info->magic)) { |
| 216 | case XFS_DA3_NODE_MAGIC: | 215 | case XFS_DA3_NODE_MAGIC: |
| 217 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 216 | if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { |
| 218 | XFS_DA3_NODE_CRC_OFF)) | 217 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 219 | break; | 218 | break; |
| 219 | } | ||
| 220 | /* fall through */ | 220 | /* fall through */ |
| 221 | case XFS_DA_NODE_MAGIC: | 221 | case XFS_DA_NODE_MAGIC: |
| 222 | if (!xfs_da3_node_verify(bp)) | 222 | if (!xfs_da3_node_verify(bp)) { |
| 223 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
| 223 | break; | 224 | break; |
| 225 | } | ||
| 224 | return; | 226 | return; |
| 225 | case XFS_ATTR_LEAF_MAGIC: | 227 | case XFS_ATTR_LEAF_MAGIC: |
| 226 | case XFS_ATTR3_LEAF_MAGIC: | 228 | case XFS_ATTR3_LEAF_MAGIC: |
| @@ -237,8 +239,7 @@ xfs_da3_node_read_verify( | |||
| 237 | } | 239 | } |
| 238 | 240 | ||
| 239 | /* corrupt block */ | 241 | /* corrupt block */ |
| 240 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 242 | xfs_verifier_error(bp); |
| 241 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
| 242 | } | 243 | } |
| 243 | 244 | ||
| 244 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { | 245 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { |
| @@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath( | |||
| 1295 | node = blk->bp->b_addr; | 1296 | node = blk->bp->b_addr; |
| 1296 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | 1297 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); |
| 1297 | btree = dp->d_ops->node_tree_p(node); | 1298 | btree = dp->d_ops->node_tree_p(node); |
| 1298 | if (be32_to_cpu(btree->hashval) == lasthash) | 1299 | if (be32_to_cpu(btree[blk->index].hashval) == lasthash) |
| 1299 | break; | 1300 | break; |
| 1300 | blk->hashval = lasthash; | 1301 | blk->hashval = lasthash; |
| 1301 | btree[blk->index].hashval = cpu_to_be32(lasthash); | 1302 | btree[blk->index].hashval = cpu_to_be32(lasthash); |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5869b50dc41..623bbe8fd921 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
| @@ -89,6 +89,8 @@ typedef struct xfs_dinode { | |||
| 89 | /* structure must be padded to 64 bit alignment */ | 89 | /* structure must be padded to 64 bit alignment */ |
| 90 | } xfs_dinode_t; | 90 | } xfs_dinode_t; |
| 91 | 91 | ||
| 92 | #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) | ||
| 93 | |||
| 92 | #define DI_MAX_FLUSH 0xffff | 94 | #define DI_MAX_FLUSH 0xffff |
| 93 | 95 | ||
| 94 | /* | 96 | /* |
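XFS_DINODE_CRC_OFF lets the generic checksum helpers locate the CRC field inside the on-disk inode without hard-coding a byte offset. A standalone illustration of the offsetof() technique; the struct below is a made-up stand-in, not the real xfs_dinode layout:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct demo_dinode {
        uint16_t di_magic;
        uint16_t di_mode;
        uint32_t di_flags;
        uint32_t di_crc;    /* checksum of the whole structure */
    };

    int main(void)
    {
        /* Helpers CRC everything except this hole, then store the result here. */
        printf("crc field offset = %zu\n", offsetof(struct demo_dinode, di_crc));
        return 0;
    }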
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index ce16ef02997a..fda46253966a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
| @@ -180,16 +180,23 @@ xfs_dir_init( | |||
| 180 | xfs_inode_t *dp, | 180 | xfs_inode_t *dp, |
| 181 | xfs_inode_t *pdp) | 181 | xfs_inode_t *pdp) |
| 182 | { | 182 | { |
| 183 | xfs_da_args_t args; | 183 | struct xfs_da_args *args; |
| 184 | int error; | 184 | int error; |
| 185 | 185 | ||
| 186 | memset((char *)&args, 0, sizeof(args)); | ||
| 187 | args.dp = dp; | ||
| 188 | args.trans = tp; | ||
| 189 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 186 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 190 | if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) | 187 | error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); |
| 188 | if (error) | ||
| 191 | return error; | 189 | return error; |
| 192 | return xfs_dir2_sf_create(&args, pdp->i_ino); | 190 | |
| 191 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
| 192 | if (!args) | ||
| 193 | return ENOMEM; | ||
| 194 | |||
| 195 | args->dp = dp; | ||
| 196 | args->trans = tp; | ||
| 197 | error = xfs_dir2_sf_create(args, pdp->i_ino); | ||
| 198 | kmem_free(args); | ||
| 199 | return error; | ||
| 193 | } | 200 | } |
| 194 | 201 | ||
| 195 | /* | 202 | /* |
| @@ -205,41 +212,56 @@ xfs_dir_createname( | |||
| 205 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 212 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| 206 | xfs_extlen_t total) /* bmap's total block count */ | 213 | xfs_extlen_t total) /* bmap's total block count */ |
| 207 | { | 214 | { |
| 208 | xfs_da_args_t args; | 215 | struct xfs_da_args *args; |
| 209 | int rval; | 216 | int rval; |
| 210 | int v; /* type-checking value */ | 217 | int v; /* type-checking value */ |
| 211 | 218 | ||
| 212 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 219 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 213 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 220 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); |
| 221 | if (rval) | ||
| 214 | return rval; | 222 | return rval; |
| 215 | XFS_STATS_INC(xs_dir_create); | 223 | XFS_STATS_INC(xs_dir_create); |
| 216 | 224 | ||
| 217 | memset(&args, 0, sizeof(xfs_da_args_t)); | 225 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
| 218 | args.name = name->name; | 226 | if (!args) |
| 219 | args.namelen = name->len; | 227 | return ENOMEM; |
| 220 | args.filetype = name->type; | 228 | |
| 221 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 229 | args->name = name->name; |
| 222 | args.inumber = inum; | 230 | args->namelen = name->len; |
| 223 | args.dp = dp; | 231 | args->filetype = name->type; |
| 224 | args.firstblock = first; | 232 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
| 225 | args.flist = flist; | 233 | args->inumber = inum; |
| 226 | args.total = total; | 234 | args->dp = dp; |
| 227 | args.whichfork = XFS_DATA_FORK; | 235 | args->firstblock = first; |
| 228 | args.trans = tp; | 236 | args->flist = flist; |
| 229 | args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | 237 | args->total = total; |
| 230 | 238 | args->whichfork = XFS_DATA_FORK; | |
| 231 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 239 | args->trans = tp; |
| 232 | rval = xfs_dir2_sf_addname(&args); | 240 | args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; |
| 233 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 241 | |
| 234 | return rval; | 242 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
| 235 | else if (v) | 243 | rval = xfs_dir2_sf_addname(args); |
| 236 | rval = xfs_dir2_block_addname(&args); | 244 | goto out_free; |
| 237 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 245 | } |
| 238 | return rval; | 246 | |
| 239 | else if (v) | 247 | rval = xfs_dir2_isblock(tp, dp, &v); |
| 240 | rval = xfs_dir2_leaf_addname(&args); | 248 | if (rval) |
| 249 | goto out_free; | ||
| 250 | if (v) { | ||
| 251 | rval = xfs_dir2_block_addname(args); | ||
| 252 | goto out_free; | ||
| 253 | } | ||
| 254 | |||
| 255 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
| 256 | if (rval) | ||
| 257 | goto out_free; | ||
| 258 | if (v) | ||
| 259 | rval = xfs_dir2_leaf_addname(args); | ||
| 241 | else | 260 | else |
| 242 | rval = xfs_dir2_node_addname(&args); | 261 | rval = xfs_dir2_node_addname(args); |
| 262 | |||
| 263 | out_free: | ||
| 264 | kmem_free(args); | ||
| 243 | return rval; | 265 | return rval; |
| 244 | } | 266 | } |
| 245 | 267 | ||
| @@ -282,46 +304,66 @@ xfs_dir_lookup( | |||
| 282 | xfs_ino_t *inum, /* out: inode number */ | 304 | xfs_ino_t *inum, /* out: inode number */ |
| 283 | struct xfs_name *ci_name) /* out: actual name if CI match */ | 305 | struct xfs_name *ci_name) /* out: actual name if CI match */ |
| 284 | { | 306 | { |
| 285 | xfs_da_args_t args; | 307 | struct xfs_da_args *args; |
| 286 | int rval; | 308 | int rval; |
| 287 | int v; /* type-checking value */ | 309 | int v; /* type-checking value */ |
| 288 | 310 | ||
| 289 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 311 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 290 | XFS_STATS_INC(xs_dir_lookup); | 312 | XFS_STATS_INC(xs_dir_lookup); |
| 291 | 313 | ||
| 292 | memset(&args, 0, sizeof(xfs_da_args_t)); | 314 | /* |
| 293 | args.name = name->name; | 315 | * We need to use KM_NOFS here so that lockdep will not throw false |
| 294 | args.namelen = name->len; | 316 | * positive deadlock warnings on a non-transactional lookup path. It is |
| 295 | args.filetype = name->type; | 317 | * safe to recurse into inode reclaim in that case, but lockdep can't |
| 296 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 318 | * easily be taught about it. Hence using KM_NOFS here avoids having |
| 297 | args.dp = dp; | 319 | * to add a bunch of lockdep class annotations into the reclaim path |
| 298 | args.whichfork = XFS_DATA_FORK; | 320 | * for the ilock. |
| 299 | args.trans = tp; | 321 | */ |
| 300 | args.op_flags = XFS_DA_OP_OKNOENT; | 322 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
| 323 | args->name = name->name; | ||
| 324 | args->namelen = name->len; | ||
| 325 | args->filetype = name->type; | ||
| 326 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
| 327 | args->dp = dp; | ||
| 328 | args->whichfork = XFS_DATA_FORK; | ||
| 329 | args->trans = tp; | ||
| 330 | args->op_flags = XFS_DA_OP_OKNOENT; | ||
| 301 | if (ci_name) | 331 | if (ci_name) |
| 302 | args.op_flags |= XFS_DA_OP_CILOOKUP; | 332 | args->op_flags |= XFS_DA_OP_CILOOKUP; |
| 303 | 333 | ||
| 304 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 334 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
| 305 | rval = xfs_dir2_sf_lookup(&args); | 335 | rval = xfs_dir2_sf_lookup(args); |
| 306 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 336 | goto out_check_rval; |
| 307 | return rval; | 337 | } |
| 308 | else if (v) | 338 | |
| 309 | rval = xfs_dir2_block_lookup(&args); | 339 | rval = xfs_dir2_isblock(tp, dp, &v); |
| 310 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 340 | if (rval) |
| 311 | return rval; | 341 | goto out_free; |
| 312 | else if (v) | 342 | if (v) { |
| 313 | rval = xfs_dir2_leaf_lookup(&args); | 343 | rval = xfs_dir2_block_lookup(args); |
| 344 | goto out_check_rval; | ||
| 345 | } | ||
| 346 | |||
| 347 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
| 348 | if (rval) | ||
| 349 | goto out_free; | ||
| 350 | if (v) | ||
| 351 | rval = xfs_dir2_leaf_lookup(args); | ||
| 314 | else | 352 | else |
| 315 | rval = xfs_dir2_node_lookup(&args); | 353 | rval = xfs_dir2_node_lookup(args); |
| 354 | |||
| 355 | out_check_rval: | ||
| 316 | if (rval == EEXIST) | 356 | if (rval == EEXIST) |
| 317 | rval = 0; | 357 | rval = 0; |
| 318 | if (!rval) { | 358 | if (!rval) { |
| 319 | *inum = args.inumber; | 359 | *inum = args->inumber; |
| 320 | if (ci_name) { | 360 | if (ci_name) { |
| 321 | ci_name->name = args.value; | 361 | ci_name->name = args->value; |
| 322 | ci_name->len = args.valuelen; | 362 | ci_name->len = args->valuelen; |
| 323 | } | 363 | } |
| 324 | } | 364 | } |
| 365 | out_free: | ||
| 366 | kmem_free(args); | ||
| 325 | return rval; | 367 | return rval; |
| 326 | } | 368 | } |
| 327 | 369 | ||
| @@ -338,38 +380,51 @@ xfs_dir_removename( | |||
| 338 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 380 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| 339 | xfs_extlen_t total) /* bmap's total block count */ | 381 | xfs_extlen_t total) /* bmap's total block count */ |
| 340 | { | 382 | { |
| 341 | xfs_da_args_t args; | 383 | struct xfs_da_args *args; |
| 342 | int rval; | 384 | int rval; |
| 343 | int v; /* type-checking value */ | 385 | int v; /* type-checking value */ |
| 344 | 386 | ||
| 345 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 387 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 346 | XFS_STATS_INC(xs_dir_remove); | 388 | XFS_STATS_INC(xs_dir_remove); |
| 347 | 389 | ||
| 348 | memset(&args, 0, sizeof(xfs_da_args_t)); | 390 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
| 349 | args.name = name->name; | 391 | if (!args) |
| 350 | args.namelen = name->len; | 392 | return ENOMEM; |
| 351 | args.filetype = name->type; | 393 | |
| 352 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 394 | args->name = name->name; |
| 353 | args.inumber = ino; | 395 | args->namelen = name->len; |
| 354 | args.dp = dp; | 396 | args->filetype = name->type; |
| 355 | args.firstblock = first; | 397 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
| 356 | args.flist = flist; | 398 | args->inumber = ino; |
| 357 | args.total = total; | 399 | args->dp = dp; |
| 358 | args.whichfork = XFS_DATA_FORK; | 400 | args->firstblock = first; |
| 359 | args.trans = tp; | 401 | args->flist = flist; |
| 360 | 402 | args->total = total; | |
| 361 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 403 | args->whichfork = XFS_DATA_FORK; |
| 362 | rval = xfs_dir2_sf_removename(&args); | 404 | args->trans = tp; |
| 363 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 405 | |
| 364 | return rval; | 406 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
| 365 | else if (v) | 407 | rval = xfs_dir2_sf_removename(args); |
| 366 | rval = xfs_dir2_block_removename(&args); | 408 | goto out_free; |
| 367 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 409 | } |
| 368 | return rval; | 410 | |
| 369 | else if (v) | 411 | rval = xfs_dir2_isblock(tp, dp, &v); |
| 370 | rval = xfs_dir2_leaf_removename(&args); | 412 | if (rval) |
| 413 | goto out_free; | ||
| 414 | if (v) { | ||
| 415 | rval = xfs_dir2_block_removename(args); | ||
| 416 | goto out_free; | ||
| 417 | } | ||
| 418 | |||
| 419 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
| 420 | if (rval) | ||
| 421 | goto out_free; | ||
| 422 | if (v) | ||
| 423 | rval = xfs_dir2_leaf_removename(args); | ||
| 371 | else | 424 | else |
| 372 | rval = xfs_dir2_node_removename(&args); | 425 | rval = xfs_dir2_node_removename(args); |
| 426 | out_free: | ||
| 427 | kmem_free(args); | ||
| 373 | return rval; | 428 | return rval; |
| 374 | } | 429 | } |
| 375 | 430 | ||
| @@ -386,40 +441,54 @@ xfs_dir_replace( | |||
| 386 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 441 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| 387 | xfs_extlen_t total) /* bmap's total block count */ | 442 | xfs_extlen_t total) /* bmap's total block count */ |
| 388 | { | 443 | { |
| 389 | xfs_da_args_t args; | 444 | struct xfs_da_args *args; |
| 390 | int rval; | 445 | int rval; |
| 391 | int v; /* type-checking value */ | 446 | int v; /* type-checking value */ |
| 392 | 447 | ||
| 393 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 448 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 394 | 449 | ||
| 395 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 450 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); |
| 451 | if (rval) | ||
| 396 | return rval; | 452 | return rval; |
| 397 | 453 | ||
| 398 | memset(&args, 0, sizeof(xfs_da_args_t)); | 454 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
| 399 | args.name = name->name; | 455 | if (!args) |
| 400 | args.namelen = name->len; | 456 | return ENOMEM; |
| 401 | args.filetype = name->type; | 457 | |
| 402 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 458 | args->name = name->name; |
| 403 | args.inumber = inum; | 459 | args->namelen = name->len; |
| 404 | args.dp = dp; | 460 | args->filetype = name->type; |
| 405 | args.firstblock = first; | 461 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
| 406 | args.flist = flist; | 462 | args->inumber = inum; |
| 407 | args.total = total; | 463 | args->dp = dp; |
| 408 | args.whichfork = XFS_DATA_FORK; | 464 | args->firstblock = first; |
| 409 | args.trans = tp; | 465 | args->flist = flist; |
| 410 | 466 | args->total = total; | |
| 411 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 467 | args->whichfork = XFS_DATA_FORK; |
| 412 | rval = xfs_dir2_sf_replace(&args); | 468 | args->trans = tp; |
| 413 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 469 | |
| 414 | return rval; | 470 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
| 415 | else if (v) | 471 | rval = xfs_dir2_sf_replace(args); |
| 416 | rval = xfs_dir2_block_replace(&args); | 472 | goto out_free; |
| 417 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 473 | } |
| 418 | return rval; | 474 | |
| 419 | else if (v) | 475 | rval = xfs_dir2_isblock(tp, dp, &v); |
| 420 | rval = xfs_dir2_leaf_replace(&args); | 476 | if (rval) |
| 477 | goto out_free; | ||
| 478 | if (v) { | ||
| 479 | rval = xfs_dir2_block_replace(args); | ||
| 480 | goto out_free; | ||
| 481 | } | ||
| 482 | |||
| 483 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
| 484 | if (rval) | ||
| 485 | goto out_free; | ||
| 486 | if (v) | ||
| 487 | rval = xfs_dir2_leaf_replace(args); | ||
| 421 | else | 488 | else |
| 422 | rval = xfs_dir2_node_replace(&args); | 489 | rval = xfs_dir2_node_replace(args); |
| 490 | out_free: | ||
| 491 | kmem_free(args); | ||
| 423 | return rval; | 492 | return rval; |
| 424 | } | 493 | } |
| 425 | 494 | ||
| @@ -434,7 +503,7 @@ xfs_dir_canenter( | |||
| 434 | struct xfs_name *name, /* name of entry to add */ | 503 | struct xfs_name *name, /* name of entry to add */ |
| 435 | uint resblks) | 504 | uint resblks) |
| 436 | { | 505 | { |
| 437 | xfs_da_args_t args; | 506 | struct xfs_da_args *args; |
| 438 | int rval; | 507 | int rval; |
| 439 | int v; /* type-checking value */ | 508 | int v; /* type-checking value */ |
| 440 | 509 | ||
| @@ -443,29 +512,42 @@ xfs_dir_canenter( | |||
| 443 | 512 | ||
| 444 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 513 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
| 445 | 514 | ||
| 446 | memset(&args, 0, sizeof(xfs_da_args_t)); | 515 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
| 447 | args.name = name->name; | 516 | if (!args) |
| 448 | args.namelen = name->len; | 517 | return ENOMEM; |
| 449 | args.filetype = name->type; | 518 | |
| 450 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 519 | args->name = name->name; |
| 451 | args.dp = dp; | 520 | args->namelen = name->len; |
| 452 | args.whichfork = XFS_DATA_FORK; | 521 | args->filetype = name->type; |
| 453 | args.trans = tp; | 522 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
| 454 | args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | 523 | args->dp = dp; |
| 524 | args->whichfork = XFS_DATA_FORK; | ||
| 525 | args->trans = tp; | ||
| 526 | args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | ||
| 455 | XFS_DA_OP_OKNOENT; | 527 | XFS_DA_OP_OKNOENT; |
| 456 | 528 | ||
| 457 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 529 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
| 458 | rval = xfs_dir2_sf_addname(&args); | 530 | rval = xfs_dir2_sf_addname(args); |
| 459 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 531 | goto out_free; |
| 460 | return rval; | 532 | } |
| 461 | else if (v) | 533 | |
| 462 | rval = xfs_dir2_block_addname(&args); | 534 | rval = xfs_dir2_isblock(tp, dp, &v); |
| 463 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 535 | if (rval) |
| 464 | return rval; | 536 | goto out_free; |
| 465 | else if (v) | 537 | if (v) { |
| 466 | rval = xfs_dir2_leaf_addname(&args); | 538 | rval = xfs_dir2_block_addname(args); |
| 539 | goto out_free; | ||
| 540 | } | ||
| 541 | |||
| 542 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
| 543 | if (rval) | ||
| 544 | goto out_free; | ||
| 545 | if (v) | ||
| 546 | rval = xfs_dir2_leaf_addname(args); | ||
| 467 | else | 547 | else |
| 468 | rval = xfs_dir2_node_addname(&args); | 548 | rval = xfs_dir2_node_addname(args); |
| 549 | out_free: | ||
| 550 | kmem_free(args); | ||
| 469 | return rval; | 551 | return rval; |
| 470 | } | 552 | } |
| 471 | 553 | ||
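The common thread in the xfs_dir2.c hunks above is mechanical: a large on-stack xfs_da_args plus memset() becomes a zeroed heap allocation that is freed on every exit path, trimming stack usage in deep directory call chains. The generic shape in standalone C, with illustrative names:

    #include <stdlib.h>

    struct big_args {                   /* stand-in for xfs_da_args */
        char        scratch[512];       /* large enough to matter on the stack */
        const char  *name;
        int         namelen;
    };

    static int do_operation(struct big_args *args)
    {
        return 0;                       /* placeholder for the real work */
    }

    int op(const char *name, int namelen)
    {
        struct big_args *args;
        int             error;

        /* was: struct big_args args; memset(&args, 0, sizeof(args)); */
        args = calloc(1, sizeof(*args));
        if (!args)
            return -1;                  /* ENOMEM analogue */

        args->name = name;
        args->namelen = namelen;
        error = do_operation(args);

        free(args);                     /* single cleanup path */
        return error;
    }

In the kernel version the allocations use KM_SLEEP | KM_NOFS, so they cannot fail outright and cannot recurse into the filesystem via reclaim.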
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 90cdbf4b5f19..4f6a38cb83a4 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
| @@ -89,13 +89,14 @@ xfs_dir3_block_read_verify( | |||
| 89 | { | 89 | { |
| 90 | struct xfs_mount *mp = bp->b_target->bt_mount; | 90 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 91 | 91 | ||
| 92 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 92 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 93 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 93 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) |
| 94 | XFS_DIR3_DATA_CRC_OFF)) || | 94 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 95 | !xfs_dir3_block_verify(bp)) { | 95 | else if (!xfs_dir3_block_verify(bp)) |
| 96 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 97 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 96 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 98 | } | 97 | |
| 98 | if (bp->b_error) | ||
| 99 | xfs_verifier_error(bp); | ||
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | static void | 102 | static void |
| @@ -107,8 +108,8 @@ xfs_dir3_block_write_verify( | |||
| 107 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 108 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
| 108 | 109 | ||
| 109 | if (!xfs_dir3_block_verify(bp)) { | 110 | if (!xfs_dir3_block_verify(bp)) { |
| 110 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 111 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 111 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 112 | xfs_verifier_error(bp); | ||
| 112 | return; | 113 | return; |
| 113 | } | 114 | } |
| 114 | 115 | ||
| @@ -118,7 +119,7 @@ xfs_dir3_block_write_verify( | |||
| 118 | if (bip) | 119 | if (bip) |
| 119 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 120 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 120 | 121 | ||
| 121 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); | 122 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); |
| 122 | } | 123 | } |
| 123 | 124 | ||
| 124 | const struct xfs_buf_ops xfs_dir3_block_buf_ops = { | 125 | const struct xfs_buf_ops xfs_dir3_block_buf_ops = { |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 70acff4ee173..afa4ad523f3f 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
| @@ -241,7 +241,6 @@ static void | |||
| 241 | xfs_dir3_data_reada_verify( | 241 | xfs_dir3_data_reada_verify( |
| 242 | struct xfs_buf *bp) | 242 | struct xfs_buf *bp) |
| 243 | { | 243 | { |
| 244 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
| 245 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | 244 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; |
| 246 | 245 | ||
| 247 | switch (hdr->magic) { | 246 | switch (hdr->magic) { |
| @@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify( | |||
| 255 | xfs_dir3_data_verify(bp); | 254 | xfs_dir3_data_verify(bp); |
| 256 | return; | 255 | return; |
| 257 | default: | 256 | default: |
| 258 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
| 259 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 257 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 258 | xfs_verifier_error(bp); | ||
| 260 | break; | 259 | break; |
| 261 | } | 260 | } |
| 262 | } | 261 | } |
| @@ -267,13 +266,14 @@ xfs_dir3_data_read_verify( | |||
| 267 | { | 266 | { |
| 268 | struct xfs_mount *mp = bp->b_target->bt_mount; | 267 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 269 | 268 | ||
| 270 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 269 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 271 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 270 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) |
| 272 | XFS_DIR3_DATA_CRC_OFF)) || | 271 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 273 | !xfs_dir3_data_verify(bp)) { | 272 | else if (!xfs_dir3_data_verify(bp)) |
| 274 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 275 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 273 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 276 | } | 274 | |
| 275 | if (bp->b_error) | ||
| 276 | xfs_verifier_error(bp); | ||
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | static void | 279 | static void |
| @@ -285,8 +285,8 @@ xfs_dir3_data_write_verify( | |||
| 285 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 285 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
| 286 | 286 | ||
| 287 | if (!xfs_dir3_data_verify(bp)) { | 287 | if (!xfs_dir3_data_verify(bp)) { |
| 288 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 289 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 288 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 289 | xfs_verifier_error(bp); | ||
| 290 | return; | 290 | return; |
| 291 | } | 291 | } |
| 292 | 292 | ||
| @@ -296,7 +296,7 @@ xfs_dir3_data_write_verify( | |||
| 296 | if (bip) | 296 | if (bip) |
| 297 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 297 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 298 | 298 | ||
| 299 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); | 299 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | const struct xfs_buf_ops xfs_dir3_data_buf_ops = { | 302 | const struct xfs_buf_ops xfs_dir3_data_buf_ops = { |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ae47ec6e16c4..d36e97df1187 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
| @@ -179,13 +179,14 @@ __read_verify( | |||
| 179 | { | 179 | { |
| 180 | struct xfs_mount *mp = bp->b_target->bt_mount; | 180 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 181 | 181 | ||
| 182 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 182 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 183 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 183 | !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) |
| 184 | XFS_DIR3_LEAF_CRC_OFF)) || | 184 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 185 | !xfs_dir3_leaf_verify(bp, magic)) { | 185 | else if (!xfs_dir3_leaf_verify(bp, magic)) |
| 186 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 187 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 186 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 188 | } | 187 | |
| 188 | if (bp->b_error) | ||
| 189 | xfs_verifier_error(bp); | ||
| 189 | } | 190 | } |
| 190 | 191 | ||
| 191 | static void | 192 | static void |
| @@ -198,8 +199,8 @@ __write_verify( | |||
| 198 | struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; | 199 | struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; |
| 199 | 200 | ||
| 200 | if (!xfs_dir3_leaf_verify(bp, magic)) { | 201 | if (!xfs_dir3_leaf_verify(bp, magic)) { |
| 201 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 202 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 202 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 203 | xfs_verifier_error(bp); | ||
| 203 | return; | 204 | return; |
| 204 | } | 205 | } |
| 205 | 206 | ||
| @@ -209,7 +210,7 @@ __write_verify( | |||
| 209 | if (bip) | 210 | if (bip) |
| 210 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 211 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 211 | 212 | ||
| 212 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); | 213 | xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); |
| 213 | } | 214 | } |
| 214 | 215 | ||
| 215 | static void | 216 | static void |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 48c7d18f68c3..cb434d732681 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
| @@ -115,13 +115,14 @@ xfs_dir3_free_read_verify( | |||
| 115 | { | 115 | { |
| 116 | struct xfs_mount *mp = bp->b_target->bt_mount; | 116 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 117 | 117 | ||
| 118 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 118 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 119 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 119 | !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) |
| 120 | XFS_DIR3_FREE_CRC_OFF)) || | 120 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 121 | !xfs_dir3_free_verify(bp)) { | 121 | else if (!xfs_dir3_free_verify(bp)) |
| 122 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 123 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 122 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 124 | } | 123 | |
| 124 | if (bp->b_error) | ||
| 125 | xfs_verifier_error(bp); | ||
| 125 | } | 126 | } |
| 126 | 127 | ||
| 127 | static void | 128 | static void |
| @@ -133,8 +134,8 @@ xfs_dir3_free_write_verify( | |||
| 133 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 134 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
| 134 | 135 | ||
| 135 | if (!xfs_dir3_free_verify(bp)) { | 136 | if (!xfs_dir3_free_verify(bp)) { |
| 136 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 137 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 137 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 138 | xfs_verifier_error(bp); | ||
| 138 | return; | 139 | return; |
| 139 | } | 140 | } |
| 140 | 141 | ||
| @@ -144,7 +145,7 @@ xfs_dir3_free_write_verify( | |||
| 144 | if (bip) | 145 | if (bip) |
| 145 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 146 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 146 | 147 | ||
| 147 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); | 148 | xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | const struct xfs_buf_ops xfs_dir3_free_buf_ops = { | 151 | const struct xfs_buf_ops xfs_dir3_free_buf_ops = { |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7aeb4c895b32..868b19f096bf 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
| @@ -615,7 +615,7 @@ xfs_qm_dqread( | |||
| 615 | 615 | ||
| 616 | if (flags & XFS_QMOPT_DQALLOC) { | 616 | if (flags & XFS_QMOPT_DQALLOC) { |
| 617 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); | 617 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); |
| 618 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, | 618 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, |
| 619 | XFS_QM_DQALLOC_SPACE_RES(mp), 0); | 619 | XFS_QM_DQALLOC_SPACE_RES(mp), 0); |
| 620 | if (error) | 620 | if (error) |
| 621 | goto error1; | 621 | goto error1; |
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c index d401457d2f25..610da8177737 100644 --- a/fs/xfs/xfs_dquot_buf.c +++ b/fs/xfs/xfs_dquot_buf.c | |||
| @@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify( | |||
| 257 | { | 257 | { |
| 258 | struct xfs_mount *mp = bp->b_target->bt_mount; | 258 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 259 | 259 | ||
| 260 | if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { | 260 | if (!xfs_dquot_buf_verify_crc(mp, bp)) |
| 261 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 261 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 262 | else if (!xfs_dquot_buf_verify(mp, bp)) | ||
| 262 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 263 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 263 | } | 264 | |
| 265 | if (bp->b_error) | ||
| 266 | xfs_verifier_error(bp); | ||
| 264 | } | 267 | } |
| 265 | 268 | ||
| 266 | /* | 269 | /* |
| @@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify( | |||
| 275 | struct xfs_mount *mp = bp->b_target->bt_mount; | 278 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 276 | 279 | ||
| 277 | if (!xfs_dquot_buf_verify(mp, bp)) { | 280 | if (!xfs_dquot_buf_verify(mp, bp)) { |
| 278 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 279 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 281 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 282 | xfs_verifier_error(bp); | ||
| 280 | return; | 283 | return; |
| 281 | } | 284 | } |
| 282 | } | 285 | } |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 9995b807d627..edac5b057d28 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
| @@ -156,7 +156,7 @@ xfs_error_report( | |||
| 156 | { | 156 | { |
| 157 | if (level <= xfs_error_level) { | 157 | if (level <= xfs_error_level) { |
| 158 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, | 158 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, |
| 159 | "Internal error %s at line %d of file %s. Caller 0x%p", | 159 | "Internal error %s at line %d of file %s. Caller %pF", |
| 160 | tag, linenum, filename, ra); | 160 | tag, linenum, filename, ra); |
| 161 | 161 | ||
| 162 | xfs_stack_trace(); | 162 | xfs_stack_trace(); |
| @@ -178,3 +178,28 @@ xfs_corruption_error( | |||
| 178 | xfs_error_report(tag, level, mp, filename, linenum, ra); | 178 | xfs_error_report(tag, level, mp, filename, linenum, ra); |
| 179 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); | 179 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); |
| 180 | } | 180 | } |
| 181 | |||
| 182 | /* | ||
| 183 | * Warnings specifically for verifier errors. Differentiate CRC vs. invalid | ||
| 184 | * values, and omit the stack trace unless the error level is tuned high. | ||
| 185 | */ | ||
| 186 | void | ||
| 187 | xfs_verifier_error( | ||
| 188 | struct xfs_buf *bp) | ||
| 189 | { | ||
| 190 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
| 191 | |||
| 192 | xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", | ||
| 193 | bp->b_error == EFSBADCRC ? "CRC error" : "corruption", | ||
| 194 | __return_address, bp->b_bn); | ||
| 195 | |||
| 196 | xfs_alert(mp, "Unmount and run xfs_repair"); | ||
| 197 | |||
| 198 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { | ||
| 199 | xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); | ||
| 200 | xfs_hex_dump(xfs_buf_offset(bp, 0), 64); | ||
| 201 | } | ||
| 202 | |||
| 203 | if (xfs_error_level >= XFS_ERRLEVEL_HIGH) | ||
| 204 | xfs_stack_trace(); | ||
| 205 | } | ||
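The new xfs_verifier_error() keys its verbosity off xfs_error_level: the two alerts are unconditional, the 64-byte hex dump needs at least XFS_ERRLEVEL_LOW, and the stack trace needs XFS_ERRLEVEL_HIGH. A toy userspace model of that gating (the constants, buffer contents, and function names below are invented for illustration):

#include <stdio.h>

enum { ERRLEVEL_OFF, ERRLEVEL_LOW, ERRLEVEL_HIGH };
static int error_level = ERRLEVEL_LOW;	/* plays the role of xfs_error_level */

static void hex_dump(const unsigned char *p, int len)
{
	for (int i = 0; i < len; i++)
		fprintf(stderr, "%02x%c", p[i], (i % 16 == 15) ? '\n' : ' ');
}

static void verifier_report(const unsigned char *buf, int bad_crc)
{
	fprintf(stderr, "Metadata %s detected\n",
		bad_crc ? "CRC error" : "corruption");
	fprintf(stderr, "Unmount and run xfs_repair\n");

	if (error_level >= ERRLEVEL_LOW) {
		fprintf(stderr, "First 64 bytes of corrupted metadata buffer:\n");
		hex_dump(buf, 64);
	}
	/* a stack trace would additionally be emitted at ERRLEVEL_HIGH */
}

int main(void)
{
	unsigned char buf[64] = { 0x58, 0x41, 0x47, 0x49 }; /* "XAGI" */
	verifier_report(buf, 1);
	return 0;
}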
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 079a367f44ee..c1c57d4a4b5d 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
| @@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, | |||
| 34 | extern void xfs_corruption_error(const char *tag, int level, | 34 | extern void xfs_corruption_error(const char *tag, int level, |
| 35 | struct xfs_mount *mp, void *p, const char *filename, | 35 | struct xfs_mount *mp, void *p, const char *filename, |
| 36 | int linenum, inst_t *ra); | 36 | int linenum, inst_t *ra); |
| 37 | extern void xfs_verifier_error(struct xfs_buf *bp); | ||
| 37 | 38 | ||
| 38 | #define XFS_ERROR_REPORT(e, lvl, mp) \ | 39 | #define XFS_ERROR_REPORT(e, lvl, mp) \ |
| 39 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) | 40 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 64b48eade91d..f7abff8c16ca 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -823,7 +823,8 @@ xfs_file_fallocate( | |||
| 823 | 823 | ||
| 824 | if (!S_ISREG(inode->i_mode)) | 824 | if (!S_ISREG(inode->i_mode)) |
| 825 | return -EINVAL; | 825 | return -EINVAL; |
| 826 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 826 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
| 827 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
| 827 | return -EOPNOTSUPP; | 828 | return -EOPNOTSUPP; |
| 828 | 829 | ||
| 829 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 830 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
| @@ -831,6 +832,20 @@ xfs_file_fallocate( | |||
| 831 | error = xfs_free_file_space(ip, offset, len); | 832 | error = xfs_free_file_space(ip, offset, len); |
| 832 | if (error) | 833 | if (error) |
| 833 | goto out_unlock; | 834 | goto out_unlock; |
| 835 | } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { | ||
| 836 | unsigned blksize_mask = (1 << inode->i_blkbits) - 1; | ||
| 837 | |||
| 838 | if (offset & blksize_mask || len & blksize_mask) { | ||
| 839 | error = -EINVAL; | ||
| 840 | goto out_unlock; | ||
| 841 | } | ||
| 842 | |||
| 843 | ASSERT(offset + len < i_size_read(inode)); | ||
| 844 | new_size = i_size_read(inode) - len; | ||
| 845 | |||
| 846 | error = xfs_collapse_file_space(ip, offset, len); | ||
| 847 | if (error) | ||
| 848 | goto out_unlock; | ||
| 834 | } else { | 849 | } else { |
| 835 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 850 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
| 836 | offset + len > i_size_read(inode)) { | 851 | offset + len > i_size_read(inode)) { |
| @@ -840,8 +855,11 @@ xfs_file_fallocate( | |||
| 840 | goto out_unlock; | 855 | goto out_unlock; |
| 841 | } | 856 | } |
| 842 | 857 | ||
| 843 | error = xfs_alloc_file_space(ip, offset, len, | 858 | if (mode & FALLOC_FL_ZERO_RANGE) |
| 844 | XFS_BMAPI_PREALLOC); | 859 | error = xfs_zero_file_space(ip, offset, len); |
| 860 | else | ||
| 861 | error = xfs_alloc_file_space(ip, offset, len, | ||
| 862 | XFS_BMAPI_PREALLOC); | ||
| 845 | if (error) | 863 | if (error) |
| 846 | goto out_unlock; | 864 | goto out_unlock; |
| 847 | } | 865 | } |
| @@ -859,7 +877,7 @@ xfs_file_fallocate( | |||
| 859 | if (ip->i_d.di_mode & S_IXGRP) | 877 | if (ip->i_d.di_mode & S_IXGRP) |
| 860 | ip->i_d.di_mode &= ~S_ISGID; | 878 | ip->i_d.di_mode &= ~S_ISGID; |
| 861 | 879 | ||
| 862 | if (!(mode & FALLOC_FL_PUNCH_HOLE)) | 880 | if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) |
| 863 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | 881 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; |
| 864 | 882 | ||
| 865 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 883 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
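With the xfs_file_fallocate() hunks above, two more modes are accepted: FALLOC_FL_ZERO_RANGE converts a range to zeros while keeping it allocated, and FALLOC_FL_COLLAPSE_RANGE removes a block-aligned range and shifts the rest of the file down, which is why it is excluded from the XFS_DIFLAG_PREALLOC case. A short userspace caller exercising both; the 4096-byte block size is an assumption that must match the filesystem, and the file must be longer than offset + len for the collapse to be legal:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	long blksz = 4096;	/* assumed; ask statfs() for the real value */

	/* zero one block in place, keeping the blocks allocated */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, blksz) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	/*
	 * Remove one block and shift the tail down; offset and len must
	 * both be block aligned, as the new EINVAL check enforces.
	 */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, blksz) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	close(fd);
	return 0;
}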
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index b6ab5a3cfa12..9898f31d05d8 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h | |||
| @@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr { | |||
| 145 | __be64 sl_lsn; | 145 | __be64 sl_lsn; |
| 146 | }; | 146 | }; |
| 147 | 147 | ||
| 148 | #define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) | ||
| 149 | |||
| 148 | /* | 150 | /* |
| 149 | * The maximum pathlen is 1024 bytes. Since the minimum file system | 151 | * The maximum pathlen is 1024 bytes. Since the minimum file system |
| 150 | * blocksize is 512 bytes, we can get a max of 3 extents back from | 152 | * blocksize is 512 bytes, we can get a max of 3 extents back from |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5d7f105a1c82..8f711db61a0c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
| @@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc( | |||
| 363 | args.minleft = args.mp->m_in_maxlevels - 1; | 363 | args.minleft = args.mp->m_in_maxlevels - 1; |
| 364 | if ((error = xfs_alloc_vextent(&args))) | 364 | if ((error = xfs_alloc_vextent(&args))) |
| 365 | return error; | 365 | return error; |
| 366 | |||
| 367 | /* | ||
| 368 | * This request might have dirtied the transaction if the AG can | ||
| 369 | * satisfy the request, but the exact block was not available. | ||
| 370 | * If the allocation did fail, subsequent requests will relax | ||
| 371 | * the exact agbno requirement and increase the alignment | ||
| 372 | * instead. It is critical that the total size of the request | ||
| 373 | * (len + alignment + slop) does not increase from this point | ||
| 374 | * on, so reset minalignslop to ensure it is not included in | ||
| 375 | * subsequent requests. | ||
| 376 | */ | ||
| 377 | args.minalignslop = 0; | ||
| 366 | } else | 378 | } else |
| 367 | args.fsbno = NULLFSBLOCK; | 379 | args.fsbno = NULLFSBLOCK; |
| 368 | 380 | ||
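The comment added above states an invariant, not just narration: the first attempt asks for an exact agbno and carries minalignslop to cover a possible aligned retry, and when the retry relaxes the exact bno and raises the alignment instead, the slop must be zeroed so that len + alignment + slop never grows past what the dirtied transaction reserved. A toy model of that bookkeeping (every number below is invented):

#include <assert.h>
#include <stdio.h>

struct alloc_args {
	unsigned len;
	unsigned alignment;
	unsigned minalignslop;
};

static unsigned total_request(const struct alloc_args *a)
{
	return a->len + a->alignment + a->minalignslop;
}

int main(void)
{
	struct alloc_args args = { .len = 64, .alignment = 1, .minalignslop = 7 };
	unsigned reserved = total_request(&args); /* what the transaction covers */

	int exact_attempt_failed = 1;	/* pretend the exact bno was taken */
	if (exact_attempt_failed) {
		args.alignment = 8;	/* retry with alignment instead... */
		args.minalignslop = 0;	/* ...so the slop must come out */
	}

	assert(total_request(&args) <= reserved);
	printf("reserved %u, retry asks for %u\n", reserved, total_request(&args));
	return 0;
}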
| @@ -1568,18 +1580,17 @@ xfs_agi_read_verify( | |||
| 1568 | struct xfs_buf *bp) | 1580 | struct xfs_buf *bp) |
| 1569 | { | 1581 | { |
| 1570 | struct xfs_mount *mp = bp->b_target->bt_mount; | 1582 | struct xfs_mount *mp = bp->b_target->bt_mount; |
| 1571 | int agi_ok = 1; | ||
| 1572 | |||
| 1573 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
| 1574 | agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
| 1575 | offsetof(struct xfs_agi, agi_crc)); | ||
| 1576 | agi_ok = agi_ok && xfs_agi_verify(bp); | ||
| 1577 | 1583 | ||
| 1578 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, | 1584 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
| 1579 | XFS_RANDOM_IALLOC_READ_AGI))) { | 1585 | !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) |
| 1580 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 1586 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 1587 | else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, | ||
| 1588 | XFS_ERRTAG_IALLOC_READ_AGI, | ||
| 1589 | XFS_RANDOM_IALLOC_READ_AGI)) | ||
| 1581 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 1590 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 1582 | } | 1591 | |
| 1592 | if (bp->b_error) | ||
| 1593 | xfs_verifier_error(bp); | ||
| 1583 | } | 1594 | } |
| 1584 | 1595 | ||
| 1585 | static void | 1596 | static void |
| @@ -1590,8 +1601,8 @@ xfs_agi_write_verify( | |||
| 1590 | struct xfs_buf_log_item *bip = bp->b_fspriv; | 1601 | struct xfs_buf_log_item *bip = bp->b_fspriv; |
| 1591 | 1602 | ||
| 1592 | if (!xfs_agi_verify(bp)) { | 1603 | if (!xfs_agi_verify(bp)) { |
| 1593 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 1594 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 1604 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 1605 | xfs_verifier_error(bp); | ||
| 1595 | return; | 1606 | return; |
| 1596 | } | 1607 | } |
| 1597 | 1608 | ||
| @@ -1600,8 +1611,7 @@ xfs_agi_write_verify( | |||
| 1600 | 1611 | ||
| 1601 | if (bip) | 1612 | if (bip) |
| 1602 | XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 1613 | XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 1603 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 1614 | xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); |
| 1604 | offsetof(struct xfs_agi, agi_crc)); | ||
| 1605 | } | 1615 | } |
| 1606 | 1616 | ||
| 1607 | const struct xfs_buf_ops xfs_agi_buf_ops = { | 1617 | const struct xfs_buf_ops xfs_agi_buf_ops = { |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c8fa5bbb36de..7e309b11e87d 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
| @@ -243,12 +243,14 @@ static void | |||
| 243 | xfs_inobt_read_verify( | 243 | xfs_inobt_read_verify( |
| 244 | struct xfs_buf *bp) | 244 | struct xfs_buf *bp) |
| 245 | { | 245 | { |
| 246 | if (!(xfs_btree_sblock_verify_crc(bp) && | 246 | if (!xfs_btree_sblock_verify_crc(bp)) |
| 247 | xfs_inobt_verify(bp))) { | 247 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 248 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 248 | else if (!xfs_inobt_verify(bp)) |
| 249 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 250 | bp->b_target->bt_mount, bp->b_addr); | ||
| 251 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 249 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 250 | |||
| 251 | if (bp->b_error) { | ||
| 252 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
| 253 | xfs_verifier_error(bp); | ||
| 252 | } | 254 | } |
| 253 | } | 255 | } |
| 254 | 256 | ||
| @@ -258,9 +260,9 @@ xfs_inobt_write_verify( | |||
| 258 | { | 260 | { |
| 259 | if (!xfs_inobt_verify(bp)) { | 261 | if (!xfs_inobt_verify(bp)) { |
| 260 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 262 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
| 261 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 262 | bp->b_target->bt_mount, bp->b_addr); | ||
| 263 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 263 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 264 | xfs_verifier_error(bp); | ||
| 265 | return; | ||
| 264 | } | 266 | } |
| 265 | xfs_btree_sblock_calc_crc(bp); | 267 | xfs_btree_sblock_calc_crc(bp); |
| 266 | 268 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3a137e9f9a7d..5e7a38fa6ee6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -42,7 +42,6 @@ | |||
| 42 | #include "xfs_bmap_util.h" | 42 | #include "xfs_bmap_util.h" |
| 43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
| 44 | #include "xfs_quota.h" | 44 | #include "xfs_quota.h" |
| 45 | #include "xfs_dinode.h" | ||
| 46 | #include "xfs_filestream.h" | 45 | #include "xfs_filestream.h" |
| 47 | #include "xfs_cksum.h" | 46 | #include "xfs_cksum.h" |
| 48 | #include "xfs_trace.h" | 47 | #include "xfs_trace.h" |
| @@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone; | |||
| 62 | 61 | ||
| 63 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); | 62 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); |
| 64 | 63 | ||
| 64 | STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *); | ||
| 65 | |||
| 65 | /* | 66 | /* |
| 66 | * helper function to extract extent size hint from inode | 67 | * helper function to extract extent size hint from inode |
| 67 | */ | 68 | */ |
| @@ -1115,7 +1116,7 @@ xfs_bumplink( | |||
| 1115 | { | 1116 | { |
| 1116 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | 1117 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
| 1117 | 1118 | ||
| 1118 | ASSERT(ip->i_d.di_nlink > 0); | 1119 | ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE)); |
| 1119 | ip->i_d.di_nlink++; | 1120 | ip->i_d.di_nlink++; |
| 1120 | inc_nlink(VFS_I(ip)); | 1121 | inc_nlink(VFS_I(ip)); |
| 1121 | if ((ip->i_d.di_version == 1) && | 1122 | if ((ip->i_d.di_version == 1) && |
| @@ -1165,10 +1166,7 @@ xfs_create( | |||
| 1165 | if (XFS_FORCED_SHUTDOWN(mp)) | 1166 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 1166 | return XFS_ERROR(EIO); | 1167 | return XFS_ERROR(EIO); |
| 1167 | 1168 | ||
| 1168 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1169 | prid = xfs_get_initial_prid(dp); |
| 1169 | prid = xfs_get_projid(dp); | ||
| 1170 | else | ||
| 1171 | prid = XFS_PROJID_DEFAULT; | ||
| 1172 | 1170 | ||
| 1173 | /* | 1171 | /* |
| 1174 | * Make sure that we have allocated dquot(s) on disk. | 1172 | * Make sure that we have allocated dquot(s) on disk. |
| @@ -1333,6 +1331,113 @@ xfs_create( | |||
| 1333 | } | 1331 | } |
| 1334 | 1332 | ||
| 1335 | int | 1333 | int |
| 1334 | xfs_create_tmpfile( | ||
| 1335 | struct xfs_inode *dp, | ||
| 1336 | struct dentry *dentry, | ||
| 1337 | umode_t mode) | ||
| 1338 | { | ||
| 1339 | struct xfs_mount *mp = dp->i_mount; | ||
| 1340 | struct xfs_inode *ip = NULL; | ||
| 1341 | struct xfs_trans *tp = NULL; | ||
| 1342 | int error; | ||
| 1343 | uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
| 1344 | prid_t prid; | ||
| 1345 | struct xfs_dquot *udqp = NULL; | ||
| 1346 | struct xfs_dquot *gdqp = NULL; | ||
| 1347 | struct xfs_dquot *pdqp = NULL; | ||
| 1348 | struct xfs_trans_res *tres; | ||
| 1349 | uint resblks; | ||
| 1350 | |||
| 1351 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 1352 | return XFS_ERROR(EIO); | ||
| 1353 | |||
| 1354 | prid = xfs_get_initial_prid(dp); | ||
| 1355 | |||
| 1356 | /* | ||
| 1357 | * Make sure that we have allocated dquot(s) on disk. | ||
| 1358 | */ | ||
| 1359 | error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), | ||
| 1360 | xfs_kgid_to_gid(current_fsgid()), prid, | ||
| 1361 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, | ||
| 1362 | &udqp, &gdqp, &pdqp); | ||
| 1363 | if (error) | ||
| 1364 | return error; | ||
| 1365 | |||
| 1366 | resblks = XFS_IALLOC_SPACE_RES(mp); | ||
| 1367 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE); | ||
| 1368 | |||
| 1369 | tres = &M_RES(mp)->tr_create_tmpfile; | ||
| 1370 | error = xfs_trans_reserve(tp, tres, resblks, 0); | ||
| 1371 | if (error == ENOSPC) { | ||
| 1372 | /* No space at all so try a "no-allocation" reservation */ | ||
| 1373 | resblks = 0; | ||
| 1374 | error = xfs_trans_reserve(tp, tres, 0, 0); | ||
| 1375 | } | ||
| 1376 | if (error) { | ||
| 1377 | cancel_flags = 0; | ||
| 1378 | goto out_trans_cancel; | ||
| 1379 | } | ||
| 1380 | |||
| 1381 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, | ||
| 1382 | pdqp, resblks, 1, 0); | ||
| 1383 | if (error) | ||
| 1384 | goto out_trans_cancel; | ||
| 1385 | |||
| 1386 | error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, | ||
| 1387 | prid, resblks > 0, &ip, NULL); | ||
| 1388 | if (error) { | ||
| 1389 | if (error == ENOSPC) | ||
| 1390 | goto out_trans_cancel; | ||
| 1391 | goto out_trans_abort; | ||
| 1392 | } | ||
| 1393 | |||
| 1394 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
| 1395 | xfs_trans_set_sync(tp); | ||
| 1396 | |||
| 1397 | /* | ||
| 1398 | * Attach the dquot(s) to the inodes and modify them incore. | ||
| 1399 | * The inode's ids couldn't have changed since the new | ||
| 1400 | * inode has been locked ever since it was created. | ||
| 1401 | */ | ||
| 1402 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); | ||
| 1403 | |||
| 1404 | ip->i_d.di_nlink--; | ||
| 1405 | d_tmpfile(dentry, VFS_I(ip)); | ||
| 1406 | error = xfs_iunlink(tp, ip); | ||
| 1407 | if (error) | ||
| 1408 | goto out_trans_abort; | ||
| 1409 | |||
| 1410 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
| 1411 | if (error) | ||
| 1412 | goto out_release_inode; | ||
| 1413 | |||
| 1414 | xfs_qm_dqrele(udqp); | ||
| 1415 | xfs_qm_dqrele(gdqp); | ||
| 1416 | xfs_qm_dqrele(pdqp); | ||
| 1417 | |||
| 1418 | return 0; | ||
| 1419 | |||
| 1420 | out_trans_abort: | ||
| 1421 | cancel_flags |= XFS_TRANS_ABORT; | ||
| 1422 | out_trans_cancel: | ||
| 1423 | xfs_trans_cancel(tp, cancel_flags); | ||
| 1424 | out_release_inode: | ||
| 1425 | /* | ||
| 1426 | * Wait until after the current transaction is aborted to | ||
| 1427 | * release the inode. This prevents recursive transactions | ||
| 1428 | * and deadlocks from xfs_inactive. | ||
| 1429 | */ | ||
| 1430 | if (ip) | ||
| 1431 | IRELE(ip); | ||
| 1432 | |||
| 1433 | xfs_qm_dqrele(udqp); | ||
| 1434 | xfs_qm_dqrele(gdqp); | ||
| 1435 | xfs_qm_dqrele(pdqp); | ||
| 1436 | |||
| 1437 | return error; | ||
| 1438 | } | ||
| 1439 | |||
| 1440 | int | ||
| 1336 | xfs_link( | 1441 | xfs_link( |
| 1337 | xfs_inode_t *tdp, | 1442 | xfs_inode_t *tdp, |
| 1338 | xfs_inode_t *sip, | 1443 | xfs_inode_t *sip, |
| @@ -1397,6 +1502,12 @@ xfs_link( | |||
| 1397 | 1502 | ||
| 1398 | xfs_bmap_init(&free_list, &first_block); | 1503 | xfs_bmap_init(&free_list, &first_block); |
| 1399 | 1504 | ||
| 1505 | if (sip->i_d.di_nlink == 0) { | ||
| 1506 | error = xfs_iunlink_remove(tp, sip); | ||
| 1507 | if (error) | ||
| 1508 | goto abort_return; | ||
| 1509 | } | ||
| 1510 | |||
| 1400 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, | 1511 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, |
| 1401 | &first_block, &free_list, resblks); | 1512 | &first_block, &free_list, resblks); |
| 1402 | if (error) | 1513 | if (error) |
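The two xfs_inode.c additions are halves of the O_TMPFILE story: xfs_create_tmpfile() allocates an inode with no directory entry and parks it on the AGI unlinked list (note the xfs_iunlink() call), and xfs_link() now calls xfs_iunlink_remove() when asked to give a name to an inode whose link count is zero. From userspace the pair is driven like this (a sketch; /tmp must be on an XFS mount for these paths to be the ones exercised, and proc must be mounted for the linkat() idiom):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* create an unnamed inode; it lives on the unlinked list */
	int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
	if (fd < 0) {
		perror("O_TMPFILE");
		return 1;
	}

	if (write(fd, "staged\n", 7) != 7)
		perror("write");

	char path[64];
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);

	/* naming it takes the nlink 0 -> 1 path xfs_link now handles */
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/tmp/now-visible",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	close(fd);
	return 0;
}

If the program dies before the linkat(), the inode is still on the unlinked list and is reclaimed automatically, which is the point of staging files this way.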
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65e2350f449c..396cc1fafd0d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | 20 | ||
| 21 | #include "xfs_inode_buf.h" | 21 | #include "xfs_inode_buf.h" |
| 22 | #include "xfs_inode_fork.h" | 22 | #include "xfs_inode_fork.h" |
| 23 | #include "xfs_dinode.h" | ||
| 23 | 24 | ||
| 24 | /* | 25 | /* |
| 25 | * Kernel only inode definitions | 26 | * Kernel only inode definitions |
| @@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip, | |||
| 192 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); | 193 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); |
| 193 | } | 194 | } |
| 194 | 195 | ||
| 196 | static inline prid_t | ||
| 197 | xfs_get_initial_prid(struct xfs_inode *dp) | ||
| 198 | { | ||
| 199 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
| 200 | return xfs_get_projid(dp); | ||
| 201 | |||
| 202 | return XFS_PROJID_DEFAULT; | ||
| 203 | } | ||
| 204 | |||
| 195 | /* | 205 | /* |
| 196 | * In-core inode flags. | 206 | * In-core inode flags. |
| 197 | */ | 207 | */ |
| @@ -323,6 +333,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | |||
| 323 | struct xfs_inode **ipp, struct xfs_name *ci_name); | 333 | struct xfs_inode **ipp, struct xfs_name *ci_name); |
| 324 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, | 334 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, |
| 325 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); | 335 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); |
| 336 | int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, | ||
| 337 | umode_t mode); | ||
| 326 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | 338 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
| 327 | struct xfs_inode *ip); | 339 | struct xfs_inode *ip); |
| 328 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 340 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 4fc9f39dd89e..24e993996bdc 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c | |||
| @@ -102,8 +102,7 @@ xfs_inode_buf_verify( | |||
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 104 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 105 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | 105 | xfs_verifier_error(bp); |
| 106 | mp, dip); | ||
| 107 | #ifdef DEBUG | 106 | #ifdef DEBUG |
| 108 | xfs_alert(mp, | 107 | xfs_alert(mp, |
| 109 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | 108 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", |
| @@ -306,7 +305,7 @@ xfs_dinode_verify( | |||
| 306 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 305 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
| 307 | return false; | 306 | return false; |
| 308 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | 307 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, |
| 309 | offsetof(struct xfs_dinode, di_crc))) | 308 | XFS_DINODE_CRC_OFF)) |
| 310 | return false; | 309 | return false; |
| 311 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | 310 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) |
| 312 | return false; | 311 | return false; |
| @@ -327,7 +326,7 @@ xfs_dinode_calc_crc( | |||
| 327 | 326 | ||
| 328 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | 327 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); |
| 329 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | 328 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, |
| 330 | offsetof(struct xfs_dinode, di_crc)); | 329 | XFS_DINODE_CRC_OFF); |
| 331 | dip->di_crc = xfs_end_cksum(crc); | 330 | dip->di_crc = xfs_end_cksum(crc); |
| 332 | } | 331 | } |
| 333 | 332 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 22d1cbea283d..3b80ebae05f5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -128,7 +128,6 @@ xfs_iomap_write_direct( | |||
| 128 | xfs_fsblock_t firstfsb; | 128 | xfs_fsblock_t firstfsb; |
| 129 | xfs_extlen_t extsz, temp; | 129 | xfs_extlen_t extsz, temp; |
| 130 | int nimaps; | 130 | int nimaps; |
| 131 | int bmapi_flag; | ||
| 132 | int quota_flag; | 131 | int quota_flag; |
| 133 | int rt; | 132 | int rt; |
| 134 | xfs_trans_t *tp; | 133 | xfs_trans_t *tp; |
| @@ -200,18 +199,15 @@ xfs_iomap_write_direct( | |||
| 200 | 199 | ||
| 201 | xfs_trans_ijoin(tp, ip, 0); | 200 | xfs_trans_ijoin(tp, ip, 0); |
| 202 | 201 | ||
| 203 | bmapi_flag = 0; | ||
| 204 | if (offset < XFS_ISIZE(ip) || extsz) | ||
| 205 | bmapi_flag |= XFS_BMAPI_PREALLOC; | ||
| 206 | |||
| 207 | /* | 202 | /* |
| 208 | * From this point onwards we overwrite the imap pointer that the | 203 | * From this point onwards we overwrite the imap pointer that the |
| 209 | * caller gave to us. | 204 | * caller gave to us. |
| 210 | */ | 205 | */ |
| 211 | xfs_bmap_init(&free_list, &firstfsb); | 206 | xfs_bmap_init(&free_list, &firstfsb); |
| 212 | nimaps = 1; | 207 | nimaps = 1; |
| 213 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, | 208 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, |
| 214 | &firstfsb, 0, imap, &nimaps, &free_list); | 209 | XFS_BMAPI_PREALLOC, &firstfsb, 0, |
| 210 | imap, &nimaps, &free_list); | ||
| 215 | if (error) | 211 | if (error) |
| 216 | goto out_bmap_cancel; | 212 | goto out_bmap_cancel; |
| 217 | 213 | ||
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 9ddfb8190ca1..89b07e43ca28 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "xfs_da_btree.h" | 39 | #include "xfs_da_btree.h" |
| 40 | #include "xfs_dir2_priv.h" | 40 | #include "xfs_dir2_priv.h" |
| 41 | #include "xfs_dinode.h" | 41 | #include "xfs_dinode.h" |
| 42 | #include "xfs_trans_space.h" | ||
| 42 | 43 | ||
| 43 | #include <linux/capability.h> | 44 | #include <linux/capability.h> |
| 44 | #include <linux/xattr.h> | 45 | #include <linux/xattr.h> |
| @@ -48,6 +49,18 @@ | |||
| 48 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
| 49 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
| 50 | 51 | ||
| 52 | /* | ||
| 53 | * Directories have different lock order w.r.t. mmap_sem compared to regular | ||
| 54 | * files. This is due to readdir potentially triggering page faults on a user | ||
| 55 | * buffer inside filldir(), and this happens with the ilock on the directory | ||
| 56 | * held. For regular files, the lock order is the other way around - the | ||
| 57 | * mmap_sem is taken during the page fault, and then we lock the ilock to do | ||
| 58 | * block mapping. Hence we need a different class for the directory ilock so | ||
| 59 | * that lockdep can tell them apart. | ||
| 60 | */ | ||
| 61 | static struct lock_class_key xfs_nondir_ilock_class; | ||
| 62 | static struct lock_class_key xfs_dir_ilock_class; | ||
| 63 | |||
| 51 | static int | 64 | static int |
| 52 | xfs_initxattrs( | 65 | xfs_initxattrs( |
| 53 | struct inode *inode, | 66 | struct inode *inode, |
| @@ -1034,6 +1047,19 @@ xfs_vn_fiemap( | |||
| 1034 | return 0; | 1047 | return 0; |
| 1035 | } | 1048 | } |
| 1036 | 1049 | ||
| 1050 | STATIC int | ||
| 1051 | xfs_vn_tmpfile( | ||
| 1052 | struct inode *dir, | ||
| 1053 | struct dentry *dentry, | ||
| 1054 | umode_t mode) | ||
| 1055 | { | ||
| 1056 | int error; | ||
| 1057 | |||
| 1058 | error = xfs_create_tmpfile(XFS_I(dir), dentry, mode); | ||
| 1059 | |||
| 1060 | return -error; | ||
| 1061 | } | ||
| 1062 | |||
| 1037 | static const struct inode_operations xfs_inode_operations = { | 1063 | static const struct inode_operations xfs_inode_operations = { |
| 1038 | .get_acl = xfs_get_acl, | 1064 | .get_acl = xfs_get_acl, |
| 1039 | .set_acl = xfs_set_acl, | 1065 | .set_acl = xfs_set_acl, |
| @@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = { | |||
| 1072 | .removexattr = generic_removexattr, | 1098 | .removexattr = generic_removexattr, |
| 1073 | .listxattr = xfs_vn_listxattr, | 1099 | .listxattr = xfs_vn_listxattr, |
| 1074 | .update_time = xfs_vn_update_time, | 1100 | .update_time = xfs_vn_update_time, |
| 1101 | .tmpfile = xfs_vn_tmpfile, | ||
| 1075 | }; | 1102 | }; |
| 1076 | 1103 | ||
| 1077 | static const struct inode_operations xfs_dir_ci_inode_operations = { | 1104 | static const struct inode_operations xfs_dir_ci_inode_operations = { |
| @@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { | |||
| 1099 | .removexattr = generic_removexattr, | 1126 | .removexattr = generic_removexattr, |
| 1100 | .listxattr = xfs_vn_listxattr, | 1127 | .listxattr = xfs_vn_listxattr, |
| 1101 | .update_time = xfs_vn_update_time, | 1128 | .update_time = xfs_vn_update_time, |
| 1129 | .tmpfile = xfs_vn_tmpfile, | ||
| 1102 | }; | 1130 | }; |
| 1103 | 1131 | ||
| 1104 | static const struct inode_operations xfs_symlink_inode_operations = { | 1132 | static const struct inode_operations xfs_symlink_inode_operations = { |
| @@ -1191,6 +1219,7 @@ xfs_setup_inode( | |||
| 1191 | xfs_diflags_to_iflags(inode, ip); | 1219 | xfs_diflags_to_iflags(inode, ip); |
| 1192 | 1220 | ||
| 1193 | ip->d_ops = ip->i_mount->m_nondir_inode_ops; | 1221 | ip->d_ops = ip->i_mount->m_nondir_inode_ops; |
| 1222 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); | ||
| 1194 | switch (inode->i_mode & S_IFMT) { | 1223 | switch (inode->i_mode & S_IFMT) { |
| 1195 | case S_IFREG: | 1224 | case S_IFREG: |
| 1196 | inode->i_op = &xfs_inode_operations; | 1225 | inode->i_op = &xfs_inode_operations; |
| @@ -1198,6 +1227,7 @@ xfs_setup_inode( | |||
| 1198 | inode->i_mapping->a_ops = &xfs_address_space_operations; | 1227 | inode->i_mapping->a_ops = &xfs_address_space_operations; |
| 1199 | break; | 1228 | break; |
| 1200 | case S_IFDIR: | 1229 | case S_IFDIR: |
| 1230 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); | ||
| 1201 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) | 1231 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) |
| 1202 | inode->i_op = &xfs_dir_ci_inode_operations; | 1232 | inode->i_op = &xfs_dir_ci_inode_operations; |
| 1203 | else | 1233 | else |
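The lock_class_key comment above describes two opposite orders: a regular-file page fault takes mmap_sem and then the inode ilock for block mapping, while readdir holds the directory ilock and may fault on the user buffer, taking mmap_sem second. To a checker that treated every ilock as a single class this looks like an ABBA inversion, even though a directory ilock and a file ilock can never be the same lock. A userspace caricature of the two paths, with pthread mutexes standing in for mmap_sem and the two ilock classes (compile with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmap_sem   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t file_ilock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dir_ilock  = PTHREAD_MUTEX_INITIALIZER;

static void file_fault(void)	/* regular file: mmap_sem, then ilock */
{
	pthread_mutex_lock(&mmap_sem);
	pthread_mutex_lock(&file_ilock);	/* block mapping */
	pthread_mutex_unlock(&file_ilock);
	pthread_mutex_unlock(&mmap_sem);
}

static void dir_readdir(void)	/* directory: ilock, then mmap_sem */
{
	pthread_mutex_lock(&dir_ilock);
	pthread_mutex_lock(&mmap_sem);		/* fault on the user buffer */
	pthread_mutex_unlock(&mmap_sem);
	pthread_mutex_unlock(&dir_ilock);
}

int main(void)
{
	file_fault();
	dir_readdir();
	puts("no deadlock: the two ilocks are distinct objects/classes");
	return 0;
}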
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9bb590acc0e..825249d2dfc1 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
| @@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t; | |||
| 119 | #include "xfs_iops.h" | 119 | #include "xfs_iops.h" |
| 120 | #include "xfs_aops.h" | 120 | #include "xfs_aops.h" |
| 121 | #include "xfs_super.h" | 121 | #include "xfs_super.h" |
| 122 | #include "xfs_cksum.h" | ||
| 122 | #include "xfs_buf.h" | 123 | #include "xfs_buf.h" |
| 123 | #include "xfs_message.h" | 124 | #include "xfs_message.h" |
| 124 | 125 | ||
| @@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t; | |||
| 178 | #define ENOATTR ENODATA /* Attribute not found */ | 179 | #define ENOATTR ENODATA /* Attribute not found */ |
| 179 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ | 180 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ |
| 180 | #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ | 181 | #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ |
| 182 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ | ||
| 181 | 183 | ||
| 182 | #define SYNCHRONIZE() barrier() | 184 | #define SYNCHRONIZE() barrier() |
| 183 | #define __return_address __builtin_return_address(0) | 185 | #define __return_address __builtin_return_address(0) |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b0f4ef77fa70..2c4004475e71 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
| @@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *); | |||
| 175 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 175 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
| 176 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 176 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
| 177 | 177 | ||
| 178 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 178 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
| 179 | xfs_lsn_t *commit_lsn, int flags); | 179 | xfs_lsn_t *commit_lsn, int flags); |
| 180 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 180 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
| 181 | 181 | ||
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 4ef6fdbced78..7e5455391176 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
| @@ -499,13 +499,6 @@ xlog_cil_push( | |||
| 499 | cil->xc_ctx = new_ctx; | 499 | cil->xc_ctx = new_ctx; |
| 500 | 500 | ||
| 501 | /* | 501 | /* |
| 502 | * mirror the new sequence into the cil structure so that we can do | ||
| 503 | * unlocked checks against the current sequence in log forces without | ||
| 504 | * risking deferencing a freed context pointer. | ||
| 505 | */ | ||
| 506 | cil->xc_current_sequence = new_ctx->sequence; | ||
| 507 | |||
| 508 | /* | ||
| 509 | * The switch is now done, so we can drop the context lock and move out | 502 | * The switch is now done, so we can drop the context lock and move out |
| 510 | * of a shared context. We can't just go straight to the commit record, | 503 | * of a shared context. We can't just go straight to the commit record, |
| 511 | * though - we need to synchronise with previous and future commits so | 504 | * though - we need to synchronise with previous and future commits so |
| @@ -523,8 +516,15 @@ xlog_cil_push( | |||
| 523 | * Hence we need to add this context to the committing context list so | 516 | * Hence we need to add this context to the committing context list so |
| 524 | * that higher sequences will wait for us to write out a commit record | 517 | * that higher sequences will wait for us to write out a commit record |
| 525 | * before they do. | 518 | * before they do. |
| 519 | * | ||
| 520 | * xfs_log_force_lsn requires us to mirror the new sequence into the cil | ||
| 521 | * structure atomically with the addition of this sequence to the | ||
| 522 | * committing list. This also ensures that we can do unlocked checks | ||
| 523 | * against the current sequence in log forces without risking | ||
| 524 | * dereferencing a freed context pointer. | ||
| 526 | */ | 525 | */ |
| 527 | spin_lock(&cil->xc_push_lock); | 526 | spin_lock(&cil->xc_push_lock); |
| 527 | cil->xc_current_sequence = new_ctx->sequence; | ||
| 528 | list_add(&ctx->committing, &cil->xc_committing); | 528 | list_add(&ctx->committing, &cil->xc_committing); |
| 529 | spin_unlock(&cil->xc_push_lock); | 529 | spin_unlock(&cil->xc_push_lock); |
| 530 | up_write(&cil->xc_ctx_lock); | 530 | up_write(&cil->xc_ctx_lock); |
| @@ -662,8 +662,14 @@ xlog_cil_push_background( | |||
| 662 | 662 | ||
| 663 | } | 663 | } |
| 664 | 664 | ||
| 665 | /* | ||
| 666 | * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence | ||
| 667 | * number that is passed. When it returns, the work will be queued for | ||
| 668 | * @push_seq, but it won't be completed. The caller is expected to do any | ||
| 669 | * waiting for push_seq to complete if it is required. | ||
| 670 | */ | ||
| 665 | static void | 671 | static void |
| 666 | xlog_cil_push_foreground( | 672 | xlog_cil_push_now( |
| 667 | struct xlog *log, | 673 | struct xlog *log, |
| 668 | xfs_lsn_t push_seq) | 674 | xfs_lsn_t push_seq) |
| 669 | { | 675 | { |
| @@ -688,10 +694,8 @@ xlog_cil_push_foreground( | |||
| 688 | } | 694 | } |
| 689 | 695 | ||
| 690 | cil->xc_push_seq = push_seq; | 696 | cil->xc_push_seq = push_seq; |
| 697 | queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); | ||
| 691 | spin_unlock(&cil->xc_push_lock); | 698 | spin_unlock(&cil->xc_push_lock); |
| 692 | |||
| 693 | /* do the push now */ | ||
| 694 | xlog_cil_push(log); | ||
| 695 | } | 699 | } |
| 696 | 700 | ||
| 697 | bool | 701 | bool |
| @@ -721,7 +725,7 @@ xlog_cil_empty( | |||
| 721 | * background commit, returns without it held once background commits are | 725 | * background commit, returns without it held once background commits are |
| 722 | * allowed again. | 726 | * allowed again. |
| 723 | */ | 727 | */ |
| 724 | int | 728 | void |
| 725 | xfs_log_commit_cil( | 729 | xfs_log_commit_cil( |
| 726 | struct xfs_mount *mp, | 730 | struct xfs_mount *mp, |
| 727 | struct xfs_trans *tp, | 731 | struct xfs_trans *tp, |
| @@ -767,7 +771,6 @@ xfs_log_commit_cil( | |||
| 767 | xlog_cil_push_background(log); | 771 | xlog_cil_push_background(log); |
| 768 | 772 | ||
| 769 | up_read(&cil->xc_ctx_lock); | 773 | up_read(&cil->xc_ctx_lock); |
| 770 | return 0; | ||
| 771 | } | 774 | } |
| 772 | 775 | ||
| 773 | /* | 776 | /* |
| @@ -796,7 +799,8 @@ xlog_cil_force_lsn( | |||
| 796 | * xlog_cil_push() handles racing pushes for the same sequence, | 799 | * xlog_cil_push() handles racing pushes for the same sequence, |
| 797 | * so no need to deal with it here. | 800 | * so no need to deal with it here. |
| 798 | */ | 801 | */ |
| 799 | xlog_cil_push_foreground(log, sequence); | 802 | restart: |
| 803 | xlog_cil_push_now(log, sequence); | ||
| 800 | 804 | ||
| 801 | /* | 805 | /* |
| 802 | * See if we can find a previous sequence still committing. | 806 | * See if we can find a previous sequence still committing. |
| @@ -804,7 +808,6 @@ xlog_cil_force_lsn( | |||
| 804 | * before allowing the force of push_seq to go ahead. Hence block | 808 | * before allowing the force of push_seq to go ahead. Hence block |
| 805 | * on commits for those as well. | 809 | * on commits for those as well. |
| 806 | */ | 810 | */ |
| 807 | restart: | ||
| 808 | spin_lock(&cil->xc_push_lock); | 811 | spin_lock(&cil->xc_push_lock); |
| 809 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | 812 | list_for_each_entry(ctx, &cil->xc_committing, committing) { |
| 810 | if (ctx->sequence > sequence) | 813 | if (ctx->sequence > sequence) |
| @@ -822,6 +825,28 @@ restart: | |||
| 822 | /* found it! */ | 825 | /* found it! */ |
| 823 | commit_lsn = ctx->commit_lsn; | 826 | commit_lsn = ctx->commit_lsn; |
| 824 | } | 827 | } |
| 828 | |||
| 829 | /* | ||
| 830 | * The call to xlog_cil_push_now() executes the push in the background. | ||
| 831 | * Hence by the time we have got here, our sequence may not have been | ||
| 832 | * pushed yet. This is true if the current sequence still matches the | ||
| 833 | * push sequence after the above wait loop and the CIL still contains | ||
| 834 | * dirty objects. | ||
| 835 | * | ||
| 836 | * When the push occurs, it will empty the CIL and | ||
| 837 | * atomically increment the current sequence past the push sequence and | ||
| 838 | * move it into the committing list. Of course, if the CIL is clean at | ||
| 839 | * the time of the push, it won't have pushed the CIL at all, so in that | ||
| 840 | * case we should try the push for this sequence again from the start | ||
| 841 | * just in case. | ||
| 842 | */ | ||
| 843 | |||
| 844 | if (sequence == cil->xc_current_sequence && | ||
| 845 | !list_empty(&cil->xc_cil)) { | ||
| 846 | spin_unlock(&cil->xc_push_lock); | ||
| 847 | goto restart; | ||
| 848 | } | ||
| 849 | |||
| 825 | spin_unlock(&cil->xc_push_lock); | 850 | spin_unlock(&cil->xc_push_lock); |
| 826 | return commit_lsn; | 851 | return commit_lsn; |
| 827 | } | 852 | } |
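The restart logic added to xlog_cil_force_lsn() exists because xlog_cil_push_now() only queues work: by the time the forcing thread re-takes xc_push_lock, the push may not have run, and the tell-tale is that the forced sequence is still the current one and the CIL still holds dirty items. A compressed pthread model of the queue-then-recheck pattern (one worker, one sequence, all names invented; compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t push_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long current_sequence = 1;	/* sequence now accumulating */
static unsigned long committed_sequence;	/* highest sequence written out */
static bool cil_dirty = true;			/* current sequence has items */
static unsigned long queued_push_seq;

static void *push_worker(void *arg)		/* the background push */
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&push_lock);
		if (cil_dirty && queued_push_seq >= current_sequence) {
			/* sequence switch happens atomically under the lock */
			committed_sequence = current_sequence++;
			cil_dirty = false;
			pthread_mutex_unlock(&push_lock);
			return NULL;
		}
		pthread_mutex_unlock(&push_lock);
		usleep(1000);			/* nothing queued yet */
	}
}

static void cil_push_now(unsigned long seq)	/* queue the push, don't wait */
{
	pthread_mutex_lock(&push_lock);
	if (seq > queued_push_seq)
		queued_push_seq = seq;
	pthread_mutex_unlock(&push_lock);
}

static void cil_force_seq(unsigned long seq)
{
restart:
	cil_push_now(seq);
	pthread_mutex_lock(&push_lock);
	while (committed_sequence < seq) {
		if (seq == current_sequence && cil_dirty) {
			/* the queued push hasn't run yet: drop the lock, retry */
			pthread_mutex_unlock(&push_lock);
			usleep(1000);
			goto restart;
		}
		/* poll; the real code sleeps on the committing contexts here */
		pthread_mutex_unlock(&push_lock);
		usleep(1000);
		pthread_mutex_lock(&push_lock);
	}
	pthread_mutex_unlock(&push_lock);
}

int main(void)
{
	pthread_t w;
	pthread_create(&w, NULL, push_worker, NULL);
	cil_force_seq(1);
	printf("sequence 1 committed\n");
	return pthread_join(w, NULL);
}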
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f96c05669a9e..993cb19e7d39 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -314,6 +314,9 @@ reread: | |||
| 314 | error = bp->b_error; | 314 | error = bp->b_error; |
| 315 | if (loud) | 315 | if (loud) |
| 316 | xfs_warn(mp, "SB validate failed with error %d.", error); | 316 | xfs_warn(mp, "SB validate failed with error %d.", error); |
| 317 | /* bad CRC means corrupted metadata */ | ||
| 318 | if (error == EFSBADCRC) | ||
| 319 | error = EFSCORRUPTED; | ||
| 317 | goto release_buf; | 320 | goto release_buf; |
| 318 | } | 321 | } |
| 319 | 322 | ||
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a6a76b2b6a85..ec5ca65c6211 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
| @@ -842,7 +842,7 @@ xfs_growfs_rt_alloc( | |||
| 842 | /* | 842 | /* |
| 843 | * Reserve space & log for one extent added to the file. | 843 | * Reserve space & log for one extent added to the file. |
| 844 | */ | 844 | */ |
| 845 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, | 845 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, |
| 846 | resblks, 0); | 846 | resblks, 0); |
| 847 | if (error) | 847 | if (error) |
| 848 | goto error_cancel; | 848 | goto error_cancel; |
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 1e116794bb66..0c0e41bbe4e3 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c | |||
| @@ -288,6 +288,7 @@ xfs_mount_validate_sb( | |||
| 288 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 288 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
| 289 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || | 289 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || |
| 290 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || | 290 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || |
| 291 | sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || | ||
| 291 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || | 292 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || |
| 292 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 293 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
| 293 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | 294 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || |
| @@ -610,12 +611,11 @@ xfs_sb_read_verify( | |||
| 610 | XFS_SB_VERSION_5) || | 611 | XFS_SB_VERSION_5) || |
| 611 | dsb->sb_crc != 0)) { | 612 | dsb->sb_crc != 0)) { |
| 612 | 613 | ||
| 613 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 614 | if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { |
| 614 | offsetof(struct xfs_sb, sb_crc))) { | ||
| 615 | /* Only fail bad secondaries on a known V5 filesystem */ | 615 | /* Only fail bad secondaries on a known V5 filesystem */ |
| 616 | if (bp->b_bn == XFS_SB_DADDR || | 616 | if (bp->b_bn == XFS_SB_DADDR || |
| 617 | xfs_sb_version_hascrc(&mp->m_sb)) { | 617 | xfs_sb_version_hascrc(&mp->m_sb)) { |
| 618 | error = EFSCORRUPTED; | 618 | error = EFSBADCRC; |
| 619 | goto out_error; | 619 | goto out_error; |
| 620 | } | 620 | } |
| 621 | } | 621 | } |
| @@ -624,10 +624,9 @@ xfs_sb_read_verify( | |||
| 624 | 624 | ||
| 625 | out_error: | 625 | out_error: |
| 626 | if (error) { | 626 | if (error) { |
| 627 | if (error == EFSCORRUPTED) | ||
| 628 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 629 | mp, bp->b_addr); | ||
| 630 | xfs_buf_ioerror(bp, error); | 627 | xfs_buf_ioerror(bp, error); |
| 628 | if (error == EFSCORRUPTED || error == EFSBADCRC) | ||
| 629 | xfs_verifier_error(bp); | ||
| 631 | } | 630 | } |
| 632 | } | 631 | } |
| 633 | 632 | ||
| @@ -662,9 +661,8 @@ xfs_sb_write_verify( | |||
| 662 | 661 | ||
| 663 | error = xfs_sb_verify(bp, false); | 662 | error = xfs_sb_verify(bp, false); |
| 664 | if (error) { | 663 | if (error) { |
| 665 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
| 666 | mp, bp->b_addr); | ||
| 667 | xfs_buf_ioerror(bp, error); | 664 | xfs_buf_ioerror(bp, error); |
| 665 | xfs_verifier_error(bp); | ||
| 668 | return; | 666 | return; |
| 669 | } | 667 | } |
| 670 | 668 | ||
| @@ -674,8 +672,7 @@ xfs_sb_write_verify( | |||
| 674 | if (bip) | 672 | if (bip) |
| 675 | XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 673 | XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 676 | 674 | ||
| 677 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 675 | xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); |
| 678 | offsetof(struct xfs_sb, sb_crc)); | ||
| 679 | } | 676 | } |
| 680 | 677 | ||
| 681 | const struct xfs_buf_ops xfs_sb_buf_ops = { | 678 | const struct xfs_buf_ops xfs_sb_buf_ops = { |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 35061d4b614c..f7b2fe77c5a5 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
| @@ -182,6 +182,8 @@ typedef struct xfs_sb { | |||
| 182 | /* must be padded to 64 bit alignment */ | 182 | /* must be padded to 64 bit alignment */ |
| 183 | } xfs_sb_t; | 183 | } xfs_sb_t; |
| 184 | 184 | ||
| 185 | #define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) | ||
| 186 | |||
| 185 | /* | 187 | /* |
| 186 | * Superblock - on disk version. Must match the in core version above. | 188 | * Superblock - on disk version. Must match the in core version above. |
| 187 | * Must be padded to 64 bit alignment. | 189 | * Must be padded to 64 bit alignment. |
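XFS_SB_CRC_OFF joins XFS_SYMLINK_CRC_OFF, XFS_DINODE_CRC_OFF and the other *_CRC_OFF macros in naming the offsetof() expressions previously spelled out at every checksum call site. The idiom in isolation, with a hypothetical on-disk header and a stand-in checksum (the real code uses CRC32c and zeroes the CRC field during the initial pass):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct disk_hdr {			/* hypothetical on-disk header */
	uint32_t magic;
	uint32_t crc;
	uint64_t lsn;
};

#define HDR_CRC_OFF	offsetof(struct disk_hdr, crc)

/* checksum the buffer with the CRC field itself excluded */
static uint32_t toy_cksum(const unsigned char *buf, size_t len, size_t crc_off)
{
	uint32_t sum = 0;

	for (size_t i = 0; i < len; i++) {
		if (i >= crc_off && i < crc_off + sizeof(uint32_t))
			continue;	/* skip the stored CRC bytes */
		sum = sum * 31 + buf[i];
	}
	return sum;
}

int main(void)
{
	struct disk_hdr hdr = { .magic = 0x58465342, .lsn = 42 };

	hdr.crc = toy_cksum((unsigned char *)&hdr, sizeof(hdr), HDR_CRC_OFF);
	printf("crc at offset %zu, value %#x\n",
	       (size_t)HDR_CRC_OFF, (unsigned)hdr.crc);
	return 0;
}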
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h index 8c5035a13df1..4484e5151395 100644 --- a/fs/xfs/xfs_shared.h +++ b/fs/xfs/xfs_shared.h | |||
| @@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
| 104 | #define XFS_TRANS_SB_COUNT 41 | 104 | #define XFS_TRANS_SB_COUNT 41 |
| 105 | #define XFS_TRANS_CHECKPOINT 42 | 105 | #define XFS_TRANS_CHECKPOINT 42 |
| 106 | #define XFS_TRANS_ICREATE 43 | 106 | #define XFS_TRANS_ICREATE 43 |
| 107 | #define XFS_TRANS_TYPE_MAX 43 | 107 | #define XFS_TRANS_CREATE_TMPFILE 44 |
| 108 | #define XFS_TRANS_TYPE_MAX 44 | ||
| 108 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 109 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
| 109 | 110 | ||
| 110 | #define XFS_TRANS_TYPES \ | 111 | #define XFS_TRANS_TYPES \ |
| @@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
| 112 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ | 113 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ |
| 113 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ | 114 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ |
| 114 | { XFS_TRANS_CREATE, "CREATE" }, \ | 115 | { XFS_TRANS_CREATE, "CREATE" }, \ |
| 116 | { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ | ||
| 115 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ | 117 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ |
| 116 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ | 118 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ |
| 117 | { XFS_TRANS_REMOVE, "REMOVE" }, \ | 119 | { XFS_TRANS_REMOVE, "REMOVE" }, \ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d971f4932b5d..205376776377 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
| @@ -996,7 +996,7 @@ xfs_fs_evict_inode( | |||
| 996 | 996 | ||
| 997 | trace_xfs_evict_inode(ip); | 997 | trace_xfs_evict_inode(ip); |
| 998 | 998 | ||
| 999 | truncate_inode_pages(&inode->i_data, 0); | 999 | truncate_inode_pages_final(&inode->i_data); |
| 1000 | clear_inode(inode); | 1000 | clear_inode(inode); |
| 1001 | XFS_STATS_INC(vn_rele); | 1001 | XFS_STATS_INC(vn_rele); |
| 1002 | XFS_STATS_INC(vn_remove); | 1002 | XFS_STATS_INC(vn_remove); |
| @@ -1197,6 +1197,7 @@ xfs_fs_remount( | |||
| 1197 | char *p; | 1197 | char *p; |
| 1198 | int error; | 1198 | int error; |
| 1199 | 1199 | ||
| 1200 | sync_filesystem(sb); | ||
| 1200 | while ((p = strsep(&options, ",")) != NULL) { | 1201 | while ((p = strsep(&options, ",")) != NULL) { |
| 1201 | int token; | 1202 | int token; |
| 1202 | 1203 | ||
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 14e58f2c96bd..52979aa90986 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
| @@ -80,6 +80,10 @@ xfs_readlink_bmap( | |||
| 80 | if (error) { | 80 | if (error) { |
| 81 | xfs_buf_ioerror_alert(bp, __func__); | 81 | xfs_buf_ioerror_alert(bp, __func__); |
| 82 | xfs_buf_relse(bp); | 82 | xfs_buf_relse(bp); |
| 83 | |||
| 84 | /* bad CRC means corrupted metadata */ | ||
| 85 | if (error == EFSBADCRC) | ||
| 86 | error = EFSCORRUPTED; | ||
| 83 | goto out; | 87 | goto out; |
| 84 | } | 88 | } |
| 85 | byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); | 89 | byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); |
| @@ -208,10 +212,7 @@ xfs_symlink( | |||
| 208 | return XFS_ERROR(ENAMETOOLONG); | 212 | return XFS_ERROR(ENAMETOOLONG); |
| 209 | 213 | ||
| 210 | udqp = gdqp = NULL; | 214 | udqp = gdqp = NULL; |
| 211 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 215 | prid = xfs_get_initial_prid(dp); |
| 212 | prid = xfs_get_projid(dp); | ||
| 213 | else | ||
| 214 | prid = XFS_PROJID_DEFAULT; | ||
| 215 | 216 | ||
| 216 | /* | 217 | /* |
| 217 | * Make sure that we have allocated dquot(s) on disk. | 218 | * Make sure that we have allocated dquot(s) on disk. |
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index bf59a2b45f8c..9b32052ff65e 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c | |||
| @@ -133,12 +133,13 @@ xfs_symlink_read_verify( | |||
| 133 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 133 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
| 134 | return; | 134 | return; |
| 135 | 135 | ||
| 136 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 136 | if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) |
| 137 | offsetof(struct xfs_dsymlink_hdr, sl_crc)) || | 137 | xfs_buf_ioerror(bp, EFSBADCRC); |
| 138 | !xfs_symlink_verify(bp)) { | 138 | else if (!xfs_symlink_verify(bp)) |
| 139 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 140 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 139 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 141 | } | 140 | |
| 141 | if (bp->b_error) | ||
| 142 | xfs_verifier_error(bp); | ||
| 142 | } | 143 | } |
| 143 | 144 | ||
| 144 | static void | 145 | static void |
| @@ -153,8 +154,8 @@ xfs_symlink_write_verify( | |||
| 153 | return; | 154 | return; |
| 154 | 155 | ||
| 155 | if (!xfs_symlink_verify(bp)) { | 156 | if (!xfs_symlink_verify(bp)) { |
| 156 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
| 157 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 157 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
| 158 | xfs_verifier_error(bp); | ||
| 158 | return; | 159 | return; |
| 159 | } | 160 | } |
| 160 | 161 | ||
| @@ -162,8 +163,7 @@ xfs_symlink_write_verify( | |||
| 162 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | 163 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; |
| 163 | dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 164 | dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
| 164 | } | 165 | } |
| 165 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 166 | xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); |
| 166 | offsetof(struct xfs_dsymlink_hdr, sl_crc)); | ||
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | const struct xfs_buf_ops xfs_symlink_buf_ops = { | 169 | const struct xfs_buf_ops xfs_symlink_buf_ops = { |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 425dfa45b9a0..a4ae41c179a8 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink); | |||
| 603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); | 603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); |
| 604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); | 604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); |
| 605 | DEFINE_INODE_EVENT(xfs_free_file_space); | 605 | DEFINE_INODE_EVENT(xfs_free_file_space); |
| 606 | DEFINE_INODE_EVENT(xfs_collapse_file_space); | ||
| 606 | DEFINE_INODE_EVENT(xfs_readdir); | 607 | DEFINE_INODE_EVENT(xfs_readdir); |
| 607 | #ifdef CONFIG_XFS_POSIX_ACL | 608 | #ifdef CONFIG_XFS_POSIX_ACL |
| 608 | DEFINE_INODE_EVENT(xfs_get_acl); | 609 | DEFINE_INODE_EVENT(xfs_get_acl); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c812c5c060de..54a57326d85b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
| @@ -887,12 +887,7 @@ xfs_trans_commit( | |||
| 887 | xfs_trans_apply_sb_deltas(tp); | 887 | xfs_trans_apply_sb_deltas(tp); |
| 888 | xfs_trans_apply_dquot_deltas(tp); | 888 | xfs_trans_apply_dquot_deltas(tp); |
| 889 | 889 | ||
| 890 | error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); | 890 | xfs_log_commit_cil(mp, tp, &commit_lsn, flags); |
| 891 | if (error == ENOMEM) { | ||
| 892 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | ||
| 893 | error = XFS_ERROR(EIO); | ||
| 894 | goto out_unreserve; | ||
| 895 | } | ||
| 896 | 891 | ||
| 897 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 892 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 898 | xfs_trans_free(tp); | 893 | xfs_trans_free(tp); |
| @@ -902,10 +897,7 @@ xfs_trans_commit( | |||
| 902 | * log out now and wait for it. | 897 | * log out now and wait for it. |
| 903 | */ | 898 | */ |
| 904 | if (sync) { | 899 | if (sync) { |
| 905 | if (!error) { | 900 | error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); |
| 906 | error = _xfs_log_force_lsn(mp, commit_lsn, | ||
| 907 | XFS_LOG_SYNC, NULL); | ||
| 908 | } | ||
| 909 | XFS_STATS_INC(xs_trans_sync); | 901 | XFS_STATS_INC(xs_trans_sync); |
| 910 | } else { | 902 | } else { |
| 911 | XFS_STATS_INC(xs_trans_async); | 903 | XFS_STATS_INC(xs_trans_async); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 647b6f1d8923..b8eef0549f3f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
| @@ -275,6 +275,10 @@ xfs_trans_read_buf_map( | |||
| 275 | XFS_BUF_UNDONE(bp); | 275 | XFS_BUF_UNDONE(bp); |
| 276 | xfs_buf_stale(bp); | 276 | xfs_buf_stale(bp); |
| 277 | xfs_buf_relse(bp); | 277 | xfs_buf_relse(bp); |
| 278 | |||
| 279 | /* bad CRC means corrupted metadata */ | ||
| 280 | if (error == EFSBADCRC) | ||
| 281 | error = EFSCORRUPTED; | ||
| 278 | return error; | 282 | return error; |
| 279 | } | 283 | } |
| 280 | #ifdef DEBUG | 284 | #ifdef DEBUG |
| @@ -338,6 +342,9 @@ xfs_trans_read_buf_map( | |||
| 338 | if (tp->t_flags & XFS_TRANS_DIRTY) | 342 | if (tp->t_flags & XFS_TRANS_DIRTY) |
| 339 | xfs_force_shutdown(tp->t_mountp, | 343 | xfs_force_shutdown(tp->t_mountp, |
| 340 | SHUTDOWN_META_IO_ERROR); | 344 | SHUTDOWN_META_IO_ERROR); |
| 345 | /* bad CRC means corrupted metadata */ | ||
| 346 | if (error == EFSBADCRC) | ||
| 347 | error = EFSCORRUPTED; | ||
| 341 | return error; | 348 | return error; |
| 342 | } | 349 | } |
| 343 | } | 350 | } |
| @@ -375,6 +382,10 @@ xfs_trans_read_buf_map( | |||
| 375 | if (tp->t_flags & XFS_TRANS_DIRTY) | 382 | if (tp->t_flags & XFS_TRANS_DIRTY) |
| 376 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); | 383 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); |
| 377 | xfs_buf_relse(bp); | 384 | xfs_buf_relse(bp); |
| 385 | |||
| 386 | /* bad CRC means corrupted metadata */ | ||
| 387 | if (error == EFSBADCRC) | ||
| 388 | error = EFSCORRUPTED; | ||
| 378 | return error; | 389 | return error; |
| 379 | } | 390 | } |
| 380 | #ifdef DEBUG | 391 | #ifdef DEBUG |
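All three xfs_trans_read_buf_map() error paths above repeat the same normalization, and xfs_mount.c and xfs_readlink_bmap() do it too: EFSBADCRC is useful inside the buffer layer to say why verification failed, but callers only understand EFSCORRUPTED, so the error is folded at the boundary. The pattern in isolation (errno mappings as in xfs_linux.h; the values are Linux-specific):

#include <errno.h>
#include <stdio.h>

#define EFSCORRUPTED	EUCLEAN		/* filesystem is corrupted */
#define EFSBADCRC	EBADMSG		/* bad CRC detected */

static int normalize_verifier_error(int error)
{
	/* bad CRC means corrupted metadata */
	if (error == EFSBADCRC)
		error = EFSCORRUPTED;
	return error;
}

int main(void)
{
	printf("EFSBADCRC %d -> %d\n",
	       EFSBADCRC, normalize_verifier_error(EFSBADCRC));
	return 0;
}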
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2ffd3e331b49..ae368165244d 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
| @@ -81,20 +81,28 @@ xfs_calc_buf_res( | |||
| 81 | * on disk. Hence we need an inode reservation function that calculates all this | 81 | * on disk. Hence we need an inode reservation function that calculates all this |
| 82 | * correctly. So, we log: | 82 | * correctly. So, we log: |
| 83 | * | 83 | * |
| 84 | * - log op headers for object | 84 | * - 4 log op headers for object |
| 85 | * - for the ilf, the inode core and 2 forks | ||
| 85 | * - inode log format object | 86 | * - inode log format object |
| 86 | * - the entire inode contents (core + 2 forks) | 87 | * - the inode core |
| 87 | * - two bmap btree block headers | 88 | * - two inode forks containing bmap btree root blocks. |
| 89 | * - the btree data contained by both forks will fit into the inode size, | ||
| 90 | * hence when combined with the inode core above, we have a total of the | ||
| 91 | * actual inode size. | ||
| 92 | * - the BMBT headers need to be accounted separately, as they are | ||
| 93 | * additional to the records and pointers that fit inside the inode | ||
| 94 | * forks. | ||
| 88 | */ | 95 | */ |
| 89 | STATIC uint | 96 | STATIC uint |
| 90 | xfs_calc_inode_res( | 97 | xfs_calc_inode_res( |
| 91 | struct xfs_mount *mp, | 98 | struct xfs_mount *mp, |
| 92 | uint ninodes) | 99 | uint ninodes) |
| 93 | { | 100 | { |
| 94 | return ninodes * (sizeof(struct xlog_op_header) + | 101 | return ninodes * |
| 95 | sizeof(struct xfs_inode_log_format) + | 102 | (4 * sizeof(struct xlog_op_header) + |
| 96 | mp->m_sb.sb_inodesize + | 103 | sizeof(struct xfs_inode_log_format) + |
| 97 | 2 * XFS_BMBT_BLOCK_LEN(mp)); | 104 | mp->m_sb.sb_inodesize + |
| 105 | 2 * XFS_BMBT_BLOCK_LEN(mp)); | ||
| 98 | } | 106 | } |
| 99 | 107 | ||
| 100 | /* | 108 | /* |
| @@ -204,6 +212,19 @@ xfs_calc_rename_reservation( | |||
| 204 | } | 212 | } |
| 205 | 213 | ||
| 206 | /* | 214 | /* |
| 215 | * For removing an inode from the unlinked list, we can modify: | ||
| 216 | * the agi hash list and counters: sector size | ||
| 217 | * the on disk inode before ours in the agi hash list: inode cluster size | ||
| 218 | */ | ||
| 219 | STATIC uint | ||
| 220 | xfs_calc_iunlink_remove_reservation( | ||
| 221 | struct xfs_mount *mp) | ||
| 222 | { | ||
| 223 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
| 224 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 207 | * For creating a link to an inode: | 228 | * For creating a link to an inode: |
| 208 | * the parent directory inode: inode size | 229 | * the parent directory inode: inode size |
| 209 | * the linked inode: inode size | 230 | * the linked inode: inode size |
| @@ -220,6 +241,7 @@ xfs_calc_link_reservation( | |||
| 220 | struct xfs_mount *mp) | 241 | struct xfs_mount *mp) |
| 221 | { | 242 | { |
| 222 | return XFS_DQUOT_LOGRES(mp) + | 243 | return XFS_DQUOT_LOGRES(mp) + |
| 244 | xfs_calc_iunlink_remove_reservation(mp) + | ||
| 223 | MAX((xfs_calc_inode_res(mp, 2) + | 245 | MAX((xfs_calc_inode_res(mp, 2) + |
| 224 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 246 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
| 225 | XFS_FSB_TO_B(mp, 1))), | 247 | XFS_FSB_TO_B(mp, 1))), |
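
The new xfs_calc_iunlink_remove_reservation() charges one AGI sector plus the larger of one filesystem block and the inode cluster size (the buffer holding the previous inode in the AGI hash chain), and xfs_calc_link_reservation() now pays that unconditionally before its existing MAX(). A sketch of the composition, ignoring the per-buffer log overhead that xfs_calc_buf_res() adds and using stand-in geometry and component costs:

#include <stdio.h>

/* Illustrative geometry; the real values come from the superblock. */
#define SECTOR_SIZE        512
#define FSB_SIZE           4096
#define INODE_CLUSTER_SIZE 8192

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* AGI header sector + the on-disk buffer of the previous inode in the
 * AGI unlinked hash chain (possibly a whole inode cluster). */
static unsigned int iunlink_remove_res(void)
{
        return SECTOR_SIZE + MAX(FSB_SIZE, INODE_CLUSTER_SIZE);
}

/* Stand-ins for the remaining link-reservation components. */
#define DQUOT_LOGRES   1000
#define INODE_RES_TWO  1400 /* ~ xfs_calc_inode_res(mp, 2) */
#define DIROP_BUF_RES  9000
#define AGF_FREE_RES  18000 /* AGF sectors + alloc/free buffers */

static unsigned int link_res(void)
{
        return DQUOT_LOGRES +
               iunlink_remove_res() + /* always paid now */
               MAX(INODE_RES_TWO + DIROP_BUF_RES, AGF_FREE_RES);
}

int main(void)
{
        printf("link reservation: %u bytes\n", link_res());
        return 0;
}
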
| @@ -229,6 +251,18 @@ xfs_calc_link_reservation( | |||
| 229 | } | 251 | } |
| 230 | 252 | ||
| 231 | /* | 253 | /* |
| 254 | * For adding an inode to the unlinked list we can modify: | ||
| 255 | * the agi hash list: sector size | ||
| 256 | * the unlinked inode: inode size | ||
| 257 | */ | ||
| 258 | STATIC uint | ||
| 259 | xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) | ||
| 260 | { | ||
| 261 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
| 262 | xfs_calc_inode_res(mp, 1); | ||
| 263 | } | ||
| 264 | |||
| 265 | /* | ||
| 232 | * For removing a directory entry we can modify: | 266 | * For removing a directory entry we can modify: |
| 233 | * the parent directory inode: inode size | 267 | * the parent directory inode: inode size |
| 234 | * the removed inode: inode size | 268 | * the removed inode: inode size |
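
Where removal must log the cluster buffer of the neighboring inode, xfs_calc_iunlink_add_reservation() only needs the AGI header sector plus the inode being unlinked itself, since pushing onto the list head dirties exactly those two objects. A small self-contained sketch, with the same kind of stand-in values as above:

#include <stdio.h>

#define SECTOR_SIZE 512 /* AGI header buffer, illustrative */
#define INODE_RES   700 /* stand-in for xfs_calc_inode_res(mp, 1) */

/* AGI hash-list head update + the unlinked inode itself. */
static unsigned int iunlink_add_res(void)
{
        return SECTOR_SIZE + INODE_RES;
}

int main(void)
{
        printf("iunlink add reservation: %u bytes\n", iunlink_add_res());
        return 0;
}
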
| @@ -245,10 +279,11 @@ xfs_calc_remove_reservation( | |||
| 245 | struct xfs_mount *mp) | 279 | struct xfs_mount *mp) |
| 246 | { | 280 | { |
| 247 | return XFS_DQUOT_LOGRES(mp) + | 281 | return XFS_DQUOT_LOGRES(mp) + |
| 248 | MAX((xfs_calc_inode_res(mp, 2) + | 282 | xfs_calc_iunlink_add_reservation(mp) + |
| 283 | MAX((xfs_calc_inode_res(mp, 1) + | ||
| 249 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 284 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
| 250 | XFS_FSB_TO_B(mp, 1))), | 285 | XFS_FSB_TO_B(mp, 1))), |
| 251 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + | 286 | (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + |
| 252 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | 287 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
| 253 | XFS_FSB_TO_B(mp, 1)))); | 288 | XFS_FSB_TO_B(mp, 1)))); |
| 254 | } | 289 | } |
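
The reworked xfs_calc_remove_reservation() moves the unlinked-list cost out of the MAX(): the iunlink-add reservation is always needed, while the MAX() picks the worse of the directory-modification path (now only one inode, since the victim is covered by the iunlink term) and the block-freeing path (now four AGF-related sectors instead of five). A sketch of the composition with purely illustrative component costs:

#include <stdio.h>

/* Stand-in component costs (bytes); purely illustrative. */
#define DQUOT_LOGRES       1000
#define IUNLINK_ADD_RES    1100
#define INODE_RES_ONE       700
#define DIROP_BUF_RES      9000
#define AGF_SECTORS_RES    2200 /* four sectors' worth */
#define ALLOCFREE_BUF_RES 16000

#define MAX(a, b) ((a) > (b) ? (a) : (b))

static unsigned int remove_res(void)
{
        return DQUOT_LOGRES +
               IUNLINK_ADD_RES +                  /* always paid */
               MAX(INODE_RES_ONE + DIROP_BUF_RES, /* dirent-removal path */
                   AGF_SECTORS_RES + ALLOCFREE_BUF_RES); /* extent-free path */
}

int main(void)
{
        printf("remove reservation: %u bytes\n", remove_res());
        return 0;
}
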
| @@ -343,6 +378,20 @@ xfs_calc_create_reservation( | |||
| 343 | 378 | ||
| 344 | } | 379 | } |
| 345 | 380 | ||
| 381 | STATIC uint | ||
| 382 | xfs_calc_create_tmpfile_reservation( | ||
| 383 | struct xfs_mount *mp) | ||
| 384 | { | ||
| 385 | uint res = XFS_DQUOT_LOGRES(mp); | ||
| 386 | |||
| 387 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
| 388 | res += xfs_calc_icreate_resv_alloc(mp); | ||
| 389 | else | ||
| 390 | res += xfs_calc_create_resv_alloc(mp); | ||
| 391 | |||
| 392 | return res + xfs_calc_iunlink_add_reservation(mp); | ||
| 393 | } | ||
| 394 | |||
| 346 | /* | 395 | /* |
| 347 | * Making a new directory is the same as creating a new file. | 396 | * Making a new directory is the same as creating a new file. |
| 348 | */ | 397 | */ |
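
xfs_calc_create_tmpfile_reservation() reuses the existing create-allocation helpers and differs only in tacking on the iunlink-add cost, because an O_TMPFILE inode is born on the unlinked list rather than in a directory. A sketch of the branch, with the helpers reduced to stand-in constants:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real helpers, which depend on mount geometry. */
#define DQUOT_LOGRES       1000
#define ICREATE_RESV_ALLOC 4000 /* v5 (CRC-enabled) filesystems */
#define CREATE_RESV_ALLOC  6000 /* v4 filesystems */
#define IUNLINK_ADD_RES    1100

static unsigned int create_tmpfile_res(bool has_crc)
{
        unsigned int res = DQUOT_LOGRES;

        /* v5 superblocks use the leaner icreate allocation path */
        if (has_crc)
                res += ICREATE_RESV_ALLOC;
        else
                res += CREATE_RESV_ALLOC;

        /* tmpfiles go straight onto the AGI unlinked list */
        return res + IUNLINK_ADD_RES;
}

int main(void)
{
        printf("v5: %u, v4: %u\n",
               create_tmpfile_res(true), create_tmpfile_res(false));
        return 0;
}
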
| @@ -383,9 +432,9 @@ xfs_calc_ifree_reservation( | |||
| 383 | { | 432 | { |
| 384 | return XFS_DQUOT_LOGRES(mp) + | 433 | return XFS_DQUOT_LOGRES(mp) + |
| 385 | xfs_calc_inode_res(mp, 1) + | 434 | xfs_calc_inode_res(mp, 1) + |
| 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 435 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
| 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 436 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
| 388 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) + | 437 | xfs_calc_iunlink_remove_reservation(mp) + |
| 389 | xfs_calc_buf_res(1, 0) + | 438 | xfs_calc_buf_res(1, 0) + |
| 390 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + | 439 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + |
| 391 | mp->m_in_maxlevels, 0) + | 440 | mp->m_in_maxlevels, 0) + |
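
The xfs_calc_ifree_reservation() change is a pure refactor: one of the two inline AGI sectors and the cluster max_t() term move into the iunlink-remove helper, so the total reservation is unchanged. A quick equivalence check, modeling xfs_calc_buf_res() as size plus an assumed per-buffer log overhead:

#include <stdio.h>

#define BUF_OVERHEAD 128 /* stand-in for the per-buffer log overhead */
static unsigned int buf_res(unsigned int n, unsigned int size)
{
        return n * (size + BUF_OVERHEAD);
}

#define SECT    512
#define FSB     4096
#define CLUSTER 8192
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        /* old terms: two sectors inline + max(fsb, cluster) inline */
        unsigned int old_res = buf_res(2, SECT) + MAX(FSB, CLUSTER);
        /* new terms: one sector inline + the iunlink-remove helper */
        unsigned int new_res = buf_res(1, SECT) +
                               (buf_res(1, SECT) + MAX(FSB, CLUSTER));
        printf("old=%u new=%u equal=%d\n", old_res, new_res,
               old_res == new_res);
        return 0;
}
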
| @@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation( | |||
| 644 | 693 | ||
| 645 | /* | 694 | /* |
| 646 | * Allocating quota on disk if needed. | 695 | * Allocating quota on disk if needed. |
| 647 | * the write transaction log space: M_RES(mp)->tr_write.tr_logres | 696 | * the write transaction log space for quota file extent allocation |
| 648 | * the unit of quota allocation: one system block size | 697 | * the unit of quota allocation: one system block size |
| 649 | */ | 698 | */ |
| 650 | STATIC uint | 699 | STATIC uint |
| 651 | xfs_calc_qm_dqalloc_reservation( | 700 | xfs_calc_qm_dqalloc_reservation( |
| 652 | struct xfs_mount *mp) | 701 | struct xfs_mount *mp) |
| 653 | { | 702 | { |
| 654 | ASSERT(M_RES(mp)->tr_write.tr_logres); | 703 | return xfs_calc_write_reservation(mp) + |
| 655 | return M_RES(mp)->tr_write.tr_logres + | ||
| 656 | xfs_calc_buf_res(1, | 704 | xfs_calc_buf_res(1, |
| 657 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); | 705 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); |
| 658 | } | 706 | } |
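
Replacing the M_RES(mp)->tr_write.tr_logres lookup with a direct call to xfs_calc_write_reservation() removes an initialization-order dependency: the old form only worked once tr_write had already been populated (hence the ASSERT), while the new form can run at any point during xfs_trans_resv_calc(). A toy illustration of the difference; the names and values here are invented for the sketch:

#include <stdio.h>

static unsigned int calc_write_res(void) { return 12000; } /* stand-in */

struct resv_table { unsigned int tr_write; };

/* Old shape: reads tr_write from the table, so it silently returns
 * garbage if evaluated before the table entry is filled in. */
static unsigned int dqalloc_res_old(const struct resv_table *t)
{
        return t->tr_write + 500; /* 500 ~ quota cluster buf res */
}

/* New shape: recomputes the write reservation directly, so there is
 * no ordering dependency between table entries. */
static unsigned int dqalloc_res_new(void)
{
        return calc_write_res() + 500;
}

int main(void)
{
        struct resv_table t = { 0 }; /* not yet populated */
        printf("old, too early: %u\n", dqalloc_res_old(&t)); /* wrong: 500 */
        t.tr_write = calc_write_res();
        printf("old, after init: %u\n", dqalloc_res_old(&t));
        printf("new, anytime:    %u\n", dqalloc_res_new());
        return 0;
}
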
| @@ -729,6 +777,11 @@ xfs_trans_resv_calc( | |||
| 729 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; | 777 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; |
| 730 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | 778 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
| 731 | 779 | ||
| 780 | resp->tr_create_tmpfile.tr_logres = | ||
| 781 | xfs_calc_create_tmpfile_reservation(mp); | ||
| 782 | resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; | ||
| 783 | resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
| 784 | |||
| 732 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); | 785 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); |
| 733 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; | 786 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; |
| 734 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | 787 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
| @@ -784,7 +837,6 @@ xfs_trans_resv_calc( | |||
| 784 | /* The following transactions are logged in logical format */ | 837 | /* The following transactions are logged in logical format */ |
| 785 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); | 838 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); |
| 786 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); | 839 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); |
| 787 | resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp); | ||
| 788 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); | 840 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); |
| 789 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); | 841 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); |
| 790 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); | 842 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); |
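
The tr_create_tmpfile wiring above follows the standard pattern for a permanent reservation: compute tr_logres, set the log count (XFS_CREATE_TMPFILE_LOG_COUNT, defined as 2 in the header hunk below), and mark it XFS_TRANS_PERM_LOG_RES so the transaction may roll. A minimal standalone mirror of that triple, with an illustrative reservation value:

#include <stdio.h>

/* Minimal mirror of struct xfs_trans_res as set up here. */
struct trans_res {
        unsigned int tr_logres;   /* bytes per transaction unit */
        int          tr_logcount; /* how many times it may roll */
        int          tr_logflags; /* permanent vs one-shot */
};

#define TRANS_PERM_LOG_RES       0x1
#define CREATE_TMPFILE_LOG_COUNT 2

int main(void)
{
        struct trans_res tmpfile = { 0 };

        tmpfile.tr_logres    = 11100; /* stand-in for the calc helper */
        tmpfile.tr_logcount  = CREATE_TMPFILE_LOG_COUNT;
        tmpfile.tr_logflags |= TRANS_PERM_LOG_RES;

        printf("res=%u count=%d perm=%d\n", tmpfile.tr_logres,
               tmpfile.tr_logcount,
               !!(tmpfile.tr_logflags & TRANS_PERM_LOG_RES));
        return 0;
}
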
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h index de7de9aaad8a..1097d14cd583 100644 --- a/fs/xfs/xfs_trans_resv.h +++ b/fs/xfs/xfs_trans_resv.h | |||
| @@ -38,11 +38,11 @@ struct xfs_trans_resv { | |||
| 38 | struct xfs_trans_res tr_remove; /* unlink trans */ | 38 | struct xfs_trans_res tr_remove; /* unlink trans */ |
| 39 | struct xfs_trans_res tr_symlink; /* symlink trans */ | 39 | struct xfs_trans_res tr_symlink; /* symlink trans */ |
| 40 | struct xfs_trans_res tr_create; /* create trans */ | 40 | struct xfs_trans_res tr_create; /* create trans */ |
| 41 | struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */ | ||
| 41 | struct xfs_trans_res tr_mkdir; /* mkdir trans */ | 42 | struct xfs_trans_res tr_mkdir; /* mkdir trans */ |
| 42 | struct xfs_trans_res tr_ifree; /* inode free trans */ | 43 | struct xfs_trans_res tr_ifree; /* inode free trans */ |
| 43 | struct xfs_trans_res tr_ichange; /* inode update trans */ | 44 | struct xfs_trans_res tr_ichange; /* inode update trans */ |
| 44 | struct xfs_trans_res tr_growdata; /* fs data section grow trans */ | 45 | struct xfs_trans_res tr_growdata; /* fs data section grow trans */ |
| 45 | struct xfs_trans_res tr_swrite; /* sync write inode trans */ | ||
| 46 | struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ | 46 | struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ |
| 47 | struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ | 47 | struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ |
| 48 | struct xfs_trans_res tr_attrinval; /* attr fork buffer | 48 | struct xfs_trans_res tr_attrinval; /* attr fork buffer |
| @@ -100,6 +100,7 @@ struct xfs_trans_resv { | |||
| 100 | #define XFS_ITRUNCATE_LOG_COUNT 2 | 100 | #define XFS_ITRUNCATE_LOG_COUNT 2 |
| 101 | #define XFS_INACTIVE_LOG_COUNT 2 | 101 | #define XFS_INACTIVE_LOG_COUNT 2 |
| 102 | #define XFS_CREATE_LOG_COUNT 2 | 102 | #define XFS_CREATE_LOG_COUNT 2 |
| 103 | #define XFS_CREATE_TMPFILE_LOG_COUNT 2 | ||
| 103 | #define XFS_MKDIR_LOG_COUNT 3 | 104 | #define XFS_MKDIR_LOG_COUNT 3 |
| 104 | #define XFS_SYMLINK_LOG_COUNT 3 | 105 | #define XFS_SYMLINK_LOG_COUNT 3 |
| 105 | #define XFS_REMOVE_LOG_COUNT 2 | 106 | #define XFS_REMOVE_LOG_COUNT 2 |
