diff options
Diffstat (limited to 'fs')
126 files changed, 3403 insertions, 3307 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 487236c65837..b6df18f1f677 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -440,14 +440,8 @@ config OCFS2_FS | |||
440 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools | 440 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools |
441 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ | 441 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ |
442 | 442 | ||
443 | Note: Features which OCFS2 does not support yet: | 443 | For more information on OCFS2, see the file |
444 | - extended attributes | 444 | <file:Documentation/filesystems/ocfs2.txt>. |
445 | - quotas | ||
446 | - cluster aware flock | ||
447 | - Directory change notification (F_NOTIFY) | ||
448 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | ||
449 | - POSIX ACLs | ||
450 | - readpages / writepages (not user visible) | ||
451 | 445 | ||
452 | config OCFS2_DEBUG_MASKLOG | 446 | config OCFS2_DEBUG_MASKLOG |
453 | bool "OCFS2 logging support" | 447 | bool "OCFS2 logging support" |
@@ -1028,8 +1022,8 @@ config HUGETLB_PAGE | |||
1028 | def_bool HUGETLBFS | 1022 | def_bool HUGETLBFS |
1029 | 1023 | ||
1030 | config CONFIGFS_FS | 1024 | config CONFIGFS_FS |
1031 | tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" | 1025 | tristate "Userspace-driven configuration filesystem" |
1032 | depends on SYSFS && EXPERIMENTAL | 1026 | depends on SYSFS |
1033 | help | 1027 | help |
1034 | configfs is a ram-based filesystem that provides the converse | 1028 | configfs is a ram-based filesystem that provides the converse |
1035 | of sysfs's functionality. Where sysfs is a filesystem-based | 1029 | of sysfs's functionality. Where sysfs is a filesystem-based |
@@ -1112,8 +1106,8 @@ config HFS_FS | |||
1112 | help | 1106 | help |
1113 | If you say Y here, you will be able to mount Macintosh-formatted | 1107 | If you say Y here, you will be able to mount Macintosh-formatted |
1114 | floppy disks and hard drive partitions with full read-write access. | 1108 | floppy disks and hard drive partitions with full read-write access. |
1115 | Please read <file:fs/hfs/HFS.txt> to learn about the available mount | 1109 | Please read <file:Documentation/filesystems/hfs.txt> to learn about |
1116 | options. | 1110 | the available mount options. |
1117 | 1111 | ||
1118 | To compile this file system support as a module, choose M here: the | 1112 | To compile this file system support as a module, choose M here: the |
1119 | module will be called hfs. | 1113 | module will be called hfs. |
@@ -2130,4 +2124,3 @@ source "fs/nls/Kconfig" | |||
2130 | source "fs/dlm/Kconfig" | 2124 | source "fs/dlm/Kconfig" |
2131 | 2125 | ||
2132 | endmenu | 2126 | endmenu |
2133 | |||
diff --git a/fs/block_dev.c b/fs/block_dev.c index 993f78c55221..e48a630ae266 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -738,9 +738,9 @@ EXPORT_SYMBOL(bd_release); | |||
738 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | 738 | static struct kobject *bdev_get_kobj(struct block_device *bdev) |
739 | { | 739 | { |
740 | if (bdev->bd_contains != bdev) | 740 | if (bdev->bd_contains != bdev) |
741 | return kobject_get(&bdev->bd_part->kobj); | 741 | return kobject_get(&bdev->bd_part->dev.kobj); |
742 | else | 742 | else |
743 | return kobject_get(&bdev->bd_disk->kobj); | 743 | return kobject_get(&bdev->bd_disk->dev.kobj); |
744 | } | 744 | } |
745 | 745 | ||
746 | static struct kobject *bdev_get_holder(struct block_device *bdev) | 746 | static struct kobject *bdev_get_holder(struct block_device *bdev) |
@@ -1176,7 +1176,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1176 | ret = -ENXIO; | 1176 | ret = -ENXIO; |
1177 | goto out_first; | 1177 | goto out_first; |
1178 | } | 1178 | } |
1179 | kobject_get(&p->kobj); | 1179 | kobject_get(&p->dev.kobj); |
1180 | bdev->bd_part = p; | 1180 | bdev->bd_part = p; |
1181 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | 1181 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); |
1182 | } | 1182 | } |
@@ -1299,7 +1299,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1299 | module_put(owner); | 1299 | module_put(owner); |
1300 | 1300 | ||
1301 | if (bdev->bd_contains != bdev) { | 1301 | if (bdev->bd_contains != bdev) { |
1302 | kobject_put(&bdev->bd_part->kobj); | 1302 | kobject_put(&bdev->bd_part->dev.kobj); |
1303 | bdev->bd_part = NULL; | 1303 | bdev->bd_part = NULL; |
1304 | } | 1304 | } |
1305 | bdev->bd_disk = NULL; | 1305 | bdev->bd_disk = NULL; |
diff --git a/fs/char_dev.c b/fs/char_dev.c index c3bfa76765c4..2c7a8b5b4598 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -510,9 +510,8 @@ struct cdev *cdev_alloc(void) | |||
510 | { | 510 | { |
511 | struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); | 511 | struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); |
512 | if (p) { | 512 | if (p) { |
513 | p->kobj.ktype = &ktype_cdev_dynamic; | ||
514 | INIT_LIST_HEAD(&p->list); | 513 | INIT_LIST_HEAD(&p->list); |
515 | kobject_init(&p->kobj); | 514 | kobject_init(&p->kobj, &ktype_cdev_dynamic); |
516 | } | 515 | } |
517 | return p; | 516 | return p; |
518 | } | 517 | } |
@@ -529,8 +528,7 @@ void cdev_init(struct cdev *cdev, const struct file_operations *fops) | |||
529 | { | 528 | { |
530 | memset(cdev, 0, sizeof *cdev); | 529 | memset(cdev, 0, sizeof *cdev); |
531 | INIT_LIST_HEAD(&cdev->list); | 530 | INIT_LIST_HEAD(&cdev->list); |
532 | cdev->kobj.ktype = &ktype_cdev_default; | 531 | kobject_init(&cdev->kobj, &ktype_cdev_default); |
533 | kobject_init(&cdev->kobj); | ||
534 | cdev->ops = fops; | 532 | cdev->ops = fops; |
535 | } | 533 | } |
536 | 534 | ||
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index dcc6aead70f5..e3eb3556622b 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -362,8 +362,8 @@ static int init_coda_psdev(void) | |||
362 | goto out_chrdev; | 362 | goto out_chrdev; |
363 | } | 363 | } |
364 | for (i = 0; i < MAX_CODADEVS; i++) | 364 | for (i = 0; i < MAX_CODADEVS; i++) |
365 | class_device_create(coda_psdev_class, NULL, | 365 | device_create(coda_psdev_class, NULL, |
366 | MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); | 366 | MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); |
367 | coda_sysctl_init(); | 367 | coda_sysctl_init(); |
368 | goto out; | 368 | goto out; |
369 | 369 | ||
@@ -405,7 +405,7 @@ static int __init init_coda(void) | |||
405 | return 0; | 405 | return 0; |
406 | out: | 406 | out: |
407 | for (i = 0; i < MAX_CODADEVS; i++) | 407 | for (i = 0; i < MAX_CODADEVS; i++) |
408 | class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); | 408 | device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); |
409 | class_destroy(coda_psdev_class); | 409 | class_destroy(coda_psdev_class); |
410 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); | 410 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); |
411 | coda_sysctl_clean(); | 411 | coda_sysctl_clean(); |
@@ -424,7 +424,7 @@ static void __exit exit_coda(void) | |||
424 | printk("coda: failed to unregister filesystem\n"); | 424 | printk("coda: failed to unregister filesystem\n"); |
425 | } | 425 | } |
426 | for (i = 0; i < MAX_CODADEVS; i++) | 426 | for (i = 0; i < MAX_CODADEVS; i++) |
427 | class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); | 427 | device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); |
428 | class_destroy(coda_psdev_class); | 428 | class_destroy(coda_psdev_class); |
429 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); | 429 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); |
430 | coda_sysctl_clean(); | 430 | coda_sysctl_clean(); |
diff --git a/fs/compat.c b/fs/compat.c index 15078ce4c04a..5216c3fd7517 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1104,10 +1104,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1104 | if (ret < 0) | 1104 | if (ret < 0) |
1105 | goto out; | 1105 | goto out; |
1106 | 1106 | ||
1107 | ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE); | ||
1108 | if (ret) | ||
1109 | goto out; | ||
1110 | |||
1111 | fnv = NULL; | 1107 | fnv = NULL; |
1112 | if (type == READ) { | 1108 | if (type == READ) { |
1113 | fn = file->f_op->read; | 1109 | fn = file->f_op->read; |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 50ed691098bc..a48dc7dd8765 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -546,7 +546,7 @@ static int populate_groups(struct config_group *group) | |||
546 | * That said, taking our i_mutex is closer to mkdir | 546 | * That said, taking our i_mutex is closer to mkdir |
547 | * emulation, and shouldn't hurt. | 547 | * emulation, and shouldn't hurt. |
548 | */ | 548 | */ |
549 | mutex_lock(&dentry->d_inode->i_mutex); | 549 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); |
550 | 550 | ||
551 | for (i = 0; group->default_groups[i]; i++) { | 551 | for (i = 0; group->default_groups[i]; i++) { |
552 | new_group = group->default_groups[i]; | 552 | new_group = group->default_groups[i]; |
@@ -1405,7 +1405,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) | |||
1405 | sd = configfs_sb->s_root->d_fsdata; | 1405 | sd = configfs_sb->s_root->d_fsdata; |
1406 | link_group(to_config_group(sd->s_element), group); | 1406 | link_group(to_config_group(sd->s_element), group); |
1407 | 1407 | ||
1408 | mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); | 1408 | mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, |
1409 | I_MUTEX_PARENT); | ||
1409 | 1410 | ||
1410 | name.name = group->cg_item.ci_name; | 1411 | name.name = group->cg_item.ci_name; |
1411 | name.len = strlen(name.name); | 1412 | name.len = strlen(name.name); |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index a3658f9a082c..397cb503a180 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -320,7 +320,7 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att | |||
320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; | 320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; |
321 | int error = 0; | 321 | int error = 0; |
322 | 322 | ||
323 | mutex_lock(&dir->d_inode->i_mutex); | 323 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL); |
324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); | 324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); |
325 | mutex_unlock(&dir->d_inode->i_mutex); | 325 | mutex_unlock(&dir->d_inode->i_mutex); |
326 | 326 | ||
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 3bf0278ea843..de3b31d0a37d 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c | |||
@@ -128,7 +128,7 @@ void configfs_release_fs(void) | |||
128 | } | 128 | } |
129 | 129 | ||
130 | 130 | ||
131 | static decl_subsys(config, NULL, NULL); | 131 | static struct kobject *config_kobj; |
132 | 132 | ||
133 | static int __init configfs_init(void) | 133 | static int __init configfs_init(void) |
134 | { | 134 | { |
@@ -140,9 +140,8 @@ static int __init configfs_init(void) | |||
140 | if (!configfs_dir_cachep) | 140 | if (!configfs_dir_cachep) |
141 | goto out; | 141 | goto out; |
142 | 142 | ||
143 | kobj_set_kset_s(&config_subsys, kernel_subsys); | 143 | config_kobj = kobject_create_and_add("config", kernel_kobj); |
144 | err = subsystem_register(&config_subsys); | 144 | if (!config_kobj) { |
145 | if (err) { | ||
146 | kmem_cache_destroy(configfs_dir_cachep); | 145 | kmem_cache_destroy(configfs_dir_cachep); |
147 | configfs_dir_cachep = NULL; | 146 | configfs_dir_cachep = NULL; |
148 | goto out; | 147 | goto out; |
@@ -151,7 +150,7 @@ static int __init configfs_init(void) | |||
151 | err = register_filesystem(&configfs_fs_type); | 150 | err = register_filesystem(&configfs_fs_type); |
152 | if (err) { | 151 | if (err) { |
153 | printk(KERN_ERR "configfs: Unable to register filesystem!\n"); | 152 | printk(KERN_ERR "configfs: Unable to register filesystem!\n"); |
154 | subsystem_unregister(&config_subsys); | 153 | kobject_put(config_kobj); |
155 | kmem_cache_destroy(configfs_dir_cachep); | 154 | kmem_cache_destroy(configfs_dir_cachep); |
156 | configfs_dir_cachep = NULL; | 155 | configfs_dir_cachep = NULL; |
157 | goto out; | 156 | goto out; |
@@ -160,7 +159,7 @@ static int __init configfs_init(void) | |||
160 | err = configfs_inode_init(); | 159 | err = configfs_inode_init(); |
161 | if (err) { | 160 | if (err) { |
162 | unregister_filesystem(&configfs_fs_type); | 161 | unregister_filesystem(&configfs_fs_type); |
163 | subsystem_unregister(&config_subsys); | 162 | kobject_put(config_kobj); |
164 | kmem_cache_destroy(configfs_dir_cachep); | 163 | kmem_cache_destroy(configfs_dir_cachep); |
165 | configfs_dir_cachep = NULL; | 164 | configfs_dir_cachep = NULL; |
166 | } | 165 | } |
@@ -171,7 +170,7 @@ out: | |||
171 | static void __exit configfs_exit(void) | 170 | static void __exit configfs_exit(void) |
172 | { | 171 | { |
173 | unregister_filesystem(&configfs_fs_type); | 172 | unregister_filesystem(&configfs_fs_type); |
174 | subsystem_unregister(&config_subsys); | 173 | kobject_put(config_kobj); |
175 | kmem_cache_destroy(configfs_dir_cachep); | 174 | kmem_cache_destroy(configfs_dir_cachep); |
176 | configfs_dir_cachep = NULL; | 175 | configfs_dir_cachep = NULL; |
177 | configfs_inode_exit(); | 176 | configfs_inode_exit(); |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 6a713b33992f..d26e2826ba5b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -426,20 +426,19 @@ exit: | |||
426 | } | 426 | } |
427 | EXPORT_SYMBOL_GPL(debugfs_rename); | 427 | EXPORT_SYMBOL_GPL(debugfs_rename); |
428 | 428 | ||
429 | static decl_subsys(debug, NULL, NULL); | 429 | static struct kobject *debug_kobj; |
430 | 430 | ||
431 | static int __init debugfs_init(void) | 431 | static int __init debugfs_init(void) |
432 | { | 432 | { |
433 | int retval; | 433 | int retval; |
434 | 434 | ||
435 | kobj_set_kset_s(&debug_subsys, kernel_subsys); | 435 | debug_kobj = kobject_create_and_add("debug", kernel_kobj); |
436 | retval = subsystem_register(&debug_subsys); | 436 | if (!debug_kobj) |
437 | if (retval) | 437 | return -EINVAL; |
438 | return retval; | ||
439 | 438 | ||
440 | retval = register_filesystem(&debug_fs_type); | 439 | retval = register_filesystem(&debug_fs_type); |
441 | if (retval) | 440 | if (retval) |
442 | subsystem_unregister(&debug_subsys); | 441 | kobject_put(debug_kobj); |
443 | return retval; | 442 | return retval; |
444 | } | 443 | } |
445 | 444 | ||
@@ -447,7 +446,7 @@ static void __exit debugfs_exit(void) | |||
447 | { | 446 | { |
448 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | 447 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); |
449 | unregister_filesystem(&debug_fs_type); | 448 | unregister_filesystem(&debug_fs_type); |
450 | subsystem_unregister(&debug_subsys); | 449 | kobject_put(debug_kobj); |
451 | } | 450 | } |
452 | 451 | ||
453 | core_initcall(debugfs_init); | 452 | core_initcall(debugfs_init); |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 6353a8384520..5c108c49cb8c 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -166,26 +166,7 @@ static struct kobj_type dlm_ktype = { | |||
166 | .release = lockspace_kobj_release, | 166 | .release = lockspace_kobj_release, |
167 | }; | 167 | }; |
168 | 168 | ||
169 | static struct kset dlm_kset = { | 169 | static struct kset *dlm_kset; |
170 | .ktype = &dlm_ktype, | ||
171 | }; | ||
172 | |||
173 | static int kobject_setup(struct dlm_ls *ls) | ||
174 | { | ||
175 | char lsname[DLM_LOCKSPACE_LEN]; | ||
176 | int error; | ||
177 | |||
178 | memset(lsname, 0, DLM_LOCKSPACE_LEN); | ||
179 | snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name); | ||
180 | |||
181 | error = kobject_set_name(&ls->ls_kobj, "%s", lsname); | ||
182 | if (error) | ||
183 | return error; | ||
184 | |||
185 | ls->ls_kobj.kset = &dlm_kset; | ||
186 | ls->ls_kobj.ktype = &dlm_ktype; | ||
187 | return 0; | ||
188 | } | ||
189 | 170 | ||
190 | static int do_uevent(struct dlm_ls *ls, int in) | 171 | static int do_uevent(struct dlm_ls *ls, int in) |
191 | { | 172 | { |
@@ -220,24 +201,22 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
220 | 201 | ||
221 | int dlm_lockspace_init(void) | 202 | int dlm_lockspace_init(void) |
222 | { | 203 | { |
223 | int error; | ||
224 | |||
225 | ls_count = 0; | 204 | ls_count = 0; |
226 | mutex_init(&ls_lock); | 205 | mutex_init(&ls_lock); |
227 | INIT_LIST_HEAD(&lslist); | 206 | INIT_LIST_HEAD(&lslist); |
228 | spin_lock_init(&lslist_lock); | 207 | spin_lock_init(&lslist_lock); |
229 | 208 | ||
230 | kobject_set_name(&dlm_kset.kobj, "dlm"); | 209 | dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); |
231 | kobj_set_kset_s(&dlm_kset, kernel_subsys); | 210 | if (!dlm_kset) { |
232 | error = kset_register(&dlm_kset); | 211 | printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); |
233 | if (error) | 212 | return -ENOMEM; |
234 | printk("dlm_lockspace_init: cannot register kset %d\n", error); | 213 | } |
235 | return error; | 214 | return 0; |
236 | } | 215 | } |
237 | 216 | ||
238 | void dlm_lockspace_exit(void) | 217 | void dlm_lockspace_exit(void) |
239 | { | 218 | { |
240 | kset_unregister(&dlm_kset); | 219 | kset_unregister(dlm_kset); |
241 | } | 220 | } |
242 | 221 | ||
243 | static int dlm_scand(void *data) | 222 | static int dlm_scand(void *data) |
@@ -549,13 +528,12 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
549 | goto out_delist; | 528 | goto out_delist; |
550 | } | 529 | } |
551 | 530 | ||
552 | error = kobject_setup(ls); | 531 | ls->ls_kobj.kset = dlm_kset; |
553 | if (error) | 532 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, |
554 | goto out_stop; | 533 | "%s", ls->ls_name); |
555 | |||
556 | error = kobject_register(&ls->ls_kobj); | ||
557 | if (error) | 534 | if (error) |
558 | goto out_stop; | 535 | goto out_stop; |
536 | kobject_uevent(&ls->ls_kobj, KOBJ_ADD); | ||
559 | 537 | ||
560 | /* let kobject handle freeing of ls if there's an error */ | 538 | /* let kobject handle freeing of ls if there's an error */ |
561 | do_unreg = 1; | 539 | do_unreg = 1; |
@@ -601,7 +579,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
601 | kfree(ls->ls_rsbtbl); | 579 | kfree(ls->ls_rsbtbl); |
602 | out_lsfree: | 580 | out_lsfree: |
603 | if (do_unreg) | 581 | if (do_unreg) |
604 | kobject_unregister(&ls->ls_kobj); | 582 | kobject_put(&ls->ls_kobj); |
605 | else | 583 | else |
606 | kfree(ls); | 584 | kfree(ls); |
607 | out: | 585 | out: |
@@ -750,7 +728,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
750 | dlm_clear_members(ls); | 728 | dlm_clear_members(ls); |
751 | dlm_clear_members_gone(ls); | 729 | dlm_clear_members_gone(ls); |
752 | kfree(ls->ls_node_array); | 730 | kfree(ls->ls_node_array); |
753 | kobject_unregister(&ls->ls_kobj); | 731 | kobject_put(&ls->ls_kobj); |
754 | /* The ls structure will be freed when the kobject is done with */ | 732 | /* The ls structure will be freed when the kobject is done with */ |
755 | 733 | ||
756 | mutex_lock(&ls_lock); | 734 | mutex_lock(&ls_lock); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e5580bcb923a..0249aa4ae181 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -734,127 +734,40 @@ static int ecryptfs_init_kmem_caches(void) | |||
734 | return 0; | 734 | return 0; |
735 | } | 735 | } |
736 | 736 | ||
737 | struct ecryptfs_obj { | 737 | static struct kobject *ecryptfs_kobj; |
738 | char *name; | ||
739 | struct list_head slot_list; | ||
740 | struct kobject kobj; | ||
741 | }; | ||
742 | |||
743 | struct ecryptfs_attribute { | ||
744 | struct attribute attr; | ||
745 | ssize_t(*show) (struct ecryptfs_obj *, char *); | ||
746 | ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t); | ||
747 | }; | ||
748 | 738 | ||
749 | static ssize_t | 739 | static ssize_t version_show(struct kobject *kobj, |
750 | ecryptfs_attr_store(struct kobject *kobj, | 740 | struct kobj_attribute *attr, char *buff) |
751 | struct attribute *attr, const char *buf, size_t len) | ||
752 | { | 741 | { |
753 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | 742 | return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); |
754 | kobj); | ||
755 | struct ecryptfs_attribute *attribute = | ||
756 | container_of(attr, struct ecryptfs_attribute, attr); | ||
757 | |||
758 | return (attribute->store ? attribute->store(obj, buf, len) : 0); | ||
759 | } | 743 | } |
760 | 744 | ||
761 | static ssize_t | 745 | static struct kobj_attribute version_attr = __ATTR_RO(version); |
762 | ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
763 | { | ||
764 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | ||
765 | kobj); | ||
766 | struct ecryptfs_attribute *attribute = | ||
767 | container_of(attr, struct ecryptfs_attribute, attr); | ||
768 | |||
769 | return (attribute->show ? attribute->show(obj, buf) : 0); | ||
770 | } | ||
771 | 746 | ||
772 | static struct sysfs_ops ecryptfs_sysfs_ops = { | 747 | static struct attribute *attributes[] = { |
773 | .show = ecryptfs_attr_show, | 748 | &version_attr.attr, |
774 | .store = ecryptfs_attr_store | 749 | NULL, |
775 | }; | 750 | }; |
776 | 751 | ||
777 | static struct kobj_type ecryptfs_ktype = { | 752 | static struct attribute_group attr_group = { |
778 | .sysfs_ops = &ecryptfs_sysfs_ops | 753 | .attrs = attributes, |
779 | }; | 754 | }; |
780 | 755 | ||
781 | static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL); | ||
782 | |||
783 | static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) | ||
784 | { | ||
785 | return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); | ||
786 | } | ||
787 | |||
788 | static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); | ||
789 | |||
790 | static struct ecryptfs_version_str_map_elem { | ||
791 | u32 flag; | ||
792 | char *str; | ||
793 | } ecryptfs_version_str_map[] = { | ||
794 | {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, | ||
795 | {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, | ||
796 | {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, | ||
797 | {ECRYPTFS_VERSIONING_POLICY, "policy"}, | ||
798 | {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}, | ||
799 | {ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"} | ||
800 | }; | ||
801 | |||
802 | static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) | ||
803 | { | ||
804 | int i; | ||
805 | int remaining = PAGE_SIZE; | ||
806 | int total_written = 0; | ||
807 | |||
808 | buff[0] = '\0'; | ||
809 | for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) { | ||
810 | int entry_size; | ||
811 | |||
812 | if (!(ECRYPTFS_VERSIONING_MASK | ||
813 | & ecryptfs_version_str_map[i].flag)) | ||
814 | continue; | ||
815 | entry_size = strlen(ecryptfs_version_str_map[i].str); | ||
816 | if ((entry_size + 2) > remaining) | ||
817 | goto out; | ||
818 | memcpy(buff, ecryptfs_version_str_map[i].str, entry_size); | ||
819 | buff[entry_size++] = '\n'; | ||
820 | buff[entry_size] = '\0'; | ||
821 | buff += entry_size; | ||
822 | total_written += entry_size; | ||
823 | remaining -= entry_size; | ||
824 | } | ||
825 | out: | ||
826 | return total_written; | ||
827 | } | ||
828 | |||
829 | static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str); | ||
830 | |||
831 | static int do_sysfs_registration(void) | 756 | static int do_sysfs_registration(void) |
832 | { | 757 | { |
833 | int rc; | 758 | int rc; |
834 | 759 | ||
835 | rc = subsystem_register(&ecryptfs_subsys); | 760 | ecryptfs_kobj = kobject_create_and_add("ecryptfs", fs_kobj); |
836 | if (rc) { | 761 | if (!ecryptfs_kobj) { |
837 | printk(KERN_ERR | 762 | printk(KERN_ERR "Unable to create ecryptfs kset\n"); |
838 | "Unable to register ecryptfs sysfs subsystem\n"); | 763 | rc = -ENOMEM; |
839 | goto out; | ||
840 | } | ||
841 | rc = sysfs_create_file(&ecryptfs_subsys.kobj, | ||
842 | &sysfs_attr_version.attr); | ||
843 | if (rc) { | ||
844 | printk(KERN_ERR | ||
845 | "Unable to create ecryptfs version attribute\n"); | ||
846 | subsystem_unregister(&ecryptfs_subsys); | ||
847 | goto out; | 764 | goto out; |
848 | } | 765 | } |
849 | rc = sysfs_create_file(&ecryptfs_subsys.kobj, | 766 | rc = sysfs_create_group(ecryptfs_kobj, &attr_group); |
850 | &sysfs_attr_version_str.attr); | ||
851 | if (rc) { | 767 | if (rc) { |
852 | printk(KERN_ERR | 768 | printk(KERN_ERR |
853 | "Unable to create ecryptfs version_str attribute\n"); | 769 | "Unable to create ecryptfs version attributes\n"); |
854 | sysfs_remove_file(&ecryptfs_subsys.kobj, | 770 | kobject_put(ecryptfs_kobj); |
855 | &sysfs_attr_version.attr); | ||
856 | subsystem_unregister(&ecryptfs_subsys); | ||
857 | goto out; | ||
858 | } | 771 | } |
859 | out: | 772 | out: |
860 | return rc; | 773 | return rc; |
@@ -862,11 +775,8 @@ out: | |||
862 | 775 | ||
863 | static void do_sysfs_unregistration(void) | 776 | static void do_sysfs_unregistration(void) |
864 | { | 777 | { |
865 | sysfs_remove_file(&ecryptfs_subsys.kobj, | 778 | sysfs_remove_group(ecryptfs_kobj, &attr_group); |
866 | &sysfs_attr_version.attr); | 779 | kobject_put(ecryptfs_kobj); |
867 | sysfs_remove_file(&ecryptfs_subsys.kobj, | ||
868 | &sysfs_attr_version_str.attr); | ||
869 | subsystem_unregister(&ecryptfs_subsys); | ||
870 | } | 780 | } |
871 | 781 | ||
872 | static int __init ecryptfs_init(void) | 782 | static int __init ecryptfs_init(void) |
@@ -894,7 +804,6 @@ static int __init ecryptfs_init(void) | |||
894 | printk(KERN_ERR "Failed to register filesystem\n"); | 804 | printk(KERN_ERR "Failed to register filesystem\n"); |
895 | goto out_free_kmem_caches; | 805 | goto out_free_kmem_caches; |
896 | } | 806 | } |
897 | kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); | ||
898 | rc = do_sysfs_registration(); | 807 | rc = do_sysfs_registration(); |
899 | if (rc) { | 808 | if (rc) { |
900 | printk(KERN_ERR "sysfs registration failed\n"); | 809 | printk(KERN_ERR "sysfs registration failed\n"); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0fca82021d76..300324bd563c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -482,8 +482,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | |||
482 | if (wbc->nr_to_write <= 0) | 482 | if (wbc->nr_to_write <= 0) |
483 | break; | 483 | break; |
484 | } | 484 | } |
485 | if (!list_empty(&sb->s_more_io)) | ||
486 | wbc->more_io = 1; | ||
487 | return; /* Leave any unwritten inodes on s_io */ | 485 | return; /* Leave any unwritten inodes on s_io */ |
488 | } | 486 | } |
489 | 487 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 84f9f7dfdf5b..e5e80d1a4687 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -744,9 +744,6 @@ static inline void unregister_fuseblk(void) | |||
744 | } | 744 | } |
745 | #endif | 745 | #endif |
746 | 746 | ||
747 | static decl_subsys(fuse, NULL, NULL); | ||
748 | static decl_subsys(connections, NULL, NULL); | ||
749 | |||
750 | static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) | 747 | static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) |
751 | { | 748 | { |
752 | struct inode * inode = foo; | 749 | struct inode * inode = foo; |
@@ -791,32 +788,37 @@ static void fuse_fs_cleanup(void) | |||
791 | kmem_cache_destroy(fuse_inode_cachep); | 788 | kmem_cache_destroy(fuse_inode_cachep); |
792 | } | 789 | } |
793 | 790 | ||
791 | static struct kobject *fuse_kobj; | ||
792 | static struct kobject *connections_kobj; | ||
793 | |||
794 | static int fuse_sysfs_init(void) | 794 | static int fuse_sysfs_init(void) |
795 | { | 795 | { |
796 | int err; | 796 | int err; |
797 | 797 | ||
798 | kobj_set_kset_s(&fuse_subsys, fs_subsys); | 798 | fuse_kobj = kobject_create_and_add("fuse", fs_kobj); |
799 | err = subsystem_register(&fuse_subsys); | 799 | if (!fuse_kobj) { |
800 | if (err) | 800 | err = -ENOMEM; |
801 | goto out_err; | 801 | goto out_err; |
802 | } | ||
802 | 803 | ||
803 | kobj_set_kset_s(&connections_subsys, fuse_subsys); | 804 | connections_kobj = kobject_create_and_add("connections", fuse_kobj); |
804 | err = subsystem_register(&connections_subsys); | 805 | if (!connections_kobj) { |
805 | if (err) | 806 | err = -ENOMEM; |
806 | goto out_fuse_unregister; | 807 | goto out_fuse_unregister; |
808 | } | ||
807 | 809 | ||
808 | return 0; | 810 | return 0; |
809 | 811 | ||
810 | out_fuse_unregister: | 812 | out_fuse_unregister: |
811 | subsystem_unregister(&fuse_subsys); | 813 | kobject_put(fuse_kobj); |
812 | out_err: | 814 | out_err: |
813 | return err; | 815 | return err; |
814 | } | 816 | } |
815 | 817 | ||
816 | static void fuse_sysfs_cleanup(void) | 818 | static void fuse_sysfs_cleanup(void) |
817 | { | 819 | { |
818 | subsystem_unregister(&connections_subsys); | 820 | kobject_put(connections_kobj); |
819 | subsystem_unregister(&fuse_subsys); | 821 | kobject_put(fuse_kobj); |
820 | } | 822 | } |
821 | 823 | ||
822 | static int __init fuse_init(void) | 824 | static int __init fuse_init(void) |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 04ad0caebedb..8fff11058cee 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
@@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o | |||
2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | 2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ |
3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ | 3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ |
4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ | 4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ |
5 | ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
7 | 7 | ||
8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ | 8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93fa427bb5f5..e4effc47abfc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -59,7 +59,6 @@ struct strip_mine { | |||
59 | static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | 59 | static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, |
60 | u64 block, struct page *page) | 60 | u64 block, struct page *page) |
61 | { | 61 | { |
62 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
63 | struct inode *inode = &ip->i_inode; | 62 | struct inode *inode = &ip->i_inode; |
64 | struct buffer_head *bh; | 63 | struct buffer_head *bh; |
65 | int release = 0; | 64 | int release = 0; |
@@ -95,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
95 | set_buffer_uptodate(bh); | 94 | set_buffer_uptodate(bh); |
96 | if (!gfs2_is_jdata(ip)) | 95 | if (!gfs2_is_jdata(ip)) |
97 | mark_buffer_dirty(bh); | 96 | mark_buffer_dirty(bh); |
98 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 97 | if (!gfs2_is_writeback(ip)) |
99 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 98 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
100 | 99 | ||
101 | if (release) { | 100 | if (release) { |
@@ -453,8 +452,8 @@ static inline void bmap_unlock(struct inode *inode, int create) | |||
453 | * Returns: errno | 452 | * Returns: errno |
454 | */ | 453 | */ |
455 | 454 | ||
456 | int gfs2_block_map(struct inode *inode, u64 lblock, int create, | 455 | int gfs2_block_map(struct inode *inode, sector_t lblock, |
457 | struct buffer_head *bh_map) | 456 | struct buffer_head *bh_map, int create) |
458 | { | 457 | { |
459 | struct gfs2_inode *ip = GFS2_I(inode); | 458 | struct gfs2_inode *ip = GFS2_I(inode); |
460 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 459 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -470,6 +469,7 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
470 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; | 469 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; |
471 | struct metapath mp; | 470 | struct metapath mp; |
472 | u64 size; | 471 | u64 size; |
472 | struct buffer_head *dibh = NULL; | ||
473 | 473 | ||
474 | BUG_ON(maxlen == 0); | 474 | BUG_ON(maxlen == 0); |
475 | 475 | ||
@@ -500,6 +500,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
500 | error = gfs2_meta_inode_buffer(ip, &bh); | 500 | error = gfs2_meta_inode_buffer(ip, &bh); |
501 | if (error) | 501 | if (error) |
502 | goto out_fail; | 502 | goto out_fail; |
503 | dibh = bh; | ||
504 | get_bh(dibh); | ||
503 | 505 | ||
504 | for (x = 0; x < end_of_metadata; x++) { | 506 | for (x = 0; x < end_of_metadata; x++) { |
505 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); | 507 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); |
@@ -518,13 +520,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
518 | if (boundary) | 520 | if (boundary) |
519 | set_buffer_boundary(bh_map); | 521 | set_buffer_boundary(bh_map); |
520 | if (new) { | 522 | if (new) { |
521 | struct buffer_head *dibh; | 523 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
522 | error = gfs2_meta_inode_buffer(ip, &dibh); | 524 | gfs2_dinode_out(ip, dibh->b_data); |
523 | if (!error) { | ||
524 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
525 | gfs2_dinode_out(ip, dibh->b_data); | ||
526 | brelse(dibh); | ||
527 | } | ||
528 | set_buffer_new(bh_map); | 525 | set_buffer_new(bh_map); |
529 | goto out_brelse; | 526 | goto out_brelse; |
530 | } | 527 | } |
@@ -545,6 +542,8 @@ out_brelse: | |||
545 | out_ok: | 542 | out_ok: |
546 | error = 0; | 543 | error = 0; |
547 | out_fail: | 544 | out_fail: |
545 | if (dibh) | ||
546 | brelse(dibh); | ||
548 | bmap_unlock(inode, create); | 547 | bmap_unlock(inode, create); |
549 | return error; | 548 | return error; |
550 | } | 549 | } |
@@ -560,7 +559,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi | |||
560 | BUG_ON(!new); | 559 | BUG_ON(!new); |
561 | 560 | ||
562 | bh.b_size = 1 << (inode->i_blkbits + 5); | 561 | bh.b_size = 1 << (inode->i_blkbits + 5); |
563 | ret = gfs2_block_map(inode, lblock, create, &bh); | 562 | ret = gfs2_block_map(inode, lblock, &bh, create); |
564 | *extlen = bh.b_size >> inode->i_blkbits; | 563 | *extlen = bh.b_size >> inode->i_blkbits; |
565 | *dblock = bh.b_blocknr; | 564 | *dblock = bh.b_blocknr; |
566 | if (buffer_new(&bh)) | 565 | if (buffer_new(&bh)) |
@@ -684,7 +683,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
684 | if (metadata) | 683 | if (metadata) |
685 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 684 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
686 | 685 | ||
687 | error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh); | 686 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); |
688 | if (error) | 687 | if (error) |
689 | return error; | 688 | return error; |
690 | 689 | ||
@@ -786,7 +785,7 @@ out_rg_gunlock: | |||
786 | out_rlist: | 785 | out_rlist: |
787 | gfs2_rlist_free(&rlist); | 786 | gfs2_rlist_free(&rlist); |
788 | out: | 787 | out: |
789 | gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh); | 788 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); |
790 | return error; | 789 | return error; |
791 | } | 790 | } |
792 | 791 | ||
@@ -879,7 +878,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
879 | { | 878 | { |
880 | struct inode *inode = mapping->host; | 879 | struct inode *inode = mapping->host; |
881 | struct gfs2_inode *ip = GFS2_I(inode); | 880 | struct gfs2_inode *ip = GFS2_I(inode); |
882 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
883 | loff_t from = inode->i_size; | 881 | loff_t from = inode->i_size; |
884 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 882 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
885 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 883 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
@@ -911,7 +909,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
911 | err = 0; | 909 | err = 0; |
912 | 910 | ||
913 | if (!buffer_mapped(bh)) { | 911 | if (!buffer_mapped(bh)) { |
914 | gfs2_get_block(inode, iblock, bh, 0); | 912 | gfs2_block_map(inode, iblock, bh, 0); |
915 | /* unmapped? It's a hole - nothing to do */ | 913 | /* unmapped? It's a hole - nothing to do */ |
916 | if (!buffer_mapped(bh)) | 914 | if (!buffer_mapped(bh)) |
917 | goto unlock; | 915 | goto unlock; |
@@ -931,7 +929,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
931 | err = 0; | 929 | err = 0; |
932 | } | 930 | } |
933 | 931 | ||
934 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 932 | if (!gfs2_is_writeback(ip)) |
935 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 933 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
936 | 934 | ||
937 | zero_user_page(page, offset, length, KM_USER0); | 935 | zero_user_page(page, offset, length, KM_USER0); |
@@ -1224,8 +1222,13 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1224 | do_div(lblock_stop, bsize); | 1222 | do_div(lblock_stop, bsize); |
1225 | } else { | 1223 | } else { |
1226 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | 1224 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; |
1225 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; | ||
1227 | lblock = offset >> shift; | 1226 | lblock = offset >> shift; |
1228 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1227 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
1228 | if (lblock_stop > end_of_file) { | ||
1229 | *alloc_required = 1; | ||
1230 | return 0; | ||
1231 | } | ||
1229 | } | 1232 | } |
1230 | 1233 | ||
1231 | for (; lblock < lblock_stop; lblock += extlen) { | 1234 | for (; lblock < lblock_stop; lblock += extlen) { |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index ac2fd04370dc..4e6cde2943bd 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
@@ -15,7 +15,7 @@ struct gfs2_inode; | |||
15 | struct page; | 15 | struct page; |
16 | 16 | ||
17 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 17 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
18 | int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh); | 18 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); |
19 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 19 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); |
20 | 20 | ||
21 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 21 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); |
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 3731ab0771d5..e51991947d2c 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c | |||
@@ -83,56 +83,6 @@ int gfs2_recoverd(void *data) | |||
83 | } | 83 | } |
84 | 84 | ||
85 | /** | 85 | /** |
86 | * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks | ||
87 | * @sdp: Pointer to GFS2 superblock | ||
88 | * | ||
89 | * Also, periodically check to make sure that we're using the most recent | ||
90 | * journal index. | ||
91 | */ | ||
92 | |||
93 | int gfs2_logd(void *data) | ||
94 | { | ||
95 | struct gfs2_sbd *sdp = data; | ||
96 | struct gfs2_holder ji_gh; | ||
97 | unsigned long t; | ||
98 | int need_flush; | ||
99 | |||
100 | while (!kthread_should_stop()) { | ||
101 | /* Advance the log tail */ | ||
102 | |||
103 | t = sdp->sd_log_flush_time + | ||
104 | gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; | ||
105 | |||
106 | gfs2_ail1_empty(sdp, DIO_ALL); | ||
107 | gfs2_log_lock(sdp); | ||
108 | need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); | ||
109 | gfs2_log_unlock(sdp); | ||
110 | if (need_flush || time_after_eq(jiffies, t)) { | ||
111 | gfs2_log_flush(sdp, NULL); | ||
112 | sdp->sd_log_flush_time = jiffies; | ||
113 | } | ||
114 | |||
115 | /* Check for latest journal index */ | ||
116 | |||
117 | t = sdp->sd_jindex_refresh_time + | ||
118 | gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; | ||
119 | |||
120 | if (time_after_eq(jiffies, t)) { | ||
121 | if (!gfs2_jindex_hold(sdp, &ji_gh)) | ||
122 | gfs2_glock_dq_uninit(&ji_gh); | ||
123 | sdp->sd_jindex_refresh_time = jiffies; | ||
124 | } | ||
125 | |||
126 | t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; | ||
127 | if (freezing(current)) | ||
128 | refrigerator(); | ||
129 | schedule_timeout_interruptible(t); | ||
130 | } | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * gfs2_quotad - Write cached quota changes into the quota file | 86 | * gfs2_quotad - Write cached quota changes into the quota file |
137 | * @sdp: Pointer to GFS2 superblock | 87 | * @sdp: Pointer to GFS2 superblock |
138 | * | 88 | * |
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h index 0de9b3557955..4be084fb6a62 100644 --- a/fs/gfs2/daemon.h +++ b/fs/gfs2/daemon.h | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | int gfs2_glockd(void *data); | 13 | int gfs2_glockd(void *data); |
14 | int gfs2_recoverd(void *data); | 14 | int gfs2_recoverd(void *data); |
15 | int gfs2_logd(void *data); | ||
16 | int gfs2_quotad(void *data); | 15 | int gfs2_quotad(void *data); |
17 | 16 | ||
18 | #endif /* __DAEMON_DOT_H__ */ | 17 | #endif /* __DAEMON_DOT_H__ */ |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 9949bb746a52..57e2ed932adc 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1876,7 +1876,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1876 | if (error) | 1876 | if (error) |
1877 | goto out; | 1877 | goto out; |
1878 | 1878 | ||
1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh); | 1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); |
1880 | if (error) | 1880 | if (error) |
1881 | goto out_qs; | 1881 | goto out_qs; |
1882 | 1882 | ||
@@ -1949,7 +1949,7 @@ out_rg_gunlock: | |||
1949 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); | 1949 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); |
1950 | out_rlist: | 1950 | out_rlist: |
1951 | gfs2_rlist_free(&rlist); | 1951 | gfs2_rlist_free(&rlist); |
1952 | gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh); | 1952 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); |
1953 | out_qs: | 1953 | out_qs: |
1954 | gfs2_quota_unhold(dip); | 1954 | gfs2_quota_unhold(dip); |
1955 | out: | 1955 | out: |
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index aa8dbf303f6d..f114ba2b3557 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c | |||
@@ -56,46 +56,6 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) | |||
56 | return type; | 56 | return type; |
57 | } | 57 | } |
58 | 58 | ||
59 | static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
60 | { | ||
61 | struct inode *inode = &ip->i_inode; | ||
62 | int error = permission(inode, MAY_READ, NULL); | ||
63 | if (error) | ||
64 | return error; | ||
65 | |||
66 | return gfs2_ea_get_i(ip, er); | ||
67 | } | ||
68 | |||
69 | static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
70 | { | ||
71 | struct inode *inode = &ip->i_inode; | ||
72 | |||
73 | if (S_ISREG(inode->i_mode) || | ||
74 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
75 | int error = permission(inode, MAY_WRITE, NULL); | ||
76 | if (error) | ||
77 | return error; | ||
78 | } else | ||
79 | return -EPERM; | ||
80 | |||
81 | return gfs2_ea_set_i(ip, er); | ||
82 | } | ||
83 | |||
84 | static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
85 | { | ||
86 | struct inode *inode = &ip->i_inode; | ||
87 | |||
88 | if (S_ISREG(inode->i_mode) || | ||
89 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
90 | int error = permission(inode, MAY_WRITE, NULL); | ||
91 | if (error) | ||
92 | return error; | ||
93 | } else | ||
94 | return -EPERM; | ||
95 | |||
96 | return gfs2_ea_remove_i(ip, er); | ||
97 | } | ||
98 | |||
99 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 59 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) |
100 | { | 60 | { |
101 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && | 61 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && |
@@ -108,8 +68,6 @@ static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
108 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) | 68 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) |
109 | return -EOPNOTSUPP; | 69 | return -EOPNOTSUPP; |
110 | 70 | ||
111 | |||
112 | |||
113 | return gfs2_ea_get_i(ip, er); | 71 | return gfs2_ea_get_i(ip, er); |
114 | } | 72 | } |
115 | 73 | ||
@@ -170,40 +128,10 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
170 | return gfs2_ea_remove_i(ip, er); | 128 | return gfs2_ea_remove_i(ip, er); |
171 | } | 129 | } |
172 | 130 | ||
173 | static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
174 | { | ||
175 | struct inode *inode = &ip->i_inode; | ||
176 | int error = permission(inode, MAY_READ, NULL); | ||
177 | if (error) | ||
178 | return error; | ||
179 | |||
180 | return gfs2_ea_get_i(ip, er); | ||
181 | } | ||
182 | |||
183 | static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
184 | { | ||
185 | struct inode *inode = &ip->i_inode; | ||
186 | int error = permission(inode, MAY_WRITE, NULL); | ||
187 | if (error) | ||
188 | return error; | ||
189 | |||
190 | return gfs2_ea_set_i(ip, er); | ||
191 | } | ||
192 | |||
193 | static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
194 | { | ||
195 | struct inode *inode = &ip->i_inode; | ||
196 | int error = permission(inode, MAY_WRITE, NULL); | ||
197 | if (error) | ||
198 | return error; | ||
199 | |||
200 | return gfs2_ea_remove_i(ip, er); | ||
201 | } | ||
202 | |||
203 | static const struct gfs2_eattr_operations gfs2_user_eaops = { | 131 | static const struct gfs2_eattr_operations gfs2_user_eaops = { |
204 | .eo_get = user_eo_get, | 132 | .eo_get = gfs2_ea_get_i, |
205 | .eo_set = user_eo_set, | 133 | .eo_set = gfs2_ea_set_i, |
206 | .eo_remove = user_eo_remove, | 134 | .eo_remove = gfs2_ea_remove_i, |
207 | .eo_name = "user", | 135 | .eo_name = "user", |
208 | }; | 136 | }; |
209 | 137 | ||
@@ -215,9 +143,9 @@ const struct gfs2_eattr_operations gfs2_system_eaops = { | |||
215 | }; | 143 | }; |
216 | 144 | ||
217 | static const struct gfs2_eattr_operations gfs2_security_eaops = { | 145 | static const struct gfs2_eattr_operations gfs2_security_eaops = { |
218 | .eo_get = security_eo_get, | 146 | .eo_get = gfs2_ea_get_i, |
219 | .eo_set = security_eo_set, | 147 | .eo_set = gfs2_ea_set_i, |
220 | .eo_remove = security_eo_remove, | 148 | .eo_remove = gfs2_ea_remove_i, |
221 | .eo_name = "security", | 149 | .eo_name = "security", |
222 | }; | 150 | }; |
223 | 151 | ||
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 2a7435b5c4dc..bee99704ea10 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -1418,7 +1418,7 @@ out: | |||
1418 | static int ea_dealloc_block(struct gfs2_inode *ip) | 1418 | static int ea_dealloc_block(struct gfs2_inode *ip) |
1419 | { | 1419 | { |
1420 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1420 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1421 | struct gfs2_alloc *al = &ip->i_alloc; | 1421 | struct gfs2_alloc *al = ip->i_alloc; |
1422 | struct gfs2_rgrpd *rgd; | 1422 | struct gfs2_rgrpd *rgd; |
1423 | struct buffer_head *dibh; | 1423 | struct buffer_head *dibh; |
1424 | int error; | 1424 | int error; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a37efe4aae6f..80e09c50590a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -217,7 +217,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) | |||
217 | if (atomic_dec_and_test(&gl->gl_ref)) { | 217 | if (atomic_dec_and_test(&gl->gl_ref)) { |
218 | hlist_del(&gl->gl_list); | 218 | hlist_del(&gl->gl_list); |
219 | write_unlock(gl_lock_addr(gl->gl_hash)); | 219 | write_unlock(gl_lock_addr(gl->gl_hash)); |
220 | BUG_ON(spin_is_locked(&gl->gl_spin)); | ||
221 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); | 220 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); |
222 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 221 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); |
223 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 222 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); |
@@ -346,7 +345,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
346 | gl->gl_object = NULL; | 345 | gl->gl_object = NULL; |
347 | gl->gl_sbd = sdp; | 346 | gl->gl_sbd = sdp; |
348 | gl->gl_aspace = NULL; | 347 | gl->gl_aspace = NULL; |
349 | lops_init_le(&gl->gl_le, &gfs2_glock_lops); | ||
350 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); | 348 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); |
351 | 349 | ||
352 | /* If this glock protects actual on-disk data or metadata blocks, | 350 | /* If this glock protects actual on-disk data or metadata blocks, |
@@ -461,7 +459,6 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
461 | 459 | ||
462 | static void gfs2_demote_wake(struct gfs2_glock *gl) | 460 | static void gfs2_demote_wake(struct gfs2_glock *gl) |
463 | { | 461 | { |
464 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | ||
465 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | 462 | gl->gl_demote_state = LM_ST_EXCLUSIVE; |
466 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | 463 | clear_bit(GLF_DEMOTE, &gl->gl_flags); |
467 | smp_mb__after_clear_bit(); | 464 | smp_mb__after_clear_bit(); |
@@ -507,21 +504,12 @@ static int rq_mutex(struct gfs2_holder *gh) | |||
507 | static int rq_promote(struct gfs2_holder *gh) | 504 | static int rq_promote(struct gfs2_holder *gh) |
508 | { | 505 | { |
509 | struct gfs2_glock *gl = gh->gh_gl; | 506 | struct gfs2_glock *gl = gh->gh_gl; |
510 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
511 | 507 | ||
512 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | 508 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { |
513 | if (list_empty(&gl->gl_holders)) { | 509 | if (list_empty(&gl->gl_holders)) { |
514 | gl->gl_req_gh = gh; | 510 | gl->gl_req_gh = gh; |
515 | set_bit(GLF_LOCK, &gl->gl_flags); | 511 | set_bit(GLF_LOCK, &gl->gl_flags); |
516 | spin_unlock(&gl->gl_spin); | 512 | spin_unlock(&gl->gl_spin); |
517 | |||
518 | if (atomic_read(&sdp->sd_reclaim_count) > | ||
519 | gfs2_tune_get(sdp, gt_reclaim_limit) && | ||
520 | !(gh->gh_flags & LM_FLAG_PRIORITY)) { | ||
521 | gfs2_reclaim_glock(sdp); | ||
522 | gfs2_reclaim_glock(sdp); | ||
523 | } | ||
524 | |||
525 | gfs2_glock_xmote_th(gh->gh_gl, gh); | 513 | gfs2_glock_xmote_th(gh->gh_gl, gh); |
526 | spin_lock(&gl->gl_spin); | 514 | spin_lock(&gl->gl_spin); |
527 | } | 515 | } |
@@ -567,7 +555,10 @@ static int rq_demote(struct gfs2_glock *gl) | |||
567 | gfs2_demote_wake(gl); | 555 | gfs2_demote_wake(gl); |
568 | return 0; | 556 | return 0; |
569 | } | 557 | } |
558 | |||
570 | set_bit(GLF_LOCK, &gl->gl_flags); | 559 | set_bit(GLF_LOCK, &gl->gl_flags); |
560 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
561 | |||
571 | if (gl->gl_demote_state == LM_ST_UNLOCKED || | 562 | if (gl->gl_demote_state == LM_ST_UNLOCKED || |
572 | gl->gl_state != LM_ST_EXCLUSIVE) { | 563 | gl->gl_state != LM_ST_EXCLUSIVE) { |
573 | spin_unlock(&gl->gl_spin); | 564 | spin_unlock(&gl->gl_spin); |
@@ -576,7 +567,9 @@ static int rq_demote(struct gfs2_glock *gl) | |||
576 | spin_unlock(&gl->gl_spin); | 567 | spin_unlock(&gl->gl_spin); |
577 | gfs2_glock_xmote_th(gl, NULL); | 568 | gfs2_glock_xmote_th(gl, NULL); |
578 | } | 569 | } |
570 | |||
579 | spin_lock(&gl->gl_spin); | 571 | spin_lock(&gl->gl_spin); |
572 | clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
580 | 573 | ||
581 | return 0; | 574 | return 0; |
582 | } | 575 | } |
@@ -598,23 +591,18 @@ static void run_queue(struct gfs2_glock *gl) | |||
598 | if (!list_empty(&gl->gl_waiters1)) { | 591 | if (!list_empty(&gl->gl_waiters1)) { |
599 | gh = list_entry(gl->gl_waiters1.next, | 592 | gh = list_entry(gl->gl_waiters1.next, |
600 | struct gfs2_holder, gh_list); | 593 | struct gfs2_holder, gh_list); |
601 | 594 | blocked = rq_mutex(gh); | |
602 | if (test_bit(HIF_MUTEX, &gh->gh_iflags)) | ||
603 | blocked = rq_mutex(gh); | ||
604 | else | ||
605 | gfs2_assert_warn(gl->gl_sbd, 0); | ||
606 | |||
607 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | 595 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { |
608 | blocked = rq_demote(gl); | 596 | blocked = rq_demote(gl); |
597 | if (gl->gl_waiters2 && !blocked) { | ||
598 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
599 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
600 | } | ||
601 | gl->gl_waiters2 = 0; | ||
609 | } else if (!list_empty(&gl->gl_waiters3)) { | 602 | } else if (!list_empty(&gl->gl_waiters3)) { |
610 | gh = list_entry(gl->gl_waiters3.next, | 603 | gh = list_entry(gl->gl_waiters3.next, |
611 | struct gfs2_holder, gh_list); | 604 | struct gfs2_holder, gh_list); |
612 | 605 | blocked = rq_promote(gh); | |
613 | if (test_bit(HIF_PROMOTE, &gh->gh_iflags)) | ||
614 | blocked = rq_promote(gh); | ||
615 | else | ||
616 | gfs2_assert_warn(gl->gl_sbd, 0); | ||
617 | |||
618 | } else | 606 | } else |
619 | break; | 607 | break; |
620 | 608 | ||
@@ -632,27 +620,21 @@ static void run_queue(struct gfs2_glock *gl) | |||
632 | 620 | ||
633 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) | 621 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) |
634 | { | 622 | { |
635 | struct gfs2_holder gh; | ||
636 | |||
637 | gfs2_holder_init(gl, 0, 0, &gh); | ||
638 | set_bit(HIF_MUTEX, &gh.gh_iflags); | ||
639 | if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) | ||
640 | BUG(); | ||
641 | |||
642 | spin_lock(&gl->gl_spin); | 623 | spin_lock(&gl->gl_spin); |
643 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 624 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
625 | struct gfs2_holder gh; | ||
626 | |||
627 | gfs2_holder_init(gl, 0, 0, &gh); | ||
628 | set_bit(HIF_WAIT, &gh.gh_iflags); | ||
644 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | 629 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); |
630 | spin_unlock(&gl->gl_spin); | ||
631 | wait_on_holder(&gh); | ||
632 | gfs2_holder_uninit(&gh); | ||
645 | } else { | 633 | } else { |
646 | gl->gl_owner_pid = current->pid; | 634 | gl->gl_owner_pid = current->pid; |
647 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 635 | gl->gl_ip = (unsigned long)__builtin_return_address(0); |
648 | clear_bit(HIF_WAIT, &gh.gh_iflags); | 636 | spin_unlock(&gl->gl_spin); |
649 | smp_mb(); | ||
650 | wake_up_bit(&gh.gh_iflags, HIF_WAIT); | ||
651 | } | 637 | } |
652 | spin_unlock(&gl->gl_spin); | ||
653 | |||
654 | wait_on_holder(&gh); | ||
655 | gfs2_holder_uninit(&gh); | ||
656 | } | 638 | } |
657 | 639 | ||
658 | /** | 640 | /** |
@@ -691,7 +673,6 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | |||
691 | gl->gl_owner_pid = 0; | 673 | gl->gl_owner_pid = 0; |
692 | gl->gl_ip = 0; | 674 | gl->gl_ip = 0; |
693 | run_queue(gl); | 675 | run_queue(gl); |
694 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | ||
695 | spin_unlock(&gl->gl_spin); | 676 | spin_unlock(&gl->gl_spin); |
696 | } | 677 | } |
697 | 678 | ||
@@ -722,7 +703,10 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
722 | } | 703 | } |
723 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 704 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
724 | gl->gl_demote_state != state) { | 705 | gl->gl_demote_state != state) { |
725 | gl->gl_demote_state = LM_ST_UNLOCKED; | 706 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) |
707 | gl->gl_waiters2 = 1; | ||
708 | else | ||
709 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
726 | } | 710 | } |
727 | spin_unlock(&gl->gl_spin); | 711 | spin_unlock(&gl->gl_spin); |
728 | } | 712 | } |
@@ -943,8 +927,8 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
943 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 927 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
944 | unsigned int ret; | 928 | unsigned int ret; |
945 | 929 | ||
946 | if (glops->go_drop_th) | 930 | if (glops->go_xmote_th) |
947 | glops->go_drop_th(gl); | 931 | glops->go_xmote_th(gl); |
948 | 932 | ||
949 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 933 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
950 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 934 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
@@ -1156,8 +1140,6 @@ restart: | |||
1156 | return -EIO; | 1140 | return -EIO; |
1157 | } | 1141 | } |
1158 | 1142 | ||
1159 | set_bit(HIF_PROMOTE, &gh->gh_iflags); | ||
1160 | |||
1161 | spin_lock(&gl->gl_spin); | 1143 | spin_lock(&gl->gl_spin); |
1162 | add_to_queue(gh); | 1144 | add_to_queue(gh); |
1163 | run_queue(gl); | 1145 | run_queue(gl); |
@@ -1248,12 +1230,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1248 | list_del_init(&gh->gh_list); | 1230 | list_del_init(&gh->gh_list); |
1249 | 1231 | ||
1250 | if (list_empty(&gl->gl_holders)) { | 1232 | if (list_empty(&gl->gl_holders)) { |
1251 | spin_unlock(&gl->gl_spin); | 1233 | if (glops->go_unlock) { |
1252 | 1234 | spin_unlock(&gl->gl_spin); | |
1253 | if (glops->go_unlock) | ||
1254 | glops->go_unlock(gh); | 1235 | glops->go_unlock(gh); |
1255 | 1236 | spin_lock(&gl->gl_spin); | |
1256 | spin_lock(&gl->gl_spin); | 1237 | } |
1257 | gl->gl_stamp = jiffies; | 1238 | gl->gl_stamp = jiffies; |
1258 | } | 1239 | } |
1259 | 1240 | ||
@@ -1910,8 +1891,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | |||
1910 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | 1891 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); |
1911 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | 1892 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); |
1912 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | 1893 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); |
1913 | print_dbg(gi, " le = %s\n", | ||
1914 | (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); | ||
1915 | print_dbg(gi, " reclaim = %s\n", | 1894 | print_dbg(gi, " reclaim = %s\n", |
1916 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | 1895 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); |
1917 | if (gl->gl_aspace) | 1896 | if (gl->gl_aspace) |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4670dcb2a877..c663b7a0f410 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -56,7 +56,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
56 | bd = list_entry(head->next, struct gfs2_bufdata, | 56 | bd = list_entry(head->next, struct gfs2_bufdata, |
57 | bd_ail_gl_list); | 57 | bd_ail_gl_list); |
58 | bh = bd->bd_bh; | 58 | bh = bd->bd_bh; |
59 | gfs2_remove_from_ail(NULL, bd); | 59 | gfs2_remove_from_ail(bd); |
60 | bd->bd_bh = NULL; | 60 | bd->bd_bh = NULL; |
61 | bh->b_private = NULL; | 61 | bh->b_private = NULL; |
62 | bd->bd_blkno = bh->b_blocknr; | 62 | bd->bd_blkno = bh->b_blocknr; |
@@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) | |||
86 | if (!ip || !S_ISREG(inode->i_mode)) | 86 | if (!ip || !S_ISREG(inode->i_mode)) |
87 | return; | 87 | return; |
88 | 88 | ||
89 | if (!test_bit(GIF_PAGED, &ip->i_flags)) | ||
90 | return; | ||
91 | |||
92 | unmap_shared_mapping_range(inode->i_mapping, 0, 0); | 89 | unmap_shared_mapping_range(inode->i_mapping, 0, 0); |
93 | |||
94 | if (test_bit(GIF_SW_PAGED, &ip->i_flags)) | 90 | if (test_bit(GIF_SW_PAGED, &ip->i_flags)) |
95 | set_bit(GLF_DIRTY, &gl->gl_flags); | 91 | set_bit(GLF_DIRTY, &gl->gl_flags); |
96 | 92 | ||
97 | clear_bit(GIF_SW_PAGED, &ip->i_flags); | ||
98 | } | 93 | } |
99 | 94 | ||
100 | /** | 95 | /** |
@@ -143,44 +138,34 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
143 | static void inode_go_sync(struct gfs2_glock *gl) | 138 | static void inode_go_sync(struct gfs2_glock *gl) |
144 | { | 139 | { |
145 | struct gfs2_inode *ip = gl->gl_object; | 140 | struct gfs2_inode *ip = gl->gl_object; |
141 | struct address_space *metamapping = gl->gl_aspace->i_mapping; | ||
142 | int error; | ||
143 | |||
144 | if (gl->gl_state != LM_ST_UNLOCKED) | ||
145 | gfs2_pte_inval(gl); | ||
146 | if (gl->gl_state != LM_ST_EXCLUSIVE) | ||
147 | return; | ||
146 | 148 | ||
147 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | 149 | if (ip && !S_ISREG(ip->i_inode.i_mode)) |
148 | ip = NULL; | 150 | ip = NULL; |
149 | 151 | ||
150 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | 152 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { |
151 | if (ip && !gfs2_is_jdata(ip)) | ||
152 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
153 | gfs2_log_flush(gl->gl_sbd, gl); | 153 | gfs2_log_flush(gl->gl_sbd, gl); |
154 | if (ip && gfs2_is_jdata(ip)) | 154 | filemap_fdatawrite(metamapping); |
155 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
156 | gfs2_meta_sync(gl); | ||
157 | if (ip) { | 155 | if (ip) { |
158 | struct address_space *mapping = ip->i_inode.i_mapping; | 156 | struct address_space *mapping = ip->i_inode.i_mapping; |
159 | int error = filemap_fdatawait(mapping); | 157 | filemap_fdatawrite(mapping); |
158 | error = filemap_fdatawait(mapping); | ||
160 | mapping_set_error(mapping, error); | 159 | mapping_set_error(mapping, error); |
161 | } | 160 | } |
161 | error = filemap_fdatawait(metamapping); | ||
162 | mapping_set_error(metamapping, error); | ||
162 | clear_bit(GLF_DIRTY, &gl->gl_flags); | 163 | clear_bit(GLF_DIRTY, &gl->gl_flags); |
163 | gfs2_ail_empty_gl(gl); | 164 | gfs2_ail_empty_gl(gl); |
164 | } | 165 | } |
165 | } | 166 | } |
166 | 167 | ||
167 | /** | 168 | /** |
168 | * inode_go_xmote_th - promote/demote a glock | ||
169 | * @gl: the glock | ||
170 | * @state: the requested state | ||
171 | * @flags: | ||
172 | * | ||
173 | */ | ||
174 | |||
175 | static void inode_go_xmote_th(struct gfs2_glock *gl) | ||
176 | { | ||
177 | if (gl->gl_state != LM_ST_UNLOCKED) | ||
178 | gfs2_pte_inval(gl); | ||
179 | if (gl->gl_state == LM_ST_EXCLUSIVE) | ||
180 | inode_go_sync(gl); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * inode_go_xmote_bh - After promoting/demoting a glock | 169 | * inode_go_xmote_bh - After promoting/demoting a glock |
185 | * @gl: the glock | 170 | * @gl: the glock |
186 | * | 171 | * |
@@ -201,22 +186,6 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) | |||
201 | } | 186 | } |
202 | 187 | ||
203 | /** | 188 | /** |
204 | * inode_go_drop_th - unlock a glock | ||
205 | * @gl: the glock | ||
206 | * | ||
207 | * Invoked from rq_demote(). | ||
208 | * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) | ||
209 | * is being purged from our node's glock cache; we're dropping lock. | ||
210 | */ | ||
211 | |||
212 | static void inode_go_drop_th(struct gfs2_glock *gl) | ||
213 | { | ||
214 | gfs2_pte_inval(gl); | ||
215 | if (gl->gl_state == LM_ST_EXCLUSIVE) | ||
216 | inode_go_sync(gl); | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * inode_go_inval - prepare a inode glock to be released | 189 | * inode_go_inval - prepare a inode glock to be released |
221 | * @gl: the glock | 190 | * @gl: the glock |
222 | * @flags: | 191 | * @flags: |
@@ -234,10 +203,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) | |||
234 | set_bit(GIF_INVALID, &ip->i_flags); | 203 | set_bit(GIF_INVALID, &ip->i_flags); |
235 | } | 204 | } |
236 | 205 | ||
237 | if (ip && S_ISREG(ip->i_inode.i_mode)) { | 206 | if (ip && S_ISREG(ip->i_inode.i_mode)) |
238 | truncate_inode_pages(ip->i_inode.i_mapping, 0); | 207 | truncate_inode_pages(ip->i_inode.i_mapping, 0); |
239 | clear_bit(GIF_PAGED, &ip->i_flags); | ||
240 | } | ||
241 | } | 208 | } |
242 | 209 | ||
243 | /** | 210 | /** |
@@ -294,23 +261,6 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
294 | } | 261 | } |
295 | 262 | ||
296 | /** | 263 | /** |
297 | * inode_go_unlock - operation done before an inode lock is unlocked by a | ||
298 | * process | ||
299 | * @gl: the glock | ||
300 | * @flags: | ||
301 | * | ||
302 | */ | ||
303 | |||
304 | static void inode_go_unlock(struct gfs2_holder *gh) | ||
305 | { | ||
306 | struct gfs2_glock *gl = gh->gh_gl; | ||
307 | struct gfs2_inode *ip = gl->gl_object; | ||
308 | |||
309 | if (ip) | ||
310 | gfs2_meta_cache_flush(ip); | ||
311 | } | ||
312 | |||
313 | /** | ||
314 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 264 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
315 | * @gl: the glock | 265 | * @gl: the glock |
316 | * | 266 | * |
@@ -350,14 +300,14 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
350 | } | 300 | } |
351 | 301 | ||
352 | /** | 302 | /** |
353 | * trans_go_xmote_th - promote/demote the transaction glock | 303 | * trans_go_sync - promote/demote the transaction glock |
354 | * @gl: the glock | 304 | * @gl: the glock |
355 | * @state: the requested state | 305 | * @state: the requested state |
356 | * @flags: | 306 | * @flags: |
357 | * | 307 | * |
358 | */ | 308 | */ |
359 | 309 | ||
360 | static void trans_go_xmote_th(struct gfs2_glock *gl) | 310 | static void trans_go_sync(struct gfs2_glock *gl) |
361 | { | 311 | { |
362 | struct gfs2_sbd *sdp = gl->gl_sbd; | 312 | struct gfs2_sbd *sdp = gl->gl_sbd; |
363 | 313 | ||
@@ -384,7 +334,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
384 | 334 | ||
385 | if (gl->gl_state != LM_ST_UNLOCKED && | 335 | if (gl->gl_state != LM_ST_UNLOCKED && |
386 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | 336 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
387 | gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode)); | ||
388 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 337 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
389 | 338 | ||
390 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 339 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -402,24 +351,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
402 | } | 351 | } |
403 | 352 | ||
404 | /** | 353 | /** |
405 | * trans_go_drop_th - unlock the transaction glock | ||
406 | * @gl: the glock | ||
407 | * | ||
408 | * We want to sync the device even with localcaching. Remember | ||
409 | * that localcaching journal replay only marks buffers dirty. | ||
410 | */ | ||
411 | |||
412 | static void trans_go_drop_th(struct gfs2_glock *gl) | ||
413 | { | ||
414 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
415 | |||
416 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
417 | gfs2_meta_syncfs(sdp); | ||
418 | gfs2_log_shutdown(sdp); | ||
419 | } | ||
420 | } | ||
421 | |||
422 | /** | ||
423 | * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock | 354 | * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock |
424 | * @gl: the glock | 355 | * @gl: the glock |
425 | * | 356 | * |
@@ -433,25 +364,21 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) | |||
433 | 364 | ||
434 | const struct gfs2_glock_operations gfs2_meta_glops = { | 365 | const struct gfs2_glock_operations gfs2_meta_glops = { |
435 | .go_xmote_th = meta_go_sync, | 366 | .go_xmote_th = meta_go_sync, |
436 | .go_drop_th = meta_go_sync, | ||
437 | .go_type = LM_TYPE_META, | 367 | .go_type = LM_TYPE_META, |
438 | }; | 368 | }; |
439 | 369 | ||
440 | const struct gfs2_glock_operations gfs2_inode_glops = { | 370 | const struct gfs2_glock_operations gfs2_inode_glops = { |
441 | .go_xmote_th = inode_go_xmote_th, | 371 | .go_xmote_th = inode_go_sync, |
442 | .go_xmote_bh = inode_go_xmote_bh, | 372 | .go_xmote_bh = inode_go_xmote_bh, |
443 | .go_drop_th = inode_go_drop_th, | ||
444 | .go_inval = inode_go_inval, | 373 | .go_inval = inode_go_inval, |
445 | .go_demote_ok = inode_go_demote_ok, | 374 | .go_demote_ok = inode_go_demote_ok, |
446 | .go_lock = inode_go_lock, | 375 | .go_lock = inode_go_lock, |
447 | .go_unlock = inode_go_unlock, | ||
448 | .go_type = LM_TYPE_INODE, | 376 | .go_type = LM_TYPE_INODE, |
449 | .go_min_hold_time = HZ / 10, | 377 | .go_min_hold_time = HZ / 10, |
450 | }; | 378 | }; |
451 | 379 | ||
452 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 380 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
453 | .go_xmote_th = meta_go_sync, | 381 | .go_xmote_th = meta_go_sync, |
454 | .go_drop_th = meta_go_sync, | ||
455 | .go_inval = meta_go_inval, | 382 | .go_inval = meta_go_inval, |
456 | .go_demote_ok = rgrp_go_demote_ok, | 383 | .go_demote_ok = rgrp_go_demote_ok, |
457 | .go_lock = rgrp_go_lock, | 384 | .go_lock = rgrp_go_lock, |
@@ -461,9 +388,8 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
461 | }; | 388 | }; |
462 | 389 | ||
463 | const struct gfs2_glock_operations gfs2_trans_glops = { | 390 | const struct gfs2_glock_operations gfs2_trans_glops = { |
464 | .go_xmote_th = trans_go_xmote_th, | 391 | .go_xmote_th = trans_go_sync, |
465 | .go_xmote_bh = trans_go_xmote_bh, | 392 | .go_xmote_bh = trans_go_xmote_bh, |
466 | .go_drop_th = trans_go_drop_th, | ||
467 | .go_type = LM_TYPE_NONDISK, | 393 | .go_type = LM_TYPE_NONDISK, |
468 | }; | 394 | }; |
469 | 395 | ||
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eaddfb5a8e6f..513aaf0dc0ab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -131,7 +131,6 @@ struct gfs2_bufdata { | |||
131 | struct gfs2_glock_operations { | 131 | struct gfs2_glock_operations { |
132 | void (*go_xmote_th) (struct gfs2_glock *gl); | 132 | void (*go_xmote_th) (struct gfs2_glock *gl); |
133 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 133 | void (*go_xmote_bh) (struct gfs2_glock *gl); |
134 | void (*go_drop_th) (struct gfs2_glock *gl); | ||
135 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 134 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
136 | int (*go_demote_ok) (struct gfs2_glock *gl); | 135 | int (*go_demote_ok) (struct gfs2_glock *gl); |
137 | int (*go_lock) (struct gfs2_holder *gh); | 136 | int (*go_lock) (struct gfs2_holder *gh); |
@@ -141,10 +140,6 @@ struct gfs2_glock_operations { | |||
141 | }; | 140 | }; |
142 | 141 | ||
143 | enum { | 142 | enum { |
144 | /* Actions */ | ||
145 | HIF_MUTEX = 0, | ||
146 | HIF_PROMOTE = 1, | ||
147 | |||
148 | /* States */ | 143 | /* States */ |
149 | HIF_HOLDER = 6, | 144 | HIF_HOLDER = 6, |
150 | HIF_FIRST = 7, | 145 | HIF_FIRST = 7, |
@@ -171,6 +166,8 @@ enum { | |||
171 | GLF_DEMOTE = 3, | 166 | GLF_DEMOTE = 3, |
172 | GLF_PENDING_DEMOTE = 4, | 167 | GLF_PENDING_DEMOTE = 4, |
173 | GLF_DIRTY = 5, | 168 | GLF_DIRTY = 5, |
169 | GLF_DEMOTE_IN_PROGRESS = 6, | ||
170 | GLF_LFLUSH = 7, | ||
174 | }; | 171 | }; |
175 | 172 | ||
176 | struct gfs2_glock { | 173 | struct gfs2_glock { |
@@ -190,6 +187,7 @@ struct gfs2_glock { | |||
190 | struct list_head gl_holders; | 187 | struct list_head gl_holders; |
191 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
192 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
190 | int gl_waiters2; /* GIF_DEMOTE */ | ||
193 | 191 | ||
194 | const struct gfs2_glock_operations *gl_ops; | 192 | const struct gfs2_glock_operations *gl_ops; |
195 | 193 | ||
@@ -210,7 +208,6 @@ struct gfs2_glock { | |||
210 | struct gfs2_sbd *gl_sbd; | 208 | struct gfs2_sbd *gl_sbd; |
211 | 209 | ||
212 | struct inode *gl_aspace; | 210 | struct inode *gl_aspace; |
213 | struct gfs2_log_element gl_le; | ||
214 | struct list_head gl_ail_list; | 211 | struct list_head gl_ail_list; |
215 | atomic_t gl_ail_count; | 212 | atomic_t gl_ail_count; |
216 | struct delayed_work gl_work; | 213 | struct delayed_work gl_work; |
@@ -239,7 +236,6 @@ struct gfs2_alloc { | |||
239 | enum { | 236 | enum { |
240 | GIF_INVALID = 0, | 237 | GIF_INVALID = 0, |
241 | GIF_QD_LOCKED = 1, | 238 | GIF_QD_LOCKED = 1, |
242 | GIF_PAGED = 2, | ||
243 | GIF_SW_PAGED = 3, | 239 | GIF_SW_PAGED = 3, |
244 | }; | 240 | }; |
245 | 241 | ||
@@ -268,14 +264,10 @@ struct gfs2_inode { | |||
268 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 264 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
269 | struct gfs2_holder i_iopen_gh; | 265 | struct gfs2_holder i_iopen_gh; |
270 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ | 266 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ |
271 | struct gfs2_alloc i_alloc; | 267 | struct gfs2_alloc *i_alloc; |
272 | u64 i_last_rg_alloc; | 268 | u64 i_last_rg_alloc; |
273 | 269 | ||
274 | spinlock_t i_spin; | ||
275 | struct rw_semaphore i_rw_mutex; | 270 | struct rw_semaphore i_rw_mutex; |
276 | unsigned long i_last_pfault; | ||
277 | |||
278 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; | ||
279 | }; | 271 | }; |
280 | 272 | ||
281 | /* | 273 | /* |
@@ -287,19 +279,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode) | |||
287 | return container_of(inode, struct gfs2_inode, i_inode); | 279 | return container_of(inode, struct gfs2_inode, i_inode); |
288 | } | 280 | } |
289 | 281 | ||
290 | /* To be removed? */ | 282 | static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode) |
291 | static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) | ||
292 | { | 283 | { |
293 | return inode->i_sb->s_fs_info; | 284 | return inode->i_sb->s_fs_info; |
294 | } | 285 | } |
295 | 286 | ||
296 | enum { | ||
297 | GFF_DID_DIRECT_ALLOC = 0, | ||
298 | GFF_EXLOCK = 1, | ||
299 | }; | ||
300 | |||
301 | struct gfs2_file { | 287 | struct gfs2_file { |
302 | unsigned long f_flags; /* GFF_... */ | ||
303 | struct mutex f_fl_mutex; | 288 | struct mutex f_fl_mutex; |
304 | struct gfs2_holder f_fl_gh; | 289 | struct gfs2_holder f_fl_gh; |
305 | }; | 290 | }; |
@@ -373,8 +358,17 @@ struct gfs2_ail { | |||
373 | u64 ai_sync_gen; | 358 | u64 ai_sync_gen; |
374 | }; | 359 | }; |
375 | 360 | ||
361 | struct gfs2_journal_extent { | ||
362 | struct list_head extent_list; | ||
363 | |||
364 | unsigned int lblock; /* First logical block */ | ||
365 | u64 dblock; /* First disk block */ | ||
366 | u64 blocks; | ||
367 | }; | ||
368 | |||
376 | struct gfs2_jdesc { | 369 | struct gfs2_jdesc { |
377 | struct list_head jd_list; | 370 | struct list_head jd_list; |
371 | struct list_head extent_list; | ||
378 | 372 | ||
379 | struct inode *jd_inode; | 373 | struct inode *jd_inode; |
380 | unsigned int jd_jid; | 374 | unsigned int jd_jid; |
@@ -421,13 +415,9 @@ struct gfs2_args { | |||
421 | struct gfs2_tune { | 415 | struct gfs2_tune { |
422 | spinlock_t gt_spin; | 416 | spinlock_t gt_spin; |
423 | 417 | ||
424 | unsigned int gt_ilimit; | ||
425 | unsigned int gt_ilimit_tries; | ||
426 | unsigned int gt_ilimit_min; | ||
427 | unsigned int gt_demote_secs; /* Cache retention for unheld glock */ | 418 | unsigned int gt_demote_secs; /* Cache retention for unheld glock */ |
428 | unsigned int gt_incore_log_blocks; | 419 | unsigned int gt_incore_log_blocks; |
429 | unsigned int gt_log_flush_secs; | 420 | unsigned int gt_log_flush_secs; |
430 | unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ | ||
431 | 421 | ||
432 | unsigned int gt_recoverd_secs; | 422 | unsigned int gt_recoverd_secs; |
433 | unsigned int gt_logd_secs; | 423 | unsigned int gt_logd_secs; |
@@ -443,10 +433,8 @@ struct gfs2_tune { | |||
443 | unsigned int gt_new_files_jdata; | 433 | unsigned int gt_new_files_jdata; |
444 | unsigned int gt_new_files_directio; | 434 | unsigned int gt_new_files_directio; |
445 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 435 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
446 | unsigned int gt_lockdump_size; | ||
447 | unsigned int gt_stall_secs; /* Detects trouble! */ | 436 | unsigned int gt_stall_secs; /* Detects trouble! */ |
448 | unsigned int gt_complain_secs; | 437 | unsigned int gt_complain_secs; |
449 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ | ||
450 | unsigned int gt_statfs_quantum; | 438 | unsigned int gt_statfs_quantum; |
451 | unsigned int gt_statfs_slow; | 439 | unsigned int gt_statfs_slow; |
452 | }; | 440 | }; |
@@ -539,7 +527,6 @@ struct gfs2_sbd { | |||
539 | /* StatFS stuff */ | 527 | /* StatFS stuff */ |
540 | 528 | ||
541 | spinlock_t sd_statfs_spin; | 529 | spinlock_t sd_statfs_spin; |
542 | struct mutex sd_statfs_mutex; | ||
543 | struct gfs2_statfs_change_host sd_statfs_master; | 530 | struct gfs2_statfs_change_host sd_statfs_master; |
544 | struct gfs2_statfs_change_host sd_statfs_local; | 531 | struct gfs2_statfs_change_host sd_statfs_local; |
545 | unsigned long sd_statfs_sync_time; | 532 | unsigned long sd_statfs_sync_time; |
@@ -602,20 +589,18 @@ struct gfs2_sbd { | |||
602 | unsigned int sd_log_commited_databuf; | 589 | unsigned int sd_log_commited_databuf; |
603 | unsigned int sd_log_commited_revoke; | 590 | unsigned int sd_log_commited_revoke; |
604 | 591 | ||
605 | unsigned int sd_log_num_gl; | ||
606 | unsigned int sd_log_num_buf; | 592 | unsigned int sd_log_num_buf; |
607 | unsigned int sd_log_num_revoke; | 593 | unsigned int sd_log_num_revoke; |
608 | unsigned int sd_log_num_rg; | 594 | unsigned int sd_log_num_rg; |
609 | unsigned int sd_log_num_databuf; | 595 | unsigned int sd_log_num_databuf; |
610 | 596 | ||
611 | struct list_head sd_log_le_gl; | ||
612 | struct list_head sd_log_le_buf; | 597 | struct list_head sd_log_le_buf; |
613 | struct list_head sd_log_le_revoke; | 598 | struct list_head sd_log_le_revoke; |
614 | struct list_head sd_log_le_rg; | 599 | struct list_head sd_log_le_rg; |
615 | struct list_head sd_log_le_databuf; | 600 | struct list_head sd_log_le_databuf; |
616 | struct list_head sd_log_le_ordered; | 601 | struct list_head sd_log_le_ordered; |
617 | 602 | ||
618 | unsigned int sd_log_blks_free; | 603 | atomic_t sd_log_blks_free; |
619 | struct mutex sd_log_reserve_mutex; | 604 | struct mutex sd_log_reserve_mutex; |
620 | 605 | ||
621 | u64 sd_log_sequence; | 606 | u64 sd_log_sequence; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5f6dc32946cd..728d3169e7bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include "log.h" | 31 | #include "log.h" |
32 | #include "meta_io.h" | 32 | #include "meta_io.h" |
33 | #include "ops_address.h" | 33 | #include "ops_address.h" |
34 | #include "ops_file.h" | ||
35 | #include "ops_inode.h" | 34 | #include "ops_inode.h" |
36 | #include "quota.h" | 35 | #include "quota.h" |
37 | #include "rgrp.h" | 36 | #include "rgrp.h" |
@@ -132,15 +131,21 @@ static struct inode *gfs2_iget_skip(struct super_block *sb, | |||
132 | 131 | ||
133 | void gfs2_set_iop(struct inode *inode) | 132 | void gfs2_set_iop(struct inode *inode) |
134 | { | 133 | { |
134 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
135 | umode_t mode = inode->i_mode; | 135 | umode_t mode = inode->i_mode; |
136 | 136 | ||
137 | if (S_ISREG(mode)) { | 137 | if (S_ISREG(mode)) { |
138 | inode->i_op = &gfs2_file_iops; | 138 | inode->i_op = &gfs2_file_iops; |
139 | inode->i_fop = &gfs2_file_fops; | 139 | if (sdp->sd_args.ar_localflocks) |
140 | inode->i_mapping->a_ops = &gfs2_file_aops; | 140 | inode->i_fop = &gfs2_file_fops_nolock; |
141 | else | ||
142 | inode->i_fop = &gfs2_file_fops; | ||
141 | } else if (S_ISDIR(mode)) { | 143 | } else if (S_ISDIR(mode)) { |
142 | inode->i_op = &gfs2_dir_iops; | 144 | inode->i_op = &gfs2_dir_iops; |
143 | inode->i_fop = &gfs2_dir_fops; | 145 | if (sdp->sd_args.ar_localflocks) |
146 | inode->i_fop = &gfs2_dir_fops_nolock; | ||
147 | else | ||
148 | inode->i_fop = &gfs2_dir_fops; | ||
144 | } else if (S_ISLNK(mode)) { | 149 | } else if (S_ISLNK(mode)) { |
145 | inode->i_op = &gfs2_symlink_iops; | 150 | inode->i_op = &gfs2_symlink_iops; |
146 | } else { | 151 | } else { |
@@ -291,12 +296,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
291 | di->di_entries = be32_to_cpu(str->di_entries); | 296 | di->di_entries = be32_to_cpu(str->di_entries); |
292 | 297 | ||
293 | di->di_eattr = be64_to_cpu(str->di_eattr); | 298 | di->di_eattr = be64_to_cpu(str->di_eattr); |
294 | return 0; | 299 | if (S_ISREG(ip->i_inode.i_mode)) |
295 | } | 300 | gfs2_set_aops(&ip->i_inode); |
296 | 301 | ||
297 | static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) | 302 | return 0; |
298 | { | ||
299 | ip->i_cache[0] = bh; | ||
300 | } | 303 | } |
301 | 304 | ||
302 | /** | 305 | /** |
@@ -366,7 +369,8 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
366 | if (error) | 369 | if (error) |
367 | goto out_rg_gunlock; | 370 | goto out_rg_gunlock; |
368 | 371 | ||
369 | gfs2_trans_add_gl(ip->i_gl); | 372 | set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); |
373 | set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); | ||
370 | 374 | ||
371 | gfs2_free_di(rgd, ip); | 375 | gfs2_free_di(rgd, ip); |
372 | 376 | ||
@@ -707,9 +711,10 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) | |||
707 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 711 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
708 | int error; | 712 | int error; |
709 | 713 | ||
710 | gfs2_alloc_get(dip); | 714 | if (gfs2_alloc_get(dip) == NULL) |
715 | return -ENOMEM; | ||
711 | 716 | ||
712 | dip->i_alloc.al_requested = RES_DINODE; | 717 | dip->i_alloc->al_requested = RES_DINODE; |
713 | error = gfs2_inplace_reserve(dip); | 718 | error = gfs2_inplace_reserve(dip); |
714 | if (error) | 719 | if (error) |
715 | goto out; | 720 | goto out; |
@@ -855,7 +860,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
855 | 860 | ||
856 | error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); | 861 | error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); |
857 | if (alloc_required < 0) | 862 | if (alloc_required < 0) |
858 | goto fail; | 863 | goto fail_quota_locks; |
859 | if (alloc_required) { | 864 | if (alloc_required) { |
860 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); | 865 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); |
861 | if (error) | 866 | if (error) |
@@ -896,7 +901,7 @@ fail_end_trans: | |||
896 | gfs2_trans_end(sdp); | 901 | gfs2_trans_end(sdp); |
897 | 902 | ||
898 | fail_ipreserv: | 903 | fail_ipreserv: |
899 | if (dip->i_alloc.al_rgd) | 904 | if (dip->i_alloc->al_rgd) |
900 | gfs2_inplace_release(dip); | 905 | gfs2_inplace_release(dip); |
901 | 906 | ||
902 | fail_quota_locks: | 907 | fail_quota_locks: |
@@ -966,7 +971,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
966 | struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; | 971 | struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; |
967 | int error; | 972 | int error; |
968 | u64 generation; | 973 | u64 generation; |
969 | struct buffer_head *bh=NULL; | 974 | struct buffer_head *bh = NULL; |
970 | 975 | ||
971 | if (!name->len || name->len > GFS2_FNAMESIZE) | 976 | if (!name->len || name->len > GFS2_FNAMESIZE) |
972 | return ERR_PTR(-ENAMETOOLONG); | 977 | return ERR_PTR(-ENAMETOOLONG); |
@@ -1003,8 +1008,6 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1003 | if (IS_ERR(inode)) | 1008 | if (IS_ERR(inode)) |
1004 | goto fail_gunlock2; | 1009 | goto fail_gunlock2; |
1005 | 1010 | ||
1006 | gfs2_inode_bh(GFS2_I(inode), bh); | ||
1007 | |||
1008 | error = gfs2_inode_refresh(GFS2_I(inode)); | 1011 | error = gfs2_inode_refresh(GFS2_I(inode)); |
1009 | if (error) | 1012 | if (error) |
1010 | goto fail_gunlock2; | 1013 | goto fail_gunlock2; |
@@ -1021,6 +1024,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1021 | if (error) | 1024 | if (error) |
1022 | goto fail_gunlock2; | 1025 | goto fail_gunlock2; |
1023 | 1026 | ||
1027 | if (bh) | ||
1028 | brelse(bh); | ||
1024 | if (!inode) | 1029 | if (!inode) |
1025 | return ERR_PTR(-ENOMEM); | 1030 | return ERR_PTR(-ENOMEM); |
1026 | return inode; | 1031 | return inode; |
@@ -1032,6 +1037,8 @@ fail_gunlock2: | |||
1032 | fail_gunlock: | 1037 | fail_gunlock: |
1033 | gfs2_glock_dq(ghs); | 1038 | gfs2_glock_dq(ghs); |
1034 | fail: | 1039 | fail: |
1040 | if (bh) | ||
1041 | brelse(bh); | ||
1035 | return ERR_PTR(error); | 1042 | return ERR_PTR(error); |
1036 | } | 1043 | } |
1037 | 1044 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 351ac87ab384..d44650662615 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -20,6 +20,18 @@ static inline int gfs2_is_jdata(const struct gfs2_inode *ip) | |||
20 | return ip->i_di.di_flags & GFS2_DIF_JDATA; | 20 | return ip->i_di.di_flags & GFS2_DIF_JDATA; |
21 | } | 21 | } |
22 | 22 | ||
23 | static inline int gfs2_is_writeback(const struct gfs2_inode *ip) | ||
24 | { | ||
25 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
26 | return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip); | ||
27 | } | ||
28 | |||
29 | static inline int gfs2_is_ordered(const struct gfs2_inode *ip) | ||
30 | { | ||
31 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
32 | return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip); | ||
33 | } | ||
34 | |||
23 | static inline int gfs2_is_dir(const struct gfs2_inode *ip) | 35 | static inline int gfs2_is_dir(const struct gfs2_inode *ip) |
24 | { | 36 | { |
25 | return S_ISDIR(ip->i_inode.i_mode); | 37 | return S_ISDIR(ip->i_inode.i_mode); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 41c5b04caaba..f2efff424224 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -67,6 +67,11 @@ static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir) | |||
67 | memset(data, 0, 256); | 67 | memset(data, 0, 256); |
68 | strncpy(data, data_arg, 255); | 68 | strncpy(data, data_arg, 255); |
69 | 69 | ||
70 | if (!strlen(data)) { | ||
71 | log_error("no mount options, (u)mount helpers not installed"); | ||
72 | return -EINVAL; | ||
73 | } | ||
74 | |||
70 | for (options = data; (x = strsep(&options, ":")); ) { | 75 | for (options = data; (x = strsep(&options, ":")); ) { |
71 | if (!*x) | 76 | if (!*x) |
72 | continue; | 77 | continue; |
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index 1f7b038530b4..2ebd374b3143 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c | |||
@@ -89,15 +89,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, | |||
89 | op->info.number = name->ln_number; | 89 | op->info.number = name->ln_number; |
90 | op->info.start = fl->fl_start; | 90 | op->info.start = fl->fl_start; |
91 | op->info.end = fl->fl_end; | 91 | op->info.end = fl->fl_end; |
92 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
93 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { | 92 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { |
93 | /* fl_owner is lockd which doesn't distinguish | ||
94 | processes on the nfs client */ | ||
95 | op->info.owner = (__u64) fl->fl_pid; | ||
94 | xop->callback = fl->fl_lmops->fl_grant; | 96 | xop->callback = fl->fl_lmops->fl_grant; |
95 | locks_init_lock(&xop->flc); | 97 | locks_init_lock(&xop->flc); |
96 | locks_copy_lock(&xop->flc, fl); | 98 | locks_copy_lock(&xop->flc, fl); |
97 | xop->fl = fl; | 99 | xop->fl = fl; |
98 | xop->file = file; | 100 | xop->file = file; |
99 | } else | 101 | } else { |
102 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
100 | xop->callback = NULL; | 103 | xop->callback = NULL; |
104 | } | ||
101 | 105 | ||
102 | send_op(op); | 106 | send_op(op); |
103 | 107 | ||
@@ -203,7 +207,10 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, | |||
203 | op->info.number = name->ln_number; | 207 | op->info.number = name->ln_number; |
204 | op->info.start = fl->fl_start; | 208 | op->info.start = fl->fl_start; |
205 | op->info.end = fl->fl_end; | 209 | op->info.end = fl->fl_end; |
206 | op->info.owner = (__u64)(long) fl->fl_owner; | 210 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
211 | op->info.owner = (__u64) fl->fl_pid; | ||
212 | else | ||
213 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
207 | 214 | ||
208 | send_op(op); | 215 | send_op(op); |
209 | wait_event(recv_wq, (op->done != 0)); | 216 | wait_event(recv_wq, (op->done != 0)); |
@@ -242,7 +249,10 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | |||
242 | op->info.number = name->ln_number; | 249 | op->info.number = name->ln_number; |
243 | op->info.start = fl->fl_start; | 250 | op->info.start = fl->fl_start; |
244 | op->info.end = fl->fl_end; | 251 | op->info.end = fl->fl_end; |
245 | op->info.owner = (__u64)(long) fl->fl_owner; | 252 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
253 | op->info.owner = (__u64) fl->fl_pid; | ||
254 | else | ||
255 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
246 | 256 | ||
247 | send_op(op); | 257 | send_op(op); |
248 | wait_event(recv_wq, (op->done != 0)); | 258 | wait_event(recv_wq, (op->done != 0)); |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index ae9e6a25fe2b..a87b09839761 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
@@ -189,51 +189,39 @@ static struct kobj_type gdlm_ktype = { | |||
189 | .sysfs_ops = &gdlm_attr_ops, | 189 | .sysfs_ops = &gdlm_attr_ops, |
190 | }; | 190 | }; |
191 | 191 | ||
192 | static struct kset gdlm_kset = { | 192 | static struct kset *gdlm_kset; |
193 | .ktype = &gdlm_ktype, | ||
194 | }; | ||
195 | 193 | ||
196 | int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) | 194 | int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) |
197 | { | 195 | { |
198 | int error; | 196 | int error; |
199 | 197 | ||
200 | error = kobject_set_name(&ls->kobj, "%s", "lock_module"); | 198 | ls->kobj.kset = gdlm_kset; |
201 | if (error) { | 199 | error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj, |
202 | log_error("can't set kobj name %d", error); | 200 | "lock_module"); |
203 | return error; | ||
204 | } | ||
205 | |||
206 | ls->kobj.kset = &gdlm_kset; | ||
207 | ls->kobj.ktype = &gdlm_ktype; | ||
208 | ls->kobj.parent = fskobj; | ||
209 | |||
210 | error = kobject_register(&ls->kobj); | ||
211 | if (error) | 201 | if (error) |
212 | log_error("can't register kobj %d", error); | 202 | log_error("can't register kobj %d", error); |
203 | kobject_uevent(&ls->kobj, KOBJ_ADD); | ||
213 | 204 | ||
214 | return error; | 205 | return error; |
215 | } | 206 | } |
216 | 207 | ||
217 | void gdlm_kobject_release(struct gdlm_ls *ls) | 208 | void gdlm_kobject_release(struct gdlm_ls *ls) |
218 | { | 209 | { |
219 | kobject_unregister(&ls->kobj); | 210 | kobject_put(&ls->kobj); |
220 | } | 211 | } |
221 | 212 | ||
222 | int gdlm_sysfs_init(void) | 213 | int gdlm_sysfs_init(void) |
223 | { | 214 | { |
224 | int error; | 215 | gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); |
225 | 216 | if (!gdlm_kset) { | |
226 | kobject_set_name(&gdlm_kset.kobj, "lock_dlm"); | 217 | printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); |
227 | kobj_set_kset_s(&gdlm_kset, kernel_subsys); | 218 | return -ENOMEM; |
228 | error = kset_register(&gdlm_kset); | 219 | } |
229 | if (error) | 220 | return 0; |
230 | printk("lock_dlm: cannot register kset %d\n", error); | ||
231 | |||
232 | return error; | ||
233 | } | 221 | } |
234 | 222 | ||
235 | void gdlm_sysfs_exit(void) | 223 | void gdlm_sysfs_exit(void) |
236 | { | 224 | { |
237 | kset_unregister(&gdlm_kset); | 225 | kset_unregister(gdlm_kset); |
238 | } | 226 | } |
239 | 227 | ||
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index bd938f06481d..521694fc19d6 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
@@ -273,18 +273,13 @@ static int gdlm_thread(void *data, int blist) | |||
273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; | 273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; |
274 | struct gdlm_lock *lp = NULL; | 274 | struct gdlm_lock *lp = NULL; |
275 | uint8_t complete, blocking, submit, drop; | 275 | uint8_t complete, blocking, submit, drop; |
276 | DECLARE_WAITQUEUE(wait, current); | ||
277 | 276 | ||
278 | /* Only thread1 is allowed to do blocking callbacks since gfs | 277 | /* Only thread1 is allowed to do blocking callbacks since gfs |
279 | may wait for a completion callback within a blocking cb. */ | 278 | may wait for a completion callback within a blocking cb. */ |
280 | 279 | ||
281 | while (!kthread_should_stop()) { | 280 | while (!kthread_should_stop()) { |
282 | set_current_state(TASK_INTERRUPTIBLE); | 281 | wait_event_interruptible(ls->thread_wait, |
283 | add_wait_queue(&ls->thread_wait, &wait); | 282 | !no_work(ls, blist) || kthread_should_stop()); |
284 | if (no_work(ls, blist)) | ||
285 | schedule(); | ||
286 | remove_wait_queue(&ls->thread_wait, &wait); | ||
287 | set_current_state(TASK_RUNNING); | ||
288 | 283 | ||
289 | complete = blocking = submit = drop = 0; | 284 | complete = blocking = submit = drop = 0; |
290 | 285 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 7df702473252..161ab6f2058e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/crc32.h> | 16 | #include <linux/crc32.h> |
17 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | ||
20 | #include <linux/freezer.h> | ||
19 | 21 | ||
20 | #include "gfs2.h" | 22 | #include "gfs2.h" |
21 | #include "incore.h" | 23 | #include "incore.h" |
@@ -68,14 +70,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | |||
68 | * | 70 | * |
69 | */ | 71 | */ |
70 | 72 | ||
71 | void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) | 73 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd) |
72 | { | 74 | { |
73 | bd->bd_ail = NULL; | 75 | bd->bd_ail = NULL; |
74 | list_del_init(&bd->bd_ail_st_list); | 76 | list_del_init(&bd->bd_ail_st_list); |
75 | list_del_init(&bd->bd_ail_gl_list); | 77 | list_del_init(&bd->bd_ail_gl_list); |
76 | atomic_dec(&bd->bd_gl->gl_ail_count); | 78 | atomic_dec(&bd->bd_gl->gl_ail_count); |
77 | if (mapping) | ||
78 | gfs2_meta_cache_flush(GFS2_I(mapping->host)); | ||
79 | brelse(bd->bd_bh); | 79 | brelse(bd->bd_bh); |
80 | } | 80 | } |
81 | 81 | ||
@@ -92,8 +92,6 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
92 | struct buffer_head *bh; | 92 | struct buffer_head *bh; |
93 | int retry; | 93 | int retry; |
94 | 94 | ||
95 | BUG_ON(!spin_is_locked(&sdp->sd_log_lock)); | ||
96 | |||
97 | do { | 95 | do { |
98 | retry = 0; | 96 | retry = 0; |
99 | 97 | ||
@@ -210,7 +208,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) | |||
210 | gfs2_log_unlock(sdp); | 208 | gfs2_log_unlock(sdp); |
211 | } | 209 | } |
212 | 210 | ||
213 | int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | 211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) |
214 | { | 212 | { |
215 | struct gfs2_ail *ai, *s; | 213 | struct gfs2_ail *ai, *s; |
216 | int ret; | 214 | int ret; |
@@ -248,7 +246,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
248 | bd = list_entry(head->prev, struct gfs2_bufdata, | 246 | bd = list_entry(head->prev, struct gfs2_bufdata, |
249 | bd_ail_st_list); | 247 | bd_ail_st_list); |
250 | gfs2_assert(sdp, bd->bd_ail == ai); | 248 | gfs2_assert(sdp, bd->bd_ail == ai); |
251 | gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); | 249 | gfs2_remove_from_ail(bd); |
252 | } | 250 | } |
253 | } | 251 | } |
254 | 252 | ||
@@ -303,7 +301,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | |||
303 | 301 | ||
304 | mutex_lock(&sdp->sd_log_reserve_mutex); | 302 | mutex_lock(&sdp->sd_log_reserve_mutex); |
305 | gfs2_log_lock(sdp); | 303 | gfs2_log_lock(sdp); |
306 | while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { | 304 | while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) { |
307 | gfs2_log_unlock(sdp); | 305 | gfs2_log_unlock(sdp); |
308 | gfs2_ail1_empty(sdp, 0); | 306 | gfs2_ail1_empty(sdp, 0); |
309 | gfs2_log_flush(sdp, NULL); | 307 | gfs2_log_flush(sdp, NULL); |
@@ -312,7 +310,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | |||
312 | gfs2_ail1_start(sdp, 0); | 310 | gfs2_ail1_start(sdp, 0); |
313 | gfs2_log_lock(sdp); | 311 | gfs2_log_lock(sdp); |
314 | } | 312 | } |
315 | sdp->sd_log_blks_free -= blks; | 313 | atomic_sub(blks, &sdp->sd_log_blks_free); |
316 | gfs2_log_unlock(sdp); | 314 | gfs2_log_unlock(sdp); |
317 | mutex_unlock(&sdp->sd_log_reserve_mutex); | 315 | mutex_unlock(&sdp->sd_log_reserve_mutex); |
318 | 316 | ||
@@ -332,27 +330,23 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) | |||
332 | { | 330 | { |
333 | 331 | ||
334 | gfs2_log_lock(sdp); | 332 | gfs2_log_lock(sdp); |
335 | sdp->sd_log_blks_free += blks; | 333 | atomic_add(blks, &sdp->sd_log_blks_free); |
336 | gfs2_assert_withdraw(sdp, | 334 | gfs2_assert_withdraw(sdp, |
337 | sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); | 335 | atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
338 | gfs2_log_unlock(sdp); | 336 | gfs2_log_unlock(sdp); |
339 | up_read(&sdp->sd_log_flush_lock); | 337 | up_read(&sdp->sd_log_flush_lock); |
340 | } | 338 | } |
341 | 339 | ||
342 | static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) | 340 | static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) |
343 | { | 341 | { |
344 | struct inode *inode = sdp->sd_jdesc->jd_inode; | 342 | struct gfs2_journal_extent *je; |
345 | int error; | 343 | |
346 | struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; | 344 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { |
347 | 345 | if (lbn >= je->lblock && lbn < je->lblock + je->blocks) | |
348 | bh_map.b_size = 1 << inode->i_blkbits; | 346 | return je->dblock + lbn - je->lblock; |
349 | error = gfs2_block_map(inode, lbn, 0, &bh_map); | 347 | } |
350 | if (error || !bh_map.b_blocknr) | 348 | |
351 | printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, | 349 | return -1; |
352 | (unsigned long long)bh_map.b_blocknr, lbn); | ||
353 | gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); | ||
354 | |||
355 | return bh_map.b_blocknr; | ||
356 | } | 350 | } |
357 | 351 | ||
358 | /** | 352 | /** |
@@ -561,8 +555,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
561 | ail2_empty(sdp, new_tail); | 555 | ail2_empty(sdp, new_tail); |
562 | 556 | ||
563 | gfs2_log_lock(sdp); | 557 | gfs2_log_lock(sdp); |
564 | sdp->sd_log_blks_free += dist; | 558 | atomic_add(dist, &sdp->sd_log_blks_free); |
565 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); | 559 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
566 | gfs2_log_unlock(sdp); | 560 | gfs2_log_unlock(sdp); |
567 | 561 | ||
568 | sdp->sd_log_tail = new_tail; | 562 | sdp->sd_log_tail = new_tail; |
@@ -652,7 +646,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) | |||
652 | get_bh(bh); | 646 | get_bh(bh); |
653 | gfs2_log_unlock(sdp); | 647 | gfs2_log_unlock(sdp); |
654 | lock_buffer(bh); | 648 | lock_buffer(bh); |
655 | if (test_clear_buffer_dirty(bh)) { | 649 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { |
656 | bh->b_end_io = end_buffer_write_sync; | 650 | bh->b_end_io = end_buffer_write_sync; |
657 | submit_bh(WRITE, bh); | 651 | submit_bh(WRITE, bh); |
658 | } else { | 652 | } else { |
@@ -694,20 +688,16 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) | |||
694 | * | 688 | * |
695 | */ | 689 | */ |
696 | 690 | ||
697 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | 691 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) |
698 | { | 692 | { |
699 | struct gfs2_ail *ai; | 693 | struct gfs2_ail *ai; |
700 | 694 | ||
701 | down_write(&sdp->sd_log_flush_lock); | 695 | down_write(&sdp->sd_log_flush_lock); |
702 | 696 | ||
703 | if (gl) { | 697 | /* Log might have been flushed while we waited for the flush lock */ |
704 | gfs2_log_lock(sdp); | 698 | if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { |
705 | if (list_empty(&gl->gl_le.le_list)) { | 699 | up_write(&sdp->sd_log_flush_lock); |
706 | gfs2_log_unlock(sdp); | 700 | return; |
707 | up_write(&sdp->sd_log_flush_lock); | ||
708 | return; | ||
709 | } | ||
710 | gfs2_log_unlock(sdp); | ||
711 | } | 701 | } |
712 | 702 | ||
713 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); | 703 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); |
@@ -739,7 +729,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
739 | log_flush_commit(sdp); | 729 | log_flush_commit(sdp); |
740 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ | 730 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ |
741 | gfs2_log_lock(sdp); | 731 | gfs2_log_lock(sdp); |
742 | sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ | 732 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ |
743 | gfs2_log_unlock(sdp); | 733 | gfs2_log_unlock(sdp); |
744 | log_write_header(sdp, 0, PULL); | 734 | log_write_header(sdp, 0, PULL); |
745 | } | 735 | } |
@@ -767,7 +757,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
767 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 757 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
768 | { | 758 | { |
769 | unsigned int reserved; | 759 | unsigned int reserved; |
770 | unsigned int old; | 760 | unsigned int unused; |
771 | 761 | ||
772 | gfs2_log_lock(sdp); | 762 | gfs2_log_lock(sdp); |
773 | 763 | ||
@@ -779,14 +769,11 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
779 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
780 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); | 770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); |
781 | reserved = calc_reserved(sdp); | 771 | reserved = calc_reserved(sdp); |
782 | old = sdp->sd_log_blks_free; | 772 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; |
783 | sdp->sd_log_blks_free += tr->tr_reserved - | 773 | gfs2_assert_withdraw(sdp, unused >= 0); |
784 | (reserved - sdp->sd_log_blks_reserved); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
785 | 775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | |
786 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); | ||
787 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= | ||
788 | sdp->sd_jdesc->jd_blocks); | 776 | sdp->sd_jdesc->jd_blocks); |
789 | |||
790 | sdp->sd_log_blks_reserved = reserved; | 777 | sdp->sd_log_blks_reserved = reserved; |
791 | 778 | ||
792 | gfs2_log_unlock(sdp); | 779 | gfs2_log_unlock(sdp); |
@@ -825,7 +812,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
825 | down_write(&sdp->sd_log_flush_lock); | 812 | down_write(&sdp->sd_log_flush_lock); |
826 | 813 | ||
827 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); | 814 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); |
828 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); | ||
829 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); | 815 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); |
830 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 816 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
831 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); | 817 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); |
@@ -838,7 +824,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
838 | log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, | 824 | log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, |
839 | (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); | 825 | (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); |
840 | 826 | ||
841 | gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); | 827 | gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); |
842 | gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); | 828 | gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); |
843 | gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); | 829 | gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); |
844 | 830 | ||
@@ -866,3 +852,42 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) | |||
866 | } | 852 | } |
867 | } | 853 | } |
868 | 854 | ||
855 | |||
856 | /** | ||
857 | * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks | ||
858 | * @sdp: Pointer to GFS2 superblock | ||
859 | * | ||
860 | * Also, periodically check to make sure that we're using the most recent | ||
861 | * journal index. | ||
862 | */ | ||
863 | |||
864 | int gfs2_logd(void *data) | ||
865 | { | ||
866 | struct gfs2_sbd *sdp = data; | ||
867 | unsigned long t; | ||
868 | int need_flush; | ||
869 | |||
870 | while (!kthread_should_stop()) { | ||
871 | /* Advance the log tail */ | ||
872 | |||
873 | t = sdp->sd_log_flush_time + | ||
874 | gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; | ||
875 | |||
876 | gfs2_ail1_empty(sdp, DIO_ALL); | ||
877 | gfs2_log_lock(sdp); | ||
878 | need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); | ||
879 | gfs2_log_unlock(sdp); | ||
880 | if (need_flush || time_after_eq(jiffies, t)) { | ||
881 | gfs2_log_flush(sdp, NULL); | ||
882 | sdp->sd_log_flush_time = jiffies; | ||
883 | } | ||
884 | |||
885 | t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; | ||
886 | if (freezing(current)) | ||
887 | refrigerator(); | ||
888 | schedule_timeout_interruptible(t); | ||
889 | } | ||
890 | |||
891 | return 0; | ||
892 | } | ||
893 | |||
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index dae282400627..771152816508 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
@@ -48,8 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, | |||
48 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | 48 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, |
49 | unsigned int ssize); | 49 | unsigned int ssize); |
50 | 50 | ||
51 | int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); | ||
52 | |||
53 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); | 51 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); |
54 | void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); | 52 | void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); |
55 | void gfs2_log_incr_head(struct gfs2_sbd *sdp); | 53 | void gfs2_log_incr_head(struct gfs2_sbd *sdp); |
@@ -57,11 +55,19 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp); | |||
57 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); | 55 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); |
58 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, | 56 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, |
59 | struct buffer_head *real); | 57 | struct buffer_head *real); |
60 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); | 58 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); |
59 | |||
60 | static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl) | ||
61 | { | ||
62 | if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags)) | ||
63 | __gfs2_log_flush(sbd, gl); | ||
64 | } | ||
65 | |||
61 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); | 66 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); |
62 | void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); | 67 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd); |
63 | 68 | ||
64 | void gfs2_log_shutdown(struct gfs2_sbd *sdp); | 69 | void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
65 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | 70 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); |
71 | int gfs2_logd(void *data); | ||
66 | 72 | ||
67 | #endif /* __LOG_DOT_H__ */ | 73 | #endif /* __LOG_DOT_H__ */ |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6c27cea761c6..fae59d69d01a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -87,6 +87,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
87 | } | 87 | } |
88 | bd->bd_ail = ai; | 88 | bd->bd_ail = ai; |
89 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); | 89 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); |
90 | clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); | ||
90 | gfs2_log_unlock(sdp); | 91 | gfs2_log_unlock(sdp); |
91 | unlock_buffer(bh); | 92 | unlock_buffer(bh); |
92 | } | 93 | } |
@@ -124,49 +125,6 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) | |||
124 | return bh; | 125 | return bh; |
125 | } | 126 | } |
126 | 127 | ||
127 | static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
128 | { | ||
129 | struct gfs2_glock *gl; | ||
130 | struct gfs2_trans *tr = current->journal_info; | ||
131 | |||
132 | tr->tr_touched = 1; | ||
133 | |||
134 | gl = container_of(le, struct gfs2_glock, gl_le); | ||
135 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) | ||
136 | return; | ||
137 | |||
138 | if (!list_empty(&le->le_list)) | ||
139 | return; | ||
140 | |||
141 | gfs2_glock_hold(gl); | ||
142 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
143 | sdp->sd_log_num_gl++; | ||
144 | list_add(&le->le_list, &sdp->sd_log_le_gl); | ||
145 | } | ||
146 | |||
147 | static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
148 | { | ||
149 | gfs2_log_lock(sdp); | ||
150 | __glock_lo_add(sdp, le); | ||
151 | gfs2_log_unlock(sdp); | ||
152 | } | ||
153 | |||
154 | static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
155 | { | ||
156 | struct list_head *head = &sdp->sd_log_le_gl; | ||
157 | struct gfs2_glock *gl; | ||
158 | |||
159 | while (!list_empty(head)) { | ||
160 | gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); | ||
161 | list_del_init(&gl->gl_le.le_list); | ||
162 | sdp->sd_log_num_gl--; | ||
163 | |||
164 | gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); | ||
165 | gfs2_glock_put(gl); | ||
166 | } | ||
167 | gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); | ||
168 | } | ||
169 | |||
170 | static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | 128 | static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) |
171 | { | 129 | { |
172 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); | 130 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); |
@@ -182,7 +140,8 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
182 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 140 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
183 | if (!list_empty(&le->le_list)) | 141 | if (!list_empty(&le->le_list)) |
184 | goto out; | 142 | goto out; |
185 | __glock_lo_add(sdp, &bd->bd_gl->gl_le); | 143 | set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); |
144 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | ||
186 | gfs2_meta_check(sdp, bd->bd_bh); | 145 | gfs2_meta_check(sdp, bd->bd_bh); |
187 | gfs2_pin(sdp, bd->bd_bh); | 146 | gfs2_pin(sdp, bd->bd_bh); |
188 | sdp->sd_log_num_buf++; | 147 | sdp->sd_log_num_buf++; |
@@ -556,17 +515,20 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
556 | 515 | ||
557 | lock_buffer(bd->bd_bh); | 516 | lock_buffer(bd->bd_bh); |
558 | gfs2_log_lock(sdp); | 517 | gfs2_log_lock(sdp); |
559 | if (!list_empty(&bd->bd_list_tr)) | 518 | if (tr) { |
560 | goto out; | 519 | if (!list_empty(&bd->bd_list_tr)) |
561 | tr->tr_touched = 1; | 520 | goto out; |
562 | if (gfs2_is_jdata(ip)) { | 521 | tr->tr_touched = 1; |
563 | tr->tr_num_buf++; | 522 | if (gfs2_is_jdata(ip)) { |
564 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 523 | tr->tr_num_buf++; |
524 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | ||
525 | } | ||
565 | } | 526 | } |
566 | if (!list_empty(&le->le_list)) | 527 | if (!list_empty(&le->le_list)) |
567 | goto out; | 528 | goto out; |
568 | 529 | ||
569 | __glock_lo_add(sdp, &bd->bd_gl->gl_le); | 530 | set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); |
531 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | ||
570 | if (gfs2_is_jdata(ip)) { | 532 | if (gfs2_is_jdata(ip)) { |
571 | gfs2_pin(sdp, bd->bd_bh); | 533 | gfs2_pin(sdp, bd->bd_bh); |
572 | tr->tr_num_databuf_new++; | 534 | tr->tr_num_databuf_new++; |
@@ -773,12 +735,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
773 | } | 735 | } |
774 | 736 | ||
775 | 737 | ||
776 | const struct gfs2_log_operations gfs2_glock_lops = { | ||
777 | .lo_add = glock_lo_add, | ||
778 | .lo_after_commit = glock_lo_after_commit, | ||
779 | .lo_name = "glock", | ||
780 | }; | ||
781 | |||
782 | const struct gfs2_log_operations gfs2_buf_lops = { | 738 | const struct gfs2_log_operations gfs2_buf_lops = { |
783 | .lo_add = buf_lo_add, | 739 | .lo_add = buf_lo_add, |
784 | .lo_incore_commit = buf_lo_incore_commit, | 740 | .lo_incore_commit = buf_lo_incore_commit, |
@@ -816,7 +772,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { | |||
816 | }; | 772 | }; |
817 | 773 | ||
818 | const struct gfs2_log_operations *gfs2_log_ops[] = { | 774 | const struct gfs2_log_operations *gfs2_log_ops[] = { |
819 | &gfs2_glock_lops, | ||
820 | &gfs2_databuf_lops, | 775 | &gfs2_databuf_lops, |
821 | &gfs2_buf_lops, | 776 | &gfs2_buf_lops, |
822 | &gfs2_rg_lops, | 777 | &gfs2_rg_lops, |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7ecfe0d3a491..9c7765c12d62 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -29,9 +29,8 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) | |||
29 | struct gfs2_inode *ip = foo; | 29 | struct gfs2_inode *ip = foo; |
30 | 30 | ||
31 | inode_init_once(&ip->i_inode); | 31 | inode_init_once(&ip->i_inode); |
32 | spin_lock_init(&ip->i_spin); | ||
33 | init_rwsem(&ip->i_rw_mutex); | 32 | init_rwsem(&ip->i_rw_mutex); |
34 | memset(ip->i_cache, 0, sizeof(ip->i_cache)); | 33 | ip->i_alloc = NULL; |
35 | } | 34 | } |
36 | 35 | ||
37 | static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) | 36 | static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4da423985e4f..85aea27b4a86 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -50,6 +50,7 @@ static int gfs2_aspace_writepage(struct page *page, | |||
50 | static const struct address_space_operations aspace_aops = { | 50 | static const struct address_space_operations aspace_aops = { |
51 | .writepage = gfs2_aspace_writepage, | 51 | .writepage = gfs2_aspace_writepage, |
52 | .releasepage = gfs2_releasepage, | 52 | .releasepage = gfs2_releasepage, |
53 | .sync_page = block_sync_page, | ||
53 | }; | 54 | }; |
54 | 55 | ||
55 | /** | 56 | /** |
@@ -221,13 +222,14 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | |||
221 | struct buffer_head **bhp) | 222 | struct buffer_head **bhp) |
222 | { | 223 | { |
223 | *bhp = getbuf(gl, blkno, CREATE); | 224 | *bhp = getbuf(gl, blkno, CREATE); |
224 | if (!buffer_uptodate(*bhp)) | 225 | if (!buffer_uptodate(*bhp)) { |
225 | ll_rw_block(READ_META, 1, bhp); | 226 | ll_rw_block(READ_META, 1, bhp); |
226 | if (flags & DIO_WAIT) { | 227 | if (flags & DIO_WAIT) { |
227 | int error = gfs2_meta_wait(gl->gl_sbd, *bhp); | 228 | int error = gfs2_meta_wait(gl->gl_sbd, *bhp); |
228 | if (error) { | 229 | if (error) { |
229 | brelse(*bhp); | 230 | brelse(*bhp); |
230 | return error; | 231 | return error; |
232 | } | ||
231 | } | 233 | } |
232 | } | 234 | } |
233 | 235 | ||
@@ -282,7 +284,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | |||
282 | return; | 284 | return; |
283 | } | 285 | } |
284 | 286 | ||
285 | bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), | 287 | bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); |
286 | bd->bd_bh = bh; | 288 | bd->bd_bh = bh; |
287 | bd->bd_gl = gl; | 289 | bd->bd_gl = gl; |
288 | 290 | ||
@@ -317,7 +319,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
317 | } | 319 | } |
318 | if (bd) { | 320 | if (bd) { |
319 | if (bd->bd_ail) { | 321 | if (bd->bd_ail) { |
320 | gfs2_remove_from_ail(NULL, bd); | 322 | gfs2_remove_from_ail(bd); |
321 | bh->b_private = NULL; | 323 | bh->b_private = NULL; |
322 | bd->bd_bh = NULL; | 324 | bd->bd_bh = NULL; |
323 | bd->bd_blkno = bh->b_blocknr; | 325 | bd->bd_blkno = bh->b_blocknr; |
@@ -358,32 +360,6 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
358 | } | 360 | } |
359 | 361 | ||
360 | /** | 362 | /** |
361 | * gfs2_meta_cache_flush - get rid of any references on buffers for this inode | ||
362 | * @ip: The GFS2 inode | ||
363 | * | ||
364 | * This releases buffers that are in the most-recently-used array of | ||
365 | * blocks used for indirect block addressing for this inode. | ||
366 | */ | ||
367 | |||
368 | void gfs2_meta_cache_flush(struct gfs2_inode *ip) | ||
369 | { | ||
370 | struct buffer_head **bh_slot; | ||
371 | unsigned int x; | ||
372 | |||
373 | spin_lock(&ip->i_spin); | ||
374 | |||
375 | for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { | ||
376 | bh_slot = &ip->i_cache[x]; | ||
377 | if (*bh_slot) { | ||
378 | brelse(*bh_slot); | ||
379 | *bh_slot = NULL; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | spin_unlock(&ip->i_spin); | ||
384 | } | ||
385 | |||
386 | /** | ||
387 | * gfs2_meta_indirect_buffer - Get a metadata buffer | 363 | * gfs2_meta_indirect_buffer - Get a metadata buffer |
388 | * @ip: The GFS2 inode | 364 | * @ip: The GFS2 inode |
389 | * @height: The level of this buf in the metadata (indir addr) tree (if any) | 365 | * @height: The level of this buf in the metadata (indir addr) tree (if any) |
@@ -391,8 +367,6 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip) | |||
391 | * @new: Non-zero if we may create a new buffer | 367 | * @new: Non-zero if we may create a new buffer |
392 | * @bhp: the buffer is returned here | 368 | * @bhp: the buffer is returned here |
393 | * | 369 | * |
394 | * Try to use the gfs2_inode's MRU metadata tree cache. | ||
395 | * | ||
396 | * Returns: errno | 370 | * Returns: errno |
397 | */ | 371 | */ |
398 | 372 | ||
@@ -401,58 +375,25 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, | |||
401 | { | 375 | { |
402 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 376 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
403 | struct gfs2_glock *gl = ip->i_gl; | 377 | struct gfs2_glock *gl = ip->i_gl; |
404 | struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; | 378 | struct buffer_head *bh; |
405 | int in_cache = 0; | 379 | int ret = 0; |
406 | |||
407 | BUG_ON(!gl); | ||
408 | BUG_ON(!sdp); | ||
409 | |||
410 | spin_lock(&ip->i_spin); | ||
411 | if (*bh_slot && (*bh_slot)->b_blocknr == num) { | ||
412 | bh = *bh_slot; | ||
413 | get_bh(bh); | ||
414 | in_cache = 1; | ||
415 | } | ||
416 | spin_unlock(&ip->i_spin); | ||
417 | |||
418 | if (!bh) | ||
419 | bh = getbuf(gl, num, CREATE); | ||
420 | |||
421 | if (!bh) | ||
422 | return -ENOBUFS; | ||
423 | 380 | ||
424 | if (new) { | 381 | if (new) { |
425 | if (gfs2_assert_warn(sdp, height)) | 382 | BUG_ON(height == 0); |
426 | goto err; | 383 | bh = gfs2_meta_new(gl, num); |
427 | meta_prep_new(bh); | ||
428 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 384 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
429 | gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); | 385 | gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); |
430 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); | 386 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); |
431 | } else { | 387 | } else { |
432 | u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; | 388 | u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; |
433 | if (!buffer_uptodate(bh)) { | 389 | ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); |
434 | ll_rw_block(READ_META, 1, &bh); | 390 | if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { |
435 | if (gfs2_meta_wait(sdp, bh)) | 391 | brelse(bh); |
436 | goto err; | 392 | ret = -EIO; |
437 | } | 393 | } |
438 | if (gfs2_metatype_check(sdp, bh, mtype)) | ||
439 | goto err; | ||
440 | } | ||
441 | |||
442 | if (!in_cache) { | ||
443 | spin_lock(&ip->i_spin); | ||
444 | if (*bh_slot) | ||
445 | brelse(*bh_slot); | ||
446 | *bh_slot = bh; | ||
447 | get_bh(bh); | ||
448 | spin_unlock(&ip->i_spin); | ||
449 | } | 394 | } |
450 | |||
451 | *bhp = bh; | 395 | *bhp = bh; |
452 | return 0; | 396 | return ret; |
453 | err: | ||
454 | brelse(bh); | ||
455 | return -EIO; | ||
456 | } | 397 | } |
457 | 398 | ||
458 | /** | 399 | /** |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index b7048222ebb4..73e3b1c76fe1 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
@@ -56,7 +56,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, | |||
56 | 56 | ||
57 | void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); | 57 | void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); |
58 | 58 | ||
59 | void gfs2_meta_cache_flush(struct gfs2_inode *ip); | ||
60 | int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, | 59 | int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, |
61 | int new, struct buffer_head **bhp); | 60 | int new, struct buffer_head **bhp); |
62 | 61 | ||
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9679f8b9870d..38dbe99a30ed 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/backing-dev.h> | ||
24 | #include <linux/pagevec.h> | ||
23 | 25 | ||
24 | #include "gfs2.h" | 26 | #include "gfs2.h" |
25 | #include "incore.h" | 27 | #include "incore.h" |
@@ -32,7 +34,6 @@ | |||
32 | #include "quota.h" | 34 | #include "quota.h" |
33 | #include "trans.h" | 35 | #include "trans.h" |
34 | #include "rgrp.h" | 36 | #include "rgrp.h" |
35 | #include "ops_file.h" | ||
36 | #include "super.h" | 37 | #include "super.h" |
37 | #include "util.h" | 38 | #include "util.h" |
38 | #include "glops.h" | 39 | #include "glops.h" |
@@ -58,22 +59,6 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | |||
58 | } | 59 | } |
59 | 60 | ||
60 | /** | 61 | /** |
61 | * gfs2_get_block - Fills in a buffer head with details about a block | ||
62 | * @inode: The inode | ||
63 | * @lblock: The block number to look up | ||
64 | * @bh_result: The buffer head to return the result in | ||
65 | * @create: Non-zero if we may add block to the file | ||
66 | * | ||
67 | * Returns: errno | ||
68 | */ | ||
69 | |||
70 | int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
71 | struct buffer_head *bh_result, int create) | ||
72 | { | ||
73 | return gfs2_block_map(inode, lblock, create, bh_result); | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block | 62 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block |
78 | * @inode: The inode | 63 | * @inode: The inode |
79 | * @lblock: The block number to look up | 64 | * @lblock: The block number to look up |
@@ -88,7 +73,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
88 | { | 73 | { |
89 | int error; | 74 | int error; |
90 | 75 | ||
91 | error = gfs2_block_map(inode, lblock, 0, bh_result); | 76 | error = gfs2_block_map(inode, lblock, bh_result, 0); |
92 | if (error) | 77 | if (error) |
93 | return error; | 78 | return error; |
94 | if (!buffer_mapped(bh_result)) | 79 | if (!buffer_mapped(bh_result)) |
@@ -99,20 +84,19 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
99 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, | 84 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, |
100 | struct buffer_head *bh_result, int create) | 85 | struct buffer_head *bh_result, int create) |
101 | { | 86 | { |
102 | return gfs2_block_map(inode, lblock, 0, bh_result); | 87 | return gfs2_block_map(inode, lblock, bh_result, 0); |
103 | } | 88 | } |
104 | 89 | ||
105 | /** | 90 | /** |
106 | * gfs2_writepage - Write complete page | 91 | * gfs2_writepage_common - Common bits of writepage |
107 | * @page: Page to write | 92 | * @page: The page to be written |
93 | * @wbc: The writeback control | ||
108 | * | 94 | * |
109 | * Returns: errno | 95 | * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. |
110 | * | ||
111 | * Some of this is copied from block_write_full_page() although we still | ||
112 | * call it to do most of the work. | ||
113 | */ | 96 | */ |
114 | 97 | ||
115 | static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | 98 | static int gfs2_writepage_common(struct page *page, |
99 | struct writeback_control *wbc) | ||
116 | { | 100 | { |
117 | struct inode *inode = page->mapping->host; | 101 | struct inode *inode = page->mapping->host; |
118 | struct gfs2_inode *ip = GFS2_I(inode); | 102 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -120,41 +104,133 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | |||
120 | loff_t i_size = i_size_read(inode); | 104 | loff_t i_size = i_size_read(inode); |
121 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 105 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
122 | unsigned offset; | 106 | unsigned offset; |
123 | int error; | 107 | int ret = -EIO; |
124 | int done_trans = 0; | ||
125 | 108 | ||
126 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { | 109 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) |
127 | unlock_page(page); | 110 | goto out; |
128 | return -EIO; | 111 | ret = 0; |
129 | } | ||
130 | if (current->journal_info) | 112 | if (current->journal_info) |
131 | goto out_ignore; | 113 | goto redirty; |
132 | |||
133 | /* Is the page fully outside i_size? (truncate in progress) */ | 114 | /* Is the page fully outside i_size? (truncate in progress) */ |
134 | offset = i_size & (PAGE_CACHE_SIZE-1); | 115 | offset = i_size & (PAGE_CACHE_SIZE-1); |
135 | if (page->index > end_index || (page->index == end_index && !offset)) { | 116 | if (page->index > end_index || (page->index == end_index && !offset)) { |
136 | page->mapping->a_ops->invalidatepage(page, 0); | 117 | page->mapping->a_ops->invalidatepage(page, 0); |
137 | unlock_page(page); | 118 | goto out; |
138 | return 0; /* don't care */ | 119 | } |
120 | return 1; | ||
121 | redirty: | ||
122 | redirty_page_for_writepage(wbc, page); | ||
123 | out: | ||
124 | unlock_page(page); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * gfs2_writeback_writepage - Write page for writeback mappings | ||
130 | * @page: The page | ||
131 | * @wbc: The writeback control | ||
132 | * | ||
133 | */ | ||
134 | |||
135 | static int gfs2_writeback_writepage(struct page *page, | ||
136 | struct writeback_control *wbc) | ||
137 | { | ||
138 | int ret; | ||
139 | |||
140 | ret = gfs2_writepage_common(page, wbc); | ||
141 | if (ret <= 0) | ||
142 | return ret; | ||
143 | |||
144 | ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); | ||
145 | if (ret == -EAGAIN) | ||
146 | ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
147 | return ret; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * gfs2_ordered_writepage - Write page for ordered data files | ||
152 | * @page: The page to write | ||
153 | * @wbc: The writeback control | ||
154 | * | ||
155 | */ | ||
156 | |||
157 | static int gfs2_ordered_writepage(struct page *page, | ||
158 | struct writeback_control *wbc) | ||
159 | { | ||
160 | struct inode *inode = page->mapping->host; | ||
161 | struct gfs2_inode *ip = GFS2_I(inode); | ||
162 | int ret; | ||
163 | |||
164 | ret = gfs2_writepage_common(page, wbc); | ||
165 | if (ret <= 0) | ||
166 | return ret; | ||
167 | |||
168 | if (!page_has_buffers(page)) { | ||
169 | create_empty_buffers(page, inode->i_sb->s_blocksize, | ||
170 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
139 | } | 171 | } |
172 | gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); | ||
173 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
174 | } | ||
140 | 175 | ||
141 | if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && | 176 | /** |
142 | PageChecked(page)) { | 177 | * __gfs2_jdata_writepage - The core of jdata writepage |
178 | * @page: The page to write | ||
179 | * @wbc: The writeback control | ||
180 | * | ||
181 | * This is shared between writepage and writepages and implements the | ||
182 | * core of the writepage operation. If a transaction is required then | ||
183 | * PageChecked will have been set and the transaction will have | ||
184 | * already been started before this is called. | ||
185 | */ | ||
186 | |||
187 | static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
188 | { | ||
189 | struct inode *inode = page->mapping->host; | ||
190 | struct gfs2_inode *ip = GFS2_I(inode); | ||
191 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
192 | |||
193 | if (PageChecked(page)) { | ||
143 | ClearPageChecked(page); | 194 | ClearPageChecked(page); |
144 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
145 | if (error) | ||
146 | goto out_ignore; | ||
147 | if (!page_has_buffers(page)) { | 195 | if (!page_has_buffers(page)) { |
148 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 196 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
149 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 197 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
150 | } | 198 | } |
151 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); | 199 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); |
200 | } | ||
201 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * gfs2_jdata_writepage - Write complete page | ||
206 | * @page: Page to write | ||
207 | * | ||
208 | * Returns: errno | ||
209 | * | ||
210 | */ | ||
211 | |||
212 | static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
213 | { | ||
214 | struct inode *inode = page->mapping->host; | ||
215 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
216 | int error; | ||
217 | int done_trans = 0; | ||
218 | |||
219 | error = gfs2_writepage_common(page, wbc); | ||
220 | if (error <= 0) | ||
221 | return error; | ||
222 | |||
223 | if (PageChecked(page)) { | ||
224 | if (wbc->sync_mode != WB_SYNC_ALL) | ||
225 | goto out_ignore; | ||
226 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
227 | if (error) | ||
228 | goto out_ignore; | ||
152 | done_trans = 1; | 229 | done_trans = 1; |
153 | } | 230 | } |
154 | error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | 231 | error = __gfs2_jdata_writepage(page, wbc); |
155 | if (done_trans) | 232 | if (done_trans) |
156 | gfs2_trans_end(sdp); | 233 | gfs2_trans_end(sdp); |
157 | gfs2_meta_cache_flush(ip); | ||
158 | return error; | 234 | return error; |
159 | 235 | ||
160 | out_ignore: | 236 | out_ignore: |
@@ -164,29 +240,190 @@ out_ignore: | |||
164 | } | 240 | } |
165 | 241 | ||
166 | /** | 242 | /** |
167 | * gfs2_writepages - Write a bunch of dirty pages back to disk | 243 | * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk |
168 | * @mapping: The mapping to write | 244 | * @mapping: The mapping to write |
169 | * @wbc: Write-back control | 245 | * @wbc: Write-back control |
170 | * | 246 | * |
171 | * For journaled files and/or ordered writes this just falls back to the | 247 | * For the data=writeback case we can already ignore buffer heads |
172 | * kernel's default writepages path for now. We will probably want to change | ||
173 | * that eventually (i.e. when we look at allocate on flush). | ||
174 | * | ||
175 | * For the data=writeback case though we can already ignore buffer heads | ||
176 | * and write whole extents at once. This is a big reduction in the | 248 | * and write whole extents at once. This is a big reduction in the |
177 | * number of I/O requests we send and the bmap calls we make in this case. | 249 | * number of I/O requests we send and the bmap calls we make in this case. |
178 | */ | 250 | */ |
179 | static int gfs2_writepages(struct address_space *mapping, | 251 | static int gfs2_writeback_writepages(struct address_space *mapping, |
180 | struct writeback_control *wbc) | 252 | struct writeback_control *wbc) |
253 | { | ||
254 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages | ||
259 | * @mapping: The mapping | ||
260 | * @wbc: The writeback control | ||
261 | * @writepage: The writepage function to call for each page | ||
262 | * @pvec: The vector of pages | ||
263 | * @nr_pages: The number of pages to write | ||
264 | * | ||
265 | * Returns: non-zero if loop should terminate, zero otherwise | ||
266 | */ | ||
267 | |||
268 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | ||
269 | struct writeback_control *wbc, | ||
270 | struct pagevec *pvec, | ||
271 | int nr_pages, pgoff_t end) | ||
181 | { | 272 | { |
182 | struct inode *inode = mapping->host; | 273 | struct inode *inode = mapping->host; |
183 | struct gfs2_inode *ip = GFS2_I(inode); | ||
184 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 274 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
275 | loff_t i_size = i_size_read(inode); | ||
276 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
277 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
278 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | ||
279 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
280 | int i; | ||
281 | int ret; | ||
282 | |||
283 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | ||
284 | if (ret < 0) | ||
285 | return ret; | ||
286 | |||
287 | for(i = 0; i < nr_pages; i++) { | ||
288 | struct page *page = pvec->pages[i]; | ||
289 | |||
290 | lock_page(page); | ||
291 | |||
292 | if (unlikely(page->mapping != mapping)) { | ||
293 | unlock_page(page); | ||
294 | continue; | ||
295 | } | ||
296 | |||
297 | if (!wbc->range_cyclic && page->index > end) { | ||
298 | ret = 1; | ||
299 | unlock_page(page); | ||
300 | continue; | ||
301 | } | ||
302 | |||
303 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
304 | wait_on_page_writeback(page); | ||
305 | |||
306 | if (PageWriteback(page) || | ||
307 | !clear_page_dirty_for_io(page)) { | ||
308 | unlock_page(page); | ||
309 | continue; | ||
310 | } | ||
311 | |||
312 | /* Is the page fully outside i_size? (truncate in progress) */ | ||
313 | if (page->index > end_index || (page->index == end_index && !offset)) { | ||
314 | page->mapping->a_ops->invalidatepage(page, 0); | ||
315 | unlock_page(page); | ||
316 | continue; | ||
317 | } | ||
318 | |||
319 | ret = __gfs2_jdata_writepage(page, wbc); | ||
320 | |||
321 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
322 | ret = 1; | ||
323 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
324 | wbc->encountered_congestion = 1; | ||
325 | ret = 1; | ||
326 | } | ||
327 | |||
328 | } | ||
329 | gfs2_trans_end(sdp); | ||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * gfs2_write_cache_jdata - Like write_cache_pages but different | ||
335 | * @mapping: The mapping to write | ||
336 | * @wbc: The writeback control | ||
337 | * @writepage: The writepage function to call | ||
338 | * @data: The data to pass to writepage | ||
339 | * | ||
340 | * The reason that we use our own function here is that we need to | ||
341 | * start transactions before we grab page locks. This allows us | ||
342 | * to get the ordering right. | ||
343 | */ | ||
344 | |||
345 | static int gfs2_write_cache_jdata(struct address_space *mapping, | ||
346 | struct writeback_control *wbc) | ||
347 | { | ||
348 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
349 | int ret = 0; | ||
350 | int done = 0; | ||
351 | struct pagevec pvec; | ||
352 | int nr_pages; | ||
353 | pgoff_t index; | ||
354 | pgoff_t end; | ||
355 | int scanned = 0; | ||
356 | int range_whole = 0; | ||
357 | |||
358 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
359 | wbc->encountered_congestion = 1; | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | pagevec_init(&pvec, 0); | ||
364 | if (wbc->range_cyclic) { | ||
365 | index = mapping->writeback_index; /* Start from prev offset */ | ||
366 | end = -1; | ||
367 | } else { | ||
368 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
369 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
370 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
371 | range_whole = 1; | ||
372 | scanned = 1; | ||
373 | } | ||
185 | 374 | ||
186 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | 375 | retry: |
187 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | 376 | while (!done && (index <= end) && |
377 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
378 | PAGECACHE_TAG_DIRTY, | ||
379 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
380 | scanned = 1; | ||
381 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | ||
382 | if (ret) | ||
383 | done = 1; | ||
384 | if (ret > 0) | ||
385 | ret = 0; | ||
386 | |||
387 | pagevec_release(&pvec); | ||
388 | cond_resched(); | ||
389 | } | ||
390 | |||
391 | if (!scanned && !done) { | ||
392 | /* | ||
393 | * We hit the last page and there is more work to be done: wrap | ||
394 | * back to the start of the file | ||
395 | */ | ||
396 | scanned = 1; | ||
397 | index = 0; | ||
398 | goto retry; | ||
399 | } | ||
400 | |||
401 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
402 | mapping->writeback_index = index; | ||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | |||
407 | /** | ||
408 | * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk | ||
409 | * @mapping: The mapping to write | ||
410 | * @wbc: The writeback control | ||
411 | * | ||
412 | */ | ||
188 | 413 | ||
189 | return generic_writepages(mapping, wbc); | 414 | static int gfs2_jdata_writepages(struct address_space *mapping, |
415 | struct writeback_control *wbc) | ||
416 | { | ||
417 | struct gfs2_inode *ip = GFS2_I(mapping->host); | ||
418 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | ||
419 | int ret; | ||
420 | |||
421 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
422 | if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { | ||
423 | gfs2_log_flush(sdp, ip->i_gl); | ||
424 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
425 | } | ||
426 | return ret; | ||
190 | } | 427 | } |
191 | 428 | ||
192 | /** | 429 | /** |
@@ -231,62 +468,107 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
231 | 468 | ||
232 | 469 | ||
233 | /** | 470 | /** |
234 | * gfs2_readpage - readpage with locking | 471 | * __gfs2_readpage - readpage |
235 | * @file: The file to read a page for. N.B. This may be NULL if we are | 472 | * @file: The file to read a page for |
236 | * reading an internal file. | ||
237 | * @page: The page to read | 473 | * @page: The page to read |
238 | * | 474 | * |
239 | * Returns: errno | 475 | * This is the core of gfs2's readpage. Its used by the internal file |
476 | * reading code as in that case we already hold the glock. Also its | ||
477 | * called by gfs2_readpage() once the required lock has been granted. | ||
478 | * | ||
240 | */ | 479 | */ |
241 | 480 | ||
242 | static int gfs2_readpage(struct file *file, struct page *page) | 481 | static int __gfs2_readpage(void *file, struct page *page) |
243 | { | 482 | { |
244 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 483 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
245 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | 484 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); |
246 | struct gfs2_file *gf = NULL; | ||
247 | struct gfs2_holder gh; | ||
248 | int error; | 485 | int error; |
249 | int do_unlock = 0; | ||
250 | |||
251 | if (likely(file != &gfs2_internal_file_sentinel)) { | ||
252 | if (file) { | ||
253 | gf = file->private_data; | ||
254 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | ||
255 | /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ | ||
256 | goto skip_lock; | ||
257 | } | ||
258 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); | ||
259 | do_unlock = 1; | ||
260 | error = gfs2_glock_nq_atime(&gh); | ||
261 | if (unlikely(error)) | ||
262 | goto out_unlock; | ||
263 | } | ||
264 | 486 | ||
265 | skip_lock: | ||
266 | if (gfs2_is_stuffed(ip)) { | 487 | if (gfs2_is_stuffed(ip)) { |
267 | error = stuffed_readpage(ip, page); | 488 | error = stuffed_readpage(ip, page); |
268 | unlock_page(page); | 489 | unlock_page(page); |
269 | } else | 490 | } else { |
270 | error = mpage_readpage(page, gfs2_get_block); | 491 | error = mpage_readpage(page, gfs2_block_map); |
492 | } | ||
271 | 493 | ||
272 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 494 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
273 | error = -EIO; | 495 | return -EIO; |
496 | |||
497 | return error; | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * gfs2_readpage - read a page of a file | ||
502 | * @file: The file to read | ||
503 | * @page: The page of the file | ||
504 | * | ||
505 | * This deals with the locking required. We use a trylock in order to | ||
506 | * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE | ||
507 | * in the event that we are unable to get the lock. | ||
508 | */ | ||
509 | |||
510 | static int gfs2_readpage(struct file *file, struct page *page) | ||
511 | { | ||
512 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
513 | struct gfs2_holder gh; | ||
514 | int error; | ||
274 | 515 | ||
275 | if (do_unlock) { | 516 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); |
276 | gfs2_glock_dq_m(1, &gh); | 517 | error = gfs2_glock_nq_atime(&gh); |
277 | gfs2_holder_uninit(&gh); | 518 | if (unlikely(error)) { |
519 | unlock_page(page); | ||
520 | goto out; | ||
278 | } | 521 | } |
522 | error = __gfs2_readpage(file, page); | ||
523 | gfs2_glock_dq(&gh); | ||
279 | out: | 524 | out: |
280 | return error; | 525 | gfs2_holder_uninit(&gh); |
281 | out_unlock: | ||
282 | unlock_page(page); | ||
283 | if (error == GLR_TRYFAILED) { | 526 | if (error == GLR_TRYFAILED) { |
284 | error = AOP_TRUNCATED_PAGE; | ||
285 | yield(); | 527 | yield(); |
528 | return AOP_TRUNCATED_PAGE; | ||
286 | } | 529 | } |
287 | if (do_unlock) | 530 | return error; |
288 | gfs2_holder_uninit(&gh); | 531 | } |
289 | goto out; | 532 | |
533 | /** | ||
534 | * gfs2_internal_read - read an internal file | ||
535 | * @ip: The gfs2 inode | ||
536 | * @ra_state: The readahead state (or NULL for no readahead) | ||
537 | * @buf: The buffer to fill | ||
538 | * @pos: The file position | ||
539 | * @size: The amount to read | ||
540 | * | ||
541 | */ | ||
542 | |||
543 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
544 | char *buf, loff_t *pos, unsigned size) | ||
545 | { | ||
546 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
547 | unsigned long index = *pos / PAGE_CACHE_SIZE; | ||
548 | unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); | ||
549 | unsigned copied = 0; | ||
550 | unsigned amt; | ||
551 | struct page *page; | ||
552 | void *p; | ||
553 | |||
554 | do { | ||
555 | amt = size - copied; | ||
556 | if (offset + size > PAGE_CACHE_SIZE) | ||
557 | amt = PAGE_CACHE_SIZE - offset; | ||
558 | page = read_cache_page(mapping, index, __gfs2_readpage, NULL); | ||
559 | if (IS_ERR(page)) | ||
560 | return PTR_ERR(page); | ||
561 | p = kmap_atomic(page, KM_USER0); | ||
562 | memcpy(buf + copied, p + offset, amt); | ||
563 | kunmap_atomic(p, KM_USER0); | ||
564 | mark_page_accessed(page); | ||
565 | page_cache_release(page); | ||
566 | copied += amt; | ||
567 | index++; | ||
568 | offset = 0; | ||
569 | } while(copied < size); | ||
570 | (*pos) += size; | ||
571 | return size; | ||
290 | } | 572 | } |
291 | 573 | ||
292 | /** | 574 | /** |
@@ -300,10 +582,9 @@ out_unlock: | |||
300 | * Any I/O we ignore at this time will be done via readpage later. | 582 | * Any I/O we ignore at this time will be done via readpage later. |
301 | * 2. We don't handle stuffed files here we let readpage do the honours. | 583 | * 2. We don't handle stuffed files here we let readpage do the honours. |
302 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 584 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
303 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 585 | * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. |
304 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | ||
305 | * well as read-ahead. | ||
306 | */ | 586 | */ |
587 | |||
307 | static int gfs2_readpages(struct file *file, struct address_space *mapping, | 588 | static int gfs2_readpages(struct file *file, struct address_space *mapping, |
308 | struct list_head *pages, unsigned nr_pages) | 589 | struct list_head *pages, unsigned nr_pages) |
309 | { | 590 | { |
@@ -311,42 +592,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
311 | struct gfs2_inode *ip = GFS2_I(inode); | 592 | struct gfs2_inode *ip = GFS2_I(inode); |
312 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 593 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
313 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
314 | int ret = 0; | 595 | int ret; |
315 | int do_unlock = 0; | ||
316 | 596 | ||
317 | if (likely(file != &gfs2_internal_file_sentinel)) { | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); |
318 | if (file) { | 598 | ret = gfs2_glock_nq_atime(&gh); |
319 | struct gfs2_file *gf = file->private_data; | 599 | if (unlikely(ret)) |
320 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | 600 | goto out_uninit; |
321 | goto skip_lock; | ||
322 | } | ||
323 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, | ||
324 | LM_FLAG_TRY_1CB|GL_ATIME, &gh); | ||
325 | do_unlock = 1; | ||
326 | ret = gfs2_glock_nq_atime(&gh); | ||
327 | if (ret == GLR_TRYFAILED) | ||
328 | goto out_noerror; | ||
329 | if (unlikely(ret)) | ||
330 | goto out_unlock; | ||
331 | } | ||
332 | skip_lock: | ||
333 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
334 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 602 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); |
335 | 603 | gfs2_glock_dq(&gh); | |
336 | if (do_unlock) { | 604 | out_uninit: |
337 | gfs2_glock_dq_m(1, &gh); | 605 | gfs2_holder_uninit(&gh); |
338 | gfs2_holder_uninit(&gh); | ||
339 | } | ||
340 | out: | ||
341 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 606 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
342 | ret = -EIO; | 607 | ret = -EIO; |
343 | return ret; | 608 | return ret; |
344 | out_noerror: | ||
345 | ret = 0; | ||
346 | out_unlock: | ||
347 | if (do_unlock) | ||
348 | gfs2_holder_uninit(&gh); | ||
349 | goto out; | ||
350 | } | 609 | } |
351 | 610 | ||
352 | /** | 611 | /** |
@@ -382,20 +641,11 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
382 | if (unlikely(error)) | 641 | if (unlikely(error)) |
383 | goto out_uninit; | 642 | goto out_uninit; |
384 | 643 | ||
385 | error = -ENOMEM; | ||
386 | page = __grab_cache_page(mapping, index); | ||
387 | *pagep = page; | ||
388 | if (!page) | ||
389 | goto out_unlock; | ||
390 | |||
391 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); | 644 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); |
392 | |||
393 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); | 645 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); |
394 | if (error) | 646 | if (error) |
395 | goto out_putpage; | 647 | goto out_unlock; |
396 | |||
397 | 648 | ||
398 | ip->i_alloc.al_requested = 0; | ||
399 | if (alloc_required) { | 649 | if (alloc_required) { |
400 | al = gfs2_alloc_get(ip); | 650 | al = gfs2_alloc_get(ip); |
401 | 651 | ||
@@ -424,40 +674,47 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
424 | if (error) | 674 | if (error) |
425 | goto out_trans_fail; | 675 | goto out_trans_fail; |
426 | 676 | ||
677 | error = -ENOMEM; | ||
678 | page = __grab_cache_page(mapping, index); | ||
679 | *pagep = page; | ||
680 | if (unlikely(!page)) | ||
681 | goto out_endtrans; | ||
682 | |||
427 | if (gfs2_is_stuffed(ip)) { | 683 | if (gfs2_is_stuffed(ip)) { |
684 | error = 0; | ||
428 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | 685 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { |
429 | error = gfs2_unstuff_dinode(ip, page); | 686 | error = gfs2_unstuff_dinode(ip, page); |
430 | if (error == 0) | 687 | if (error == 0) |
431 | goto prepare_write; | 688 | goto prepare_write; |
432 | } else if (!PageUptodate(page)) | 689 | } else if (!PageUptodate(page)) { |
433 | error = stuffed_readpage(ip, page); | 690 | error = stuffed_readpage(ip, page); |
691 | } | ||
434 | goto out; | 692 | goto out; |
435 | } | 693 | } |
436 | 694 | ||
437 | prepare_write: | 695 | prepare_write: |
438 | error = block_prepare_write(page, from, to, gfs2_get_block); | 696 | error = block_prepare_write(page, from, to, gfs2_block_map); |
439 | |||
440 | out: | 697 | out: |
441 | if (error) { | 698 | if (error == 0) |
442 | gfs2_trans_end(sdp); | 699 | return 0; |
700 | |||
701 | page_cache_release(page); | ||
702 | if (pos + len > ip->i_inode.i_size) | ||
703 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
704 | out_endtrans: | ||
705 | gfs2_trans_end(sdp); | ||
443 | out_trans_fail: | 706 | out_trans_fail: |
444 | if (alloc_required) { | 707 | if (alloc_required) { |
445 | gfs2_inplace_release(ip); | 708 | gfs2_inplace_release(ip); |
446 | out_qunlock: | 709 | out_qunlock: |
447 | gfs2_quota_unlock(ip); | 710 | gfs2_quota_unlock(ip); |
448 | out_alloc_put: | 711 | out_alloc_put: |
449 | gfs2_alloc_put(ip); | 712 | gfs2_alloc_put(ip); |
450 | } | 713 | } |
451 | out_putpage: | ||
452 | page_cache_release(page); | ||
453 | if (pos + len > ip->i_inode.i_size) | ||
454 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
455 | out_unlock: | 714 | out_unlock: |
456 | gfs2_glock_dq_m(1, &ip->i_gh); | 715 | gfs2_glock_dq(&ip->i_gh); |
457 | out_uninit: | 716 | out_uninit: |
458 | gfs2_holder_uninit(&ip->i_gh); | 717 | gfs2_holder_uninit(&ip->i_gh); |
459 | } | ||
460 | |||
461 | return error; | 718 | return error; |
462 | } | 719 | } |
463 | 720 | ||
@@ -565,7 +822,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
565 | struct gfs2_inode *ip = GFS2_I(inode); | 822 | struct gfs2_inode *ip = GFS2_I(inode); |
566 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 823 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
567 | struct buffer_head *dibh; | 824 | struct buffer_head *dibh; |
568 | struct gfs2_alloc *al = &ip->i_alloc; | 825 | struct gfs2_alloc *al = ip->i_alloc; |
569 | struct gfs2_dinode *di; | 826 | struct gfs2_dinode *di; |
570 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); | 827 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); |
571 | unsigned int to = from + len; | 828 | unsigned int to = from + len; |
@@ -585,19 +842,16 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
585 | if (gfs2_is_stuffed(ip)) | 842 | if (gfs2_is_stuffed(ip)) |
586 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); | 843 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); |
587 | 844 | ||
588 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 845 | if (!gfs2_is_writeback(ip)) |
589 | gfs2_page_add_databufs(ip, page, from, to); | 846 | gfs2_page_add_databufs(ip, page, from, to); |
590 | 847 | ||
591 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 848 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
592 | 849 | ||
593 | if (likely(ret >= 0)) { | 850 | if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { |
594 | copied = ret; | 851 | di = (struct gfs2_dinode *)dibh->b_data; |
595 | if ((pos + copied) > inode->i_size) { | 852 | ip->i_di.di_size = inode->i_size; |
596 | di = (struct gfs2_dinode *)dibh->b_data; | 853 | di->di_size = cpu_to_be64(inode->i_size); |
597 | ip->i_di.di_size = inode->i_size; | 854 | mark_inode_dirty(inode); |
598 | di->di_size = cpu_to_be64(inode->i_size); | ||
599 | mark_inode_dirty(inode); | ||
600 | } | ||
601 | } | 855 | } |
602 | 856 | ||
603 | if (inode == sdp->sd_rindex) | 857 | if (inode == sdp->sd_rindex) |
@@ -606,7 +860,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
606 | brelse(dibh); | 860 | brelse(dibh); |
607 | gfs2_trans_end(sdp); | 861 | gfs2_trans_end(sdp); |
608 | failed: | 862 | failed: |
609 | if (al->al_requested) { | 863 | if (al) { |
610 | gfs2_inplace_release(ip); | 864 | gfs2_inplace_release(ip); |
611 | gfs2_quota_unlock(ip); | 865 | gfs2_quota_unlock(ip); |
612 | gfs2_alloc_put(ip); | 866 | gfs2_alloc_put(ip); |
@@ -625,11 +879,7 @@ failed: | |||
625 | 879 | ||
626 | static int gfs2_set_page_dirty(struct page *page) | 880 | static int gfs2_set_page_dirty(struct page *page) |
627 | { | 881 | { |
628 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 882 | SetPageChecked(page); |
629 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | ||
630 | |||
631 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | ||
632 | SetPageChecked(page); | ||
633 | return __set_page_dirty_buffers(page); | 883 | return __set_page_dirty_buffers(page); |
634 | } | 884 | } |
635 | 885 | ||
@@ -653,7 +903,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) | |||
653 | return 0; | 903 | return 0; |
654 | 904 | ||
655 | if (!gfs2_is_stuffed(ip)) | 905 | if (!gfs2_is_stuffed(ip)) |
656 | dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); | 906 | dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); |
657 | 907 | ||
658 | gfs2_glock_dq_uninit(&i_gh); | 908 | gfs2_glock_dq_uninit(&i_gh); |
659 | 909 | ||
@@ -719,13 +969,9 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
719 | { | 969 | { |
720 | /* | 970 | /* |
721 | * Should we return an error here? I can't see that O_DIRECT for | 971 | * Should we return an error here? I can't see that O_DIRECT for |
722 | * a journaled file makes any sense. For now we'll silently fall | 972 | * a stuffed file makes any sense. For now we'll silently fall |
723 | * back to buffered I/O, likewise we do the same for stuffed | 973 | * back to buffered I/O |
724 | * files since they are (a) small and (b) unaligned. | ||
725 | */ | 974 | */ |
726 | if (gfs2_is_jdata(ip)) | ||
727 | return 0; | ||
728 | |||
729 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
730 | return 0; | 976 | return 0; |
731 | 977 | ||
@@ -836,9 +1082,23 @@ cannot_release: | |||
836 | return 0; | 1082 | return 0; |
837 | } | 1083 | } |
838 | 1084 | ||
839 | const struct address_space_operations gfs2_file_aops = { | 1085 | static const struct address_space_operations gfs2_writeback_aops = { |
840 | .writepage = gfs2_writepage, | 1086 | .writepage = gfs2_writeback_writepage, |
841 | .writepages = gfs2_writepages, | 1087 | .writepages = gfs2_writeback_writepages, |
1088 | .readpage = gfs2_readpage, | ||
1089 | .readpages = gfs2_readpages, | ||
1090 | .sync_page = block_sync_page, | ||
1091 | .write_begin = gfs2_write_begin, | ||
1092 | .write_end = gfs2_write_end, | ||
1093 | .bmap = gfs2_bmap, | ||
1094 | .invalidatepage = gfs2_invalidatepage, | ||
1095 | .releasepage = gfs2_releasepage, | ||
1096 | .direct_IO = gfs2_direct_IO, | ||
1097 | .migratepage = buffer_migrate_page, | ||
1098 | }; | ||
1099 | |||
1100 | static const struct address_space_operations gfs2_ordered_aops = { | ||
1101 | .writepage = gfs2_ordered_writepage, | ||
842 | .readpage = gfs2_readpage, | 1102 | .readpage = gfs2_readpage, |
843 | .readpages = gfs2_readpages, | 1103 | .readpages = gfs2_readpages, |
844 | .sync_page = block_sync_page, | 1104 | .sync_page = block_sync_page, |
@@ -849,5 +1109,34 @@ const struct address_space_operations gfs2_file_aops = { | |||
849 | .invalidatepage = gfs2_invalidatepage, | 1109 | .invalidatepage = gfs2_invalidatepage, |
850 | .releasepage = gfs2_releasepage, | 1110 | .releasepage = gfs2_releasepage, |
851 | .direct_IO = gfs2_direct_IO, | 1111 | .direct_IO = gfs2_direct_IO, |
1112 | .migratepage = buffer_migrate_page, | ||
852 | }; | 1113 | }; |
853 | 1114 | ||
1115 | static const struct address_space_operations gfs2_jdata_aops = { | ||
1116 | .writepage = gfs2_jdata_writepage, | ||
1117 | .writepages = gfs2_jdata_writepages, | ||
1118 | .readpage = gfs2_readpage, | ||
1119 | .readpages = gfs2_readpages, | ||
1120 | .sync_page = block_sync_page, | ||
1121 | .write_begin = gfs2_write_begin, | ||
1122 | .write_end = gfs2_write_end, | ||
1123 | .set_page_dirty = gfs2_set_page_dirty, | ||
1124 | .bmap = gfs2_bmap, | ||
1125 | .invalidatepage = gfs2_invalidatepage, | ||
1126 | .releasepage = gfs2_releasepage, | ||
1127 | }; | ||
1128 | |||
1129 | void gfs2_set_aops(struct inode *inode) | ||
1130 | { | ||
1131 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1132 | |||
1133 | if (gfs2_is_writeback(ip)) | ||
1134 | inode->i_mapping->a_ops = &gfs2_writeback_aops; | ||
1135 | else if (gfs2_is_ordered(ip)) | ||
1136 | inode->i_mapping->a_ops = &gfs2_ordered_aops; | ||
1137 | else if (gfs2_is_jdata(ip)) | ||
1138 | inode->i_mapping->a_ops = &gfs2_jdata_aops; | ||
1139 | else | ||
1140 | BUG(); | ||
1141 | } | ||
1142 | |||
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index fa1b5b3d28b9..5da21285bba4 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h | |||
@@ -14,9 +14,10 @@ | |||
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | 16 | ||
17 | extern const struct address_space_operations gfs2_file_aops; | ||
18 | extern int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
19 | struct buffer_head *bh_result, int create); | ||
20 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | 17 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); |
18 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
19 | struct file_ra_state *ra_state, | ||
20 | char *buf, loff_t *pos, unsigned size); | ||
21 | extern void gfs2_set_aops(struct inode *inode); | ||
21 | 22 | ||
22 | #endif /* __OPS_ADDRESS_DOT_H__ */ | 23 | #endif /* __OPS_ADDRESS_DOT_H__ */ |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index bb11fd6752d3..f4842f2548cd 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -33,57 +33,12 @@ | |||
33 | #include "lm.h" | 33 | #include "lm.h" |
34 | #include "log.h" | 34 | #include "log.h" |
35 | #include "meta_io.h" | 35 | #include "meta_io.h" |
36 | #include "ops_file.h" | ||
37 | #include "ops_vm.h" | ||
38 | #include "quota.h" | 36 | #include "quota.h" |
39 | #include "rgrp.h" | 37 | #include "rgrp.h" |
40 | #include "trans.h" | 38 | #include "trans.h" |
41 | #include "util.h" | 39 | #include "util.h" |
42 | #include "eaops.h" | 40 | #include "eaops.h" |
43 | 41 | #include "ops_address.h" | |
44 | /* | ||
45 | * Most fields left uninitialised to catch anybody who tries to | ||
46 | * use them. f_flags set to prevent file_accessed() from touching | ||
47 | * any other part of this. Its use is purely as a flag so that we | ||
48 | * know (in readpage()) whether or not do to locking. | ||
49 | */ | ||
50 | struct file gfs2_internal_file_sentinel = { | ||
51 | .f_flags = O_NOATIME|O_RDONLY, | ||
52 | }; | ||
53 | |||
54 | static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, | ||
55 | unsigned long offset, unsigned long size) | ||
56 | { | ||
57 | char *kaddr; | ||
58 | unsigned long count = desc->count; | ||
59 | |||
60 | if (size > count) | ||
61 | size = count; | ||
62 | |||
63 | kaddr = kmap(page); | ||
64 | memcpy(desc->arg.data, kaddr + offset, size); | ||
65 | kunmap(page); | ||
66 | |||
67 | desc->count = count - size; | ||
68 | desc->written += size; | ||
69 | desc->arg.buf += size; | ||
70 | return size; | ||
71 | } | ||
72 | |||
73 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
74 | char *buf, loff_t *pos, unsigned size) | ||
75 | { | ||
76 | struct inode *inode = &ip->i_inode; | ||
77 | read_descriptor_t desc; | ||
78 | desc.written = 0; | ||
79 | desc.arg.data = buf; | ||
80 | desc.count = size; | ||
81 | desc.error = 0; | ||
82 | do_generic_mapping_read(inode->i_mapping, ra_state, | ||
83 | &gfs2_internal_file_sentinel, pos, &desc, | ||
84 | gfs2_read_actor); | ||
85 | return desc.written ? desc.written : desc.error; | ||
86 | } | ||
87 | 42 | ||
88 | /** | 43 | /** |
89 | * gfs2_llseek - seek to a location in a file | 44 | * gfs2_llseek - seek to a location in a file |
@@ -214,7 +169,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
214 | if (put_user(fsflags, ptr)) | 169 | if (put_user(fsflags, ptr)) |
215 | error = -EFAULT; | 170 | error = -EFAULT; |
216 | 171 | ||
217 | gfs2_glock_dq_m(1, &gh); | 172 | gfs2_glock_dq(&gh); |
218 | gfs2_holder_uninit(&gh); | 173 | gfs2_holder_uninit(&gh); |
219 | return error; | 174 | return error; |
220 | } | 175 | } |
@@ -291,7 +246,16 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
291 | if (error) | 246 | if (error) |
292 | goto out; | 247 | goto out; |
293 | } | 248 | } |
294 | 249 | if ((flags ^ new_flags) & GFS2_DIF_JDATA) { | |
250 | if (flags & GFS2_DIF_JDATA) | ||
251 | gfs2_log_flush(sdp, ip->i_gl); | ||
252 | error = filemap_fdatawrite(inode->i_mapping); | ||
253 | if (error) | ||
254 | goto out; | ||
255 | error = filemap_fdatawait(inode->i_mapping); | ||
256 | if (error) | ||
257 | goto out; | ||
258 | } | ||
295 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 259 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); |
296 | if (error) | 260 | if (error) |
297 | goto out; | 261 | goto out; |
@@ -303,6 +267,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
303 | gfs2_dinode_out(ip, bh->b_data); | 267 | gfs2_dinode_out(ip, bh->b_data); |
304 | brelse(bh); | 268 | brelse(bh); |
305 | gfs2_set_inode_flags(inode); | 269 | gfs2_set_inode_flags(inode); |
270 | gfs2_set_aops(inode); | ||
306 | out_trans_end: | 271 | out_trans_end: |
307 | gfs2_trans_end(sdp); | 272 | gfs2_trans_end(sdp); |
308 | out: | 273 | out: |
@@ -338,6 +303,128 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
338 | return -ENOTTY; | 303 | return -ENOTTY; |
339 | } | 304 | } |
340 | 305 | ||
306 | /** | ||
307 | * gfs2_allocate_page_backing - Use bmap to allocate blocks | ||
308 | * @page: The (locked) page to allocate backing for | ||
309 | * | ||
310 | * We try to allocate all the blocks required for the page in | ||
311 | * one go. This might fail for various reasons, so we keep | ||
312 | * trying until all the blocks to back this page are allocated. | ||
313 | * If some of the blocks are already allocated, thats ok too. | ||
314 | */ | ||
315 | |||
316 | static int gfs2_allocate_page_backing(struct page *page) | ||
317 | { | ||
318 | struct inode *inode = page->mapping->host; | ||
319 | struct buffer_head bh; | ||
320 | unsigned long size = PAGE_CACHE_SIZE; | ||
321 | u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
322 | |||
323 | do { | ||
324 | bh.b_state = 0; | ||
325 | bh.b_size = size; | ||
326 | gfs2_block_map(inode, lblock, &bh, 1); | ||
327 | if (!buffer_mapped(&bh)) | ||
328 | return -EIO; | ||
329 | size -= bh.b_size; | ||
330 | lblock += (bh.b_size >> inode->i_blkbits); | ||
331 | } while(size > 0); | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable | ||
337 | * @vma: The virtual memory area | ||
338 | * @page: The page which is about to become writable | ||
339 | * | ||
340 | * When the page becomes writable, we need to ensure that we have | ||
341 | * blocks allocated on disk to back that page. | ||
342 | */ | ||
343 | |||
344 | static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
345 | { | ||
346 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | ||
347 | struct gfs2_inode *ip = GFS2_I(inode); | ||
348 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
349 | unsigned long last_index; | ||
350 | u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits); | ||
351 | unsigned int data_blocks, ind_blocks, rblocks; | ||
352 | int alloc_required = 0; | ||
353 | struct gfs2_holder gh; | ||
354 | struct gfs2_alloc *al; | ||
355 | int ret; | ||
356 | |||
357 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | ||
358 | ret = gfs2_glock_nq_atime(&gh); | ||
359 | if (ret) | ||
360 | goto out; | ||
361 | |||
362 | set_bit(GIF_SW_PAGED, &ip->i_flags); | ||
363 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | ||
364 | ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); | ||
365 | if (ret || !alloc_required) | ||
366 | goto out_unlock; | ||
367 | ret = -ENOMEM; | ||
368 | al = gfs2_alloc_get(ip); | ||
369 | if (al == NULL) | ||
370 | goto out_unlock; | ||
371 | |||
372 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
373 | if (ret) | ||
374 | goto out_alloc_put; | ||
375 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
376 | if (ret) | ||
377 | goto out_quota_unlock; | ||
378 | al->al_requested = data_blocks + ind_blocks; | ||
379 | ret = gfs2_inplace_reserve(ip); | ||
380 | if (ret) | ||
381 | goto out_quota_unlock; | ||
382 | |||
383 | rblocks = RES_DINODE + ind_blocks; | ||
384 | if (gfs2_is_jdata(ip)) | ||
385 | rblocks += data_blocks ? data_blocks : 1; | ||
386 | if (ind_blocks || data_blocks) | ||
387 | rblocks += RES_STATFS + RES_QUOTA; | ||
388 | ret = gfs2_trans_begin(sdp, rblocks, 0); | ||
389 | if (ret) | ||
390 | goto out_trans_fail; | ||
391 | |||
392 | lock_page(page); | ||
393 | ret = -EINVAL; | ||
394 | last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; | ||
395 | if (page->index > last_index) | ||
396 | goto out_unlock_page; | ||
397 | ret = 0; | ||
398 | if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) | ||
399 | goto out_unlock_page; | ||
400 | if (gfs2_is_stuffed(ip)) { | ||
401 | ret = gfs2_unstuff_dinode(ip, page); | ||
402 | if (ret) | ||
403 | goto out_unlock_page; | ||
404 | } | ||
405 | ret = gfs2_allocate_page_backing(page); | ||
406 | |||
407 | out_unlock_page: | ||
408 | unlock_page(page); | ||
409 | gfs2_trans_end(sdp); | ||
410 | out_trans_fail: | ||
411 | gfs2_inplace_release(ip); | ||
412 | out_quota_unlock: | ||
413 | gfs2_quota_unlock(ip); | ||
414 | out_alloc_put: | ||
415 | gfs2_alloc_put(ip); | ||
416 | out_unlock: | ||
417 | gfs2_glock_dq(&gh); | ||
418 | out: | ||
419 | gfs2_holder_uninit(&gh); | ||
420 | return ret; | ||
421 | } | ||
422 | |||
423 | static struct vm_operations_struct gfs2_vm_ops = { | ||
424 | .fault = filemap_fault, | ||
425 | .page_mkwrite = gfs2_page_mkwrite, | ||
426 | }; | ||
427 | |||
341 | 428 | ||
342 | /** | 429 | /** |
343 | * gfs2_mmap - | 430 | * gfs2_mmap - |
@@ -360,14 +447,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
360 | return error; | 447 | return error; |
361 | } | 448 | } |
362 | 449 | ||
363 | /* This is VM_MAYWRITE instead of VM_WRITE because a call | 450 | vma->vm_ops = &gfs2_vm_ops; |
364 | to mprotect() can turn on VM_WRITE later. */ | ||
365 | |||
366 | if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == | ||
367 | (VM_MAYSHARE | VM_MAYWRITE)) | ||
368 | vma->vm_ops = &gfs2_vm_ops_sharewrite; | ||
369 | else | ||
370 | vma->vm_ops = &gfs2_vm_ops_private; | ||
371 | 451 | ||
372 | gfs2_glock_dq_uninit(&i_gh); | 452 | gfs2_glock_dq_uninit(&i_gh); |
373 | 453 | ||
@@ -538,15 +618,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) | |||
538 | if (__mandatory_lock(&ip->i_inode)) | 618 | if (__mandatory_lock(&ip->i_inode)) |
539 | return -ENOLCK; | 619 | return -ENOLCK; |
540 | 620 | ||
541 | if (sdp->sd_args.ar_localflocks) { | ||
542 | if (IS_GETLK(cmd)) { | ||
543 | posix_test_lock(file, fl); | ||
544 | return 0; | ||
545 | } else { | ||
546 | return posix_lock_file_wait(file, fl); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | if (cmd == F_CANCELLK) { | 621 | if (cmd == F_CANCELLK) { |
551 | /* Hack: */ | 622 | /* Hack: */ |
552 | cmd = F_SETLK; | 623 | cmd = F_SETLK; |
@@ -632,16 +703,12 @@ static void do_unflock(struct file *file, struct file_lock *fl) | |||
632 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | 703 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) |
633 | { | 704 | { |
634 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | 705 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); |
635 | struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); | ||
636 | 706 | ||
637 | if (!(fl->fl_flags & FL_FLOCK)) | 707 | if (!(fl->fl_flags & FL_FLOCK)) |
638 | return -ENOLCK; | 708 | return -ENOLCK; |
639 | if (__mandatory_lock(&ip->i_inode)) | 709 | if (__mandatory_lock(&ip->i_inode)) |
640 | return -ENOLCK; | 710 | return -ENOLCK; |
641 | 711 | ||
642 | if (sdp->sd_args.ar_localflocks) | ||
643 | return flock_lock_file_wait(file, fl); | ||
644 | |||
645 | if (fl->fl_type == F_UNLCK) { | 712 | if (fl->fl_type == F_UNLCK) { |
646 | do_unflock(file, fl); | 713 | do_unflock(file, fl); |
647 | return 0; | 714 | return 0; |
@@ -678,3 +745,27 @@ const struct file_operations gfs2_dir_fops = { | |||
678 | .flock = gfs2_flock, | 745 | .flock = gfs2_flock, |
679 | }; | 746 | }; |
680 | 747 | ||
748 | const struct file_operations gfs2_file_fops_nolock = { | ||
749 | .llseek = gfs2_llseek, | ||
750 | .read = do_sync_read, | ||
751 | .aio_read = generic_file_aio_read, | ||
752 | .write = do_sync_write, | ||
753 | .aio_write = generic_file_aio_write, | ||
754 | .unlocked_ioctl = gfs2_ioctl, | ||
755 | .mmap = gfs2_mmap, | ||
756 | .open = gfs2_open, | ||
757 | .release = gfs2_close, | ||
758 | .fsync = gfs2_fsync, | ||
759 | .splice_read = generic_file_splice_read, | ||
760 | .splice_write = generic_file_splice_write, | ||
761 | .setlease = gfs2_setlease, | ||
762 | }; | ||
763 | |||
764 | const struct file_operations gfs2_dir_fops_nolock = { | ||
765 | .readdir = gfs2_readdir, | ||
766 | .unlocked_ioctl = gfs2_ioctl, | ||
767 | .open = gfs2_open, | ||
768 | .release = gfs2_close, | ||
769 | .fsync = gfs2_fsync, | ||
770 | }; | ||
771 | |||
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h deleted file mode 100644 index 7e5d8ec9c846..000000000000 --- a/fs/gfs2/ops_file.h +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __OPS_FILE_DOT_H__ | ||
11 | #define __OPS_FILE_DOT_H__ | ||
12 | |||
13 | #include <linux/fs.h> | ||
14 | struct gfs2_inode; | ||
15 | |||
16 | extern struct file gfs2_internal_file_sentinel; | ||
17 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
18 | struct file_ra_state *ra_state, | ||
19 | char *buf, loff_t *pos, unsigned size); | ||
20 | extern void gfs2_set_inode_flags(struct inode *inode); | ||
21 | extern const struct file_operations gfs2_file_fops; | ||
22 | extern const struct file_operations gfs2_dir_fops; | ||
23 | |||
24 | #endif /* __OPS_FILE_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 17de58e83d92..43d511bba52d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "gfs2.h" | 22 | #include "gfs2.h" |
23 | #include "incore.h" | 23 | #include "incore.h" |
24 | #include "bmap.h" | ||
24 | #include "daemon.h" | 25 | #include "daemon.h" |
25 | #include "glock.h" | 26 | #include "glock.h" |
26 | #include "glops.h" | 27 | #include "glops.h" |
@@ -59,7 +60,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
59 | 60 | ||
60 | mutex_init(&sdp->sd_inum_mutex); | 61 | mutex_init(&sdp->sd_inum_mutex); |
61 | spin_lock_init(&sdp->sd_statfs_spin); | 62 | spin_lock_init(&sdp->sd_statfs_spin); |
62 | mutex_init(&sdp->sd_statfs_mutex); | ||
63 | 63 | ||
64 | spin_lock_init(&sdp->sd_rindex_spin); | 64 | spin_lock_init(&sdp->sd_rindex_spin); |
65 | mutex_init(&sdp->sd_rindex_mutex); | 65 | mutex_init(&sdp->sd_rindex_mutex); |
@@ -77,7 +77,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
77 | 77 | ||
78 | spin_lock_init(&sdp->sd_log_lock); | 78 | spin_lock_init(&sdp->sd_log_lock); |
79 | 79 | ||
80 | INIT_LIST_HEAD(&sdp->sd_log_le_gl); | ||
81 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); | 80 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); |
82 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); | 81 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); |
83 | INIT_LIST_HEAD(&sdp->sd_log_le_rg); | 82 | INIT_LIST_HEAD(&sdp->sd_log_le_rg); |
@@ -303,6 +302,67 @@ out: | |||
303 | return error; | 302 | return error; |
304 | } | 303 | } |
305 | 304 | ||
305 | /** | ||
306 | * map_journal_extents - create a reusable "extent" mapping from all logical | ||
307 | * blocks to all physical blocks for the given journal. This will save | ||
308 | * us time when writing journal blocks. Most journals will have only one | ||
309 | * extent that maps all their logical blocks. That's because gfs2.mkfs | ||
310 | * arranges the journal blocks sequentially to maximize performance. | ||
311 | * So the extent would map the first block for the entire file length. | ||
312 | * However, gfs2_jadd can happen while file activity is happening, so | ||
313 | * those journals may not be sequential. Less likely is the case where | ||
314 | * the users created their own journals by mounting the metafs and | ||
315 | * laying it out. But it's still possible. These journals might have | ||
316 | * several extents. | ||
317 | * | ||
318 | * TODO: This should be done in bigger chunks rather than one block at a time, | ||
319 | * but since it's only done at mount time, I'm not worried about the | ||
320 | * time it takes. | ||
321 | */ | ||
322 | static int map_journal_extents(struct gfs2_sbd *sdp) | ||
323 | { | ||
324 | struct gfs2_jdesc *jd = sdp->sd_jdesc; | ||
325 | unsigned int lb; | ||
326 | u64 db, prev_db; /* logical block, disk block, prev disk block */ | ||
327 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
328 | struct gfs2_journal_extent *jext = NULL; | ||
329 | struct buffer_head bh; | ||
330 | int rc = 0; | ||
331 | |||
332 | prev_db = 0; | ||
333 | |||
334 | for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) { | ||
335 | bh.b_state = 0; | ||
336 | bh.b_blocknr = 0; | ||
337 | bh.b_size = 1 << ip->i_inode.i_blkbits; | ||
338 | rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); | ||
339 | db = bh.b_blocknr; | ||
340 | if (rc || !db) { | ||
341 | printk(KERN_INFO "GFS2 journal mapping error %d: lb=" | ||
342 | "%u db=%llu\n", rc, lb, (unsigned long long)db); | ||
343 | break; | ||
344 | } | ||
345 | if (!prev_db || db != prev_db + 1) { | ||
346 | jext = kzalloc(sizeof(struct gfs2_journal_extent), | ||
347 | GFP_KERNEL); | ||
348 | if (!jext) { | ||
349 | printk(KERN_INFO "GFS2 error: out of memory " | ||
350 | "mapping journal extents.\n"); | ||
351 | rc = -ENOMEM; | ||
352 | break; | ||
353 | } | ||
354 | jext->dblock = db; | ||
355 | jext->lblock = lb; | ||
356 | jext->blocks = 1; | ||
357 | list_add_tail(&jext->extent_list, &jd->extent_list); | ||
358 | } else { | ||
359 | jext->blocks++; | ||
360 | } | ||
361 | prev_db = db; | ||
362 | } | ||
363 | return rc; | ||
364 | } | ||
365 | |||
306 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 366 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
307 | { | 367 | { |
308 | struct gfs2_holder ji_gh; | 368 | struct gfs2_holder ji_gh; |
@@ -340,7 +400,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
340 | 400 | ||
341 | if (sdp->sd_args.ar_spectator) { | 401 | if (sdp->sd_args.ar_spectator) { |
342 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); | 402 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); |
343 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | 403 | atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); |
344 | } else { | 404 | } else { |
345 | if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { | 405 | if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { |
346 | fs_err(sdp, "can't mount journal #%u\n", | 406 | fs_err(sdp, "can't mount journal #%u\n", |
@@ -377,7 +437,10 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
377 | sdp->sd_jdesc->jd_jid, error); | 437 | sdp->sd_jdesc->jd_jid, error); |
378 | goto fail_jinode_gh; | 438 | goto fail_jinode_gh; |
379 | } | 439 | } |
380 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | 440 | atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); |
441 | |||
442 | /* Map the extents for this journal's blocks */ | ||
443 | map_journal_extents(sdp); | ||
381 | } | 444 | } |
382 | 445 | ||
383 | if (sdp->sd_lockstruct.ls_first) { | 446 | if (sdp->sd_lockstruct.ls_first) { |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 291f0c7eaa3b..9f71372c1757 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -61,7 +61,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, | |||
61 | inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); | 61 | inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); |
62 | if (!IS_ERR(inode)) { | 62 | if (!IS_ERR(inode)) { |
63 | gfs2_trans_end(sdp); | 63 | gfs2_trans_end(sdp); |
64 | if (dip->i_alloc.al_rgd) | 64 | if (dip->i_alloc->al_rgd) |
65 | gfs2_inplace_release(dip); | 65 | gfs2_inplace_release(dip); |
66 | gfs2_quota_unlock(dip); | 66 | gfs2_quota_unlock(dip); |
67 | gfs2_alloc_put(dip); | 67 | gfs2_alloc_put(dip); |
@@ -113,8 +113,18 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
113 | if (inode && IS_ERR(inode)) | 113 | if (inode && IS_ERR(inode)) |
114 | return ERR_PTR(PTR_ERR(inode)); | 114 | return ERR_PTR(PTR_ERR(inode)); |
115 | 115 | ||
116 | if (inode) | 116 | if (inode) { |
117 | struct gfs2_glock *gl = GFS2_I(inode)->i_gl; | ||
118 | struct gfs2_holder gh; | ||
119 | int error; | ||
120 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | ||
121 | if (error) { | ||
122 | iput(inode); | ||
123 | return ERR_PTR(error); | ||
124 | } | ||
125 | gfs2_glock_dq_uninit(&gh); | ||
117 | return d_splice_alias(inode, dentry); | 126 | return d_splice_alias(inode, dentry); |
127 | } | ||
118 | d_add(dentry, inode); | 128 | d_add(dentry, inode); |
119 | 129 | ||
120 | return NULL; | 130 | return NULL; |
@@ -366,7 +376,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
366 | } | 376 | } |
367 | 377 | ||
368 | gfs2_trans_end(sdp); | 378 | gfs2_trans_end(sdp); |
369 | if (dip->i_alloc.al_rgd) | 379 | if (dip->i_alloc->al_rgd) |
370 | gfs2_inplace_release(dip); | 380 | gfs2_inplace_release(dip); |
371 | gfs2_quota_unlock(dip); | 381 | gfs2_quota_unlock(dip); |
372 | gfs2_alloc_put(dip); | 382 | gfs2_alloc_put(dip); |
@@ -442,7 +452,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
442 | gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ | 452 | gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ |
443 | 453 | ||
444 | gfs2_trans_end(sdp); | 454 | gfs2_trans_end(sdp); |
445 | if (dip->i_alloc.al_rgd) | 455 | if (dip->i_alloc->al_rgd) |
446 | gfs2_inplace_release(dip); | 456 | gfs2_inplace_release(dip); |
447 | gfs2_quota_unlock(dip); | 457 | gfs2_quota_unlock(dip); |
448 | gfs2_alloc_put(dip); | 458 | gfs2_alloc_put(dip); |
@@ -548,7 +558,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
548 | } | 558 | } |
549 | 559 | ||
550 | gfs2_trans_end(sdp); | 560 | gfs2_trans_end(sdp); |
551 | if (dip->i_alloc.al_rgd) | 561 | if (dip->i_alloc->al_rgd) |
552 | gfs2_inplace_release(dip); | 562 | gfs2_inplace_release(dip); |
553 | gfs2_quota_unlock(dip); | 563 | gfs2_quota_unlock(dip); |
554 | gfs2_alloc_put(dip); | 564 | gfs2_alloc_put(dip); |
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index 34f0caac1a03..fd8cee231e1d 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h | |||
@@ -16,5 +16,11 @@ extern const struct inode_operations gfs2_file_iops; | |||
16 | extern const struct inode_operations gfs2_dir_iops; | 16 | extern const struct inode_operations gfs2_dir_iops; |
17 | extern const struct inode_operations gfs2_symlink_iops; | 17 | extern const struct inode_operations gfs2_symlink_iops; |
18 | extern const struct inode_operations gfs2_dev_iops; | 18 | extern const struct inode_operations gfs2_dev_iops; |
19 | extern const struct file_operations gfs2_file_fops; | ||
20 | extern const struct file_operations gfs2_dir_fops; | ||
21 | extern const struct file_operations gfs2_file_fops_nolock; | ||
22 | extern const struct file_operations gfs2_dir_fops_nolock; | ||
23 | |||
24 | extern void gfs2_set_inode_flags(struct inode *inode); | ||
19 | 25 | ||
20 | #endif /* __OPS_INODE_DOT_H__ */ | 26 | #endif /* __OPS_INODE_DOT_H__ */ |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 950f31460e8b..5e524217944a 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -487,7 +487,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) | |||
487 | if (ip) { | 487 | if (ip) { |
488 | ip->i_flags = 0; | 488 | ip->i_flags = 0; |
489 | ip->i_gl = NULL; | 489 | ip->i_gl = NULL; |
490 | ip->i_last_pfault = jiffies; | ||
491 | } | 490 | } |
492 | return &ip->i_inode; | 491 | return &ip->i_inode; |
493 | } | 492 | } |
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c deleted file mode 100644 index 927d739d4685..000000000000 --- a/fs/gfs2/ops_vm.c +++ /dev/null | |||
@@ -1,169 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/slab.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/completion.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/gfs2_ondisk.h> | ||
17 | #include <linux/lm_interface.h> | ||
18 | |||
19 | #include "gfs2.h" | ||
20 | #include "incore.h" | ||
21 | #include "bmap.h" | ||
22 | #include "glock.h" | ||
23 | #include "inode.h" | ||
24 | #include "ops_vm.h" | ||
25 | #include "quota.h" | ||
26 | #include "rgrp.h" | ||
27 | #include "trans.h" | ||
28 | #include "util.h" | ||
29 | |||
30 | static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
31 | { | ||
32 | struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); | ||
33 | |||
34 | set_bit(GIF_PAGED, &ip->i_flags); | ||
35 | return filemap_fault(vma, vmf); | ||
36 | } | ||
37 | |||
38 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) | ||
39 | { | ||
40 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
41 | unsigned long index = page->index; | ||
42 | u64 lblock = index << (PAGE_CACHE_SHIFT - | ||
43 | sdp->sd_sb.sb_bsize_shift); | ||
44 | unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; | ||
45 | struct gfs2_alloc *al; | ||
46 | unsigned int data_blocks, ind_blocks; | ||
47 | unsigned int x; | ||
48 | int error; | ||
49 | |||
50 | al = gfs2_alloc_get(ip); | ||
51 | |||
52 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
53 | if (error) | ||
54 | goto out; | ||
55 | |||
56 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
57 | if (error) | ||
58 | goto out_gunlock_q; | ||
59 | |||
60 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | ||
61 | |||
62 | al->al_requested = data_blocks + ind_blocks; | ||
63 | |||
64 | error = gfs2_inplace_reserve(ip); | ||
65 | if (error) | ||
66 | goto out_gunlock_q; | ||
67 | |||
68 | error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + | ||
69 | ind_blocks + RES_DINODE + | ||
70 | RES_STATFS + RES_QUOTA, 0); | ||
71 | if (error) | ||
72 | goto out_ipres; | ||
73 | |||
74 | if (gfs2_is_stuffed(ip)) { | ||
75 | error = gfs2_unstuff_dinode(ip, NULL); | ||
76 | if (error) | ||
77 | goto out_trans; | ||
78 | } | ||
79 | |||
80 | for (x = 0; x < blocks; ) { | ||
81 | u64 dblock; | ||
82 | unsigned int extlen; | ||
83 | int new = 1; | ||
84 | |||
85 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); | ||
86 | if (error) | ||
87 | goto out_trans; | ||
88 | |||
89 | lblock += extlen; | ||
90 | x += extlen; | ||
91 | } | ||
92 | |||
93 | gfs2_assert_warn(sdp, al->al_alloced); | ||
94 | |||
95 | out_trans: | ||
96 | gfs2_trans_end(sdp); | ||
97 | out_ipres: | ||
98 | gfs2_inplace_release(ip); | ||
99 | out_gunlock_q: | ||
100 | gfs2_quota_unlock(ip); | ||
101 | out: | ||
102 | gfs2_alloc_put(ip); | ||
103 | return error; | ||
104 | } | ||
105 | |||
106 | static int gfs2_sharewrite_fault(struct vm_area_struct *vma, | ||
107 | struct vm_fault *vmf) | ||
108 | { | ||
109 | struct file *file = vma->vm_file; | ||
110 | struct gfs2_file *gf = file->private_data; | ||
111 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
112 | struct gfs2_holder i_gh; | ||
113 | int alloc_required; | ||
114 | int error; | ||
115 | int ret = 0; | ||
116 | |||
117 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | ||
118 | if (error) | ||
119 | goto out; | ||
120 | |||
121 | set_bit(GIF_PAGED, &ip->i_flags); | ||
122 | set_bit(GIF_SW_PAGED, &ip->i_flags); | ||
123 | |||
124 | error = gfs2_write_alloc_required(ip, | ||
125 | (u64)vmf->pgoff << PAGE_CACHE_SHIFT, | ||
126 | PAGE_CACHE_SIZE, &alloc_required); | ||
127 | if (error) { | ||
128 | ret = VM_FAULT_OOM; /* XXX: are these right? */ | ||
129 | goto out_unlock; | ||
130 | } | ||
131 | |||
132 | set_bit(GFF_EXLOCK, &gf->f_flags); | ||
133 | ret = filemap_fault(vma, vmf); | ||
134 | clear_bit(GFF_EXLOCK, &gf->f_flags); | ||
135 | if (ret & VM_FAULT_ERROR) | ||
136 | goto out_unlock; | ||
137 | |||
138 | if (alloc_required) { | ||
139 | /* XXX: do we need to drop page lock around alloc_page_backing?*/ | ||
140 | error = alloc_page_backing(ip, vmf->page); | ||
141 | if (error) { | ||
142 | /* | ||
143 | * VM_FAULT_LOCKED should always be the case for | ||
144 | * filemap_fault, but it may not be in a future | ||
145 | * implementation. | ||
146 | */ | ||
147 | if (ret & VM_FAULT_LOCKED) | ||
148 | unlock_page(vmf->page); | ||
149 | page_cache_release(vmf->page); | ||
150 | ret = VM_FAULT_OOM; | ||
151 | goto out_unlock; | ||
152 | } | ||
153 | set_page_dirty(vmf->page); | ||
154 | } | ||
155 | |||
156 | out_unlock: | ||
157 | gfs2_glock_dq_uninit(&i_gh); | ||
158 | out: | ||
159 | return ret; | ||
160 | } | ||
161 | |||
162 | struct vm_operations_struct gfs2_vm_ops_private = { | ||
163 | .fault = gfs2_private_fault, | ||
164 | }; | ||
165 | |||
166 | struct vm_operations_struct gfs2_vm_ops_sharewrite = { | ||
167 | .fault = gfs2_sharewrite_fault, | ||
168 | }; | ||
169 | |||
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h deleted file mode 100644 index 4ae8f43ed5e3..000000000000 --- a/fs/gfs2/ops_vm.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __OPS_VM_DOT_H__ | ||
11 | #define __OPS_VM_DOT_H__ | ||
12 | |||
13 | #include <linux/mm.h> | ||
14 | |||
15 | extern struct vm_operations_struct gfs2_vm_ops_private; | ||
16 | extern struct vm_operations_struct gfs2_vm_ops_sharewrite; | ||
17 | |||
18 | #endif /* __OPS_VM_DOT_H__ */ | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index addb51e0f135..a08dabd6ce90 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -59,7 +59,6 @@ | |||
59 | #include "super.h" | 59 | #include "super.h" |
60 | #include "trans.h" | 60 | #include "trans.h" |
61 | #include "inode.h" | 61 | #include "inode.h" |
62 | #include "ops_file.h" | ||
63 | #include "ops_address.h" | 62 | #include "ops_address.h" |
64 | #include "util.h" | 63 | #include "util.h" |
65 | 64 | ||
@@ -274,10 +273,10 @@ static int bh_get(struct gfs2_quota_data *qd) | |||
274 | } | 273 | } |
275 | 274 | ||
276 | block = qd->qd_slot / sdp->sd_qc_per_block; | 275 | block = qd->qd_slot / sdp->sd_qc_per_block; |
277 | offset = qd->qd_slot % sdp->sd_qc_per_block;; | 276 | offset = qd->qd_slot % sdp->sd_qc_per_block; |
278 | 277 | ||
279 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; | 278 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; |
280 | error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map); | 279 | error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); |
281 | if (error) | 280 | if (error) |
282 | goto fail; | 281 | goto fail; |
283 | error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); | 282 | error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); |
@@ -454,7 +453,7 @@ static void qdsb_put(struct gfs2_quota_data *qd) | |||
454 | int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) | 453 | int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) |
455 | { | 454 | { |
456 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 455 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
457 | struct gfs2_alloc *al = &ip->i_alloc; | 456 | struct gfs2_alloc *al = ip->i_alloc; |
458 | struct gfs2_quota_data **qd = al->al_qd; | 457 | struct gfs2_quota_data **qd = al->al_qd; |
459 | int error; | 458 | int error; |
460 | 459 | ||
@@ -502,7 +501,7 @@ out: | |||
502 | void gfs2_quota_unhold(struct gfs2_inode *ip) | 501 | void gfs2_quota_unhold(struct gfs2_inode *ip) |
503 | { | 502 | { |
504 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 503 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
505 | struct gfs2_alloc *al = &ip->i_alloc; | 504 | struct gfs2_alloc *al = ip->i_alloc; |
506 | unsigned int x; | 505 | unsigned int x; |
507 | 506 | ||
508 | gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); | 507 | gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); |
@@ -646,7 +645,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
646 | } | 645 | } |
647 | 646 | ||
648 | if (!buffer_mapped(bh)) { | 647 | if (!buffer_mapped(bh)) { |
649 | gfs2_get_block(inode, iblock, bh, 1); | 648 | gfs2_block_map(inode, iblock, bh, 1); |
650 | if (!buffer_mapped(bh)) | 649 | if (!buffer_mapped(bh)) |
651 | goto unlock; | 650 | goto unlock; |
652 | } | 651 | } |
@@ -793,11 +792,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, | |||
793 | struct gfs2_holder i_gh; | 792 | struct gfs2_holder i_gh; |
794 | struct gfs2_quota_host q; | 793 | struct gfs2_quota_host q; |
795 | char buf[sizeof(struct gfs2_quota)]; | 794 | char buf[sizeof(struct gfs2_quota)]; |
796 | struct file_ra_state ra_state; | ||
797 | int error; | 795 | int error; |
798 | struct gfs2_quota_lvb *qlvb; | 796 | struct gfs2_quota_lvb *qlvb; |
799 | 797 | ||
800 | file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping); | ||
801 | restart: | 798 | restart: |
802 | error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); | 799 | error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); |
803 | if (error) | 800 | if (error) |
@@ -820,8 +817,8 @@ restart: | |||
820 | 817 | ||
821 | memset(buf, 0, sizeof(struct gfs2_quota)); | 818 | memset(buf, 0, sizeof(struct gfs2_quota)); |
822 | pos = qd2offset(qd); | 819 | pos = qd2offset(qd); |
823 | error = gfs2_internal_read(ip, &ra_state, buf, | 820 | error = gfs2_internal_read(ip, NULL, buf, &pos, |
824 | &pos, sizeof(struct gfs2_quota)); | 821 | sizeof(struct gfs2_quota)); |
825 | if (error < 0) | 822 | if (error < 0) |
826 | goto fail_gunlock; | 823 | goto fail_gunlock; |
827 | 824 | ||
@@ -856,7 +853,7 @@ fail: | |||
856 | int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | 853 | int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) |
857 | { | 854 | { |
858 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 855 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
859 | struct gfs2_alloc *al = &ip->i_alloc; | 856 | struct gfs2_alloc *al = ip->i_alloc; |
860 | unsigned int x; | 857 | unsigned int x; |
861 | int error = 0; | 858 | int error = 0; |
862 | 859 | ||
@@ -924,7 +921,7 @@ static int need_sync(struct gfs2_quota_data *qd) | |||
924 | 921 | ||
925 | void gfs2_quota_unlock(struct gfs2_inode *ip) | 922 | void gfs2_quota_unlock(struct gfs2_inode *ip) |
926 | { | 923 | { |
927 | struct gfs2_alloc *al = &ip->i_alloc; | 924 | struct gfs2_alloc *al = ip->i_alloc; |
928 | struct gfs2_quota_data *qda[4]; | 925 | struct gfs2_quota_data *qda[4]; |
929 | unsigned int count = 0; | 926 | unsigned int count = 0; |
930 | unsigned int x; | 927 | unsigned int x; |
@@ -972,7 +969,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) | |||
972 | int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | 969 | int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) |
973 | { | 970 | { |
974 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 971 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
975 | struct gfs2_alloc *al = &ip->i_alloc; | 972 | struct gfs2_alloc *al = ip->i_alloc; |
976 | struct gfs2_quota_data *qd; | 973 | struct gfs2_quota_data *qd; |
977 | s64 value; | 974 | s64 value; |
978 | unsigned int x; | 975 | unsigned int x; |
@@ -1016,10 +1013,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
1016 | void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | 1013 | void gfs2_quota_change(struct gfs2_inode *ip, s64 change, |
1017 | u32 uid, u32 gid) | 1014 | u32 uid, u32 gid) |
1018 | { | 1015 | { |
1019 | struct gfs2_alloc *al = &ip->i_alloc; | 1016 | struct gfs2_alloc *al = ip->i_alloc; |
1020 | struct gfs2_quota_data *qd; | 1017 | struct gfs2_quota_data *qd; |
1021 | unsigned int x; | 1018 | unsigned int x; |
1022 | unsigned int found = 0; | ||
1023 | 1019 | ||
1024 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) | 1020 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) |
1025 | return; | 1021 | return; |
@@ -1032,7 +1028,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | |||
1032 | if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || | 1028 | if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || |
1033 | (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { | 1029 | (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { |
1034 | do_qc(qd, change); | 1030 | do_qc(qd, change); |
1035 | found++; | ||
1036 | } | 1031 | } |
1037 | } | 1032 | } |
1038 | } | 1033 | } |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index beb6c7ac0086..b249e294a95b 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -391,7 +391,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
391 | lblock = head->lh_blkno; | 391 | lblock = head->lh_blkno; |
392 | gfs2_replay_incr_blk(sdp, &lblock); | 392 | gfs2_replay_incr_blk(sdp, &lblock); |
393 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; | 393 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; |
394 | error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map); | 394 | error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); |
395 | if (error) | 395 | if (error) |
396 | return error; | 396 | return error; |
397 | if (!bh_map.b_blocknr) { | 397 | if (!bh_map.b_blocknr) { |
@@ -504,13 +504,21 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
504 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) | 504 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) |
505 | ro = 1; | 505 | ro = 1; |
506 | } else { | 506 | } else { |
507 | if (sdp->sd_vfs->s_flags & MS_RDONLY) | 507 | if (sdp->sd_vfs->s_flags & MS_RDONLY) { |
508 | ro = 1; | 508 | /* check if device itself is read-only */ |
509 | ro = bdev_read_only(sdp->sd_vfs->s_bdev); | ||
510 | if (!ro) { | ||
511 | fs_info(sdp, "recovery required on " | ||
512 | "read-only filesystem.\n"); | ||
513 | fs_info(sdp, "write access will be " | ||
514 | "enabled during recovery.\n"); | ||
515 | } | ||
516 | } | ||
509 | } | 517 | } |
510 | 518 | ||
511 | if (ro) { | 519 | if (ro) { |
512 | fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n", | 520 | fs_warn(sdp, "jid=%u: Can't replay: read-only block " |
513 | jd->jd_jid); | 521 | "device\n", jd->jd_jid); |
514 | error = -EROFS; | 522 | error = -EROFS; |
515 | goto fail_gunlock_tr; | 523 | goto fail_gunlock_tr; |
516 | } | 524 | } |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 708c287e1d0e..3552110b2e5f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -25,10 +25,10 @@ | |||
25 | #include "rgrp.h" | 25 | #include "rgrp.h" |
26 | #include "super.h" | 26 | #include "super.h" |
27 | #include "trans.h" | 27 | #include "trans.h" |
28 | #include "ops_file.h" | ||
29 | #include "util.h" | 28 | #include "util.h" |
30 | #include "log.h" | 29 | #include "log.h" |
31 | #include "inode.h" | 30 | #include "inode.h" |
31 | #include "ops_address.h" | ||
32 | 32 | ||
33 | #define BFITNOENT ((u32)~0) | 33 | #define BFITNOENT ((u32)~0) |
34 | #define NO_BLOCK ((u64)~0) | 34 | #define NO_BLOCK ((u64)~0) |
@@ -126,41 +126,43 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
126 | * Return: the block number (bitmap buffer scope) that was found | 126 | * Return: the block number (bitmap buffer scope) that was found |
127 | */ | 127 | */ |
128 | 128 | ||
129 | static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 129 | static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, |
130 | unsigned int buflen, u32 goal, | 130 | unsigned char old_state) |
131 | unsigned char old_state) | ||
132 | { | 131 | { |
133 | unsigned char *byte, *end, alloc; | 132 | unsigned char *byte; |
134 | u32 blk = goal; | 133 | u32 blk = goal; |
135 | unsigned int bit; | 134 | unsigned int bit, bitlong; |
135 | unsigned long *plong, plong55; | ||
136 | 136 | ||
137 | byte = buffer + (goal / GFS2_NBBY); | 137 | byte = buffer + (goal / GFS2_NBBY); |
138 | plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); | ||
138 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; | 139 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; |
139 | end = buffer + buflen; | 140 | bitlong = bit; |
140 | alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; | 141 | #if BITS_PER_LONG == 32 |
141 | 142 | plong55 = 0x55555555; | |
142 | while (byte < end) { | 143 | #else |
143 | /* If we're looking for a free block we can eliminate all | 144 | plong55 = 0x5555555555555555; |
144 | bitmap settings with 0x55, which represents four data | 145 | #endif |
145 | blocks in a row. If we're looking for a data block, we can | 146 | while (byte < buffer + buflen) { |
146 | eliminate 0x00 which corresponds to four free blocks. */ | 147 | |
147 | if ((*byte & 0x55) == alloc) { | 148 | if (bitlong == 0 && old_state == 0 && *plong == plong55) { |
148 | blk += (8 - bit) >> 1; | 149 | plong++; |
149 | 150 | byte += sizeof(unsigned long); | |
150 | bit = 0; | 151 | blk += sizeof(unsigned long) * GFS2_NBBY; |
151 | byte++; | ||
152 | |||
153 | continue; | 152 | continue; |
154 | } | 153 | } |
155 | |||
156 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) | 154 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) |
157 | return blk; | 155 | return blk; |
158 | |||
159 | bit += GFS2_BIT_SIZE; | 156 | bit += GFS2_BIT_SIZE; |
160 | if (bit >= 8) { | 157 | if (bit >= 8) { |
161 | bit = 0; | 158 | bit = 0; |
162 | byte++; | 159 | byte++; |
163 | } | 160 | } |
161 | bitlong += GFS2_BIT_SIZE; | ||
162 | if (bitlong >= sizeof(unsigned long) * 8) { | ||
163 | bitlong = 0; | ||
164 | plong++; | ||
165 | } | ||
164 | 166 | ||
165 | blk++; | 167 | blk++; |
166 | } | 168 | } |
@@ -817,11 +819,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | |||
817 | 819 | ||
818 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) | 820 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) |
819 | { | 821 | { |
820 | struct gfs2_alloc *al = &ip->i_alloc; | 822 | BUG_ON(ip->i_alloc != NULL); |
821 | 823 | ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); | |
822 | /* FIXME: Should assert that the correct locks are held here... */ | 824 | return ip->i_alloc; |
823 | memset(al, 0, sizeof(*al)); | ||
824 | return al; | ||
825 | } | 825 | } |
826 | 826 | ||
827 | /** | 827 | /** |
@@ -1059,26 +1059,34 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1059 | struct inode *inode = NULL; | 1059 | struct inode *inode = NULL; |
1060 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1060 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1061 | struct gfs2_rgrpd *rgd, *begin = NULL; | 1061 | struct gfs2_rgrpd *rgd, *begin = NULL; |
1062 | struct gfs2_alloc *al = &ip->i_alloc; | 1062 | struct gfs2_alloc *al = ip->i_alloc; |
1063 | int flags = LM_FLAG_TRY; | 1063 | int flags = LM_FLAG_TRY; |
1064 | int skipped = 0; | 1064 | int skipped = 0; |
1065 | int loops = 0; | 1065 | int loops = 0; |
1066 | int error; | 1066 | int error, rg_locked; |
1067 | 1067 | ||
1068 | /* Try recently successful rgrps */ | 1068 | /* Try recently successful rgrps */ |
1069 | 1069 | ||
1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); | 1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); |
1071 | 1071 | ||
1072 | while (rgd) { | 1072 | while (rgd) { |
1073 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, | 1073 | rg_locked = 0; |
1074 | LM_FLAG_TRY, &al->al_rgd_gh); | 1074 | |
1075 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | ||
1076 | rg_locked = 1; | ||
1077 | error = 0; | ||
1078 | } else { | ||
1079 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, | ||
1080 | LM_FLAG_TRY, &al->al_rgd_gh); | ||
1081 | } | ||
1075 | switch (error) { | 1082 | switch (error) { |
1076 | case 0: | 1083 | case 0: |
1077 | if (try_rgrp_fit(rgd, al)) | 1084 | if (try_rgrp_fit(rgd, al)) |
1078 | goto out; | 1085 | goto out; |
1079 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1086 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1080 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1087 | inode = try_rgrp_unlink(rgd, last_unlinked); |
1081 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1088 | if (!rg_locked) |
1089 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1082 | if (inode) | 1090 | if (inode) |
1083 | return inode; | 1091 | return inode; |
1084 | rgd = recent_rgrp_next(rgd, 1); | 1092 | rgd = recent_rgrp_next(rgd, 1); |
@@ -1098,15 +1106,23 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1098 | begin = rgd = forward_rgrp_get(sdp); | 1106 | begin = rgd = forward_rgrp_get(sdp); |
1099 | 1107 | ||
1100 | for (;;) { | 1108 | for (;;) { |
1101 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, | 1109 | rg_locked = 0; |
1102 | &al->al_rgd_gh); | 1110 | |
1111 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | ||
1112 | rg_locked = 1; | ||
1113 | error = 0; | ||
1114 | } else { | ||
1115 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, | ||
1116 | &al->al_rgd_gh); | ||
1117 | } | ||
1103 | switch (error) { | 1118 | switch (error) { |
1104 | case 0: | 1119 | case 0: |
1105 | if (try_rgrp_fit(rgd, al)) | 1120 | if (try_rgrp_fit(rgd, al)) |
1106 | goto out; | 1121 | goto out; |
1107 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1122 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1108 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1123 | inode = try_rgrp_unlink(rgd, last_unlinked); |
1109 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1124 | if (!rg_locked) |
1125 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1110 | if (inode) | 1126 | if (inode) |
1111 | return inode; | 1127 | return inode; |
1112 | break; | 1128 | break; |
@@ -1158,7 +1174,7 @@ out: | |||
1158 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1174 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) |
1159 | { | 1175 | { |
1160 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1176 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1161 | struct gfs2_alloc *al = &ip->i_alloc; | 1177 | struct gfs2_alloc *al = ip->i_alloc; |
1162 | struct inode *inode; | 1178 | struct inode *inode; |
1163 | int error = 0; | 1179 | int error = 0; |
1164 | u64 last_unlinked = NO_BLOCK; | 1180 | u64 last_unlinked = NO_BLOCK; |
@@ -1204,7 +1220,7 @@ try_again: | |||
1204 | void gfs2_inplace_release(struct gfs2_inode *ip) | 1220 | void gfs2_inplace_release(struct gfs2_inode *ip) |
1205 | { | 1221 | { |
1206 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1222 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1207 | struct gfs2_alloc *al = &ip->i_alloc; | 1223 | struct gfs2_alloc *al = ip->i_alloc; |
1208 | 1224 | ||
1209 | if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) | 1225 | if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) |
1210 | fs_warn(sdp, "al_alloced = %u, al_requested = %u " | 1226 | fs_warn(sdp, "al_alloced = %u, al_requested = %u " |
@@ -1213,7 +1229,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1213 | al->al_line); | 1229 | al->al_line); |
1214 | 1230 | ||
1215 | al->al_rgd = NULL; | 1231 | al->al_rgd = NULL; |
1216 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1232 | if (al->al_rgd_gh.gh_gl) |
1233 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1217 | if (ip != GFS2_I(sdp->sd_rindex)) | 1234 | if (ip != GFS2_I(sdp->sd_rindex)) |
1218 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1235 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1219 | } | 1236 | } |
@@ -1301,11 +1318,10 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1301 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1318 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
1302 | bitmaps, so we must search the originals for that. */ | 1319 | bitmaps, so we must search the originals for that. */ |
1303 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1320 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
1304 | blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, | 1321 | blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, |
1305 | bi->bi_len, goal, old_state); | 1322 | bi->bi_len, goal, old_state); |
1306 | else | 1323 | else |
1307 | blk = gfs2_bitfit(rgd, | 1324 | blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, |
1308 | bi->bi_bh->b_data + bi->bi_offset, | ||
1309 | bi->bi_len, goal, old_state); | 1325 | bi->bi_len, goal, old_state); |
1310 | if (blk != BFITNOENT) | 1326 | if (blk != BFITNOENT) |
1311 | break; | 1327 | break; |
@@ -1394,7 +1410,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1394 | u64 gfs2_alloc_data(struct gfs2_inode *ip) | 1410 | u64 gfs2_alloc_data(struct gfs2_inode *ip) |
1395 | { | 1411 | { |
1396 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1412 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1397 | struct gfs2_alloc *al = &ip->i_alloc; | 1413 | struct gfs2_alloc *al = ip->i_alloc; |
1398 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1414 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1399 | u32 goal, blk; | 1415 | u32 goal, blk; |
1400 | u64 block; | 1416 | u64 block; |
@@ -1439,7 +1455,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) | |||
1439 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) | 1455 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) |
1440 | { | 1456 | { |
1441 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1457 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1442 | struct gfs2_alloc *al = &ip->i_alloc; | 1458 | struct gfs2_alloc *al = ip->i_alloc; |
1443 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1459 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1444 | u32 goal, blk; | 1460 | u32 goal, blk; |
1445 | u64 block; | 1461 | u64 block; |
@@ -1485,7 +1501,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) | |||
1485 | u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | 1501 | u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) |
1486 | { | 1502 | { |
1487 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 1503 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
1488 | struct gfs2_alloc *al = &dip->i_alloc; | 1504 | struct gfs2_alloc *al = dip->i_alloc; |
1489 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1505 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1490 | u32 blk; | 1506 | u32 blk; |
1491 | u64 block; | 1507 | u64 block; |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4c6adfc6f2e..149bb161f4b6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -32,7 +32,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); | |||
32 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 32 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) | 33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) |
34 | { | 34 | { |
35 | return; /* So we can see where ip->i_alloc is used */ | 35 | BUG_ON(ip->i_alloc == NULL); |
36 | kfree(ip->i_alloc); | ||
37 | ip->i_alloc = NULL; | ||
36 | } | 38 | } |
37 | 39 | ||
38 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, | 40 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index dd3e737f528e..ef0562c3bc71 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -51,13 +51,9 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
51 | { | 51 | { |
52 | spin_lock_init(&gt->gt_spin); | 52 | spin_lock_init(&gt->gt_spin); |
53 | 53 | ||
54 | gt->gt_ilimit = 100; | ||
55 | gt->gt_ilimit_tries = 3; | ||
56 | gt->gt_ilimit_min = 1; | ||
57 | gt->gt_demote_secs = 300; | 54 | gt->gt_demote_secs = 300; |
58 | gt->gt_incore_log_blocks = 1024; | 55 | gt->gt_incore_log_blocks = 1024; |
59 | gt->gt_log_flush_secs = 60; | 56 | gt->gt_log_flush_secs = 60; |
60 | gt->gt_jindex_refresh_secs = 60; | ||
61 | gt->gt_recoverd_secs = 60; | 57 | gt->gt_recoverd_secs = 60; |
62 | gt->gt_logd_secs = 1; | 58 | gt->gt_logd_secs = 1; |
63 | gt->gt_quotad_secs = 5; | 59 | gt->gt_quotad_secs = 5; |
@@ -71,10 +67,8 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
71 | gt->gt_new_files_jdata = 0; | 67 | gt->gt_new_files_jdata = 0; |
72 | gt->gt_new_files_directio = 0; | 68 | gt->gt_new_files_directio = 0; |
73 | gt->gt_max_readahead = 1 << 18; | 69 | gt->gt_max_readahead = 1 << 18; |
74 | gt->gt_lockdump_size = 131072; | ||
75 | gt->gt_stall_secs = 600; | 70 | gt->gt_stall_secs = 600; |
76 | gt->gt_complain_secs = 10; | 71 | gt->gt_complain_secs = 10; |
77 | gt->gt_reclaim_limit = 5000; | ||
78 | gt->gt_statfs_quantum = 30; | 72 | gt->gt_statfs_quantum = 30; |
79 | gt->gt_statfs_slow = 0; | 73 | gt->gt_statfs_slow = 0; |
80 | } | 74 | } |
@@ -393,6 +387,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
393 | if (!jd) | 387 | if (!jd) |
394 | break; | 388 | break; |
395 | 389 | ||
390 | INIT_LIST_HEAD(&jd->extent_list); | ||
396 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); | 391 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); |
397 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 392 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
398 | if (!jd->jd_inode) | 393 | if (!jd->jd_inode) |
@@ -422,8 +417,9 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
422 | 417 | ||
423 | void gfs2_jindex_free(struct gfs2_sbd *sdp) | 418 | void gfs2_jindex_free(struct gfs2_sbd *sdp) |
424 | { | 419 | { |
425 | struct list_head list; | 420 | struct list_head list, *head; |
426 | struct gfs2_jdesc *jd; | 421 | struct gfs2_jdesc *jd; |
422 | struct gfs2_journal_extent *jext; | ||
427 | 423 | ||
428 | spin_lock(&sdp->sd_jindex_spin); | 424 | spin_lock(&sdp->sd_jindex_spin); |
429 | list_add(&list, &sdp->sd_jindex_list); | 425 | list_add(&list, &sdp->sd_jindex_list); |
@@ -433,6 +429,14 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) | |||
433 | 429 | ||
434 | while (!list_empty(&list)) { | 430 | while (!list_empty(&list)) { |
435 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); | 431 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); |
432 | head = &jd->extent_list; | ||
433 | while (!list_empty(head)) { | ||
434 | jext = list_entry(head->next, | ||
435 | struct gfs2_journal_extent, | ||
436 | extent_list); | ||
437 | list_del(&jext->extent_list); | ||
438 | kfree(jext); | ||
439 | } | ||
436 | list_del(&jd->jd_list); | 440 | list_del(&jd->jd_list); |
437 | iput(jd->jd_inode); | 441 | iput(jd->jd_inode); |
438 | kfree(jd); | 442 | kfree(jd); |
@@ -543,7 +547,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
543 | if (error) | 547 | if (error) |
544 | return error; | 548 | return error; |
545 | 549 | ||
546 | gfs2_meta_cache_flush(ip); | ||
547 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 550 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
548 | 551 | ||
549 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 552 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -686,9 +689,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, | |||
686 | if (error) | 689 | if (error) |
687 | return; | 690 | return; |
688 | 691 | ||
689 | mutex_lock(&sdp->sd_statfs_mutex); | ||
690 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | 692 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); |
691 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
692 | 693 | ||
693 | spin_lock(&sdp->sd_statfs_spin); | 694 | spin_lock(&sdp->sd_statfs_spin); |
694 | l_sc->sc_total += total; | 695 | l_sc->sc_total += total; |
@@ -736,9 +737,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) | |||
736 | if (error) | 737 | if (error) |
737 | goto out_bh2; | 738 | goto out_bh2; |
738 | 739 | ||
739 | mutex_lock(&sdp->sd_statfs_mutex); | ||
740 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | 740 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); |
741 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
742 | 741 | ||
743 | spin_lock(&sdp->sd_statfs_spin); | 742 | spin_lock(&sdp->sd_statfs_spin); |
744 | m_sc->sc_total += l_sc->sc_total; | 743 | m_sc->sc_total += l_sc->sc_total; |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 06e0b7768d97..eaa3b7b2f99e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -32,7 +32,8 @@ spinlock_t gfs2_sys_margs_lock; | |||
32 | 32 | ||
33 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) | 33 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) |
34 | { | 34 | { |
35 | return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id); | 35 | return snprintf(buf, PAGE_SIZE, "%u:%u\n", |
36 | MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); | ||
36 | } | 37 | } |
37 | 38 | ||
38 | static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) | 39 | static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) |
@@ -221,9 +222,7 @@ static struct kobj_type gfs2_ktype = { | |||
221 | .sysfs_ops = &gfs2_attr_ops, | 222 | .sysfs_ops = &gfs2_attr_ops, |
222 | }; | 223 | }; |
223 | 224 | ||
224 | static struct kset gfs2_kset = { | 225 | static struct kset *gfs2_kset; |
225 | .ktype = &gfs2_ktype, | ||
226 | }; | ||
227 | 226 | ||
228 | /* | 227 | /* |
229 | * display struct lm_lockstruct fields | 228 | * display struct lm_lockstruct fields |
@@ -427,13 +426,11 @@ TUNE_ATTR_2(name, name##_store) | |||
427 | TUNE_ATTR(demote_secs, 0); | 426 | TUNE_ATTR(demote_secs, 0); |
428 | TUNE_ATTR(incore_log_blocks, 0); | 427 | TUNE_ATTR(incore_log_blocks, 0); |
429 | TUNE_ATTR(log_flush_secs, 0); | 428 | TUNE_ATTR(log_flush_secs, 0); |
430 | TUNE_ATTR(jindex_refresh_secs, 0); | ||
431 | TUNE_ATTR(quota_warn_period, 0); | 429 | TUNE_ATTR(quota_warn_period, 0); |
432 | TUNE_ATTR(quota_quantum, 0); | 430 | TUNE_ATTR(quota_quantum, 0); |
433 | TUNE_ATTR(atime_quantum, 0); | 431 | TUNE_ATTR(atime_quantum, 0); |
434 | TUNE_ATTR(max_readahead, 0); | 432 | TUNE_ATTR(max_readahead, 0); |
435 | TUNE_ATTR(complain_secs, 0); | 433 | TUNE_ATTR(complain_secs, 0); |
436 | TUNE_ATTR(reclaim_limit, 0); | ||
437 | TUNE_ATTR(statfs_slow, 0); | 434 | TUNE_ATTR(statfs_slow, 0); |
438 | TUNE_ATTR(new_files_jdata, 0); | 435 | TUNE_ATTR(new_files_jdata, 0); |
439 | TUNE_ATTR(new_files_directio, 0); | 436 | TUNE_ATTR(new_files_directio, 0); |
@@ -450,13 +447,11 @@ static struct attribute *tune_attrs[] = { | |||
450 | &tune_attr_demote_secs.attr, | 447 | &tune_attr_demote_secs.attr, |
451 | &tune_attr_incore_log_blocks.attr, | 448 | &tune_attr_incore_log_blocks.attr, |
452 | &tune_attr_log_flush_secs.attr, | 449 | &tune_attr_log_flush_secs.attr, |
453 | &tune_attr_jindex_refresh_secs.attr, | ||
454 | &tune_attr_quota_warn_period.attr, | 450 | &tune_attr_quota_warn_period.attr, |
455 | &tune_attr_quota_quantum.attr, | 451 | &tune_attr_quota_quantum.attr, |
456 | &tune_attr_atime_quantum.attr, | 452 | &tune_attr_atime_quantum.attr, |
457 | &tune_attr_max_readahead.attr, | 453 | &tune_attr_max_readahead.attr, |
458 | &tune_attr_complain_secs.attr, | 454 | &tune_attr_complain_secs.attr, |
459 | &tune_attr_reclaim_limit.attr, | ||
460 | &tune_attr_statfs_slow.attr, | 455 | &tune_attr_statfs_slow.attr, |
461 | &tune_attr_quota_simul_sync.attr, | 456 | &tune_attr_quota_simul_sync.attr, |
462 | &tune_attr_quota_cache_secs.attr, | 457 | &tune_attr_quota_cache_secs.attr, |
@@ -495,14 +490,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
495 | { | 490 | { |
496 | int error; | 491 | int error; |
497 | 492 | ||
498 | sdp->sd_kobj.kset = &gfs2_kset; | 493 | sdp->sd_kobj.kset = gfs2_kset; |
499 | sdp->sd_kobj.ktype = &gfs2_ktype; | 494 | error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, |
500 | 495 | "%s", sdp->sd_table_name); | |
501 | error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name); | ||
502 | if (error) | ||
503 | goto fail; | ||
504 | |||
505 | error = kobject_register(&sdp->sd_kobj); | ||
506 | if (error) | 496 | if (error) |
507 | goto fail; | 497 | goto fail; |
508 | 498 | ||
@@ -522,6 +512,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
522 | if (error) | 512 | if (error) |
523 | goto fail_args; | 513 | goto fail_args; |
524 | 514 | ||
515 | kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); | ||
525 | return 0; | 516 | return 0; |
526 | 517 | ||
527 | fail_args: | 518 | fail_args: |
@@ -531,7 +522,7 @@ fail_counters: | |||
531 | fail_lockstruct: | 522 | fail_lockstruct: |
532 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | 523 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); |
533 | fail_reg: | 524 | fail_reg: |
534 | kobject_unregister(&sdp->sd_kobj); | 525 | kobject_put(&sdp->sd_kobj); |
535 | fail: | 526 | fail: |
536 | fs_err(sdp, "error %d adding sysfs files", error); | 527 | fs_err(sdp, "error %d adding sysfs files", error); |
537 | return error; | 528 | return error; |
@@ -543,21 +534,22 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) | |||
543 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | 534 | sysfs_remove_group(&sdp->sd_kobj, &args_group); |
544 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); | 535 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); |
545 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | 536 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); |
546 | kobject_unregister(&sdp->sd_kobj); | 537 | kobject_put(&sdp->sd_kobj); |
547 | } | 538 | } |
548 | 539 | ||
549 | int gfs2_sys_init(void) | 540 | int gfs2_sys_init(void) |
550 | { | 541 | { |
551 | gfs2_sys_margs = NULL; | 542 | gfs2_sys_margs = NULL; |
552 | spin_lock_init(&gfs2_sys_margs_lock); | 543 | spin_lock_init(&gfs2_sys_margs_lock); |
553 | kobject_set_name(&gfs2_kset.kobj, "gfs2"); | 544 | gfs2_kset = kset_create_and_add("gfs2", NULL, fs_kobj); |
554 | kobj_set_kset_s(&gfs2_kset, fs_subsys); | 545 | if (!gfs2_kset) |
555 | return kset_register(&gfs2_kset); | 546 | return -ENOMEM; |
547 | return 0; | ||
556 | } | 548 | } |
557 | 549 | ||
558 | void gfs2_sys_uninit(void) | 550 | void gfs2_sys_uninit(void) |
559 | { | 551 | { |
560 | kfree(gfs2_sys_margs); | 552 | kfree(gfs2_sys_margs); |
561 | kset_unregister(&gfs2_kset); | 553 | kset_unregister(gfs2_kset); |
562 | } | 554 | } |
563 | 555 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 717983e2c2ae..73e5d92a657c 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -114,11 +114,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) | |||
114 | gfs2_log_flush(sdp, NULL); | 114 | gfs2_log_flush(sdp, NULL); |
115 | } | 115 | } |
116 | 116 | ||
117 | void gfs2_trans_add_gl(struct gfs2_glock *gl) | ||
118 | { | ||
119 | lops_add(gl->gl_sbd, &gl->gl_le); | ||
120 | } | ||
121 | |||
122 | /** | 117 | /** |
123 | * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction | 118 | * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction |
124 | * @gl: the glock the buffer belongs to | 119 | * @gl: the glock the buffer belongs to |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 043d5f4b9c4c..e826f0dab80a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -30,7 +30,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
30 | 30 | ||
31 | void gfs2_trans_end(struct gfs2_sbd *sdp); | 31 | void gfs2_trans_end(struct gfs2_sbd *sdp); |
32 | 32 | ||
33 | void gfs2_trans_add_gl(struct gfs2_glock *gl); | ||
34 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | 33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); |
35 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | 34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); |
36 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); | 35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); |
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 31284c77bba8..110dd3515dc8 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c | |||
@@ -61,7 +61,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
61 | mapping = tree->inode->i_mapping; | 61 | mapping = tree->inode->i_mapping; |
62 | page = read_mapping_page(mapping, 0, NULL); | 62 | page = read_mapping_page(mapping, 0, NULL); |
63 | if (IS_ERR(page)) | 63 | if (IS_ERR(page)) |
64 | goto free_tree; | 64 | goto free_inode; |
65 | 65 | ||
66 | /* Load the header */ | 66 | /* Load the header */ |
67 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 67 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
@@ -99,11 +99,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
99 | page_cache_release(page); | 99 | page_cache_release(page); |
100 | return tree; | 100 | return tree; |
101 | 101 | ||
102 | fail_page: | 102 | fail_page: |
103 | page_cache_release(page); | 103 | page_cache_release(page); |
104 | free_tree: | 104 | free_inode: |
105 | tree->inode->i_mapping->a_ops = &hfs_aops; | 105 | tree->inode->i_mapping->a_ops = &hfs_aops; |
106 | iput(tree->inode); | 106 | iput(tree->inode); |
107 | free_tree: | ||
107 | kfree(tree); | 108 | kfree(tree); |
108 | return NULL; | 109 | return NULL; |
109 | } | 110 | } |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 08ff6c7028cc..038ed7436199 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -288,10 +288,12 @@ handle_t *journal_start(journal_t *journal, int nblocks) | |||
288 | jbd_free_handle(handle); | 288 | jbd_free_handle(handle); |
289 | current->journal_info = NULL; | 289 | current->journal_info = NULL; |
290 | handle = ERR_PTR(err); | 290 | handle = ERR_PTR(err); |
291 | goto out; | ||
291 | } | 292 | } |
292 | 293 | ||
293 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); | 294 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); |
294 | 295 | ||
296 | out: | ||
295 | return handle; | 297 | return handle; |
296 | } | 298 | } |
297 | 299 | ||
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index df25ecc418af..4dcc05819998 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
@@ -284,11 +284,11 @@ static struct dir_table_slot *find_index(struct inode *ip, u32 index, | |||
284 | release_metapage(*mp); | 284 | release_metapage(*mp); |
285 | *mp = NULL; | 285 | *mp = NULL; |
286 | } | 286 | } |
287 | if (*mp == 0) { | 287 | if (!(*mp)) { |
288 | *lblock = blkno; | 288 | *lblock = blkno; |
289 | *mp = read_index_page(ip, blkno); | 289 | *mp = read_index_page(ip, blkno); |
290 | } | 290 | } |
291 | if (*mp == 0) { | 291 | if (!(*mp)) { |
292 | jfs_err("free_index: error reading directory table"); | 292 | jfs_err("free_index: error reading directory table"); |
293 | return NULL; | 293 | return NULL; |
294 | } | 294 | } |
@@ -413,7 +413,8 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) | |||
413 | } | 413 | } |
414 | ip->i_size = PSIZE; | 414 | ip->i_size = PSIZE; |
415 | 415 | ||
416 | if ((mp = get_index_page(ip, 0)) == 0) { | 416 | mp = get_index_page(ip, 0); |
417 | if (!mp) { | ||
417 | jfs_err("add_index: get_metapage failed!"); | 418 | jfs_err("add_index: get_metapage failed!"); |
418 | xtTruncate(tid, ip, 0, COMMIT_PWMAP); | 419 | xtTruncate(tid, ip, 0, COMMIT_PWMAP); |
419 | memcpy(&jfs_ip->i_dirtable, temp_table, | 420 | memcpy(&jfs_ip->i_dirtable, temp_table, |
@@ -461,7 +462,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) | |||
461 | } else | 462 | } else |
462 | mp = read_index_page(ip, blkno); | 463 | mp = read_index_page(ip, blkno); |
463 | 464 | ||
464 | if (mp == 0) { | 465 | if (!mp) { |
465 | jfs_err("add_index: get/read_metapage failed!"); | 466 | jfs_err("add_index: get/read_metapage failed!"); |
466 | goto clean_up; | 467 | goto clean_up; |
467 | } | 468 | } |
@@ -499,7 +500,7 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) | |||
499 | 500 | ||
500 | dirtab_slot = find_index(ip, index, &mp, &lblock); | 501 | dirtab_slot = find_index(ip, index, &mp, &lblock); |
501 | 502 | ||
502 | if (dirtab_slot == 0) | 503 | if (!dirtab_slot) |
503 | return; | 504 | return; |
504 | 505 | ||
505 | dirtab_slot->flag = DIR_INDEX_FREE; | 506 | dirtab_slot->flag = DIR_INDEX_FREE; |
@@ -526,7 +527,7 @@ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, | |||
526 | 527 | ||
527 | dirtab_slot = find_index(ip, index, mp, lblock); | 528 | dirtab_slot = find_index(ip, index, mp, lblock); |
528 | 529 | ||
529 | if (dirtab_slot == 0) | 530 | if (!dirtab_slot) |
530 | return; | 531 | return; |
531 | 532 | ||
532 | DTSaddress(dirtab_slot, bn); | 533 | DTSaddress(dirtab_slot, bn); |
@@ -552,7 +553,7 @@ static int read_index(struct inode *ip, u32 index, | |||
552 | struct dir_table_slot *slot; | 553 | struct dir_table_slot *slot; |
553 | 554 | ||
554 | slot = find_index(ip, index, &mp, &lblock); | 555 | slot = find_index(ip, index, &mp, &lblock); |
555 | if (slot == 0) { | 556 | if (!slot) { |
556 | return -EIO; | 557 | return -EIO; |
557 | } | 558 | } |
558 | 559 | ||
@@ -592,10 +593,8 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
592 | struct component_name ciKey; | 593 | struct component_name ciKey; |
593 | struct super_block *sb = ip->i_sb; | 594 | struct super_block *sb = ip->i_sb; |
594 | 595 | ||
595 | ciKey.name = | 596 | ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); |
596 | (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), | 597 | if (!ciKey.name) { |
597 | GFP_NOFS); | ||
598 | if (ciKey.name == 0) { | ||
599 | rc = -ENOMEM; | 598 | rc = -ENOMEM; |
600 | goto dtSearch_Exit2; | 599 | goto dtSearch_Exit2; |
601 | } | 600 | } |
@@ -957,10 +956,8 @@ static int dtSplitUp(tid_t tid, | |||
957 | smp = split->mp; | 956 | smp = split->mp; |
958 | sp = DT_PAGE(ip, smp); | 957 | sp = DT_PAGE(ip, smp); |
959 | 958 | ||
960 | key.name = | 959 | key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS); |
961 | (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), | 960 | if (!key.name) { |
962 | GFP_NOFS); | ||
963 | if (key.name == 0) { | ||
964 | DT_PUTPAGE(smp); | 961 | DT_PUTPAGE(smp); |
965 | rc = -ENOMEM; | 962 | rc = -ENOMEM; |
966 | goto dtSplitUp_Exit; | 963 | goto dtSplitUp_Exit; |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 8561c6ecece0..cdac2d5bafeb 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
@@ -74,7 +74,7 @@ struct idtentry { | |||
74 | #define DTIHDRDATALEN 11 | 74 | #define DTIHDRDATALEN 11 |
75 | 75 | ||
76 | /* compute number of slots for entry */ | 76 | /* compute number of slots for entry */ |
77 | #define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 ) | 77 | #define NDTINTERNAL(klen) (DIV_ROUND_UP((4 + (klen)), 15)) |
78 | 78 | ||
79 | 79 | ||
80 | /* | 80 | /* |
@@ -133,7 +133,7 @@ struct dir_table_slot { | |||
133 | ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) | 133 | ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) |
134 | 134 | ||
135 | /* compute number of slots for entry */ | 135 | /* compute number of slots for entry */ |
136 | #define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 ) | 136 | #define NDTLEAF_LEGACY(klen) (DIV_ROUND_UP((2 + (klen)), 15)) |
137 | #define NDTLEAF NDTINTERNAL | 137 | #define NDTLEAF NDTINTERNAL |
138 | 138 | ||
139 | 139 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 3870ba8b9086..9bf29f771737 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -381,7 +381,7 @@ int diRead(struct inode *ip) | |||
381 | 381 | ||
382 | /* read the page of disk inode */ | 382 | /* read the page of disk inode */ |
383 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); | 383 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); |
384 | if (mp == 0) { | 384 | if (!mp) { |
385 | jfs_err("diRead: read_metapage failed"); | 385 | jfs_err("diRead: read_metapage failed"); |
386 | return -EIO; | 386 | return -EIO; |
387 | } | 387 | } |
@@ -654,7 +654,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
654 | /* read the page of disk inode */ | 654 | /* read the page of disk inode */ |
655 | retry: | 655 | retry: |
656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); | 656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); |
657 | if (mp == 0) | 657 | if (!mp) |
658 | return -EIO; | 658 | return -EIO; |
659 | 659 | ||
660 | /* get the pointer to the disk inode */ | 660 | /* get the pointer to the disk inode */ |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 15a3974cdeeb..325a9679b95a 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -208,6 +208,17 @@ static struct lmStat { | |||
208 | } lmStat; | 208 | } lmStat; |
209 | #endif | 209 | #endif |
210 | 210 | ||
211 | static void write_special_inodes(struct jfs_log *log, | ||
212 | int (*writer)(struct address_space *)) | ||
213 | { | ||
214 | struct jfs_sb_info *sbi; | ||
215 | |||
216 | list_for_each_entry(sbi, &log->sb_list, log_list) { | ||
217 | writer(sbi->ipbmap->i_mapping); | ||
218 | writer(sbi->ipimap->i_mapping); | ||
219 | writer(sbi->direct_inode->i_mapping); | ||
220 | } | ||
221 | } | ||
211 | 222 | ||
212 | /* | 223 | /* |
213 | * NAME: lmLog() | 224 | * NAME: lmLog() |
@@ -935,22 +946,13 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
935 | struct lrd lrd; | 946 | struct lrd lrd; |
936 | int lsn; | 947 | int lsn; |
937 | struct logsyncblk *lp; | 948 | struct logsyncblk *lp; |
938 | struct jfs_sb_info *sbi; | ||
939 | unsigned long flags; | 949 | unsigned long flags; |
940 | 950 | ||
941 | /* push dirty metapages out to disk */ | 951 | /* push dirty metapages out to disk */ |
942 | if (hard_sync) | 952 | if (hard_sync) |
943 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 953 | write_special_inodes(log, filemap_fdatawrite); |
944 | filemap_fdatawrite(sbi->ipbmap->i_mapping); | ||
945 | filemap_fdatawrite(sbi->ipimap->i_mapping); | ||
946 | filemap_fdatawrite(sbi->direct_inode->i_mapping); | ||
947 | } | ||
948 | else | 954 | else |
949 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 955 | write_special_inodes(log, filemap_flush); |
950 | filemap_flush(sbi->ipbmap->i_mapping); | ||
951 | filemap_flush(sbi->ipimap->i_mapping); | ||
952 | filemap_flush(sbi->direct_inode->i_mapping); | ||
953 | } | ||
954 | 956 | ||
955 | /* | 957 | /* |
956 | * forward syncpt | 958 | * forward syncpt |
@@ -1536,7 +1538,6 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1536 | { | 1538 | { |
1537 | int i; | 1539 | int i; |
1538 | struct tblock *target = NULL; | 1540 | struct tblock *target = NULL; |
1539 | struct jfs_sb_info *sbi; | ||
1540 | 1541 | ||
1541 | /* jfs_write_inode may call us during read-only mount */ | 1542 | /* jfs_write_inode may call us during read-only mount */ |
1542 | if (!log) | 1543 | if (!log) |
@@ -1598,11 +1599,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1598 | if (wait < 2) | 1599 | if (wait < 2) |
1599 | return; | 1600 | return; |
1600 | 1601 | ||
1601 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 1602 | write_special_inodes(log, filemap_fdatawrite); |
1602 | filemap_fdatawrite(sbi->ipbmap->i_mapping); | ||
1603 | filemap_fdatawrite(sbi->ipimap->i_mapping); | ||
1604 | filemap_fdatawrite(sbi->direct_inode->i_mapping); | ||
1605 | } | ||
1606 | 1603 | ||
1607 | /* | 1604 | /* |
1608 | * If there was recent activity, we may need to wait | 1605 | * If there was recent activity, we may need to wait |
@@ -1611,6 +1608,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1611 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { | 1608 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { |
1612 | for (i = 0; i < 200; i++) { /* Too much? */ | 1609 | for (i = 0; i < 200; i++) { /* Too much? */ |
1613 | msleep(250); | 1610 | msleep(250); |
1611 | write_special_inodes(log, filemap_fdatawrite); | ||
1614 | if (list_empty(&log->cqueue) && | 1612 | if (list_empty(&log->cqueue) && |
1615 | list_empty(&log->synclist)) | 1613 | list_empty(&log->synclist)) |
1616 | break; | 1614 | break; |
@@ -2347,7 +2345,7 @@ int jfsIOWait(void *arg) | |||
2347 | 2345 | ||
2348 | do { | 2346 | do { |
2349 | spin_lock_irq(&log_redrive_lock); | 2347 | spin_lock_irq(&log_redrive_lock); |
2350 | while ((bp = log_redrive_list) != 0) { | 2348 | while ((bp = log_redrive_list)) { |
2351 | log_redrive_list = bp->l_redrive_next; | 2349 | log_redrive_list = bp->l_redrive_next; |
2352 | bp->l_redrive_next = NULL; | 2350 | bp->l_redrive_next = NULL; |
2353 | spin_unlock_irq(&log_redrive_lock); | 2351 | spin_unlock_irq(&log_redrive_lock); |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index f5cd8d38af7a..d1e64f2f2fcd 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -39,11 +39,11 @@ static struct { | |||
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) | 41 | #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) |
42 | #define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) | 42 | #define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag) |
43 | 43 | ||
44 | static inline void unlock_metapage(struct metapage *mp) | 44 | static inline void unlock_metapage(struct metapage *mp) |
45 | { | 45 | { |
46 | clear_bit(META_locked, &mp->flag); | 46 | clear_bit_unlock(META_locked, &mp->flag); |
47 | wake_up(&mp->wait); | 47 | wake_up(&mp->wait); |
48 | } | 48 | } |
49 | 49 | ||
@@ -88,7 +88,7 @@ struct meta_anchor { | |||
88 | }; | 88 | }; |
89 | #define mp_anchor(page) ((struct meta_anchor *)page_private(page)) | 89 | #define mp_anchor(page) ((struct meta_anchor *)page_private(page)) |
90 | 90 | ||
91 | static inline struct metapage *page_to_mp(struct page *page, uint offset) | 91 | static inline struct metapage *page_to_mp(struct page *page, int offset) |
92 | { | 92 | { |
93 | if (!PagePrivate(page)) | 93 | if (!PagePrivate(page)) |
94 | return NULL; | 94 | return NULL; |
@@ -153,7 +153,7 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *)) | |||
153 | } | 153 | } |
154 | 154 | ||
155 | #else | 155 | #else |
156 | static inline struct metapage *page_to_mp(struct page *page, uint offset) | 156 | static inline struct metapage *page_to_mp(struct page *page, int offset) |
157 | { | 157 | { |
158 | return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; | 158 | return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; |
159 | } | 159 | } |
@@ -249,7 +249,7 @@ static inline void drop_metapage(struct page *page, struct metapage *mp) | |||
249 | */ | 249 | */ |
250 | 250 | ||
251 | static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, | 251 | static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, |
252 | unsigned int *len) | 252 | int *len) |
253 | { | 253 | { |
254 | int rc = 0; | 254 | int rc = 0; |
255 | int xflag; | 255 | int xflag; |
@@ -352,25 +352,27 @@ static void metapage_write_end_io(struct bio *bio, int err) | |||
352 | static int metapage_writepage(struct page *page, struct writeback_control *wbc) | 352 | static int metapage_writepage(struct page *page, struct writeback_control *wbc) |
353 | { | 353 | { |
354 | struct bio *bio = NULL; | 354 | struct bio *bio = NULL; |
355 | unsigned int block_offset; /* block offset of mp within page */ | 355 | int block_offset; /* block offset of mp within page */ |
356 | struct inode *inode = page->mapping->host; | 356 | struct inode *inode = page->mapping->host; |
357 | unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; | 357 | int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; |
358 | unsigned int len; | 358 | int len; |
359 | unsigned int xlen; | 359 | int xlen; |
360 | struct metapage *mp; | 360 | struct metapage *mp; |
361 | int redirty = 0; | 361 | int redirty = 0; |
362 | sector_t lblock; | 362 | sector_t lblock; |
363 | int nr_underway = 0; | ||
363 | sector_t pblock; | 364 | sector_t pblock; |
364 | sector_t next_block = 0; | 365 | sector_t next_block = 0; |
365 | sector_t page_start; | 366 | sector_t page_start; |
366 | unsigned long bio_bytes = 0; | 367 | unsigned long bio_bytes = 0; |
367 | unsigned long bio_offset = 0; | 368 | unsigned long bio_offset = 0; |
368 | unsigned int offset; | 369 | int offset; |
369 | 370 | ||
370 | page_start = (sector_t)page->index << | 371 | page_start = (sector_t)page->index << |
371 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 372 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
372 | BUG_ON(!PageLocked(page)); | 373 | BUG_ON(!PageLocked(page)); |
373 | BUG_ON(PageWriteback(page)); | 374 | BUG_ON(PageWriteback(page)); |
375 | set_page_writeback(page); | ||
374 | 376 | ||
375 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { | 377 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { |
376 | mp = page_to_mp(page, offset); | 378 | mp = page_to_mp(page, offset); |
@@ -413,11 +415,10 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
413 | if (!bio->bi_size) | 415 | if (!bio->bi_size) |
414 | goto dump_bio; | 416 | goto dump_bio; |
415 | submit_bio(WRITE, bio); | 417 | submit_bio(WRITE, bio); |
418 | nr_underway++; | ||
416 | bio = NULL; | 419 | bio = NULL; |
417 | } else { | 420 | } else |
418 | set_page_writeback(page); | ||
419 | inc_io(page); | 421 | inc_io(page); |
420 | } | ||
421 | xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; | 422 | xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; |
422 | pblock = metapage_get_blocks(inode, lblock, &xlen); | 423 | pblock = metapage_get_blocks(inode, lblock, &xlen); |
423 | if (!pblock) { | 424 | if (!pblock) { |
@@ -427,7 +428,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
427 | continue; | 428 | continue; |
428 | } | 429 | } |
429 | set_bit(META_io, &mp->flag); | 430 | set_bit(META_io, &mp->flag); |
430 | len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); | 431 | len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); |
431 | 432 | ||
432 | bio = bio_alloc(GFP_NOFS, 1); | 433 | bio = bio_alloc(GFP_NOFS, 1); |
433 | bio->bi_bdev = inode->i_sb->s_bdev; | 434 | bio->bi_bdev = inode->i_sb->s_bdev; |
@@ -449,12 +450,16 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
449 | goto dump_bio; | 450 | goto dump_bio; |
450 | 451 | ||
451 | submit_bio(WRITE, bio); | 452 | submit_bio(WRITE, bio); |
453 | nr_underway++; | ||
452 | } | 454 | } |
453 | if (redirty) | 455 | if (redirty) |
454 | redirty_page_for_writepage(wbc, page); | 456 | redirty_page_for_writepage(wbc, page); |
455 | 457 | ||
456 | unlock_page(page); | 458 | unlock_page(page); |
457 | 459 | ||
460 | if (nr_underway == 0) | ||
461 | end_page_writeback(page); | ||
462 | |||
458 | return 0; | 463 | return 0; |
459 | add_failed: | 464 | add_failed: |
460 | /* We should never reach here, since we're only adding one vec */ | 465 | /* We should never reach here, since we're only adding one vec */ |
@@ -475,13 +480,13 @@ static int metapage_readpage(struct file *fp, struct page *page) | |||
475 | { | 480 | { |
476 | struct inode *inode = page->mapping->host; | 481 | struct inode *inode = page->mapping->host; |
477 | struct bio *bio = NULL; | 482 | struct bio *bio = NULL; |
478 | unsigned int block_offset; | 483 | int block_offset; |
479 | unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; | 484 | int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; |
480 | sector_t page_start; /* address of page in fs blocks */ | 485 | sector_t page_start; /* address of page in fs blocks */ |
481 | sector_t pblock; | 486 | sector_t pblock; |
482 | unsigned int xlen; | 487 | int xlen; |
483 | unsigned int len; | 488 | unsigned int len; |
484 | unsigned int offset; | 489 | int offset; |
485 | 490 | ||
486 | BUG_ON(!PageLocked(page)); | 491 | BUG_ON(!PageLocked(page)); |
487 | page_start = (sector_t)page->index << | 492 | page_start = (sector_t)page->index << |
@@ -530,7 +535,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) | |||
530 | { | 535 | { |
531 | struct metapage *mp; | 536 | struct metapage *mp; |
532 | int ret = 1; | 537 | int ret = 1; |
533 | unsigned int offset; | 538 | int offset; |
534 | 539 | ||
535 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { | 540 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { |
536 | mp = page_to_mp(page, offset); | 541 | mp = page_to_mp(page, offset); |
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 644429acb8c0..7b698f2ec45a 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c | |||
@@ -147,7 +147,7 @@ int jfs_mount(struct super_block *sb) | |||
147 | */ | 147 | */ |
148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { | 148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { |
149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); | 149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); |
150 | if (ipaimap2 == 0) { | 150 | if (!ipaimap2) { |
151 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); | 151 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); |
152 | rc = -EIO; | 152 | rc = -EIO; |
153 | goto errout35; | 153 | goto errout35; |
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 7971f37534a3..adcf92d3b603 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) | |||
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 2); | 71 | jfs_flush_journal(log, 1); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * close fileset inode allocation map (aka fileset inode) | 74 | * close fileset inode allocation map (aka fileset inode) |
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
149 | jfs_flush_journal(log, 2); | 149 | jfs_flush_journal(log, 1); |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Make sure all metadata makes it to disk | 152 | * Make sure all metadata makes it to disk |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4e0a8493cef6..f8718de3505e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1103,8 +1103,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1103 | * Make sure dest inode number (if any) is what we think it is | 1103 | * Make sure dest inode number (if any) is what we think it is |
1104 | */ | 1104 | */ |
1105 | rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); | 1105 | rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); |
1106 | if (rc == 0) { | 1106 | if (!rc) { |
1107 | if ((new_ip == 0) || (ino != new_ip->i_ino)) { | 1107 | if ((!new_ip) || (ino != new_ip->i_ino)) { |
1108 | rc = -ESTALE; | 1108 | rc = -ESTALE; |
1109 | goto out3; | 1109 | goto out3; |
1110 | } | 1110 | } |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 71984ee95346..7f24a0bb08ca 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -172,7 +172,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
172 | */ | 172 | */ |
173 | t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) | 173 | t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) |
174 | << L2BPERDMAP; | 174 | << L2BPERDMAP; |
175 | t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50; | 175 | t32 = DIV_ROUND_UP(t64, BITSPERPAGE) + 1 + 50; |
176 | newFSCKSize = t32 << sbi->l2nbperpage; | 176 | newFSCKSize = t32 << sbi->l2nbperpage; |
177 | newFSCKAddress = newLogAddress - newFSCKSize; | 177 | newFSCKAddress = newLogAddress - newFSCKSize; |
178 | 178 | ||
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 314bb4ff1ba8..70a14001c98f 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -598,6 +598,12 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
598 | seq_printf(seq, ",umask=%03o", sbi->umask); | 598 | seq_printf(seq, ",umask=%03o", sbi->umask); |
599 | if (sbi->flag & JFS_NOINTEGRITY) | 599 | if (sbi->flag & JFS_NOINTEGRITY) |
600 | seq_puts(seq, ",nointegrity"); | 600 | seq_puts(seq, ",nointegrity"); |
601 | if (sbi->nls_tab) | ||
602 | seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); | ||
603 | if (sbi->flag & JFS_ERR_CONTINUE) | ||
604 | seq_printf(seq, ",errors=continue"); | ||
605 | if (sbi->flag & JFS_ERR_PANIC) | ||
606 | seq_printf(seq, ",errors=panic"); | ||
601 | 607 | ||
602 | #ifdef CONFIG_QUOTA | 608 | #ifdef CONFIG_QUOTA |
603 | if (sbi->flag & JFS_USRQUOTA) | 609 | if (sbi->flag & JFS_USRQUOTA) |
diff --git a/fs/namei.c b/fs/namei.c index 3b993db26cee..73e2e665817a 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1605,7 +1605,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1605 | if (S_ISLNK(inode->i_mode)) | 1605 | if (S_ISLNK(inode->i_mode)) |
1606 | return -ELOOP; | 1606 | return -ELOOP; |
1607 | 1607 | ||
1608 | if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) | 1608 | if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE)) |
1609 | return -EISDIR; | 1609 | return -EISDIR; |
1610 | 1610 | ||
1611 | /* | 1611 | /* |
@@ -1620,7 +1620,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1620 | return -EACCES; | 1620 | return -EACCES; |
1621 | 1621 | ||
1622 | flag &= ~O_TRUNC; | 1622 | flag &= ~O_TRUNC; |
1623 | } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) | 1623 | } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) |
1624 | return -EROFS; | 1624 | return -EROFS; |
1625 | 1625 | ||
1626 | error = vfs_permission(nd, acc_mode); | 1626 | error = vfs_permission(nd, acc_mode); |
diff --git a/fs/namespace.c b/fs/namespace.c index 06083885b21e..61bf376e29e8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -41,8 +41,8 @@ static struct kmem_cache *mnt_cache __read_mostly; | |||
41 | static struct rw_semaphore namespace_sem; | 41 | static struct rw_semaphore namespace_sem; |
42 | 42 | ||
43 | /* /sys/fs */ | 43 | /* /sys/fs */ |
44 | decl_subsys(fs, NULL, NULL); | 44 | struct kobject *fs_kobj; |
45 | EXPORT_SYMBOL_GPL(fs_subsys); | 45 | EXPORT_SYMBOL_GPL(fs_kobj); |
46 | 46 | ||
47 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 47 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
48 | { | 48 | { |
@@ -1861,10 +1861,9 @@ void __init mnt_init(void) | |||
1861 | if (err) | 1861 | if (err) |
1862 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 1862 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
1863 | __FUNCTION__, err); | 1863 | __FUNCTION__, err); |
1864 | err = subsystem_register(&fs_subsys); | 1864 | fs_kobj = kobject_create_and_add("fs", NULL); |
1865 | if (err) | 1865 | if (!fs_kobj) |
1866 | printk(KERN_WARNING "%s: subsystem_register error: %d\n", | 1866 | printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); |
1867 | __FUNCTION__, err); | ||
1868 | init_rootfs(); | 1867 | init_rootfs(); |
1869 | init_mount_tree(); | 1868 | init_mount_tree(); |
1870 | } | 1869 | } |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2d116d2298f8..f917fd25858a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -388,8 +388,11 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
388 | * Round the length of the data which was specified up to | 388 | * Round the length of the data which was specified up to |
389 | * the next multiple of XDR units and then compare that | 389 | * the next multiple of XDR units and then compare that |
390 | * against the length which was actually received. | 390 | * against the length which was actually received. |
391 | * Note that when RPCSEC/GSS (for example) is used, the | ||
392 | * data buffer can be padded so dlen might be larger | ||
393 | * than required. It must never be smaller. | ||
391 | */ | 394 | */ |
392 | if (dlen != XDR_QUADLEN(len)*4) | 395 | if (dlen < XDR_QUADLEN(len)*4) |
393 | return 0; | 396 | return 0; |
394 | 397 | ||
395 | if (args->count > max_blocksize) { | 398 | if (args->count > max_blocksize) { |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 986f9b32083c..b86e3658a0af 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
@@ -313,8 +313,11 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
313 | * Round the length of the data which was specified up to | 313 | * Round the length of the data which was specified up to |
314 | * the next multiple of XDR units and then compare that | 314 | * the next multiple of XDR units and then compare that |
315 | * against the length which was actually received. | 315 | * against the length which was actually received. |
316 | * Note that when RPCSEC/GSS (for example) is used, the | ||
317 | * data buffer can be padded so dlen might be larger | ||
318 | * than required. It must never be smaller. | ||
316 | */ | 319 | */ |
317 | if (dlen != XDR_QUADLEN(len)*4) | 320 | if (dlen < XDR_QUADLEN(len)*4) |
318 | return 0; | 321 | return 0; |
319 | 322 | ||
320 | rqstp->rq_vec[0].iov_base = (void*)p; | 323 | rqstp->rq_vec[0].iov_base = (void*)p; |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 9fb8132f19b0..4d4ce48bb42c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -19,16 +19,17 @@ ocfs2-objs := \ | |||
19 | ioctl.o \ | 19 | ioctl.o \ |
20 | journal.o \ | 20 | journal.o \ |
21 | localalloc.o \ | 21 | localalloc.o \ |
22 | locks.o \ | ||
22 | mmap.o \ | 23 | mmap.o \ |
23 | namei.o \ | 24 | namei.o \ |
25 | resize.o \ | ||
24 | slot_map.o \ | 26 | slot_map.o \ |
25 | suballoc.o \ | 27 | suballoc.o \ |
26 | super.o \ | 28 | super.o \ |
27 | symlink.o \ | 29 | symlink.o \ |
28 | sysfile.o \ | 30 | sysfile.o \ |
29 | uptodate.o \ | 31 | uptodate.o \ |
30 | ver.o \ | 32 | ver.o |
31 | vote.o | ||
32 | 33 | ||
33 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 34 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
34 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 35 | obj-$(CONFIG_OCFS2_FS) += dlm/ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 23c8cda43f19..e6df06ac6405 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -4731,7 +4731,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
4731 | 4731 | ||
4732 | mutex_lock(&data_alloc_inode->i_mutex); | 4732 | mutex_lock(&data_alloc_inode->i_mutex); |
4733 | 4733 | ||
4734 | status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); | 4734 | status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1); |
4735 | if (status < 0) { | 4735 | if (status < 0) { |
4736 | mlog_errno(status); | 4736 | mlog_errno(status); |
4737 | goto out_mutex; | 4737 | goto out_mutex; |
@@ -4753,7 +4753,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
4753 | 4753 | ||
4754 | out_unlock: | 4754 | out_unlock: |
4755 | brelse(data_alloc_bh); | 4755 | brelse(data_alloc_bh); |
4756 | ocfs2_meta_unlock(data_alloc_inode, 1); | 4756 | ocfs2_inode_unlock(data_alloc_inode, 1); |
4757 | 4757 | ||
4758 | out_mutex: | 4758 | out_mutex: |
4759 | mutex_unlock(&data_alloc_inode->i_mutex); | 4759 | mutex_unlock(&data_alloc_inode->i_mutex); |
@@ -5077,7 +5077,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb, | |||
5077 | 5077 | ||
5078 | mutex_lock(&inode->i_mutex); | 5078 | mutex_lock(&inode->i_mutex); |
5079 | 5079 | ||
5080 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 5080 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
5081 | if (ret) { | 5081 | if (ret) { |
5082 | mlog_errno(ret); | 5082 | mlog_errno(ret); |
5083 | goto out_mutex; | 5083 | goto out_mutex; |
@@ -5118,7 +5118,7 @@ out_journal: | |||
5118 | ocfs2_commit_trans(osb, handle); | 5118 | ocfs2_commit_trans(osb, handle); |
5119 | 5119 | ||
5120 | out_unlock: | 5120 | out_unlock: |
5121 | ocfs2_meta_unlock(inode, 1); | 5121 | ocfs2_inode_unlock(inode, 1); |
5122 | brelse(di_bh); | 5122 | brelse(di_bh); |
5123 | out_mutex: | 5123 | out_mutex: |
5124 | mutex_unlock(&inode->i_mutex); | 5124 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 56f7790cad46..bc7b4cbbe8ec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
29 | #include <linux/mpage.h> | ||
29 | 30 | ||
30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 31 | #define MLOG_MASK_PREFIX ML_FILE_IO |
31 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
139 | { | 140 | { |
140 | int err = 0; | 141 | int err = 0; |
141 | unsigned int ext_flags; | 142 | unsigned int ext_flags; |
142 | u64 p_blkno, past_eof; | 143 | u64 max_blocks = bh_result->b_size >> inode->i_blkbits; |
144 | u64 p_blkno, count, past_eof; | ||
143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
144 | 146 | ||
145 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 147 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
155 | goto bail; | 157 | goto bail; |
156 | } | 158 | } |
157 | 159 | ||
158 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 160 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, |
159 | &ext_flags); | 161 | &ext_flags); |
160 | if (err) { | 162 | if (err) { |
161 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 163 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " |
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
164 | goto bail; | 166 | goto bail; |
165 | } | 167 | } |
166 | 168 | ||
169 | if (max_blocks < count) | ||
170 | count = max_blocks; | ||
171 | |||
167 | /* | 172 | /* |
168 | * ocfs2 never allocates in this function - the only time we | 173 | * ocfs2 never allocates in this function - the only time we |
169 | * need to use BH_New is when we're extending i_size on a file | 174 | * need to use BH_New is when we're extending i_size on a file |
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
178 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 183 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
179 | map_bh(bh_result, inode->i_sb, p_blkno); | 184 | map_bh(bh_result, inode->i_sb, p_blkno); |
180 | 185 | ||
186 | bh_result->b_size = count << inode->i_blkbits; | ||
187 | |||
181 | if (!ocfs2_sparse_alloc(osb)) { | 188 | if (!ocfs2_sparse_alloc(osb)) { |
182 | if (p_blkno == 0) { | 189 | if (p_blkno == 0) { |
183 | err = -EIO; | 190 | err = -EIO; |
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
210 | struct buffer_head *di_bh) | 217 | struct buffer_head *di_bh) |
211 | { | 218 | { |
212 | void *kaddr; | 219 | void *kaddr; |
213 | unsigned int size; | 220 | loff_t size; |
214 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 221 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
215 | 222 | ||
216 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 223 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { |
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
224 | if (size > PAGE_CACHE_SIZE || | 231 | if (size > PAGE_CACHE_SIZE || |
225 | size > ocfs2_max_inline_data(inode->i_sb)) { | 232 | size > ocfs2_max_inline_data(inode->i_sb)) { |
226 | ocfs2_error(inode->i_sb, | 233 | ocfs2_error(inode->i_sb, |
227 | "Inode %llu has with inline data has bad size: %u", | 234 | "Inode %llu has with inline data has bad size: %Lu", |
228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, size); | 235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
236 | (unsigned long long)size); | ||
229 | return -EROFS; | 237 | return -EROFS; |
230 | } | 238 | } |
231 | 239 | ||
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
275 | 283 | ||
276 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 284 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); |
277 | 285 | ||
278 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 286 | ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); |
279 | if (ret != 0) { | 287 | if (ret != 0) { |
280 | if (ret == AOP_TRUNCATED_PAGE) | 288 | if (ret == AOP_TRUNCATED_PAGE) |
281 | unlock = 0; | 289 | unlock = 0; |
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
285 | 293 | ||
286 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 294 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { |
287 | ret = AOP_TRUNCATED_PAGE; | 295 | ret = AOP_TRUNCATED_PAGE; |
288 | goto out_meta_unlock; | 296 | goto out_inode_unlock; |
289 | } | 297 | } |
290 | 298 | ||
291 | /* | 299 | /* |
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
305 | goto out_alloc; | 313 | goto out_alloc; |
306 | } | 314 | } |
307 | 315 | ||
308 | ret = ocfs2_data_lock_with_page(inode, 0, page); | ||
309 | if (ret != 0) { | ||
310 | if (ret == AOP_TRUNCATED_PAGE) | ||
311 | unlock = 0; | ||
312 | mlog_errno(ret); | ||
313 | goto out_alloc; | ||
314 | } | ||
315 | |||
316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
317 | ret = ocfs2_readpage_inline(inode, page); | 317 | ret = ocfs2_readpage_inline(inode, page); |
318 | else | 318 | else |
319 | ret = block_read_full_page(page, ocfs2_get_block); | 319 | ret = block_read_full_page(page, ocfs2_get_block); |
320 | unlock = 0; | 320 | unlock = 0; |
321 | 321 | ||
322 | ocfs2_data_unlock(inode, 0); | ||
323 | out_alloc: | 322 | out_alloc: |
324 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 323 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
325 | out_meta_unlock: | 324 | out_inode_unlock: |
326 | ocfs2_meta_unlock(inode, 0); | 325 | ocfs2_inode_unlock(inode, 0); |
327 | out: | 326 | out: |
328 | if (unlock) | 327 | if (unlock) |
329 | unlock_page(page); | 328 | unlock_page(page); |
@@ -331,6 +330,62 @@ out: | |||
331 | return ret; | 330 | return ret; |
332 | } | 331 | } |
333 | 332 | ||
333 | /* | ||
334 | * This is used only for read-ahead. Failures or difficult to handle | ||
335 | * situations are safe to ignore. | ||
336 | * | ||
337 | * Right now, we don't bother with BH_Boundary - in-inode extent lists | ||
338 | * are quite large (243 extents on 4k blocks), so most inodes don't | ||
339 | * grow out to a tree. If need be, detecting boundary extents could | ||
340 | * trivially be added in a future version of ocfs2_get_block(). | ||
341 | */ | ||
342 | static int ocfs2_readpages(struct file *filp, struct address_space *mapping, | ||
343 | struct list_head *pages, unsigned nr_pages) | ||
344 | { | ||
345 | int ret, err = -EIO; | ||
346 | struct inode *inode = mapping->host; | ||
347 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
348 | loff_t start; | ||
349 | struct page *last; | ||
350 | |||
351 | /* | ||
352 | * Use the nonblocking flag for the dlm code to avoid page | ||
353 | * lock inversion, but don't bother with retrying. | ||
354 | */ | ||
355 | ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); | ||
356 | if (ret) | ||
357 | return err; | ||
358 | |||
359 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | ||
360 | ocfs2_inode_unlock(inode, 0); | ||
361 | return err; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Don't bother with inline-data. There isn't anything | ||
366 | * to read-ahead in that case anyway... | ||
367 | */ | ||
368 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
369 | goto out_unlock; | ||
370 | |||
371 | /* | ||
372 | * Check whether a remote node truncated this file - we just | ||
373 | * drop out in that case as it's not worth handling here. | ||
374 | */ | ||
375 | last = list_entry(pages->prev, struct page, lru); | ||
376 | start = (loff_t)last->index << PAGE_CACHE_SHIFT; | ||
377 | if (start >= i_size_read(inode)) | ||
378 | goto out_unlock; | ||
379 | |||
380 | err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); | ||
381 | |||
382 | out_unlock: | ||
383 | up_read(&oi->ip_alloc_sem); | ||
384 | ocfs2_inode_unlock(inode, 0); | ||
385 | |||
386 | return err; | ||
387 | } | ||
388 | |||
334 | /* Note: Because we don't support holes, our allocation has | 389 | /* Note: Because we don't support holes, our allocation has |
335 | * already happened (allocation writes zeros to the file data) | 390 | * already happened (allocation writes zeros to the file data) |
336 | * so we don't have to worry about ordered writes in | 391 | * so we don't have to worry about ordered writes in |
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
452 | * accessed concurrently from multiple nodes. | 507 | * accessed concurrently from multiple nodes. |
453 | */ | 508 | */ |
454 | if (!INODE_JOURNAL(inode)) { | 509 | if (!INODE_JOURNAL(inode)) { |
455 | err = ocfs2_meta_lock(inode, NULL, 0); | 510 | err = ocfs2_inode_lock(inode, NULL, 0); |
456 | if (err) { | 511 | if (err) { |
457 | if (err != -ENOENT) | 512 | if (err != -ENOENT) |
458 | mlog_errno(err); | 513 | mlog_errno(err); |
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
467 | 522 | ||
468 | if (!INODE_JOURNAL(inode)) { | 523 | if (!INODE_JOURNAL(inode)) { |
469 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 524 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
470 | ocfs2_meta_unlock(inode, 0); | 525 | ocfs2_inode_unlock(inode, 0); |
471 | } | 526 | } |
472 | 527 | ||
473 | if (err) { | 528 | if (err) { |
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
638 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 693 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
639 | return 0; | 694 | return 0; |
640 | 695 | ||
641 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
642 | /* | ||
643 | * We get PR data locks even for O_DIRECT. This | ||
644 | * allows concurrent O_DIRECT I/O but doesn't let | ||
645 | * O_DIRECT with extending and buffered zeroing writes | ||
646 | * race. If they did race then the buffered zeroing | ||
647 | * could be written back after the O_DIRECT I/O. It's | ||
648 | * one thing to tell people not to mix buffered and | ||
649 | * O_DIRECT writes, but expecting them to understand | ||
650 | * that file extension is also an implicit buffered | ||
651 | * write is too much. By getting the PR we force | ||
652 | * writeback of the buffered zeroing before | ||
653 | * proceeding. | ||
654 | */ | ||
655 | ret = ocfs2_data_lock(inode, 0); | ||
656 | if (ret < 0) { | ||
657 | mlog_errno(ret); | ||
658 | goto out; | ||
659 | } | ||
660 | ocfs2_data_unlock(inode, 0); | ||
661 | } | ||
662 | |||
663 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 696 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
664 | inode->i_sb->s_bdev, iov, offset, | 697 | inode->i_sb->s_bdev, iov, offset, |
665 | nr_segs, | 698 | nr_segs, |
666 | ocfs2_direct_IO_get_blocks, | 699 | ocfs2_direct_IO_get_blocks, |
667 | ocfs2_dio_end_io); | 700 | ocfs2_dio_end_io); |
668 | out: | 701 | |
669 | mlog_exit(ret); | 702 | mlog_exit(ret); |
670 | return ret; | 703 | return ret; |
671 | } | 704 | } |
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1754 | struct buffer_head *di_bh = NULL; | 1787 | struct buffer_head *di_bh = NULL; |
1755 | struct inode *inode = mapping->host; | 1788 | struct inode *inode = mapping->host; |
1756 | 1789 | ||
1757 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1790 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
1758 | if (ret) { | 1791 | if (ret) { |
1759 | mlog_errno(ret); | 1792 | mlog_errno(ret); |
1760 | return ret; | 1793 | return ret; |
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1769 | */ | 1802 | */ |
1770 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1803 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1771 | 1804 | ||
1772 | ret = ocfs2_data_lock(inode, 1); | ||
1773 | if (ret) { | ||
1774 | mlog_errno(ret); | ||
1775 | goto out_fail; | ||
1776 | } | ||
1777 | |||
1778 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1805 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, |
1779 | fsdata, di_bh, NULL); | 1806 | fsdata, di_bh, NULL); |
1780 | if (ret) { | 1807 | if (ret) { |
1781 | mlog_errno(ret); | 1808 | mlog_errno(ret); |
1782 | goto out_fail_data; | 1809 | goto out_fail; |
1783 | } | 1810 | } |
1784 | 1811 | ||
1785 | brelse(di_bh); | 1812 | brelse(di_bh); |
1786 | 1813 | ||
1787 | return 0; | 1814 | return 0; |
1788 | 1815 | ||
1789 | out_fail_data: | ||
1790 | ocfs2_data_unlock(inode, 1); | ||
1791 | out_fail: | 1816 | out_fail: |
1792 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1817 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1793 | 1818 | ||
1794 | brelse(di_bh); | 1819 | brelse(di_bh); |
1795 | ocfs2_meta_unlock(inode, 1); | 1820 | ocfs2_inode_unlock(inode, 1); |
1796 | 1821 | ||
1797 | return ret; | 1822 | return ret; |
1798 | } | 1823 | } |
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
1908 | 1933 | ||
1909 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 1934 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
1910 | 1935 | ||
1911 | ocfs2_data_unlock(inode, 1); | ||
1912 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1936 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1913 | ocfs2_meta_unlock(inode, 1); | 1937 | ocfs2_inode_unlock(inode, 1); |
1914 | 1938 | ||
1915 | return ret; | 1939 | return ret; |
1916 | } | 1940 | } |
1917 | 1941 | ||
1918 | const struct address_space_operations ocfs2_aops = { | 1942 | const struct address_space_operations ocfs2_aops = { |
1919 | .readpage = ocfs2_readpage, | 1943 | .readpage = ocfs2_readpage, |
1944 | .readpages = ocfs2_readpages, | ||
1920 | .writepage = ocfs2_writepage, | 1945 | .writepage = ocfs2_writepage, |
1921 | .write_begin = ocfs2_write_begin, | 1946 | .write_begin = ocfs2_write_begin, |
1922 | .write_end = ocfs2_write_end, | 1947 | .write_end = ocfs2_write_end, |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index c9037414f4f6..f136639f5b41 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
79 | * information for this bh as it's not marked locally | 79 | * information for this bh as it's not marked locally |
80 | * uptodate. */ | 80 | * uptodate. */ |
81 | ret = -EIO; | 81 | ret = -EIO; |
82 | brelse(bh); | 82 | put_bh(bh); |
83 | } | 83 | } |
84 | 84 | ||
85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
@@ -256,7 +256,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
256 | * for this bh as it's not marked locally | 256 | * for this bh as it's not marked locally |
257 | * uptodate. */ | 257 | * uptodate. */ |
258 | status = -EIO; | 258 | status = -EIO; |
259 | brelse(bh); | 259 | put_bh(bh); |
260 | bhs[i] = NULL; | 260 | bhs[i] = NULL; |
261 | continue; | 261 | continue; |
262 | } | 262 | } |
@@ -280,3 +280,64 @@ bail: | |||
280 | mlog_exit(status); | 280 | mlog_exit(status); |
281 | return status; | 281 | return status; |
282 | } | 282 | } |
283 | |||
284 | /* Check whether the blkno is the super block or one of the backups. */ | ||
285 | static void ocfs2_check_super_or_backup(struct super_block *sb, | ||
286 | sector_t blkno) | ||
287 | { | ||
288 | int i; | ||
289 | u64 backup_blkno; | ||
290 | |||
291 | if (blkno == OCFS2_SUPER_BLOCK_BLKNO) | ||
292 | return; | ||
293 | |||
294 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
295 | backup_blkno = ocfs2_backup_super_blkno(sb, i); | ||
296 | if (backup_blkno == blkno) | ||
297 | return; | ||
298 | } | ||
299 | |||
300 | BUG(); | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * Write super block and backups doesn't need to collaborate with journal, | ||
305 | * so we don't need to lock ip_io_mutex and inode doesn't need to be passed | |
306 | * into this function. | ||
307 | */ | ||
308 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
309 | struct buffer_head *bh) | ||
310 | { | ||
311 | int ret = 0; | ||
312 | |||
313 | mlog_entry_void(); | ||
314 | |||
315 | BUG_ON(buffer_jbd(bh)); | ||
316 | ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); | ||
317 | |||
318 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { | ||
319 | ret = -EROFS; | ||
320 | goto out; | ||
321 | } | ||
322 | |||
323 | lock_buffer(bh); | ||
324 | set_buffer_uptodate(bh); | ||
325 | |||
326 | /* remove from dirty list before I/O. */ | ||
327 | clear_buffer_dirty(bh); | ||
328 | |||
329 | get_bh(bh); /* for end_buffer_write_sync() */ | ||
330 | bh->b_end_io = end_buffer_write_sync; | ||
331 | submit_bh(WRITE, bh); | ||
332 | |||
333 | wait_on_buffer(bh); | ||
334 | |||
335 | if (!buffer_uptodate(bh)) { | ||
336 | ret = -EIO; | ||
337 | put_bh(bh); | ||
338 | } | ||
339 | |||
340 | out: | ||
341 | mlog_exit(ret); | ||
342 | return ret; | ||
343 | } | ||
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 6cc20930fac3..c2e78614c3e5 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -47,6 +47,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, | |||
47 | int flags, | 47 | int flags, |
48 | struct inode *inode); | 48 | struct inode *inode); |
49 | 49 | ||
50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
51 | struct buffer_head *bh); | ||
50 | 52 | ||
51 | #define OCFS2_BH_CACHED 1 | 53 | #define OCFS2_BH_CACHED 1 |
52 | #define OCFS2_BH_READAHEAD 8 | 54 | #define OCFS2_BH_READAHEAD 8 |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 35397dd5ecdb..e511339886b3 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #define O2HB_LIVE_THRESHOLD 2 | 35 | #define O2HB_LIVE_THRESHOLD 2 |
36 | /* number of equal samples to be seen as dead */ | 36 | /* number of equal samples to be seen as dead */ |
37 | extern unsigned int o2hb_dead_threshold; | 37 | extern unsigned int o2hb_dead_threshold; |
38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 7 | 38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 31 |
39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ | 39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ |
40 | #define O2HB_MIN_DEAD_THRESHOLD 2 | 40 | #define O2HB_MIN_DEAD_THRESHOLD 2 |
41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) | 41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index a4882c8df945..23c732f27529 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -146,7 +146,7 @@ static struct kset mlog_kset = { | |||
146 | .kobj = {.ktype = &mlog_ktype}, | 146 | .kobj = {.ktype = &mlog_ktype}, |
147 | }; | 147 | }; |
148 | 148 | ||
149 | int mlog_sys_init(struct kset *o2cb_subsys) | 149 | int mlog_sys_init(struct kset *o2cb_kset) |
150 | { | 150 | { |
151 | int i = 0; | 151 | int i = 0; |
152 | 152 | ||
@@ -157,7 +157,7 @@ int mlog_sys_init(struct kset *o2cb_subsys) | |||
157 | mlog_attr_ptrs[i] = NULL; | 157 | mlog_attr_ptrs[i] = NULL; |
158 | 158 | ||
159 | kobject_set_name(&mlog_kset.kobj, "logmask"); | 159 | kobject_set_name(&mlog_kset.kobj, "logmask"); |
160 | kobj_set_kset_s(&mlog_kset, *o2cb_subsys); | 160 | mlog_kset.kobj.kset = o2cb_kset; |
161 | return kset_register(&mlog_kset); | 161 | return kset_register(&mlog_kset); |
162 | } | 162 | } |
163 | 163 | ||
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 64f6f378fd09..a4b07730b2e1 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
@@ -28,96 +28,55 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/kobject.h> | 29 | #include <linux/kobject.h> |
30 | #include <linux/sysfs.h> | 30 | #include <linux/sysfs.h> |
31 | #include <linux/fs.h> | ||
31 | 32 | ||
32 | #include "ocfs2_nodemanager.h" | 33 | #include "ocfs2_nodemanager.h" |
33 | #include "masklog.h" | 34 | #include "masklog.h" |
34 | #include "sys.h" | 35 | #include "sys.h" |
35 | 36 | ||
36 | struct o2cb_attribute { | ||
37 | struct attribute attr; | ||
38 | ssize_t (*show)(char *buf); | ||
39 | ssize_t (*store)(const char *buf, size_t count); | ||
40 | }; | ||
41 | |||
42 | #define O2CB_ATTR(_name, _mode, _show, _store) \ | ||
43 | struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store) | ||
44 | |||
45 | #define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr) | ||
46 | 37 | ||
47 | static ssize_t o2cb_interface_revision_show(char *buf) | 38 | static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr, |
39 | char *buf) | ||
48 | { | 40 | { |
49 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); | 41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); |
50 | } | 42 | } |
51 | 43 | static struct kobj_attribute attr_version = | |
52 | static O2CB_ATTR(interface_revision, S_IFREG | S_IRUGO, o2cb_interface_revision_show, NULL); | 44 | __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); |
53 | 45 | ||
54 | static struct attribute *o2cb_attrs[] = { | 46 | static struct attribute *o2cb_attrs[] = { |
55 | &o2cb_attr_interface_revision.attr, | 47 | &attr_version.attr, |
56 | NULL, | 48 | NULL, |
57 | }; | 49 | }; |
58 | 50 | ||
59 | static ssize_t | 51 | static struct attribute_group o2cb_attr_group = { |
60 | o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer); | 52 | .attrs = o2cb_attrs, |
61 | static ssize_t | ||
62 | o2cb_store(struct kobject * kobj, struct attribute * attr, | ||
63 | const char * buffer, size_t count); | ||
64 | static struct sysfs_ops o2cb_sysfs_ops = { | ||
65 | .show = o2cb_show, | ||
66 | .store = o2cb_store, | ||
67 | }; | 53 | }; |
68 | 54 | ||
69 | static struct kobj_type o2cb_subsys_type = { | 55 | static struct kset *o2cb_kset; |
70 | .default_attrs = o2cb_attrs, | ||
71 | .sysfs_ops = &o2cb_sysfs_ops, | ||
72 | }; | ||
73 | |||
74 | /* gives us o2cb_subsys */ | ||
75 | static decl_subsys(o2cb, NULL, NULL); | ||
76 | |||
77 | static ssize_t | ||
78 | o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer) | ||
79 | { | ||
80 | struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); | ||
81 | struct kset *sbs = to_kset(kobj); | ||
82 | |||
83 | BUG_ON(sbs != &o2cb_subsys); | ||
84 | |||
85 | if (o2cb_attr->show) | ||
86 | return o2cb_attr->show(buffer); | ||
87 | return -EIO; | ||
88 | } | ||
89 | |||
90 | static ssize_t | ||
91 | o2cb_store(struct kobject * kobj, struct attribute * attr, | ||
92 | const char * buffer, size_t count) | ||
93 | { | ||
94 | struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); | ||
95 | struct kset *sbs = to_kset(kobj); | ||
96 | |||
97 | BUG_ON(sbs != &o2cb_subsys); | ||
98 | |||
99 | if (o2cb_attr->store) | ||
100 | return o2cb_attr->store(buffer, count); | ||
101 | return -EIO; | ||
102 | } | ||
103 | 56 | ||
104 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
105 | { | 58 | { |
106 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
107 | subsystem_unregister(&o2cb_subsys); | 60 | kset_unregister(o2cb_kset); |
108 | } | 61 | } |
109 | 62 | ||
110 | int o2cb_sys_init(void) | 63 | int o2cb_sys_init(void) |
111 | { | 64 | { |
112 | int ret; | 65 | int ret; |
113 | 66 | ||
114 | o2cb_subsys.kobj.ktype = &o2cb_subsys_type; | 67 | o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj); |
115 | ret = subsystem_register(&o2cb_subsys); | 68 | if (!o2cb_kset) |
69 | return -ENOMEM; | ||
70 | |||
71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | ||
116 | if (ret) | 72 | if (ret) |
117 | return ret; | 73 | goto error; |
118 | 74 | ||
119 | ret = mlog_sys_init(&o2cb_subsys); | 75 | ret = mlog_sys_init(o2cb_kset); |
120 | if (ret) | 76 | if (ret) |
121 | subsystem_unregister(&o2cb_subsys); | 77 | goto error; |
78 | return 0; | ||
79 | error: | ||
80 | kset_unregister(o2cb_kset); | ||
122 | return ret; | 81 | return ret; |
123 | } | 82 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index da880fc215f0..f36f66aab3dd 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -60,8 +60,8 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data, | |||
60 | /* same as hb delay, we're waiting for another node to recognize our hb */ | 60 | /* same as hb delay, we're waiting for another node to recognize our hb */ |
61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 | 61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 |
62 | 62 | ||
63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 | 63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 |
64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 | 64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 |
65 | 65 | ||
66 | 66 | ||
67 | /* TODO: figure this out.... */ | 67 | /* TODO: figure this out.... */ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 9606111fe89d..b2e832aca567 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -38,6 +38,12 @@ | |||
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * New in version 10: | ||
42 | * - Meta/data locks combined | ||
43 | * | ||
44 | * New in version 9: | ||
45 | * - All votes removed | ||
46 | * | ||
41 | * New in version 8: | 47 | * New in version 8: |
42 | * - Replace delete inode votes with a cluster lock | 48 | * - Replace delete inode votes with a cluster lock |
43 | * | 49 | * |
@@ -60,7 +66,7 @@ | |||
60 | * - full 64 bit i_size in the metadata lock lvbs | 66 | * - full 64 bit i_size in the metadata lock lvbs |
61 | * - introduction of "rw" lock and pushing meta/data locking down | 67 | * - introduction of "rw" lock and pushing meta/data locking down |
62 | */ | 68 | */ |
63 | #define O2NET_PROTOCOL_VERSION 8ULL | 69 | #define O2NET_PROTOCOL_VERSION 10ULL |
64 | struct o2net_handshake { | 70 | struct o2net_handshake { |
65 | __be64 protocol_version; | 71 | __be64 protocol_version; |
66 | __be64 connector_id; | 72 | __be64 connector_id; |
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c index 7286c48bb30d..a56eee6abad3 100644 --- a/fs/ocfs2/cluster/ver.c +++ b/fs/ocfs2/cluster/ver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "ver.h" | 29 | #include "ver.h" |
30 | 30 | ||
31 | #define CLUSTER_BUILD_VERSION "1.3.3" | 31 | #define CLUSTER_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 9923278ea6d4..b1cc7c381e88 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry, | |||
128 | /* | 128 | /* |
129 | * Walk the inode alias list, and find a dentry which has a given | 129 | * Walk the inode alias list, and find a dentry which has a given |
130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it |
131 | * is looking for a dentry_lock reference. The vote thread is looking | 131 | * is looking for a dentry_lock reference. The downconvert thread is |
132 | * to unhash aliases, so we allow it to skip any that already have | 132 | * looking to unhash aliases, so we allow it to skip any that already |
133 | * that property. | 133 | * have that property. |
134 | */ | 134 | */ |
135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, |
136 | u64 parent_blkno, | 136 | u64 parent_blkno, |
@@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
266 | dl->dl_count = 0; | 266 | dl->dl_count = 0; |
267 | /* | 267 | /* |
268 | * Does this have to happen below, for all attaches, in case | 268 | * Does this have to happen below, for all attaches, in case |
269 | * the struct inode gets blown away by votes? | 269 | * the struct inode gets blown away by the downconvert thread? |
270 | */ | 270 | */ |
271 | dl->dl_inode = igrab(inode); | 271 | dl->dl_inode = igrab(inode); |
272 | dl->dl_parent_blkno = parent_blkno; | 272 | dl->dl_parent_blkno = parent_blkno; |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 63b28fdceb4a..6b0107f21344 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -846,14 +846,14 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
846 | mlog_entry("dirino=%llu\n", | 846 | mlog_entry("dirino=%llu\n", |
847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
848 | 848 | ||
849 | error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 849 | error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
850 | if (lock_level && error >= 0) { | 850 | if (lock_level && error >= 0) { |
851 | /* We release EX lock which used to update atime | 851 | /* We release EX lock which used to update atime |
852 | * and get PR lock again to reduce contention | 852 | * and get PR lock again to reduce contention |
853 | * on commonly accessed directories. */ | 853 | * on commonly accessed directories. */ |
854 | ocfs2_meta_unlock(inode, 1); | 854 | ocfs2_inode_unlock(inode, 1); |
855 | lock_level = 0; | 855 | lock_level = 0; |
856 | error = ocfs2_meta_lock(inode, NULL, 0); | 856 | error = ocfs2_inode_lock(inode, NULL, 0); |
857 | } | 857 | } |
858 | if (error < 0) { | 858 | if (error < 0) { |
859 | if (error != -ENOENT) | 859 | if (error != -ENOENT) |
@@ -865,7 +865,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, |
866 | dirent, filldir, NULL); | 866 | dirent, filldir, NULL); |
867 | 867 | ||
868 | ocfs2_meta_unlock(inode, lock_level); | 868 | ocfs2_inode_unlock(inode, lock_level); |
869 | 869 | ||
870 | bail_nolock: | 870 | bail_nolock: |
871 | mlog_exit(error); | 871 | mlog_exit(error); |
diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c index d2be3ad841f9..a733b3321f83 100644 --- a/fs/ocfs2/dlm/dlmfsver.c +++ b/fs/ocfs2/dlm/dlmfsver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "dlmfsver.h" | 29 | #include "dlmfsver.h" |
30 | 30 | ||
31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 2fde7bf91434..91f747b8a538 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -2270,6 +2270,12 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
2270 | } | 2270 | } |
2271 | } | 2271 | } |
2272 | 2272 | ||
2273 | /* Clean up join state on node death. */ | ||
2274 | if (dlm->joining_node == idx) { | ||
2275 | mlog(0, "Clearing join state for node %u\n", idx); | ||
2276 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
2277 | } | ||
2278 | |||
2273 | /* check to see if the node is already considered dead */ | 2279 | /* check to see if the node is already considered dead */ |
2274 | if (!test_bit(idx, dlm->live_nodes_map)) { | 2280 | if (!test_bit(idx, dlm->live_nodes_map)) { |
2275 | mlog(0, "for domain %s, node %d is already dead. " | 2281 | mlog(0, "for domain %s, node %d is already dead. " |
@@ -2288,12 +2294,6 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
2288 | 2294 | ||
2289 | clear_bit(idx, dlm->live_nodes_map); | 2295 | clear_bit(idx, dlm->live_nodes_map); |
2290 | 2296 | ||
2291 | /* Clean up join state on node death. */ | ||
2292 | if (dlm->joining_node == idx) { | ||
2293 | mlog(0, "Clearing join state for node %u\n", idx); | ||
2294 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
2295 | } | ||
2296 | |||
2297 | /* make sure local cleanup occurs before the heartbeat events */ | 2297 | /* make sure local cleanup occurs before the heartbeat events */ |
2298 | if (!test_bit(idx, dlm->recovery_map)) | 2298 | if (!test_bit(idx, dlm->recovery_map)) |
2299 | dlm_do_local_recovery_cleanup(dlm, idx); | 2299 | dlm_do_local_recovery_cleanup(dlm, idx); |
@@ -2321,6 +2321,13 @@ void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) | |||
2321 | if (!dlm_grab(dlm)) | 2321 | if (!dlm_grab(dlm)) |
2322 | return; | 2322 | return; |
2323 | 2323 | ||
2324 | /* | ||
2325 | * This will notify any dlm users that a node in our domain | ||
2326 | * went away without notifying us first. | ||
2327 | */ | ||
2328 | if (test_bit(idx, dlm->domain_map)) | ||
2329 | dlm_fire_domain_eviction_callbacks(dlm, idx); | ||
2330 | |||
2324 | spin_lock(&dlm->spinlock); | 2331 | spin_lock(&dlm->spinlock); |
2325 | __dlm_hb_node_down(dlm, idx); | 2332 | __dlm_hb_node_down(dlm, idx); |
2326 | spin_unlock(&dlm->spinlock); | 2333 | spin_unlock(&dlm->spinlock); |
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c index 7ef2653f8f41..dfc0da4d158d 100644 --- a/fs/ocfs2/dlm/dlmver.c +++ b/fs/ocfs2/dlm/dlmver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "dlmver.h" | 29 | #include "dlmver.h" |
30 | 30 | ||
31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4e97dcceaf8f..3867244fb144 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include "slot_map.h" | 55 | #include "slot_map.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "uptodate.h" | 57 | #include "uptodate.h" |
58 | #include "vote.h" | ||
59 | 58 | ||
60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" |
61 | 60 | ||
@@ -69,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
69 | 68 | ||
70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. |
@@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops { | |||
153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Optionally called in the downconvert (or "vote") thread | 156 | * Optionally called in the downconvert thread after a |
157 | * after a successful downconvert. The lockres will not be | 157 | * successful downconvert. The lockres will not be referenced |
158 | * referenced after this callback is called, so it is safe to | 158 | * after this callback is called, so it is safe to free |
159 | * free memory, etc. | 159 | * memory, etc. |
160 | * | 160 | * |
161 | * The exact semantics of when this is called are controlled | 161 | * The exact semantics of when this is called are controlled |
162 | * by ->downconvert_worker() | 162 | * by ->downconvert_worker() |
@@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | |||
225 | .flags = 0, | 225 | .flags = 0, |
226 | }; | 226 | }; |
227 | 227 | ||
228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { |
229 | .get_osb = ocfs2_get_inode_osb, | 229 | .get_osb = ocfs2_get_inode_osb, |
230 | .check_downconvert = ocfs2_check_meta_downconvert, | 230 | .check_downconvert = ocfs2_check_meta_downconvert, |
231 | .set_lvb = ocfs2_set_meta_lvb, | 231 | .set_lvb = ocfs2_set_meta_lvb, |
232 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | ||
233 | }; | ||
234 | |||
235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | ||
236 | .get_osb = ocfs2_get_inode_osb, | ||
237 | .downconvert_worker = ocfs2_data_convert_worker, | 232 | .downconvert_worker = ocfs2_data_convert_worker, |
238 | .flags = 0, | 233 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
239 | }; | 234 | }; |
240 | 235 | ||
241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 236 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
@@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
258 | .flags = 0, | 253 | .flags = 0, |
259 | }; | 254 | }; |
260 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
257 | .get_osb = ocfs2_get_file_osb, | ||
258 | .flags = 0, | ||
259 | }; | ||
260 | |||
261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
262 | { | 262 | { |
263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | ||
265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 264 | lockres->l_type == OCFS2_LOCK_TYPE_RW || |
266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 265 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
267 | } | 266 | } |
@@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ |
311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ |
312 | } while (0) | 311 | } while (0) |
313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); |
314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
315 | static int ocfs2_meta_lock_update(struct inode *inode, | 314 | struct ocfs2_lock_res *lockres); |
315 | static int ocfs2_inode_lock_update(struct inode *inode, | ||
316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
318 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); |
319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
320 | int new_level); | ||
321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
322 | struct ocfs2_lock_res *lockres, | ||
323 | int new_level, | ||
324 | int lvb); | ||
325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
326 | struct ocfs2_lock_res *lockres); | ||
327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
328 | struct ocfs2_lock_res *lockres); | ||
329 | |||
319 | 330 | ||
320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
321 | u64 blkno, | 332 | u64 blkno, |
@@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
402 | ops = &ocfs2_inode_rw_lops; | 413 | ops = &ocfs2_inode_rw_lops; |
403 | break; | 414 | break; |
404 | case OCFS2_LOCK_TYPE_META: | 415 | case OCFS2_LOCK_TYPE_META: |
405 | ops = &ocfs2_inode_meta_lops; | 416 | ops = &ocfs2_inode_inode_lops; |
406 | break; | ||
407 | case OCFS2_LOCK_TYPE_DATA: | ||
408 | ops = &ocfs2_inode_data_lops; | ||
409 | break; | 417 | break; |
410 | case OCFS2_LOCK_TYPE_OPEN: | 418 | case OCFS2_LOCK_TYPE_OPEN: |
411 | ops = &ocfs2_inode_open_lops; | 419 | ops = &ocfs2_inode_open_lops; |
@@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
428 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); |
429 | } | 437 | } |
430 | 438 | ||
439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
440 | { | ||
441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
442 | |||
443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
444 | } | ||
445 | |||
431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
432 | { | 447 | { |
433 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; |
@@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
508 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); |
509 | } | 524 | } |
510 | 525 | ||
526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
527 | struct ocfs2_file_private *fp) | ||
528 | { | ||
529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
531 | |||
532 | ocfs2_lock_res_init_once(lockres); | ||
533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
534 | inode->i_generation, lockres->l_name); | ||
535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
537 | fp); | ||
538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
539 | } | ||
540 | |||
511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
512 | { | 542 | { |
513 | mlog_entry_void(); | 543 | mlog_entry_void(); |
@@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
724 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, |
725 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); |
726 | 756 | ||
757 | /* | ||
758 | * We can skip the bast for locks which don't enable caching - | ||
759 | * they'll be dropped at the earliest possible time anyway. | ||
760 | */ | ||
761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
762 | return; | ||
763 | |||
727 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); |
728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
729 | if (needs_downconvert) | 766 | if (needs_downconvert) |
@@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
732 | 769 | ||
733 | wake_up(&lockres->l_event); | 770 | wake_up(&lockres->l_event); |
734 | 771 | ||
735 | ocfs2_kick_vote_thread(osb); | 772 | ocfs2_wake_downconvert_thread(osb); |
736 | } | 773 | } |
737 | 774 | ||
738 | static void ocfs2_locking_ast(void *opaque) | 775 | static void ocfs2_locking_ast(void *opaque) |
@@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
935 | 972 | ||
936 | } | 973 | } |
937 | 974 | ||
975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
976 | struct ocfs2_lock_res *lockres) | ||
977 | { | ||
978 | int ret; | ||
979 | |||
980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
981 | if (ret) | ||
982 | lockres_remove_mask_waiter(lockres, mw); | ||
983 | else | ||
984 | ret = mw->mw_status; | ||
985 | /* Re-arm the completion in case we want to wait on it again */ | ||
986 | INIT_COMPLETION(mw->mw_complete); | ||
987 | return ret; | ||
988 | } | ||
989 | |||
938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
939 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, |
940 | int level, | 992 | int level, |
@@ -1089,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1089 | mlog_entry_void(); | 1141 | mlog_entry_void(); |
1090 | spin_lock_irqsave(&lockres->l_lock, flags); | 1142 | spin_lock_irqsave(&lockres->l_lock, flags); |
1091 | ocfs2_dec_holders(lockres, level); | 1143 | ocfs2_dec_holders(lockres, level); |
1092 | ocfs2_vote_on_unlock(osb, lockres); | 1144 | ocfs2_downconvert_on_unlock(osb, lockres); |
1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1145 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1094 | mlog_exit_void(); | 1146 | mlog_exit_void(); |
1095 | } | 1147 | } |
@@ -1147,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1147 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1199 | * We don't want to use LKM_LOCAL on a meta data lock as they |
1148 | * don't use a generation in their lock names. | 1200 | * don't use a generation in their lock names. |
1149 | */ | 1201 | */ |
1150 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 1202 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
1151 | if (ret) { | ||
1152 | mlog_errno(ret); | ||
1153 | goto bail; | ||
1154 | } | ||
1155 | |||
1156 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | ||
1157 | if (ret) { | 1203 | if (ret) { |
1158 | mlog_errno(ret); | 1204 | mlog_errno(ret); |
1159 | goto bail; | 1205 | goto bail; |
@@ -1311,76 +1357,221 @@ out: | |||
1311 | mlog_exit_void(); | 1357 | mlog_exit_void(); |
1312 | } | 1358 | } |
1313 | 1359 | ||
1314 | int ocfs2_data_lock_full(struct inode *inode, | 1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, |
1315 | int write, | 1361 | int level) |
1316 | int arg_flags) | ||
1317 | { | 1362 | { |
1318 | int status = 0, level; | 1363 | int ret; |
1319 | struct ocfs2_lock_res *lockres; | 1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1320 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | unsigned long flags; |
1366 | struct ocfs2_mask_waiter mw; | ||
1321 | 1367 | ||
1322 | BUG_ON(!inode); | 1368 | ocfs2_init_mask_waiter(&mw); |
1323 | 1369 | ||
1324 | mlog_entry_void(); | 1370 | retry_cancel: |
1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
1374 | if (ret) { | ||
1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
1377 | if (ret < 0) { | ||
1378 | mlog_errno(ret); | ||
1379 | goto out; | ||
1380 | } | ||
1381 | goto retry_cancel; | ||
1382 | } | ||
1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1325 | 1385 | ||
1326 | mlog(0, "inode %llu take %s DATA lock\n", | 1386 | ocfs2_wait_for_mask(&mw); |
1327 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1387 | goto retry_cancel; |
1328 | write ? "EXMODE" : "PRMODE"); | 1388 | } |
1329 | 1389 | ||
1330 | /* We'll allow faking a readonly data lock for | 1390 | ret = -ERESTARTSYS; |
1331 | * rodevices. */ | 1391 | /* |
1332 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 1392 | * We may still have gotten the lock, in which case there's no |
1333 | if (write) { | 1393 | * point to restarting the syscall. |
1334 | status = -EROFS; | 1394 | */ |
1335 | mlog_errno(status); | 1395 | if (lockres->l_level == level) |
1396 | ret = 0; | ||
1397 | |||
1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
1400 | |||
1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1402 | |||
1403 | out: | ||
1404 | return ret; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
1409 | * flock() calls. The locking approach this requires is sufficiently | ||
1410 | * different from all other cluster lock types that we implement a | ||
1411 | * separate path to the "low-level" dlm calls. In particular: | ||
1412 | * | ||
1413 | * - No optimization of lock levels is done - we take at exactly | ||
1414 | * what's been requested. | ||
1415 | * | ||
1416 | * - No lock caching is employed. We immediately downconvert to | ||
1417 | * no-lock at unlock time. This also means flock locks never go on | ||
1418 | * the blocking list. | ||
1419 | * | ||
1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
1421 | * sure to allow cancellation of a misbehaving application's flock() | ||
1422 | * request. | ||
1423 | * | ||
1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
1425 | * can simplify the code by requiring the caller to guarantee | ||
1426 | * serialization of dlmglue flock calls. | ||
1427 | */ | ||
1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
1429 | { | ||
1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
1432 | unsigned long flags; | ||
1433 | struct ocfs2_file_private *fp = file->private_data; | ||
1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1436 | struct ocfs2_mask_waiter mw; | ||
1437 | |||
1438 | ocfs2_init_mask_waiter(&mw); | ||
1439 | |||
1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
1441 | (lockres->l_level > LKM_NLMODE)) { | ||
1442 | mlog(ML_ERROR, | ||
1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
1445 | lockres->l_level); | ||
1446 | return -EINVAL; | ||
1447 | } | ||
1448 | |||
1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1453 | |||
1454 | /* | ||
1455 | * Get the lock at NLMODE to start - that way we | ||
1456 | * can cancel the upconvert request if need be. | ||
1457 | */ | ||
1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
1459 | if (ret < 0) { | ||
1460 | mlog_errno(ret); | ||
1461 | goto out; | ||
1336 | } | 1462 | } |
1337 | goto out; | 1463 | |
1464 | ret = ocfs2_wait_for_mask(&mw); | ||
1465 | if (ret) { | ||
1466 | mlog_errno(ret); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1338 | } | 1470 | } |
1339 | 1471 | ||
1340 | if (ocfs2_mount_local(osb)) | 1472 | lockres->l_action = OCFS2_AST_CONVERT; |
1341 | goto out; | 1473 | lkm_flags |= LKM_CONVERT; |
1474 | lockres->l_requested = level; | ||
1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
1342 | 1476 | ||
1343 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1344 | 1479 | ||
1345 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, |
1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
1483 | if (ret != DLM_NORMAL) { | ||
1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
1485 | ret = -EAGAIN; | ||
1486 | else { | ||
1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1488 | ret = -EINVAL; | ||
1489 | } | ||
1346 | 1490 | ||
1347 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 1491 | ocfs2_recover_from_dlm_error(lockres, 1); |
1348 | 0, arg_flags); | 1492 | lockres_remove_mask_waiter(lockres, &mw); |
1349 | if (status < 0 && status != -EAGAIN) | 1493 | goto out; |
1350 | mlog_errno(status); | 1494 | } |
1495 | |||
1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
1497 | if (ret == -ERESTARTSYS) { | ||
1498 | /* | ||
1499 | * Userspace can cause deadlock itself with | ||
1500 | * flock(). Current behavior locally is to allow the | ||
1501 | * deadlock, but abort the system call if a signal is | ||
1502 | * received. We follow this example, otherwise a | ||
1503 | * poorly written program could sit in kernel until | ||
1504 | * reboot. | ||
1505 | * | ||
1506 | * Handling this is a bit more complicated for Ocfs2 | ||
1507 | * though. We can't exit this function with an | ||
1508 | * outstanding lock request, so a cancel convert is | ||
1509 | * required. We intentionally overwrite 'ret' - if the | ||
1510 | * cancel fails and the lock was granted, it's easier | ||
1511 | * to just bubble success back up to the user. | ||
1512 | */ | ||
1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
1514 | } | ||
1351 | 1515 | ||
1352 | out: | 1516 | out: |
1353 | mlog_exit(status); | 1517 | |
1354 | return status; | 1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", |
1519 | lockres->l_name, ex, trylock, ret); | ||
1520 | return ret; | ||
1355 | } | 1521 | } |
1356 | 1522 | ||
1357 | /* see ocfs2_meta_lock_with_page() */ | 1523 | void ocfs2_file_unlock(struct file *file) |
1358 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
1359 | int write, | ||
1360 | struct page *page) | ||
1361 | { | 1524 | { |
1362 | int ret; | 1525 | int ret; |
1526 | unsigned long flags; | ||
1527 | struct ocfs2_file_private *fp = file->private_data; | ||
1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1530 | struct ocfs2_mask_waiter mw; | ||
1363 | 1531 | ||
1364 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 1532 | ocfs2_init_mask_waiter(&mw); |
1365 | if (ret == -EAGAIN) { | 1533 | |
1366 | unlock_page(page); | 1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
1367 | if (ocfs2_data_lock(inode, write) == 0) | 1535 | return; |
1368 | ocfs2_data_unlock(inode, write); | 1536 | |
1369 | ret = AOP_TRUNCATED_PAGE; | 1537 | if (lockres->l_level == LKM_NLMODE) |
1538 | return; | ||
1539 | |||
1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
1542 | lockres->l_action); | ||
1543 | |||
1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1545 | /* | ||
1546 | * Fake a blocking ast for the downconvert code. | ||
1547 | */ | ||
1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
1549 | lockres->l_blocking = LKM_EXMODE; | ||
1550 | |||
1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1554 | |||
1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
1556 | if (ret) { | ||
1557 | mlog_errno(ret); | ||
1558 | return; | ||
1370 | } | 1559 | } |
1371 | 1560 | ||
1372 | return ret; | 1561 | ret = ocfs2_wait_for_mask(&mw); |
1562 | if (ret) | ||
1563 | mlog_errno(ret); | ||
1373 | } | 1564 | } |
1374 | 1565 | ||
1375 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1376 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) |
1377 | { | 1568 | { |
1378 | int kick = 0; | 1569 | int kick = 0; |
1379 | 1570 | ||
1380 | mlog_entry_void(); | 1571 | mlog_entry_void(); |
1381 | 1572 | ||
1382 | /* If we know that another node is waiting on our lock, kick | 1573 | /* If we know that another node is waiting on our lock, kick |
1383 | * the vote thread * pre-emptively when we reach a release | 1574 | * the downconvert thread pre-emptively when we reach a release |
1384 | * condition. */ | 1575 | * condition. */ |
1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1576 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1386 | switch(lockres->l_blocking) { | 1577 | switch(lockres->l_blocking) { |
@@ -1398,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
1398 | } | 1589 | } |
1399 | 1590 | ||
1400 | if (kick) | 1591 | if (kick) |
1401 | ocfs2_kick_vote_thread(osb); | 1592 | ocfs2_wake_downconvert_thread(osb); |
1402 | |||
1403 | mlog_exit_void(); | ||
1404 | } | ||
1405 | |||
1406 | void ocfs2_data_unlock(struct inode *inode, | ||
1407 | int write) | ||
1408 | { | ||
1409 | int level = write ? LKM_EXMODE : LKM_PRMODE; | ||
1410 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | ||
1411 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1412 | |||
1413 | mlog_entry_void(); | ||
1414 | |||
1415 | mlog(0, "inode %llu drop %s DATA lock\n", | ||
1416 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1417 | write ? "EXMODE" : "PRMODE"); | ||
1418 | |||
1419 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | ||
1420 | !ocfs2_mount_local(osb)) | ||
1421 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | ||
1422 | 1593 | ||
1423 | mlog_exit_void(); | 1594 | mlog_exit_void(); |
1424 | } | 1595 | } |
@@ -1442,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec) | |||
1442 | 1613 | ||
1443 | /* Call this with the lockres locked. I am reasonably sure we don't | 1614 | /* Call this with the lockres locked. I am reasonably sure we don't |
1444 | * need ip_lock in this function as anyone who would be changing those | 1615 | * need ip_lock in this function as anyone who would be changing those |
1445 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 1616 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ |
1446 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 1617 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) |
1447 | { | 1618 | { |
1448 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1619 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1449 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1620 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1450 | struct ocfs2_meta_lvb *lvb; | 1621 | struct ocfs2_meta_lvb *lvb; |
1451 | 1622 | ||
1452 | mlog_entry_void(); | 1623 | mlog_entry_void(); |
@@ -1496,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
1496 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 1667 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
1497 | { | 1668 | { |
1498 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1669 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1499 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1670 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1500 | struct ocfs2_meta_lvb *lvb; | 1671 | struct ocfs2_meta_lvb *lvb; |
1501 | 1672 | ||
1502 | mlog_entry_void(); | 1673 | mlog_entry_void(); |
@@ -1604,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre | |||
1604 | } | 1775 | } |
1605 | 1776 | ||
1606 | /* may or may not return a bh if it went to disk. */ | 1777 | /* may or may not return a bh if it went to disk. */ |
1607 | static int ocfs2_meta_lock_update(struct inode *inode, | 1778 | static int ocfs2_inode_lock_update(struct inode *inode, |
1608 | struct buffer_head **bh) | 1779 | struct buffer_head **bh) |
1609 | { | 1780 | { |
1610 | int status = 0; | 1781 | int status = 0; |
1611 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1782 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1612 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1783 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1613 | struct ocfs2_dinode *fe; | 1784 | struct ocfs2_dinode *fe; |
1614 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1785 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1615 | 1786 | ||
@@ -1721,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
1721 | * returns < 0 error if the callback will never be called, otherwise | 1892 | * returns < 0 error if the callback will never be called, otherwise |
1722 | * the result of the lock will be communicated via the callback. | 1893 | * the result of the lock will be communicated via the callback. |
1723 | */ | 1894 | */ |
1724 | int ocfs2_meta_lock_full(struct inode *inode, | 1895 | int ocfs2_inode_lock_full(struct inode *inode, |
1725 | struct buffer_head **ret_bh, | 1896 | struct buffer_head **ret_bh, |
1726 | int ex, | 1897 | int ex, |
1727 | int arg_flags) | 1898 | int arg_flags) |
@@ -1756,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1756 | wait_event(osb->recovery_event, | 1927 | wait_event(osb->recovery_event, |
1757 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1928 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1758 | 1929 | ||
1759 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 1930 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1760 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1931 | level = ex ? LKM_EXMODE : LKM_PRMODE; |
1761 | dlm_flags = 0; | 1932 | dlm_flags = 0; |
1762 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 1933 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
@@ -1795,11 +1966,11 @@ local: | |||
1795 | } | 1966 | } |
1796 | 1967 | ||
1797 | /* This is fun. The caller may want a bh back, or it may | 1968 | /* This is fun. The caller may want a bh back, or it may |
1798 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1969 | * not. ocfs2_inode_lock_update definitely wants one in, but |
1799 | * may or may not read one, depending on what's in the | 1970 | * may or may not read one, depending on what's in the |
1800 | * LVB. The result of all of this is that we've *only* gone to | 1971 | * LVB. The result of all of this is that we've *only* gone to |
1801 | * disk if we have to, so the complexity is worthwhile. */ | 1972 | * disk if we have to, so the complexity is worthwhile. */ |
1802 | status = ocfs2_meta_lock_update(inode, &local_bh); | 1973 | status = ocfs2_inode_lock_update(inode, &local_bh); |
1803 | if (status < 0) { | 1974 | if (status < 0) { |
1804 | if (status != -ENOENT) | 1975 | if (status != -ENOENT) |
1805 | mlog_errno(status); | 1976 | mlog_errno(status); |
@@ -1821,7 +1992,7 @@ bail: | |||
1821 | *ret_bh = NULL; | 1992 | *ret_bh = NULL; |
1822 | } | 1993 | } |
1823 | if (acquired) | 1994 | if (acquired) |
1824 | ocfs2_meta_unlock(inode, ex); | 1995 | ocfs2_inode_unlock(inode, ex); |
1825 | } | 1996 | } |
1826 | 1997 | ||
1827 | if (local_bh) | 1998 | if (local_bh) |
@@ -1832,19 +2003,20 @@ bail: | |||
1832 | } | 2003 | } |
1833 | 2004 | ||
1834 | /* | 2005 | /* |
1835 | * This is working around a lock inversion between tasks acquiring DLM locks | 2006 | * This is working around a lock inversion between tasks acquiring DLM |
1836 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 2007 | * locks while holding a page lock and the downconvert thread which |
1837 | * while acquiring page locks. | 2008 | * blocks dlm lock acquiry while acquiring page locks. |
1838 | * | 2009 | * |
1839 | * ** These _with_page variantes are only intended to be called from aop | 2010 | * ** These _with_page variantes are only intended to be called from aop |
1840 | * methods that hold page locks and return a very specific *positive* error | 2011 | * methods that hold page locks and return a very specific *positive* error |
1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2012 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
1842 | * | 2013 | * |
1843 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 2014 | * The DLM is called such that it returns -EAGAIN if it would have |
1844 | * waiting for the vote thread. In that case we unlock our page so the vote | 2015 | * blocked waiting for the downconvert thread. In that case we unlock |
1845 | * thread can make progress. Once we've done this we have to return | 2016 | * our page so the downconvert thread can make progress. Once we've |
1846 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 2017 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
1847 | * into the VFS who will then immediately retry the aop call. | 2018 | * that called us can bubble that back up into the VFS who will then |
2019 | * immediately retry the aop call. | ||
1848 | * | 2020 | * |
1849 | * We do a blocking lock and immediate unlock before returning, though, so that | 2021 | * We do a blocking lock and immediate unlock before returning, though, so that |
1850 | * the lock has a great chance of being cached on this node by the time the VFS | 2022 | * the lock has a great chance of being cached on this node by the time the VFS |
@@ -1852,32 +2024,32 @@ bail: | |||
1852 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2024 | * ping locks back and forth, but that's a risk we're willing to take to avoid |
1853 | * the lock inversion simply. | 2025 | * the lock inversion simply. |
1854 | */ | 2026 | */ |
1855 | int ocfs2_meta_lock_with_page(struct inode *inode, | 2027 | int ocfs2_inode_lock_with_page(struct inode *inode, |
1856 | struct buffer_head **ret_bh, | 2028 | struct buffer_head **ret_bh, |
1857 | int ex, | 2029 | int ex, |
1858 | struct page *page) | 2030 | struct page *page) |
1859 | { | 2031 | { |
1860 | int ret; | 2032 | int ret; |
1861 | 2033 | ||
1862 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2034 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
1863 | if (ret == -EAGAIN) { | 2035 | if (ret == -EAGAIN) { |
1864 | unlock_page(page); | 2036 | unlock_page(page); |
1865 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 2037 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) |
1866 | ocfs2_meta_unlock(inode, ex); | 2038 | ocfs2_inode_unlock(inode, ex); |
1867 | ret = AOP_TRUNCATED_PAGE; | 2039 | ret = AOP_TRUNCATED_PAGE; |
1868 | } | 2040 | } |
1869 | 2041 | ||
1870 | return ret; | 2042 | return ret; |
1871 | } | 2043 | } |
1872 | 2044 | ||
1873 | int ocfs2_meta_lock_atime(struct inode *inode, | 2045 | int ocfs2_inode_lock_atime(struct inode *inode, |
1874 | struct vfsmount *vfsmnt, | 2046 | struct vfsmount *vfsmnt, |
1875 | int *level) | 2047 | int *level) |
1876 | { | 2048 | { |
1877 | int ret; | 2049 | int ret; |
1878 | 2050 | ||
1879 | mlog_entry_void(); | 2051 | mlog_entry_void(); |
1880 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2052 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1881 | if (ret < 0) { | 2053 | if (ret < 0) { |
1882 | mlog_errno(ret); | 2054 | mlog_errno(ret); |
1883 | return ret; | 2055 | return ret; |
@@ -1890,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1890 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2062 | if (ocfs2_should_update_atime(inode, vfsmnt)) { |
1891 | struct buffer_head *bh = NULL; | 2063 | struct buffer_head *bh = NULL; |
1892 | 2064 | ||
1893 | ocfs2_meta_unlock(inode, 0); | 2065 | ocfs2_inode_unlock(inode, 0); |
1894 | ret = ocfs2_meta_lock(inode, &bh, 1); | 2066 | ret = ocfs2_inode_lock(inode, &bh, 1); |
1895 | if (ret < 0) { | 2067 | if (ret < 0) { |
1896 | mlog_errno(ret); | 2068 | mlog_errno(ret); |
1897 | return ret; | 2069 | return ret; |
@@ -1908,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1908 | return ret; | 2080 | return ret; |
1909 | } | 2081 | } |
1910 | 2082 | ||
1911 | void ocfs2_meta_unlock(struct inode *inode, | 2083 | void ocfs2_inode_unlock(struct inode *inode, |
1912 | int ex) | 2084 | int ex) |
1913 | { | 2085 | { |
1914 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2086 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1915 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 2087 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1916 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2088 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1917 | 2089 | ||
1918 | mlog_entry_void(); | 2090 | mlog_entry_void(); |
@@ -2320,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2320 | goto bail; | 2492 | goto bail; |
2321 | } | 2493 | } |
2322 | 2494 | ||
2323 | /* launch vote thread */ | 2495 | /* launch downconvert thread */ |
2324 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2496 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
2325 | if (IS_ERR(osb->vote_task)) { | 2497 | if (IS_ERR(osb->dc_task)) { |
2326 | status = PTR_ERR(osb->vote_task); | 2498 | status = PTR_ERR(osb->dc_task); |
2327 | osb->vote_task = NULL; | 2499 | osb->dc_task = NULL; |
2328 | mlog_errno(status); | 2500 | mlog_errno(status); |
2329 | goto bail; | 2501 | goto bail; |
2330 | } | 2502 | } |
@@ -2353,8 +2525,8 @@ local: | |||
2353 | bail: | 2525 | bail: |
2354 | if (status < 0) { | 2526 | if (status < 0) { |
2355 | ocfs2_dlm_shutdown_debug(osb); | 2527 | ocfs2_dlm_shutdown_debug(osb); |
2356 | if (osb->vote_task) | 2528 | if (osb->dc_task) |
2357 | kthread_stop(osb->vote_task); | 2529 | kthread_stop(osb->dc_task); |
2358 | } | 2530 | } |
2359 | 2531 | ||
2360 | mlog_exit(status); | 2532 | mlog_exit(status); |
@@ -2369,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2369 | 2541 | ||
2370 | ocfs2_drop_osb_locks(osb); | 2542 | ocfs2_drop_osb_locks(osb); |
2371 | 2543 | ||
2372 | if (osb->vote_task) { | 2544 | if (osb->dc_task) { |
2373 | kthread_stop(osb->vote_task); | 2545 | kthread_stop(osb->dc_task); |
2374 | osb->vote_task = NULL; | 2546 | osb->dc_task = NULL; |
2375 | } | 2547 | } |
2376 | 2548 | ||
2377 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2549 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
@@ -2527,7 +2699,7 @@ out: | |||
2527 | 2699 | ||
2528 | /* Mark the lockres as being dropped. It will no longer be | 2700 | /* Mark the lockres as being dropped. It will no longer be |
2529 | * queued if blocking, but we still may have to wait on it | 2701 | * queued if blocking, but we still may have to wait on it |
2530 | * being dequeued from the vote thread before we can consider | 2702 | * being dequeued from the downconvert thread before we can consider |
2531 | * it safe to drop. | 2703 | * it safe to drop. |
2532 | * | 2704 | * |
2533 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2705 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
@@ -2590,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2590 | status = err; | 2762 | status = err; |
2591 | 2763 | ||
2592 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2764 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2593 | &OCFS2_I(inode)->ip_data_lockres); | 2765 | &OCFS2_I(inode)->ip_inode_lockres); |
2594 | if (err < 0) | ||
2595 | mlog_errno(err); | ||
2596 | if (err < 0 && !status) | ||
2597 | status = err; | ||
2598 | |||
2599 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | ||
2600 | &OCFS2_I(inode)->ip_meta_lockres); | ||
2601 | if (err < 0) | 2766 | if (err < 0) |
2602 | mlog_errno(err); | 2767 | mlog_errno(err); |
2603 | if (err < 0 && !status) | 2768 | if (err < 0 && !status) |
@@ -2850,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2850 | inode = ocfs2_lock_res_inode(lockres); | 3015 | inode = ocfs2_lock_res_inode(lockres); |
2851 | mapping = inode->i_mapping; | 3016 | mapping = inode->i_mapping; |
2852 | 3017 | ||
3018 | if (S_ISREG(inode->i_mode)) | ||
3019 | goto out; | ||
3020 | |||
2853 | /* | 3021 | /* |
2854 | * We need this before the filemap_fdatawrite() so that it can | 3022 | * We need this before the filemap_fdatawrite() so that it can |
2855 | * transfer the dirty bit from the PTE to the | 3023 | * transfer the dirty bit from the PTE to the |
@@ -2875,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2875 | filemap_fdatawait(mapping); | 3043 | filemap_fdatawait(mapping); |
2876 | } | 3044 | } |
2877 | 3045 | ||
3046 | out: | ||
2878 | return UNBLOCK_CONTINUE; | 3047 | return UNBLOCK_CONTINUE; |
2879 | } | 3048 | } |
2880 | 3049 | ||
@@ -2903,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
2903 | 3072 | ||
2904 | /* | 3073 | /* |
2905 | * Does the final reference drop on our dentry lock. Right now this | 3074 | * Does the final reference drop on our dentry lock. Right now this |
2906 | * happens in the vote thread, but we could choose to simplify the | 3075 | * happens in the downconvert thread, but we could choose to simplify the |
2907 | * dlmglue API and push these off to the ocfs2_wq in the future. | 3076 | * dlmglue API and push these off to the ocfs2_wq in the future. |
2908 | */ | 3077 | */ |
2909 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 3078 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
@@ -3042,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3042 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3211 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
3043 | 3212 | ||
3044 | /* Detect whether a lock has been marked as going away while | 3213 | /* Detect whether a lock has been marked as going away while |
3045 | * the vote thread was processing other things. A lock can | 3214 | * the downconvert thread was processing other things. A lock can |
3046 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3215 | * still be marked with OCFS2_LOCK_FREEING after this check, |
3047 | * but short circuiting here will still save us some | 3216 | * but short circuiting here will still save us some |
3048 | * performance. */ | 3217 | * performance. */ |
@@ -3091,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3091 | 3260 | ||
3092 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3261 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); |
3093 | 3262 | ||
3094 | spin_lock(&osb->vote_task_lock); | 3263 | spin_lock(&osb->dc_task_lock); |
3095 | if (list_empty(&lockres->l_blocked_list)) { | 3264 | if (list_empty(&lockres->l_blocked_list)) { |
3096 | list_add_tail(&lockres->l_blocked_list, | 3265 | list_add_tail(&lockres->l_blocked_list, |
3097 | &osb->blocked_lock_list); | 3266 | &osb->blocked_lock_list); |
3098 | osb->blocked_lock_count++; | 3267 | osb->blocked_lock_count++; |
3099 | } | 3268 | } |
3100 | spin_unlock(&osb->vote_task_lock); | 3269 | spin_unlock(&osb->dc_task_lock); |
3270 | |||
3271 | mlog_exit_void(); | ||
3272 | } | ||
3273 | |||
3274 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
3275 | { | ||
3276 | unsigned long processed; | ||
3277 | struct ocfs2_lock_res *lockres; | ||
3278 | |||
3279 | mlog_entry_void(); | ||
3280 | |||
3281 | spin_lock(&osb->dc_task_lock); | ||
3282 | /* grab this early so we know to try again if a state change and | ||
3283 | * wake happens part-way through our work */ | ||
3284 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
3285 | |||
3286 | processed = osb->blocked_lock_count; | ||
3287 | while (processed) { | ||
3288 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
3289 | |||
3290 | lockres = list_entry(osb->blocked_lock_list.next, | ||
3291 | struct ocfs2_lock_res, l_blocked_list); | ||
3292 | list_del_init(&lockres->l_blocked_list); | ||
3293 | osb->blocked_lock_count--; | ||
3294 | spin_unlock(&osb->dc_task_lock); | ||
3295 | |||
3296 | BUG_ON(!processed); | ||
3297 | processed--; | ||
3298 | |||
3299 | ocfs2_process_blocked_lock(osb, lockres); | ||
3300 | |||
3301 | spin_lock(&osb->dc_task_lock); | ||
3302 | } | ||
3303 | spin_unlock(&osb->dc_task_lock); | ||
3101 | 3304 | ||
3102 | mlog_exit_void(); | 3305 | mlog_exit_void(); |
3103 | } | 3306 | } |
3307 | |||
3308 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
3309 | { | ||
3310 | int empty = 0; | ||
3311 | |||
3312 | spin_lock(&osb->dc_task_lock); | ||
3313 | if (list_empty(&osb->blocked_lock_list)) | ||
3314 | empty = 1; | ||
3315 | |||
3316 | spin_unlock(&osb->dc_task_lock); | ||
3317 | return empty; | ||
3318 | } | ||
3319 | |||
3320 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
3321 | { | ||
3322 | int should_wake = 0; | ||
3323 | |||
3324 | spin_lock(&osb->dc_task_lock); | ||
3325 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
3326 | should_wake = 1; | ||
3327 | spin_unlock(&osb->dc_task_lock); | ||
3328 | |||
3329 | return should_wake; | ||
3330 | } | ||
3331 | |||
3332 | int ocfs2_downconvert_thread(void *arg) | ||
3333 | { | ||
3334 | int status = 0; | ||
3335 | struct ocfs2_super *osb = arg; | ||
3336 | |||
3337 | /* only quit once we've been asked to stop and there is no more | ||
3338 | * work available */ | ||
3339 | while (!(kthread_should_stop() && | ||
3340 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
3341 | |||
3342 | wait_event_interruptible(osb->dc_event, | ||
3343 | ocfs2_downconvert_thread_should_wake(osb) || | ||
3344 | kthread_should_stop()); | ||
3345 | |||
3346 | mlog(0, "downconvert_thread: awoken\n"); | ||
3347 | |||
3348 | ocfs2_downconvert_thread_do_work(osb); | ||
3349 | } | ||
3350 | |||
3351 | osb->dc_task = NULL; | ||
3352 | return status; | ||
3353 | } | ||
3354 | |||
3355 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
3356 | { | ||
3357 | spin_lock(&osb->dc_task_lock); | ||
3358 | /* make sure the voting thread gets a swipe at whatever changes | ||
3359 | * the caller may have made to the voting state */ | ||
3360 | osb->dc_wake_sequence++; | ||
3361 | spin_unlock(&osb->dc_task_lock); | ||
3362 | wake_up(&osb->dc_event); | ||
3363 | } | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 87a785e41205..5f17243ba501 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -49,12 +49,12 @@ struct ocfs2_meta_lvb { | |||
49 | __be32 lvb_reserved2; | 49 | __be32 lvb_reserved2; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ | 52 | /* ocfs2_inode_lock_full() 'arg_flags' flags */ |
53 | /* don't wait on recovery. */ | 53 | /* don't wait on recovery. */ |
54 | #define OCFS2_META_LOCK_RECOVERY (0x01) | 54 | #define OCFS2_META_LOCK_RECOVERY (0x01) |
55 | /* Instruct the dlm not to queue ourselves on the other node. */ | 55 | /* Instruct the dlm not to queue ourselves on the other node. */ |
56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) | 56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) |
57 | /* don't block waiting for the vote thread, instead return -EAGAIN */ | 57 | /* don't block waiting for the downconvert thread, instead return -EAGAIN */ |
58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
59 | 59 | ||
60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
@@ -66,38 +66,32 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
66 | struct inode *inode); | 66 | struct inode *inode); |
67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, |
68 | u64 parent, struct inode *inode); | 68 | u64 parent, struct inode *inode); |
69 | struct ocfs2_file_private; | ||
70 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
71 | struct ocfs2_file_private *fp); | ||
69 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 72 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); |
70 | int ocfs2_create_new_inode_locks(struct inode *inode); | 73 | int ocfs2_create_new_inode_locks(struct inode *inode); |
71 | int ocfs2_drop_inode_locks(struct inode *inode); | 74 | int ocfs2_drop_inode_locks(struct inode *inode); |
72 | int ocfs2_data_lock_full(struct inode *inode, | ||
73 | int write, | ||
74 | int arg_flags); | ||
75 | #define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0) | ||
76 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
77 | int write, | ||
78 | struct page *page); | ||
79 | void ocfs2_data_unlock(struct inode *inode, | ||
80 | int write); | ||
81 | int ocfs2_rw_lock(struct inode *inode, int write); | 75 | int ocfs2_rw_lock(struct inode *inode, int write); |
82 | void ocfs2_rw_unlock(struct inode *inode, int write); | 76 | void ocfs2_rw_unlock(struct inode *inode, int write); |
83 | int ocfs2_open_lock(struct inode *inode); | 77 | int ocfs2_open_lock(struct inode *inode); |
84 | int ocfs2_try_open_lock(struct inode *inode, int write); | 78 | int ocfs2_try_open_lock(struct inode *inode, int write); |
85 | void ocfs2_open_unlock(struct inode *inode); | 79 | void ocfs2_open_unlock(struct inode *inode); |
86 | int ocfs2_meta_lock_atime(struct inode *inode, | 80 | int ocfs2_inode_lock_atime(struct inode *inode, |
87 | struct vfsmount *vfsmnt, | 81 | struct vfsmount *vfsmnt, |
88 | int *level); | 82 | int *level); |
89 | int ocfs2_meta_lock_full(struct inode *inode, | 83 | int ocfs2_inode_lock_full(struct inode *inode, |
90 | struct buffer_head **ret_bh, | 84 | struct buffer_head **ret_bh, |
91 | int ex, | 85 | int ex, |
92 | int arg_flags); | 86 | int arg_flags); |
93 | int ocfs2_meta_lock_with_page(struct inode *inode, | 87 | int ocfs2_inode_lock_with_page(struct inode *inode, |
94 | struct buffer_head **ret_bh, | 88 | struct buffer_head **ret_bh, |
95 | int ex, | 89 | int ex, |
96 | struct page *page); | 90 | struct page *page); |
97 | /* 99% of the time we don't want to supply any additional flags -- | 91 | /* 99% of the time we don't want to supply any additional flags -- |
98 | * those are for very specific cases only. */ | 92 | * those are for very specific cases only. */ |
99 | #define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0) | 93 | #define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) |
100 | void ocfs2_meta_unlock(struct inode *inode, | 94 | void ocfs2_inode_unlock(struct inode *inode, |
101 | int ex); | 95 | int ex); |
102 | int ocfs2_super_lock(struct ocfs2_super *osb, | 96 | int ocfs2_super_lock(struct ocfs2_super *osb, |
103 | int ex); | 97 | int ex); |
@@ -107,14 +101,17 @@ int ocfs2_rename_lock(struct ocfs2_super *osb); | |||
107 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 101 | void ocfs2_rename_unlock(struct ocfs2_super *osb); |
108 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | 102 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); |
109 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | 103 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); |
104 | int ocfs2_file_lock(struct file *file, int ex, int trylock); | ||
105 | void ocfs2_file_unlock(struct file *file); | ||
110 | 106 | ||
111 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 107 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); |
112 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 108 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
113 | struct ocfs2_lock_res *lockres); | 109 | struct ocfs2_lock_res *lockres); |
114 | 110 | ||
115 | /* for the vote thread */ | 111 | /* for the downconvert thread */ |
116 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 112 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
117 | struct ocfs2_lock_res *lockres); | 113 | struct ocfs2_lock_res *lockres); |
114 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | ||
118 | 115 | ||
119 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 116 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
120 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 117 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h index ff257628af16..1942e09f6ee5 100644 --- a/fs/ocfs2/endian.h +++ b/fs/ocfs2/endian.h | |||
@@ -37,11 +37,6 @@ static inline void le64_add_cpu(__le64 *var, u64 val) | |||
37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | 37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); |
38 | } | 38 | } |
39 | 39 | ||
40 | static inline void le32_and_cpu(__le32 *var, u32 val) | ||
41 | { | ||
42 | *var = cpu_to_le32(le32_to_cpu(*var) & val); | ||
43 | } | ||
44 | |||
45 | static inline void be32_add_cpu(__be32 *var, u32 val) | 40 | static inline void be32_add_cpu(__be32 *var, u32 val) |
46 | { | 41 | { |
47 | *var = cpu_to_be32(be32_to_cpu(*var) + val); | 42 | *var = cpu_to_be32(be32_to_cpu(*var) + val); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 535bfa9568a4..67527cebf214 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
58 | return ERR_PTR(-ESTALE); | 58 | return ERR_PTR(-ESTALE); |
59 | } | 59 | } |
60 | 60 | ||
61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); | 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); |
62 | 62 | ||
63 | if (IS_ERR(inode)) | 63 | if (IS_ERR(inode)) |
64 | return (void *)inode; | 64 | return (void *)inode; |
@@ -95,7 +95,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
95 | mlog(0, "find parent of directory %llu\n", | 95 | mlog(0, "find parent of directory %llu\n", |
96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); | 96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); |
97 | 97 | ||
98 | status = ocfs2_meta_lock(dir, NULL, 0); | 98 | status = ocfs2_inode_lock(dir, NULL, 0); |
99 | if (status < 0) { | 99 | if (status < 0) { |
100 | if (status != -ENOENT) | 100 | if (status != -ENOENT) |
101 | mlog_errno(status); | 101 | mlog_errno(status); |
@@ -109,7 +109,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
109 | goto bail_unlock; | 109 | goto bail_unlock; |
110 | } | 110 | } |
111 | 111 | ||
112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); |
113 | if (IS_ERR(inode)) { | 113 | if (IS_ERR(inode)) { |
114 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 114 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
115 | (unsigned long long)blkno); | 115 | (unsigned long long)blkno); |
@@ -126,7 +126,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
126 | parent->d_op = &ocfs2_dentry_ops; | 126 | parent->d_op = &ocfs2_dentry_ops; |
127 | 127 | ||
128 | bail_unlock: | 128 | bail_unlock: |
129 | ocfs2_meta_unlock(dir, 0); | 129 | ocfs2_inode_unlock(dir, 0); |
130 | 130 | ||
131 | bail: | 131 | bail: |
132 | mlog_exit_ptr(parent); | 132 | mlog_exit_ptr(parent); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b75b2e1f0e42..ed5d5232e85d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include "inode.h" | 51 | #include "inode.h" |
52 | #include "ioctl.h" | 52 | #include "ioctl.h" |
53 | #include "journal.h" | 53 | #include "journal.h" |
54 | #include "locks.h" | ||
54 | #include "mmap.h" | 55 | #include "mmap.h" |
55 | #include "suballoc.h" | 56 | #include "suballoc.h" |
56 | #include "super.h" | 57 | #include "super.h" |
@@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode) | |||
63 | return sync_mapping_buffers(inode->i_mapping); | 64 | return sync_mapping_buffers(inode->i_mapping); |
64 | } | 65 | } |
65 | 66 | ||
67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | ||
68 | { | ||
69 | struct ocfs2_file_private *fp; | ||
70 | |||
71 | fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); | ||
72 | if (!fp) | ||
73 | return -ENOMEM; | ||
74 | |||
75 | fp->fp_file = file; | ||
76 | mutex_init(&fp->fp_mutex); | ||
77 | ocfs2_file_lock_res_init(&fp->fp_flock, fp); | ||
78 | file->private_data = fp; | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void ocfs2_free_file_private(struct inode *inode, struct file *file) | ||
84 | { | ||
85 | struct ocfs2_file_private *fp = file->private_data; | ||
86 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
87 | |||
88 | if (fp) { | ||
89 | ocfs2_simple_drop_lockres(osb, &fp->fp_flock); | ||
90 | ocfs2_lock_res_free(&fp->fp_flock); | ||
91 | kfree(fp); | ||
92 | file->private_data = NULL; | ||
93 | } | ||
94 | } | ||
95 | |||
66 | static int ocfs2_file_open(struct inode *inode, struct file *file) | 96 | static int ocfs2_file_open(struct inode *inode, struct file *file) |
67 | { | 97 | { |
68 | int status; | 98 | int status; |
@@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
89 | 119 | ||
90 | oi->ip_open_count++; | 120 | oi->ip_open_count++; |
91 | spin_unlock(&oi->ip_lock); | 121 | spin_unlock(&oi->ip_lock); |
92 | status = 0; | 122 | |
123 | status = ocfs2_init_file_private(inode, file); | ||
124 | if (status) { | ||
125 | /* | ||
126 | * We want to set open count back if we're failing the | ||
127 | * open. | ||
128 | */ | ||
129 | spin_lock(&oi->ip_lock); | ||
130 | oi->ip_open_count--; | ||
131 | spin_unlock(&oi->ip_lock); | ||
132 | } | ||
133 | |||
93 | leave: | 134 | leave: |
94 | mlog_exit(status); | 135 | mlog_exit(status); |
95 | return status; | 136 | return status; |
@@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) | |||
108 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; | 149 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; |
109 | spin_unlock(&oi->ip_lock); | 150 | spin_unlock(&oi->ip_lock); |
110 | 151 | ||
152 | ocfs2_free_file_private(inode, file); | ||
153 | |||
111 | mlog_exit(0); | 154 | mlog_exit(0); |
112 | 155 | ||
113 | return 0; | 156 | return 0; |
114 | } | 157 | } |
115 | 158 | ||
159 | static int ocfs2_dir_open(struct inode *inode, struct file *file) | ||
160 | { | ||
161 | return ocfs2_init_file_private(inode, file); | ||
162 | } | ||
163 | |||
164 | static int ocfs2_dir_release(struct inode *inode, struct file *file) | ||
165 | { | ||
166 | ocfs2_free_file_private(inode, file); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
116 | static int ocfs2_sync_file(struct file *file, | 170 | static int ocfs2_sync_file(struct file *file, |
117 | struct dentry *dentry, | 171 | struct dentry *dentry, |
118 | int datasync) | 172 | int datasync) |
@@ -382,18 +436,13 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
382 | 436 | ||
383 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 437 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
384 | 438 | ||
385 | /* This forces other nodes to sync and drop their pages. Do | 439 | /* |
386 | * this even if we have a truncate without allocation change - | 440 | * The inode lock forced other nodes to sync and drop their |
387 | * ocfs2 cluster sizes can be much greater than page size, so | 441 | * pages, which (correctly) happens even if we have a truncate |
388 | * we have to truncate them anyway. */ | 442 | * without allocation change - ocfs2 cluster sizes can be much |
389 | status = ocfs2_data_lock(inode, 1); | 443 | * greater than page size, so we have to truncate them |
390 | if (status < 0) { | 444 | * anyway. |
391 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 445 | */ |
392 | |||
393 | mlog_errno(status); | ||
394 | goto bail; | ||
395 | } | ||
396 | |||
397 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | 446 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); |
398 | truncate_inode_pages(inode->i_mapping, new_i_size); | 447 | truncate_inode_pages(inode->i_mapping, new_i_size); |
399 | 448 | ||
@@ -403,7 +452,7 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
403 | if (status) | 452 | if (status) |
404 | mlog_errno(status); | 453 | mlog_errno(status); |
405 | 454 | ||
406 | goto bail_unlock_data; | 455 | goto bail_unlock_sem; |
407 | } | 456 | } |
408 | 457 | ||
409 | /* alright, we're going to need to do a full blown alloc size | 458 | /* alright, we're going to need to do a full blown alloc size |
@@ -413,25 +462,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
413 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); | 462 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); |
414 | if (status < 0) { | 463 | if (status < 0) { |
415 | mlog_errno(status); | 464 | mlog_errno(status); |
416 | goto bail_unlock_data; | 465 | goto bail_unlock_sem; |
417 | } | 466 | } |
418 | 467 | ||
419 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 468 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); |
420 | if (status < 0) { | 469 | if (status < 0) { |
421 | mlog_errno(status); | 470 | mlog_errno(status); |
422 | goto bail_unlock_data; | 471 | goto bail_unlock_sem; |
423 | } | 472 | } |
424 | 473 | ||
425 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | 474 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); |
426 | if (status < 0) { | 475 | if (status < 0) { |
427 | mlog_errno(status); | 476 | mlog_errno(status); |
428 | goto bail_unlock_data; | 477 | goto bail_unlock_sem; |
429 | } | 478 | } |
430 | 479 | ||
431 | /* TODO: orphan dir cleanup here. */ | 480 | /* TODO: orphan dir cleanup here. */ |
432 | bail_unlock_data: | 481 | bail_unlock_sem: |
433 | ocfs2_data_unlock(inode, 1); | ||
434 | |||
435 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 482 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
436 | 483 | ||
437 | bail: | 484 | bail: |
@@ -579,7 +626,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
579 | 626 | ||
580 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
581 | "clusters_to_add = %u, extents_to_split = %u\n", | 628 | "clusters_to_add = %u, extents_to_split = %u\n", |
582 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), |
583 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | 630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); |
584 | 631 | ||
585 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); |
@@ -760,7 +807,7 @@ restarted_transaction: | |||
760 | le32_to_cpu(fe->i_clusters), | 807 | le32_to_cpu(fe->i_clusters), |
761 | (unsigned long long)le64_to_cpu(fe->i_size)); | 808 | (unsigned long long)le64_to_cpu(fe->i_size)); |
762 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", | 809 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", |
763 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 810 | OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); |
764 | 811 | ||
765 | leave: | 812 | leave: |
766 | if (handle) { | 813 | if (handle) { |
@@ -917,7 +964,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
917 | struct buffer_head *di_bh, | 964 | struct buffer_head *di_bh, |
918 | u64 new_i_size) | 965 | u64 new_i_size) |
919 | { | 966 | { |
920 | int ret = 0, data_locked = 0; | 967 | int ret = 0; |
921 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 968 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
922 | 969 | ||
923 | BUG_ON(!di_bh); | 970 | BUG_ON(!di_bh); |
@@ -943,20 +990,6 @@ static int ocfs2_extend_file(struct inode *inode, | |||
943 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 990 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
944 | goto out_update_size; | 991 | goto out_update_size; |
945 | 992 | ||
946 | /* | ||
947 | * protect the pages that ocfs2_zero_extend is going to be | ||
948 | * pulling into the page cache.. we do this before the | ||
949 | * metadata extend so that we don't get into the situation | ||
950 | * where we've extended the metadata but can't get the data | ||
951 | * lock to zero. | ||
952 | */ | ||
953 | ret = ocfs2_data_lock(inode, 1); | ||
954 | if (ret < 0) { | ||
955 | mlog_errno(ret); | ||
956 | goto out; | ||
957 | } | ||
958 | data_locked = 1; | ||
959 | |||
960 | /* | 993 | /* |
961 | * The alloc sem blocks people in read/write from reading our | 994 | * The alloc sem blocks people in read/write from reading our |
962 | * allocation until we're done changing it. We depend on | 995 | * allocation until we're done changing it. We depend on |
@@ -980,7 +1013,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
980 | up_write(&oi->ip_alloc_sem); | 1013 | up_write(&oi->ip_alloc_sem); |
981 | 1014 | ||
982 | mlog_errno(ret); | 1015 | mlog_errno(ret); |
983 | goto out_unlock; | 1016 | goto out; |
984 | } | 1017 | } |
985 | } | 1018 | } |
986 | 1019 | ||
@@ -991,7 +1024,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
991 | 1024 | ||
992 | if (ret < 0) { | 1025 | if (ret < 0) { |
993 | mlog_errno(ret); | 1026 | mlog_errno(ret); |
994 | goto out_unlock; | 1027 | goto out; |
995 | } | 1028 | } |
996 | 1029 | ||
997 | out_update_size: | 1030 | out_update_size: |
@@ -999,10 +1032,6 @@ out_update_size: | |||
999 | if (ret < 0) | 1032 | if (ret < 0) |
1000 | mlog_errno(ret); | 1033 | mlog_errno(ret); |
1001 | 1034 | ||
1002 | out_unlock: | ||
1003 | if (data_locked) | ||
1004 | ocfs2_data_unlock(inode, 1); | ||
1005 | |||
1006 | out: | 1035 | out: |
1007 | return ret; | 1036 | return ret; |
1008 | } | 1037 | } |
@@ -1050,7 +1079,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1050 | } | 1079 | } |
1051 | } | 1080 | } |
1052 | 1081 | ||
1053 | status = ocfs2_meta_lock(inode, &bh, 1); | 1082 | status = ocfs2_inode_lock(inode, &bh, 1); |
1054 | if (status < 0) { | 1083 | if (status < 0) { |
1055 | if (status != -ENOENT) | 1084 | if (status != -ENOENT) |
1056 | mlog_errno(status); | 1085 | mlog_errno(status); |
@@ -1102,7 +1131,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1102 | bail_commit: | 1131 | bail_commit: |
1103 | ocfs2_commit_trans(osb, handle); | 1132 | ocfs2_commit_trans(osb, handle); |
1104 | bail_unlock: | 1133 | bail_unlock: |
1105 | ocfs2_meta_unlock(inode, 1); | 1134 | ocfs2_inode_unlock(inode, 1); |
1106 | bail_unlock_rw: | 1135 | bail_unlock_rw: |
1107 | if (size_change) | 1136 | if (size_change) |
1108 | ocfs2_rw_unlock(inode, 1); | 1137 | ocfs2_rw_unlock(inode, 1); |
@@ -1149,7 +1178,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
1149 | 1178 | ||
1150 | mlog_entry_void(); | 1179 | mlog_entry_void(); |
1151 | 1180 | ||
1152 | ret = ocfs2_meta_lock(inode, NULL, 0); | 1181 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1153 | if (ret) { | 1182 | if (ret) { |
1154 | if (ret != -ENOENT) | 1183 | if (ret != -ENOENT) |
1155 | mlog_errno(ret); | 1184 | mlog_errno(ret); |
@@ -1158,7 +1187,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
1158 | 1187 | ||
1159 | ret = generic_permission(inode, mask, NULL); | 1188 | ret = generic_permission(inode, mask, NULL); |
1160 | 1189 | ||
1161 | ocfs2_meta_unlock(inode, 0); | 1190 | ocfs2_inode_unlock(inode, 0); |
1162 | out: | 1191 | out: |
1163 | mlog_exit(ret); | 1192 | mlog_exit(ret); |
1164 | return ret; | 1193 | return ret; |
@@ -1630,7 +1659,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1630 | goto out; | 1659 | goto out; |
1631 | } | 1660 | } |
1632 | 1661 | ||
1633 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1662 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
1634 | if (ret) { | 1663 | if (ret) { |
1635 | mlog_errno(ret); | 1664 | mlog_errno(ret); |
1636 | goto out_rw_unlock; | 1665 | goto out_rw_unlock; |
@@ -1638,7 +1667,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1638 | 1667 | ||
1639 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | 1668 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
1640 | ret = -EPERM; | 1669 | ret = -EPERM; |
1641 | goto out_meta_unlock; | 1670 | goto out_inode_unlock; |
1642 | } | 1671 | } |
1643 | 1672 | ||
1644 | switch (sr->l_whence) { | 1673 | switch (sr->l_whence) { |
@@ -1652,7 +1681,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1652 | break; | 1681 | break; |
1653 | default: | 1682 | default: |
1654 | ret = -EINVAL; | 1683 | ret = -EINVAL; |
1655 | goto out_meta_unlock; | 1684 | goto out_inode_unlock; |
1656 | } | 1685 | } |
1657 | sr->l_whence = 0; | 1686 | sr->l_whence = 0; |
1658 | 1687 | ||
@@ -1663,14 +1692,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1663 | || (sr->l_start + llen) < 0 | 1692 | || (sr->l_start + llen) < 0 |
1664 | || (sr->l_start + llen) > max_off) { | 1693 | || (sr->l_start + llen) > max_off) { |
1665 | ret = -EINVAL; | 1694 | ret = -EINVAL; |
1666 | goto out_meta_unlock; | 1695 | goto out_inode_unlock; |
1667 | } | 1696 | } |
1668 | size = sr->l_start + sr->l_len; | 1697 | size = sr->l_start + sr->l_len; |
1669 | 1698 | ||
1670 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | 1699 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { |
1671 | if (sr->l_len <= 0) { | 1700 | if (sr->l_len <= 0) { |
1672 | ret = -EINVAL; | 1701 | ret = -EINVAL; |
1673 | goto out_meta_unlock; | 1702 | goto out_inode_unlock; |
1674 | } | 1703 | } |
1675 | } | 1704 | } |
1676 | 1705 | ||
@@ -1678,7 +1707,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1678 | ret = __ocfs2_write_remove_suid(inode, di_bh); | 1707 | ret = __ocfs2_write_remove_suid(inode, di_bh); |
1679 | if (ret) { | 1708 | if (ret) { |
1680 | mlog_errno(ret); | 1709 | mlog_errno(ret); |
1681 | goto out_meta_unlock; | 1710 | goto out_inode_unlock; |
1682 | } | 1711 | } |
1683 | } | 1712 | } |
1684 | 1713 | ||
@@ -1704,7 +1733,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1704 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1733 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1705 | if (ret) { | 1734 | if (ret) { |
1706 | mlog_errno(ret); | 1735 | mlog_errno(ret); |
1707 | goto out_meta_unlock; | 1736 | goto out_inode_unlock; |
1708 | } | 1737 | } |
1709 | 1738 | ||
1710 | /* | 1739 | /* |
@@ -1714,7 +1743,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1714 | if (IS_ERR(handle)) { | 1743 | if (IS_ERR(handle)) { |
1715 | ret = PTR_ERR(handle); | 1744 | ret = PTR_ERR(handle); |
1716 | mlog_errno(ret); | 1745 | mlog_errno(ret); |
1717 | goto out_meta_unlock; | 1746 | goto out_inode_unlock; |
1718 | } | 1747 | } |
1719 | 1748 | ||
1720 | if (change_size && i_size_read(inode) < size) | 1749 | if (change_size && i_size_read(inode) < size) |
@@ -1727,9 +1756,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1727 | 1756 | ||
1728 | ocfs2_commit_trans(osb, handle); | 1757 | ocfs2_commit_trans(osb, handle); |
1729 | 1758 | ||
1730 | out_meta_unlock: | 1759 | out_inode_unlock: |
1731 | brelse(di_bh); | 1760 | brelse(di_bh); |
1732 | ocfs2_meta_unlock(inode, 1); | 1761 | ocfs2_inode_unlock(inode, 1); |
1733 | out_rw_unlock: | 1762 | out_rw_unlock: |
1734 | ocfs2_rw_unlock(inode, 1); | 1763 | ocfs2_rw_unlock(inode, 1); |
1735 | 1764 | ||
@@ -1799,7 +1828,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1799 | * if we need to make modifications here. | 1828 | * if we need to make modifications here. |
1800 | */ | 1829 | */ |
1801 | for(;;) { | 1830 | for(;;) { |
1802 | ret = ocfs2_meta_lock(inode, NULL, meta_level); | 1831 | ret = ocfs2_inode_lock(inode, NULL, meta_level); |
1803 | if (ret < 0) { | 1832 | if (ret < 0) { |
1804 | meta_level = -1; | 1833 | meta_level = -1; |
1805 | mlog_errno(ret); | 1834 | mlog_errno(ret); |
@@ -1817,7 +1846,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1817 | * set inode->i_size at the end of a write. */ | 1846 | * set inode->i_size at the end of a write. */ |
1818 | if (should_remove_suid(dentry)) { | 1847 | if (should_remove_suid(dentry)) { |
1819 | if (meta_level == 0) { | 1848 | if (meta_level == 0) { |
1820 | ocfs2_meta_unlock(inode, meta_level); | 1849 | ocfs2_inode_unlock(inode, meta_level); |
1821 | meta_level = 1; | 1850 | meta_level = 1; |
1822 | continue; | 1851 | continue; |
1823 | } | 1852 | } |
@@ -1886,7 +1915,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1886 | *ppos = saved_pos; | 1915 | *ppos = saved_pos; |
1887 | 1916 | ||
1888 | out_unlock: | 1917 | out_unlock: |
1889 | ocfs2_meta_unlock(inode, meta_level); | 1918 | ocfs2_inode_unlock(inode, meta_level); |
1890 | 1919 | ||
1891 | out: | 1920 | out: |
1892 | return ret; | 1921 | return ret; |
@@ -2099,12 +2128,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, | |||
2099 | /* | 2128 | /* |
2100 | * See the comment in ocfs2_file_aio_read() | 2129 | * See the comment in ocfs2_file_aio_read() |
2101 | */ | 2130 | */ |
2102 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2131 | ret = ocfs2_inode_lock(inode, NULL, 0); |
2103 | if (ret < 0) { | 2132 | if (ret < 0) { |
2104 | mlog_errno(ret); | 2133 | mlog_errno(ret); |
2105 | goto bail; | 2134 | goto bail; |
2106 | } | 2135 | } |
2107 | ocfs2_meta_unlock(inode, 0); | 2136 | ocfs2_inode_unlock(inode, 0); |
2108 | 2137 | ||
2109 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | 2138 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); |
2110 | 2139 | ||
@@ -2160,12 +2189,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2160 | * like i_size. This allows the checks down below | 2189 | * like i_size. This allows the checks down below |
2161 | * generic_file_aio_read() a chance of actually working. | 2190 | * generic_file_aio_read() a chance of actually working. |
2162 | */ | 2191 | */ |
2163 | ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 2192 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
2164 | if (ret < 0) { | 2193 | if (ret < 0) { |
2165 | mlog_errno(ret); | 2194 | mlog_errno(ret); |
2166 | goto bail; | 2195 | goto bail; |
2167 | } | 2196 | } |
2168 | ocfs2_meta_unlock(inode, lock_level); | 2197 | ocfs2_inode_unlock(inode, lock_level); |
2169 | 2198 | ||
2170 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 2199 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); |
2171 | if (ret == -EINVAL) | 2200 | if (ret == -EINVAL) |
@@ -2204,6 +2233,7 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
2204 | }; | 2233 | }; |
2205 | 2234 | ||
2206 | const struct file_operations ocfs2_fops = { | 2235 | const struct file_operations ocfs2_fops = { |
2236 | .llseek = generic_file_llseek, | ||
2207 | .read = do_sync_read, | 2237 | .read = do_sync_read, |
2208 | .write = do_sync_write, | 2238 | .write = do_sync_write, |
2209 | .mmap = ocfs2_mmap, | 2239 | .mmap = ocfs2_mmap, |
@@ -2216,16 +2246,21 @@ const struct file_operations ocfs2_fops = { | |||
2216 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT |
2217 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, |
2218 | #endif | 2248 | #endif |
2249 | .flock = ocfs2_flock, | ||
2219 | .splice_read = ocfs2_file_splice_read, | 2250 | .splice_read = ocfs2_file_splice_read, |
2220 | .splice_write = ocfs2_file_splice_write, | 2251 | .splice_write = ocfs2_file_splice_write, |
2221 | }; | 2252 | }; |
2222 | 2253 | ||
2223 | const struct file_operations ocfs2_dops = { | 2254 | const struct file_operations ocfs2_dops = { |
2255 | .llseek = generic_file_llseek, | ||
2224 | .read = generic_read_dir, | 2256 | .read = generic_read_dir, |
2225 | .readdir = ocfs2_readdir, | 2257 | .readdir = ocfs2_readdir, |
2226 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, |
2259 | .release = ocfs2_dir_release, | ||
2260 | .open = ocfs2_dir_open, | ||
2227 | .ioctl = ocfs2_ioctl, | 2261 | .ioctl = ocfs2_ioctl, |
2228 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT |
2229 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, |
2230 | #endif | 2264 | #endif |
2265 | .flock = ocfs2_flock, | ||
2231 | }; | 2266 | }; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 066f14add3a8..048ddcaf5c80 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -32,6 +32,12 @@ extern const struct inode_operations ocfs2_file_iops; | |||
32 | extern const struct inode_operations ocfs2_special_file_iops; | 32 | extern const struct inode_operations ocfs2_special_file_iops; |
33 | struct ocfs2_alloc_context; | 33 | struct ocfs2_alloc_context; |
34 | 34 | ||
35 | struct ocfs2_file_private { | ||
36 | struct file *fp_file; | ||
37 | struct mutex fp_mutex; | ||
38 | struct ocfs2_lock_res fp_flock; | ||
39 | }; | ||
40 | |||
35 | enum ocfs2_alloc_restarted { | 41 | enum ocfs2_alloc_restarted { |
36 | RESTART_NONE = 0, | 42 | RESTART_NONE = 0, |
37 | RESTART_TRANS, | 43 | RESTART_TRANS, |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index c4c36171240d..c0efd9489fe8 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -30,9 +30,6 @@ | |||
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> |
32 | 32 | ||
33 | #include <cluster/heartbeat.h> | ||
34 | #include <cluster/nodemanager.h> | ||
35 | |||
36 | #include <dlm/dlmapi.h> | 33 | #include <dlm/dlmapi.h> |
37 | 34 | ||
38 | #define MLOG_MASK_PREFIX ML_SUPER | 35 | #define MLOG_MASK_PREFIX ML_SUPER |
@@ -44,13 +41,9 @@ | |||
44 | #include "heartbeat.h" | 41 | #include "heartbeat.h" |
45 | #include "inode.h" | 42 | #include "inode.h" |
46 | #include "journal.h" | 43 | #include "journal.h" |
47 | #include "vote.h" | ||
48 | 44 | ||
49 | #include "buffer_head_io.h" | 45 | #include "buffer_head_io.h" |
50 | 46 | ||
51 | #define OCFS2_HB_NODE_DOWN_PRI (0x0000002) | ||
52 | #define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI | ||
53 | |||
54 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 47 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, |
55 | int bit); | 48 | int bit); |
56 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
@@ -64,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | |||
64 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 57 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
65 | { | 58 | { |
66 | spin_lock_init(&osb->node_map_lock); | 59 | spin_lock_init(&osb->node_map_lock); |
67 | ocfs2_node_map_init(&osb->mounted_map); | ||
68 | ocfs2_node_map_init(&osb->recovery_map); | 60 | ocfs2_node_map_init(&osb->recovery_map); |
69 | ocfs2_node_map_init(&osb->umount_map); | ||
70 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
71 | } | 62 | } |
72 | 63 | ||
@@ -87,24 +78,7 @@ static void ocfs2_do_node_down(int node_num, | |||
87 | return; | 78 | return; |
88 | } | 79 | } |
89 | 80 | ||
90 | if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) { | ||
91 | /* If a node is in the umount map, then we've been | ||
92 | * expecting him to go down and we know ahead of time | ||
93 | * that recovery is not necessary. */ | ||
94 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
95 | return; | ||
96 | } | ||
97 | |||
98 | ocfs2_recovery_thread(osb, node_num); | 81 | ocfs2_recovery_thread(osb, node_num); |
99 | |||
100 | ocfs2_remove_node_from_vote_queues(osb, node_num); | ||
101 | } | ||
102 | |||
103 | static void ocfs2_hb_node_down_cb(struct o2nm_node *node, | ||
104 | int node_num, | ||
105 | void *data) | ||
106 | { | ||
107 | ocfs2_do_node_down(node_num, (struct ocfs2_super *) data); | ||
108 | } | 82 | } |
109 | 83 | ||
110 | /* Called from the dlm when it's about to evict a node. We may also | 84 | /* Called from the dlm when it's about to evict a node. We may also |
@@ -121,27 +95,8 @@ static void ocfs2_dlm_eviction_cb(int node_num, | |||
121 | ocfs2_do_node_down(node_num, osb); | 95 | ocfs2_do_node_down(node_num, osb); |
122 | } | 96 | } |
123 | 97 | ||
124 | static void ocfs2_hb_node_up_cb(struct o2nm_node *node, | ||
125 | int node_num, | ||
126 | void *data) | ||
127 | { | ||
128 | struct ocfs2_super *osb = data; | ||
129 | |||
130 | BUG_ON(osb->node_num == node_num); | ||
131 | |||
132 | mlog(0, "node up event for %d\n", node_num); | ||
133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
134 | } | ||
135 | |||
136 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | 98 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) |
137 | { | 99 | { |
138 | o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB, | ||
139 | ocfs2_hb_node_down_cb, osb, | ||
140 | OCFS2_HB_NODE_DOWN_PRI); | ||
141 | |||
142 | o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB, | ||
143 | ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI); | ||
144 | |||
145 | /* Not exactly a heartbeat callback, but leads to essentially | 100 | /* Not exactly a heartbeat callback, but leads to essentially |
146 | * the same path so we set it up here. */ | 101 | * the same path so we set it up here. */ |
147 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | 102 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, |
@@ -149,39 +104,6 @@ void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | |||
149 | osb); | 104 | osb); |
150 | } | 105 | } |
151 | 106 | ||
152 | /* Most functions here are just stubs for now... */ | ||
153 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) | ||
154 | { | ||
155 | int status; | ||
156 | |||
157 | if (ocfs2_mount_local(osb)) | ||
158 | return 0; | ||
159 | |||
160 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); | ||
161 | if (status < 0) { | ||
162 | mlog_errno(status); | ||
163 | goto bail; | ||
164 | } | ||
165 | |||
166 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); | ||
167 | if (status < 0) { | ||
168 | mlog_errno(status); | ||
169 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
170 | } | ||
171 | |||
172 | bail: | ||
173 | return status; | ||
174 | } | ||
175 | |||
176 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) | ||
177 | { | ||
178 | if (ocfs2_mount_local(osb)) | ||
179 | return; | ||
180 | |||
181 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
182 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); | ||
183 | } | ||
184 | |||
185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 107 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) |
186 | { | 108 | { |
187 | int ret; | 109 | int ret; |
@@ -341,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb, | |||
341 | 263 | ||
342 | spin_lock(&osb->node_map_lock); | 264 | spin_lock(&osb->node_map_lock); |
343 | 265 | ||
344 | __ocfs2_node_map_clear_bit(&osb->mounted_map, num); | ||
345 | |||
346 | if (!test_bit(num, osb->recovery_map.map)) { | 266 | if (!test_bit(num, osb->recovery_map.map)) { |
347 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); |
348 | set = 1; | 268 | set = 1; |
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index e8fb079122e4..56859211888a 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
@@ -29,8 +29,6 @@ | |||
29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); |
30 | 30 | ||
31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); |
32 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb); | ||
33 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb); | ||
34 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | 32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); |
35 | 33 | ||
36 | /* node map functions - used to keep track of mounted and in-recovery | 34 | /* node map functions - used to keep track of mounted and in-recovery |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ebb2bbe30f35..7e9e4c79aec7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include "symlink.h" | 49 | #include "symlink.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "vote.h" | ||
53 | 52 | ||
54 | #include "buffer_head_io.h" | 53 | #include "buffer_head_io.h" |
55 | 54 | ||
@@ -58,8 +57,11 @@ struct ocfs2_find_inode_args | |||
58 | u64 fi_blkno; | 57 | u64 fi_blkno; |
59 | unsigned long fi_ino; | 58 | unsigned long fi_ino; |
60 | unsigned int fi_flags; | 59 | unsigned int fi_flags; |
60 | unsigned int fi_sysfile_type; | ||
61 | }; | 61 | }; |
62 | 62 | ||
63 | static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; | ||
64 | |||
63 | static int ocfs2_read_locked_inode(struct inode *inode, | 65 | static int ocfs2_read_locked_inode(struct inode *inode, |
64 | struct ocfs2_find_inode_args *args); | 66 | struct ocfs2_find_inode_args *args); |
65 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); | 67 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); |
@@ -107,7 +109,8 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) | |||
107 | oi->ip_attr |= OCFS2_DIRSYNC_FL; | 109 | oi->ip_attr |= OCFS2_DIRSYNC_FL; |
108 | } | 110 | } |
109 | 111 | ||
110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | 112 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, |
113 | int sysfile_type) | ||
111 | { | 114 | { |
112 | struct inode *inode = NULL; | 115 | struct inode *inode = NULL; |
113 | struct super_block *sb = osb->sb; | 116 | struct super_block *sb = osb->sb; |
@@ -127,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | |||
127 | args.fi_blkno = blkno; | 130 | args.fi_blkno = blkno; |
128 | args.fi_flags = flags; | 131 | args.fi_flags = flags; |
129 | args.fi_ino = ino_from_blkno(sb, blkno); | 132 | args.fi_ino = ino_from_blkno(sb, blkno); |
133 | args.fi_sysfile_type = sysfile_type; | ||
130 | 134 | ||
131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 135 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, |
132 | ocfs2_init_locked_inode, &args); | 136 | ocfs2_init_locked_inode, &args); |
@@ -201,6 +205,9 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) | |||
201 | 205 | ||
202 | inode->i_ino = args->fi_ino; | 206 | inode->i_ino = args->fi_ino; |
203 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; | 207 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; |
208 | if (args->fi_sysfile_type != 0) | ||
209 | lockdep_set_class(&inode->i_mutex, | ||
210 | &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); | ||
204 | 211 | ||
205 | mlog_exit(0); | 212 | mlog_exit(0); |
206 | return 0; | 213 | return 0; |
@@ -322,7 +329,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
322 | */ | 329 | */ |
323 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); | 330 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); |
324 | 331 | ||
325 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 332 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, |
326 | OCFS2_LOCK_TYPE_META, 0, inode); | 333 | OCFS2_LOCK_TYPE_META, 0, inode); |
327 | 334 | ||
328 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | 335 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, |
@@ -333,10 +340,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
333 | OCFS2_LOCK_TYPE_RW, inode->i_generation, | 340 | OCFS2_LOCK_TYPE_RW, inode->i_generation, |
334 | inode); | 341 | inode); |
335 | 342 | ||
336 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | ||
337 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, | ||
338 | inode); | ||
339 | |||
340 | ocfs2_set_inode_flags(inode); | 343 | ocfs2_set_inode_flags(inode); |
341 | 344 | ||
342 | status = 0; | 345 | status = 0; |
@@ -414,7 +417,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
414 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | 417 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) |
415 | generation = osb->fs_generation; | 418 | generation = osb->fs_generation; |
416 | 419 | ||
417 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 420 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, |
418 | OCFS2_LOCK_TYPE_META, | 421 | OCFS2_LOCK_TYPE_META, |
419 | generation, inode); | 422 | generation, inode); |
420 | 423 | ||
@@ -429,7 +432,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
429 | mlog_errno(status); | 432 | mlog_errno(status); |
430 | return status; | 433 | return status; |
431 | } | 434 | } |
432 | status = ocfs2_meta_lock(inode, NULL, 0); | 435 | status = ocfs2_inode_lock(inode, NULL, 0); |
433 | if (status) { | 436 | if (status) { |
434 | make_bad_inode(inode); | 437 | make_bad_inode(inode); |
435 | mlog_errno(status); | 438 | mlog_errno(status); |
@@ -484,7 +487,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
484 | 487 | ||
485 | bail: | 488 | bail: |
486 | if (can_lock) | 489 | if (can_lock) |
487 | ocfs2_meta_unlock(inode, 0); | 490 | ocfs2_inode_unlock(inode, 0); |
488 | 491 | ||
489 | if (status < 0) | 492 | if (status < 0) |
490 | make_bad_inode(inode); | 493 | make_bad_inode(inode); |
@@ -586,7 +589,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
586 | } | 589 | } |
587 | 590 | ||
588 | mutex_lock(&inode_alloc_inode->i_mutex); | 591 | mutex_lock(&inode_alloc_inode->i_mutex); |
589 | status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1); | 592 | status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); |
590 | if (status < 0) { | 593 | if (status < 0) { |
591 | mutex_unlock(&inode_alloc_inode->i_mutex); | 594 | mutex_unlock(&inode_alloc_inode->i_mutex); |
592 | 595 | ||
@@ -617,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
617 | } | 620 | } |
618 | 621 | ||
619 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); | 622 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); |
620 | le32_and_cpu(&di->i_flags, ~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); | 623 | di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); |
621 | 624 | ||
622 | status = ocfs2_journal_dirty(handle, di_bh); | 625 | status = ocfs2_journal_dirty(handle, di_bh); |
623 | if (status < 0) { | 626 | if (status < 0) { |
@@ -635,7 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
635 | bail_commit: | 638 | bail_commit: |
636 | ocfs2_commit_trans(osb, handle); | 639 | ocfs2_commit_trans(osb, handle); |
637 | bail_unlock: | 640 | bail_unlock: |
638 | ocfs2_meta_unlock(inode_alloc_inode, 1); | 641 | ocfs2_inode_unlock(inode_alloc_inode, 1); |
639 | mutex_unlock(&inode_alloc_inode->i_mutex); | 642 | mutex_unlock(&inode_alloc_inode->i_mutex); |
640 | brelse(inode_alloc_bh); | 643 | brelse(inode_alloc_bh); |
641 | bail: | 644 | bail: |
@@ -709,7 +712,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
709 | * delete_inode operation. We do this now to avoid races with | 712 | * delete_inode operation. We do this now to avoid races with |
710 | * recovery completion on other nodes. */ | 713 | * recovery completion on other nodes. */ |
711 | mutex_lock(&orphan_dir_inode->i_mutex); | 714 | mutex_lock(&orphan_dir_inode->i_mutex); |
712 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 715 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
713 | if (status < 0) { | 716 | if (status < 0) { |
714 | mutex_unlock(&orphan_dir_inode->i_mutex); | 717 | mutex_unlock(&orphan_dir_inode->i_mutex); |
715 | 718 | ||
@@ -718,8 +721,8 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
718 | } | 721 | } |
719 | 722 | ||
720 | /* we do this while holding the orphan dir lock because we | 723 | /* we do this while holding the orphan dir lock because we |
721 | * don't want recovery being run from another node to vote for | 724 | * don't want recovery being run from another node to try an |
722 | * an inode delete on us -- this will result in two nodes | 725 | * inode delete underneath us -- this will result in two nodes |
723 | * truncating the same file! */ | 726 | * truncating the same file! */ |
724 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); | 727 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); |
725 | if (status < 0) { | 728 | if (status < 0) { |
@@ -733,7 +736,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
733 | mlog_errno(status); | 736 | mlog_errno(status); |
734 | 737 | ||
735 | bail_unlock_dir: | 738 | bail_unlock_dir: |
736 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 739 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
737 | mutex_unlock(&orphan_dir_inode->i_mutex); | 740 | mutex_unlock(&orphan_dir_inode->i_mutex); |
738 | brelse(orphan_dir_bh); | 741 | brelse(orphan_dir_bh); |
739 | bail: | 742 | bail: |
@@ -744,7 +747,7 @@ bail: | |||
744 | } | 747 | } |
745 | 748 | ||
746 | /* There is a series of simple checks that should be done before a | 749 | /* There is a series of simple checks that should be done before a |
747 | * vote is even considered. Encapsulate those in this function. */ | 750 | * trylock is even considered. Encapsulate those in this function. */ |
748 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | 751 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) |
749 | { | 752 | { |
750 | int ret = 0; | 753 | int ret = 0; |
@@ -758,14 +761,14 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
758 | goto bail; | 761 | goto bail; |
759 | } | 762 | } |
760 | 763 | ||
761 | /* If we're coming from process_vote we can't go into our own | 764 | /* If we're coming from downconvert_thread we can't go into our own |
762 | * voting [hello, deadlock city!], so unforuntately we just | 765 | * voting [hello, deadlock city!], so unforuntately we just |
763 | * have to skip deleting this guy. That's OK though because | 766 | * have to skip deleting this guy. That's OK though because |
764 | * the node who's doing the actual deleting should handle it | 767 | * the node who's doing the actual deleting should handle it |
765 | * anyway. */ | 768 | * anyway. */ |
766 | if (current == osb->vote_task) { | 769 | if (current == osb->dc_task) { |
767 | mlog(0, "Skipping delete of %lu because we're currently " | 770 | mlog(0, "Skipping delete of %lu because we're currently " |
768 | "in process_vote\n", inode->i_ino); | 771 | "in downconvert\n", inode->i_ino); |
769 | goto bail; | 772 | goto bail; |
770 | } | 773 | } |
771 | 774 | ||
@@ -779,10 +782,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
779 | goto bail_unlock; | 782 | goto bail_unlock; |
780 | } | 783 | } |
781 | 784 | ||
782 | /* If we have voted "yes" on the wipe of this inode for | 785 | /* If we have allowed wipe of this inode for another node, it |
783 | * another node, it will be marked here so we can safely skip | 786 | * will be marked here so we can safely skip it. Recovery will |
784 | * it. Recovery will cleanup any inodes we might inadvertantly | 787 | * cleanup any inodes we might inadvertently skip here. */ |
785 | * skip here. */ | ||
786 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { | 788 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { |
787 | mlog(0, "Skipping delete of %lu because another node " | 789 | mlog(0, "Skipping delete of %lu because another node " |
788 | "has done this for us.\n", inode->i_ino); | 790 | "has done this for us.\n", inode->i_ino); |
@@ -929,13 +931,13 @@ void ocfs2_delete_inode(struct inode *inode) | |||
929 | 931 | ||
930 | /* Lock down the inode. This gives us an up to date view of | 932 | /* Lock down the inode. This gives us an up to date view of |
931 | * it's metadata (for verification), and allows us to | 933 | * it's metadata (for verification), and allows us to |
932 | * serialize delete_inode votes. | 934 | * serialize delete_inode on multiple nodes. |
933 | * | 935 | * |
934 | * Even though we might be doing a truncate, we don't take the | 936 | * Even though we might be doing a truncate, we don't take the |
935 | * allocation lock here as it won't be needed - nobody will | 937 | * allocation lock here as it won't be needed - nobody will |
936 | * have the file open. | 938 | * have the file open. |
937 | */ | 939 | */ |
938 | status = ocfs2_meta_lock(inode, &di_bh, 1); | 940 | status = ocfs2_inode_lock(inode, &di_bh, 1); |
939 | if (status < 0) { | 941 | if (status < 0) { |
940 | if (status != -ENOENT) | 942 | if (status != -ENOENT) |
941 | mlog_errno(status); | 943 | mlog_errno(status); |
@@ -947,15 +949,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
947 | * before we go ahead and wipe the inode. */ | 949 | * before we go ahead and wipe the inode. */ |
948 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); | 950 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); |
949 | if (!wipe || status < 0) { | 951 | if (!wipe || status < 0) { |
950 | /* Error and inode busy vote both mean we won't be | 952 | /* Error and remote inode busy both mean we won't be |
951 | * removing the inode, so they take almost the same | 953 | * removing the inode, so they take almost the same |
952 | * path. */ | 954 | * path. */ |
953 | if (status < 0) | 955 | if (status < 0) |
954 | mlog_errno(status); | 956 | mlog_errno(status); |
955 | 957 | ||
956 | /* Someone in the cluster has voted to not wipe this | 958 | /* Someone in the cluster has disallowed a wipe of |
957 | * inode, or it was never completely orphaned. Write | 959 | * this inode, or it was never completely |
958 | * out the pages and exit now. */ | 960 | * orphaned. Write out the pages and exit now. */ |
959 | ocfs2_cleanup_delete_inode(inode, 1); | 961 | ocfs2_cleanup_delete_inode(inode, 1); |
960 | goto bail_unlock_inode; | 962 | goto bail_unlock_inode; |
961 | } | 963 | } |
@@ -981,7 +983,7 @@ void ocfs2_delete_inode(struct inode *inode) | |||
981 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 983 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; |
982 | 984 | ||
983 | bail_unlock_inode: | 985 | bail_unlock_inode: |
984 | ocfs2_meta_unlock(inode, 1); | 986 | ocfs2_inode_unlock(inode, 1); |
985 | brelse(di_bh); | 987 | brelse(di_bh); |
986 | bail_unblock: | 988 | bail_unblock: |
987 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); | 989 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); |
@@ -1008,15 +1010,14 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1008 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1010 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, |
1009 | "Inode=%lu\n", inode->i_ino); | 1011 | "Inode=%lu\n", inode->i_ino); |
1010 | 1012 | ||
1011 | /* For remove delete_inode vote, we hold open lock before, | 1013 | /* To prevent remote deletes we hold open lock before, now it |
1012 | * now it is time to unlock PR and EX open locks. */ | 1014 | * is time to unlock PR and EX open locks. */ |
1013 | ocfs2_open_unlock(inode); | 1015 | ocfs2_open_unlock(inode); |
1014 | 1016 | ||
1015 | /* Do these before all the other work so that we don't bounce | 1017 | /* Do these before all the other work so that we don't bounce |
1016 | * the vote thread while waiting to destroy the locks. */ | 1018 | * the downconvert thread while waiting to destroy the locks. */ |
1017 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1019 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); |
1018 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); | 1020 | ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); |
1019 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); | ||
1020 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 1021 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); |
1021 | 1022 | ||
1022 | /* We very well may get a clear_inode before all an inodes | 1023 | /* We very well may get a clear_inode before all an inodes |
@@ -1039,8 +1040,7 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1039 | mlog_errno(status); | 1040 | mlog_errno(status); |
1040 | 1041 | ||
1041 | ocfs2_lock_res_free(&oi->ip_rw_lockres); | 1042 | ocfs2_lock_res_free(&oi->ip_rw_lockres); |
1042 | ocfs2_lock_res_free(&oi->ip_meta_lockres); | 1043 | ocfs2_lock_res_free(&oi->ip_inode_lockres); |
1043 | ocfs2_lock_res_free(&oi->ip_data_lockres); | ||
1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); | 1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); |
1045 | 1045 | ||
1046 | ocfs2_metadata_cache_purge(inode); | 1046 | ocfs2_metadata_cache_purge(inode); |
@@ -1184,15 +1184,15 @@ int ocfs2_inode_revalidate(struct dentry *dentry) | |||
1184 | } | 1184 | } |
1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1186 | 1186 | ||
1187 | /* Let ocfs2_meta_lock do the work of updating our struct | 1187 | /* Let ocfs2_inode_lock do the work of updating our struct |
1188 | * inode for us. */ | 1188 | * inode for us. */ |
1189 | status = ocfs2_meta_lock(inode, NULL, 0); | 1189 | status = ocfs2_inode_lock(inode, NULL, 0); |
1190 | if (status < 0) { | 1190 | if (status < 0) { |
1191 | if (status != -ENOENT) | 1191 | if (status != -ENOENT) |
1192 | mlog_errno(status); | 1192 | mlog_errno(status); |
1193 | goto bail; | 1193 | goto bail; |
1194 | } | 1194 | } |
1195 | ocfs2_meta_unlock(inode, 0); | 1195 | ocfs2_inode_unlock(inode, 0); |
1196 | bail: | 1196 | bail: |
1197 | mlog_exit(status); | 1197 | mlog_exit(status); |
1198 | 1198 | ||
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 70e881c55536..390a85596aa0 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -34,8 +34,7 @@ struct ocfs2_inode_info | |||
34 | u64 ip_blkno; | 34 | u64 ip_blkno; |
35 | 35 | ||
36 | struct ocfs2_lock_res ip_rw_lockres; | 36 | struct ocfs2_lock_res ip_rw_lockres; |
37 | struct ocfs2_lock_res ip_meta_lockres; | 37 | struct ocfs2_lock_res ip_inode_lockres; |
38 | struct ocfs2_lock_res ip_data_lockres; | ||
39 | struct ocfs2_lock_res ip_open_lockres; | 38 | struct ocfs2_lock_res ip_open_lockres; |
40 | 39 | ||
41 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ |
@@ -121,9 +120,10 @@ void ocfs2_delete_inode(struct inode *inode); | |||
121 | void ocfs2_drop_inode(struct inode *inode); | 120 | void ocfs2_drop_inode(struct inode *inode); |
122 | 121 | ||
123 | /* Flags for ocfs2_iget() */ | 122 | /* Flags for ocfs2_iget() */ |
124 | #define OCFS2_FI_FLAG_SYSFILE 0x4 | 123 | #define OCFS2_FI_FLAG_SYSFILE 0x1 |
125 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x8 | 124 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 |
126 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); | 125 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, |
126 | int sysfile_type); | ||
127 | int ocfs2_inode_init_private(struct inode *inode); | 127 | int ocfs2_inode_init_private(struct inode *inode); |
128 | int ocfs2_inode_revalidate(struct dentry *dentry); | 128 | int ocfs2_inode_revalidate(struct dentry *dentry); |
129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | 129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 87dcece7e1b5..5177fba5162b 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -20,6 +20,7 @@ | |||
20 | 20 | ||
21 | #include "ocfs2_fs.h" | 21 | #include "ocfs2_fs.h" |
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "resize.h" | ||
23 | 24 | ||
24 | #include <linux/ext2_fs.h> | 25 | #include <linux/ext2_fs.h> |
25 | 26 | ||
@@ -27,14 +28,14 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | |||
27 | { | 28 | { |
28 | int status; | 29 | int status; |
29 | 30 | ||
30 | status = ocfs2_meta_lock(inode, NULL, 0); | 31 | status = ocfs2_inode_lock(inode, NULL, 0); |
31 | if (status < 0) { | 32 | if (status < 0) { |
32 | mlog_errno(status); | 33 | mlog_errno(status); |
33 | return status; | 34 | return status; |
34 | } | 35 | } |
35 | ocfs2_get_inode_flags(OCFS2_I(inode)); | 36 | ocfs2_get_inode_flags(OCFS2_I(inode)); |
36 | *flags = OCFS2_I(inode)->ip_attr; | 37 | *flags = OCFS2_I(inode)->ip_attr; |
37 | ocfs2_meta_unlock(inode, 0); | 38 | ocfs2_inode_unlock(inode, 0); |
38 | 39 | ||
39 | mlog_exit(status); | 40 | mlog_exit(status); |
40 | return status; | 41 | return status; |
@@ -52,7 +53,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
52 | 53 | ||
53 | mutex_lock(&inode->i_mutex); | 54 | mutex_lock(&inode->i_mutex); |
54 | 55 | ||
55 | status = ocfs2_meta_lock(inode, &bh, 1); | 56 | status = ocfs2_inode_lock(inode, &bh, 1); |
56 | if (status < 0) { | 57 | if (status < 0) { |
57 | mlog_errno(status); | 58 | mlog_errno(status); |
58 | goto bail; | 59 | goto bail; |
@@ -100,7 +101,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
100 | 101 | ||
101 | ocfs2_commit_trans(osb, handle); | 102 | ocfs2_commit_trans(osb, handle); |
102 | bail_unlock: | 103 | bail_unlock: |
103 | ocfs2_meta_unlock(inode, 1); | 104 | ocfs2_inode_unlock(inode, 1); |
104 | bail: | 105 | bail: |
105 | mutex_unlock(&inode->i_mutex); | 106 | mutex_unlock(&inode->i_mutex); |
106 | 107 | ||
@@ -115,8 +116,10 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
115 | unsigned int cmd, unsigned long arg) | 116 | unsigned int cmd, unsigned long arg) |
116 | { | 117 | { |
117 | unsigned int flags; | 118 | unsigned int flags; |
119 | int new_clusters; | ||
118 | int status; | 120 | int status; |
119 | struct ocfs2_space_resv sr; | 121 | struct ocfs2_space_resv sr; |
122 | struct ocfs2_new_group_input input; | ||
120 | 123 | ||
121 | switch (cmd) { | 124 | switch (cmd) { |
122 | case OCFS2_IOC_GETFLAGS: | 125 | case OCFS2_IOC_GETFLAGS: |
@@ -140,6 +143,23 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
140 | return -EFAULT; | 143 | return -EFAULT; |
141 | 144 | ||
142 | return ocfs2_change_file_space(filp, cmd, &sr); | 145 | return ocfs2_change_file_space(filp, cmd, &sr); |
146 | case OCFS2_IOC_GROUP_EXTEND: | ||
147 | if (!capable(CAP_SYS_RESOURCE)) | ||
148 | return -EPERM; | ||
149 | |||
150 | if (get_user(new_clusters, (int __user *)arg)) | ||
151 | return -EFAULT; | ||
152 | |||
153 | return ocfs2_group_extend(inode, new_clusters); | ||
154 | case OCFS2_IOC_GROUP_ADD: | ||
155 | case OCFS2_IOC_GROUP_ADD64: | ||
156 | if (!capable(CAP_SYS_RESOURCE)) | ||
157 | return -EPERM; | ||
158 | |||
159 | if (copy_from_user(&input, (int __user *) arg, sizeof(input))) | ||
160 | return -EFAULT; | ||
161 | |||
162 | return ocfs2_group_add(inode, &input); | ||
143 | default: | 163 | default: |
144 | return -ENOTTY; | 164 | return -ENOTTY; |
145 | } | 165 | } |
@@ -162,6 +182,9 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
162 | case OCFS2_IOC_RESVSP64: | 182 | case OCFS2_IOC_RESVSP64: |
163 | case OCFS2_IOC_UNRESVSP: | 183 | case OCFS2_IOC_UNRESVSP: |
164 | case OCFS2_IOC_UNRESVSP64: | 184 | case OCFS2_IOC_UNRESVSP64: |
185 | case OCFS2_IOC_GROUP_EXTEND: | ||
186 | case OCFS2_IOC_GROUP_ADD: | ||
187 | case OCFS2_IOC_GROUP_ADD64: | ||
165 | break; | 188 | break; |
166 | default: | 189 | default: |
167 | return -ENOIOCTLCMD; | 190 | return -ENOIOCTLCMD; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 8d81f6c1b877..f31c7e8c19c3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "localalloc.h" | 44 | #include "localalloc.h" |
45 | #include "slot_map.h" | 45 | #include "slot_map.h" |
46 | #include "super.h" | 46 | #include "super.h" |
47 | #include "vote.h" | ||
48 | #include "sysfile.h" | 47 | #include "sysfile.h" |
49 | 48 | ||
50 | #include "buffer_head_io.h" | 49 | #include "buffer_head_io.h" |
@@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 102 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", |
104 | journal->j_trans_id, flushed); | 103 | journal->j_trans_id, flushed); |
105 | 104 | ||
106 | ocfs2_kick_vote_thread(osb); | 105 | ocfs2_wake_downconvert_thread(osb); |
107 | wake_up(&journal->j_checkpointed); | 106 | wake_up(&journal->j_checkpointed); |
108 | finally: | 107 | finally: |
109 | mlog_exit(status); | 108 | mlog_exit(status); |
@@ -314,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
314 | return err; | 313 | return err; |
315 | } | 314 | } |
316 | 315 | ||
317 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) | 316 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) |
318 | 317 | ||
319 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 318 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
320 | { | 319 | { |
321 | journal_t *journal = osb->journal->j_journal; | 320 | journal_t *journal = osb->journal->j_journal; |
321 | unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | ||
322 | |||
323 | if (osb->osb_commit_interval) | ||
324 | commit_interval = osb->osb_commit_interval; | ||
322 | 325 | ||
323 | spin_lock(&journal->j_state_lock); | 326 | spin_lock(&journal->j_state_lock); |
324 | journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | 327 | journal->j_commit_interval = commit_interval; |
325 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 328 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
326 | journal->j_flags |= JFS_BARRIER; | 329 | journal->j_flags |= JFS_BARRIER; |
327 | else | 330 | else |
@@ -337,7 +340,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
337 | struct ocfs2_dinode *di = NULL; | 340 | struct ocfs2_dinode *di = NULL; |
338 | struct buffer_head *bh = NULL; | 341 | struct buffer_head *bh = NULL; |
339 | struct ocfs2_super *osb; | 342 | struct ocfs2_super *osb; |
340 | int meta_lock = 0; | 343 | int inode_lock = 0; |
341 | 344 | ||
342 | mlog_entry_void(); | 345 | mlog_entry_void(); |
343 | 346 | ||
@@ -367,14 +370,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
367 | /* Skip recovery waits here - journal inode metadata never | 370 | /* Skip recovery waits here - journal inode metadata never |
368 | * changes in a live cluster so it can be considered an | 371 | * changes in a live cluster so it can be considered an |
369 | * exception to the rule. */ | 372 | * exception to the rule. */ |
370 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 373 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
371 | if (status < 0) { | 374 | if (status < 0) { |
372 | if (status != -ERESTARTSYS) | 375 | if (status != -ERESTARTSYS) |
373 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 376 | mlog(ML_ERROR, "Could not get lock on journal!\n"); |
374 | goto done; | 377 | goto done; |
375 | } | 378 | } |
376 | 379 | ||
377 | meta_lock = 1; | 380 | inode_lock = 1; |
378 | di = (struct ocfs2_dinode *)bh->b_data; | 381 | di = (struct ocfs2_dinode *)bh->b_data; |
379 | 382 | ||
380 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 383 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { |
@@ -414,8 +417,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
414 | status = 0; | 417 | status = 0; |
415 | done: | 418 | done: |
416 | if (status < 0) { | 419 | if (status < 0) { |
417 | if (meta_lock) | 420 | if (inode_lock) |
418 | ocfs2_meta_unlock(inode, 1); | 421 | ocfs2_inode_unlock(inode, 1); |
419 | if (bh != NULL) | 422 | if (bh != NULL) |
420 | brelse(bh); | 423 | brelse(bh); |
421 | if (inode) { | 424 | if (inode) { |
@@ -544,7 +547,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
544 | OCFS2_I(inode)->ip_open_count--; | 547 | OCFS2_I(inode)->ip_open_count--; |
545 | 548 | ||
546 | /* unlock our journal */ | 549 | /* unlock our journal */ |
547 | ocfs2_meta_unlock(inode, 1); | 550 | ocfs2_inode_unlock(inode, 1); |
548 | 551 | ||
549 | brelse(journal->j_bh); | 552 | brelse(journal->j_bh); |
550 | journal->j_bh = NULL; | 553 | journal->j_bh = NULL; |
@@ -883,8 +886,8 @@ restart: | |||
883 | ocfs2_super_unlock(osb, 1); | 886 | ocfs2_super_unlock(osb, 1); |
884 | 887 | ||
885 | /* We always run recovery on our own orphan dir - the dead | 888 | /* We always run recovery on our own orphan dir - the dead |
886 | * node(s) may have voted "no" on an inode delete earlier. A | 889 | * node(s) may have disallowed a previous inode delete. Re-processing |
887 | * revote is therefore required. */ | 890 | * is therefore required. */ |
888 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 891 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
889 | NULL); | 892 | NULL); |
890 | 893 | ||
@@ -973,9 +976,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
973 | } | 976 | } |
974 | SET_INODE_JOURNAL(inode); | 977 | SET_INODE_JOURNAL(inode); |
975 | 978 | ||
976 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 979 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
977 | if (status < 0) { | 980 | if (status < 0) { |
978 | mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); | 981 | mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); |
979 | if (status != -ERESTARTSYS) | 982 | if (status != -ERESTARTSYS) |
980 | mlog(ML_ERROR, "Could not lock journal!\n"); | 983 | mlog(ML_ERROR, "Could not lock journal!\n"); |
981 | goto done; | 984 | goto done; |
@@ -1047,7 +1050,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1047 | done: | 1050 | done: |
1048 | /* drop the lock on this nodes journal */ | 1051 | /* drop the lock on this nodes journal */ |
1049 | if (got_lock) | 1052 | if (got_lock) |
1050 | ocfs2_meta_unlock(inode, 1); | 1053 | ocfs2_inode_unlock(inode, 1); |
1051 | 1054 | ||
1052 | if (inode) | 1055 | if (inode) |
1053 | iput(inode); | 1056 | iput(inode); |
@@ -1162,14 +1165,14 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, | |||
1162 | SET_INODE_JOURNAL(inode); | 1165 | SET_INODE_JOURNAL(inode); |
1163 | 1166 | ||
1164 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; | 1167 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; |
1165 | status = ocfs2_meta_lock_full(inode, NULL, 1, flags); | 1168 | status = ocfs2_inode_lock_full(inode, NULL, 1, flags); |
1166 | if (status < 0) { | 1169 | if (status < 0) { |
1167 | if (status != -EAGAIN) | 1170 | if (status != -EAGAIN) |
1168 | mlog_errno(status); | 1171 | mlog_errno(status); |
1169 | goto bail; | 1172 | goto bail; |
1170 | } | 1173 | } |
1171 | 1174 | ||
1172 | ocfs2_meta_unlock(inode, 1); | 1175 | ocfs2_inode_unlock(inode, 1); |
1173 | bail: | 1176 | bail: |
1174 | if (inode) | 1177 | if (inode) |
1175 | iput(inode); | 1178 | iput(inode); |
@@ -1241,7 +1244,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, | |||
1241 | 1244 | ||
1242 | /* Skip bad inodes so that recovery can continue */ | 1245 | /* Skip bad inodes so that recovery can continue */ |
1243 | iter = ocfs2_iget(p->osb, ino, | 1246 | iter = ocfs2_iget(p->osb, ino, |
1244 | OCFS2_FI_FLAG_ORPHAN_RECOVERY); | 1247 | OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); |
1245 | if (IS_ERR(iter)) | 1248 | if (IS_ERR(iter)) |
1246 | return 0; | 1249 | return 0; |
1247 | 1250 | ||
@@ -1277,7 +1280,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1277 | } | 1280 | } |
1278 | 1281 | ||
1279 | mutex_lock(&orphan_dir_inode->i_mutex); | 1282 | mutex_lock(&orphan_dir_inode->i_mutex); |
1280 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0); | 1283 | status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); |
1281 | if (status < 0) { | 1284 | if (status < 0) { |
1282 | mlog_errno(status); | 1285 | mlog_errno(status); |
1283 | goto out; | 1286 | goto out; |
@@ -1293,7 +1296,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1293 | *head = priv.head; | 1296 | *head = priv.head; |
1294 | 1297 | ||
1295 | out_cluster: | 1298 | out_cluster: |
1296 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1299 | ocfs2_inode_unlock(orphan_dir_inode, 0); |
1297 | out: | 1300 | out: |
1298 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1301 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1299 | iput(orphan_dir_inode); | 1302 | iput(orphan_dir_inode); |
@@ -1380,10 +1383,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1380 | iter = oi->ip_next_orphan; | 1383 | iter = oi->ip_next_orphan; |
1381 | 1384 | ||
1382 | spin_lock(&oi->ip_lock); | 1385 | spin_lock(&oi->ip_lock); |
1383 | /* Delete voting may have set these on the assumption | 1386 | /* The remote delete code may have set these on the |
1384 | * that the other node would wipe them successfully. | 1387 | * assumption that the other node would wipe them |
1385 | * If they are still in the node's orphan dir, we need | 1388 | * successfully. If they are still in the node's |
1386 | * to reset that state. */ | 1389 | * orphan dir, we need to reset that state. */ |
1387 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 1390 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); |
1388 | 1391 | ||
1389 | /* Set the proper information to get us going into | 1392 | /* Set the proper information to get us going into |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 4b32e0961568..220f3e818e78 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -278,6 +278,12 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
278 | /* simple file updates like chmod, etc. */ | 278 | /* simple file updates like chmod, etc. */ |
279 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 279 | #define OCFS2_INODE_UPDATE_CREDITS 1 |
280 | 280 | ||
281 | /* group extend. inode update and last group update. */ | ||
282 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
283 | |||
284 | /* group add. inode update and the new group update. */ | ||
285 | #define OCFS2_GROUP_ADD_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
286 | |||
281 | /* get one bit out of a suballocator: dinode + group descriptor + | 287 | /* get one bit out of a suballocator: dinode + group descriptor + |
282 | * prev. group desc. if we relink. */ | 288 | * prev. group desc. if we relink. */ |
283 | #define OCFS2_SUBALLOC_ALLOC (3) | 289 | #define OCFS2_SUBALLOC_ALLOC (3) |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 58ea88b5af36..add1ffdc5c6c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 76 | struct inode *local_alloc_inode); |
77 | 77 | ||
78 | /* | ||
79 | * Determine how large our local alloc window should be, in bits. | ||
80 | * | ||
81 | * These values (and the behavior in ocfs2_alloc_should_use_local) have | ||
82 | * been chosen so that most allocations, including new block groups go | ||
83 | * through local alloc. | ||
84 | */ | ||
85 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) |
86 | { | 79 | { |
87 | BUG_ON(osb->s_clustersize_bits < 12); | 80 | BUG_ON(osb->s_clustersize_bits > 20); |
88 | 81 | ||
89 | return 2048 >> (osb->s_clustersize_bits - 12); | 82 | /* Size local alloc windows by the megabyte */ |
83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | ||
90 | } | 84 | } |
91 | 85 | ||
92 | /* | 86 | /* |
@@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | |||
96 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
97 | { | 91 | { |
98 | int la_bits = ocfs2_local_alloc_window_bits(osb); | 92 | int la_bits = ocfs2_local_alloc_window_bits(osb); |
93 | int ret = 0; | ||
99 | 94 | ||
100 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) |
101 | return 0; | 96 | goto bail; |
102 | 97 | ||
103 | /* la_bits should be at least twice the size (in clusters) of | 98 | /* la_bits should be at least twice the size (in clusters) of |
104 | * a new block group. We want to be sure block group | 99 | * a new block group. We want to be sure block group |
105 | * allocations go through the local alloc, so allow an | 100 | * allocations go through the local alloc, so allow an |
106 | * allocation to take up to half the bitmap. */ | 101 | * allocation to take up to half the bitmap. */ |
107 | if (bits > (la_bits / 2)) | 102 | if (bits > (la_bits / 2)) |
108 | return 0; | 103 | goto bail; |
109 | 104 | ||
110 | return 1; | 105 | ret = 1; |
106 | bail: | ||
107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | ||
108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | ||
109 | return ret; | ||
111 | } | 110 | } |
112 | 111 | ||
113 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) | 112 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) |
@@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
121 | 120 | ||
122 | mlog_entry_void(); | 121 | mlog_entry_void(); |
123 | 122 | ||
123 | if (ocfs2_mount_local(osb)) | ||
124 | goto bail; | ||
125 | |||
126 | if (osb->local_alloc_size == 0) | ||
127 | goto bail; | ||
128 | |||
129 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | ||
130 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | ||
131 | "than max possible %u. Using defaults.\n", | ||
132 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | ||
133 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
134 | } | ||
135 | |||
124 | /* read the alloc off disk */ | 136 | /* read the alloc off disk */ |
125 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, | 137 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, |
126 | osb->slot_num); | 138 | osb->slot_num); |
@@ -181,6 +193,9 @@ bail: | |||
181 | if (inode) | 193 | if (inode) |
182 | iput(inode); | 194 | iput(inode); |
183 | 195 | ||
196 | mlog(0, "Local alloc window bits = %d\n", | ||
197 | ocfs2_local_alloc_window_bits(osb)); | ||
198 | |||
184 | mlog_exit(status); | 199 | mlog_exit(status); |
185 | return status; | 200 | return status; |
186 | } | 201 | } |
@@ -231,7 +246,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
231 | 246 | ||
232 | mutex_lock(&main_bm_inode->i_mutex); | 247 | mutex_lock(&main_bm_inode->i_mutex); |
233 | 248 | ||
234 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 249 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); |
235 | if (status < 0) { | 250 | if (status < 0) { |
236 | mlog_errno(status); | 251 | mlog_errno(status); |
237 | goto out_mutex; | 252 | goto out_mutex; |
@@ -286,7 +301,7 @@ out_unlock: | |||
286 | if (main_bm_bh) | 301 | if (main_bm_bh) |
287 | brelse(main_bm_bh); | 302 | brelse(main_bm_bh); |
288 | 303 | ||
289 | ocfs2_meta_unlock(main_bm_inode, 1); | 304 | ocfs2_inode_unlock(main_bm_inode, 1); |
290 | 305 | ||
291 | out_mutex: | 306 | out_mutex: |
292 | mutex_unlock(&main_bm_inode->i_mutex); | 307 | mutex_unlock(&main_bm_inode->i_mutex); |
@@ -399,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
399 | 414 | ||
400 | mutex_lock(&main_bm_inode->i_mutex); | 415 | mutex_lock(&main_bm_inode->i_mutex); |
401 | 416 | ||
402 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 417 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); |
403 | if (status < 0) { | 418 | if (status < 0) { |
404 | mlog_errno(status); | 419 | mlog_errno(status); |
405 | goto out_mutex; | 420 | goto out_mutex; |
@@ -424,7 +439,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
424 | ocfs2_commit_trans(osb, handle); | 439 | ocfs2_commit_trans(osb, handle); |
425 | 440 | ||
426 | out_unlock: | 441 | out_unlock: |
427 | ocfs2_meta_unlock(main_bm_inode, 1); | 442 | ocfs2_inode_unlock(main_bm_inode, 1); |
428 | 443 | ||
429 | out_mutex: | 444 | out_mutex: |
430 | mutex_unlock(&main_bm_inode->i_mutex); | 445 | mutex_unlock(&main_bm_inode->i_mutex); |
@@ -521,6 +536,9 @@ bail: | |||
521 | iput(local_alloc_inode); | 536 | iput(local_alloc_inode); |
522 | } | 537 | } |
523 | 538 | ||
539 | mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, | ||
540 | status); | ||
541 | |||
524 | mlog_exit(status); | 542 | mlog_exit(status); |
525 | return status; | 543 | return status; |
526 | } | 544 | } |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c new file mode 100644 index 000000000000..203f87143877 --- /dev/null +++ b/fs/ocfs2/locks.c | |||
@@ -0,0 +1,125 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * locks.c | ||
5 | * | ||
6 | * Userspace file locking support | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #define MLOG_MASK_PREFIX ML_INODE | ||
29 | #include <cluster/masklog.h> | ||
30 | |||
31 | #include "ocfs2.h" | ||
32 | |||
33 | #include "dlmglue.h" | ||
34 | #include "file.h" | ||
35 | #include "locks.h" | ||
36 | |||
37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | ||
38 | int cmd, struct file_lock *fl) | ||
39 | { | ||
40 | int ret = 0, level = 0, trylock = 0; | ||
41 | struct ocfs2_file_private *fp = file->private_data; | ||
42 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
43 | |||
44 | if (fl->fl_type == F_WRLCK) | ||
45 | level = 1; | ||
46 | if (!IS_SETLKW(cmd)) | ||
47 | trylock = 1; | ||
48 | |||
49 | mutex_lock(&fp->fp_mutex); | ||
50 | |||
51 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | ||
52 | lockres->l_level > LKM_NLMODE) { | ||
53 | int old_level = 0; | ||
54 | |||
55 | if (lockres->l_level == LKM_EXMODE) | ||
56 | old_level = 1; | ||
57 | |||
58 | if (level == old_level) | ||
59 | goto out; | ||
60 | |||
61 | /* | ||
62 | * Converting an existing lock is not guaranteed to be | ||
63 | * atomic, so we can get away with simply unlocking | ||
64 | * here and allowing the lock code to try at the new | ||
65 | * level. | ||
66 | */ | ||
67 | |||
68 | flock_lock_file_wait(file, | ||
69 | &(struct file_lock){.fl_type = F_UNLCK}); | ||
70 | |||
71 | ocfs2_file_unlock(file); | ||
72 | } | ||
73 | |||
74 | ret = ocfs2_file_lock(file, level, trylock); | ||
75 | if (ret) { | ||
76 | if (ret == -EAGAIN && trylock) | ||
77 | ret = -EWOULDBLOCK; | ||
78 | else | ||
79 | mlog_errno(ret); | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | ret = flock_lock_file_wait(file, fl); | ||
84 | |||
85 | out: | ||
86 | mutex_unlock(&fp->fp_mutex); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl) | ||
92 | { | ||
93 | int ret; | ||
94 | struct ocfs2_file_private *fp = file->private_data; | ||
95 | |||
96 | mutex_lock(&fp->fp_mutex); | ||
97 | ocfs2_file_unlock(file); | ||
98 | ret = flock_lock_file_wait(file, fl); | ||
99 | mutex_unlock(&fp->fp_mutex); | ||
100 | |||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Overall flow of ocfs2_flock() was influenced by gfs2_flock(). | ||
106 | */ | ||
107 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | ||
108 | { | ||
109 | struct inode *inode = file->f_mapping->host; | ||
110 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
111 | |||
112 | if (!(fl->fl_flags & FL_FLOCK)) | ||
113 | return -ENOLCK; | ||
114 | if (__mandatory_lock(inode)) | ||
115 | return -ENOLCK; | ||
116 | |||
117 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
118 | ocfs2_mount_local(osb)) | ||
119 | return flock_lock_file_wait(file, fl); | ||
120 | |||
121 | if (fl->fl_type == F_UNLCK) | ||
122 | return ocfs2_do_funlock(file, cmd, fl); | ||
123 | else | ||
124 | return ocfs2_do_flock(file, inode, cmd, fl); | ||
125 | } | ||
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/locks.h index 9ea46f62de31..9743ef2324ec 100644 --- a/fs/ocfs2/vote.h +++ b/fs/ocfs2/locks.h | |||
@@ -1,9 +1,9 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * vote.h | 4 | * locks.h |
5 | * | 5 | * |
6 | * description here | 6 | * Function prototypes for Userspace file locking support |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
@@ -23,26 +23,9 @@ | |||
23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #ifndef OCFS2_LOCKS_H | ||
27 | #define OCFS2_LOCKS_H | ||
26 | 28 | ||
27 | #ifndef VOTE_H | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); |
28 | #define VOTE_H | ||
29 | 30 | ||
30 | int ocfs2_vote_thread(void *arg); | 31 | #endif /* OCFS2_LOCKS_H */ |
31 | static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | ||
32 | { | ||
33 | spin_lock(&osb->vote_task_lock); | ||
34 | /* make sure the voting thread gets a swipe at whatever changes | ||
35 | * the caller may have made to the voting state */ | ||
36 | osb->vote_wake_sequence++; | ||
37 | spin_unlock(&osb->vote_task_lock); | ||
38 | wake_up(&osb->vote_event); | ||
39 | } | ||
40 | |||
41 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | ||
42 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | ||
43 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | ||
44 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb); | ||
45 | |||
46 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
47 | int node_num); | ||
48 | #endif | ||
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 98756156d298..3dc18d67557c 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -168,7 +168,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
168 | * node. Taking the data lock will also ensure that we don't | 168 | * node. Taking the data lock will also ensure that we don't |
169 | * attempt page truncation as part of a downconvert. | 169 | * attempt page truncation as part of a downconvert. |
170 | */ | 170 | */ |
171 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 171 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
172 | if (ret < 0) { | 172 | if (ret < 0) { |
173 | mlog_errno(ret); | 173 | mlog_errno(ret); |
174 | goto out; | 174 | goto out; |
@@ -181,21 +181,12 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
181 | */ | 181 | */ |
182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
183 | 183 | ||
184 | ret = ocfs2_data_lock(inode, 1); | ||
185 | if (ret < 0) { | ||
186 | mlog_errno(ret); | ||
187 | goto out_meta_unlock; | ||
188 | } | ||
189 | |||
190 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 184 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); |
191 | 185 | ||
192 | ocfs2_data_unlock(inode, 1); | ||
193 | |||
194 | out_meta_unlock: | ||
195 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 186 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
196 | 187 | ||
197 | brelse(di_bh); | 188 | brelse(di_bh); |
198 | ocfs2_meta_unlock(inode, 1); | 189 | ocfs2_inode_unlock(inode, 1); |
199 | 190 | ||
200 | out: | 191 | out: |
201 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | 192 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); |
@@ -214,13 +205,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
214 | { | 205 | { |
215 | int ret = 0, lock_level = 0; | 206 | int ret = 0, lock_level = 0; |
216 | 207 | ||
217 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 208 | ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, |
218 | file->f_vfsmnt, &lock_level); | 209 | file->f_vfsmnt, &lock_level); |
219 | if (ret < 0) { | 210 | if (ret < 0) { |
220 | mlog_errno(ret); | 211 | mlog_errno(ret); |
221 | goto out; | 212 | goto out; |
222 | } | 213 | } |
223 | ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); | 214 | ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); |
224 | out: | 215 | out: |
225 | vma->vm_ops = &ocfs2_file_vm_ops; | 216 | vma->vm_ops = &ocfs2_file_vm_ops; |
226 | vma->vm_flags |= VM_CAN_NONLINEAR; | 217 | vma->vm_flags |= VM_CAN_NONLINEAR; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 989ac2718587..ae9ad9587516 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "vote.h" | ||
64 | 63 | ||
65 | #include "buffer_head_io.h" | 64 | #include "buffer_head_io.h" |
66 | 65 | ||
@@ -116,7 +115,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
116 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, | 115 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, |
117 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 116 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); |
118 | 117 | ||
119 | status = ocfs2_meta_lock(dir, NULL, 0); | 118 | status = ocfs2_inode_lock(dir, NULL, 0); |
120 | if (status < 0) { | 119 | if (status < 0) { |
121 | if (status != -ENOENT) | 120 | if (status != -ENOENT) |
122 | mlog_errno(status); | 121 | mlog_errno(status); |
@@ -129,7 +128,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
129 | if (status < 0) | 128 | if (status < 0) |
130 | goto bail_add; | 129 | goto bail_add; |
131 | 130 | ||
132 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 131 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); |
133 | if (IS_ERR(inode)) { | 132 | if (IS_ERR(inode)) { |
134 | ret = ERR_PTR(-EACCES); | 133 | ret = ERR_PTR(-EACCES); |
135 | goto bail_unlock; | 134 | goto bail_unlock; |
@@ -176,8 +175,8 @@ bail_unlock: | |||
176 | /* Don't drop the cluster lock until *after* the d_add -- | 175 | /* Don't drop the cluster lock until *after* the d_add -- |
177 | * unlink on another node will message us to remove that | 176 | * unlink on another node will message us to remove that |
178 | * dentry under this lock so otherwise we can race this with | 177 | * dentry under this lock so otherwise we can race this with |
179 | * the vote thread and have a stale dentry. */ | 178 | * the downconvert thread and have a stale dentry. */ |
180 | ocfs2_meta_unlock(dir, 0); | 179 | ocfs2_inode_unlock(dir, 0); |
181 | 180 | ||
182 | bail: | 181 | bail: |
183 | 182 | ||
@@ -209,7 +208,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
209 | /* get our super block */ | 208 | /* get our super block */ |
210 | osb = OCFS2_SB(dir->i_sb); | 209 | osb = OCFS2_SB(dir->i_sb); |
211 | 210 | ||
212 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 211 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
213 | if (status < 0) { | 212 | if (status < 0) { |
214 | if (status != -ENOENT) | 213 | if (status != -ENOENT) |
215 | mlog_errno(status); | 214 | mlog_errno(status); |
@@ -323,7 +322,7 @@ leave: | |||
323 | if (handle) | 322 | if (handle) |
324 | ocfs2_commit_trans(osb, handle); | 323 | ocfs2_commit_trans(osb, handle); |
325 | 324 | ||
326 | ocfs2_meta_unlock(dir, 1); | 325 | ocfs2_inode_unlock(dir, 1); |
327 | 326 | ||
328 | if (status == -ENOSPC) | 327 | if (status == -ENOSPC) |
329 | mlog(0, "Disk is full\n"); | 328 | mlog(0, "Disk is full\n"); |
@@ -553,7 +552,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
553 | if (S_ISDIR(inode->i_mode)) | 552 | if (S_ISDIR(inode->i_mode)) |
554 | return -EPERM; | 553 | return -EPERM; |
555 | 554 | ||
556 | err = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 555 | err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
557 | if (err < 0) { | 556 | if (err < 0) { |
558 | if (err != -ENOENT) | 557 | if (err != -ENOENT) |
559 | mlog_errno(err); | 558 | mlog_errno(err); |
@@ -578,7 +577,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
578 | goto out; | 577 | goto out; |
579 | } | 578 | } |
580 | 579 | ||
581 | err = ocfs2_meta_lock(inode, &fe_bh, 1); | 580 | err = ocfs2_inode_lock(inode, &fe_bh, 1); |
582 | if (err < 0) { | 581 | if (err < 0) { |
583 | if (err != -ENOENT) | 582 | if (err != -ENOENT) |
584 | mlog_errno(err); | 583 | mlog_errno(err); |
@@ -643,10 +642,10 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
643 | out_commit: | 642 | out_commit: |
644 | ocfs2_commit_trans(osb, handle); | 643 | ocfs2_commit_trans(osb, handle); |
645 | out_unlock_inode: | 644 | out_unlock_inode: |
646 | ocfs2_meta_unlock(inode, 1); | 645 | ocfs2_inode_unlock(inode, 1); |
647 | 646 | ||
648 | out: | 647 | out: |
649 | ocfs2_meta_unlock(dir, 1); | 648 | ocfs2_inode_unlock(dir, 1); |
650 | 649 | ||
651 | if (de_bh) | 650 | if (de_bh) |
652 | brelse(de_bh); | 651 | brelse(de_bh); |
@@ -720,7 +719,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
720 | return -EPERM; | 719 | return -EPERM; |
721 | } | 720 | } |
722 | 721 | ||
723 | status = ocfs2_meta_lock(dir, &parent_node_bh, 1); | 722 | status = ocfs2_inode_lock(dir, &parent_node_bh, 1); |
724 | if (status < 0) { | 723 | if (status < 0) { |
725 | if (status != -ENOENT) | 724 | if (status != -ENOENT) |
726 | mlog_errno(status); | 725 | mlog_errno(status); |
@@ -745,7 +744,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
745 | goto leave; | 744 | goto leave; |
746 | } | 745 | } |
747 | 746 | ||
748 | status = ocfs2_meta_lock(inode, &fe_bh, 1); | 747 | status = ocfs2_inode_lock(inode, &fe_bh, 1); |
749 | if (status < 0) { | 748 | if (status < 0) { |
750 | if (status != -ENOENT) | 749 | if (status != -ENOENT) |
751 | mlog_errno(status); | 750 | mlog_errno(status); |
@@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
765 | 764 | ||
766 | status = ocfs2_remote_dentry_delete(dentry); | 765 | status = ocfs2_remote_dentry_delete(dentry); |
767 | if (status < 0) { | 766 | if (status < 0) { |
768 | /* This vote should succeed under all normal | 767 | /* This remote delete should succeed under all normal |
769 | * circumstances. */ | 768 | * circumstances. */ |
770 | mlog_errno(status); | 769 | mlog_errno(status); |
771 | goto leave; | 770 | goto leave; |
@@ -841,13 +840,13 @@ leave: | |||
841 | ocfs2_commit_trans(osb, handle); | 840 | ocfs2_commit_trans(osb, handle); |
842 | 841 | ||
843 | if (child_locked) | 842 | if (child_locked) |
844 | ocfs2_meta_unlock(inode, 1); | 843 | ocfs2_inode_unlock(inode, 1); |
845 | 844 | ||
846 | ocfs2_meta_unlock(dir, 1); | 845 | ocfs2_inode_unlock(dir, 1); |
847 | 846 | ||
848 | if (orphan_dir) { | 847 | if (orphan_dir) { |
849 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 848 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
850 | ocfs2_meta_unlock(orphan_dir, 1); | 849 | ocfs2_inode_unlock(orphan_dir, 1); |
851 | mutex_unlock(&orphan_dir->i_mutex); | 850 | mutex_unlock(&orphan_dir->i_mutex); |
852 | iput(orphan_dir); | 851 | iput(orphan_dir); |
853 | } | 852 | } |
@@ -908,7 +907,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
908 | inode1 = tmpinode; | 907 | inode1 = tmpinode; |
909 | } | 908 | } |
910 | /* lock id2 */ | 909 | /* lock id2 */ |
911 | status = ocfs2_meta_lock(inode2, bh2, 1); | 910 | status = ocfs2_inode_lock(inode2, bh2, 1); |
912 | if (status < 0) { | 911 | if (status < 0) { |
913 | if (status != -ENOENT) | 912 | if (status != -ENOENT) |
914 | mlog_errno(status); | 913 | mlog_errno(status); |
@@ -917,14 +916,14 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
917 | } | 916 | } |
918 | 917 | ||
919 | /* lock id1 */ | 918 | /* lock id1 */ |
920 | status = ocfs2_meta_lock(inode1, bh1, 1); | 919 | status = ocfs2_inode_lock(inode1, bh1, 1); |
921 | if (status < 0) { | 920 | if (status < 0) { |
922 | /* | 921 | /* |
923 | * An error return must mean that no cluster locks | 922 | * An error return must mean that no cluster locks |
924 | * were held on function exit. | 923 | * were held on function exit. |
925 | */ | 924 | */ |
926 | if (oi1->ip_blkno != oi2->ip_blkno) | 925 | if (oi1->ip_blkno != oi2->ip_blkno) |
927 | ocfs2_meta_unlock(inode2, 1); | 926 | ocfs2_inode_unlock(inode2, 1); |
928 | 927 | ||
929 | if (status != -ENOENT) | 928 | if (status != -ENOENT) |
930 | mlog_errno(status); | 929 | mlog_errno(status); |
@@ -937,10 +936,10 @@ bail: | |||
937 | 936 | ||
938 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) | 937 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) |
939 | { | 938 | { |
940 | ocfs2_meta_unlock(inode1, 1); | 939 | ocfs2_inode_unlock(inode1, 1); |
941 | 940 | ||
942 | if (inode1 != inode2) | 941 | if (inode1 != inode2) |
943 | ocfs2_meta_unlock(inode2, 1); | 942 | ocfs2_inode_unlock(inode2, 1); |
944 | } | 943 | } |
945 | 944 | ||
946 | static int ocfs2_rename(struct inode *old_dir, | 945 | static int ocfs2_rename(struct inode *old_dir, |
@@ -1031,10 +1030,11 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1031 | 1030 | ||
1032 | /* | 1031 | /* |
1033 | * Aside from allowing a meta data update, the locking here | 1032 | * Aside from allowing a meta data update, the locking here |
1034 | * also ensures that the vote thread on other nodes won't have | 1033 | * also ensures that the downconvert thread on other nodes |
1035 | * to concurrently downconvert the inode and the dentry locks. | 1034 | * won't have to concurrently downconvert the inode and the |
1035 | * dentry locks. | ||
1036 | */ | 1036 | */ |
1037 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); | 1037 | status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); |
1038 | if (status < 0) { | 1038 | if (status < 0) { |
1039 | if (status != -ENOENT) | 1039 | if (status != -ENOENT) |
1040 | mlog_errno(status); | 1040 | mlog_errno(status); |
@@ -1143,7 +1143,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1143 | goto bail; | 1143 | goto bail; |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | status = ocfs2_meta_lock(new_inode, &newfe_bh, 1); | 1146 | status = ocfs2_inode_lock(new_inode, &newfe_bh, 1); |
1147 | if (status < 0) { | 1147 | if (status < 0) { |
1148 | if (status != -ENOENT) | 1148 | if (status != -ENOENT) |
1149 | mlog_errno(status); | 1149 | mlog_errno(status); |
@@ -1355,14 +1355,14 @@ bail: | |||
1355 | ocfs2_double_unlock(old_dir, new_dir); | 1355 | ocfs2_double_unlock(old_dir, new_dir); |
1356 | 1356 | ||
1357 | if (old_child_locked) | 1357 | if (old_child_locked) |
1358 | ocfs2_meta_unlock(old_inode, 1); | 1358 | ocfs2_inode_unlock(old_inode, 1); |
1359 | 1359 | ||
1360 | if (new_child_locked) | 1360 | if (new_child_locked) |
1361 | ocfs2_meta_unlock(new_inode, 1); | 1361 | ocfs2_inode_unlock(new_inode, 1); |
1362 | 1362 | ||
1363 | if (orphan_dir) { | 1363 | if (orphan_dir) { |
1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
1365 | ocfs2_meta_unlock(orphan_dir, 1); | 1365 | ocfs2_inode_unlock(orphan_dir, 1); |
1366 | mutex_unlock(&orphan_dir->i_mutex); | 1366 | mutex_unlock(&orphan_dir->i_mutex); |
1367 | iput(orphan_dir); | 1367 | iput(orphan_dir); |
1368 | } | 1368 | } |
@@ -1530,7 +1530,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1530 | credits = ocfs2_calc_symlink_credits(sb); | 1530 | credits = ocfs2_calc_symlink_credits(sb); |
1531 | 1531 | ||
1532 | /* lock the parent directory */ | 1532 | /* lock the parent directory */ |
1533 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 1533 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
1534 | if (status < 0) { | 1534 | if (status < 0) { |
1535 | if (status != -ENOENT) | 1535 | if (status != -ENOENT) |
1536 | mlog_errno(status); | 1536 | mlog_errno(status); |
@@ -1657,7 +1657,7 @@ bail: | |||
1657 | if (handle) | 1657 | if (handle) |
1658 | ocfs2_commit_trans(osb, handle); | 1658 | ocfs2_commit_trans(osb, handle); |
1659 | 1659 | ||
1660 | ocfs2_meta_unlock(dir, 1); | 1660 | ocfs2_inode_unlock(dir, 1); |
1661 | 1661 | ||
1662 | if (new_fe_bh) | 1662 | if (new_fe_bh) |
1663 | brelse(new_fe_bh); | 1663 | brelse(new_fe_bh); |
@@ -1735,7 +1735,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1735 | 1735 | ||
1736 | mutex_lock(&orphan_dir_inode->i_mutex); | 1736 | mutex_lock(&orphan_dir_inode->i_mutex); |
1737 | 1737 | ||
1738 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1738 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
1739 | if (status < 0) { | 1739 | if (status < 0) { |
1740 | mlog_errno(status); | 1740 | mlog_errno(status); |
1741 | goto leave; | 1741 | goto leave; |
@@ -1745,7 +1745,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1745 | orphan_dir_bh, name, | 1745 | orphan_dir_bh, name, |
1746 | OCFS2_ORPHAN_NAMELEN, de_bh); | 1746 | OCFS2_ORPHAN_NAMELEN, de_bh); |
1747 | if (status < 0) { | 1747 | if (status < 0) { |
1748 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 1748 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
1749 | 1749 | ||
1750 | mlog_errno(status); | 1750 | mlog_errno(status); |
1751 | goto leave; | 1751 | goto leave; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 60a23e1906b0..d08480580470 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -101,6 +101,7 @@ enum ocfs2_unlock_action { | |||
101 | * about to be | 101 | * about to be |
102 | * dropped. */ | 102 | * dropped. */ |
103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
104 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | ||
104 | 105 | ||
105 | struct ocfs2_lock_res_ops; | 106 | struct ocfs2_lock_res_ops; |
106 | 107 | ||
@@ -170,6 +171,7 @@ enum ocfs2_mount_options | |||
170 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 171 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
171 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 172 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
172 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 173 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
174 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | ||
173 | }; | 175 | }; |
174 | 176 | ||
175 | #define OCFS2_OSB_SOFT_RO 0x0001 | 177 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -189,9 +191,7 @@ struct ocfs2_super | |||
189 | struct ocfs2_slot_info *slot_info; | 191 | struct ocfs2_slot_info *slot_info; |
190 | 192 | ||
191 | spinlock_t node_map_lock; | 193 | spinlock_t node_map_lock; |
192 | struct ocfs2_node_map mounted_map; | ||
193 | struct ocfs2_node_map recovery_map; | 194 | struct ocfs2_node_map recovery_map; |
194 | struct ocfs2_node_map umount_map; | ||
195 | 195 | ||
196 | u64 root_blkno; | 196 | u64 root_blkno; |
197 | u64 system_dir_blkno; | 197 | u64 system_dir_blkno; |
@@ -231,7 +231,9 @@ struct ocfs2_super | |||
231 | wait_queue_head_t checkpoint_event; | 231 | wait_queue_head_t checkpoint_event; |
232 | atomic_t needs_checkpoint; | 232 | atomic_t needs_checkpoint; |
233 | struct ocfs2_journal *journal; | 233 | struct ocfs2_journal *journal; |
234 | unsigned long osb_commit_interval; | ||
234 | 235 | ||
236 | int local_alloc_size; | ||
235 | enum ocfs2_local_alloc_state local_alloc_state; | 237 | enum ocfs2_local_alloc_state local_alloc_state; |
236 | struct buffer_head *local_alloc_bh; | 238 | struct buffer_head *local_alloc_bh; |
237 | u64 la_last_gd; | 239 | u64 la_last_gd; |
@@ -254,28 +256,21 @@ struct ocfs2_super | |||
254 | 256 | ||
255 | wait_queue_head_t recovery_event; | 257 | wait_queue_head_t recovery_event; |
256 | 258 | ||
257 | spinlock_t vote_task_lock; | 259 | spinlock_t dc_task_lock; |
258 | struct task_struct *vote_task; | 260 | struct task_struct *dc_task; |
259 | wait_queue_head_t vote_event; | 261 | wait_queue_head_t dc_event; |
260 | unsigned long vote_wake_sequence; | 262 | unsigned long dc_wake_sequence; |
261 | unsigned long vote_work_sequence; | 263 | unsigned long dc_work_sequence; |
262 | 264 | ||
265 | /* | ||
266 | * Any thread can add locks to the list, but the downconvert | ||
267 | * thread is the only one allowed to remove locks. Any change | ||
268 | * to this rule requires updating | ||
269 | * ocfs2_downconvert_thread_do_work(). | ||
270 | */ | ||
263 | struct list_head blocked_lock_list; | 271 | struct list_head blocked_lock_list; |
264 | unsigned long blocked_lock_count; | 272 | unsigned long blocked_lock_count; |
265 | 273 | ||
266 | struct list_head vote_list; | ||
267 | int vote_count; | ||
268 | |||
269 | u32 net_key; | ||
270 | spinlock_t net_response_lock; | ||
271 | unsigned int net_response_ids; | ||
272 | struct list_head net_response_list; | ||
273 | |||
274 | struct o2hb_callback_func osb_hb_up; | ||
275 | struct o2hb_callback_func osb_hb_down; | ||
276 | |||
277 | struct list_head osb_net_handlers; | ||
278 | |||
279 | wait_queue_head_t osb_mount_event; | 274 | wait_queue_head_t osb_mount_event; |
280 | 275 | ||
281 | /* Truncate log info */ | 276 | /* Truncate log info */ |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 6ef876759a73..3633edd3982f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -231,6 +231,20 @@ struct ocfs2_space_resv { | |||
231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | 231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) |
232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | 232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) |
233 | 233 | ||
234 | /* Used to pass group descriptor data when online resize is done */ | ||
235 | struct ocfs2_new_group_input { | ||
236 | __u64 group; /* Group descriptor's blkno. */ | ||
237 | __u32 clusters; /* Total number of clusters in this group */ | ||
238 | __u32 frees; /* Total free clusters in this group */ | ||
239 | __u16 chain; /* Chain for this group */ | ||
240 | __u16 reserved1; | ||
241 | __u32 reserved2; | ||
242 | }; | ||
243 | |||
244 | #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) | ||
245 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | ||
246 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | ||
247 | |||
234 | /* | 248 | /* |
235 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 249 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
236 | */ | 250 | */ |
@@ -256,6 +270,14 @@ struct ocfs2_space_resv { | |||
256 | /* Journal limits (in bytes) */ | 270 | /* Journal limits (in bytes) */ |
257 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
258 | 272 | ||
273 | /* | ||
274 | * Default local alloc size (in megabytes) | ||
275 | * | ||
276 | * The value chosen should be such that most allocations, including new | ||
277 | * block groups, use local alloc. | ||
278 | */ | ||
279 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | ||
280 | |||
259 | struct ocfs2_system_inode_info { | 281 | struct ocfs2_system_inode_info { |
260 | char *si_name; | 282 | char *si_name; |
261 | int si_iflags; | 283 | int si_iflags; |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 4ca02b1c38ac..86f3e3799c2b 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -45,6 +45,7 @@ enum ocfs2_lock_type { | |||
45 | OCFS2_LOCK_TYPE_RW, | 45 | OCFS2_LOCK_TYPE_RW, |
46 | OCFS2_LOCK_TYPE_DENTRY, | 46 | OCFS2_LOCK_TYPE_DENTRY, |
47 | OCFS2_LOCK_TYPE_OPEN, | 47 | OCFS2_LOCK_TYPE_OPEN, |
48 | OCFS2_LOCK_TYPE_FLOCK, | ||
48 | OCFS2_NUM_LOCK_TYPES | 49 | OCFS2_NUM_LOCK_TYPES |
49 | }; | 50 | }; |
50 | 51 | ||
@@ -73,6 +74,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
73 | case OCFS2_LOCK_TYPE_OPEN: | 74 | case OCFS2_LOCK_TYPE_OPEN: |
74 | c = 'O'; | 75 | c = 'O'; |
75 | break; | 76 | break; |
77 | case OCFS2_LOCK_TYPE_FLOCK: | ||
78 | c = 'F'; | ||
79 | break; | ||
76 | default: | 80 | default: |
77 | c = '\0'; | 81 | c = '\0'; |
78 | } | 82 | } |
@@ -90,6 +94,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
90 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | 94 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", |
91 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | 95 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", |
92 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 96 | [OCFS2_LOCK_TYPE_OPEN] = "Open", |
97 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | ||
93 | }; | 98 | }; |
94 | 99 | ||
95 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c new file mode 100644 index 000000000000..37835ffcb039 --- /dev/null +++ b/fs/ocfs2/resize.c | |||
@@ -0,0 +1,634 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * resize.c | ||
5 | * | ||
6 | * volume resize. | ||
7 | * Inspired by ext3/resize.c. | ||
8 | * | ||
9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public | ||
22 | * License along with this program; if not, write to the | ||
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
24 | * Boston, MA 02111-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/types.h> | ||
29 | |||
30 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | ||
31 | #include <cluster/masklog.h> | ||
32 | |||
33 | #include "ocfs2.h" | ||
34 | |||
35 | #include "alloc.h" | ||
36 | #include "dlmglue.h" | ||
37 | #include "inode.h" | ||
38 | #include "journal.h" | ||
39 | #include "super.h" | ||
40 | #include "sysfile.h" | ||
41 | #include "uptodate.h" | ||
42 | |||
43 | #include "buffer_head_io.h" | ||
44 | #include "suballoc.h" | ||
45 | #include "resize.h" | ||
46 | |||
47 | /* | ||
48 | * Check whether there are new backup superblocks exist | ||
49 | * in the last group. If there are some, mark them or clear | ||
50 | * them in the bitmap. | ||
51 | * | ||
52 | * Return how many backups we find in the last group. | ||
53 | */ | ||
54 | static u16 ocfs2_calc_new_backup_super(struct inode *inode, | ||
55 | struct ocfs2_group_desc *gd, | ||
56 | int new_clusters, | ||
57 | u32 first_new_cluster, | ||
58 | u16 cl_cpg, | ||
59 | int set) | ||
60 | { | ||
61 | int i; | ||
62 | u16 backups = 0; | ||
63 | u32 cluster; | ||
64 | u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); | ||
65 | |||
66 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
67 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
68 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
69 | |||
70 | gd_blkno = ocfs2_which_cluster_group(inode, cluster); | ||
71 | if (gd_blkno < lgd_blkno) | ||
72 | continue; | ||
73 | else if (gd_blkno > lgd_blkno) | ||
74 | break; | ||
75 | |||
76 | if (set) | ||
77 | ocfs2_set_bit(cluster % cl_cpg, | ||
78 | (unsigned long *)gd->bg_bitmap); | ||
79 | else | ||
80 | ocfs2_clear_bit(cluster % cl_cpg, | ||
81 | (unsigned long *)gd->bg_bitmap); | ||
82 | backups++; | ||
83 | } | ||
84 | |||
85 | mlog_exit_void(); | ||
86 | return backups; | ||
87 | } | ||
88 | |||
89 | static int ocfs2_update_last_group_and_inode(handle_t *handle, | ||
90 | struct inode *bm_inode, | ||
91 | struct buffer_head *bm_bh, | ||
92 | struct buffer_head *group_bh, | ||
93 | u32 first_new_cluster, | ||
94 | int new_clusters) | ||
95 | { | ||
96 | int ret = 0; | ||
97 | struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb); | ||
98 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data; | ||
99 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | ||
100 | struct ocfs2_chain_rec *cr; | ||
101 | struct ocfs2_group_desc *group; | ||
102 | u16 chain, num_bits, backups = 0; | ||
103 | u16 cl_bpc = le16_to_cpu(cl->cl_bpc); | ||
104 | u16 cl_cpg = le16_to_cpu(cl->cl_cpg); | ||
105 | |||
106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", | ||
107 | new_clusters, first_new_cluster); | ||
108 | |||
109 | ret = ocfs2_journal_access(handle, bm_inode, group_bh, | ||
110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
111 | if (ret < 0) { | ||
112 | mlog_errno(ret); | ||
113 | goto out; | ||
114 | } | ||
115 | |||
116 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
117 | |||
118 | /* update the group first. */ | ||
119 | num_bits = new_clusters * cl_bpc; | ||
120 | le16_add_cpu(&group->bg_bits, num_bits); | ||
121 | le16_add_cpu(&group->bg_free_bits_count, num_bits); | ||
122 | |||
123 | /* | ||
124 | * check whether there are some new backup superblocks exist in | ||
125 | * this group and update the group bitmap accordingly. | ||
126 | */ | ||
127 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
128 | OCFS2_FEATURE_COMPAT_BACKUP_SB)) { | ||
129 | backups = ocfs2_calc_new_backup_super(bm_inode, | ||
130 | group, | ||
131 | new_clusters, | ||
132 | first_new_cluster, | ||
133 | cl_cpg, 1); | ||
134 | le16_add_cpu(&group->bg_free_bits_count, -1 * backups); | ||
135 | } | ||
136 | |||
137 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
138 | if (ret < 0) { | ||
139 | mlog_errno(ret); | ||
140 | goto out_rollback; | ||
141 | } | ||
142 | |||
143 | /* update the inode accordingly. */ | ||
144 | ret = ocfs2_journal_access(handle, bm_inode, bm_bh, | ||
145 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
146 | if (ret < 0) { | ||
147 | mlog_errno(ret); | ||
148 | goto out_rollback; | ||
149 | } | ||
150 | |||
151 | chain = le16_to_cpu(group->bg_chain); | ||
152 | cr = (&cl->cl_recs[chain]); | ||
153 | le32_add_cpu(&cr->c_total, num_bits); | ||
154 | le32_add_cpu(&cr->c_free, num_bits); | ||
155 | le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits); | ||
156 | le32_add_cpu(&fe->i_clusters, new_clusters); | ||
157 | |||
158 | if (backups) { | ||
159 | le32_add_cpu(&cr->c_free, -1 * backups); | ||
160 | le32_add_cpu(&fe->id1.bitmap1.i_used, backups); | ||
161 | } | ||
162 | |||
163 | spin_lock(&OCFS2_I(bm_inode)->ip_lock); | ||
164 | OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
165 | le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); | ||
166 | spin_unlock(&OCFS2_I(bm_inode)->ip_lock); | ||
167 | i_size_write(bm_inode, le64_to_cpu(fe->i_size)); | ||
168 | |||
169 | ocfs2_journal_dirty(handle, bm_bh); | ||
170 | |||
171 | out_rollback: | ||
172 | if (ret < 0) { | ||
173 | ocfs2_calc_new_backup_super(bm_inode, | ||
174 | group, | ||
175 | new_clusters, | ||
176 | first_new_cluster, | ||
177 | cl_cpg, 0); | ||
178 | le16_add_cpu(&group->bg_free_bits_count, backups); | ||
179 | le16_add_cpu(&group->bg_bits, -1 * num_bits); | ||
180 | le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); | ||
181 | } | ||
182 | out: | ||
183 | mlog_exit(ret); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | static int update_backups(struct inode * inode, u32 clusters, char *data) | ||
188 | { | ||
189 | int i, ret = 0; | ||
190 | u32 cluster; | ||
191 | u64 blkno; | ||
192 | struct buffer_head *backup = NULL; | ||
193 | struct ocfs2_dinode *backup_di = NULL; | ||
194 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
195 | |||
196 | /* calculate the real backups we need to update. */ | ||
197 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
198 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
199 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
200 | if (cluster > clusters) | ||
201 | break; | ||
202 | |||
203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | ||
204 | if (ret < 0) { | ||
205 | mlog_errno(ret); | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | memcpy(backup->b_data, data, inode->i_sb->s_blocksize); | ||
210 | |||
211 | backup_di = (struct ocfs2_dinode *)backup->b_data; | ||
212 | backup_di->i_blkno = cpu_to_le64(blkno); | ||
213 | |||
214 | ret = ocfs2_write_super_or_backup(osb, backup); | ||
215 | brelse(backup); | ||
216 | backup = NULL; | ||
217 | if (ret < 0) { | ||
218 | mlog_errno(ret); | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | static void ocfs2_update_super_and_backups(struct inode *inode, | ||
227 | int new_clusters) | ||
228 | { | ||
229 | int ret; | ||
230 | u32 clusters = 0; | ||
231 | struct buffer_head *super_bh = NULL; | ||
232 | struct ocfs2_dinode *super_di = NULL; | ||
233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
234 | |||
235 | /* | ||
236 | * update the superblock last. | ||
237 | * It doesn't matter if the write failed. | ||
238 | */ | ||
239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | ||
240 | &super_bh, 0, NULL); | ||
241 | if (ret < 0) { | ||
242 | mlog_errno(ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | super_di = (struct ocfs2_dinode *)super_bh->b_data; | ||
247 | le32_add_cpu(&super_di->i_clusters, new_clusters); | ||
248 | clusters = le32_to_cpu(super_di->i_clusters); | ||
249 | |||
250 | ret = ocfs2_write_super_or_backup(osb, super_bh); | ||
251 | if (ret < 0) { | ||
252 | mlog_errno(ret); | ||
253 | goto out; | ||
254 | } | ||
255 | |||
256 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB)) | ||
257 | ret = update_backups(inode, clusters, super_bh->b_data); | ||
258 | |||
259 | out: | ||
260 | brelse(super_bh); | ||
261 | if (ret) | ||
262 | printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s" | ||
263 | " during fs resize. This condition is not fatal," | ||
264 | " but fsck.ocfs2 should be run to fix it\n", | ||
265 | osb->dev_str); | ||
266 | return; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Extend the filesystem to the new number of clusters specified. This entry | ||
271 | * point is only used to extend the current filesystem to the end of the last | ||
272 | * existing group. | ||
273 | */ | ||
274 | int ocfs2_group_extend(struct inode * inode, int new_clusters) | ||
275 | { | ||
276 | int ret; | ||
277 | handle_t *handle; | ||
278 | struct buffer_head *main_bm_bh = NULL; | ||
279 | struct buffer_head *group_bh = NULL; | ||
280 | struct inode *main_bm_inode = NULL; | ||
281 | struct ocfs2_dinode *fe = NULL; | ||
282 | struct ocfs2_group_desc *group = NULL; | ||
283 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
284 | u16 cl_bpc; | ||
285 | u32 first_new_cluster; | ||
286 | u64 lgd_blkno; | ||
287 | |||
288 | mlog_entry_void(); | ||
289 | |||
290 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
291 | return -EROFS; | ||
292 | |||
293 | if (new_clusters < 0) | ||
294 | return -EINVAL; | ||
295 | else if (new_clusters == 0) | ||
296 | return 0; | ||
297 | |||
298 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
299 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
300 | OCFS2_INVALID_SLOT); | ||
301 | if (!main_bm_inode) { | ||
302 | ret = -EINVAL; | ||
303 | mlog_errno(ret); | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | mutex_lock(&main_bm_inode->i_mutex); | ||
308 | |||
309 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
310 | if (ret < 0) { | ||
311 | mlog_errno(ret); | ||
312 | goto out_mutex; | ||
313 | } | ||
314 | |||
315 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
316 | |||
317 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
318 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
319 | mlog(ML_ERROR, "The disk is too old and small. " | ||
320 | "Force to do offline resize."); | ||
321 | ret = -EINVAL; | ||
322 | goto out_unlock; | ||
323 | } | ||
324 | |||
325 | if (!OCFS2_IS_VALID_DINODE(fe)) { | ||
326 | OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); | ||
327 | ret = -EIO; | ||
328 | goto out_unlock; | ||
329 | } | ||
330 | |||
331 | first_new_cluster = le32_to_cpu(fe->i_clusters); | ||
332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | ||
333 | first_new_cluster - 1); | ||
334 | |||
335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | ||
336 | main_bm_inode); | ||
337 | if (ret < 0) { | ||
338 | mlog_errno(ret); | ||
339 | goto out_unlock; | ||
340 | } | ||
341 | |||
342 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
343 | |||
344 | ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); | ||
345 | if (ret) { | ||
346 | mlog_errno(ret); | ||
347 | goto out_unlock; | ||
348 | } | ||
349 | |||
350 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
351 | if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > | ||
352 | le16_to_cpu(fe->id2.i_chain.cl_cpg)) { | ||
353 | ret = -EINVAL; | ||
354 | goto out_unlock; | ||
355 | } | ||
356 | |||
357 | mlog(0, "extend the last group at %llu, new clusters = %d\n", | ||
358 | (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); | ||
359 | |||
360 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); | ||
361 | if (IS_ERR(handle)) { | ||
362 | mlog_errno(PTR_ERR(handle)); | ||
363 | ret = -EINVAL; | ||
364 | goto out_unlock; | ||
365 | } | ||
366 | |||
367 | /* update the last group descriptor and inode. */ | ||
368 | ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, | ||
369 | main_bm_bh, group_bh, | ||
370 | first_new_cluster, | ||
371 | new_clusters); | ||
372 | if (ret) { | ||
373 | mlog_errno(ret); | ||
374 | goto out_commit; | ||
375 | } | ||
376 | |||
377 | ocfs2_update_super_and_backups(main_bm_inode, new_clusters); | ||
378 | |||
379 | out_commit: | ||
380 | ocfs2_commit_trans(osb, handle); | ||
381 | out_unlock: | ||
382 | brelse(group_bh); | ||
383 | brelse(main_bm_bh); | ||
384 | |||
385 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
386 | |||
387 | out_mutex: | ||
388 | mutex_unlock(&main_bm_inode->i_mutex); | ||
389 | iput(main_bm_inode); | ||
390 | |||
391 | out: | ||
392 | mlog_exit_void(); | ||
393 | return ret; | ||
394 | } | ||
395 | |||
396 | static int ocfs2_check_new_group(struct inode *inode, | ||
397 | struct ocfs2_dinode *di, | ||
398 | struct ocfs2_new_group_input *input, | ||
399 | struct buffer_head *group_bh) | ||
400 | { | ||
401 | int ret; | ||
402 | struct ocfs2_group_desc *gd; | ||
403 | u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
404 | unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * | ||
405 | le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
406 | |||
407 | |||
408 | gd = (struct ocfs2_group_desc *)group_bh->b_data; | ||
409 | |||
410 | ret = -EIO; | ||
411 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) | ||
412 | mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", | ||
413 | (unsigned long long)le64_to_cpu(gd->bg_blkno)); | ||
414 | else if (di->i_blkno != gd->bg_parent_dinode) | ||
415 | mlog(ML_ERROR, "Group descriptor # %llu has bad parent " | ||
416 | "pointer (%llu, expected %llu)\n", | ||
417 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
418 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), | ||
419 | (unsigned long long)le64_to_cpu(di->i_blkno)); | ||
420 | else if (le16_to_cpu(gd->bg_bits) > max_bits) | ||
421 | mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", | ||
422 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
423 | le16_to_cpu(gd->bg_bits)); | ||
424 | else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) | ||
425 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
426 | "claims that %u are free\n", | ||
427 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
428 | le16_to_cpu(gd->bg_bits), | ||
429 | le16_to_cpu(gd->bg_free_bits_count)); | ||
430 | else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) | ||
431 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
432 | "max bitmap bits of %u\n", | ||
433 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
434 | le16_to_cpu(gd->bg_bits), | ||
435 | 8 * le16_to_cpu(gd->bg_size)); | ||
436 | else if (le16_to_cpu(gd->bg_chain) != input->chain) | ||
437 | mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " | ||
438 | "while input has %u set.\n", | ||
439 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
440 | le16_to_cpu(gd->bg_chain), input->chain); | ||
441 | else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc) | ||
442 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
443 | "input has %u clusters set\n", | ||
444 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
445 | le16_to_cpu(gd->bg_bits), input->clusters); | ||
446 | else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc) | ||
447 | mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u " | ||
448 | "but it should have %u set\n", | ||
449 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
450 | le16_to_cpu(gd->bg_bits), | ||
451 | input->frees * cl_bpc); | ||
452 | else | ||
453 | ret = 0; | ||
454 | |||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static int ocfs2_verify_group_and_input(struct inode *inode, | ||
459 | struct ocfs2_dinode *di, | ||
460 | struct ocfs2_new_group_input *input, | ||
461 | struct buffer_head *group_bh) | ||
462 | { | ||
463 | u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count); | ||
464 | u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
465 | u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec); | ||
466 | u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group); | ||
467 | u32 total_clusters = le32_to_cpu(di->i_clusters); | ||
468 | int ret = -EINVAL; | ||
469 | |||
470 | if (cluster < total_clusters) | ||
471 | mlog(ML_ERROR, "add a group which is in the current volume.\n"); | ||
472 | else if (input->chain >= cl_count) | ||
473 | mlog(ML_ERROR, "input chain exceeds the limit.\n"); | ||
474 | else if (next_free != cl_count && next_free != input->chain) | ||
475 | mlog(ML_ERROR, | ||
476 | "the add group should be in chain %u\n", next_free); | ||
477 | else if (total_clusters + input->clusters < total_clusters) | ||
478 | mlog(ML_ERROR, "add group's clusters overflow.\n"); | ||
479 | else if (input->clusters > cl_cpg) | ||
480 | mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n"); | ||
481 | else if (input->frees > input->clusters) | ||
482 | mlog(ML_ERROR, "the free cluster exceeds the total clusters\n"); | ||
483 | else if (total_clusters % cl_cpg != 0) | ||
484 | mlog(ML_ERROR, | ||
485 | "the last group isn't full. Use group extend first.\n"); | ||
486 | else if (input->group != ocfs2_which_cluster_group(inode, cluster)) | ||
487 | mlog(ML_ERROR, "group blkno is invalid\n"); | ||
488 | else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh))) | ||
489 | mlog(ML_ERROR, "group descriptor check failed.\n"); | ||
490 | else | ||
491 | ret = 0; | ||
492 | |||
493 | return ret; | ||
494 | } | ||
495 | |||
496 | /* Add a new group descriptor to global_bitmap. */ | ||
497 | int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | ||
498 | { | ||
499 | int ret; | ||
500 | handle_t *handle; | ||
501 | struct buffer_head *main_bm_bh = NULL; | ||
502 | struct inode *main_bm_inode = NULL; | ||
503 | struct ocfs2_dinode *fe = NULL; | ||
504 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
505 | struct buffer_head *group_bh = NULL; | ||
506 | struct ocfs2_group_desc *group = NULL; | ||
507 | struct ocfs2_chain_list *cl; | ||
508 | struct ocfs2_chain_rec *cr; | ||
509 | u16 cl_bpc; | ||
510 | |||
511 | mlog_entry_void(); | ||
512 | |||
513 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
514 | return -EROFS; | ||
515 | |||
516 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
517 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
518 | OCFS2_INVALID_SLOT); | ||
519 | if (!main_bm_inode) { | ||
520 | ret = -EINVAL; | ||
521 | mlog_errno(ret); | ||
522 | goto out; | ||
523 | } | ||
524 | |||
525 | mutex_lock(&main_bm_inode->i_mutex); | ||
526 | |||
527 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
528 | if (ret < 0) { | ||
529 | mlog_errno(ret); | ||
530 | goto out_mutex; | ||
531 | } | ||
532 | |||
533 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
534 | |||
535 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
536 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
537 | mlog(ML_ERROR, "The disk is too old and small." | ||
538 | " Force to do offline resize."); | ||
539 | ret = -EINVAL; | ||
540 | goto out_unlock; | ||
541 | } | ||
542 | |||
543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | ||
544 | if (ret < 0) { | ||
545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | ||
546 | "from the device.", (unsigned long long)input->group); | ||
547 | goto out_unlock; | ||
548 | } | ||
549 | |||
550 | ocfs2_set_new_buffer_uptodate(inode, group_bh); | ||
551 | |||
552 | ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); | ||
553 | if (ret) { | ||
554 | mlog_errno(ret); | ||
555 | goto out_unlock; | ||
556 | } | ||
557 | |||
558 | mlog(0, "Add a new group %llu in chain = %u, length = %u\n", | ||
559 | (unsigned long long)input->group, input->chain, input->clusters); | ||
560 | |||
561 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); | ||
562 | if (IS_ERR(handle)) { | ||
563 | mlog_errno(PTR_ERR(handle)); | ||
564 | ret = -EINVAL; | ||
565 | goto out_unlock; | ||
566 | } | ||
567 | |||
568 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
569 | cl = &fe->id2.i_chain; | ||
570 | cr = &cl->cl_recs[input->chain]; | ||
571 | |||
572 | ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, | ||
573 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
574 | if (ret < 0) { | ||
575 | mlog_errno(ret); | ||
576 | goto out_commit; | ||
577 | } | ||
578 | |||
579 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
580 | group->bg_next_group = cr->c_blkno; | ||
581 | |||
582 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
583 | if (ret < 0) { | ||
584 | mlog_errno(ret); | ||
585 | goto out_commit; | ||
586 | } | ||
587 | |||
588 | ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, | ||
589 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
590 | if (ret < 0) { | ||
591 | mlog_errno(ret); | ||
592 | goto out_commit; | ||
593 | } | ||
594 | |||
595 | if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) { | ||
596 | le16_add_cpu(&cl->cl_next_free_rec, 1); | ||
597 | memset(cr, 0, sizeof(struct ocfs2_chain_rec)); | ||
598 | } | ||
599 | |||
600 | cr->c_blkno = le64_to_cpu(input->group); | ||
601 | le32_add_cpu(&cr->c_total, input->clusters * cl_bpc); | ||
602 | le32_add_cpu(&cr->c_free, input->frees * cl_bpc); | ||
603 | |||
604 | le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc); | ||
605 | le32_add_cpu(&fe->id1.bitmap1.i_used, | ||
606 | (input->clusters - input->frees) * cl_bpc); | ||
607 | le32_add_cpu(&fe->i_clusters, input->clusters); | ||
608 | |||
609 | ocfs2_journal_dirty(handle, main_bm_bh); | ||
610 | |||
611 | spin_lock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
612 | OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
613 | le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits); | ||
614 | spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
615 | i_size_write(main_bm_inode, le64_to_cpu(fe->i_size)); | ||
616 | |||
617 | ocfs2_update_super_and_backups(main_bm_inode, input->clusters); | ||
618 | |||
619 | out_commit: | ||
620 | ocfs2_commit_trans(osb, handle); | ||
621 | out_unlock: | ||
622 | brelse(group_bh); | ||
623 | brelse(main_bm_bh); | ||
624 | |||
625 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
626 | |||
627 | out_mutex: | ||
628 | mutex_unlock(&main_bm_inode->i_mutex); | ||
629 | iput(main_bm_inode); | ||
630 | |||
631 | out: | ||
632 | mlog_exit_void(); | ||
633 | return ret; | ||
634 | } | ||
diff --git a/fs/ocfs2/resize.h b/fs/ocfs2/resize.h new file mode 100644 index 000000000000..f38841abf10b --- /dev/null +++ b/fs/ocfs2/resize.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * resize.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
#ifndef OCFS2_RESIZE_H
#define OCFS2_RESIZE_H

/* Forward declarations so this header does not depend on include order. */
struct inode;
struct ocfs2_new_group_input;

/* Grow the volume by new_clusters clusters within the last existing group. */
int ocfs2_group_extend(struct inode * inode, int new_clusters);
/* Link the prepared group descriptor described by input into global_bitmap. */
int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input);

#endif /* OCFS2_RESIZE_H */
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index af4882b62cfa..3a50ce555e64 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | |||
48 | s16 slot_num, | 48 | s16 slot_num, |
49 | s16 node_num); | 49 | s16 node_num); |
50 | 50 | ||
51 | /* Use the slot information we've collected to create a map of mounted | ||
52 | * nodes. Should be holding an EX on super block. assumes slot info is | ||
53 | * up to date. Note that we call this *after* we find a slot, so our | ||
54 | * own node should be set in the map too... */ | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb) | ||
56 | { | ||
57 | int i; | ||
58 | struct ocfs2_slot_info *si = osb->slot_info; | ||
59 | |||
60 | spin_lock(&si->si_lock); | ||
61 | |||
62 | for (i = 0; i < si->si_size; i++) | ||
63 | if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT) | ||
64 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, | ||
65 | si->si_global_node_nums[i]); | ||
66 | |||
67 | spin_unlock(&si->si_lock); | ||
68 | } | ||
69 | |||
70 | /* post the slot information on disk into our slot_info struct. */ | 51 | /* post the slot information on disk into our slot_info struct. */ |
71 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
72 | { | 53 | { |
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index d8c8ceed031b..1025872aaade 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
@@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, |
53 | s16 slot_num); | 53 | s16 slot_num); |
54 | 54 | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb); | ||
56 | |||
57 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, |
58 | int slot_num) | 56 | int slot_num) |
59 | { | 57 | { |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8f09f5235e3a..7e397e2c25dd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -101,8 +101,6 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
102 | u64 bg_blkno, | 102 | u64 bg_blkno, |
103 | u16 bg_bit_off); | 103 | u16 bg_bit_off); |
104 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | ||
105 | u32 cluster); | ||
106 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | 104 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, |
107 | u64 data_blkno, | 105 | u64 data_blkno, |
108 | u64 *bg_blkno, | 106 | u64 *bg_blkno, |
@@ -114,7 +112,7 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
114 | 112 | ||
115 | if (inode) { | 113 | if (inode) { |
116 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) | 114 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) |
117 | ocfs2_meta_unlock(inode, 1); | 115 | ocfs2_inode_unlock(inode, 1); |
118 | 116 | ||
119 | mutex_unlock(&inode->i_mutex); | 117 | mutex_unlock(&inode->i_mutex); |
120 | 118 | ||
@@ -131,9 +129,9 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
131 | } | 129 | } |
132 | 130 | ||
133 | /* somewhat more expensive than our other checks, so use sparingly. */ | 131 | /* somewhat more expensive than our other checks, so use sparingly. */ |
134 | static int ocfs2_check_group_descriptor(struct super_block *sb, | 132 | int ocfs2_check_group_descriptor(struct super_block *sb, |
135 | struct ocfs2_dinode *di, | 133 | struct ocfs2_dinode *di, |
136 | struct ocfs2_group_desc *gd) | 134 | struct ocfs2_group_desc *gd) |
137 | { | 135 | { |
138 | unsigned int max_bits; | 136 | unsigned int max_bits; |
139 | 137 | ||
@@ -412,7 +410,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
412 | 410 | ||
413 | mutex_lock(&alloc_inode->i_mutex); | 411 | mutex_lock(&alloc_inode->i_mutex); |
414 | 412 | ||
415 | status = ocfs2_meta_lock(alloc_inode, &bh, 1); | 413 | status = ocfs2_inode_lock(alloc_inode, &bh, 1); |
416 | if (status < 0) { | 414 | if (status < 0) { |
417 | mutex_unlock(&alloc_inode->i_mutex); | 415 | mutex_unlock(&alloc_inode->i_mutex); |
418 | iput(alloc_inode); | 416 | iput(alloc_inode); |
@@ -1443,8 +1441,7 @@ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | |||
1443 | 1441 | ||
1444 | /* given a cluster offset, calculate which block group it belongs to | 1442 | /* given a cluster offset, calculate which block group it belongs to |
1445 | * and return that block offset. */ | 1443 | * and return that block offset. */ |
1446 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | 1444 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster) |
1447 | u32 cluster) | ||
1448 | { | 1445 | { |
1449 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1446 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1450 | u32 group_no; | 1447 | u32 group_no; |
@@ -1519,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
1519 | if (min_clusters > (osb->bitmap_cpg - 1)) { | 1516 | if (min_clusters > (osb->bitmap_cpg - 1)) { |
1520 | /* The only paths asking for contiguousness | 1517 | /* The only paths asking for contiguousness |
1521 | * should know about this already. */ | 1518 | * should know about this already. */ |
1522 | mlog(ML_ERROR, "minimum allocation requested exceeds " | 1519 | mlog(ML_ERROR, "minimum allocation requested %u exceeds " |
1523 | "group bitmap size!"); | 1520 | "group bitmap size %u!\n", min_clusters, |
1521 | osb->bitmap_cpg); | ||
1524 | status = -ENOSPC; | 1522 | status = -ENOSPC; |
1525 | goto bail; | 1523 | goto bail; |
1526 | } | 1524 | } |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index cafe93703095..8799033bb459 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -147,4 +147,12 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
148 | struct ocfs2_alloc_context *ac); | 148 | struct ocfs2_alloc_context *ac); |
149 | 149 | ||
150 | /* given a cluster offset, calculate which block group it belongs to | ||
151 | * and return that block offset. */ | ||
152 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | ||
153 | |||
154 | /* somewhat more expensive than our other checks, so use sparingly. */ | ||
155 | int ocfs2_check_group_descriptor(struct super_block *sb, | ||
156 | struct ocfs2_dinode *di, | ||
157 | struct ocfs2_group_desc *gd); | ||
150 | #endif /* _CHAINALLOC_H_ */ | 158 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5ee775420665..01fe40ee5ea9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -65,7 +65,6 @@ | |||
65 | #include "sysfile.h" | 65 | #include "sysfile.h" |
66 | #include "uptodate.h" | 66 | #include "uptodate.h" |
67 | #include "ver.h" | 67 | #include "ver.h" |
68 | #include "vote.h" | ||
69 | 68 | ||
70 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
71 | 70 | ||
@@ -84,9 +83,11 @@ MODULE_LICENSE("GPL"); | |||
84 | 83 | ||
85 | struct mount_options | 84 | struct mount_options |
86 | { | 85 | { |
86 | unsigned long commit_interval; | ||
87 | unsigned long mount_opt; | 87 | unsigned long mount_opt; |
88 | unsigned int atime_quantum; | 88 | unsigned int atime_quantum; |
89 | signed short slot; | 89 | signed short slot; |
90 | unsigned int localalloc_opt; | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
@@ -150,6 +151,9 @@ enum { | |||
150 | Opt_data_writeback, | 151 | Opt_data_writeback, |
151 | Opt_atime_quantum, | 152 | Opt_atime_quantum, |
152 | Opt_slot, | 153 | Opt_slot, |
154 | Opt_commit, | ||
155 | Opt_localalloc, | ||
156 | Opt_localflocks, | ||
153 | Opt_err, | 157 | Opt_err, |
154 | }; | 158 | }; |
155 | 159 | ||
@@ -165,6 +169,9 @@ static match_table_t tokens = { | |||
165 | {Opt_data_writeback, "data=writeback"}, | 169 | {Opt_data_writeback, "data=writeback"}, |
166 | {Opt_atime_quantum, "atime_quantum=%u"}, | 170 | {Opt_atime_quantum, "atime_quantum=%u"}, |
167 | {Opt_slot, "preferred_slot=%u"}, | 171 | {Opt_slot, "preferred_slot=%u"}, |
172 | {Opt_commit, "commit=%u"}, | ||
173 | {Opt_localalloc, "localalloc=%d"}, | ||
174 | {Opt_localflocks, "localflocks"}, | ||
168 | {Opt_err, NULL} | 175 | {Opt_err, NULL} |
169 | }; | 176 | }; |
170 | 177 | ||
@@ -213,7 +220,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
213 | 220 | ||
214 | mlog_entry_void(); | 221 | mlog_entry_void(); |
215 | 222 | ||
216 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); | 223 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); |
217 | if (IS_ERR(new)) { | 224 | if (IS_ERR(new)) { |
218 | status = PTR_ERR(new); | 225 | status = PTR_ERR(new); |
219 | mlog_errno(status); | 226 | mlog_errno(status); |
@@ -221,7 +228,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
221 | } | 228 | } |
222 | osb->root_inode = new; | 229 | osb->root_inode = new; |
223 | 230 | ||
224 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); | 231 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); |
225 | if (IS_ERR(new)) { | 232 | if (IS_ERR(new)) { |
226 | status = PTR_ERR(new); | 233 | status = PTR_ERR(new); |
227 | mlog_errno(status); | 234 | mlog_errno(status); |
@@ -443,6 +450,8 @@ unlock_osb: | |||
443 | osb->s_mount_opt = parsed_options.mount_opt; | 450 | osb->s_mount_opt = parsed_options.mount_opt; |
444 | osb->s_atime_quantum = parsed_options.atime_quantum; | 451 | osb->s_atime_quantum = parsed_options.atime_quantum; |
445 | osb->preferred_slot = parsed_options.slot; | 452 | osb->preferred_slot = parsed_options.slot; |
453 | if (parsed_options.commit_interval) | ||
454 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
446 | 455 | ||
447 | if (!ocfs2_is_hard_readonly(osb)) | 456 | if (!ocfs2_is_hard_readonly(osb)) |
448 | ocfs2_set_journal_params(osb); | 457 | ocfs2_set_journal_params(osb); |
@@ -597,6 +606,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
597 | osb->s_mount_opt = parsed_options.mount_opt; | 606 | osb->s_mount_opt = parsed_options.mount_opt; |
598 | osb->s_atime_quantum = parsed_options.atime_quantum; | 607 | osb->s_atime_quantum = parsed_options.atime_quantum; |
599 | osb->preferred_slot = parsed_options.slot; | 608 | osb->preferred_slot = parsed_options.slot; |
609 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
610 | osb->local_alloc_size = parsed_options.localalloc_opt; | ||
600 | 611 | ||
601 | sb->s_magic = OCFS2_SUPER_MAGIC; | 612 | sb->s_magic = OCFS2_SUPER_MAGIC; |
602 | 613 | ||
@@ -747,9 +758,11 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
747 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 758 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
748 | options ? options : "(none)"); | 759 | options ? options : "(none)"); |
749 | 760 | ||
761 | mopt->commit_interval = 0; | ||
750 | mopt->mount_opt = 0; | 762 | mopt->mount_opt = 0; |
751 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
752 | mopt->slot = OCFS2_INVALID_SLOT; | 764 | mopt->slot = OCFS2_INVALID_SLOT; |
765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
753 | 766 | ||
754 | if (!options) { | 767 | if (!options) { |
755 | status = 1; | 768 | status = 1; |
@@ -816,6 +829,41 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
816 | if (option) | 829 | if (option) |
817 | mopt->slot = (s16)option; | 830 | mopt->slot = (s16)option; |
818 | break; | 831 | break; |
832 | case Opt_commit: | ||
833 | option = 0; | ||
834 | if (match_int(&args[0], &option)) { | ||
835 | status = 0; | ||
836 | goto bail; | ||
837 | } | ||
838 | if (option < 0) | ||
839 | return 0; | ||
840 | if (option == 0) | ||
841 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | ||
842 | mopt->commit_interval = HZ * option; | ||
843 | break; | ||
844 | case Opt_localalloc: | ||
845 | option = 0; | ||
846 | if (match_int(&args[0], &option)) { | ||
847 | status = 0; | ||
848 | goto bail; | ||
849 | } | ||
850 | if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) | ||
851 | mopt->localalloc_opt = option; | ||
852 | break; | ||
853 | case Opt_localflocks: | ||
854 | /* | ||
855 | * Changing this during remount could race | ||
856 | * flock() requests, or "unbalance" existing | ||
857 | * ones (e.g., a lock is taken in one mode but | ||
858 | * dropped in the other). If users care enough | ||
859 | * to flip locking modes during remount, we | ||
860 | * could add a "local" flag to individual | ||
861 | * flock structures for proper tracking of | ||
862 | * state. | ||
863 | */ | ||
864 | if (!is_remount) | ||
865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | ||
866 | break; | ||
819 | default: | 867 | default: |
820 | mlog(ML_ERROR, | 868 | mlog(ML_ERROR, |
821 | "Unrecognized mount option \"%s\" " | 869 | "Unrecognized mount option \"%s\" " |
@@ -864,6 +912,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
864 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 912 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) |
865 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 913 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); |
866 | 914 | ||
915 | if (osb->osb_commit_interval) | ||
916 | seq_printf(s, ",commit=%u", | ||
917 | (unsigned) (osb->osb_commit_interval / HZ)); | ||
918 | |||
919 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | ||
920 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | ||
921 | |||
922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | ||
923 | seq_printf(s, ",localflocks,"); | ||
924 | |||
867 | return 0; | 925 | return 0; |
868 | } | 926 | } |
869 | 927 | ||
@@ -965,7 +1023,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
965 | goto bail; | 1023 | goto bail; |
966 | } | 1024 | } |
967 | 1025 | ||
968 | status = ocfs2_meta_lock(inode, &bh, 0); | 1026 | status = ocfs2_inode_lock(inode, &bh, 0); |
969 | if (status < 0) { | 1027 | if (status < 0) { |
970 | mlog_errno(status); | 1028 | mlog_errno(status); |
971 | goto bail; | 1029 | goto bail; |
@@ -989,7 +1047,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
989 | 1047 | ||
990 | brelse(bh); | 1048 | brelse(bh); |
991 | 1049 | ||
992 | ocfs2_meta_unlock(inode, 0); | 1050 | ocfs2_inode_unlock(inode, 0); |
993 | status = 0; | 1051 | status = 0; |
994 | bail: | 1052 | bail: |
995 | if (inode) | 1053 | if (inode) |
@@ -1020,8 +1078,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) | |||
1020 | oi->ip_clusters = 0; | 1078 | oi->ip_clusters = 0; |
1021 | 1079 | ||
1022 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); | 1080 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); |
1023 | ocfs2_lock_res_init_once(&oi->ip_meta_lockres); | 1081 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); |
1024 | ocfs2_lock_res_init_once(&oi->ip_data_lockres); | ||
1025 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 1082 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); |
1026 | 1083 | ||
1027 | ocfs2_metadata_cache_init(&oi->vfs_inode); | 1084 | ocfs2_metadata_cache_init(&oi->vfs_inode); |
@@ -1117,25 +1174,12 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1117 | goto leave; | 1174 | goto leave; |
1118 | } | 1175 | } |
1119 | 1176 | ||
1120 | status = ocfs2_register_hb_callbacks(osb); | ||
1121 | if (status < 0) { | ||
1122 | mlog_errno(status); | ||
1123 | goto leave; | ||
1124 | } | ||
1125 | |||
1126 | status = ocfs2_dlm_init(osb); | 1177 | status = ocfs2_dlm_init(osb); |
1127 | if (status < 0) { | 1178 | if (status < 0) { |
1128 | mlog_errno(status); | 1179 | mlog_errno(status); |
1129 | goto leave; | 1180 | goto leave; |
1130 | } | 1181 | } |
1131 | 1182 | ||
1132 | /* requires vote_thread to be running. */ | ||
1133 | status = ocfs2_register_net_handlers(osb); | ||
1134 | if (status < 0) { | ||
1135 | mlog_errno(status); | ||
1136 | goto leave; | ||
1137 | } | ||
1138 | |||
1139 | status = ocfs2_super_lock(osb, 1); | 1183 | status = ocfs2_super_lock(osb, 1); |
1140 | if (status < 0) { | 1184 | if (status < 0) { |
1141 | mlog_errno(status); | 1185 | mlog_errno(status); |
@@ -1150,8 +1194,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1150 | goto leave; | 1194 | goto leave; |
1151 | } | 1195 | } |
1152 | 1196 | ||
1153 | ocfs2_populate_mounted_map(osb); | ||
1154 | |||
1155 | /* load all node-local system inodes */ | 1197 | /* load all node-local system inodes */ |
1156 | status = ocfs2_init_local_system_inodes(osb); | 1198 | status = ocfs2_init_local_system_inodes(osb); |
1157 | if (status < 0) { | 1199 | if (status < 0) { |
@@ -1174,15 +1216,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1174 | if (ocfs2_mount_local(osb)) | 1216 | if (ocfs2_mount_local(osb)) |
1175 | goto leave; | 1217 | goto leave; |
1176 | 1218 | ||
1177 | /* This should be sent *after* we recovered our journal as it | ||
1178 | * will cause other nodes to unmark us as needing | ||
1179 | * recovery. However, we need to send it *before* dropping the | ||
1180 | * super block lock as otherwise their recovery threads might | ||
1181 | * try to clean us up while we're live! */ | ||
1182 | status = ocfs2_request_mount_vote(osb); | ||
1183 | if (status < 0) | ||
1184 | mlog_errno(status); | ||
1185 | |||
1186 | leave: | 1219 | leave: |
1187 | if (unlock_super) | 1220 | if (unlock_super) |
1188 | ocfs2_super_unlock(osb, 1); | 1221 | ocfs2_super_unlock(osb, 1); |
@@ -1240,10 +1273,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1240 | mlog_errno(tmp); | 1273 | mlog_errno(tmp); |
1241 | return; | 1274 | return; |
1242 | } | 1275 | } |
1243 | |||
1244 | tmp = ocfs2_request_umount_vote(osb); | ||
1245 | if (tmp < 0) | ||
1246 | mlog_errno(tmp); | ||
1247 | } | 1276 | } |
1248 | 1277 | ||
1249 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
@@ -1254,13 +1283,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1254 | 1283 | ||
1255 | ocfs2_release_system_inodes(osb); | 1284 | ocfs2_release_system_inodes(osb); |
1256 | 1285 | ||
1257 | if (osb->dlm) { | 1286 | if (osb->dlm) |
1258 | ocfs2_unregister_net_handlers(osb); | ||
1259 | |||
1260 | ocfs2_dlm_shutdown(osb); | 1287 | ocfs2_dlm_shutdown(osb); |
1261 | } | ||
1262 | |||
1263 | ocfs2_clear_hb_callbacks(osb); | ||
1264 | 1288 | ||
1265 | debugfs_remove(osb->osb_debug_root); | 1289 | debugfs_remove(osb->osb_debug_root); |
1266 | 1290 | ||
@@ -1315,7 +1339,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1315 | int i, cbits, bbits; | 1339 | int i, cbits, bbits; |
1316 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 1340 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; |
1317 | struct inode *inode = NULL; | 1341 | struct inode *inode = NULL; |
1318 | struct buffer_head *bitmap_bh = NULL; | ||
1319 | struct ocfs2_journal *journal; | 1342 | struct ocfs2_journal *journal; |
1320 | __le32 uuid_net_key; | 1343 | __le32 uuid_net_key; |
1321 | struct ocfs2_super *osb; | 1344 | struct ocfs2_super *osb; |
@@ -1344,19 +1367,13 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1344 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1367 | osb->s_sectsize_bits = blksize_bits(sector_size); |
1345 | BUG_ON(!osb->s_sectsize_bits); | 1368 | BUG_ON(!osb->s_sectsize_bits); |
1346 | 1369 | ||
1347 | osb->net_response_ids = 0; | ||
1348 | spin_lock_init(&osb->net_response_lock); | ||
1349 | INIT_LIST_HEAD(&osb->net_response_list); | ||
1350 | |||
1351 | INIT_LIST_HEAD(&osb->osb_net_handlers); | ||
1352 | init_waitqueue_head(&osb->recovery_event); | 1370 | init_waitqueue_head(&osb->recovery_event); |
1353 | spin_lock_init(&osb->vote_task_lock); | 1371 | spin_lock_init(&osb->dc_task_lock); |
1354 | init_waitqueue_head(&osb->vote_event); | 1372 | init_waitqueue_head(&osb->dc_event); |
1355 | osb->vote_work_sequence = 0; | 1373 | osb->dc_work_sequence = 0; |
1356 | osb->vote_wake_sequence = 0; | 1374 | osb->dc_wake_sequence = 0; |
1357 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1375 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
1358 | osb->blocked_lock_count = 0; | 1376 | osb->blocked_lock_count = 0; |
1359 | INIT_LIST_HEAD(&osb->vote_list); | ||
1360 | spin_lock_init(&osb->osb_lock); | 1377 | spin_lock_init(&osb->osb_lock); |
1361 | 1378 | ||
1362 | atomic_set(&osb->alloc_stats.moves, 0); | 1379 | atomic_set(&osb->alloc_stats.moves, 0); |
@@ -1496,7 +1513,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1496 | } | 1513 | } |
1497 | 1514 | ||
1498 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | 1515 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); |
1499 | osb->net_key = le32_to_cpu(uuid_net_key); | ||
1500 | 1516 | ||
1501 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 1517 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); |
1502 | osb->vol_label[63] = '\0'; | 1518 | osb->vol_label[63] = '\0'; |
@@ -1539,25 +1555,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1539 | } | 1555 | } |
1540 | 1556 | ||
1541 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 1557 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; |
1542 | |||
1543 | /* We don't have a cluster lock on the bitmap here because | ||
1544 | * we're only interested in static information and the extra | ||
1545 | * complexity at mount time isn't worht it. Don't pass the | ||
1546 | * inode in to the read function though as we don't want it to | ||
1547 | * be put in the cache. */ | ||
1548 | status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, | ||
1549 | NULL); | ||
1550 | iput(inode); | 1558 | iput(inode); |
1551 | if (status < 0) { | ||
1552 | mlog_errno(status); | ||
1553 | goto bail; | ||
1554 | } | ||
1555 | 1559 | ||
1556 | di = (struct ocfs2_dinode *) bitmap_bh->b_data; | 1560 | osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; |
1557 | osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
1558 | brelse(bitmap_bh); | ||
1559 | mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", | ||
1560 | (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); | ||
1561 | 1561 | ||
1562 | status = ocfs2_init_slot_info(osb); | 1562 | status = ocfs2_init_slot_info(osb); |
1563 | if (status < 0) { | 1563 | if (status < 0) { |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fd2e846e3e6f..ab713ebdd546 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -112,7 +112,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
112 | goto bail; | 112 | goto bail; |
113 | } | 113 | } |
114 | 114 | ||
115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE); | 115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE, type); |
116 | if (IS_ERR(inode)) { | 116 | if (IS_ERR(inode)) { |
117 | mlog_errno(PTR_ERR(inode)); | 117 | mlog_errno(PTR_ERR(inode)); |
118 | inode = NULL; | 118 | inode = NULL; |
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c index 5405ce121c99..e2488f4128a2 100644 --- a/fs/ocfs2/ver.c +++ b/fs/ocfs2/ver.c | |||
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | #include "ver.h" | 30 | #include "ver.h" |
31 | 31 | ||
32 | #define OCFS2_BUILD_VERSION "1.3.3" | 32 | #define OCFS2_BUILD_VERSION "1.5.0" |
33 | 33 | ||
34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION | 34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION |
35 | 35 | ||
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c deleted file mode 100644 index c05358538f2b..000000000000 --- a/fs/ocfs2/vote.c +++ /dev/null | |||
@@ -1,756 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * vote.c | ||
5 | * | ||
6 | * description here | ||
7 | * | ||
8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/slab.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/kthread.h> | ||
30 | |||
31 | #include <cluster/heartbeat.h> | ||
32 | #include <cluster/nodemanager.h> | ||
33 | #include <cluster/tcp.h> | ||
34 | |||
35 | #include <dlm/dlmapi.h> | ||
36 | |||
37 | #define MLOG_MASK_PREFIX ML_VOTE | ||
38 | #include <cluster/masklog.h> | ||
39 | |||
40 | #include "ocfs2.h" | ||
41 | |||
42 | #include "alloc.h" | ||
43 | #include "dlmglue.h" | ||
44 | #include "extent_map.h" | ||
45 | #include "heartbeat.h" | ||
46 | #include "inode.h" | ||
47 | #include "journal.h" | ||
48 | #include "slot_map.h" | ||
49 | #include "vote.h" | ||
50 | |||
51 | #include "buffer_head_io.h" | ||
52 | |||
53 | #define OCFS2_MESSAGE_TYPE_VOTE (0x1) | ||
54 | #define OCFS2_MESSAGE_TYPE_RESPONSE (0x2) | ||
55 | struct ocfs2_msg_hdr | ||
56 | { | ||
57 | __be32 h_response_id; /* used to lookup message handle on sending | ||
58 | * node. */ | ||
59 | __be32 h_request; | ||
60 | __be64 h_blkno; | ||
61 | __be32 h_generation; | ||
62 | __be32 h_node_num; /* node sending this particular message. */ | ||
63 | }; | ||
64 | |||
65 | struct ocfs2_vote_msg | ||
66 | { | ||
67 | struct ocfs2_msg_hdr v_hdr; | ||
68 | __be32 v_reserved1; | ||
69 | } __attribute__ ((packed)); | ||
70 | |||
71 | /* Responses are given these values to maintain backwards | ||
72 | * compatibility with older ocfs2 versions */ | ||
73 | #define OCFS2_RESPONSE_OK (0) | ||
74 | #define OCFS2_RESPONSE_BUSY (-16) | ||
75 | #define OCFS2_RESPONSE_BAD_MSG (-22) | ||
76 | |||
77 | struct ocfs2_response_msg | ||
78 | { | ||
79 | struct ocfs2_msg_hdr r_hdr; | ||
80 | __be32 r_response; | ||
81 | } __attribute__ ((packed)); | ||
82 | |||
83 | struct ocfs2_vote_work { | ||
84 | struct list_head w_list; | ||
85 | struct ocfs2_vote_msg w_msg; | ||
86 | }; | ||
87 | |||
88 | enum ocfs2_vote_request { | ||
89 | OCFS2_VOTE_REQ_INVALID = 0, | ||
90 | OCFS2_VOTE_REQ_MOUNT, | ||
91 | OCFS2_VOTE_REQ_UMOUNT, | ||
92 | OCFS2_VOTE_REQ_LAST | ||
93 | }; | ||
94 | |||
95 | static inline int ocfs2_is_valid_vote_request(int request) | ||
96 | { | ||
97 | return OCFS2_VOTE_REQ_INVALID < request && | ||
98 | request < OCFS2_VOTE_REQ_LAST; | ||
99 | } | ||
100 | |||
101 | typedef void (*ocfs2_net_response_callback)(void *priv, | ||
102 | struct ocfs2_response_msg *resp); | ||
103 | struct ocfs2_net_response_cb { | ||
104 | ocfs2_net_response_callback rc_cb; | ||
105 | void *rc_priv; | ||
106 | }; | ||
107 | |||
108 | struct ocfs2_net_wait_ctxt { | ||
109 | struct list_head n_list; | ||
110 | u32 n_response_id; | ||
111 | wait_queue_head_t n_event; | ||
112 | struct ocfs2_node_map n_node_map; | ||
113 | int n_response; /* an agreggate response. 0 if | ||
114 | * all nodes are go, < 0 on any | ||
115 | * negative response from any | ||
116 | * node or network error. */ | ||
117 | struct ocfs2_net_response_cb *n_callback; | ||
118 | }; | ||
119 | |||
120 | static void ocfs2_process_mount_request(struct ocfs2_super *osb, | ||
121 | unsigned int node_num) | ||
122 | { | ||
123 | mlog(0, "MOUNT vote from node %u\n", node_num); | ||
124 | /* The other node only sends us this message when he has an EX | ||
125 | * on the superblock, so our recovery threads (if having been | ||
126 | * launched) are waiting on it.*/ | ||
127 | ocfs2_recovery_map_clear(osb, node_num); | ||
128 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, node_num); | ||
129 | |||
130 | /* We clear the umount map here because a node may have been | ||
131 | * previously mounted, safely unmounted but never stopped | ||
132 | * heartbeating - in which case we'd have a stale entry. */ | ||
133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
134 | } | ||
135 | |||
136 | static void ocfs2_process_umount_request(struct ocfs2_super *osb, | ||
137 | unsigned int node_num) | ||
138 | { | ||
139 | mlog(0, "UMOUNT vote from node %u\n", node_num); | ||
140 | ocfs2_node_map_clear_bit(osb, &osb->mounted_map, node_num); | ||
141 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); | ||
142 | } | ||
143 | |||
144 | static void ocfs2_process_vote(struct ocfs2_super *osb, | ||
145 | struct ocfs2_vote_msg *msg) | ||
146 | { | ||
147 | int net_status, vote_response; | ||
148 | unsigned int node_num; | ||
149 | u64 blkno; | ||
150 | enum ocfs2_vote_request request; | ||
151 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | ||
152 | struct ocfs2_response_msg response; | ||
153 | |||
154 | /* decode the network mumbo jumbo into local variables. */ | ||
155 | request = be32_to_cpu(hdr->h_request); | ||
156 | blkno = be64_to_cpu(hdr->h_blkno); | ||
157 | node_num = be32_to_cpu(hdr->h_node_num); | ||
158 | |||
159 | mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n", | ||
160 | request, (unsigned long long)blkno, node_num); | ||
161 | |||
162 | if (!ocfs2_is_valid_vote_request(request)) { | ||
163 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", | ||
164 | request, node_num); | ||
165 | vote_response = OCFS2_RESPONSE_BAD_MSG; | ||
166 | goto respond; | ||
167 | } | ||
168 | |||
169 | vote_response = OCFS2_RESPONSE_OK; | ||
170 | |||
171 | switch (request) { | ||
172 | case OCFS2_VOTE_REQ_UMOUNT: | ||
173 | ocfs2_process_umount_request(osb, node_num); | ||
174 | goto respond; | ||
175 | case OCFS2_VOTE_REQ_MOUNT: | ||
176 | ocfs2_process_mount_request(osb, node_num); | ||
177 | goto respond; | ||
178 | default: | ||
179 | /* avoids a gcc warning */ | ||
180 | break; | ||
181 | } | ||
182 | |||
183 | respond: | ||
184 | /* Response struture is small so we just put it on the stack | ||
185 | * and stuff it inline. */ | ||
186 | memset(&response, 0, sizeof(struct ocfs2_response_msg)); | ||
187 | response.r_hdr.h_response_id = hdr->h_response_id; | ||
188 | response.r_hdr.h_blkno = hdr->h_blkno; | ||
189 | response.r_hdr.h_generation = hdr->h_generation; | ||
190 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); | ||
191 | response.r_response = cpu_to_be32(vote_response); | ||
192 | |||
193 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
194 | osb->net_key, | ||
195 | &response, | ||
196 | sizeof(struct ocfs2_response_msg), | ||
197 | node_num, | ||
198 | NULL); | ||
199 | /* We still want to error print for ENOPROTOOPT here. The | ||
200 | * sending node shouldn't have unregistered his net handler | ||
201 | * without sending an unmount vote 1st */ | ||
202 | if (net_status < 0 | ||
203 | && net_status != -ETIMEDOUT | ||
204 | && net_status != -ENOTCONN) | ||
205 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", | ||
206 | node_num, net_status); | ||
207 | } | ||
208 | |||
209 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) | ||
210 | { | ||
211 | unsigned long processed; | ||
212 | struct ocfs2_lock_res *lockres; | ||
213 | struct ocfs2_vote_work *work; | ||
214 | |||
215 | mlog_entry_void(); | ||
216 | |||
217 | spin_lock(&osb->vote_task_lock); | ||
218 | /* grab this early so we know to try again if a state change and | ||
219 | * wake happens part-way through our work */ | ||
220 | osb->vote_work_sequence = osb->vote_wake_sequence; | ||
221 | |||
222 | processed = osb->blocked_lock_count; | ||
223 | while (processed) { | ||
224 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
225 | |||
226 | lockres = list_entry(osb->blocked_lock_list.next, | ||
227 | struct ocfs2_lock_res, l_blocked_list); | ||
228 | list_del_init(&lockres->l_blocked_list); | ||
229 | osb->blocked_lock_count--; | ||
230 | spin_unlock(&osb->vote_task_lock); | ||
231 | |||
232 | BUG_ON(!processed); | ||
233 | processed--; | ||
234 | |||
235 | ocfs2_process_blocked_lock(osb, lockres); | ||
236 | |||
237 | spin_lock(&osb->vote_task_lock); | ||
238 | } | ||
239 | |||
240 | while (osb->vote_count) { | ||
241 | BUG_ON(list_empty(&osb->vote_list)); | ||
242 | work = list_entry(osb->vote_list.next, | ||
243 | struct ocfs2_vote_work, w_list); | ||
244 | list_del(&work->w_list); | ||
245 | osb->vote_count--; | ||
246 | spin_unlock(&osb->vote_task_lock); | ||
247 | |||
248 | ocfs2_process_vote(osb, &work->w_msg); | ||
249 | kfree(work); | ||
250 | |||
251 | spin_lock(&osb->vote_task_lock); | ||
252 | } | ||
253 | spin_unlock(&osb->vote_task_lock); | ||
254 | |||
255 | mlog_exit_void(); | ||
256 | } | ||
257 | |||
258 | static int ocfs2_vote_thread_lists_empty(struct ocfs2_super *osb) | ||
259 | { | ||
260 | int empty = 0; | ||
261 | |||
262 | spin_lock(&osb->vote_task_lock); | ||
263 | if (list_empty(&osb->blocked_lock_list) && | ||
264 | list_empty(&osb->vote_list)) | ||
265 | empty = 1; | ||
266 | |||
267 | spin_unlock(&osb->vote_task_lock); | ||
268 | return empty; | ||
269 | } | ||
270 | |||
271 | static int ocfs2_vote_thread_should_wake(struct ocfs2_super *osb) | ||
272 | { | ||
273 | int should_wake = 0; | ||
274 | |||
275 | spin_lock(&osb->vote_task_lock); | ||
276 | if (osb->vote_work_sequence != osb->vote_wake_sequence) | ||
277 | should_wake = 1; | ||
278 | spin_unlock(&osb->vote_task_lock); | ||
279 | |||
280 | return should_wake; | ||
281 | } | ||
282 | |||
283 | int ocfs2_vote_thread(void *arg) | ||
284 | { | ||
285 | int status = 0; | ||
286 | struct ocfs2_super *osb = arg; | ||
287 | |||
288 | /* only quit once we've been asked to stop and there is no more | ||
289 | * work available */ | ||
290 | while (!(kthread_should_stop() && | ||
291 | ocfs2_vote_thread_lists_empty(osb))) { | ||
292 | |||
293 | wait_event_interruptible(osb->vote_event, | ||
294 | ocfs2_vote_thread_should_wake(osb) || | ||
295 | kthread_should_stop()); | ||
296 | |||
297 | mlog(0, "vote_thread: awoken\n"); | ||
298 | |||
299 | ocfs2_vote_thread_do_work(osb); | ||
300 | } | ||
301 | |||
302 | osb->vote_task = NULL; | ||
303 | return status; | ||
304 | } | ||
305 | |||
306 | static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id) | ||
307 | { | ||
308 | struct ocfs2_net_wait_ctxt *w; | ||
309 | |||
310 | w = kzalloc(sizeof(*w), GFP_NOFS); | ||
311 | if (!w) { | ||
312 | mlog_errno(-ENOMEM); | ||
313 | goto bail; | ||
314 | } | ||
315 | |||
316 | INIT_LIST_HEAD(&w->n_list); | ||
317 | init_waitqueue_head(&w->n_event); | ||
318 | ocfs2_node_map_init(&w->n_node_map); | ||
319 | w->n_response_id = response_id; | ||
320 | w->n_callback = NULL; | ||
321 | bail: | ||
322 | return w; | ||
323 | } | ||
324 | |||
325 | static unsigned int ocfs2_new_response_id(struct ocfs2_super *osb) | ||
326 | { | ||
327 | unsigned int ret; | ||
328 | |||
329 | spin_lock(&osb->net_response_lock); | ||
330 | ret = ++osb->net_response_ids; | ||
331 | spin_unlock(&osb->net_response_lock); | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static void ocfs2_dequeue_net_wait_ctxt(struct ocfs2_super *osb, | ||
337 | struct ocfs2_net_wait_ctxt *w) | ||
338 | { | ||
339 | spin_lock(&osb->net_response_lock); | ||
340 | list_del(&w->n_list); | ||
341 | spin_unlock(&osb->net_response_lock); | ||
342 | } | ||
343 | |||
344 | static void ocfs2_queue_net_wait_ctxt(struct ocfs2_super *osb, | ||
345 | struct ocfs2_net_wait_ctxt *w) | ||
346 | { | ||
347 | spin_lock(&osb->net_response_lock); | ||
348 | list_add_tail(&w->n_list, | ||
349 | &osb->net_response_list); | ||
350 | spin_unlock(&osb->net_response_lock); | ||
351 | } | ||
352 | |||
353 | static void __ocfs2_mark_node_responded(struct ocfs2_super *osb, | ||
354 | struct ocfs2_net_wait_ctxt *w, | ||
355 | int node_num) | ||
356 | { | ||
357 | assert_spin_locked(&osb->net_response_lock); | ||
358 | |||
359 | ocfs2_node_map_clear_bit(osb, &w->n_node_map, node_num); | ||
360 | if (ocfs2_node_map_is_empty(osb, &w->n_node_map)) | ||
361 | wake_up(&w->n_event); | ||
362 | } | ||
363 | |||
364 | /* Intended to be called from the node down callback, we fake remove | ||
365 | * the node from all our response contexts */ | ||
366 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
367 | int node_num) | ||
368 | { | ||
369 | struct list_head *p; | ||
370 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
371 | |||
372 | spin_lock(&osb->net_response_lock); | ||
373 | |||
374 | list_for_each(p, &osb->net_response_list) { | ||
375 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
376 | |||
377 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
378 | } | ||
379 | |||
380 | spin_unlock(&osb->net_response_lock); | ||
381 | } | ||
382 | |||
383 | static int ocfs2_broadcast_vote(struct ocfs2_super *osb, | ||
384 | struct ocfs2_vote_msg *request, | ||
385 | unsigned int response_id, | ||
386 | int *response, | ||
387 | struct ocfs2_net_response_cb *callback) | ||
388 | { | ||
389 | int status, i, remote_err; | ||
390 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
391 | int dequeued = 0; | ||
392 | |||
393 | mlog_entry_void(); | ||
394 | |||
395 | w = ocfs2_new_net_wait_ctxt(response_id); | ||
396 | if (!w) { | ||
397 | status = -ENOMEM; | ||
398 | mlog_errno(status); | ||
399 | goto bail; | ||
400 | } | ||
401 | w->n_callback = callback; | ||
402 | |||
403 | /* we're pretty much ready to go at this point, and this fills | ||
404 | * in n_response which we need anyway... */ | ||
405 | ocfs2_queue_net_wait_ctxt(osb, w); | ||
406 | |||
407 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, 0); | ||
408 | |||
409 | while (i != O2NM_INVALID_NODE_NUM) { | ||
410 | if (i != osb->node_num) { | ||
411 | mlog(0, "trying to send request to node %i\n", i); | ||
412 | ocfs2_node_map_set_bit(osb, &w->n_node_map, i); | ||
413 | |||
414 | remote_err = 0; | ||
415 | status = o2net_send_message(OCFS2_MESSAGE_TYPE_VOTE, | ||
416 | osb->net_key, | ||
417 | request, | ||
418 | sizeof(*request), | ||
419 | i, | ||
420 | &remote_err); | ||
421 | if (status == -ETIMEDOUT) { | ||
422 | mlog(0, "remote node %d timed out!\n", i); | ||
423 | status = -EAGAIN; | ||
424 | goto bail; | ||
425 | } | ||
426 | if (remote_err < 0) { | ||
427 | status = remote_err; | ||
428 | mlog(0, "remote error %d on node %d!\n", | ||
429 | remote_err, i); | ||
430 | mlog_errno(status); | ||
431 | goto bail; | ||
432 | } | ||
433 | if (status < 0) { | ||
434 | mlog_errno(status); | ||
435 | goto bail; | ||
436 | } | ||
437 | } | ||
438 | i++; | ||
439 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, i); | ||
440 | mlog(0, "next is %d, i am %d\n", i, osb->node_num); | ||
441 | } | ||
442 | mlog(0, "done sending, now waiting on responses...\n"); | ||
443 | |||
444 | wait_event(w->n_event, ocfs2_node_map_is_empty(osb, &w->n_node_map)); | ||
445 | |||
446 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
447 | dequeued = 1; | ||
448 | |||
449 | *response = w->n_response; | ||
450 | status = 0; | ||
451 | bail: | ||
452 | if (w) { | ||
453 | if (!dequeued) | ||
454 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
455 | kfree(w); | ||
456 | } | ||
457 | |||
458 | mlog_exit(status); | ||
459 | return status; | ||
460 | } | ||
461 | |||
462 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | ||
463 | u64 blkno, | ||
464 | unsigned int generation, | ||
465 | enum ocfs2_vote_request type) | ||
466 | { | ||
467 | struct ocfs2_vote_msg *request; | ||
468 | struct ocfs2_msg_hdr *hdr; | ||
469 | |||
470 | BUG_ON(!ocfs2_is_valid_vote_request(type)); | ||
471 | |||
472 | request = kzalloc(sizeof(*request), GFP_NOFS); | ||
473 | if (!request) { | ||
474 | mlog_errno(-ENOMEM); | ||
475 | } else { | ||
476 | hdr = &request->v_hdr; | ||
477 | hdr->h_node_num = cpu_to_be32(osb->node_num); | ||
478 | hdr->h_request = cpu_to_be32(type); | ||
479 | hdr->h_blkno = cpu_to_be64(blkno); | ||
480 | hdr->h_generation = cpu_to_be32(generation); | ||
481 | } | ||
482 | |||
483 | return request; | ||
484 | } | ||
485 | |||
486 | /* Complete the buildup of a new vote request and process the | ||
487 | * broadcast return value. */ | ||
488 | static int ocfs2_do_request_vote(struct ocfs2_super *osb, | ||
489 | struct ocfs2_vote_msg *request, | ||
490 | struct ocfs2_net_response_cb *callback) | ||
491 | { | ||
492 | int status, response = -EBUSY; | ||
493 | unsigned int response_id; | ||
494 | struct ocfs2_msg_hdr *hdr; | ||
495 | |||
496 | response_id = ocfs2_new_response_id(osb); | ||
497 | |||
498 | hdr = &request->v_hdr; | ||
499 | hdr->h_response_id = cpu_to_be32(response_id); | ||
500 | |||
501 | status = ocfs2_broadcast_vote(osb, request, response_id, &response, | ||
502 | callback); | ||
503 | if (status < 0) { | ||
504 | mlog_errno(status); | ||
505 | goto bail; | ||
506 | } | ||
507 | |||
508 | status = response; | ||
509 | bail: | ||
510 | |||
511 | return status; | ||
512 | } | ||
513 | |||
514 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | ||
515 | { | ||
516 | int status; | ||
517 | struct ocfs2_vote_msg *request = NULL; | ||
518 | |||
519 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT); | ||
520 | if (!request) { | ||
521 | status = -ENOMEM; | ||
522 | goto bail; | ||
523 | } | ||
524 | |||
525 | status = -EAGAIN; | ||
526 | while (status == -EAGAIN) { | ||
527 | if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) && | ||
528 | signal_pending(current)) { | ||
529 | status = -ERESTARTSYS; | ||
530 | goto bail; | ||
531 | } | ||
532 | |||
533 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
534 | osb->node_num)) { | ||
535 | status = 0; | ||
536 | goto bail; | ||
537 | } | ||
538 | |||
539 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
540 | } | ||
541 | |||
542 | bail: | ||
543 | kfree(request); | ||
544 | return status; | ||
545 | } | ||
546 | |||
547 | int ocfs2_request_umount_vote(struct ocfs2_super *osb) | ||
548 | { | ||
549 | int status; | ||
550 | struct ocfs2_vote_msg *request = NULL; | ||
551 | |||
552 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT); | ||
553 | if (!request) { | ||
554 | status = -ENOMEM; | ||
555 | goto bail; | ||
556 | } | ||
557 | |||
558 | status = -EAGAIN; | ||
559 | while (status == -EAGAIN) { | ||
560 | /* Do not check signals on this vote... We really want | ||
561 | * this one to go all the way through. */ | ||
562 | |||
563 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
564 | osb->node_num)) { | ||
565 | status = 0; | ||
566 | goto bail; | ||
567 | } | ||
568 | |||
569 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
570 | } | ||
571 | |||
572 | bail: | ||
573 | kfree(request); | ||
574 | return status; | ||
575 | } | ||
576 | |||
577 | /* TODO: This should eventually be a hash table! */ | ||
578 | static struct ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(struct ocfs2_super *osb, | ||
579 | u32 response_id) | ||
580 | { | ||
581 | struct list_head *p; | ||
582 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
583 | |||
584 | list_for_each(p, &osb->net_response_list) { | ||
585 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
586 | if (response_id == w->n_response_id) | ||
587 | break; | ||
588 | w = NULL; | ||
589 | } | ||
590 | |||
591 | return w; | ||
592 | } | ||
593 | |||
594 | /* Translate response codes into local node errno values */ | ||
595 | static inline int ocfs2_translate_response(int response) | ||
596 | { | ||
597 | int ret; | ||
598 | |||
599 | switch (response) { | ||
600 | case OCFS2_RESPONSE_OK: | ||
601 | ret = 0; | ||
602 | break; | ||
603 | |||
604 | case OCFS2_RESPONSE_BUSY: | ||
605 | ret = -EBUSY; | ||
606 | break; | ||
607 | |||
608 | default: | ||
609 | ret = -EINVAL; | ||
610 | } | ||
611 | |||
612 | return ret; | ||
613 | } | ||
614 | |||
615 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | ||
616 | u32 len, | ||
617 | void *data, void **ret_data) | ||
618 | { | ||
619 | unsigned int response_id, node_num; | ||
620 | int response_status; | ||
621 | struct ocfs2_super *osb = data; | ||
622 | struct ocfs2_response_msg *resp; | ||
623 | struct ocfs2_net_wait_ctxt * w; | ||
624 | struct ocfs2_net_response_cb *resp_cb; | ||
625 | |||
626 | resp = (struct ocfs2_response_msg *) msg->buf; | ||
627 | |||
628 | response_id = be32_to_cpu(resp->r_hdr.h_response_id); | ||
629 | node_num = be32_to_cpu(resp->r_hdr.h_node_num); | ||
630 | response_status = | ||
631 | ocfs2_translate_response(be32_to_cpu(resp->r_response)); | ||
632 | |||
633 | mlog(0, "received response message:\n"); | ||
634 | mlog(0, "h_response_id = %u\n", response_id); | ||
635 | mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request)); | ||
636 | mlog(0, "h_blkno = %llu\n", | ||
637 | (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno)); | ||
638 | mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation)); | ||
639 | mlog(0, "h_node_num = %u\n", node_num); | ||
640 | mlog(0, "r_response = %d\n", response_status); | ||
641 | |||
642 | spin_lock(&osb->net_response_lock); | ||
643 | w = __ocfs2_find_net_wait_ctxt(osb, response_id); | ||
644 | if (!w) { | ||
645 | mlog(0, "request not found!\n"); | ||
646 | goto bail; | ||
647 | } | ||
648 | resp_cb = w->n_callback; | ||
649 | |||
650 | if (response_status && (!w->n_response)) { | ||
651 | /* we only really need one negative response so don't | ||
652 | * set it twice. */ | ||
653 | w->n_response = response_status; | ||
654 | } | ||
655 | |||
656 | if (resp_cb) { | ||
657 | spin_unlock(&osb->net_response_lock); | ||
658 | |||
659 | resp_cb->rc_cb(resp_cb->rc_priv, resp); | ||
660 | |||
661 | spin_lock(&osb->net_response_lock); | ||
662 | } | ||
663 | |||
664 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
665 | bail: | ||
666 | spin_unlock(&osb->net_response_lock); | ||
667 | |||
668 | return 0; | ||
669 | } | ||
670 | |||
671 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | ||
672 | u32 len, | ||
673 | void *data, void **ret_data) | ||
674 | { | ||
675 | int status; | ||
676 | struct ocfs2_super *osb = data; | ||
677 | struct ocfs2_vote_work *work; | ||
678 | |||
679 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); | ||
680 | if (!work) { | ||
681 | status = -ENOMEM; | ||
682 | mlog_errno(status); | ||
683 | goto bail; | ||
684 | } | ||
685 | |||
686 | INIT_LIST_HEAD(&work->w_list); | ||
687 | memcpy(&work->w_msg, msg->buf, sizeof(struct ocfs2_vote_msg)); | ||
688 | |||
689 | mlog(0, "scheduling vote request:\n"); | ||
690 | mlog(0, "h_response_id = %u\n", | ||
691 | be32_to_cpu(work->w_msg.v_hdr.h_response_id)); | ||
692 | mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request)); | ||
693 | mlog(0, "h_blkno = %llu\n", | ||
694 | (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno)); | ||
695 | mlog(0, "h_generation = %u\n", | ||
696 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); | ||
697 | mlog(0, "h_node_num = %u\n", | ||
698 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); | ||
699 | |||
700 | spin_lock(&osb->vote_task_lock); | ||
701 | list_add_tail(&work->w_list, &osb->vote_list); | ||
702 | osb->vote_count++; | ||
703 | spin_unlock(&osb->vote_task_lock); | ||
704 | |||
705 | ocfs2_kick_vote_thread(osb); | ||
706 | |||
707 | status = 0; | ||
708 | bail: | ||
709 | return status; | ||
710 | } | ||
711 | |||
712 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb) | ||
713 | { | ||
714 | if (!osb->net_key) | ||
715 | return; | ||
716 | |||
717 | o2net_unregister_handler_list(&osb->osb_net_handlers); | ||
718 | |||
719 | if (!list_empty(&osb->net_response_list)) | ||
720 | mlog(ML_ERROR, "net response list not empty!\n"); | ||
721 | |||
722 | osb->net_key = 0; | ||
723 | } | ||
724 | |||
725 | int ocfs2_register_net_handlers(struct ocfs2_super *osb) | ||
726 | { | ||
727 | int status = 0; | ||
728 | |||
729 | if (ocfs2_mount_local(osb)) | ||
730 | return 0; | ||
731 | |||
732 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
733 | osb->net_key, | ||
734 | sizeof(struct ocfs2_response_msg), | ||
735 | ocfs2_handle_response_message, | ||
736 | osb, NULL, &osb->osb_net_handlers); | ||
737 | if (status) { | ||
738 | mlog_errno(status); | ||
739 | goto bail; | ||
740 | } | ||
741 | |||
742 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_VOTE, | ||
743 | osb->net_key, | ||
744 | sizeof(struct ocfs2_vote_msg), | ||
745 | ocfs2_handle_vote_message, | ||
746 | osb, NULL, &osb->osb_net_handlers); | ||
747 | if (status) { | ||
748 | mlog_errno(status); | ||
749 | goto bail; | ||
750 | } | ||
751 | bail: | ||
752 | if (status < 0) | ||
753 | ocfs2_unregister_net_handlers(osb); | ||
754 | |||
755 | return status; | ||
756 | } | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d88173840082..6b7ff1618945 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -131,7 +131,7 @@ static void property_stop(struct seq_file *f, void *v) | |||
131 | /* Nothing to do */ | 131 | /* Nothing to do */ |
132 | } | 132 | } |
133 | 133 | ||
134 | static struct seq_operations property_op = { | 134 | static const struct seq_operations property_op = { |
135 | .start = property_start, | 135 | .start = property_start, |
136 | .next = property_next, | 136 | .next = property_next, |
137 | .stop = property_stop, | 137 | .stop = property_stop, |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 722e12e5acc7..739da701ae7b 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -195,96 +195,45 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
195 | return ERR_PTR(res); | 195 | return ERR_PTR(res); |
196 | } | 196 | } |
197 | 197 | ||
198 | /* | 198 | static ssize_t part_start_show(struct device *dev, |
199 | * sysfs bindings for partitions | 199 | struct device_attribute *attr, char *buf) |
200 | */ | ||
201 | |||
202 | struct part_attribute { | ||
203 | struct attribute attr; | ||
204 | ssize_t (*show)(struct hd_struct *,char *); | ||
205 | ssize_t (*store)(struct hd_struct *,const char *, size_t); | ||
206 | }; | ||
207 | |||
208 | static ssize_t | ||
209 | part_attr_show(struct kobject * kobj, struct attribute * attr, char * page) | ||
210 | { | 200 | { |
211 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | 201 | struct hd_struct *p = dev_to_part(dev); |
212 | struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr); | ||
213 | ssize_t ret = 0; | ||
214 | if (part_attr->show) | ||
215 | ret = part_attr->show(p, page); | ||
216 | return ret; | ||
217 | } | ||
218 | static ssize_t | ||
219 | part_attr_store(struct kobject * kobj, struct attribute * attr, | ||
220 | const char *page, size_t count) | ||
221 | { | ||
222 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | ||
223 | struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr); | ||
224 | ssize_t ret = 0; | ||
225 | 202 | ||
226 | if (part_attr->store) | 203 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
227 | ret = part_attr->store(p, page, count); | ||
228 | return ret; | ||
229 | } | 204 | } |
230 | 205 | ||
231 | static struct sysfs_ops part_sysfs_ops = { | 206 | static ssize_t part_size_show(struct device *dev, |
232 | .show = part_attr_show, | 207 | struct device_attribute *attr, char *buf) |
233 | .store = part_attr_store, | ||
234 | }; | ||
235 | |||
236 | static ssize_t part_uevent_store(struct hd_struct * p, | ||
237 | const char *page, size_t count) | ||
238 | { | 208 | { |
239 | kobject_uevent(&p->kobj, KOBJ_ADD); | 209 | struct hd_struct *p = dev_to_part(dev); |
240 | return count; | 210 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
241 | } | 211 | } |
242 | static ssize_t part_dev_read(struct hd_struct * p, char *page) | 212 | |
243 | { | 213 | static ssize_t part_stat_show(struct device *dev, |
244 | struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj); | 214 | struct device_attribute *attr, char *buf) |
245 | dev_t dev = MKDEV(disk->major, disk->first_minor + p->partno); | ||
246 | return print_dev_t(page, dev); | ||
247 | } | ||
248 | static ssize_t part_start_read(struct hd_struct * p, char *page) | ||
249 | { | ||
250 | return sprintf(page, "%llu\n",(unsigned long long)p->start_sect); | ||
251 | } | ||
252 | static ssize_t part_size_read(struct hd_struct * p, char *page) | ||
253 | { | ||
254 | return sprintf(page, "%llu\n",(unsigned long long)p->nr_sects); | ||
255 | } | ||
256 | static ssize_t part_stat_read(struct hd_struct * p, char *page) | ||
257 | { | 215 | { |
258 | return sprintf(page, "%8u %8llu %8u %8llu\n", | 216 | struct hd_struct *p = dev_to_part(dev); |
217 | |||
218 | return sprintf(buf, "%8u %8llu %8u %8llu\n", | ||
259 | p->ios[0], (unsigned long long)p->sectors[0], | 219 | p->ios[0], (unsigned long long)p->sectors[0], |
260 | p->ios[1], (unsigned long long)p->sectors[1]); | 220 | p->ios[1], (unsigned long long)p->sectors[1]); |
261 | } | 221 | } |
262 | static struct part_attribute part_attr_uevent = { | ||
263 | .attr = {.name = "uevent", .mode = S_IWUSR }, | ||
264 | .store = part_uevent_store | ||
265 | }; | ||
266 | static struct part_attribute part_attr_dev = { | ||
267 | .attr = {.name = "dev", .mode = S_IRUGO }, | ||
268 | .show = part_dev_read | ||
269 | }; | ||
270 | static struct part_attribute part_attr_start = { | ||
271 | .attr = {.name = "start", .mode = S_IRUGO }, | ||
272 | .show = part_start_read | ||
273 | }; | ||
274 | static struct part_attribute part_attr_size = { | ||
275 | .attr = {.name = "size", .mode = S_IRUGO }, | ||
276 | .show = part_size_read | ||
277 | }; | ||
278 | static struct part_attribute part_attr_stat = { | ||
279 | .attr = {.name = "stat", .mode = S_IRUGO }, | ||
280 | .show = part_stat_read | ||
281 | }; | ||
282 | 222 | ||
283 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 223 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
224 | static ssize_t part_fail_show(struct device *dev, | ||
225 | struct device_attribute *attr, char *buf) | ||
226 | { | ||
227 | struct hd_struct *p = dev_to_part(dev); | ||
284 | 228 | ||
285 | static ssize_t part_fail_store(struct hd_struct * p, | 229 | return sprintf(buf, "%d\n", p->make_it_fail); |
230 | } | ||
231 | |||
232 | static ssize_t part_fail_store(struct device *dev, | ||
233 | struct device_attribute *attr, | ||
286 | const char *buf, size_t count) | 234 | const char *buf, size_t count) |
287 | { | 235 | { |
236 | struct hd_struct *p = dev_to_part(dev); | ||
288 | int i; | 237 | int i; |
289 | 238 | ||
290 | if (count > 0 && sscanf(buf, "%d", &i) > 0) | 239 | if (count > 0 && sscanf(buf, "%d", &i) > 0) |
@@ -292,50 +241,53 @@ static ssize_t part_fail_store(struct hd_struct * p, | |||
292 | 241 | ||
293 | return count; | 242 | return count; |
294 | } | 243 | } |
295 | static ssize_t part_fail_read(struct hd_struct * p, char *page) | 244 | #endif |
296 | { | ||
297 | return sprintf(page, "%d\n", p->make_it_fail); | ||
298 | } | ||
299 | static struct part_attribute part_attr_fail = { | ||
300 | .attr = {.name = "make-it-fail", .mode = S_IRUGO | S_IWUSR }, | ||
301 | .store = part_fail_store, | ||
302 | .show = part_fail_read | ||
303 | }; | ||
304 | 245 | ||
246 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | ||
247 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | ||
248 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | ||
249 | #ifdef CONFIG_FAIL_MAKE_REQUEST | ||
250 | static struct device_attribute dev_attr_fail = | ||
251 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); | ||
305 | #endif | 252 | #endif |
306 | 253 | ||
307 | static struct attribute * default_attrs[] = { | 254 | static struct attribute *part_attrs[] = { |
308 | &part_attr_uevent.attr, | 255 | &dev_attr_start.attr, |
309 | &part_attr_dev.attr, | 256 | &dev_attr_size.attr, |
310 | &part_attr_start.attr, | 257 | &dev_attr_stat.attr, |
311 | &part_attr_size.attr, | ||
312 | &part_attr_stat.attr, | ||
313 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 258 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
314 | &part_attr_fail.attr, | 259 | &dev_attr_fail.attr, |
315 | #endif | 260 | #endif |
316 | NULL, | 261 | NULL |
317 | }; | 262 | }; |
318 | 263 | ||
319 | extern struct kset block_subsys; | 264 | static struct attribute_group part_attr_group = { |
265 | .attrs = part_attrs, | ||
266 | }; | ||
320 | 267 | ||
321 | static void part_release(struct kobject *kobj) | 268 | static struct attribute_group *part_attr_groups[] = { |
269 | &part_attr_group, | ||
270 | NULL | ||
271 | }; | ||
272 | |||
273 | static void part_release(struct device *dev) | ||
322 | { | 274 | { |
323 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | 275 | struct hd_struct *p = dev_to_part(dev); |
324 | kfree(p); | 276 | kfree(p); |
325 | } | 277 | } |
326 | 278 | ||
327 | struct kobj_type ktype_part = { | 279 | struct device_type part_type = { |
280 | .name = "partition", | ||
281 | .groups = part_attr_groups, | ||
328 | .release = part_release, | 282 | .release = part_release, |
329 | .default_attrs = default_attrs, | ||
330 | .sysfs_ops = &part_sysfs_ops, | ||
331 | }; | 283 | }; |
332 | 284 | ||
333 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 285 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) |
334 | { | 286 | { |
335 | struct kobject *k; | 287 | struct kobject *k; |
336 | 288 | ||
337 | k = kobject_get(&p->kobj); | 289 | k = kobject_get(&p->dev.kobj); |
338 | p->holder_dir = kobject_add_dir(k, "holders"); | 290 | p->holder_dir = kobject_create_and_add("holders", k); |
339 | kobject_put(k); | 291 | kobject_put(k); |
340 | } | 292 | } |
341 | 293 | ||
@@ -343,15 +295,16 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | |||
343 | { | 295 | { |
344 | struct kobject *k; | 296 | struct kobject *k; |
345 | 297 | ||
346 | k = kobject_get(&disk->kobj); | 298 | k = kobject_get(&disk->dev.kobj); |
347 | disk->holder_dir = kobject_add_dir(k, "holders"); | 299 | disk->holder_dir = kobject_create_and_add("holders", k); |
348 | disk->slave_dir = kobject_add_dir(k, "slaves"); | 300 | disk->slave_dir = kobject_create_and_add("slaves", k); |
349 | kobject_put(k); | 301 | kobject_put(k); |
350 | } | 302 | } |
351 | 303 | ||
352 | void delete_partition(struct gendisk *disk, int part) | 304 | void delete_partition(struct gendisk *disk, int part) |
353 | { | 305 | { |
354 | struct hd_struct *p = disk->part[part-1]; | 306 | struct hd_struct *p = disk->part[part-1]; |
307 | |||
355 | if (!p) | 308 | if (!p) |
356 | return; | 309 | return; |
357 | if (!p->nr_sects) | 310 | if (!p->nr_sects) |
@@ -361,113 +314,55 @@ void delete_partition(struct gendisk *disk, int part) | |||
361 | p->nr_sects = 0; | 314 | p->nr_sects = 0; |
362 | p->ios[0] = p->ios[1] = 0; | 315 | p->ios[0] = p->ios[1] = 0; |
363 | p->sectors[0] = p->sectors[1] = 0; | 316 | p->sectors[0] = p->sectors[1] = 0; |
364 | sysfs_remove_link(&p->kobj, "subsystem"); | 317 | kobject_put(p->holder_dir); |
365 | kobject_unregister(p->holder_dir); | 318 | device_del(&p->dev); |
366 | kobject_uevent(&p->kobj, KOBJ_REMOVE); | 319 | put_device(&p->dev); |
367 | kobject_del(&p->kobj); | ||
368 | kobject_put(&p->kobj); | ||
369 | } | 320 | } |
370 | 321 | ||
371 | void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 322 | void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) |
372 | { | 323 | { |
373 | struct hd_struct *p; | 324 | struct hd_struct *p; |
325 | int err; | ||
374 | 326 | ||
375 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 327 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
376 | if (!p) | 328 | if (!p) |
377 | return; | 329 | return; |
378 | 330 | ||
379 | p->start_sect = start; | 331 | p->start_sect = start; |
380 | p->nr_sects = len; | 332 | p->nr_sects = len; |
381 | p->partno = part; | 333 | p->partno = part; |
382 | p->policy = disk->policy; | 334 | p->policy = disk->policy; |
383 | 335 | ||
384 | if (isdigit(disk->kobj.k_name[strlen(disk->kobj.k_name)-1])) | 336 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) |
385 | kobject_set_name(&p->kobj, "%sp%d", | 337 | snprintf(p->dev.bus_id, BUS_ID_SIZE, |
386 | kobject_name(&disk->kobj), part); | 338 | "%sp%d", disk->dev.bus_id, part); |
387 | else | 339 | else |
388 | kobject_set_name(&p->kobj, "%s%d", | 340 | snprintf(p->dev.bus_id, BUS_ID_SIZE, |
389 | kobject_name(&disk->kobj),part); | 341 | "%s%d", disk->dev.bus_id, part); |
390 | p->kobj.parent = &disk->kobj; | 342 | |
391 | p->kobj.ktype = &ktype_part; | 343 | device_initialize(&p->dev); |
392 | kobject_init(&p->kobj); | 344 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); |
393 | kobject_add(&p->kobj); | 345 | p->dev.class = &block_class; |
394 | if (!disk->part_uevent_suppress) | 346 | p->dev.type = &part_type; |
395 | kobject_uevent(&p->kobj, KOBJ_ADD); | 347 | p->dev.parent = &disk->dev; |
396 | sysfs_create_link(&p->kobj, &block_subsys.kobj, "subsystem"); | 348 | disk->part[part-1] = p; |
349 | |||
350 | /* delay uevent until 'holders' subdir is created */ | ||
351 | p->dev.uevent_suppress = 1; | ||
352 | device_add(&p->dev); | ||
353 | partition_sysfs_add_subdir(p); | ||
354 | p->dev.uevent_suppress = 0; | ||
397 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 355 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
398 | static struct attribute addpartattr = { | 356 | static struct attribute addpartattr = { |
399 | .name = "whole_disk", | 357 | .name = "whole_disk", |
400 | .mode = S_IRUSR | S_IRGRP | S_IROTH, | 358 | .mode = S_IRUSR | S_IRGRP | S_IROTH, |
401 | }; | 359 | }; |
402 | 360 | err = sysfs_create_file(&p->dev.kobj, &addpartattr); | |
403 | sysfs_create_file(&p->kobj, &addpartattr); | ||
404 | } | 361 | } |
405 | partition_sysfs_add_subdir(p); | ||
406 | disk->part[part-1] = p; | ||
407 | } | ||
408 | 362 | ||
409 | static char *make_block_name(struct gendisk *disk) | 363 | /* suppress uevent if the disk supresses it */ |
410 | { | 364 | if (!disk->dev.uevent_suppress) |
411 | char *name; | 365 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); |
412 | static char *block_str = "block:"; | ||
413 | int size; | ||
414 | char *s; | ||
415 | |||
416 | size = strlen(block_str) + strlen(disk->disk_name) + 1; | ||
417 | name = kmalloc(size, GFP_KERNEL); | ||
418 | if (!name) | ||
419 | return NULL; | ||
420 | strcpy(name, block_str); | ||
421 | strcat(name, disk->disk_name); | ||
422 | /* ewww... some of these buggers have / in name... */ | ||
423 | s = strchr(name, '/'); | ||
424 | if (s) | ||
425 | *s = '!'; | ||
426 | return name; | ||
427 | } | ||
428 | |||
429 | static int disk_sysfs_symlinks(struct gendisk *disk) | ||
430 | { | ||
431 | struct device *target = get_device(disk->driverfs_dev); | ||
432 | int err; | ||
433 | char *disk_name = NULL; | ||
434 | |||
435 | if (target) { | ||
436 | disk_name = make_block_name(disk); | ||
437 | if (!disk_name) { | ||
438 | err = -ENOMEM; | ||
439 | goto err_out; | ||
440 | } | ||
441 | |||
442 | err = sysfs_create_link(&disk->kobj, &target->kobj, "device"); | ||
443 | if (err) | ||
444 | goto err_out_disk_name; | ||
445 | |||
446 | err = sysfs_create_link(&target->kobj, &disk->kobj, disk_name); | ||
447 | if (err) | ||
448 | goto err_out_dev_link; | ||
449 | } | ||
450 | |||
451 | err = sysfs_create_link(&disk->kobj, &block_subsys.kobj, | ||
452 | "subsystem"); | ||
453 | if (err) | ||
454 | goto err_out_disk_name_lnk; | ||
455 | |||
456 | kfree(disk_name); | ||
457 | |||
458 | return 0; | ||
459 | |||
460 | err_out_disk_name_lnk: | ||
461 | if (target) { | ||
462 | sysfs_remove_link(&target->kobj, disk_name); | ||
463 | err_out_dev_link: | ||
464 | sysfs_remove_link(&disk->kobj, "device"); | ||
465 | err_out_disk_name: | ||
466 | kfree(disk_name); | ||
467 | err_out: | ||
468 | put_device(target); | ||
469 | } | ||
470 | return err; | ||
471 | } | 366 | } |
472 | 367 | ||
473 | /* Not exported, helper to add_disk(). */ | 368 | /* Not exported, helper to add_disk(). */ |
@@ -479,19 +374,29 @@ void register_disk(struct gendisk *disk) | |||
479 | struct hd_struct *p; | 374 | struct hd_struct *p; |
480 | int err; | 375 | int err; |
481 | 376 | ||
482 | kobject_set_name(&disk->kobj, "%s", disk->disk_name); | 377 | disk->dev.parent = disk->driverfs_dev; |
483 | /* ewww... some of these buggers have / in name... */ | 378 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); |
484 | s = strchr(disk->kobj.k_name, '/'); | 379 | |
380 | strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); | ||
381 | /* ewww... some of these buggers have / in the name... */ | ||
382 | s = strchr(disk->dev.bus_id, '/'); | ||
485 | if (s) | 383 | if (s) |
486 | *s = '!'; | 384 | *s = '!'; |
487 | if ((err = kobject_add(&disk->kobj))) | 385 | |
386 | /* delay uevents, until we scanned partition table */ | ||
387 | disk->dev.uevent_suppress = 1; | ||
388 | |||
389 | if (device_add(&disk->dev)) | ||
488 | return; | 390 | return; |
489 | err = disk_sysfs_symlinks(disk); | 391 | #ifndef CONFIG_SYSFS_DEPRECATED |
392 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | ||
393 | kobject_name(&disk->dev.kobj)); | ||
490 | if (err) { | 394 | if (err) { |
491 | kobject_del(&disk->kobj); | 395 | device_del(&disk->dev); |
492 | return; | 396 | return; |
493 | } | 397 | } |
494 | disk_sysfs_add_subdirs(disk); | 398 | #endif |
399 | disk_sysfs_add_subdirs(disk); | ||
495 | 400 | ||
496 | /* No minors to use for partitions */ | 401 | /* No minors to use for partitions */ |
497 | if (disk->minors == 1) | 402 | if (disk->minors == 1) |
@@ -505,25 +410,23 @@ void register_disk(struct gendisk *disk) | |||
505 | if (!bdev) | 410 | if (!bdev) |
506 | goto exit; | 411 | goto exit; |
507 | 412 | ||
508 | /* scan partition table, but suppress uevents */ | ||
509 | bdev->bd_invalidated = 1; | 413 | bdev->bd_invalidated = 1; |
510 | disk->part_uevent_suppress = 1; | ||
511 | err = blkdev_get(bdev, FMODE_READ, 0); | 414 | err = blkdev_get(bdev, FMODE_READ, 0); |
512 | disk->part_uevent_suppress = 0; | ||
513 | if (err < 0) | 415 | if (err < 0) |
514 | goto exit; | 416 | goto exit; |
515 | blkdev_put(bdev); | 417 | blkdev_put(bdev); |
516 | 418 | ||
517 | exit: | 419 | exit: |
518 | /* announce disk after possible partitions are already created */ | 420 | /* announce disk after possible partitions are created */ |
519 | kobject_uevent(&disk->kobj, KOBJ_ADD); | 421 | disk->dev.uevent_suppress = 0; |
422 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | ||
520 | 423 | ||
521 | /* announce possible partitions */ | 424 | /* announce possible partitions */ |
522 | for (i = 1; i < disk->minors; i++) { | 425 | for (i = 1; i < disk->minors; i++) { |
523 | p = disk->part[i-1]; | 426 | p = disk->part[i-1]; |
524 | if (!p || !p->nr_sects) | 427 | if (!p || !p->nr_sects) |
525 | continue; | 428 | continue; |
526 | kobject_uevent(&p->kobj, KOBJ_ADD); | 429 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); |
527 | } | 430 | } |
528 | } | 431 | } |
529 | 432 | ||
@@ -602,19 +505,11 @@ void del_gendisk(struct gendisk *disk) | |||
602 | disk_stat_set_all(disk, 0); | 505 | disk_stat_set_all(disk, 0); |
603 | disk->stamp = 0; | 506 | disk->stamp = 0; |
604 | 507 | ||
605 | kobject_uevent(&disk->kobj, KOBJ_REMOVE); | 508 | kobject_put(disk->holder_dir); |
606 | kobject_unregister(disk->holder_dir); | 509 | kobject_put(disk->slave_dir); |
607 | kobject_unregister(disk->slave_dir); | 510 | disk->driverfs_dev = NULL; |
608 | if (disk->driverfs_dev) { | 511 | #ifndef CONFIG_SYSFS_DEPRECATED |
609 | char *disk_name = make_block_name(disk); | 512 | sysfs_remove_link(block_depr, disk->dev.bus_id); |
610 | sysfs_remove_link(&disk->kobj, "device"); | 513 | #endif |
611 | if (disk_name) { | 514 | device_del(&disk->dev); |
612 | sysfs_remove_link(&disk->driverfs_dev->kobj, disk_name); | ||
613 | kfree(disk_name); | ||
614 | } | ||
615 | put_device(disk->driverfs_dev); | ||
616 | disk->driverfs_dev = NULL; | ||
617 | } | ||
618 | sysfs_remove_link(&disk->kobj, "subsystem"); | ||
619 | kobject_del(&disk->kobj); | ||
620 | } | 515 | } |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 65c62e1bfd6f..eb97f2897e2b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -169,7 +169,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) | |||
169 | ppid = pid_alive(p) ? | 169 | ppid = pid_alive(p) ? |
170 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; | 170 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; |
171 | tpid = pid_alive(p) && p->ptrace ? | 171 | tpid = pid_alive(p) && p->ptrace ? |
172 | task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0; | 172 | task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; |
173 | buffer += sprintf(buffer, | 173 | buffer += sprintf(buffer, |
174 | "State:\t%s\n" | 174 | "State:\t%s\n" |
175 | "Tgid:\t%d\n" | 175 | "Tgid:\t%d\n" |
@@ -464,8 +464,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) | |||
464 | } | 464 | } |
465 | 465 | ||
466 | sid = task_session_nr_ns(task, ns); | 466 | sid = task_session_nr_ns(task, ns); |
467 | ppid = task_tgid_nr_ns(task->real_parent, ns); | ||
467 | pgid = task_pgrp_nr_ns(task, ns); | 468 | pgid = task_pgrp_nr_ns(task, ns); |
468 | ppid = task_ppid_nr_ns(task, ns); | ||
469 | 469 | ||
470 | unlock_task_sighand(task, &flags); | 470 | unlock_task_sighand(task, &flags); |
471 | } | 471 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 7411bfb0b7cc..91fa8e6ce8ad 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -310,6 +310,77 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer) | |||
310 | } | 310 | } |
311 | #endif | 311 | #endif |
312 | 312 | ||
313 | #ifdef CONFIG_LATENCYTOP | ||
314 | static int lstats_show_proc(struct seq_file *m, void *v) | ||
315 | { | ||
316 | int i; | ||
317 | struct task_struct *task = m->private; | ||
318 | seq_puts(m, "Latency Top version : v0.1\n"); | ||
319 | |||
320 | for (i = 0; i < 32; i++) { | ||
321 | if (task->latency_record[i].backtrace[0]) { | ||
322 | int q; | ||
323 | seq_printf(m, "%i %li %li ", | ||
324 | task->latency_record[i].count, | ||
325 | task->latency_record[i].time, | ||
326 | task->latency_record[i].max); | ||
327 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { | ||
328 | char sym[KSYM_NAME_LEN]; | ||
329 | char *c; | ||
330 | if (!task->latency_record[i].backtrace[q]) | ||
331 | break; | ||
332 | if (task->latency_record[i].backtrace[q] == ULONG_MAX) | ||
333 | break; | ||
334 | sprint_symbol(sym, task->latency_record[i].backtrace[q]); | ||
335 | c = strchr(sym, '+'); | ||
336 | if (c) | ||
337 | *c = 0; | ||
338 | seq_printf(m, "%s ", sym); | ||
339 | } | ||
340 | seq_printf(m, "\n"); | ||
341 | } | ||
342 | |||
343 | } | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static int lstats_open(struct inode *inode, struct file *file) | ||
348 | { | ||
349 | int ret; | ||
350 | struct seq_file *m; | ||
351 | struct task_struct *task = get_proc_task(inode); | ||
352 | |||
353 | ret = single_open(file, lstats_show_proc, NULL); | ||
354 | if (!ret) { | ||
355 | m = file->private_data; | ||
356 | m->private = task; | ||
357 | } | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | static ssize_t lstats_write(struct file *file, const char __user *buf, | ||
362 | size_t count, loff_t *offs) | ||
363 | { | ||
364 | struct seq_file *m; | ||
365 | struct task_struct *task; | ||
366 | |||
367 | m = file->private_data; | ||
368 | task = m->private; | ||
369 | clear_all_latency_tracing(task); | ||
370 | |||
371 | return count; | ||
372 | } | ||
373 | |||
374 | static const struct file_operations proc_lstats_operations = { | ||
375 | .open = lstats_open, | ||
376 | .read = seq_read, | ||
377 | .write = lstats_write, | ||
378 | .llseek = seq_lseek, | ||
379 | .release = single_release, | ||
380 | }; | ||
381 | |||
382 | #endif | ||
383 | |||
313 | /* The badness from the OOM killer */ | 384 | /* The badness from the OOM killer */ |
314 | unsigned long badness(struct task_struct *p, unsigned long uptime); | 385 | unsigned long badness(struct task_struct *p, unsigned long uptime); |
315 | static int proc_oom_score(struct task_struct *task, char *buffer) | 386 | static int proc_oom_score(struct task_struct *task, char *buffer) |
@@ -1020,6 +1091,7 @@ static const struct file_operations proc_fault_inject_operations = { | |||
1020 | }; | 1091 | }; |
1021 | #endif | 1092 | #endif |
1022 | 1093 | ||
1094 | |||
1023 | #ifdef CONFIG_SCHED_DEBUG | 1095 | #ifdef CONFIG_SCHED_DEBUG |
1024 | /* | 1096 | /* |
1025 | * Print out various scheduling related per-task fields: | 1097 | * Print out various scheduling related per-task fields: |
@@ -2230,6 +2302,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2230 | #ifdef CONFIG_SCHEDSTATS | 2302 | #ifdef CONFIG_SCHEDSTATS |
2231 | INF("schedstat", S_IRUGO, pid_schedstat), | 2303 | INF("schedstat", S_IRUGO, pid_schedstat), |
2232 | #endif | 2304 | #endif |
2305 | #ifdef CONFIG_LATENCYTOP | ||
2306 | REG("latency", S_IRUGO, lstats), | ||
2307 | #endif | ||
2233 | #ifdef CONFIG_PROC_PID_CPUSET | 2308 | #ifdef CONFIG_PROC_PID_CPUSET |
2234 | REG("cpuset", S_IRUGO, cpuset), | 2309 | REG("cpuset", S_IRUGO, cpuset), |
2235 | #endif | 2310 | #endif |
@@ -2555,6 +2630,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2555 | #ifdef CONFIG_SCHEDSTATS | 2630 | #ifdef CONFIG_SCHEDSTATS |
2556 | INF("schedstat", S_IRUGO, pid_schedstat), | 2631 | INF("schedstat", S_IRUGO, pid_schedstat), |
2557 | #endif | 2632 | #endif |
2633 | #ifdef CONFIG_LATENCYTOP | ||
2634 | REG("latency", S_IRUGO, lstats), | ||
2635 | #endif | ||
2558 | #ifdef CONFIG_PROC_PID_CPUSET | 2636 | #ifdef CONFIG_PROC_PID_CPUSET |
2559 | REG("cpuset", S_IRUGO, cpuset), | 2637 | REG("cpuset", S_IRUGO, cpuset), |
2560 | #endif | 2638 | #endif |
diff --git a/fs/read_write.c b/fs/read_write.c index ea1f94cc722e..c4d3d17923f1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -197,25 +197,27 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count | |||
197 | { | 197 | { |
198 | struct inode *inode; | 198 | struct inode *inode; |
199 | loff_t pos; | 199 | loff_t pos; |
200 | int retval = -EINVAL; | ||
200 | 201 | ||
201 | inode = file->f_path.dentry->d_inode; | 202 | inode = file->f_path.dentry->d_inode; |
202 | if (unlikely((ssize_t) count < 0)) | 203 | if (unlikely((ssize_t) count < 0)) |
203 | goto Einval; | 204 | return retval; |
204 | pos = *ppos; | 205 | pos = *ppos; |
205 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) | 206 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) |
206 | goto Einval; | 207 | return retval; |
207 | 208 | ||
208 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { | 209 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { |
209 | int retval = locks_mandatory_area( | 210 | retval = locks_mandatory_area( |
210 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, | 211 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, |
211 | inode, file, pos, count); | 212 | inode, file, pos, count); |
212 | if (retval < 0) | 213 | if (retval < 0) |
213 | return retval; | 214 | return retval; |
214 | } | 215 | } |
216 | retval = security_file_permission(file, | ||
217 | read_write == READ ? MAY_READ : MAY_WRITE); | ||
218 | if (retval) | ||
219 | return retval; | ||
215 | return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; | 220 | return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; |
216 | |||
217 | Einval: | ||
218 | return -EINVAL; | ||
219 | } | 221 | } |
220 | 222 | ||
221 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) | 223 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) |
@@ -267,18 +269,15 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | |||
267 | ret = rw_verify_area(READ, file, pos, count); | 269 | ret = rw_verify_area(READ, file, pos, count); |
268 | if (ret >= 0) { | 270 | if (ret >= 0) { |
269 | count = ret; | 271 | count = ret; |
270 | ret = security_file_permission (file, MAY_READ); | 272 | if (file->f_op->read) |
271 | if (!ret) { | 273 | ret = file->f_op->read(file, buf, count, pos); |
272 | if (file->f_op->read) | 274 | else |
273 | ret = file->f_op->read(file, buf, count, pos); | 275 | ret = do_sync_read(file, buf, count, pos); |
274 | else | 276 | if (ret > 0) { |
275 | ret = do_sync_read(file, buf, count, pos); | 277 | fsnotify_access(file->f_path.dentry); |
276 | if (ret > 0) { | 278 | add_rchar(current, ret); |
277 | fsnotify_access(file->f_path.dentry); | ||
278 | add_rchar(current, ret); | ||
279 | } | ||
280 | inc_syscr(current); | ||
281 | } | 279 | } |
280 | inc_syscr(current); | ||
282 | } | 281 | } |
283 | 282 | ||
284 | return ret; | 283 | return ret; |
@@ -325,18 +324,15 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
325 | ret = rw_verify_area(WRITE, file, pos, count); | 324 | ret = rw_verify_area(WRITE, file, pos, count); |
326 | if (ret >= 0) { | 325 | if (ret >= 0) { |
327 | count = ret; | 326 | count = ret; |
328 | ret = security_file_permission (file, MAY_WRITE); | 327 | if (file->f_op->write) |
329 | if (!ret) { | 328 | ret = file->f_op->write(file, buf, count, pos); |
330 | if (file->f_op->write) | 329 | else |
331 | ret = file->f_op->write(file, buf, count, pos); | 330 | ret = do_sync_write(file, buf, count, pos); |
332 | else | 331 | if (ret > 0) { |
333 | ret = do_sync_write(file, buf, count, pos); | 332 | fsnotify_modify(file->f_path.dentry); |
334 | if (ret > 0) { | 333 | add_wchar(current, ret); |
335 | fsnotify_modify(file->f_path.dentry); | ||
336 | add_wchar(current, ret); | ||
337 | } | ||
338 | inc_syscw(current); | ||
339 | } | 334 | } |
335 | inc_syscw(current); | ||
340 | } | 336 | } |
341 | 337 | ||
342 | return ret; | 338 | return ret; |
@@ -603,9 +599,6 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
603 | ret = rw_verify_area(type, file, pos, tot_len); | 599 | ret = rw_verify_area(type, file, pos, tot_len); |
604 | if (ret < 0) | 600 | if (ret < 0) |
605 | goto out; | 601 | goto out; |
606 | ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); | ||
607 | if (ret) | ||
608 | goto out; | ||
609 | 602 | ||
610 | fnv = NULL; | 603 | fnv = NULL; |
611 | if (type == READ) { | 604 | if (type == READ) { |
@@ -737,10 +730,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
737 | goto fput_in; | 730 | goto fput_in; |
738 | count = retval; | 731 | count = retval; |
739 | 732 | ||
740 | retval = security_file_permission (in_file, MAY_READ); | ||
741 | if (retval) | ||
742 | goto fput_in; | ||
743 | |||
744 | /* | 733 | /* |
745 | * Get output file, and verify that it is ok.. | 734 | * Get output file, and verify that it is ok.. |
746 | */ | 735 | */ |
@@ -759,10 +748,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
759 | goto fput_out; | 748 | goto fput_out; |
760 | count = retval; | 749 | count = retval; |
761 | 750 | ||
762 | retval = security_file_permission (out_file, MAY_WRITE); | ||
763 | if (retval) | ||
764 | goto fput_out; | ||
765 | |||
766 | if (!max) | 751 | if (!max) |
767 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); | 752 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
768 | 753 | ||
diff --git a/fs/splice.c b/fs/splice.c index 6bdcb6107bc3..56b802bfbfa4 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -908,10 +908,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
908 | if (unlikely(ret < 0)) | 908 | if (unlikely(ret < 0)) |
909 | return ret; | 909 | return ret; |
910 | 910 | ||
911 | ret = security_file_permission(out, MAY_WRITE); | ||
912 | if (unlikely(ret < 0)) | ||
913 | return ret; | ||
914 | |||
915 | return out->f_op->splice_write(pipe, out, ppos, len, flags); | 911 | return out->f_op->splice_write(pipe, out, ppos, len, flags); |
916 | } | 912 | } |
917 | 913 | ||
@@ -934,10 +930,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
934 | if (unlikely(ret < 0)) | 930 | if (unlikely(ret < 0)) |
935 | return ret; | 931 | return ret; |
936 | 932 | ||
937 | ret = security_file_permission(in, MAY_READ); | ||
938 | if (unlikely(ret < 0)) | ||
939 | return ret; | ||
940 | |||
941 | return in->f_op->splice_read(in, ppos, pipe, len, flags); | 933 | return in->f_op->splice_read(in, ppos, pipe, len, flags); |
942 | } | 934 | } |
943 | 935 | ||
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 337162935d21..4948d9bc405d 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -440,7 +440,7 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) | |||
440 | /** | 440 | /** |
441 | * sysfs_remove_one - remove sysfs_dirent from parent | 441 | * sysfs_remove_one - remove sysfs_dirent from parent |
442 | * @acxt: addrm context to use | 442 | * @acxt: addrm context to use |
443 | * @sd: sysfs_dirent to be added | 443 | * @sd: sysfs_dirent to be removed |
444 | * | 444 | * |
445 | * Mark @sd removed and drop nlink of parent inode if @sd is a | 445 | * Mark @sd removed and drop nlink of parent inode if @sd is a |
446 | * directory. @sd is unlinked from the children list. | 446 | * directory. @sd is unlinked from the children list. |
@@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, | |||
678 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); | 678 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); |
679 | 679 | ||
680 | /* no such entry */ | 680 | /* no such entry */ |
681 | if (!sd) | 681 | if (!sd) { |
682 | ret = ERR_PTR(-ENOENT); | ||
682 | goto out_unlock; | 683 | goto out_unlock; |
684 | } | ||
683 | 685 | ||
684 | /* attach dentry and inode */ | 686 | /* attach dentry and inode */ |
685 | inode = sysfs_get_inode(sd); | 687 | inode = sysfs_get_inode(sd); |
@@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | |||
781 | old_dentry = sysfs_get_dentry(sd); | 783 | old_dentry = sysfs_get_dentry(sd); |
782 | if (IS_ERR(old_dentry)) { | 784 | if (IS_ERR(old_dentry)) { |
783 | error = PTR_ERR(old_dentry); | 785 | error = PTR_ERR(old_dentry); |
786 | old_dentry = NULL; | ||
784 | goto out; | 787 | goto out; |
785 | } | 788 | } |
786 | 789 | ||
@@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) | |||
848 | old_dentry = sysfs_get_dentry(sd); | 851 | old_dentry = sysfs_get_dentry(sd); |
849 | if (IS_ERR(old_dentry)) { | 852 | if (IS_ERR(old_dentry)) { |
850 | error = PTR_ERR(old_dentry); | 853 | error = PTR_ERR(old_dentry); |
854 | old_dentry = NULL; | ||
851 | goto out; | 855 | goto out; |
852 | } | 856 | } |
853 | old_parent = old_dentry->d_parent; | 857 | old_parent = old_dentry->d_parent; |
@@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) | |||
855 | new_parent = sysfs_get_dentry(new_parent_sd); | 859 | new_parent = sysfs_get_dentry(new_parent_sd); |
856 | if (IS_ERR(new_parent)) { | 860 | if (IS_ERR(new_parent)) { |
857 | error = PTR_ERR(new_parent); | 861 | error = PTR_ERR(new_parent); |
862 | new_parent = NULL; | ||
858 | goto out; | 863 | goto out; |
859 | } | 864 | } |
860 | 865 | ||
@@ -878,7 +883,6 @@ again: | |||
878 | error = 0; | 883 | error = 0; |
879 | d_add(new_dentry, NULL); | 884 | d_add(new_dentry, NULL); |
880 | d_move(old_dentry, new_dentry); | 885 | d_move(old_dentry, new_dentry); |
881 | dput(new_dentry); | ||
882 | 886 | ||
883 | /* Remove from old parent's list and insert into new parent's list. */ | 887 | /* Remove from old parent's list and insert into new parent's list. */ |
884 | sysfs_unlink_sibling(sd); | 888 | sysfs_unlink_sibling(sd); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b834f1709f9f..a271c87c4472 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -20,43 +20,6 @@ | |||
20 | 20 | ||
21 | #include "sysfs.h" | 21 | #include "sysfs.h" |
22 | 22 | ||
23 | #define to_sattr(a) container_of(a,struct subsys_attribute, attr) | ||
24 | |||
25 | /* | ||
26 | * Subsystem file operations. | ||
27 | * These operations allow subsystems to have files that can be | ||
28 | * read/written. | ||
29 | */ | ||
30 | static ssize_t | ||
31 | subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) | ||
32 | { | ||
33 | struct kset *kset = to_kset(kobj); | ||
34 | struct subsys_attribute * sattr = to_sattr(attr); | ||
35 | ssize_t ret = -EIO; | ||
36 | |||
37 | if (sattr->show) | ||
38 | ret = sattr->show(kset, page); | ||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | static ssize_t | ||
43 | subsys_attr_store(struct kobject * kobj, struct attribute * attr, | ||
44 | const char * page, size_t count) | ||
45 | { | ||
46 | struct kset *kset = to_kset(kobj); | ||
47 | struct subsys_attribute * sattr = to_sattr(attr); | ||
48 | ssize_t ret = -EIO; | ||
49 | |||
50 | if (sattr->store) | ||
51 | ret = sattr->store(kset, page, count); | ||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | static struct sysfs_ops subsys_sysfs_ops = { | ||
56 | .show = subsys_attr_show, | ||
57 | .store = subsys_attr_store, | ||
58 | }; | ||
59 | |||
60 | /* | 23 | /* |
61 | * There's one sysfs_buffer for each open file and one | 24 | * There's one sysfs_buffer for each open file and one |
62 | * sysfs_open_dirent for each sysfs_dirent with one or more open | 25 | * sysfs_open_dirent for each sysfs_dirent with one or more open |
@@ -66,7 +29,7 @@ static struct sysfs_ops subsys_sysfs_ops = { | |||
66 | * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open | 29 | * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open |
67 | * is protected by sysfs_open_dirent_lock. | 30 | * is protected by sysfs_open_dirent_lock. |
68 | */ | 31 | */ |
69 | static spinlock_t sysfs_open_dirent_lock = SPIN_LOCK_UNLOCKED; | 32 | static DEFINE_SPINLOCK(sysfs_open_dirent_lock); |
70 | 33 | ||
71 | struct sysfs_open_dirent { | 34 | struct sysfs_open_dirent { |
72 | atomic_t refcnt; | 35 | atomic_t refcnt; |
@@ -354,31 +317,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
354 | { | 317 | { |
355 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 318 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
356 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 319 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
357 | struct sysfs_buffer * buffer; | 320 | struct sysfs_buffer *buffer; |
358 | struct sysfs_ops * ops = NULL; | 321 | struct sysfs_ops *ops; |
359 | int error; | 322 | int error = -EACCES; |
360 | 323 | ||
361 | /* need attr_sd for attr and ops, its parent for kobj */ | 324 | /* need attr_sd for attr and ops, its parent for kobj */ |
362 | if (!sysfs_get_active_two(attr_sd)) | 325 | if (!sysfs_get_active_two(attr_sd)) |
363 | return -ENODEV; | 326 | return -ENODEV; |
364 | 327 | ||
365 | /* if the kobject has no ktype, then we assume that it is a subsystem | 328 | /* every kobject with an attribute needs a ktype assigned */ |
366 | * itself, and use ops for it. | 329 | if (kobj->ktype && kobj->ktype->sysfs_ops) |
367 | */ | ||
368 | if (kobj->kset && kobj->kset->ktype) | ||
369 | ops = kobj->kset->ktype->sysfs_ops; | ||
370 | else if (kobj->ktype) | ||
371 | ops = kobj->ktype->sysfs_ops; | 330 | ops = kobj->ktype->sysfs_ops; |
372 | else | 331 | else { |
373 | ops = &subsys_sysfs_ops; | 332 | printk(KERN_ERR "missing sysfs attribute operations for " |
374 | 333 | "kobject: %s\n", kobject_name(kobj)); | |
375 | error = -EACCES; | 334 | WARN_ON(1); |
376 | |||
377 | /* No sysfs operations, either from having no subsystem, | ||
378 | * or the subsystem have no operations. | ||
379 | */ | ||
380 | if (!ops) | ||
381 | goto err_out; | 335 | goto err_out; |
336 | } | ||
382 | 337 | ||
383 | /* File needs write support. | 338 | /* File needs write support. |
384 | * The inode's perms must say it's ok, | 339 | * The inode's perms must say it's ok, |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3eac20c63c41..5f66c4466151 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -19,39 +19,6 @@ | |||
19 | 19 | ||
20 | #include "sysfs.h" | 20 | #include "sysfs.h" |
21 | 21 | ||
22 | static int object_depth(struct sysfs_dirent *sd) | ||
23 | { | ||
24 | int depth = 0; | ||
25 | |||
26 | for (; sd->s_parent; sd = sd->s_parent) | ||
27 | depth++; | ||
28 | |||
29 | return depth; | ||
30 | } | ||
31 | |||
32 | static int object_path_length(struct sysfs_dirent * sd) | ||
33 | { | ||
34 | int length = 1; | ||
35 | |||
36 | for (; sd->s_parent; sd = sd->s_parent) | ||
37 | length += strlen(sd->s_name) + 1; | ||
38 | |||
39 | return length; | ||
40 | } | ||
41 | |||
42 | static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) | ||
43 | { | ||
44 | --length; | ||
45 | for (; sd->s_parent; sd = sd->s_parent) { | ||
46 | int cur = strlen(sd->s_name); | ||
47 | |||
48 | /* back up enough to print this bus id with '/' */ | ||
49 | length -= cur; | ||
50 | strncpy(buffer + length, sd->s_name, cur); | ||
51 | *(buffer + --length) = '/'; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | /** | 22 | /** |
56 | * sysfs_create_link - create symlink between two objects. | 23 | * sysfs_create_link - create symlink between two objects. |
57 | * @kobj: object whose directory we're creating the link in. | 24 | * @kobj: object whose directory we're creating the link in. |
@@ -112,7 +79,6 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char | |||
112 | return error; | 79 | return error; |
113 | } | 80 | } |
114 | 81 | ||
115 | |||
116 | /** | 82 | /** |
117 | * sysfs_remove_link - remove symlink in object's directory. | 83 | * sysfs_remove_link - remove symlink in object's directory. |
118 | * @kobj: object we're acting for. | 84 | * @kobj: object we're acting for. |
@@ -124,24 +90,54 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) | |||
124 | sysfs_hash_and_remove(kobj->sd, name); | 90 | sysfs_hash_and_remove(kobj->sd, name); |
125 | } | 91 | } |
126 | 92 | ||
127 | static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, | 93 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |
128 | struct sysfs_dirent * target_sd, char *path) | 94 | struct sysfs_dirent *target_sd, char *path) |
129 | { | 95 | { |
130 | char * s; | 96 | struct sysfs_dirent *base, *sd; |
131 | int depth, size; | 97 | char *s = path; |
98 | int len = 0; | ||
99 | |||
100 | /* go up to the root, stop at the base */ | ||
101 | base = parent_sd; | ||
102 | while (base->s_parent) { | ||
103 | sd = target_sd->s_parent; | ||
104 | while (sd->s_parent && base != sd) | ||
105 | sd = sd->s_parent; | ||
106 | |||
107 | if (base == sd) | ||
108 | break; | ||
109 | |||
110 | strcpy(s, "../"); | ||
111 | s += 3; | ||
112 | base = base->s_parent; | ||
113 | } | ||
114 | |||
115 | /* determine end of target string for reverse fillup */ | ||
116 | sd = target_sd; | ||
117 | while (sd->s_parent && sd != base) { | ||
118 | len += strlen(sd->s_name) + 1; | ||
119 | sd = sd->s_parent; | ||
120 | } | ||
132 | 121 | ||
133 | depth = object_depth(parent_sd); | 122 | /* check limits */ |
134 | size = object_path_length(target_sd) + depth * 3 - 1; | 123 | if (len < 2) |
135 | if (size > PATH_MAX) | 124 | return -EINVAL; |
125 | len--; | ||
126 | if ((s - path) + len > PATH_MAX) | ||
136 | return -ENAMETOOLONG; | 127 | return -ENAMETOOLONG; |
137 | 128 | ||
138 | pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); | 129 | /* reverse fillup of target string from target to base */ |
130 | sd = target_sd; | ||
131 | while (sd->s_parent && sd != base) { | ||
132 | int slen = strlen(sd->s_name); | ||
139 | 133 | ||
140 | for (s = path; depth--; s += 3) | 134 | len -= slen; |
141 | strcpy(s,"../"); | 135 | strncpy(s + len, sd->s_name, slen); |
136 | if (len) | ||
137 | s[--len] = '/'; | ||
142 | 138 | ||
143 | fill_object_path(target_sd, path, size); | 139 | sd = sd->s_parent; |
144 | pr_debug("%s: path = '%s'\n", __FUNCTION__, path); | 140 | } |
145 | 141 | ||
146 | return 0; | 142 | return 0; |
147 | } | 143 | } |