diff options
53 files changed, 2005 insertions, 1592 deletions
| diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index ed55238023a9..c318a8bbb1ef 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
| @@ -35,7 +35,6 @@ Features which OCFS2 does not support yet: | |||
| 35 | - Directory change notification (F_NOTIFY) | 35 | - Directory change notification (F_NOTIFY) | 
| 36 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | 36 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | 
| 37 | - POSIX ACLs | 37 | - POSIX ACLs | 
| 38 | - readpages / writepages (not user visible) | ||
| 39 | 38 | ||
| 40 | Mount options | 39 | Mount options | 
| 41 | ============= | 40 | ============= | 
| @@ -62,3 +61,18 @@ data=writeback Data ordering is not preserved, data may be written | |||
| 62 | preferred_slot=0(*) During mount, try to use this filesystem slot first. If | 61 | preferred_slot=0(*) During mount, try to use this filesystem slot first. If | 
| 63 | it is in use by another node, the first empty one found | 62 | it is in use by another node, the first empty one found | 
| 64 | will be chosen. Invalid values will be ignored. | 63 | will be chosen. Invalid values will be ignored. | 
| 64 | commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata | ||
| 65 | every 'nrsec' seconds. The default value is 5 seconds. | ||
| 66 | This means that if you lose your power, you will lose | ||
| 67 | as much as the latest 5 seconds of work (your | ||
| 68 | filesystem will not be damaged though, thanks to the | ||
| 69 | journaling). This default value (or any low value) | ||
| 70 | will hurt performance, but it's good for data-safety. | ||
| 71 | Setting it to 0 will have the same effect as leaving | ||
| 72 | it at the default (5 seconds). | ||
| 73 | Setting it to very large values will improve | ||
| 74 | performance. | ||
| 75 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too | ||
| 76 | large, the fs will silently revert it to the default. | ||
| 77 | Localalloc is not enabled for local mounts. | ||
| 78 | localflocks This disables cluster aware flock. | ||
| diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index 5c7fbf9d96b4..c18363bd8d11 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt | |||
| @@ -138,6 +138,7 @@ Code Seq# Include File Comments | |||
| 138 | 'm' 00-1F net/irda/irmod.h conflict! | 138 | 'm' 00-1F net/irda/irmod.h conflict! | 
| 139 | 'n' 00-7F linux/ncp_fs.h | 139 | 'n' 00-7F linux/ncp_fs.h | 
| 140 | 'n' E0-FF video/matrox.h matroxfb | 140 | 'n' E0-FF video/matrox.h matroxfb | 
| 141 | 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 | ||
| 141 | 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) | 142 | 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) | 
| 142 | 'p' 00-3F linux/mc146818rtc.h conflict! | 143 | 'p' 00-3F linux/mc146818rtc.h conflict! | 
| 143 | 'p' 40-7F linux/nvram.h | 144 | 'p' 40-7F linux/nvram.h | 
| diff --git a/fs/Kconfig b/fs/Kconfig index b4799efaf9e8..b6df18f1f677 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -440,14 +440,8 @@ config OCFS2_FS | |||
| 440 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools | 440 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools | 
| 441 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ | 441 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ | 
| 442 | 442 | ||
| 443 | Note: Features which OCFS2 does not support yet: | 443 | For more information on OCFS2, see the file | 
| 444 | - extended attributes | 444 | <file:Documentation/filesystems/ocfs2.txt>. | 
| 445 | - quotas | ||
| 446 | - cluster aware flock | ||
| 447 | - Directory change notification (F_NOTIFY) | ||
| 448 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | ||
| 449 | - POSIX ACLs | ||
| 450 | - readpages / writepages (not user visible) | ||
| 451 | 445 | ||
| 452 | config OCFS2_DEBUG_MASKLOG | 446 | config OCFS2_DEBUG_MASKLOG | 
| 453 | bool "OCFS2 logging support" | 447 | bool "OCFS2 logging support" | 
| @@ -1028,8 +1022,8 @@ config HUGETLB_PAGE | |||
| 1028 | def_bool HUGETLBFS | 1022 | def_bool HUGETLBFS | 
| 1029 | 1023 | ||
| 1030 | config CONFIGFS_FS | 1024 | config CONFIGFS_FS | 
| 1031 | tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" | 1025 | tristate "Userspace-driven configuration filesystem" | 
| 1032 | depends on SYSFS && EXPERIMENTAL | 1026 | depends on SYSFS | 
| 1033 | help | 1027 | help | 
| 1034 | configfs is a ram-based filesystem that provides the converse | 1028 | configfs is a ram-based filesystem that provides the converse | 
| 1035 | of sysfs's functionality. Where sysfs is a filesystem-based | 1029 | of sysfs's functionality. Where sysfs is a filesystem-based | 
| diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 50ed691098bc..a48dc7dd8765 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -546,7 +546,7 @@ static int populate_groups(struct config_group *group) | |||
| 546 | * That said, taking our i_mutex is closer to mkdir | 546 | * That said, taking our i_mutex is closer to mkdir | 
| 547 | * emulation, and shouldn't hurt. | 547 | * emulation, and shouldn't hurt. | 
| 548 | */ | 548 | */ | 
| 549 | mutex_lock(&dentry->d_inode->i_mutex); | 549 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); | 
| 550 | 550 | ||
| 551 | for (i = 0; group->default_groups[i]; i++) { | 551 | for (i = 0; group->default_groups[i]; i++) { | 
| 552 | new_group = group->default_groups[i]; | 552 | new_group = group->default_groups[i]; | 
| @@ -1405,7 +1405,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) | |||
| 1405 | sd = configfs_sb->s_root->d_fsdata; | 1405 | sd = configfs_sb->s_root->d_fsdata; | 
| 1406 | link_group(to_config_group(sd->s_element), group); | 1406 | link_group(to_config_group(sd->s_element), group); | 
| 1407 | 1407 | ||
| 1408 | mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); | 1408 | mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, | 
| 1409 | I_MUTEX_PARENT); | ||
| 1409 | 1410 | ||
| 1410 | name.name = group->cg_item.ci_name; | 1411 | name.name = group->cg_item.ci_name; | 
| 1411 | name.len = strlen(name.name); | 1412 | name.len = strlen(name.name); | 
| diff --git a/fs/configfs/file.c b/fs/configfs/file.c index a3658f9a082c..397cb503a180 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
| @@ -320,7 +320,7 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att | |||
| 320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; | 320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; | 
| 321 | int error = 0; | 321 | int error = 0; | 
| 322 | 322 | ||
| 323 | mutex_lock(&dir->d_inode->i_mutex); | 323 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL); | 
| 324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); | 324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); | 
| 325 | mutex_unlock(&dir->d_inode->i_mutex); | 325 | mutex_unlock(&dir->d_inode->i_mutex); | 
| 326 | 326 | ||
| diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 9fb8132f19b0..4d4ce48bb42c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
| @@ -19,16 +19,17 @@ ocfs2-objs := \ | |||
| 19 | ioctl.o \ | 19 | ioctl.o \ | 
| 20 | journal.o \ | 20 | journal.o \ | 
| 21 | localalloc.o \ | 21 | localalloc.o \ | 
| 22 | locks.o \ | ||
| 22 | mmap.o \ | 23 | mmap.o \ | 
| 23 | namei.o \ | 24 | namei.o \ | 
| 25 | resize.o \ | ||
| 24 | slot_map.o \ | 26 | slot_map.o \ | 
| 25 | suballoc.o \ | 27 | suballoc.o \ | 
| 26 | super.o \ | 28 | super.o \ | 
| 27 | symlink.o \ | 29 | symlink.o \ | 
| 28 | sysfile.o \ | 30 | sysfile.o \ | 
| 29 | uptodate.o \ | 31 | uptodate.o \ | 
| 30 | ver.o \ | 32 | ver.o | 
| 31 | vote.o | ||
| 32 | 33 | ||
| 33 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 34 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 
| 34 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 35 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 
| diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 23c8cda43f19..e6df06ac6405 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -4731,7 +4731,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
| 4731 | 4731 | ||
| 4732 | mutex_lock(&data_alloc_inode->i_mutex); | 4732 | mutex_lock(&data_alloc_inode->i_mutex); | 
| 4733 | 4733 | ||
| 4734 | status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); | 4734 | status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1); | 
| 4735 | if (status < 0) { | 4735 | if (status < 0) { | 
| 4736 | mlog_errno(status); | 4736 | mlog_errno(status); | 
| 4737 | goto out_mutex; | 4737 | goto out_mutex; | 
| @@ -4753,7 +4753,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
| 4753 | 4753 | ||
| 4754 | out_unlock: | 4754 | out_unlock: | 
| 4755 | brelse(data_alloc_bh); | 4755 | brelse(data_alloc_bh); | 
| 4756 | ocfs2_meta_unlock(data_alloc_inode, 1); | 4756 | ocfs2_inode_unlock(data_alloc_inode, 1); | 
| 4757 | 4757 | ||
| 4758 | out_mutex: | 4758 | out_mutex: | 
| 4759 | mutex_unlock(&data_alloc_inode->i_mutex); | 4759 | mutex_unlock(&data_alloc_inode->i_mutex); | 
| @@ -5077,7 +5077,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb, | |||
| 5077 | 5077 | ||
| 5078 | mutex_lock(&inode->i_mutex); | 5078 | mutex_lock(&inode->i_mutex); | 
| 5079 | 5079 | ||
| 5080 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 5080 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 
| 5081 | if (ret) { | 5081 | if (ret) { | 
| 5082 | mlog_errno(ret); | 5082 | mlog_errno(ret); | 
| 5083 | goto out_mutex; | 5083 | goto out_mutex; | 
| @@ -5118,7 +5118,7 @@ out_journal: | |||
| 5118 | ocfs2_commit_trans(osb, handle); | 5118 | ocfs2_commit_trans(osb, handle); | 
| 5119 | 5119 | ||
| 5120 | out_unlock: | 5120 | out_unlock: | 
| 5121 | ocfs2_meta_unlock(inode, 1); | 5121 | ocfs2_inode_unlock(inode, 1); | 
| 5122 | brelse(di_bh); | 5122 | brelse(di_bh); | 
| 5123 | out_mutex: | 5123 | out_mutex: | 
| 5124 | mutex_unlock(&inode->i_mutex); | 5124 | mutex_unlock(&inode->i_mutex); | 
| diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 56f7790cad46..bc7b4cbbe8ec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> | 
| 27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> | 
| 28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> | 
| 29 | #include <linux/mpage.h> | ||
| 29 | 30 | ||
| 30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 31 | #define MLOG_MASK_PREFIX ML_FILE_IO | 
| 31 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> | 
| @@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 139 | { | 140 | { | 
| 140 | int err = 0; | 141 | int err = 0; | 
| 141 | unsigned int ext_flags; | 142 | unsigned int ext_flags; | 
| 142 | u64 p_blkno, past_eof; | 143 | u64 max_blocks = bh_result->b_size >> inode->i_blkbits; | 
| 144 | u64 p_blkno, count, past_eof; | ||
| 143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| 144 | 146 | ||
| 145 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 147 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 
| @@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 155 | goto bail; | 157 | goto bail; | 
| 156 | } | 158 | } | 
| 157 | 159 | ||
| 158 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 160 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, | 
| 159 | &ext_flags); | 161 | &ext_flags); | 
| 160 | if (err) { | 162 | if (err) { | 
| 161 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 163 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 
| @@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 164 | goto bail; | 166 | goto bail; | 
| 165 | } | 167 | } | 
| 166 | 168 | ||
| 169 | if (max_blocks < count) | ||
| 170 | count = max_blocks; | ||
| 171 | |||
| 167 | /* | 172 | /* | 
| 168 | * ocfs2 never allocates in this function - the only time we | 173 | * ocfs2 never allocates in this function - the only time we | 
| 169 | * need to use BH_New is when we're extending i_size on a file | 174 | * need to use BH_New is when we're extending i_size on a file | 
| @@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 178 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 183 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 
| 179 | map_bh(bh_result, inode->i_sb, p_blkno); | 184 | map_bh(bh_result, inode->i_sb, p_blkno); | 
| 180 | 185 | ||
| 186 | bh_result->b_size = count << inode->i_blkbits; | ||
| 187 | |||
| 181 | if (!ocfs2_sparse_alloc(osb)) { | 188 | if (!ocfs2_sparse_alloc(osb)) { | 
| 182 | if (p_blkno == 0) { | 189 | if (p_blkno == 0) { | 
| 183 | err = -EIO; | 190 | err = -EIO; | 
| @@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
| 210 | struct buffer_head *di_bh) | 217 | struct buffer_head *di_bh) | 
| 211 | { | 218 | { | 
| 212 | void *kaddr; | 219 | void *kaddr; | 
| 213 | unsigned int size; | 220 | loff_t size; | 
| 214 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 221 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 
| 215 | 222 | ||
| 216 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 223 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 
| @@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
| 224 | if (size > PAGE_CACHE_SIZE || | 231 | if (size > PAGE_CACHE_SIZE || | 
| 225 | size > ocfs2_max_inline_data(inode->i_sb)) { | 232 | size > ocfs2_max_inline_data(inode->i_sb)) { | 
| 226 | ocfs2_error(inode->i_sb, | 233 | ocfs2_error(inode->i_sb, | 
| 227 | "Inode %llu has with inline data has bad size: %u", | 234 | "Inode %llu has with inline data has bad size: %Lu", | 
| 228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, size); | 235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 
| 236 | (unsigned long long)size); | ||
| 229 | return -EROFS; | 237 | return -EROFS; | 
| 230 | } | 238 | } | 
| 231 | 239 | ||
| @@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 275 | 283 | ||
| 276 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 284 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 
| 277 | 285 | ||
| 278 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 286 | ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); | 
| 279 | if (ret != 0) { | 287 | if (ret != 0) { | 
| 280 | if (ret == AOP_TRUNCATED_PAGE) | 288 | if (ret == AOP_TRUNCATED_PAGE) | 
| 281 | unlock = 0; | 289 | unlock = 0; | 
| @@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 285 | 293 | ||
| 286 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 294 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 
| 287 | ret = AOP_TRUNCATED_PAGE; | 295 | ret = AOP_TRUNCATED_PAGE; | 
| 288 | goto out_meta_unlock; | 296 | goto out_inode_unlock; | 
| 289 | } | 297 | } | 
| 290 | 298 | ||
| 291 | /* | 299 | /* | 
| @@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 305 | goto out_alloc; | 313 | goto out_alloc; | 
| 306 | } | 314 | } | 
| 307 | 315 | ||
| 308 | ret = ocfs2_data_lock_with_page(inode, 0, page); | ||
| 309 | if (ret != 0) { | ||
| 310 | if (ret == AOP_TRUNCATED_PAGE) | ||
| 311 | unlock = 0; | ||
| 312 | mlog_errno(ret); | ||
| 313 | goto out_alloc; | ||
| 314 | } | ||
| 315 | |||
| 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 
| 317 | ret = ocfs2_readpage_inline(inode, page); | 317 | ret = ocfs2_readpage_inline(inode, page); | 
| 318 | else | 318 | else | 
| 319 | ret = block_read_full_page(page, ocfs2_get_block); | 319 | ret = block_read_full_page(page, ocfs2_get_block); | 
| 320 | unlock = 0; | 320 | unlock = 0; | 
| 321 | 321 | ||
| 322 | ocfs2_data_unlock(inode, 0); | ||
| 323 | out_alloc: | 322 | out_alloc: | 
| 324 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 323 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 
| 325 | out_meta_unlock: | 324 | out_inode_unlock: | 
| 326 | ocfs2_meta_unlock(inode, 0); | 325 | ocfs2_inode_unlock(inode, 0); | 
| 327 | out: | 326 | out: | 
| 328 | if (unlock) | 327 | if (unlock) | 
| 329 | unlock_page(page); | 328 | unlock_page(page); | 
| @@ -331,6 +330,62 @@ out: | |||
| 331 | return ret; | 330 | return ret; | 
| 332 | } | 331 | } | 
| 333 | 332 | ||
| 333 | /* | ||
| 334 | * This is used only for read-ahead. Failures or difficult to handle | ||
| 335 | * situations are safe to ignore. | ||
| 336 | * | ||
| 337 | * Right now, we don't bother with BH_Boundary - in-inode extent lists | ||
| 338 | * are quite large (243 extents on 4k blocks), so most inodes don't | ||
| 339 | * grow out to a tree. If need be, detecting boundary extents could | ||
| 340 | * trivially be added in a future version of ocfs2_get_block(). | ||
| 341 | */ | ||
| 342 | static int ocfs2_readpages(struct file *filp, struct address_space *mapping, | ||
| 343 | struct list_head *pages, unsigned nr_pages) | ||
| 344 | { | ||
| 345 | int ret, err = -EIO; | ||
| 346 | struct inode *inode = mapping->host; | ||
| 347 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 348 | loff_t start; | ||
| 349 | struct page *last; | ||
| 350 | |||
| 351 | /* | ||
| 352 | * Use the nonblocking flag for the dlm code to avoid page | ||
| 353 | * lock inversion, but don't bother with retrying. | ||
| 354 | */ | ||
| 355 | ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); | ||
| 356 | if (ret) | ||
| 357 | return err; | ||
| 358 | |||
| 359 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | ||
| 360 | ocfs2_inode_unlock(inode, 0); | ||
| 361 | return err; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * Don't bother with inline-data. There isn't anything | ||
| 366 | * to read-ahead in that case anyway... | ||
| 367 | */ | ||
| 368 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
| 369 | goto out_unlock; | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Check whether a remote node truncated this file - we just | ||
| 373 | * drop out in that case as it's not worth handling here. | ||
| 374 | */ | ||
| 375 | last = list_entry(pages->prev, struct page, lru); | ||
| 376 | start = (loff_t)last->index << PAGE_CACHE_SHIFT; | ||
| 377 | if (start >= i_size_read(inode)) | ||
| 378 | goto out_unlock; | ||
| 379 | |||
| 380 | err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); | ||
| 381 | |||
| 382 | out_unlock: | ||
| 383 | up_read(&oi->ip_alloc_sem); | ||
| 384 | ocfs2_inode_unlock(inode, 0); | ||
| 385 | |||
| 386 | return err; | ||
| 387 | } | ||
| 388 | |||
| 334 | /* Note: Because we don't support holes, our allocation has | 389 | /* Note: Because we don't support holes, our allocation has | 
| 335 | * already happened (allocation writes zeros to the file data) | 390 | * already happened (allocation writes zeros to the file data) | 
| 336 | * so we don't have to worry about ordered writes in | 391 | * so we don't have to worry about ordered writes in | 
| @@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
| 452 | * accessed concurrently from multiple nodes. | 507 | * accessed concurrently from multiple nodes. | 
| 453 | */ | 508 | */ | 
| 454 | if (!INODE_JOURNAL(inode)) { | 509 | if (!INODE_JOURNAL(inode)) { | 
| 455 | err = ocfs2_meta_lock(inode, NULL, 0); | 510 | err = ocfs2_inode_lock(inode, NULL, 0); | 
| 456 | if (err) { | 511 | if (err) { | 
| 457 | if (err != -ENOENT) | 512 | if (err != -ENOENT) | 
| 458 | mlog_errno(err); | 513 | mlog_errno(err); | 
| @@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
| 467 | 522 | ||
| 468 | if (!INODE_JOURNAL(inode)) { | 523 | if (!INODE_JOURNAL(inode)) { | 
| 469 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 524 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 
| 470 | ocfs2_meta_unlock(inode, 0); | 525 | ocfs2_inode_unlock(inode, 0); | 
| 471 | } | 526 | } | 
| 472 | 527 | ||
| 473 | if (err) { | 528 | if (err) { | 
| @@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 638 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 693 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 
| 639 | return 0; | 694 | return 0; | 
| 640 | 695 | ||
| 641 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
| 642 | /* | ||
| 643 | * We get PR data locks even for O_DIRECT. This | ||
| 644 | * allows concurrent O_DIRECT I/O but doesn't let | ||
| 645 | * O_DIRECT with extending and buffered zeroing writes | ||
| 646 | * race. If they did race then the buffered zeroing | ||
| 647 | * could be written back after the O_DIRECT I/O. It's | ||
| 648 | * one thing to tell people not to mix buffered and | ||
| 649 | * O_DIRECT writes, but expecting them to understand | ||
| 650 | * that file extension is also an implicit buffered | ||
| 651 | * write is too much. By getting the PR we force | ||
| 652 | * writeback of the buffered zeroing before | ||
| 653 | * proceeding. | ||
| 654 | */ | ||
| 655 | ret = ocfs2_data_lock(inode, 0); | ||
| 656 | if (ret < 0) { | ||
| 657 | mlog_errno(ret); | ||
| 658 | goto out; | ||
| 659 | } | ||
| 660 | ocfs2_data_unlock(inode, 0); | ||
| 661 | } | ||
| 662 | |||
| 663 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 696 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 
| 664 | inode->i_sb->s_bdev, iov, offset, | 697 | inode->i_sb->s_bdev, iov, offset, | 
| 665 | nr_segs, | 698 | nr_segs, | 
| 666 | ocfs2_direct_IO_get_blocks, | 699 | ocfs2_direct_IO_get_blocks, | 
| 667 | ocfs2_dio_end_io); | 700 | ocfs2_dio_end_io); | 
| 668 | out: | 701 | |
| 669 | mlog_exit(ret); | 702 | mlog_exit(ret); | 
| 670 | return ret; | 703 | return ret; | 
| 671 | } | 704 | } | 
| @@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 1754 | struct buffer_head *di_bh = NULL; | 1787 | struct buffer_head *di_bh = NULL; | 
| 1755 | struct inode *inode = mapping->host; | 1788 | struct inode *inode = mapping->host; | 
| 1756 | 1789 | ||
| 1757 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1790 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 
| 1758 | if (ret) { | 1791 | if (ret) { | 
| 1759 | mlog_errno(ret); | 1792 | mlog_errno(ret); | 
| 1760 | return ret; | 1793 | return ret; | 
| @@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 1769 | */ | 1802 | */ | 
| 1770 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1803 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 1771 | 1804 | ||
| 1772 | ret = ocfs2_data_lock(inode, 1); | ||
| 1773 | if (ret) { | ||
| 1774 | mlog_errno(ret); | ||
| 1775 | goto out_fail; | ||
| 1776 | } | ||
| 1777 | |||
| 1778 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1805 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 
| 1779 | fsdata, di_bh, NULL); | 1806 | fsdata, di_bh, NULL); | 
| 1780 | if (ret) { | 1807 | if (ret) { | 
| 1781 | mlog_errno(ret); | 1808 | mlog_errno(ret); | 
| 1782 | goto out_fail_data; | 1809 | goto out_fail; | 
| 1783 | } | 1810 | } | 
| 1784 | 1811 | ||
| 1785 | brelse(di_bh); | 1812 | brelse(di_bh); | 
| 1786 | 1813 | ||
| 1787 | return 0; | 1814 | return 0; | 
| 1788 | 1815 | ||
| 1789 | out_fail_data: | ||
| 1790 | ocfs2_data_unlock(inode, 1); | ||
| 1791 | out_fail: | 1816 | out_fail: | 
| 1792 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1817 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 1793 | 1818 | ||
| 1794 | brelse(di_bh); | 1819 | brelse(di_bh); | 
| 1795 | ocfs2_meta_unlock(inode, 1); | 1820 | ocfs2_inode_unlock(inode, 1); | 
| 1796 | 1821 | ||
| 1797 | return ret; | 1822 | return ret; | 
| 1798 | } | 1823 | } | 
| @@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
| 1908 | 1933 | ||
| 1909 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 1934 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 
| 1910 | 1935 | ||
| 1911 | ocfs2_data_unlock(inode, 1); | ||
| 1912 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1936 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 1913 | ocfs2_meta_unlock(inode, 1); | 1937 | ocfs2_inode_unlock(inode, 1); | 
| 1914 | 1938 | ||
| 1915 | return ret; | 1939 | return ret; | 
| 1916 | } | 1940 | } | 
| 1917 | 1941 | ||
| 1918 | const struct address_space_operations ocfs2_aops = { | 1942 | const struct address_space_operations ocfs2_aops = { | 
| 1919 | .readpage = ocfs2_readpage, | 1943 | .readpage = ocfs2_readpage, | 
| 1944 | .readpages = ocfs2_readpages, | ||
| 1920 | .writepage = ocfs2_writepage, | 1945 | .writepage = ocfs2_writepage, | 
| 1921 | .write_begin = ocfs2_write_begin, | 1946 | .write_begin = ocfs2_write_begin, | 
| 1922 | .write_end = ocfs2_write_end, | 1947 | .write_end = ocfs2_write_end, | 
| diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index c9037414f4f6..f136639f5b41 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
| @@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
| 79 | * information for this bh as it's not marked locally | 79 | * information for this bh as it's not marked locally | 
| 80 | * uptodate. */ | 80 | * uptodate. */ | 
| 81 | ret = -EIO; | 81 | ret = -EIO; | 
| 82 | brelse(bh); | 82 | put_bh(bh); | 
| 83 | } | 83 | } | 
| 84 | 84 | ||
| 85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 
| @@ -256,7 +256,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
| 256 | * for this bh as it's not marked locally | 256 | * for this bh as it's not marked locally | 
| 257 | * uptodate. */ | 257 | * uptodate. */ | 
| 258 | status = -EIO; | 258 | status = -EIO; | 
| 259 | brelse(bh); | 259 | put_bh(bh); | 
| 260 | bhs[i] = NULL; | 260 | bhs[i] = NULL; | 
| 261 | continue; | 261 | continue; | 
| 262 | } | 262 | } | 
| @@ -280,3 +280,64 @@ bail: | |||
| 280 | mlog_exit(status); | 280 | mlog_exit(status); | 
| 281 | return status; | 281 | return status; | 
| 282 | } | 282 | } | 
| 283 | |||
| 284 | /* Check whether the blkno is the super block or one of the backups. */ | ||
| 285 | static void ocfs2_check_super_or_backup(struct super_block *sb, | ||
| 286 | sector_t blkno) | ||
| 287 | { | ||
| 288 | int i; | ||
| 289 | u64 backup_blkno; | ||
| 290 | |||
| 291 | if (blkno == OCFS2_SUPER_BLOCK_BLKNO) | ||
| 292 | return; | ||
| 293 | |||
| 294 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
| 295 | backup_blkno = ocfs2_backup_super_blkno(sb, i); | ||
| 296 | if (backup_blkno == blkno) | ||
| 297 | return; | ||
| 298 | } | ||
| 299 | |||
| 300 | BUG(); | ||
| 301 | } | ||
| 302 | |||
| 303 | /* | ||
| 304 | * Writing the super block and its backups doesn't need to collaborate with the journal, | ||
| 305 | * so we don't need to lock ip_io_mutex and inode doesn't need to be passed | ||
| 306 | * into this function. | ||
| 307 | */ | ||
| 308 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
| 309 | struct buffer_head *bh) | ||
| 310 | { | ||
| 311 | int ret = 0; | ||
| 312 | |||
| 313 | mlog_entry_void(); | ||
| 314 | |||
| 315 | BUG_ON(buffer_jbd(bh)); | ||
| 316 | ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); | ||
| 317 | |||
| 318 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { | ||
| 319 | ret = -EROFS; | ||
| 320 | goto out; | ||
| 321 | } | ||
| 322 | |||
| 323 | lock_buffer(bh); | ||
| 324 | set_buffer_uptodate(bh); | ||
| 325 | |||
| 326 | /* remove from dirty list before I/O. */ | ||
| 327 | clear_buffer_dirty(bh); | ||
| 328 | |||
| 329 | get_bh(bh); /* for end_buffer_write_sync() */ | ||
| 330 | bh->b_end_io = end_buffer_write_sync; | ||
| 331 | submit_bh(WRITE, bh); | ||
| 332 | |||
| 333 | wait_on_buffer(bh); | ||
| 334 | |||
| 335 | if (!buffer_uptodate(bh)) { | ||
| 336 | ret = -EIO; | ||
| 337 | put_bh(bh); | ||
| 338 | } | ||
| 339 | |||
| 340 | out: | ||
| 341 | mlog_exit(ret); | ||
| 342 | return ret; | ||
| 343 | } | ||
| diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 6cc20930fac3..c2e78614c3e5 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
| @@ -47,6 +47,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, | |||
| 47 | int flags, | 47 | int flags, | 
| 48 | struct inode *inode); | 48 | struct inode *inode); | 
| 49 | 49 | ||
| 50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
| 51 | struct buffer_head *bh); | ||
| 50 | 52 | ||
| 51 | #define OCFS2_BH_CACHED 1 | 53 | #define OCFS2_BH_CACHED 1 | 
| 52 | #define OCFS2_BH_READAHEAD 8 | 54 | #define OCFS2_BH_READAHEAD 8 | 
| diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 35397dd5ecdb..e511339886b3 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
| @@ -35,7 +35,7 @@ | |||
| 35 | #define O2HB_LIVE_THRESHOLD 2 | 35 | #define O2HB_LIVE_THRESHOLD 2 | 
| 36 | /* number of equal samples to be seen as dead */ | 36 | /* number of equal samples to be seen as dead */ | 
| 37 | extern unsigned int o2hb_dead_threshold; | 37 | extern unsigned int o2hb_dead_threshold; | 
| 38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 7 | 38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 31 | 
| 39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ | 39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ | 
| 40 | #define O2HB_MIN_DEAD_THRESHOLD 2 | 40 | #define O2HB_MIN_DEAD_THRESHOLD 2 | 
| 41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) | 41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) | 
| diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index da880fc215f0..f36f66aab3dd 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
| @@ -60,8 +60,8 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data, | |||
| 60 | /* same as hb delay, we're waiting for another node to recognize our hb */ | 60 | /* same as hb delay, we're waiting for another node to recognize our hb */ | 
| 61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 | 61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 | 
| 62 | 62 | ||
| 63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 | 63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 | 
| 64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 | 64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 | 
| 65 | 65 | ||
| 66 | 66 | ||
| 67 | /* TODO: figure this out.... */ | 67 | /* TODO: figure this out.... */ | 
| diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 9606111fe89d..b2e832aca567 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
| @@ -38,6 +38,12 @@ | |||
| 38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should | 
| 39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. | 
| 40 | * | 40 | * | 
| 41 | * New in version 10: | ||
| 42 | * - Meta/data locks combined | ||
| 43 | * | ||
| 44 | * New in version 9: | ||
| 45 | * - All votes removed | ||
| 46 | * | ||
| 41 | * New in version 8: | 47 | * New in version 8: | 
| 42 | * - Replace delete inode votes with a cluster lock | 48 | * - Replace delete inode votes with a cluster lock | 
| 43 | * | 49 | * | 
| @@ -60,7 +66,7 @@ | |||
| 60 | * - full 64 bit i_size in the metadata lock lvbs | 66 | * - full 64 bit i_size in the metadata lock lvbs | 
| 61 | * - introduction of "rw" lock and pushing meta/data locking down | 67 | * - introduction of "rw" lock and pushing meta/data locking down | 
| 62 | */ | 68 | */ | 
| 63 | #define O2NET_PROTOCOL_VERSION 8ULL | 69 | #define O2NET_PROTOCOL_VERSION 10ULL | 
| 64 | struct o2net_handshake { | 70 | struct o2net_handshake { | 
| 65 | __be64 protocol_version; | 71 | __be64 protocol_version; | 
| 66 | __be64 connector_id; | 72 | __be64 connector_id; | 
| diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c index 7286c48bb30d..a56eee6abad3 100644 --- a/fs/ocfs2/cluster/ver.c +++ b/fs/ocfs2/cluster/ver.c | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | 28 | ||
| 29 | #include "ver.h" | 29 | #include "ver.h" | 
| 30 | 30 | ||
| 31 | #define CLUSTER_BUILD_VERSION "1.3.3" | 31 | #define CLUSTER_BUILD_VERSION "1.5.0" | 
| 32 | 32 | ||
| 33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION | 
| 34 | 34 | ||
| diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 9923278ea6d4..b1cc7c381e88 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
| @@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry, | |||
| 128 | /* | 128 | /* | 
| 129 | * Walk the inode alias list, and find a dentry which has a given | 129 | * Walk the inode alias list, and find a dentry which has a given | 
| 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | 
| 131 | * is looking for a dentry_lock reference. The vote thread is looking | 131 | * is looking for a dentry_lock reference. The downconvert thread is | 
| 132 | * to unhash aliases, so we allow it to skip any that already have | 132 | * looking to unhash aliases, so we allow it to skip any that already | 
| 133 | * that property. | 133 | * have that property. | 
| 134 | */ | 134 | */ | 
| 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | 
| 136 | u64 parent_blkno, | 136 | u64 parent_blkno, | 
| @@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
| 266 | dl->dl_count = 0; | 266 | dl->dl_count = 0; | 
| 267 | /* | 267 | /* | 
| 268 | * Does this have to happen below, for all attaches, in case | 268 | * Does this have to happen below, for all attaches, in case | 
| 269 | * the struct inode gets blown away by votes? | 269 | * the struct inode gets blown away by the downconvert thread? | 
| 270 | */ | 270 | */ | 
| 271 | dl->dl_inode = igrab(inode); | 271 | dl->dl_inode = igrab(inode); | 
| 272 | dl->dl_parent_blkno = parent_blkno; | 272 | dl->dl_parent_blkno = parent_blkno; | 
| diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 63b28fdceb4a..6b0107f21344 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -846,14 +846,14 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 846 | mlog_entry("dirino=%llu\n", | 846 | mlog_entry("dirino=%llu\n", | 
| 847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 
| 848 | 848 | ||
| 849 | error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 849 | error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 
| 850 | if (lock_level && error >= 0) { | 850 | if (lock_level && error >= 0) { | 
| 851 | /* We release EX lock which used to update atime | 851 | /* We release EX lock which used to update atime | 
| 852 | * and get PR lock again to reduce contention | 852 | * and get PR lock again to reduce contention | 
| 853 | * on commonly accessed directories. */ | 853 | * on commonly accessed directories. */ | 
| 854 | ocfs2_meta_unlock(inode, 1); | 854 | ocfs2_inode_unlock(inode, 1); | 
| 855 | lock_level = 0; | 855 | lock_level = 0; | 
| 856 | error = ocfs2_meta_lock(inode, NULL, 0); | 856 | error = ocfs2_inode_lock(inode, NULL, 0); | 
| 857 | } | 857 | } | 
| 858 | if (error < 0) { | 858 | if (error < 0) { | 
| 859 | if (error != -ENOENT) | 859 | if (error != -ENOENT) | 
| @@ -865,7 +865,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 
| 866 | dirent, filldir, NULL); | 866 | dirent, filldir, NULL); | 
| 867 | 867 | ||
| 868 | ocfs2_meta_unlock(inode, lock_level); | 868 | ocfs2_inode_unlock(inode, lock_level); | 
| 869 | 869 | ||
| 870 | bail_nolock: | 870 | bail_nolock: | 
| 871 | mlog_exit(error); | 871 | mlog_exit(error); | 
| diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c index d2be3ad841f9..a733b3321f83 100644 --- a/fs/ocfs2/dlm/dlmfsver.c +++ b/fs/ocfs2/dlm/dlmfsver.c | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | 28 | ||
| 29 | #include "dlmfsver.h" | 29 | #include "dlmfsver.h" | 
| 30 | 30 | ||
| 31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" | 
| 32 | 32 | ||
| 33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | 
| 34 | 34 | ||
| diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 2fde7bf91434..91f747b8a538 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -2270,6 +2270,12 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
| 2270 | } | 2270 | } | 
| 2271 | } | 2271 | } | 
| 2272 | 2272 | ||
| 2273 | /* Clean up join state on node death. */ | ||
| 2274 | if (dlm->joining_node == idx) { | ||
| 2275 | mlog(0, "Clearing join state for node %u\n", idx); | ||
| 2276 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
| 2277 | } | ||
| 2278 | |||
| 2273 | /* check to see if the node is already considered dead */ | 2279 | /* check to see if the node is already considered dead */ | 
| 2274 | if (!test_bit(idx, dlm->live_nodes_map)) { | 2280 | if (!test_bit(idx, dlm->live_nodes_map)) { | 
| 2275 | mlog(0, "for domain %s, node %d is already dead. " | 2281 | mlog(0, "for domain %s, node %d is already dead. " | 
| @@ -2288,12 +2294,6 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
| 2288 | 2294 | ||
| 2289 | clear_bit(idx, dlm->live_nodes_map); | 2295 | clear_bit(idx, dlm->live_nodes_map); | 
| 2290 | 2296 | ||
| 2291 | /* Clean up join state on node death. */ | ||
| 2292 | if (dlm->joining_node == idx) { | ||
| 2293 | mlog(0, "Clearing join state for node %u\n", idx); | ||
| 2294 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
| 2295 | } | ||
| 2296 | |||
| 2297 | /* make sure local cleanup occurs before the heartbeat events */ | 2297 | /* make sure local cleanup occurs before the heartbeat events */ | 
| 2298 | if (!test_bit(idx, dlm->recovery_map)) | 2298 | if (!test_bit(idx, dlm->recovery_map)) | 
| 2299 | dlm_do_local_recovery_cleanup(dlm, idx); | 2299 | dlm_do_local_recovery_cleanup(dlm, idx); | 
| @@ -2321,6 +2321,13 @@ void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) | |||
| 2321 | if (!dlm_grab(dlm)) | 2321 | if (!dlm_grab(dlm)) | 
| 2322 | return; | 2322 | return; | 
| 2323 | 2323 | ||
| 2324 | /* | ||
| 2325 | * This will notify any dlm users that a node in our domain | ||
| 2326 | * went away without notifying us first. | ||
| 2327 | */ | ||
| 2328 | if (test_bit(idx, dlm->domain_map)) | ||
| 2329 | dlm_fire_domain_eviction_callbacks(dlm, idx); | ||
| 2330 | |||
| 2324 | spin_lock(&dlm->spinlock); | 2331 | spin_lock(&dlm->spinlock); | 
| 2325 | __dlm_hb_node_down(dlm, idx); | 2332 | __dlm_hb_node_down(dlm, idx); | 
| 2326 | spin_unlock(&dlm->spinlock); | 2333 | spin_unlock(&dlm->spinlock); | 
| diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c index 7ef2653f8f41..dfc0da4d158d 100644 --- a/fs/ocfs2/dlm/dlmver.c +++ b/fs/ocfs2/dlm/dlmver.c | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | 28 | ||
| 29 | #include "dlmver.h" | 29 | #include "dlmver.h" | 
| 30 | 30 | ||
| 31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" | 
| 32 | 32 | ||
| 33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION | 
| 34 | 34 | ||
| diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4e97dcceaf8f..3867244fb144 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -55,7 +55,6 @@ | |||
| 55 | #include "slot_map.h" | 55 | #include "slot_map.h" | 
| 56 | #include "super.h" | 56 | #include "super.h" | 
| 57 | #include "uptodate.h" | 57 | #include "uptodate.h" | 
| 58 | #include "vote.h" | ||
| 59 | 58 | ||
| 60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" | 
| 61 | 60 | ||
| @@ -69,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
| 69 | 68 | ||
| 70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 
| 71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 
| 71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
| 72 | 72 | ||
| 73 | /* | 73 | /* | 
| 74 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. | 
| @@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops { | |||
| 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 
| 154 | 154 | ||
| 155 | /* | 155 | /* | 
| 156 | * Optionally called in the downconvert (or "vote") thread | 156 | * Optionally called in the downconvert thread after a | 
| 157 | * after a successful downconvert. The lockres will not be | 157 | * successful downconvert. The lockres will not be referenced | 
| 158 | * referenced after this callback is called, so it is safe to | 158 | * after this callback is called, so it is safe to free | 
| 159 | * free memory, etc. | 159 | * memory, etc. | 
| 160 | * | 160 | * | 
| 161 | * The exact semantics of when this is called are controlled | 161 | * The exact semantics of when this is called are controlled | 
| 162 | * by ->downconvert_worker() | 162 | * by ->downconvert_worker() | 
| @@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | |||
| 225 | .flags = 0, | 225 | .flags = 0, | 
| 226 | }; | 226 | }; | 
| 227 | 227 | ||
| 228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { | 
| 229 | .get_osb = ocfs2_get_inode_osb, | 229 | .get_osb = ocfs2_get_inode_osb, | 
| 230 | .check_downconvert = ocfs2_check_meta_downconvert, | 230 | .check_downconvert = ocfs2_check_meta_downconvert, | 
| 231 | .set_lvb = ocfs2_set_meta_lvb, | 231 | .set_lvb = ocfs2_set_meta_lvb, | 
| 232 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | ||
| 233 | }; | ||
| 234 | |||
| 235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | ||
| 236 | .get_osb = ocfs2_get_inode_osb, | ||
| 237 | .downconvert_worker = ocfs2_data_convert_worker, | 232 | .downconvert_worker = ocfs2_data_convert_worker, | 
| 238 | .flags = 0, | 233 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 
| 239 | }; | 234 | }; | 
| 240 | 235 | ||
| 241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 236 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 
| @@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
| 258 | .flags = 0, | 253 | .flags = 0, | 
| 259 | }; | 254 | }; | 
| 260 | 255 | ||
| 256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
| 257 | .get_osb = ocfs2_get_file_osb, | ||
| 258 | .flags = 0, | ||
| 259 | }; | ||
| 260 | |||
| 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 
| 262 | { | 262 | { | 
| 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 
| 264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | ||
| 265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 264 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 
| 266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 265 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 
| 267 | } | 266 | } | 
| @@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
| 310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 
| 311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ | 
| 312 | } while (0) | 311 | } while (0) | 
| 313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); | 
| 314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 
| 315 | static int ocfs2_meta_lock_update(struct inode *inode, | 314 | struct ocfs2_lock_res *lockres); | 
| 315 | static int ocfs2_inode_lock_update(struct inode *inode, | ||
| 316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); | 
| 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 
| 318 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); | 
| 319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
| 320 | int new_level); | ||
| 321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
| 322 | struct ocfs2_lock_res *lockres, | ||
| 323 | int new_level, | ||
| 324 | int lvb); | ||
| 325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
| 326 | struct ocfs2_lock_res *lockres); | ||
| 327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
| 328 | struct ocfs2_lock_res *lockres); | ||
| 329 | |||
| 319 | 330 | ||
| 320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 
| 321 | u64 blkno, | 332 | u64 blkno, | 
| @@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
| 402 | ops = &ocfs2_inode_rw_lops; | 413 | ops = &ocfs2_inode_rw_lops; | 
| 403 | break; | 414 | break; | 
| 404 | case OCFS2_LOCK_TYPE_META: | 415 | case OCFS2_LOCK_TYPE_META: | 
| 405 | ops = &ocfs2_inode_meta_lops; | 416 | ops = &ocfs2_inode_inode_lops; | 
| 406 | break; | ||
| 407 | case OCFS2_LOCK_TYPE_DATA: | ||
| 408 | ops = &ocfs2_inode_data_lops; | ||
| 409 | break; | 417 | break; | 
| 410 | case OCFS2_LOCK_TYPE_OPEN: | 418 | case OCFS2_LOCK_TYPE_OPEN: | 
| 411 | ops = &ocfs2_inode_open_lops; | 419 | ops = &ocfs2_inode_open_lops; | 
| @@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
| 428 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); | 
| 429 | } | 437 | } | 
| 430 | 438 | ||
| 439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
| 440 | { | ||
| 441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
| 442 | |||
| 443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
| 444 | } | ||
| 445 | |||
| 431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 
| 432 | { | 447 | { | 
| 433 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; | 
| @@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
| 508 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); | 
| 509 | } | 524 | } | 
| 510 | 525 | ||
| 526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
| 527 | struct ocfs2_file_private *fp) | ||
| 528 | { | ||
| 529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
| 530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 531 | |||
| 532 | ocfs2_lock_res_init_once(lockres); | ||
| 533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
| 534 | inode->i_generation, lockres->l_name); | ||
| 535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
| 536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
| 537 | fp); | ||
| 538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
| 539 | } | ||
| 540 | |||
| 511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 
| 512 | { | 542 | { | 
| 513 | mlog_entry_void(); | 543 | mlog_entry_void(); | 
| @@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 724 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, | 
| 725 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); | 
| 726 | 756 | ||
| 757 | /* | ||
| 758 | * We can skip the bast for locks which don't enable caching - | ||
| 759 | * they'll be dropped at the earliest possible time anyway. | ||
| 760 | */ | ||
| 761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
| 762 | return; | ||
| 763 | |||
| 727 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); | 
| 728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 
| 729 | if (needs_downconvert) | 766 | if (needs_downconvert) | 
| @@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 732 | 769 | ||
| 733 | wake_up(&lockres->l_event); | 770 | wake_up(&lockres->l_event); | 
| 734 | 771 | ||
| 735 | ocfs2_kick_vote_thread(osb); | 772 | ocfs2_wake_downconvert_thread(osb); | 
| 736 | } | 773 | } | 
| 737 | 774 | ||
| 738 | static void ocfs2_locking_ast(void *opaque) | 775 | static void ocfs2_locking_ast(void *opaque) | 
| @@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
| 935 | 972 | ||
| 936 | } | 973 | } | 
| 937 | 974 | ||
| 975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
| 976 | struct ocfs2_lock_res *lockres) | ||
| 977 | { | ||
| 978 | int ret; | ||
| 979 | |||
| 980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
| 981 | if (ret) | ||
| 982 | lockres_remove_mask_waiter(lockres, mw); | ||
| 983 | else | ||
| 984 | ret = mw->mw_status; | ||
| 985 | /* Re-arm the completion in case we want to wait on it again */ | ||
| 986 | INIT_COMPLETION(mw->mw_complete); | ||
| 987 | return ret; | ||
| 988 | } | ||
| 989 | |||
| 938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 
| 939 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, | 
| 940 | int level, | 992 | int level, | 
| @@ -1089,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
| 1089 | mlog_entry_void(); | 1141 | mlog_entry_void(); | 
| 1090 | spin_lock_irqsave(&lockres->l_lock, flags); | 1142 | spin_lock_irqsave(&lockres->l_lock, flags); | 
| 1091 | ocfs2_dec_holders(lockres, level); | 1143 | ocfs2_dec_holders(lockres, level); | 
| 1092 | ocfs2_vote_on_unlock(osb, lockres); | 1144 | ocfs2_downconvert_on_unlock(osb, lockres); | 
| 1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1145 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 
| 1094 | mlog_exit_void(); | 1146 | mlog_exit_void(); | 
| 1095 | } | 1147 | } | 
| @@ -1147,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
| 1147 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1199 | * We don't want to use LKM_LOCAL on a meta data lock as they | 
| 1148 | * don't use a generation in their lock names. | 1200 | * don't use a generation in their lock names. | 
| 1149 | */ | 1201 | */ | 
| 1150 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 1202 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 
| 1151 | if (ret) { | ||
| 1152 | mlog_errno(ret); | ||
| 1153 | goto bail; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | ||
| 1157 | if (ret) { | 1203 | if (ret) { | 
| 1158 | mlog_errno(ret); | 1204 | mlog_errno(ret); | 
| 1159 | goto bail; | 1205 | goto bail; | 
| @@ -1311,76 +1357,221 @@ out: | |||
| 1311 | mlog_exit_void(); | 1357 | mlog_exit_void(); | 
| 1312 | } | 1358 | } | 
| 1313 | 1359 | ||
| 1314 | int ocfs2_data_lock_full(struct inode *inode, | 1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, | 
| 1315 | int write, | 1361 | int level) | 
| 1316 | int arg_flags) | ||
| 1317 | { | 1362 | { | 
| 1318 | int status = 0, level; | 1363 | int ret; | 
| 1319 | struct ocfs2_lock_res *lockres; | 1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 
| 1320 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | unsigned long flags; | 
| 1366 | struct ocfs2_mask_waiter mw; | ||
| 1321 | 1367 | ||
| 1322 | BUG_ON(!inode); | 1368 | ocfs2_init_mask_waiter(&mw); | 
| 1323 | 1369 | ||
| 1324 | mlog_entry_void(); | 1370 | retry_cancel: | 
| 1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
| 1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
| 1374 | if (ret) { | ||
| 1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
| 1377 | if (ret < 0) { | ||
| 1378 | mlog_errno(ret); | ||
| 1379 | goto out; | ||
| 1380 | } | ||
| 1381 | goto retry_cancel; | ||
| 1382 | } | ||
| 1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
| 1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1325 | 1385 | ||
| 1326 | mlog(0, "inode %llu take %s DATA lock\n", | 1386 | ocfs2_wait_for_mask(&mw); | 
| 1327 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1387 | goto retry_cancel; | 
| 1328 | write ? "EXMODE" : "PRMODE"); | 1388 | } | 
| 1329 | 1389 | ||
| 1330 | /* We'll allow faking a readonly data lock for | 1390 | ret = -ERESTARTSYS; | 
| 1331 | * rodevices. */ | 1391 | /* | 
| 1332 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 1392 | * We may still have gotten the lock, in which case there's no | 
| 1333 | if (write) { | 1393 | * point to restarting the syscall. | 
| 1334 | status = -EROFS; | 1394 | */ | 
| 1335 | mlog_errno(status); | 1395 | if (lockres->l_level == level) | 
| 1396 | ret = 0; | ||
| 1397 | |||
| 1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
| 1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
| 1400 | |||
| 1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1402 | |||
| 1403 | out: | ||
| 1404 | return ret; | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | /* | ||
| 1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
| 1409 | * flock() calls. The locking approach this requires is sufficiently | ||
| 1410 | * different from all other cluster lock types that we implement a | ||
| 1411 | * separate path to the "low-level" dlm calls. In particular: | ||
| 1412 | * | ||
| 1413 | * - No optimization of lock levels is done - we take at exactly | ||
| 1414 | * what's been requested. | ||
| 1415 | * | ||
| 1416 | * - No lock caching is employed. We immediately downconvert to | ||
| 1417 | * no-lock at unlock time. This also means flock locks never go on | ||
| 1418 | * the blocking list. | ||
| 1419 | * | ||
| 1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
| 1421 | * sure to allow cancellation of a misbehaving application's flock() | ||
| 1422 | * request. | ||
| 1423 | * | ||
| 1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
| 1425 | * can simplify the code by requiring the caller to guarantee | ||
| 1426 | * serialization of dlmglue flock calls. | ||
| 1427 | */ | ||
| 1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
| 1429 | { | ||
| 1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
| 1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
| 1432 | unsigned long flags; | ||
| 1433 | struct ocfs2_file_private *fp = file->private_data; | ||
| 1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
| 1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
| 1436 | struct ocfs2_mask_waiter mw; | ||
| 1437 | |||
| 1438 | ocfs2_init_mask_waiter(&mw); | ||
| 1439 | |||
| 1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
| 1441 | (lockres->l_level > LKM_NLMODE)) { | ||
| 1442 | mlog(ML_ERROR, | ||
| 1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
| 1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
| 1445 | lockres->l_level); | ||
| 1446 | return -EINVAL; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
| 1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
| 1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1453 | |||
| 1454 | /* | ||
| 1455 | * Get the lock at NLMODE to start - that way we | ||
| 1456 | * can cancel the upconvert request if need be. | ||
| 1457 | */ | ||
| 1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
| 1459 | if (ret < 0) { | ||
| 1460 | mlog_errno(ret); | ||
| 1461 | goto out; | ||
| 1336 | } | 1462 | } | 
| 1337 | goto out; | 1463 | |
| 1464 | ret = ocfs2_wait_for_mask(&mw); | ||
| 1465 | if (ret) { | ||
| 1466 | mlog_errno(ret); | ||
| 1467 | goto out; | ||
| 1468 | } | ||
| 1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1338 | } | 1470 | } | 
| 1339 | 1471 | ||
| 1340 | if (ocfs2_mount_local(osb)) | 1472 | lockres->l_action = OCFS2_AST_CONVERT; | 
| 1341 | goto out; | 1473 | lkm_flags |= LKM_CONVERT; | 
| 1474 | lockres->l_requested = level; | ||
| 1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
| 1342 | 1476 | ||
| 1343 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 
| 1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1344 | 1479 | ||
| 1345 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, | 
| 1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
| 1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
| 1483 | if (ret != DLM_NORMAL) { | ||
| 1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
| 1485 | ret = -EAGAIN; | ||
| 1486 | else { | ||
| 1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
| 1488 | ret = -EINVAL; | ||
| 1489 | } | ||
| 1346 | 1490 | ||
| 1347 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 1491 | ocfs2_recover_from_dlm_error(lockres, 1); | 
| 1348 | 0, arg_flags); | 1492 | lockres_remove_mask_waiter(lockres, &mw); | 
| 1349 | if (status < 0 && status != -EAGAIN) | 1493 | goto out; | 
| 1350 | mlog_errno(status); | 1494 | } | 
| 1495 | |||
| 1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
| 1497 | if (ret == -ERESTARTSYS) { | ||
| 1498 | /* | ||
| 1499 | * Userspace can cause deadlock itself with | ||
| 1500 | * flock(). Current behavior locally is to allow the | ||
| 1501 | * deadlock, but abort the system call if a signal is | ||
| 1502 | * received. We follow this example, otherwise a | ||
| 1503 | * poorly written program could sit in kernel until | ||
| 1504 | * reboot. | ||
| 1505 | * | ||
| 1506 | * Handling this is a bit more complicated for Ocfs2 | ||
| 1507 | * though. We can't exit this function with an | ||
| 1508 | * outstanding lock request, so a cancel convert is | ||
| 1509 | * required. We intentionally overwrite 'ret' - if the | ||
| 1510 | * cancel fails and the lock was granted, it's easier | ||
| 1511 | * to just bubble success back up to the user. | ||
| 1512 | */ | ||
| 1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
| 1514 | } | ||
| 1351 | 1515 | ||
| 1352 | out: | 1516 | out: | 
| 1353 | mlog_exit(status); | 1517 | |
| 1354 | return status; | 1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", | 
| 1519 | lockres->l_name, ex, trylock, ret); | ||
| 1520 | return ret; | ||
| 1355 | } | 1521 | } | 
| 1356 | 1522 | ||
| 1357 | /* see ocfs2_meta_lock_with_page() */ | 1523 | void ocfs2_file_unlock(struct file *file) | 
| 1358 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
| 1359 | int write, | ||
| 1360 | struct page *page) | ||
| 1361 | { | 1524 | { | 
| 1362 | int ret; | 1525 | int ret; | 
| 1526 | unsigned long flags; | ||
| 1527 | struct ocfs2_file_private *fp = file->private_data; | ||
| 1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
| 1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
| 1530 | struct ocfs2_mask_waiter mw; | ||
| 1363 | 1531 | ||
| 1364 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 1532 | ocfs2_init_mask_waiter(&mw); | 
| 1365 | if (ret == -EAGAIN) { | 1533 | |
| 1366 | unlock_page(page); | 1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 
| 1367 | if (ocfs2_data_lock(inode, write) == 0) | 1535 | return; | 
| 1368 | ocfs2_data_unlock(inode, write); | 1536 | |
| 1369 | ret = AOP_TRUNCATED_PAGE; | 1537 | if (lockres->l_level == LKM_NLMODE) | 
| 1538 | return; | ||
| 1539 | |||
| 1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
| 1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
| 1542 | lockres->l_action); | ||
| 1543 | |||
| 1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 1545 | /* | ||
| 1546 | * Fake a blocking ast for the downconvert code. | ||
| 1547 | */ | ||
| 1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
| 1549 | lockres->l_blocking = LKM_EXMODE; | ||
| 1550 | |||
| 1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
| 1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
| 1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 1554 | |||
| 1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
| 1556 | if (ret) { | ||
| 1557 | mlog_errno(ret); | ||
| 1558 | return; | ||
| 1370 | } | 1559 | } | 
| 1371 | 1560 | ||
| 1372 | return ret; | 1561 | ret = ocfs2_wait_for_mask(&mw); | 
| 1562 | if (ret) | ||
| 1563 | mlog_errno(ret); | ||
| 1373 | } | 1564 | } | 
| 1374 | 1565 | ||
| 1375 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 
| 1376 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) | 
| 1377 | { | 1568 | { | 
| 1378 | int kick = 0; | 1569 | int kick = 0; | 
| 1379 | 1570 | ||
| 1380 | mlog_entry_void(); | 1571 | mlog_entry_void(); | 
| 1381 | 1572 | ||
| 1382 | /* If we know that another node is waiting on our lock, kick | 1573 | /* If we know that another node is waiting on our lock, kick | 
| 1383 | * the vote thread * pre-emptively when we reach a release | 1574 | * the downconvert thread pre-emptively when we reach a release | 
| 1384 | * condition. */ | 1575 | * condition. */ | 
| 1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1576 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 
| 1386 | switch(lockres->l_blocking) { | 1577 | switch(lockres->l_blocking) { | 
| @@ -1398,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
| 1398 | } | 1589 | } | 
| 1399 | 1590 | ||
| 1400 | if (kick) | 1591 | if (kick) | 
| 1401 | ocfs2_kick_vote_thread(osb); | 1592 | ocfs2_wake_downconvert_thread(osb); | 
| 1402 | |||
| 1403 | mlog_exit_void(); | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | void ocfs2_data_unlock(struct inode *inode, | ||
| 1407 | int write) | ||
| 1408 | { | ||
| 1409 | int level = write ? LKM_EXMODE : LKM_PRMODE; | ||
| 1410 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | ||
| 1411 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1412 | |||
| 1413 | mlog_entry_void(); | ||
| 1414 | |||
| 1415 | mlog(0, "inode %llu drop %s DATA lock\n", | ||
| 1416 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 1417 | write ? "EXMODE" : "PRMODE"); | ||
| 1418 | |||
| 1419 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | ||
| 1420 | !ocfs2_mount_local(osb)) | ||
| 1421 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | ||
| 1422 | 1593 | ||
| 1423 | mlog_exit_void(); | 1594 | mlog_exit_void(); | 
| 1424 | } | 1595 | } | 
| @@ -1442,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec) | |||
| 1442 | 1613 | ||
| 1443 | /* Call this with the lockres locked. I am reasonably sure we don't | 1614 | /* Call this with the lockres locked. I am reasonably sure we don't | 
| 1444 | * need ip_lock in this function as anyone who would be changing those | 1615 | * need ip_lock in this function as anyone who would be changing those | 
| 1445 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 1616 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ | 
| 1446 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 1617 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 
| 1447 | { | 1618 | { | 
| 1448 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1619 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| 1449 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1620 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| 1450 | struct ocfs2_meta_lvb *lvb; | 1621 | struct ocfs2_meta_lvb *lvb; | 
| 1451 | 1622 | ||
| 1452 | mlog_entry_void(); | 1623 | mlog_entry_void(); | 
| @@ -1496,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
| 1496 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 1667 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 
| 1497 | { | 1668 | { | 
| 1498 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1669 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| 1499 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1670 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| 1500 | struct ocfs2_meta_lvb *lvb; | 1671 | struct ocfs2_meta_lvb *lvb; | 
| 1501 | 1672 | ||
| 1502 | mlog_entry_void(); | 1673 | mlog_entry_void(); | 
| @@ -1604,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre | |||
| 1604 | } | 1775 | } | 
| 1605 | 1776 | ||
| 1606 | /* may or may not return a bh if it went to disk. */ | 1777 | /* may or may not return a bh if it went to disk. */ | 
| 1607 | static int ocfs2_meta_lock_update(struct inode *inode, | 1778 | static int ocfs2_inode_lock_update(struct inode *inode, | 
| 1608 | struct buffer_head **bh) | 1779 | struct buffer_head **bh) | 
| 1609 | { | 1780 | { | 
| 1610 | int status = 0; | 1781 | int status = 0; | 
| 1611 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1782 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| 1612 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1783 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 
| 1613 | struct ocfs2_dinode *fe; | 1784 | struct ocfs2_dinode *fe; | 
| 1614 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1785 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| 1615 | 1786 | ||
| @@ -1721,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
| 1721 | * returns < 0 error if the callback will never be called, otherwise | 1892 | * returns < 0 error if the callback will never be called, otherwise | 
| 1722 | * the result of the lock will be communicated via the callback. | 1893 | * the result of the lock will be communicated via the callback. | 
| 1723 | */ | 1894 | */ | 
| 1724 | int ocfs2_meta_lock_full(struct inode *inode, | 1895 | int ocfs2_inode_lock_full(struct inode *inode, | 
| 1725 | struct buffer_head **ret_bh, | 1896 | struct buffer_head **ret_bh, | 
| 1726 | int ex, | 1897 | int ex, | 
| 1727 | int arg_flags) | 1898 | int arg_flags) | 
| @@ -1756,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
| 1756 | wait_event(osb->recovery_event, | 1927 | wait_event(osb->recovery_event, | 
| 1757 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1928 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 
| 1758 | 1929 | ||
| 1759 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 1930 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| 1760 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1931 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 
| 1761 | dlm_flags = 0; | 1932 | dlm_flags = 0; | 
| 1762 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 1933 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 
| @@ -1795,11 +1966,11 @@ local: | |||
| 1795 | } | 1966 | } | 
| 1796 | 1967 | ||
| 1797 | /* This is fun. The caller may want a bh back, or it may | 1968 | /* This is fun. The caller may want a bh back, or it may | 
| 1798 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1969 | * not. ocfs2_inode_lock_update definitely wants one in, but | 
| 1799 | * may or may not read one, depending on what's in the | 1970 | * may or may not read one, depending on what's in the | 
| 1800 | * LVB. The result of all of this is that we've *only* gone to | 1971 | * LVB. The result of all of this is that we've *only* gone to | 
| 1801 | * disk if we have to, so the complexity is worthwhile. */ | 1972 | * disk if we have to, so the complexity is worthwhile. */ | 
| 1802 | status = ocfs2_meta_lock_update(inode, &local_bh); | 1973 | status = ocfs2_inode_lock_update(inode, &local_bh); | 
| 1803 | if (status < 0) { | 1974 | if (status < 0) { | 
| 1804 | if (status != -ENOENT) | 1975 | if (status != -ENOENT) | 
| 1805 | mlog_errno(status); | 1976 | mlog_errno(status); | 
| @@ -1821,7 +1992,7 @@ bail: | |||
| 1821 | *ret_bh = NULL; | 1992 | *ret_bh = NULL; | 
| 1822 | } | 1993 | } | 
| 1823 | if (acquired) | 1994 | if (acquired) | 
| 1824 | ocfs2_meta_unlock(inode, ex); | 1995 | ocfs2_inode_unlock(inode, ex); | 
| 1825 | } | 1996 | } | 
| 1826 | 1997 | ||
| 1827 | if (local_bh) | 1998 | if (local_bh) | 
| @@ -1832,19 +2003,20 @@ bail: | |||
| 1832 | } | 2003 | } | 
| 1833 | 2004 | ||
| 1834 | /* | 2005 | /* | 
| 1835 | * This is working around a lock inversion between tasks acquiring DLM locks | 2006 | * This is working around a lock inversion between tasks acquiring DLM | 
| 1836 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 2007 | * locks while holding a page lock and the downconvert thread which | 
| 1837 | * while acquiring page locks. | 2008 | * blocks dlm lock acquiry while acquiring page locks. | 
| 1838 | * | 2009 | * | 
| 1839 | * ** These _with_page variantes are only intended to be called from aop | 2010 | * ** These _with_page variantes are only intended to be called from aop | 
| 1840 | * methods that hold page locks and return a very specific *positive* error | 2011 | * methods that hold page locks and return a very specific *positive* error | 
| 1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2012 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 
| 1842 | * | 2013 | * | 
| 1843 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 2014 | * The DLM is called such that it returns -EAGAIN if it would have | 
| 1844 | * waiting for the vote thread. In that case we unlock our page so the vote | 2015 | * blocked waiting for the downconvert thread. In that case we unlock | 
| 1845 | * thread can make progress. Once we've done this we have to return | 2016 | * our page so the downconvert thread can make progress. Once we've | 
| 1846 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 2017 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method | 
| 1847 | * into the VFS who will then immediately retry the aop call. | 2018 | * that called us can bubble that back up into the VFS who will then | 
| 2019 | * immediately retry the aop call. | ||
| 1848 | * | 2020 | * | 
| 1849 | * We do a blocking lock and immediate unlock before returning, though, so that | 2021 | * We do a blocking lock and immediate unlock before returning, though, so that | 
| 1850 | * the lock has a great chance of being cached on this node by the time the VFS | 2022 | * the lock has a great chance of being cached on this node by the time the VFS | 
| @@ -1852,32 +2024,32 @@ bail: | |||
| 1852 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2024 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 
| 1853 | * the lock inversion simply. | 2025 | * the lock inversion simply. | 
| 1854 | */ | 2026 | */ | 
| 1855 | int ocfs2_meta_lock_with_page(struct inode *inode, | 2027 | int ocfs2_inode_lock_with_page(struct inode *inode, | 
| 1856 | struct buffer_head **ret_bh, | 2028 | struct buffer_head **ret_bh, | 
| 1857 | int ex, | 2029 | int ex, | 
| 1858 | struct page *page) | 2030 | struct page *page) | 
| 1859 | { | 2031 | { | 
| 1860 | int ret; | 2032 | int ret; | 
| 1861 | 2033 | ||
| 1862 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2034 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 
| 1863 | if (ret == -EAGAIN) { | 2035 | if (ret == -EAGAIN) { | 
| 1864 | unlock_page(page); | 2036 | unlock_page(page); | 
| 1865 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 2037 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) | 
| 1866 | ocfs2_meta_unlock(inode, ex); | 2038 | ocfs2_inode_unlock(inode, ex); | 
| 1867 | ret = AOP_TRUNCATED_PAGE; | 2039 | ret = AOP_TRUNCATED_PAGE; | 
| 1868 | } | 2040 | } | 
| 1869 | 2041 | ||
| 1870 | return ret; | 2042 | return ret; | 
| 1871 | } | 2043 | } | 
| 1872 | 2044 | ||
| 1873 | int ocfs2_meta_lock_atime(struct inode *inode, | 2045 | int ocfs2_inode_lock_atime(struct inode *inode, | 
| 1874 | struct vfsmount *vfsmnt, | 2046 | struct vfsmount *vfsmnt, | 
| 1875 | int *level) | 2047 | int *level) | 
| 1876 | { | 2048 | { | 
| 1877 | int ret; | 2049 | int ret; | 
| 1878 | 2050 | ||
| 1879 | mlog_entry_void(); | 2051 | mlog_entry_void(); | 
| 1880 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2052 | ret = ocfs2_inode_lock(inode, NULL, 0); | 
| 1881 | if (ret < 0) { | 2053 | if (ret < 0) { | 
| 1882 | mlog_errno(ret); | 2054 | mlog_errno(ret); | 
| 1883 | return ret; | 2055 | return ret; | 
| @@ -1890,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
| 1890 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2062 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 
| 1891 | struct buffer_head *bh = NULL; | 2063 | struct buffer_head *bh = NULL; | 
| 1892 | 2064 | ||
| 1893 | ocfs2_meta_unlock(inode, 0); | 2065 | ocfs2_inode_unlock(inode, 0); | 
| 1894 | ret = ocfs2_meta_lock(inode, &bh, 1); | 2066 | ret = ocfs2_inode_lock(inode, &bh, 1); | 
| 1895 | if (ret < 0) { | 2067 | if (ret < 0) { | 
| 1896 | mlog_errno(ret); | 2068 | mlog_errno(ret); | 
| 1897 | return ret; | 2069 | return ret; | 
| @@ -1908,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
| 1908 | return ret; | 2080 | return ret; | 
| 1909 | } | 2081 | } | 
| 1910 | 2082 | ||
| 1911 | void ocfs2_meta_unlock(struct inode *inode, | 2083 | void ocfs2_inode_unlock(struct inode *inode, | 
| 1912 | int ex) | 2084 | int ex) | 
| 1913 | { | 2085 | { | 
| 1914 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2086 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 
| 1915 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 2087 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 
| 1916 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2088 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| 1917 | 2089 | ||
| 1918 | mlog_entry_void(); | 2090 | mlog_entry_void(); | 
| @@ -2320,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
| 2320 | goto bail; | 2492 | goto bail; | 
| 2321 | } | 2493 | } | 
| 2322 | 2494 | ||
| 2323 | /* launch vote thread */ | 2495 | /* launch downconvert thread */ | 
| 2324 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2496 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); | 
| 2325 | if (IS_ERR(osb->vote_task)) { | 2497 | if (IS_ERR(osb->dc_task)) { | 
| 2326 | status = PTR_ERR(osb->vote_task); | 2498 | status = PTR_ERR(osb->dc_task); | 
| 2327 | osb->vote_task = NULL; | 2499 | osb->dc_task = NULL; | 
| 2328 | mlog_errno(status); | 2500 | mlog_errno(status); | 
| 2329 | goto bail; | 2501 | goto bail; | 
| 2330 | } | 2502 | } | 
| @@ -2353,8 +2525,8 @@ local: | |||
| 2353 | bail: | 2525 | bail: | 
| 2354 | if (status < 0) { | 2526 | if (status < 0) { | 
| 2355 | ocfs2_dlm_shutdown_debug(osb); | 2527 | ocfs2_dlm_shutdown_debug(osb); | 
| 2356 | if (osb->vote_task) | 2528 | if (osb->dc_task) | 
| 2357 | kthread_stop(osb->vote_task); | 2529 | kthread_stop(osb->dc_task); | 
| 2358 | } | 2530 | } | 
| 2359 | 2531 | ||
| 2360 | mlog_exit(status); | 2532 | mlog_exit(status); | 
| @@ -2369,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
| 2369 | 2541 | ||
| 2370 | ocfs2_drop_osb_locks(osb); | 2542 | ocfs2_drop_osb_locks(osb); | 
| 2371 | 2543 | ||
| 2372 | if (osb->vote_task) { | 2544 | if (osb->dc_task) { | 
| 2373 | kthread_stop(osb->vote_task); | 2545 | kthread_stop(osb->dc_task); | 
| 2374 | osb->vote_task = NULL; | 2546 | osb->dc_task = NULL; | 
| 2375 | } | 2547 | } | 
| 2376 | 2548 | ||
| 2377 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2549 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 
| @@ -2527,7 +2699,7 @@ out: | |||
| 2527 | 2699 | ||
| 2528 | /* Mark the lockres as being dropped. It will no longer be | 2700 | /* Mark the lockres as being dropped. It will no longer be | 
| 2529 | * queued if blocking, but we still may have to wait on it | 2701 | * queued if blocking, but we still may have to wait on it | 
| 2530 | * being dequeued from the vote thread before we can consider | 2702 | * being dequeued from the downconvert thread before we can consider | 
| 2531 | * it safe to drop. | 2703 | * it safe to drop. | 
| 2532 | * | 2704 | * | 
| 2533 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2705 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 
| @@ -2590,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
| 2590 | status = err; | 2762 | status = err; | 
| 2591 | 2763 | ||
| 2592 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2764 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 
| 2593 | &OCFS2_I(inode)->ip_data_lockres); | 2765 | &OCFS2_I(inode)->ip_inode_lockres); | 
| 2594 | if (err < 0) | ||
| 2595 | mlog_errno(err); | ||
| 2596 | if (err < 0 && !status) | ||
| 2597 | status = err; | ||
| 2598 | |||
| 2599 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | ||
| 2600 | &OCFS2_I(inode)->ip_meta_lockres); | ||
| 2601 | if (err < 0) | 2766 | if (err < 0) | 
| 2602 | mlog_errno(err); | 2767 | mlog_errno(err); | 
| 2603 | if (err < 0 && !status) | 2768 | if (err < 0 && !status) | 
| @@ -2850,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 2850 | inode = ocfs2_lock_res_inode(lockres); | 3015 | inode = ocfs2_lock_res_inode(lockres); | 
| 2851 | mapping = inode->i_mapping; | 3016 | mapping = inode->i_mapping; | 
| 2852 | 3017 | ||
| 3018 | if (S_ISREG(inode->i_mode)) | ||
| 3019 | goto out; | ||
| 3020 | |||
| 2853 | /* | 3021 | /* | 
| 2854 | * We need this before the filemap_fdatawrite() so that it can | 3022 | * We need this before the filemap_fdatawrite() so that it can | 
| 2855 | * transfer the dirty bit from the PTE to the | 3023 | * transfer the dirty bit from the PTE to the | 
| @@ -2875,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 2875 | filemap_fdatawait(mapping); | 3043 | filemap_fdatawait(mapping); | 
| 2876 | } | 3044 | } | 
| 2877 | 3045 | ||
| 3046 | out: | ||
| 2878 | return UNBLOCK_CONTINUE; | 3047 | return UNBLOCK_CONTINUE; | 
| 2879 | } | 3048 | } | 
| 2880 | 3049 | ||
| @@ -2903,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
| 2903 | 3072 | ||
| 2904 | /* | 3073 | /* | 
| 2905 | * Does the final reference drop on our dentry lock. Right now this | 3074 | * Does the final reference drop on our dentry lock. Right now this | 
| 2906 | * happens in the vote thread, but we could choose to simplify the | 3075 | * happens in the downconvert thread, but we could choose to simplify the | 
| 2907 | * dlmglue API and push these off to the ocfs2_wq in the future. | 3076 | * dlmglue API and push these off to the ocfs2_wq in the future. | 
| 2908 | */ | 3077 | */ | 
| 2909 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 3078 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 
| @@ -3042,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
| 3042 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3211 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 
| 3043 | 3212 | ||
| 3044 | /* Detect whether a lock has been marked as going away while | 3213 | /* Detect whether a lock has been marked as going away while | 
| 3045 | * the vote thread was processing other things. A lock can | 3214 | * the downconvert thread was processing other things. A lock can | 
| 3046 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3215 | * still be marked with OCFS2_LOCK_FREEING after this check, | 
| 3047 | * but short circuiting here will still save us some | 3216 | * but short circuiting here will still save us some | 
| 3048 | * performance. */ | 3217 | * performance. */ | 
| @@ -3091,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
| 3091 | 3260 | ||
| 3092 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3261 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 
| 3093 | 3262 | ||
| 3094 | spin_lock(&osb->vote_task_lock); | 3263 | spin_lock(&osb->dc_task_lock); | 
| 3095 | if (list_empty(&lockres->l_blocked_list)) { | 3264 | if (list_empty(&lockres->l_blocked_list)) { | 
| 3096 | list_add_tail(&lockres->l_blocked_list, | 3265 | list_add_tail(&lockres->l_blocked_list, | 
| 3097 | &osb->blocked_lock_list); | 3266 | &osb->blocked_lock_list); | 
| 3098 | osb->blocked_lock_count++; | 3267 | osb->blocked_lock_count++; | 
| 3099 | } | 3268 | } | 
| 3100 | spin_unlock(&osb->vote_task_lock); | 3269 | spin_unlock(&osb->dc_task_lock); | 
| 3270 | |||
| 3271 | mlog_exit_void(); | ||
| 3272 | } | ||
| 3273 | |||
| 3274 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
| 3275 | { | ||
| 3276 | unsigned long processed; | ||
| 3277 | struct ocfs2_lock_res *lockres; | ||
| 3278 | |||
| 3279 | mlog_entry_void(); | ||
| 3280 | |||
| 3281 | spin_lock(&osb->dc_task_lock); | ||
| 3282 | /* grab this early so we know to try again if a state change and | ||
| 3283 | * wake happens part-way through our work */ | ||
| 3284 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
| 3285 | |||
| 3286 | processed = osb->blocked_lock_count; | ||
| 3287 | while (processed) { | ||
| 3288 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
| 3289 | |||
| 3290 | lockres = list_entry(osb->blocked_lock_list.next, | ||
| 3291 | struct ocfs2_lock_res, l_blocked_list); | ||
| 3292 | list_del_init(&lockres->l_blocked_list); | ||
| 3293 | osb->blocked_lock_count--; | ||
| 3294 | spin_unlock(&osb->dc_task_lock); | ||
| 3295 | |||
| 3296 | BUG_ON(!processed); | ||
| 3297 | processed--; | ||
| 3298 | |||
| 3299 | ocfs2_process_blocked_lock(osb, lockres); | ||
| 3300 | |||
| 3301 | spin_lock(&osb->dc_task_lock); | ||
| 3302 | } | ||
| 3303 | spin_unlock(&osb->dc_task_lock); | ||
| 3101 | 3304 | ||
| 3102 | mlog_exit_void(); | 3305 | mlog_exit_void(); | 
| 3103 | } | 3306 | } | 
| 3307 | |||
| 3308 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
| 3309 | { | ||
| 3310 | int empty = 0; | ||
| 3311 | |||
| 3312 | spin_lock(&osb->dc_task_lock); | ||
| 3313 | if (list_empty(&osb->blocked_lock_list)) | ||
| 3314 | empty = 1; | ||
| 3315 | |||
| 3316 | spin_unlock(&osb->dc_task_lock); | ||
| 3317 | return empty; | ||
| 3318 | } | ||
| 3319 | |||
| 3320 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
| 3321 | { | ||
| 3322 | int should_wake = 0; | ||
| 3323 | |||
| 3324 | spin_lock(&osb->dc_task_lock); | ||
| 3325 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
| 3326 | should_wake = 1; | ||
| 3327 | spin_unlock(&osb->dc_task_lock); | ||
| 3328 | |||
| 3329 | return should_wake; | ||
| 3330 | } | ||
| 3331 | |||
| 3332 | int ocfs2_downconvert_thread(void *arg) | ||
| 3333 | { | ||
| 3334 | int status = 0; | ||
| 3335 | struct ocfs2_super *osb = arg; | ||
| 3336 | |||
| 3337 | /* only quit once we've been asked to stop and there is no more | ||
| 3338 | * work available */ | ||
| 3339 | while (!(kthread_should_stop() && | ||
| 3340 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
| 3341 | |||
| 3342 | wait_event_interruptible(osb->dc_event, | ||
| 3343 | ocfs2_downconvert_thread_should_wake(osb) || | ||
| 3344 | kthread_should_stop()); | ||
| 3345 | |||
| 3346 | mlog(0, "downconvert_thread: awoken\n"); | ||
| 3347 | |||
| 3348 | ocfs2_downconvert_thread_do_work(osb); | ||
| 3349 | } | ||
| 3350 | |||
| 3351 | osb->dc_task = NULL; | ||
| 3352 | return status; | ||
| 3353 | } | ||
| 3354 | |||
| 3355 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
| 3356 | { | ||
| 3357 | spin_lock(&osb->dc_task_lock); | ||
| 3358 | /* make sure the voting thread gets a swipe at whatever changes | ||
| 3359 | * the caller may have made to the voting state */ | ||
| 3360 | osb->dc_wake_sequence++; | ||
| 3361 | spin_unlock(&osb->dc_task_lock); | ||
| 3362 | wake_up(&osb->dc_event); | ||
| 3363 | } | ||
| diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 87a785e41205..5f17243ba501 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
| @@ -49,12 +49,12 @@ struct ocfs2_meta_lvb { | |||
| 49 | __be32 lvb_reserved2; | 49 | __be32 lvb_reserved2; | 
| 50 | }; | 50 | }; | 
| 51 | 51 | ||
| 52 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ | 52 | /* ocfs2_inode_lock_full() 'arg_flags' flags */ | 
| 53 | /* don't wait on recovery. */ | 53 | /* don't wait on recovery. */ | 
| 54 | #define OCFS2_META_LOCK_RECOVERY (0x01) | 54 | #define OCFS2_META_LOCK_RECOVERY (0x01) | 
| 55 | /* Instruct the dlm not to queue ourselves on the other node. */ | 55 | /* Instruct the dlm not to queue ourselves on the other node. */ | 
| 56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) | 56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) | 
| 57 | /* don't block waiting for the vote thread, instead return -EAGAIN */ | 57 | /* don't block waiting for the downconvert thread, instead return -EAGAIN */ | 
| 58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 
| 59 | 59 | ||
| 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 
| @@ -66,38 +66,32 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
| 66 | struct inode *inode); | 66 | struct inode *inode); | 
| 67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 
| 68 | u64 parent, struct inode *inode); | 68 | u64 parent, struct inode *inode); | 
| 69 | struct ocfs2_file_private; | ||
| 70 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
| 71 | struct ocfs2_file_private *fp); | ||
| 69 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 72 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 
| 70 | int ocfs2_create_new_inode_locks(struct inode *inode); | 73 | int ocfs2_create_new_inode_locks(struct inode *inode); | 
| 71 | int ocfs2_drop_inode_locks(struct inode *inode); | 74 | int ocfs2_drop_inode_locks(struct inode *inode); | 
| 72 | int ocfs2_data_lock_full(struct inode *inode, | ||
| 73 | int write, | ||
| 74 | int arg_flags); | ||
| 75 | #define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0) | ||
| 76 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
| 77 | int write, | ||
| 78 | struct page *page); | ||
| 79 | void ocfs2_data_unlock(struct inode *inode, | ||
| 80 | int write); | ||
| 81 | int ocfs2_rw_lock(struct inode *inode, int write); | 75 | int ocfs2_rw_lock(struct inode *inode, int write); | 
| 82 | void ocfs2_rw_unlock(struct inode *inode, int write); | 76 | void ocfs2_rw_unlock(struct inode *inode, int write); | 
| 83 | int ocfs2_open_lock(struct inode *inode); | 77 | int ocfs2_open_lock(struct inode *inode); | 
| 84 | int ocfs2_try_open_lock(struct inode *inode, int write); | 78 | int ocfs2_try_open_lock(struct inode *inode, int write); | 
| 85 | void ocfs2_open_unlock(struct inode *inode); | 79 | void ocfs2_open_unlock(struct inode *inode); | 
| 86 | int ocfs2_meta_lock_atime(struct inode *inode, | 80 | int ocfs2_inode_lock_atime(struct inode *inode, | 
| 87 | struct vfsmount *vfsmnt, | 81 | struct vfsmount *vfsmnt, | 
| 88 | int *level); | 82 | int *level); | 
| 89 | int ocfs2_meta_lock_full(struct inode *inode, | 83 | int ocfs2_inode_lock_full(struct inode *inode, | 
| 90 | struct buffer_head **ret_bh, | 84 | struct buffer_head **ret_bh, | 
| 91 | int ex, | 85 | int ex, | 
| 92 | int arg_flags); | 86 | int arg_flags); | 
| 93 | int ocfs2_meta_lock_with_page(struct inode *inode, | 87 | int ocfs2_inode_lock_with_page(struct inode *inode, | 
| 94 | struct buffer_head **ret_bh, | 88 | struct buffer_head **ret_bh, | 
| 95 | int ex, | 89 | int ex, | 
| 96 | struct page *page); | 90 | struct page *page); | 
| 97 | /* 99% of the time we don't want to supply any additional flags -- | 91 | /* 99% of the time we don't want to supply any additional flags -- | 
| 98 | * those are for very specific cases only. */ | 92 | * those are for very specific cases only. */ | 
| 99 | #define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0) | 93 | #define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) | 
| 100 | void ocfs2_meta_unlock(struct inode *inode, | 94 | void ocfs2_inode_unlock(struct inode *inode, | 
| 101 | int ex); | 95 | int ex); | 
| 102 | int ocfs2_super_lock(struct ocfs2_super *osb, | 96 | int ocfs2_super_lock(struct ocfs2_super *osb, | 
| 103 | int ex); | 97 | int ex); | 
| @@ -107,14 +101,17 @@ int ocfs2_rename_lock(struct ocfs2_super *osb); | |||
| 107 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 101 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 
| 108 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | 102 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | 
| 109 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | 103 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | 
| 104 | int ocfs2_file_lock(struct file *file, int ex, int trylock); | ||
| 105 | void ocfs2_file_unlock(struct file *file); | ||
| 110 | 106 | ||
| 111 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 107 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 
| 112 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 108 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 
| 113 | struct ocfs2_lock_res *lockres); | 109 | struct ocfs2_lock_res *lockres); | 
| 114 | 110 | ||
| 115 | /* for the vote thread */ | 111 | /* for the downconvert thread */ | 
| 116 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 112 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 
| 117 | struct ocfs2_lock_res *lockres); | 113 | struct ocfs2_lock_res *lockres); | 
| 114 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | ||
| 118 | 115 | ||
| 119 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 116 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 
| 120 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 117 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 
| diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h index ff257628af16..1942e09f6ee5 100644 --- a/fs/ocfs2/endian.h +++ b/fs/ocfs2/endian.h | |||
| @@ -37,11 +37,6 @@ static inline void le64_add_cpu(__le64 *var, u64 val) | |||
| 37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | 37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | 
| 38 | } | 38 | } | 
| 39 | 39 | ||
| 40 | static inline void le32_and_cpu(__le32 *var, u32 val) | ||
| 41 | { | ||
| 42 | *var = cpu_to_le32(le32_to_cpu(*var) & val); | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline void be32_add_cpu(__be32 *var, u32 val) | 40 | static inline void be32_add_cpu(__be32 *var, u32 val) | 
| 46 | { | 41 | { | 
| 47 | *var = cpu_to_be32(be32_to_cpu(*var) + val); | 42 | *var = cpu_to_be32(be32_to_cpu(*var) + val); | 
| diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 535bfa9568a4..67527cebf214 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
| @@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
| 58 | return ERR_PTR(-ESTALE); | 58 | return ERR_PTR(-ESTALE); | 
| 59 | } | 59 | } | 
| 60 | 60 | ||
| 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); | 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); | 
| 62 | 62 | ||
| 63 | if (IS_ERR(inode)) | 63 | if (IS_ERR(inode)) | 
| 64 | return (void *)inode; | 64 | return (void *)inode; | 
| @@ -95,7 +95,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
| 95 | mlog(0, "find parent of directory %llu\n", | 95 | mlog(0, "find parent of directory %llu\n", | 
| 96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); | 96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); | 
| 97 | 97 | ||
| 98 | status = ocfs2_meta_lock(dir, NULL, 0); | 98 | status = ocfs2_inode_lock(dir, NULL, 0); | 
| 99 | if (status < 0) { | 99 | if (status < 0) { | 
| 100 | if (status != -ENOENT) | 100 | if (status != -ENOENT) | 
| 101 | mlog_errno(status); | 101 | mlog_errno(status); | 
| @@ -109,7 +109,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
| 109 | goto bail_unlock; | 109 | goto bail_unlock; | 
| 110 | } | 110 | } | 
| 111 | 111 | ||
| 112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); | 
| 113 | if (IS_ERR(inode)) { | 113 | if (IS_ERR(inode)) { | 
| 114 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 114 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 
| 115 | (unsigned long long)blkno); | 115 | (unsigned long long)blkno); | 
| @@ -126,7 +126,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
| 126 | parent->d_op = &ocfs2_dentry_ops; | 126 | parent->d_op = &ocfs2_dentry_ops; | 
| 127 | 127 | ||
| 128 | bail_unlock: | 128 | bail_unlock: | 
| 129 | ocfs2_meta_unlock(dir, 0); | 129 | ocfs2_inode_unlock(dir, 0); | 
| 130 | 130 | ||
| 131 | bail: | 131 | bail: | 
| 132 | mlog_exit_ptr(parent); | 132 | mlog_exit_ptr(parent); | 
| diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b75b2e1f0e42..ed5d5232e85d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include "inode.h" | 51 | #include "inode.h" | 
| 52 | #include "ioctl.h" | 52 | #include "ioctl.h" | 
| 53 | #include "journal.h" | 53 | #include "journal.h" | 
| 54 | #include "locks.h" | ||
| 54 | #include "mmap.h" | 55 | #include "mmap.h" | 
| 55 | #include "suballoc.h" | 56 | #include "suballoc.h" | 
| 56 | #include "super.h" | 57 | #include "super.h" | 
| @@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode) | |||
| 63 | return sync_mapping_buffers(inode->i_mapping); | 64 | return sync_mapping_buffers(inode->i_mapping); | 
| 64 | } | 65 | } | 
| 65 | 66 | ||
| 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | ||
| 68 | { | ||
| 69 | struct ocfs2_file_private *fp; | ||
| 70 | |||
| 71 | fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); | ||
| 72 | if (!fp) | ||
| 73 | return -ENOMEM; | ||
| 74 | |||
| 75 | fp->fp_file = file; | ||
| 76 | mutex_init(&fp->fp_mutex); | ||
| 77 | ocfs2_file_lock_res_init(&fp->fp_flock, fp); | ||
| 78 | file->private_data = fp; | ||
| 79 | |||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | static void ocfs2_free_file_private(struct inode *inode, struct file *file) | ||
| 84 | { | ||
| 85 | struct ocfs2_file_private *fp = file->private_data; | ||
| 86 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 87 | |||
| 88 | if (fp) { | ||
| 89 | ocfs2_simple_drop_lockres(osb, &fp->fp_flock); | ||
| 90 | ocfs2_lock_res_free(&fp->fp_flock); | ||
| 91 | kfree(fp); | ||
| 92 | file->private_data = NULL; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 66 | static int ocfs2_file_open(struct inode *inode, struct file *file) | 96 | static int ocfs2_file_open(struct inode *inode, struct file *file) | 
| 67 | { | 97 | { | 
| 68 | int status; | 98 | int status; | 
| @@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
| 89 | 119 | ||
| 90 | oi->ip_open_count++; | 120 | oi->ip_open_count++; | 
| 91 | spin_unlock(&oi->ip_lock); | 121 | spin_unlock(&oi->ip_lock); | 
| 92 | status = 0; | 122 | |
| 123 | status = ocfs2_init_file_private(inode, file); | ||
| 124 | if (status) { | ||
| 125 | /* | ||
| 126 | * We want to set open count back if we're failing the | ||
| 127 | * open. | ||
| 128 | */ | ||
| 129 | spin_lock(&oi->ip_lock); | ||
| 130 | oi->ip_open_count--; | ||
| 131 | spin_unlock(&oi->ip_lock); | ||
| 132 | } | ||
| 133 | |||
| 93 | leave: | 134 | leave: | 
| 94 | mlog_exit(status); | 135 | mlog_exit(status); | 
| 95 | return status; | 136 | return status; | 
| @@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) | |||
| 108 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; | 149 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; | 
| 109 | spin_unlock(&oi->ip_lock); | 150 | spin_unlock(&oi->ip_lock); | 
| 110 | 151 | ||
| 152 | ocfs2_free_file_private(inode, file); | ||
| 153 | |||
| 111 | mlog_exit(0); | 154 | mlog_exit(0); | 
| 112 | 155 | ||
| 113 | return 0; | 156 | return 0; | 
| 114 | } | 157 | } | 
| 115 | 158 | ||
| 159 | static int ocfs2_dir_open(struct inode *inode, struct file *file) | ||
| 160 | { | ||
| 161 | return ocfs2_init_file_private(inode, file); | ||
| 162 | } | ||
| 163 | |||
| 164 | static int ocfs2_dir_release(struct inode *inode, struct file *file) | ||
| 165 | { | ||
| 166 | ocfs2_free_file_private(inode, file); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 116 | static int ocfs2_sync_file(struct file *file, | 170 | static int ocfs2_sync_file(struct file *file, | 
| 117 | struct dentry *dentry, | 171 | struct dentry *dentry, | 
| 118 | int datasync) | 172 | int datasync) | 
| @@ -382,18 +436,13 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 382 | 436 | ||
| 383 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 437 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 384 | 438 | ||
| 385 | /* This forces other nodes to sync and drop their pages. Do | 439 | /* | 
| 386 | * this even if we have a truncate without allocation change - | 440 | * The inode lock forced other nodes to sync and drop their | 
| 387 | * ocfs2 cluster sizes can be much greater than page size, so | 441 | * pages, which (correctly) happens even if we have a truncate | 
| 388 | * we have to truncate them anyway. */ | 442 | * without allocation change - ocfs2 cluster sizes can be much | 
| 389 | status = ocfs2_data_lock(inode, 1); | 443 | * greater than page size, so we have to truncate them | 
| 390 | if (status < 0) { | 444 | * anyway. | 
| 391 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 445 | */ | 
| 392 | |||
| 393 | mlog_errno(status); | ||
| 394 | goto bail; | ||
| 395 | } | ||
| 396 | |||
| 397 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | 446 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | 
| 398 | truncate_inode_pages(inode->i_mapping, new_i_size); | 447 | truncate_inode_pages(inode->i_mapping, new_i_size); | 
| 399 | 448 | ||
| @@ -403,7 +452,7 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 403 | if (status) | 452 | if (status) | 
| 404 | mlog_errno(status); | 453 | mlog_errno(status); | 
| 405 | 454 | ||
| 406 | goto bail_unlock_data; | 455 | goto bail_unlock_sem; | 
| 407 | } | 456 | } | 
| 408 | 457 | ||
| 409 | /* alright, we're going to need to do a full blown alloc size | 458 | /* alright, we're going to need to do a full blown alloc size | 
| @@ -413,25 +462,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 413 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); | 462 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); | 
| 414 | if (status < 0) { | 463 | if (status < 0) { | 
| 415 | mlog_errno(status); | 464 | mlog_errno(status); | 
| 416 | goto bail_unlock_data; | 465 | goto bail_unlock_sem; | 
| 417 | } | 466 | } | 
| 418 | 467 | ||
| 419 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 468 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 
| 420 | if (status < 0) { | 469 | if (status < 0) { | 
| 421 | mlog_errno(status); | 470 | mlog_errno(status); | 
| 422 | goto bail_unlock_data; | 471 | goto bail_unlock_sem; | 
| 423 | } | 472 | } | 
| 424 | 473 | ||
| 425 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | 474 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | 
| 426 | if (status < 0) { | 475 | if (status < 0) { | 
| 427 | mlog_errno(status); | 476 | mlog_errno(status); | 
| 428 | goto bail_unlock_data; | 477 | goto bail_unlock_sem; | 
| 429 | } | 478 | } | 
| 430 | 479 | ||
| 431 | /* TODO: orphan dir cleanup here. */ | 480 | /* TODO: orphan dir cleanup here. */ | 
| 432 | bail_unlock_data: | 481 | bail_unlock_sem: | 
| 433 | ocfs2_data_unlock(inode, 1); | ||
| 434 | |||
| 435 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 482 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 436 | 483 | ||
| 437 | bail: | 484 | bail: | 
| @@ -579,7 +626,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
| 579 | 626 | ||
| 580 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 
| 581 | "clusters_to_add = %u, extents_to_split = %u\n", | 628 | "clusters_to_add = %u, extents_to_split = %u\n", | 
| 582 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), | 
| 583 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | 630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | 
| 584 | 631 | ||
| 585 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 
| @@ -760,7 +807,7 @@ restarted_transaction: | |||
| 760 | le32_to_cpu(fe->i_clusters), | 807 | le32_to_cpu(fe->i_clusters), | 
| 761 | (unsigned long long)le64_to_cpu(fe->i_size)); | 808 | (unsigned long long)le64_to_cpu(fe->i_size)); | 
| 762 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", | 809 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", | 
| 763 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 810 | OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); | 
| 764 | 811 | ||
| 765 | leave: | 812 | leave: | 
| 766 | if (handle) { | 813 | if (handle) { | 
| @@ -917,7 +964,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 917 | struct buffer_head *di_bh, | 964 | struct buffer_head *di_bh, | 
| 918 | u64 new_i_size) | 965 | u64 new_i_size) | 
| 919 | { | 966 | { | 
| 920 | int ret = 0, data_locked = 0; | 967 | int ret = 0; | 
| 921 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 968 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 
| 922 | 969 | ||
| 923 | BUG_ON(!di_bh); | 970 | BUG_ON(!di_bh); | 
| @@ -943,20 +990,6 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 943 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 990 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 
| 944 | goto out_update_size; | 991 | goto out_update_size; | 
| 945 | 992 | ||
| 946 | /* | ||
| 947 | * protect the pages that ocfs2_zero_extend is going to be | ||
| 948 | * pulling into the page cache.. we do this before the | ||
| 949 | * metadata extend so that we don't get into the situation | ||
| 950 | * where we've extended the metadata but can't get the data | ||
| 951 | * lock to zero. | ||
| 952 | */ | ||
| 953 | ret = ocfs2_data_lock(inode, 1); | ||
| 954 | if (ret < 0) { | ||
| 955 | mlog_errno(ret); | ||
| 956 | goto out; | ||
| 957 | } | ||
| 958 | data_locked = 1; | ||
| 959 | |||
| 960 | /* | 993 | /* | 
| 961 | * The alloc sem blocks people in read/write from reading our | 994 | * The alloc sem blocks people in read/write from reading our | 
| 962 | * allocation until we're done changing it. We depend on | 995 | * allocation until we're done changing it. We depend on | 
| @@ -980,7 +1013,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 980 | up_write(&oi->ip_alloc_sem); | 1013 | up_write(&oi->ip_alloc_sem); | 
| 981 | 1014 | ||
| 982 | mlog_errno(ret); | 1015 | mlog_errno(ret); | 
| 983 | goto out_unlock; | 1016 | goto out; | 
| 984 | } | 1017 | } | 
| 985 | } | 1018 | } | 
| 986 | 1019 | ||
| @@ -991,7 +1024,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 991 | 1024 | ||
| 992 | if (ret < 0) { | 1025 | if (ret < 0) { | 
| 993 | mlog_errno(ret); | 1026 | mlog_errno(ret); | 
| 994 | goto out_unlock; | 1027 | goto out; | 
| 995 | } | 1028 | } | 
| 996 | 1029 | ||
| 997 | out_update_size: | 1030 | out_update_size: | 
| @@ -999,10 +1032,6 @@ out_update_size: | |||
| 999 | if (ret < 0) | 1032 | if (ret < 0) | 
| 1000 | mlog_errno(ret); | 1033 | mlog_errno(ret); | 
| 1001 | 1034 | ||
| 1002 | out_unlock: | ||
| 1003 | if (data_locked) | ||
| 1004 | ocfs2_data_unlock(inode, 1); | ||
| 1005 | |||
| 1006 | out: | 1035 | out: | 
| 1007 | return ret; | 1036 | return ret; | 
| 1008 | } | 1037 | } | 
| @@ -1050,7 +1079,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1050 | } | 1079 | } | 
| 1051 | } | 1080 | } | 
| 1052 | 1081 | ||
| 1053 | status = ocfs2_meta_lock(inode, &bh, 1); | 1082 | status = ocfs2_inode_lock(inode, &bh, 1); | 
| 1054 | if (status < 0) { | 1083 | if (status < 0) { | 
| 1055 | if (status != -ENOENT) | 1084 | if (status != -ENOENT) | 
| 1056 | mlog_errno(status); | 1085 | mlog_errno(status); | 
| @@ -1102,7 +1131,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1102 | bail_commit: | 1131 | bail_commit: | 
| 1103 | ocfs2_commit_trans(osb, handle); | 1132 | ocfs2_commit_trans(osb, handle); | 
| 1104 | bail_unlock: | 1133 | bail_unlock: | 
| 1105 | ocfs2_meta_unlock(inode, 1); | 1134 | ocfs2_inode_unlock(inode, 1); | 
| 1106 | bail_unlock_rw: | 1135 | bail_unlock_rw: | 
| 1107 | if (size_change) | 1136 | if (size_change) | 
| 1108 | ocfs2_rw_unlock(inode, 1); | 1137 | ocfs2_rw_unlock(inode, 1); | 
| @@ -1149,7 +1178,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 1149 | 1178 | ||
| 1150 | mlog_entry_void(); | 1179 | mlog_entry_void(); | 
| 1151 | 1180 | ||
| 1152 | ret = ocfs2_meta_lock(inode, NULL, 0); | 1181 | ret = ocfs2_inode_lock(inode, NULL, 0); | 
| 1153 | if (ret) { | 1182 | if (ret) { | 
| 1154 | if (ret != -ENOENT) | 1183 | if (ret != -ENOENT) | 
| 1155 | mlog_errno(ret); | 1184 | mlog_errno(ret); | 
| @@ -1158,7 +1187,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 1158 | 1187 | ||
| 1159 | ret = generic_permission(inode, mask, NULL); | 1188 | ret = generic_permission(inode, mask, NULL); | 
| 1160 | 1189 | ||
| 1161 | ocfs2_meta_unlock(inode, 0); | 1190 | ocfs2_inode_unlock(inode, 0); | 
| 1162 | out: | 1191 | out: | 
| 1163 | mlog_exit(ret); | 1192 | mlog_exit(ret); | 
| 1164 | return ret; | 1193 | return ret; | 
| @@ -1630,7 +1659,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1630 | goto out; | 1659 | goto out; | 
| 1631 | } | 1660 | } | 
| 1632 | 1661 | ||
| 1633 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1662 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 
| 1634 | if (ret) { | 1663 | if (ret) { | 
| 1635 | mlog_errno(ret); | 1664 | mlog_errno(ret); | 
| 1636 | goto out_rw_unlock; | 1665 | goto out_rw_unlock; | 
| @@ -1638,7 +1667,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1638 | 1667 | ||
| 1639 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | 1668 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | 
| 1640 | ret = -EPERM; | 1669 | ret = -EPERM; | 
| 1641 | goto out_meta_unlock; | 1670 | goto out_inode_unlock; | 
| 1642 | } | 1671 | } | 
| 1643 | 1672 | ||
| 1644 | switch (sr->l_whence) { | 1673 | switch (sr->l_whence) { | 
| @@ -1652,7 +1681,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1652 | break; | 1681 | break; | 
| 1653 | default: | 1682 | default: | 
| 1654 | ret = -EINVAL; | 1683 | ret = -EINVAL; | 
| 1655 | goto out_meta_unlock; | 1684 | goto out_inode_unlock; | 
| 1656 | } | 1685 | } | 
| 1657 | sr->l_whence = 0; | 1686 | sr->l_whence = 0; | 
| 1658 | 1687 | ||
| @@ -1663,14 +1692,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1663 | || (sr->l_start + llen) < 0 | 1692 | || (sr->l_start + llen) < 0 | 
| 1664 | || (sr->l_start + llen) > max_off) { | 1693 | || (sr->l_start + llen) > max_off) { | 
| 1665 | ret = -EINVAL; | 1694 | ret = -EINVAL; | 
| 1666 | goto out_meta_unlock; | 1695 | goto out_inode_unlock; | 
| 1667 | } | 1696 | } | 
| 1668 | size = sr->l_start + sr->l_len; | 1697 | size = sr->l_start + sr->l_len; | 
| 1669 | 1698 | ||
| 1670 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | 1699 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | 
| 1671 | if (sr->l_len <= 0) { | 1700 | if (sr->l_len <= 0) { | 
| 1672 | ret = -EINVAL; | 1701 | ret = -EINVAL; | 
| 1673 | goto out_meta_unlock; | 1702 | goto out_inode_unlock; | 
| 1674 | } | 1703 | } | 
| 1675 | } | 1704 | } | 
| 1676 | 1705 | ||
| @@ -1678,7 +1707,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1678 | ret = __ocfs2_write_remove_suid(inode, di_bh); | 1707 | ret = __ocfs2_write_remove_suid(inode, di_bh); | 
| 1679 | if (ret) { | 1708 | if (ret) { | 
| 1680 | mlog_errno(ret); | 1709 | mlog_errno(ret); | 
| 1681 | goto out_meta_unlock; | 1710 | goto out_inode_unlock; | 
| 1682 | } | 1711 | } | 
| 1683 | } | 1712 | } | 
| 1684 | 1713 | ||
| @@ -1704,7 +1733,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1704 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1733 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 1705 | if (ret) { | 1734 | if (ret) { | 
| 1706 | mlog_errno(ret); | 1735 | mlog_errno(ret); | 
| 1707 | goto out_meta_unlock; | 1736 | goto out_inode_unlock; | 
| 1708 | } | 1737 | } | 
| 1709 | 1738 | ||
| 1710 | /* | 1739 | /* | 
| @@ -1714,7 +1743,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1714 | if (IS_ERR(handle)) { | 1743 | if (IS_ERR(handle)) { | 
| 1715 | ret = PTR_ERR(handle); | 1744 | ret = PTR_ERR(handle); | 
| 1716 | mlog_errno(ret); | 1745 | mlog_errno(ret); | 
| 1717 | goto out_meta_unlock; | 1746 | goto out_inode_unlock; | 
| 1718 | } | 1747 | } | 
| 1719 | 1748 | ||
| 1720 | if (change_size && i_size_read(inode) < size) | 1749 | if (change_size && i_size_read(inode) < size) | 
| @@ -1727,9 +1756,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1727 | 1756 | ||
| 1728 | ocfs2_commit_trans(osb, handle); | 1757 | ocfs2_commit_trans(osb, handle); | 
| 1729 | 1758 | ||
| 1730 | out_meta_unlock: | 1759 | out_inode_unlock: | 
| 1731 | brelse(di_bh); | 1760 | brelse(di_bh); | 
| 1732 | ocfs2_meta_unlock(inode, 1); | 1761 | ocfs2_inode_unlock(inode, 1); | 
| 1733 | out_rw_unlock: | 1762 | out_rw_unlock: | 
| 1734 | ocfs2_rw_unlock(inode, 1); | 1763 | ocfs2_rw_unlock(inode, 1); | 
| 1735 | 1764 | ||
| @@ -1799,7 +1828,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
| 1799 | * if we need to make modifications here. | 1828 | * if we need to make modifications here. | 
| 1800 | */ | 1829 | */ | 
| 1801 | for(;;) { | 1830 | for(;;) { | 
| 1802 | ret = ocfs2_meta_lock(inode, NULL, meta_level); | 1831 | ret = ocfs2_inode_lock(inode, NULL, meta_level); | 
| 1803 | if (ret < 0) { | 1832 | if (ret < 0) { | 
| 1804 | meta_level = -1; | 1833 | meta_level = -1; | 
| 1805 | mlog_errno(ret); | 1834 | mlog_errno(ret); | 
| @@ -1817,7 +1846,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
| 1817 | * set inode->i_size at the end of a write. */ | 1846 | * set inode->i_size at the end of a write. */ | 
| 1818 | if (should_remove_suid(dentry)) { | 1847 | if (should_remove_suid(dentry)) { | 
| 1819 | if (meta_level == 0) { | 1848 | if (meta_level == 0) { | 
| 1820 | ocfs2_meta_unlock(inode, meta_level); | 1849 | ocfs2_inode_unlock(inode, meta_level); | 
| 1821 | meta_level = 1; | 1850 | meta_level = 1; | 
| 1822 | continue; | 1851 | continue; | 
| 1823 | } | 1852 | } | 
| @@ -1886,7 +1915,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
| 1886 | *ppos = saved_pos; | 1915 | *ppos = saved_pos; | 
| 1887 | 1916 | ||
| 1888 | out_unlock: | 1917 | out_unlock: | 
| 1889 | ocfs2_meta_unlock(inode, meta_level); | 1918 | ocfs2_inode_unlock(inode, meta_level); | 
| 1890 | 1919 | ||
| 1891 | out: | 1920 | out: | 
| 1892 | return ret; | 1921 | return ret; | 
| @@ -2099,12 +2128,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, | |||
| 2099 | /* | 2128 | /* | 
| 2100 | * See the comment in ocfs2_file_aio_read() | 2129 | * See the comment in ocfs2_file_aio_read() | 
| 2101 | */ | 2130 | */ | 
| 2102 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2131 | ret = ocfs2_inode_lock(inode, NULL, 0); | 
| 2103 | if (ret < 0) { | 2132 | if (ret < 0) { | 
| 2104 | mlog_errno(ret); | 2133 | mlog_errno(ret); | 
| 2105 | goto bail; | 2134 | goto bail; | 
| 2106 | } | 2135 | } | 
| 2107 | ocfs2_meta_unlock(inode, 0); | 2136 | ocfs2_inode_unlock(inode, 0); | 
| 2108 | 2137 | ||
| 2109 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | 2138 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | 
| 2110 | 2139 | ||
| @@ -2160,12 +2189,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 2160 | * like i_size. This allows the checks down below | 2189 | * like i_size. This allows the checks down below | 
| 2161 | * generic_file_aio_read() a chance of actually working. | 2190 | * generic_file_aio_read() a chance of actually working. | 
| 2162 | */ | 2191 | */ | 
| 2163 | ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 2192 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 
| 2164 | if (ret < 0) { | 2193 | if (ret < 0) { | 
| 2165 | mlog_errno(ret); | 2194 | mlog_errno(ret); | 
| 2166 | goto bail; | 2195 | goto bail; | 
| 2167 | } | 2196 | } | 
| 2168 | ocfs2_meta_unlock(inode, lock_level); | 2197 | ocfs2_inode_unlock(inode, lock_level); | 
| 2169 | 2198 | ||
| 2170 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 2199 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 
| 2171 | if (ret == -EINVAL) | 2200 | if (ret == -EINVAL) | 
| @@ -2204,6 +2233,7 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
| 2204 | }; | 2233 | }; | 
| 2205 | 2234 | ||
| 2206 | const struct file_operations ocfs2_fops = { | 2235 | const struct file_operations ocfs2_fops = { | 
| 2236 | .llseek = generic_file_llseek, | ||
| 2207 | .read = do_sync_read, | 2237 | .read = do_sync_read, | 
| 2208 | .write = do_sync_write, | 2238 | .write = do_sync_write, | 
| 2209 | .mmap = ocfs2_mmap, | 2239 | .mmap = ocfs2_mmap, | 
| @@ -2216,16 +2246,21 @@ const struct file_operations ocfs2_fops = { | |||
| 2216 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT | 
| 2217 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, | 
| 2218 | #endif | 2248 | #endif | 
| 2249 | .flock = ocfs2_flock, | ||
| 2219 | .splice_read = ocfs2_file_splice_read, | 2250 | .splice_read = ocfs2_file_splice_read, | 
| 2220 | .splice_write = ocfs2_file_splice_write, | 2251 | .splice_write = ocfs2_file_splice_write, | 
| 2221 | }; | 2252 | }; | 
| 2222 | 2253 | ||
| 2223 | const struct file_operations ocfs2_dops = { | 2254 | const struct file_operations ocfs2_dops = { | 
| 2255 | .llseek = generic_file_llseek, | ||
| 2224 | .read = generic_read_dir, | 2256 | .read = generic_read_dir, | 
| 2225 | .readdir = ocfs2_readdir, | 2257 | .readdir = ocfs2_readdir, | 
| 2226 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, | 
| 2259 | .release = ocfs2_dir_release, | ||
| 2260 | .open = ocfs2_dir_open, | ||
| 2227 | .ioctl = ocfs2_ioctl, | 2261 | .ioctl = ocfs2_ioctl, | 
| 2228 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT | 
| 2229 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, | 
| 2230 | #endif | 2264 | #endif | 
| 2265 | .flock = ocfs2_flock, | ||
| 2231 | }; | 2266 | }; | 
| diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 066f14add3a8..048ddcaf5c80 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
| @@ -32,6 +32,12 @@ extern const struct inode_operations ocfs2_file_iops; | |||
| 32 | extern const struct inode_operations ocfs2_special_file_iops; | 32 | extern const struct inode_operations ocfs2_special_file_iops; | 
| 33 | struct ocfs2_alloc_context; | 33 | struct ocfs2_alloc_context; | 
| 34 | 34 | ||
| 35 | struct ocfs2_file_private { | ||
| 36 | struct file *fp_file; | ||
| 37 | struct mutex fp_mutex; | ||
| 38 | struct ocfs2_lock_res fp_flock; | ||
| 39 | }; | ||
| 40 | |||
| 35 | enum ocfs2_alloc_restarted { | 41 | enum ocfs2_alloc_restarted { | 
| 36 | RESTART_NONE = 0, | 42 | RESTART_NONE = 0, | 
| 37 | RESTART_TRANS, | 43 | RESTART_TRANS, | 
| diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index c4c36171240d..c0efd9489fe8 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -30,9 +30,6 @@ | |||
| 30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> | 
| 31 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> | 
| 32 | 32 | ||
| 33 | #include <cluster/heartbeat.h> | ||
| 34 | #include <cluster/nodemanager.h> | ||
| 35 | |||
| 36 | #include <dlm/dlmapi.h> | 33 | #include <dlm/dlmapi.h> | 
| 37 | 34 | ||
| 38 | #define MLOG_MASK_PREFIX ML_SUPER | 35 | #define MLOG_MASK_PREFIX ML_SUPER | 
| @@ -44,13 +41,9 @@ | |||
| 44 | #include "heartbeat.h" | 41 | #include "heartbeat.h" | 
| 45 | #include "inode.h" | 42 | #include "inode.h" | 
| 46 | #include "journal.h" | 43 | #include "journal.h" | 
| 47 | #include "vote.h" | ||
| 48 | 44 | ||
| 49 | #include "buffer_head_io.h" | 45 | #include "buffer_head_io.h" | 
| 50 | 46 | ||
| 51 | #define OCFS2_HB_NODE_DOWN_PRI (0x0000002) | ||
| 52 | #define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI | ||
| 53 | |||
| 54 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 47 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 
| 55 | int bit); | 48 | int bit); | 
| 56 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 
| @@ -64,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | |||
| 64 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 57 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 
| 65 | { | 58 | { | 
| 66 | spin_lock_init(&osb->node_map_lock); | 59 | spin_lock_init(&osb->node_map_lock); | 
| 67 | ocfs2_node_map_init(&osb->mounted_map); | ||
| 68 | ocfs2_node_map_init(&osb->recovery_map); | 60 | ocfs2_node_map_init(&osb->recovery_map); | 
| 69 | ocfs2_node_map_init(&osb->umount_map); | ||
| 70 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 
| 71 | } | 62 | } | 
| 72 | 63 | ||
| @@ -87,24 +78,7 @@ static void ocfs2_do_node_down(int node_num, | |||
| 87 | return; | 78 | return; | 
| 88 | } | 79 | } | 
| 89 | 80 | ||
| 90 | if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) { | ||
| 91 | /* If a node is in the umount map, then we've been | ||
| 92 | * expecting him to go down and we know ahead of time | ||
| 93 | * that recovery is not necessary. */ | ||
| 94 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
| 95 | return; | ||
| 96 | } | ||
| 97 | |||
| 98 | ocfs2_recovery_thread(osb, node_num); | 81 | ocfs2_recovery_thread(osb, node_num); | 
| 99 | |||
| 100 | ocfs2_remove_node_from_vote_queues(osb, node_num); | ||
| 101 | } | ||
| 102 | |||
| 103 | static void ocfs2_hb_node_down_cb(struct o2nm_node *node, | ||
| 104 | int node_num, | ||
| 105 | void *data) | ||
| 106 | { | ||
| 107 | ocfs2_do_node_down(node_num, (struct ocfs2_super *) data); | ||
| 108 | } | 82 | } | 
| 109 | 83 | ||
| 110 | /* Called from the dlm when it's about to evict a node. We may also | 84 | /* Called from the dlm when it's about to evict a node. We may also | 
| @@ -121,27 +95,8 @@ static void ocfs2_dlm_eviction_cb(int node_num, | |||
| 121 | ocfs2_do_node_down(node_num, osb); | 95 | ocfs2_do_node_down(node_num, osb); | 
| 122 | } | 96 | } | 
| 123 | 97 | ||
| 124 | static void ocfs2_hb_node_up_cb(struct o2nm_node *node, | ||
| 125 | int node_num, | ||
| 126 | void *data) | ||
| 127 | { | ||
| 128 | struct ocfs2_super *osb = data; | ||
| 129 | |||
| 130 | BUG_ON(osb->node_num == node_num); | ||
| 131 | |||
| 132 | mlog(0, "node up event for %d\n", node_num); | ||
| 133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
| 134 | } | ||
| 135 | |||
| 136 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | 98 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | 
| 137 | { | 99 | { | 
| 138 | o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB, | ||
| 139 | ocfs2_hb_node_down_cb, osb, | ||
| 140 | OCFS2_HB_NODE_DOWN_PRI); | ||
| 141 | |||
| 142 | o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB, | ||
| 143 | ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI); | ||
| 144 | |||
| 145 | /* Not exactly a heartbeat callback, but leads to essentially | 100 | /* Not exactly a heartbeat callback, but leads to essentially | 
| 146 | * the same path so we set it up here. */ | 101 | * the same path so we set it up here. */ | 
| 147 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | 102 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | 
| @@ -149,39 +104,6 @@ void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | |||
| 149 | osb); | 104 | osb); | 
| 150 | } | 105 | } | 
| 151 | 106 | ||
| 152 | /* Most functions here are just stubs for now... */ | ||
| 153 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) | ||
| 154 | { | ||
| 155 | int status; | ||
| 156 | |||
| 157 | if (ocfs2_mount_local(osb)) | ||
| 158 | return 0; | ||
| 159 | |||
| 160 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); | ||
| 161 | if (status < 0) { | ||
| 162 | mlog_errno(status); | ||
| 163 | goto bail; | ||
| 164 | } | ||
| 165 | |||
| 166 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); | ||
| 167 | if (status < 0) { | ||
| 168 | mlog_errno(status); | ||
| 169 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
| 170 | } | ||
| 171 | |||
| 172 | bail: | ||
| 173 | return status; | ||
| 174 | } | ||
| 175 | |||
| 176 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) | ||
| 177 | { | ||
| 178 | if (ocfs2_mount_local(osb)) | ||
| 179 | return; | ||
| 180 | |||
| 181 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
| 182 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); | ||
| 183 | } | ||
| 184 | |||
| 185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 107 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 
| 186 | { | 108 | { | 
| 187 | int ret; | 109 | int ret; | 
| @@ -341,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb, | |||
| 341 | 263 | ||
| 342 | spin_lock(&osb->node_map_lock); | 264 | spin_lock(&osb->node_map_lock); | 
| 343 | 265 | ||
| 344 | __ocfs2_node_map_clear_bit(&osb->mounted_map, num); | ||
| 345 | |||
| 346 | if (!test_bit(num, osb->recovery_map.map)) { | 266 | if (!test_bit(num, osb->recovery_map.map)) { | 
| 347 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | 
| 348 | set = 1; | 268 | set = 1; | 
| diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index e8fb079122e4..56859211888a 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
| @@ -29,8 +29,6 @@ | |||
| 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 
| 30 | 30 | ||
| 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 
| 32 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb); | ||
| 33 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb); | ||
| 34 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | 32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | 
| 35 | 33 | ||
| 36 | /* node map functions - used to keep track of mounted and in-recovery | 34 | /* node map functions - used to keep track of mounted and in-recovery | 
| diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ebb2bbe30f35..7e9e4c79aec7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -49,7 +49,6 @@ | |||
| 49 | #include "symlink.h" | 49 | #include "symlink.h" | 
| 50 | #include "sysfile.h" | 50 | #include "sysfile.h" | 
| 51 | #include "uptodate.h" | 51 | #include "uptodate.h" | 
| 52 | #include "vote.h" | ||
| 53 | 52 | ||
| 54 | #include "buffer_head_io.h" | 53 | #include "buffer_head_io.h" | 
| 55 | 54 | ||
| @@ -58,8 +57,11 @@ struct ocfs2_find_inode_args | |||
| 58 | u64 fi_blkno; | 57 | u64 fi_blkno; | 
| 59 | unsigned long fi_ino; | 58 | unsigned long fi_ino; | 
| 60 | unsigned int fi_flags; | 59 | unsigned int fi_flags; | 
| 60 | unsigned int fi_sysfile_type; | ||
| 61 | }; | 61 | }; | 
| 62 | 62 | ||
| 63 | static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; | ||
| 64 | |||
| 63 | static int ocfs2_read_locked_inode(struct inode *inode, | 65 | static int ocfs2_read_locked_inode(struct inode *inode, | 
| 64 | struct ocfs2_find_inode_args *args); | 66 | struct ocfs2_find_inode_args *args); | 
| 65 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); | 67 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); | 
| @@ -107,7 +109,8 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) | |||
| 107 | oi->ip_attr |= OCFS2_DIRSYNC_FL; | 109 | oi->ip_attr |= OCFS2_DIRSYNC_FL; | 
| 108 | } | 110 | } | 
| 109 | 111 | ||
| 110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | 112 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | 
| 113 | int sysfile_type) | ||
| 111 | { | 114 | { | 
| 112 | struct inode *inode = NULL; | 115 | struct inode *inode = NULL; | 
| 113 | struct super_block *sb = osb->sb; | 116 | struct super_block *sb = osb->sb; | 
| @@ -127,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | |||
| 127 | args.fi_blkno = blkno; | 130 | args.fi_blkno = blkno; | 
| 128 | args.fi_flags = flags; | 131 | args.fi_flags = flags; | 
| 129 | args.fi_ino = ino_from_blkno(sb, blkno); | 132 | args.fi_ino = ino_from_blkno(sb, blkno); | 
| 133 | args.fi_sysfile_type = sysfile_type; | ||
| 130 | 134 | ||
| 131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 135 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 
| 132 | ocfs2_init_locked_inode, &args); | 136 | ocfs2_init_locked_inode, &args); | 
| @@ -201,6 +205,9 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) | |||
| 201 | 205 | ||
| 202 | inode->i_ino = args->fi_ino; | 206 | inode->i_ino = args->fi_ino; | 
| 203 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; | 207 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; | 
| 208 | if (args->fi_sysfile_type != 0) | ||
| 209 | lockdep_set_class(&inode->i_mutex, | ||
| 210 | &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); | ||
| 204 | 211 | ||
| 205 | mlog_exit(0); | 212 | mlog_exit(0); | 
| 206 | return 0; | 213 | return 0; | 
| @@ -322,7 +329,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 322 | */ | 329 | */ | 
| 323 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); | 330 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); | 
| 324 | 331 | ||
| 325 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 332 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, | 
| 326 | OCFS2_LOCK_TYPE_META, 0, inode); | 333 | OCFS2_LOCK_TYPE_META, 0, inode); | 
| 327 | 334 | ||
| 328 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | 335 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | 
| @@ -333,10 +340,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 333 | OCFS2_LOCK_TYPE_RW, inode->i_generation, | 340 | OCFS2_LOCK_TYPE_RW, inode->i_generation, | 
| 334 | inode); | 341 | inode); | 
| 335 | 342 | ||
| 336 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | ||
| 337 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, | ||
| 338 | inode); | ||
| 339 | |||
| 340 | ocfs2_set_inode_flags(inode); | 343 | ocfs2_set_inode_flags(inode); | 
| 341 | 344 | ||
| 342 | status = 0; | 345 | status = 0; | 
| @@ -414,7 +417,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 414 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | 417 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | 
| 415 | generation = osb->fs_generation; | 418 | generation = osb->fs_generation; | 
| 416 | 419 | ||
| 417 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 420 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, | 
| 418 | OCFS2_LOCK_TYPE_META, | 421 | OCFS2_LOCK_TYPE_META, | 
| 419 | generation, inode); | 422 | generation, inode); | 
| 420 | 423 | ||
| @@ -429,7 +432,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 429 | mlog_errno(status); | 432 | mlog_errno(status); | 
| 430 | return status; | 433 | return status; | 
| 431 | } | 434 | } | 
| 432 | status = ocfs2_meta_lock(inode, NULL, 0); | 435 | status = ocfs2_inode_lock(inode, NULL, 0); | 
| 433 | if (status) { | 436 | if (status) { | 
| 434 | make_bad_inode(inode); | 437 | make_bad_inode(inode); | 
| 435 | mlog_errno(status); | 438 | mlog_errno(status); | 
| @@ -484,7 +487,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 484 | 487 | ||
| 485 | bail: | 488 | bail: | 
| 486 | if (can_lock) | 489 | if (can_lock) | 
| 487 | ocfs2_meta_unlock(inode, 0); | 490 | ocfs2_inode_unlock(inode, 0); | 
| 488 | 491 | ||
| 489 | if (status < 0) | 492 | if (status < 0) | 
| 490 | make_bad_inode(inode); | 493 | make_bad_inode(inode); | 
| @@ -586,7 +589,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
| 586 | } | 589 | } | 
| 587 | 590 | ||
| 588 | mutex_lock(&inode_alloc_inode->i_mutex); | 591 | mutex_lock(&inode_alloc_inode->i_mutex); | 
| 589 | status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1); | 592 | status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); | 
| 590 | if (status < 0) { | 593 | if (status < 0) { | 
| 591 | mutex_unlock(&inode_alloc_inode->i_mutex); | 594 | mutex_unlock(&inode_alloc_inode->i_mutex); | 
| 592 | 595 | ||
| @@ -617,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
| 617 | } | 620 | } | 
| 618 | 621 | ||
| 619 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); | 622 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); | 
| 620 | le32_and_cpu(&di->i_flags, ~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); | 623 | di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); | 
| 621 | 624 | ||
| 622 | status = ocfs2_journal_dirty(handle, di_bh); | 625 | status = ocfs2_journal_dirty(handle, di_bh); | 
| 623 | if (status < 0) { | 626 | if (status < 0) { | 
| @@ -635,7 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
| 635 | bail_commit: | 638 | bail_commit: | 
| 636 | ocfs2_commit_trans(osb, handle); | 639 | ocfs2_commit_trans(osb, handle); | 
| 637 | bail_unlock: | 640 | bail_unlock: | 
| 638 | ocfs2_meta_unlock(inode_alloc_inode, 1); | 641 | ocfs2_inode_unlock(inode_alloc_inode, 1); | 
| 639 | mutex_unlock(&inode_alloc_inode->i_mutex); | 642 | mutex_unlock(&inode_alloc_inode->i_mutex); | 
| 640 | brelse(inode_alloc_bh); | 643 | brelse(inode_alloc_bh); | 
| 641 | bail: | 644 | bail: | 
| @@ -709,7 +712,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 709 | * delete_inode operation. We do this now to avoid races with | 712 | * delete_inode operation. We do this now to avoid races with | 
| 710 | * recovery completion on other nodes. */ | 713 | * recovery completion on other nodes. */ | 
| 711 | mutex_lock(&orphan_dir_inode->i_mutex); | 714 | mutex_lock(&orphan_dir_inode->i_mutex); | 
| 712 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 715 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 
| 713 | if (status < 0) { | 716 | if (status < 0) { | 
| 714 | mutex_unlock(&orphan_dir_inode->i_mutex); | 717 | mutex_unlock(&orphan_dir_inode->i_mutex); | 
| 715 | 718 | ||
| @@ -718,8 +721,8 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 718 | } | 721 | } | 
| 719 | 722 | ||
| 720 | /* we do this while holding the orphan dir lock because we | 723 | /* we do this while holding the orphan dir lock because we | 
| 721 | * don't want recovery being run from another node to vote for | 724 | * don't want recovery being run from another node to try an | 
| 722 | * an inode delete on us -- this will result in two nodes | 725 | * inode delete underneath us -- this will result in two nodes | 
| 723 | * truncating the same file! */ | 726 | * truncating the same file! */ | 
| 724 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); | 727 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); | 
| 725 | if (status < 0) { | 728 | if (status < 0) { | 
| @@ -733,7 +736,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 733 | mlog_errno(status); | 736 | mlog_errno(status); | 
| 734 | 737 | ||
| 735 | bail_unlock_dir: | 738 | bail_unlock_dir: | 
| 736 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 739 | ocfs2_inode_unlock(orphan_dir_inode, 1); | 
| 737 | mutex_unlock(&orphan_dir_inode->i_mutex); | 740 | mutex_unlock(&orphan_dir_inode->i_mutex); | 
| 738 | brelse(orphan_dir_bh); | 741 | brelse(orphan_dir_bh); | 
| 739 | bail: | 742 | bail: | 
| @@ -744,7 +747,7 @@ bail: | |||
| 744 | } | 747 | } | 
| 745 | 748 | ||
| 746 | /* There is a series of simple checks that should be done before a | 749 | /* There is a series of simple checks that should be done before a | 
| 747 | * vote is even considered. Encapsulate those in this function. */ | 750 | * trylock is even considered. Encapsulate those in this function. */ | 
| 748 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | 751 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | 
| 749 | { | 752 | { | 
| 750 | int ret = 0; | 753 | int ret = 0; | 
| @@ -758,14 +761,14 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
| 758 | goto bail; | 761 | goto bail; | 
| 759 | } | 762 | } | 
| 760 | 763 | ||
| 761 | /* If we're coming from process_vote we can't go into our own | 764 | /* If we're coming from downconvert_thread we can't go into our own | 
| 762 | * voting [hello, deadlock city!], so unforuntately we just | 765 | * voting [hello, deadlock city!], so unforuntately we just | 
| 763 | * have to skip deleting this guy. That's OK though because | 766 | * have to skip deleting this guy. That's OK though because | 
| 764 | * the node who's doing the actual deleting should handle it | 767 | * the node who's doing the actual deleting should handle it | 
| 765 | * anyway. */ | 768 | * anyway. */ | 
| 766 | if (current == osb->vote_task) { | 769 | if (current == osb->dc_task) { | 
| 767 | mlog(0, "Skipping delete of %lu because we're currently " | 770 | mlog(0, "Skipping delete of %lu because we're currently " | 
| 768 | "in process_vote\n", inode->i_ino); | 771 | "in downconvert\n", inode->i_ino); | 
| 769 | goto bail; | 772 | goto bail; | 
| 770 | } | 773 | } | 
| 771 | 774 | ||
| @@ -779,10 +782,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
| 779 | goto bail_unlock; | 782 | goto bail_unlock; | 
| 780 | } | 783 | } | 
| 781 | 784 | ||
| 782 | /* If we have voted "yes" on the wipe of this inode for | 785 | /* If we have allowed a wipe of this inode for another node, it | 
| 783 | * another node, it will be marked here so we can safely skip | 786 | * will be marked here so we can safely skip it. Recovery will | 
| 784 | * it. Recovery will cleanup any inodes we might inadvertantly | 787 | * cleanup any inodes we might inadvertently skip here. */ | 
| 785 | * skip here. */ | ||
| 786 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { | 788 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { | 
| 787 | mlog(0, "Skipping delete of %lu because another node " | 789 | mlog(0, "Skipping delete of %lu because another node " | 
| 788 | "has done this for us.\n", inode->i_ino); | 790 | "has done this for us.\n", inode->i_ino); | 
| @@ -929,13 +931,13 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 929 | 931 | ||
| 930 | /* Lock down the inode. This gives us an up to date view of | 932 | /* Lock down the inode. This gives us an up to date view of | 
| 931 | * it's metadata (for verification), and allows us to | 933 | * it's metadata (for verification), and allows us to | 
| 932 | * serialize delete_inode votes. | 934 | * serialize delete_inode on multiple nodes. | 
| 933 | * | 935 | * | 
| 934 | * Even though we might be doing a truncate, we don't take the | 936 | * Even though we might be doing a truncate, we don't take the | 
| 935 | * allocation lock here as it won't be needed - nobody will | 937 | * allocation lock here as it won't be needed - nobody will | 
| 936 | * have the file open. | 938 | * have the file open. | 
| 937 | */ | 939 | */ | 
| 938 | status = ocfs2_meta_lock(inode, &di_bh, 1); | 940 | status = ocfs2_inode_lock(inode, &di_bh, 1); | 
| 939 | if (status < 0) { | 941 | if (status < 0) { | 
| 940 | if (status != -ENOENT) | 942 | if (status != -ENOENT) | 
| 941 | mlog_errno(status); | 943 | mlog_errno(status); | 
| @@ -947,15 +949,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 947 | * before we go ahead and wipe the inode. */ | 949 | * before we go ahead and wipe the inode. */ | 
| 948 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); | 950 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); | 
| 949 | if (!wipe || status < 0) { | 951 | if (!wipe || status < 0) { | 
| 950 | /* Error and inode busy vote both mean we won't be | 952 | /* Error and remote inode busy both mean we won't be | 
| 951 | * removing the inode, so they take almost the same | 953 | * removing the inode, so they take almost the same | 
| 952 | * path. */ | 954 | * path. */ | 
| 953 | if (status < 0) | 955 | if (status < 0) | 
| 954 | mlog_errno(status); | 956 | mlog_errno(status); | 
| 955 | 957 | ||
| 956 | /* Someone in the cluster has voted to not wipe this | 958 | /* Someone in the cluster has disallowed a wipe of | 
| 957 | * inode, or it was never completely orphaned. Write | 959 | * this inode, or it was never completely | 
| 958 | * out the pages and exit now. */ | 960 | * orphaned. Write out the pages and exit now. */ | 
| 959 | ocfs2_cleanup_delete_inode(inode, 1); | 961 | ocfs2_cleanup_delete_inode(inode, 1); | 
| 960 | goto bail_unlock_inode; | 962 | goto bail_unlock_inode; | 
| 961 | } | 963 | } | 
| @@ -981,7 +983,7 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 981 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 983 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 
| 982 | 984 | ||
| 983 | bail_unlock_inode: | 985 | bail_unlock_inode: | 
| 984 | ocfs2_meta_unlock(inode, 1); | 986 | ocfs2_inode_unlock(inode, 1); | 
| 985 | brelse(di_bh); | 987 | brelse(di_bh); | 
| 986 | bail_unblock: | 988 | bail_unblock: | 
| 987 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); | 989 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); | 
| @@ -1008,15 +1010,14 @@ void ocfs2_clear_inode(struct inode *inode) | |||
| 1008 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1010 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 
| 1009 | "Inode=%lu\n", inode->i_ino); | 1011 | "Inode=%lu\n", inode->i_ino); | 
| 1010 | 1012 | ||
| 1011 | /* For remove delete_inode vote, we hold open lock before, | 1013 | /* To prevent remote deletes we hold open lock before, now it | 
| 1012 | * now it is time to unlock PR and EX open locks. */ | 1014 | * is time to unlock PR and EX open locks. */ | 
| 1013 | ocfs2_open_unlock(inode); | 1015 | ocfs2_open_unlock(inode); | 
| 1014 | 1016 | ||
| 1015 | /* Do these before all the other work so that we don't bounce | 1017 | /* Do these before all the other work so that we don't bounce | 
| 1016 | * the vote thread while waiting to destroy the locks. */ | 1018 | * the downconvert thread while waiting to destroy the locks. */ | 
| 1017 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1019 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 
| 1018 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); | 1020 | ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); | 
| 1019 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); | ||
| 1020 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 1021 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 
| 1021 | 1022 | ||
| 1022 | /* We very well may get a clear_inode before all an inodes | 1023 | /* We very well may get a clear_inode before all an inodes | 
| @@ -1039,8 +1040,7 @@ void ocfs2_clear_inode(struct inode *inode) | |||
| 1039 | mlog_errno(status); | 1040 | mlog_errno(status); | 
| 1040 | 1041 | ||
| 1041 | ocfs2_lock_res_free(&oi->ip_rw_lockres); | 1042 | ocfs2_lock_res_free(&oi->ip_rw_lockres); | 
| 1042 | ocfs2_lock_res_free(&oi->ip_meta_lockres); | 1043 | ocfs2_lock_res_free(&oi->ip_inode_lockres); | 
| 1043 | ocfs2_lock_res_free(&oi->ip_data_lockres); | ||
| 1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); | 1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); | 
| 1045 | 1045 | ||
| 1046 | ocfs2_metadata_cache_purge(inode); | 1046 | ocfs2_metadata_cache_purge(inode); | 
| @@ -1184,15 +1184,15 @@ int ocfs2_inode_revalidate(struct dentry *dentry) | |||
| 1184 | } | 1184 | } | 
| 1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 
| 1186 | 1186 | ||
| 1187 | /* Let ocfs2_meta_lock do the work of updating our struct | 1187 | /* Let ocfs2_inode_lock do the work of updating our struct | 
| 1188 | * inode for us. */ | 1188 | * inode for us. */ | 
| 1189 | status = ocfs2_meta_lock(inode, NULL, 0); | 1189 | status = ocfs2_inode_lock(inode, NULL, 0); | 
| 1190 | if (status < 0) { | 1190 | if (status < 0) { | 
| 1191 | if (status != -ENOENT) | 1191 | if (status != -ENOENT) | 
| 1192 | mlog_errno(status); | 1192 | mlog_errno(status); | 
| 1193 | goto bail; | 1193 | goto bail; | 
| 1194 | } | 1194 | } | 
| 1195 | ocfs2_meta_unlock(inode, 0); | 1195 | ocfs2_inode_unlock(inode, 0); | 
| 1196 | bail: | 1196 | bail: | 
| 1197 | mlog_exit(status); | 1197 | mlog_exit(status); | 
| 1198 | 1198 | ||
| diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 70e881c55536..390a85596aa0 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -34,8 +34,7 @@ struct ocfs2_inode_info | |||
| 34 | u64 ip_blkno; | 34 | u64 ip_blkno; | 
| 35 | 35 | ||
| 36 | struct ocfs2_lock_res ip_rw_lockres; | 36 | struct ocfs2_lock_res ip_rw_lockres; | 
| 37 | struct ocfs2_lock_res ip_meta_lockres; | 37 | struct ocfs2_lock_res ip_inode_lockres; | 
| 38 | struct ocfs2_lock_res ip_data_lockres; | ||
| 39 | struct ocfs2_lock_res ip_open_lockres; | 38 | struct ocfs2_lock_res ip_open_lockres; | 
| 40 | 39 | ||
| 41 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ | 
| @@ -121,9 +120,10 @@ void ocfs2_delete_inode(struct inode *inode); | |||
| 121 | void ocfs2_drop_inode(struct inode *inode); | 120 | void ocfs2_drop_inode(struct inode *inode); | 
| 122 | 121 | ||
| 123 | /* Flags for ocfs2_iget() */ | 122 | /* Flags for ocfs2_iget() */ | 
| 124 | #define OCFS2_FI_FLAG_SYSFILE 0x4 | 123 | #define OCFS2_FI_FLAG_SYSFILE 0x1 | 
| 125 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x8 | 124 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 | 
| 126 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); | 125 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, | 
| 126 | int sysfile_type); | ||
| 127 | int ocfs2_inode_init_private(struct inode *inode); | 127 | int ocfs2_inode_init_private(struct inode *inode); | 
| 128 | int ocfs2_inode_revalidate(struct dentry *dentry); | 128 | int ocfs2_inode_revalidate(struct dentry *dentry); | 
| 129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | 129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | 
| diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 87dcece7e1b5..5177fba5162b 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | 20 | ||
| 21 | #include "ocfs2_fs.h" | 21 | #include "ocfs2_fs.h" | 
| 22 | #include "ioctl.h" | 22 | #include "ioctl.h" | 
| 23 | #include "resize.h" | ||
| 23 | 24 | ||
| 24 | #include <linux/ext2_fs.h> | 25 | #include <linux/ext2_fs.h> | 
| 25 | 26 | ||
| @@ -27,14 +28,14 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | |||
| 27 | { | 28 | { | 
| 28 | int status; | 29 | int status; | 
| 29 | 30 | ||
| 30 | status = ocfs2_meta_lock(inode, NULL, 0); | 31 | status = ocfs2_inode_lock(inode, NULL, 0); | 
| 31 | if (status < 0) { | 32 | if (status < 0) { | 
| 32 | mlog_errno(status); | 33 | mlog_errno(status); | 
| 33 | return status; | 34 | return status; | 
| 34 | } | 35 | } | 
| 35 | ocfs2_get_inode_flags(OCFS2_I(inode)); | 36 | ocfs2_get_inode_flags(OCFS2_I(inode)); | 
| 36 | *flags = OCFS2_I(inode)->ip_attr; | 37 | *flags = OCFS2_I(inode)->ip_attr; | 
| 37 | ocfs2_meta_unlock(inode, 0); | 38 | ocfs2_inode_unlock(inode, 0); | 
| 38 | 39 | ||
| 39 | mlog_exit(status); | 40 | mlog_exit(status); | 
| 40 | return status; | 41 | return status; | 
| @@ -52,7 +53,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
| 52 | 53 | ||
| 53 | mutex_lock(&inode->i_mutex); | 54 | mutex_lock(&inode->i_mutex); | 
| 54 | 55 | ||
| 55 | status = ocfs2_meta_lock(inode, &bh, 1); | 56 | status = ocfs2_inode_lock(inode, &bh, 1); | 
| 56 | if (status < 0) { | 57 | if (status < 0) { | 
| 57 | mlog_errno(status); | 58 | mlog_errno(status); | 
| 58 | goto bail; | 59 | goto bail; | 
| @@ -100,7 +101,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
| 100 | 101 | ||
| 101 | ocfs2_commit_trans(osb, handle); | 102 | ocfs2_commit_trans(osb, handle); | 
| 102 | bail_unlock: | 103 | bail_unlock: | 
| 103 | ocfs2_meta_unlock(inode, 1); | 104 | ocfs2_inode_unlock(inode, 1); | 
| 104 | bail: | 105 | bail: | 
| 105 | mutex_unlock(&inode->i_mutex); | 106 | mutex_unlock(&inode->i_mutex); | 
| 106 | 107 | ||
| @@ -115,8 +116,10 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 115 | unsigned int cmd, unsigned long arg) | 116 | unsigned int cmd, unsigned long arg) | 
| 116 | { | 117 | { | 
| 117 | unsigned int flags; | 118 | unsigned int flags; | 
| 119 | int new_clusters; | ||
| 118 | int status; | 120 | int status; | 
| 119 | struct ocfs2_space_resv sr; | 121 | struct ocfs2_space_resv sr; | 
| 122 | struct ocfs2_new_group_input input; | ||
| 120 | 123 | ||
| 121 | switch (cmd) { | 124 | switch (cmd) { | 
| 122 | case OCFS2_IOC_GETFLAGS: | 125 | case OCFS2_IOC_GETFLAGS: | 
| @@ -140,6 +143,23 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 140 | return -EFAULT; | 143 | return -EFAULT; | 
| 141 | 144 | ||
| 142 | return ocfs2_change_file_space(filp, cmd, &sr); | 145 | return ocfs2_change_file_space(filp, cmd, &sr); | 
| 146 | case OCFS2_IOC_GROUP_EXTEND: | ||
| 147 | if (!capable(CAP_SYS_RESOURCE)) | ||
| 148 | return -EPERM; | ||
| 149 | |||
| 150 | if (get_user(new_clusters, (int __user *)arg)) | ||
| 151 | return -EFAULT; | ||
| 152 | |||
| 153 | return ocfs2_group_extend(inode, new_clusters); | ||
| 154 | case OCFS2_IOC_GROUP_ADD: | ||
| 155 | case OCFS2_IOC_GROUP_ADD64: | ||
| 156 | if (!capable(CAP_SYS_RESOURCE)) | ||
| 157 | return -EPERM; | ||
| 158 | |||
| 159 | if (copy_from_user(&input, (int __user *) arg, sizeof(input))) | ||
| 160 | return -EFAULT; | ||
| 161 | |||
| 162 | return ocfs2_group_add(inode, &input); | ||
| 143 | default: | 163 | default: | 
| 144 | return -ENOTTY; | 164 | return -ENOTTY; | 
| 145 | } | 165 | } | 
| @@ -162,6 +182,9 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 162 | case OCFS2_IOC_RESVSP64: | 182 | case OCFS2_IOC_RESVSP64: | 
| 163 | case OCFS2_IOC_UNRESVSP: | 183 | case OCFS2_IOC_UNRESVSP: | 
| 164 | case OCFS2_IOC_UNRESVSP64: | 184 | case OCFS2_IOC_UNRESVSP64: | 
| 185 | case OCFS2_IOC_GROUP_EXTEND: | ||
| 186 | case OCFS2_IOC_GROUP_ADD: | ||
| 187 | case OCFS2_IOC_GROUP_ADD64: | ||
| 165 | break; | 188 | break; | 
| 166 | default: | 189 | default: | 
| 167 | return -ENOIOCTLCMD; | 190 | return -ENOIOCTLCMD; | 
| diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 8d81f6c1b877..f31c7e8c19c3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -44,7 +44,6 @@ | |||
| 44 | #include "localalloc.h" | 44 | #include "localalloc.h" | 
| 45 | #include "slot_map.h" | 45 | #include "slot_map.h" | 
| 46 | #include "super.h" | 46 | #include "super.h" | 
| 47 | #include "vote.h" | ||
| 48 | #include "sysfile.h" | 47 | #include "sysfile.h" | 
| 49 | 48 | ||
| 50 | #include "buffer_head_io.h" | 49 | #include "buffer_head_io.h" | 
| @@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
| 103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 102 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 
| 104 | journal->j_trans_id, flushed); | 103 | journal->j_trans_id, flushed); | 
| 105 | 104 | ||
| 106 | ocfs2_kick_vote_thread(osb); | 105 | ocfs2_wake_downconvert_thread(osb); | 
| 107 | wake_up(&journal->j_checkpointed); | 106 | wake_up(&journal->j_checkpointed); | 
| 108 | finally: | 107 | finally: | 
| 109 | mlog_exit(status); | 108 | mlog_exit(status); | 
| @@ -314,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 314 | return err; | 313 | return err; | 
| 315 | } | 314 | } | 
| 316 | 315 | ||
| 317 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) | 316 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) | 
| 318 | 317 | ||
| 319 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 318 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 
| 320 | { | 319 | { | 
| 321 | journal_t *journal = osb->journal->j_journal; | 320 | journal_t *journal = osb->journal->j_journal; | 
| 321 | unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | ||
| 322 | |||
| 323 | if (osb->osb_commit_interval) | ||
| 324 | commit_interval = osb->osb_commit_interval; | ||
| 322 | 325 | ||
| 323 | spin_lock(&journal->j_state_lock); | 326 | spin_lock(&journal->j_state_lock); | 
| 324 | journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | 327 | journal->j_commit_interval = commit_interval; | 
| 325 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 328 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 
| 326 | journal->j_flags |= JFS_BARRIER; | 329 | journal->j_flags |= JFS_BARRIER; | 
| 327 | else | 330 | else | 
| @@ -337,7 +340,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
| 337 | struct ocfs2_dinode *di = NULL; | 340 | struct ocfs2_dinode *di = NULL; | 
| 338 | struct buffer_head *bh = NULL; | 341 | struct buffer_head *bh = NULL; | 
| 339 | struct ocfs2_super *osb; | 342 | struct ocfs2_super *osb; | 
| 340 | int meta_lock = 0; | 343 | int inode_lock = 0; | 
| 341 | 344 | ||
| 342 | mlog_entry_void(); | 345 | mlog_entry_void(); | 
| 343 | 346 | ||
| @@ -367,14 +370,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
| 367 | /* Skip recovery waits here - journal inode metadata never | 370 | /* Skip recovery waits here - journal inode metadata never | 
| 368 | * changes in a live cluster so it can be considered an | 371 | * changes in a live cluster so it can be considered an | 
| 369 | * exception to the rule. */ | 372 | * exception to the rule. */ | 
| 370 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 373 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 
| 371 | if (status < 0) { | 374 | if (status < 0) { | 
| 372 | if (status != -ERESTARTSYS) | 375 | if (status != -ERESTARTSYS) | 
| 373 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 376 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 
| 374 | goto done; | 377 | goto done; | 
| 375 | } | 378 | } | 
| 376 | 379 | ||
| 377 | meta_lock = 1; | 380 | inode_lock = 1; | 
| 378 | di = (struct ocfs2_dinode *)bh->b_data; | 381 | di = (struct ocfs2_dinode *)bh->b_data; | 
| 379 | 382 | ||
| 380 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 383 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 
| @@ -414,8 +417,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
| 414 | status = 0; | 417 | status = 0; | 
| 415 | done: | 418 | done: | 
| 416 | if (status < 0) { | 419 | if (status < 0) { | 
| 417 | if (meta_lock) | 420 | if (inode_lock) | 
| 418 | ocfs2_meta_unlock(inode, 1); | 421 | ocfs2_inode_unlock(inode, 1); | 
| 419 | if (bh != NULL) | 422 | if (bh != NULL) | 
| 420 | brelse(bh); | 423 | brelse(bh); | 
| 421 | if (inode) { | 424 | if (inode) { | 
| @@ -544,7 +547,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
| 544 | OCFS2_I(inode)->ip_open_count--; | 547 | OCFS2_I(inode)->ip_open_count--; | 
| 545 | 548 | ||
| 546 | /* unlock our journal */ | 549 | /* unlock our journal */ | 
| 547 | ocfs2_meta_unlock(inode, 1); | 550 | ocfs2_inode_unlock(inode, 1); | 
| 548 | 551 | ||
| 549 | brelse(journal->j_bh); | 552 | brelse(journal->j_bh); | 
| 550 | journal->j_bh = NULL; | 553 | journal->j_bh = NULL; | 
| @@ -883,8 +886,8 @@ restart: | |||
| 883 | ocfs2_super_unlock(osb, 1); | 886 | ocfs2_super_unlock(osb, 1); | 
| 884 | 887 | ||
| 885 | /* We always run recovery on our own orphan dir - the dead | 888 | /* We always run recovery on our own orphan dir - the dead | 
| 886 | * node(s) may have voted "no" on an inode delete earlier. A | 889 | * node(s) may have disallowed a previous inode delete. Re-processing | 
| 887 | * revote is therefore required. */ | 890 | * is therefore required. */ | 
| 888 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 891 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 
| 889 | NULL); | 892 | NULL); | 
| 890 | 893 | ||
| @@ -973,9 +976,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 973 | } | 976 | } | 
| 974 | SET_INODE_JOURNAL(inode); | 977 | SET_INODE_JOURNAL(inode); | 
| 975 | 978 | ||
| 976 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 979 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 
| 977 | if (status < 0) { | 980 | if (status < 0) { | 
| 978 | mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); | 981 | mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); | 
| 979 | if (status != -ERESTARTSYS) | 982 | if (status != -ERESTARTSYS) | 
| 980 | mlog(ML_ERROR, "Could not lock journal!\n"); | 983 | mlog(ML_ERROR, "Could not lock journal!\n"); | 
| 981 | goto done; | 984 | goto done; | 
| @@ -1047,7 +1050,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1047 | done: | 1050 | done: | 
| 1048 | /* drop the lock on this nodes journal */ | 1051 | /* drop the lock on this nodes journal */ | 
| 1049 | if (got_lock) | 1052 | if (got_lock) | 
| 1050 | ocfs2_meta_unlock(inode, 1); | 1053 | ocfs2_inode_unlock(inode, 1); | 
| 1051 | 1054 | ||
| 1052 | if (inode) | 1055 | if (inode) | 
| 1053 | iput(inode); | 1056 | iput(inode); | 
| @@ -1162,14 +1165,14 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, | |||
| 1162 | SET_INODE_JOURNAL(inode); | 1165 | SET_INODE_JOURNAL(inode); | 
| 1163 | 1166 | ||
| 1164 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; | 1167 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; | 
| 1165 | status = ocfs2_meta_lock_full(inode, NULL, 1, flags); | 1168 | status = ocfs2_inode_lock_full(inode, NULL, 1, flags); | 
| 1166 | if (status < 0) { | 1169 | if (status < 0) { | 
| 1167 | if (status != -EAGAIN) | 1170 | if (status != -EAGAIN) | 
| 1168 | mlog_errno(status); | 1171 | mlog_errno(status); | 
| 1169 | goto bail; | 1172 | goto bail; | 
| 1170 | } | 1173 | } | 
| 1171 | 1174 | ||
| 1172 | ocfs2_meta_unlock(inode, 1); | 1175 | ocfs2_inode_unlock(inode, 1); | 
| 1173 | bail: | 1176 | bail: | 
| 1174 | if (inode) | 1177 | if (inode) | 
| 1175 | iput(inode); | 1178 | iput(inode); | 
| @@ -1241,7 +1244,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, | |||
| 1241 | 1244 | ||
| 1242 | /* Skip bad inodes so that recovery can continue */ | 1245 | /* Skip bad inodes so that recovery can continue */ | 
| 1243 | iter = ocfs2_iget(p->osb, ino, | 1246 | iter = ocfs2_iget(p->osb, ino, | 
| 1244 | OCFS2_FI_FLAG_ORPHAN_RECOVERY); | 1247 | OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); | 
| 1245 | if (IS_ERR(iter)) | 1248 | if (IS_ERR(iter)) | 
| 1246 | return 0; | 1249 | return 0; | 
| 1247 | 1250 | ||
| @@ -1277,7 +1280,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
| 1277 | } | 1280 | } | 
| 1278 | 1281 | ||
| 1279 | mutex_lock(&orphan_dir_inode->i_mutex); | 1282 | mutex_lock(&orphan_dir_inode->i_mutex); | 
| 1280 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0); | 1283 | status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); | 
| 1281 | if (status < 0) { | 1284 | if (status < 0) { | 
| 1282 | mlog_errno(status); | 1285 | mlog_errno(status); | 
| 1283 | goto out; | 1286 | goto out; | 
| @@ -1293,7 +1296,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
| 1293 | *head = priv.head; | 1296 | *head = priv.head; | 
| 1294 | 1297 | ||
| 1295 | out_cluster: | 1298 | out_cluster: | 
| 1296 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1299 | ocfs2_inode_unlock(orphan_dir_inode, 0); | 
| 1297 | out: | 1300 | out: | 
| 1298 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1301 | mutex_unlock(&orphan_dir_inode->i_mutex); | 
| 1299 | iput(orphan_dir_inode); | 1302 | iput(orphan_dir_inode); | 
| @@ -1380,10 +1383,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1380 | iter = oi->ip_next_orphan; | 1383 | iter = oi->ip_next_orphan; | 
| 1381 | 1384 | ||
| 1382 | spin_lock(&oi->ip_lock); | 1385 | spin_lock(&oi->ip_lock); | 
| 1383 | /* Delete voting may have set these on the assumption | 1386 | /* The remote delete code may have set these on the | 
| 1384 | * that the other node would wipe them successfully. | 1387 | * assumption that the other node would wipe them | 
| 1385 | * If they are still in the node's orphan dir, we need | 1388 | * successfully. If they are still in the node's | 
| 1386 | * to reset that state. */ | 1389 | * orphan dir, we need to reset that state. */ | 
| 1387 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 1390 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 
| 1388 | 1391 | ||
| 1389 | /* Set the proper information to get us going into | 1392 | /* Set the proper information to get us going into | 
| diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 4b32e0961568..220f3e818e78 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -278,6 +278,12 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 278 | /* simple file updates like chmod, etc. */ | 278 | /* simple file updates like chmod, etc. */ | 
| 279 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 279 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 
| 280 | 280 | ||
| 281 | /* group extend. inode update and last group update. */ | ||
| 282 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
| 283 | |||
| 284 | /* group add. inode update and the new group update. */ | ||
| 285 | #define OCFS2_GROUP_ADD_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
| 286 | |||
| 281 | /* get one bit out of a suballocator: dinode + group descriptor + | 287 | /* get one bit out of a suballocator: dinode + group descriptor + | 
| 282 | * prev. group desc. if we relink. */ | 288 | * prev. group desc. if we relink. */ | 
| 283 | #define OCFS2_SUBALLOC_ALLOC (3) | 289 | #define OCFS2_SUBALLOC_ALLOC (3) | 
| diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 58ea88b5af36..add1ffdc5c6c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 
| 76 | struct inode *local_alloc_inode); | 76 | struct inode *local_alloc_inode); | 
| 77 | 77 | ||
| 78 | /* | ||
| 79 | * Determine how large our local alloc window should be, in bits. | ||
| 80 | * | ||
| 81 | * These values (and the behavior in ocfs2_alloc_should_use_local) have | ||
| 82 | * been chosen so that most allocations, including new block groups go | ||
| 83 | * through local alloc. | ||
| 84 | */ | ||
| 85 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 
| 86 | { | 79 | { | 
| 87 | BUG_ON(osb->s_clustersize_bits < 12); | 80 | BUG_ON(osb->s_clustersize_bits > 20); | 
| 88 | 81 | ||
| 89 | return 2048 >> (osb->s_clustersize_bits - 12); | 82 | /* Size local alloc windows by the megabyte */ | 
| 83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | ||
| 90 | } | 84 | } | 
| 91 | 85 | ||
| 92 | /* | 86 | /* | 
| @@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | |||
| 96 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 
| 97 | { | 91 | { | 
| 98 | int la_bits = ocfs2_local_alloc_window_bits(osb); | 92 | int la_bits = ocfs2_local_alloc_window_bits(osb); | 
| 93 | int ret = 0; | ||
| 99 | 94 | ||
| 100 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 
| 101 | return 0; | 96 | goto bail; | 
| 102 | 97 | ||
| 103 | /* la_bits should be at least twice the size (in clusters) of | 98 | /* la_bits should be at least twice the size (in clusters) of | 
| 104 | * a new block group. We want to be sure block group | 99 | * a new block group. We want to be sure block group | 
| 105 | * allocations go through the local alloc, so allow an | 100 | * allocations go through the local alloc, so allow an | 
| 106 | * allocation to take up to half the bitmap. */ | 101 | * allocation to take up to half the bitmap. */ | 
| 107 | if (bits > (la_bits / 2)) | 102 | if (bits > (la_bits / 2)) | 
| 108 | return 0; | 103 | goto bail; | 
| 109 | 104 | ||
| 110 | return 1; | 105 | ret = 1; | 
| 106 | bail: | ||
| 107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | ||
| 108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | ||
| 109 | return ret; | ||
| 111 | } | 110 | } | 
| 112 | 111 | ||
| 113 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) | 112 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) | 
| @@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 121 | 120 | ||
| 122 | mlog_entry_void(); | 121 | mlog_entry_void(); | 
| 123 | 122 | ||
| 123 | if (ocfs2_mount_local(osb)) | ||
| 124 | goto bail; | ||
| 125 | |||
| 126 | if (osb->local_alloc_size == 0) | ||
| 127 | goto bail; | ||
| 128 | |||
| 129 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | ||
| 130 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | ||
| 131 | "than max possible %u. Using defaults.\n", | ||
| 132 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | ||
| 133 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
| 134 | } | ||
| 135 | |||
| 124 | /* read the alloc off disk */ | 136 | /* read the alloc off disk */ | 
| 125 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, | 137 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, | 
| 126 | osb->slot_num); | 138 | osb->slot_num); | 
| @@ -181,6 +193,9 @@ bail: | |||
| 181 | if (inode) | 193 | if (inode) | 
| 182 | iput(inode); | 194 | iput(inode); | 
| 183 | 195 | ||
| 196 | mlog(0, "Local alloc window bits = %d\n", | ||
| 197 | ocfs2_local_alloc_window_bits(osb)); | ||
| 198 | |||
| 184 | mlog_exit(status); | 199 | mlog_exit(status); | 
| 185 | return status; | 200 | return status; | 
| 186 | } | 201 | } | 
| @@ -231,7 +246,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
| 231 | 246 | ||
| 232 | mutex_lock(&main_bm_inode->i_mutex); | 247 | mutex_lock(&main_bm_inode->i_mutex); | 
| 233 | 248 | ||
| 234 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 249 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | 
| 235 | if (status < 0) { | 250 | if (status < 0) { | 
| 236 | mlog_errno(status); | 251 | mlog_errno(status); | 
| 237 | goto out_mutex; | 252 | goto out_mutex; | 
| @@ -286,7 +301,7 @@ out_unlock: | |||
| 286 | if (main_bm_bh) | 301 | if (main_bm_bh) | 
| 287 | brelse(main_bm_bh); | 302 | brelse(main_bm_bh); | 
| 288 | 303 | ||
| 289 | ocfs2_meta_unlock(main_bm_inode, 1); | 304 | ocfs2_inode_unlock(main_bm_inode, 1); | 
| 290 | 305 | ||
| 291 | out_mutex: | 306 | out_mutex: | 
| 292 | mutex_unlock(&main_bm_inode->i_mutex); | 307 | mutex_unlock(&main_bm_inode->i_mutex); | 
| @@ -399,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
| 399 | 414 | ||
| 400 | mutex_lock(&main_bm_inode->i_mutex); | 415 | mutex_lock(&main_bm_inode->i_mutex); | 
| 401 | 416 | ||
| 402 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 417 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | 
| 403 | if (status < 0) { | 418 | if (status < 0) { | 
| 404 | mlog_errno(status); | 419 | mlog_errno(status); | 
| 405 | goto out_mutex; | 420 | goto out_mutex; | 
| @@ -424,7 +439,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
| 424 | ocfs2_commit_trans(osb, handle); | 439 | ocfs2_commit_trans(osb, handle); | 
| 425 | 440 | ||
| 426 | out_unlock: | 441 | out_unlock: | 
| 427 | ocfs2_meta_unlock(main_bm_inode, 1); | 442 | ocfs2_inode_unlock(main_bm_inode, 1); | 
| 428 | 443 | ||
| 429 | out_mutex: | 444 | out_mutex: | 
| 430 | mutex_unlock(&main_bm_inode->i_mutex); | 445 | mutex_unlock(&main_bm_inode->i_mutex); | 
| @@ -521,6 +536,9 @@ bail: | |||
| 521 | iput(local_alloc_inode); | 536 | iput(local_alloc_inode); | 
| 522 | } | 537 | } | 
| 523 | 538 | ||
| 539 | mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, | ||
| 540 | status); | ||
| 541 | |||
| 524 | mlog_exit(status); | 542 | mlog_exit(status); | 
| 525 | return status; | 543 | return status; | 
| 526 | } | 544 | } | 
| diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c new file mode 100644 index 000000000000..203f87143877 --- /dev/null +++ b/fs/ocfs2/locks.c | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * locks.c | ||
| 5 | * | ||
| 6 | * Userspace file locking support | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 021110-1307, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/fs.h> | ||
| 27 | |||
| 28 | #define MLOG_MASK_PREFIX ML_INODE | ||
| 29 | #include <cluster/masklog.h> | ||
| 30 | |||
| 31 | #include "ocfs2.h" | ||
| 32 | |||
| 33 | #include "dlmglue.h" | ||
| 34 | #include "file.h" | ||
| 35 | #include "locks.h" | ||
| 36 | |||
| 37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | ||
| 38 | int cmd, struct file_lock *fl) | ||
| 39 | { | ||
| 40 | int ret = 0, level = 0, trylock = 0; | ||
| 41 | struct ocfs2_file_private *fp = file->private_data; | ||
| 42 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
| 43 | |||
| 44 | if (fl->fl_type == F_WRLCK) | ||
| 45 | level = 1; | ||
| 46 | if (!IS_SETLKW(cmd)) | ||
| 47 | trylock = 1; | ||
| 48 | |||
| 49 | mutex_lock(&fp->fp_mutex); | ||
| 50 | |||
| 51 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | ||
| 52 | lockres->l_level > LKM_NLMODE) { | ||
| 53 | int old_level = 0; | ||
| 54 | |||
| 55 | if (lockres->l_level == LKM_EXMODE) | ||
| 56 | old_level = 1; | ||
| 57 | |||
| 58 | if (level == old_level) | ||
| 59 | goto out; | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Converting an existing lock is not guaranteed to be | ||
| 63 | * atomic, so we can get away with simply unlocking | ||
| 64 | * here and allowing the lock code to try at the new | ||
| 65 | * level. | ||
| 66 | */ | ||
| 67 | |||
| 68 | flock_lock_file_wait(file, | ||
| 69 | &(struct file_lock){.fl_type = F_UNLCK}); | ||
| 70 | |||
| 71 | ocfs2_file_unlock(file); | ||
| 72 | } | ||
| 73 | |||
| 74 | ret = ocfs2_file_lock(file, level, trylock); | ||
| 75 | if (ret) { | ||
| 76 | if (ret == -EAGAIN && trylock) | ||
| 77 | ret = -EWOULDBLOCK; | ||
| 78 | else | ||
| 79 | mlog_errno(ret); | ||
| 80 | goto out; | ||
| 81 | } | ||
| 82 | |||
| 83 | ret = flock_lock_file_wait(file, fl); | ||
| 84 | |||
| 85 | out: | ||
| 86 | mutex_unlock(&fp->fp_mutex); | ||
| 87 | |||
| 88 | return ret; | ||
| 89 | } | ||
| 90 | |||
| 91 | static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl) | ||
| 92 | { | ||
| 93 | int ret; | ||
| 94 | struct ocfs2_file_private *fp = file->private_data; | ||
| 95 | |||
| 96 | mutex_lock(&fp->fp_mutex); | ||
| 97 | ocfs2_file_unlock(file); | ||
| 98 | ret = flock_lock_file_wait(file, fl); | ||
| 99 | mutex_unlock(&fp->fp_mutex); | ||
| 100 | |||
| 101 | return ret; | ||
| 102 | } | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Overall flow of ocfs2_flock() was influenced by gfs2_flock(). | ||
| 106 | */ | ||
| 107 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | ||
| 108 | { | ||
| 109 | struct inode *inode = file->f_mapping->host; | ||
| 110 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 111 | |||
| 112 | if (!(fl->fl_flags & FL_FLOCK)) | ||
| 113 | return -ENOLCK; | ||
| 114 | if (__mandatory_lock(inode)) | ||
| 115 | return -ENOLCK; | ||
| 116 | |||
| 117 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
| 118 | ocfs2_mount_local(osb)) | ||
| 119 | return flock_lock_file_wait(file, fl); | ||
| 120 | |||
| 121 | if (fl->fl_type == F_UNLCK) | ||
| 122 | return ocfs2_do_funlock(file, cmd, fl); | ||
| 123 | else | ||
| 124 | return ocfs2_do_flock(file, inode, cmd, fl); | ||
| 125 | } | ||
| diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/locks.h index 9ea46f62de31..9743ef2324ec 100644 --- a/fs/ocfs2/vote.h +++ b/fs/ocfs2/locks.h | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- | 
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 
| 3 | * | 3 | * | 
| 4 | * vote.h | 4 | * locks.h | 
| 5 | * | 5 | * | 
| 6 | * description here | 6 | * Function prototypes for Userspace file locking support | 
| 7 | * | 7 | * | 
| 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 
| 9 | * | 9 | * | 
| @@ -23,26 +23,9 @@ | |||
| 23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. | 
| 24 | */ | 24 | */ | 
| 25 | 25 | ||
| 26 | #ifndef OCFS2_LOCKS_H | ||
| 27 | #define OCFS2_LOCKS_H | ||
| 26 | 28 | ||
| 27 | #ifndef VOTE_H | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); | 
| 28 | #define VOTE_H | ||
| 29 | 30 | ||
| 30 | int ocfs2_vote_thread(void *arg); | 31 | #endif /* OCFS2_LOCKS_H */ | 
| 31 | static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | ||
| 32 | { | ||
| 33 | spin_lock(&osb->vote_task_lock); | ||
| 34 | /* make sure the voting thread gets a swipe at whatever changes | ||
| 35 | * the caller may have made to the voting state */ | ||
| 36 | osb->vote_wake_sequence++; | ||
| 37 | spin_unlock(&osb->vote_task_lock); | ||
| 38 | wake_up(&osb->vote_event); | ||
| 39 | } | ||
| 40 | |||
| 41 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | ||
| 42 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | ||
| 43 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | ||
| 44 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb); | ||
| 45 | |||
| 46 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
| 47 | int node_num); | ||
| 48 | #endif | ||
| diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 98756156d298..3dc18d67557c 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
| @@ -168,7 +168,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 168 | * node. Taking the data lock will also ensure that we don't | 168 | * node. Taking the data lock will also ensure that we don't | 
| 169 | * attempt page truncation as part of a downconvert. | 169 | * attempt page truncation as part of a downconvert. | 
| 170 | */ | 170 | */ | 
| 171 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 171 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 
| 172 | if (ret < 0) { | 172 | if (ret < 0) { | 
| 173 | mlog_errno(ret); | 173 | mlog_errno(ret); | 
| 174 | goto out; | 174 | goto out; | 
| @@ -181,21 +181,12 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 181 | */ | 181 | */ | 
| 182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 183 | 183 | ||
| 184 | ret = ocfs2_data_lock(inode, 1); | ||
| 185 | if (ret < 0) { | ||
| 186 | mlog_errno(ret); | ||
| 187 | goto out_meta_unlock; | ||
| 188 | } | ||
| 189 | |||
| 190 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 184 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 
| 191 | 185 | ||
| 192 | ocfs2_data_unlock(inode, 1); | ||
| 193 | |||
| 194 | out_meta_unlock: | ||
| 195 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 186 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 
| 196 | 187 | ||
| 197 | brelse(di_bh); | 188 | brelse(di_bh); | 
| 198 | ocfs2_meta_unlock(inode, 1); | 189 | ocfs2_inode_unlock(inode, 1); | 
| 199 | 190 | ||
| 200 | out: | 191 | out: | 
| 201 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | 192 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | 
| @@ -214,13 +205,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 214 | { | 205 | { | 
| 215 | int ret = 0, lock_level = 0; | 206 | int ret = 0, lock_level = 0; | 
| 216 | 207 | ||
| 217 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 208 | ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, | 
| 218 | file->f_vfsmnt, &lock_level); | 209 | file->f_vfsmnt, &lock_level); | 
| 219 | if (ret < 0) { | 210 | if (ret < 0) { | 
| 220 | mlog_errno(ret); | 211 | mlog_errno(ret); | 
| 221 | goto out; | 212 | goto out; | 
| 222 | } | 213 | } | 
| 223 | ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); | 214 | ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); | 
| 224 | out: | 215 | out: | 
| 225 | vma->vm_ops = &ocfs2_file_vm_ops; | 216 | vma->vm_ops = &ocfs2_file_vm_ops; | 
| 226 | vma->vm_flags |= VM_CAN_NONLINEAR; | 217 | vma->vm_flags |= VM_CAN_NONLINEAR; | 
| diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 989ac2718587..ae9ad9587516 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -60,7 +60,6 @@ | |||
| 60 | #include "symlink.h" | 60 | #include "symlink.h" | 
| 61 | #include "sysfile.h" | 61 | #include "sysfile.h" | 
| 62 | #include "uptodate.h" | 62 | #include "uptodate.h" | 
| 63 | #include "vote.h" | ||
| 64 | 63 | ||
| 65 | #include "buffer_head_io.h" | 64 | #include "buffer_head_io.h" | 
| 66 | 65 | ||
| @@ -116,7 +115,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
| 116 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, | 115 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, | 
| 117 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 116 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 
| 118 | 117 | ||
| 119 | status = ocfs2_meta_lock(dir, NULL, 0); | 118 | status = ocfs2_inode_lock(dir, NULL, 0); | 
| 120 | if (status < 0) { | 119 | if (status < 0) { | 
| 121 | if (status != -ENOENT) | 120 | if (status != -ENOENT) | 
| 122 | mlog_errno(status); | 121 | mlog_errno(status); | 
| @@ -129,7 +128,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
| 129 | if (status < 0) | 128 | if (status < 0) | 
| 130 | goto bail_add; | 129 | goto bail_add; | 
| 131 | 130 | ||
| 132 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 131 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); | 
| 133 | if (IS_ERR(inode)) { | 132 | if (IS_ERR(inode)) { | 
| 134 | ret = ERR_PTR(-EACCES); | 133 | ret = ERR_PTR(-EACCES); | 
| 135 | goto bail_unlock; | 134 | goto bail_unlock; | 
| @@ -176,8 +175,8 @@ bail_unlock: | |||
| 176 | /* Don't drop the cluster lock until *after* the d_add -- | 175 | /* Don't drop the cluster lock until *after* the d_add -- | 
| 177 | * unlink on another node will message us to remove that | 176 | * unlink on another node will message us to remove that | 
| 178 | * dentry under this lock so otherwise we can race this with | 177 | * dentry under this lock so otherwise we can race this with | 
| 179 | * the vote thread and have a stale dentry. */ | 178 | * the downconvert thread and have a stale dentry. */ | 
| 180 | ocfs2_meta_unlock(dir, 0); | 179 | ocfs2_inode_unlock(dir, 0); | 
| 181 | 180 | ||
| 182 | bail: | 181 | bail: | 
| 183 | 182 | ||
| @@ -209,7 +208,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
| 209 | /* get our super block */ | 208 | /* get our super block */ | 
| 210 | osb = OCFS2_SB(dir->i_sb); | 209 | osb = OCFS2_SB(dir->i_sb); | 
| 211 | 210 | ||
| 212 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 211 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); | 
| 213 | if (status < 0) { | 212 | if (status < 0) { | 
| 214 | if (status != -ENOENT) | 213 | if (status != -ENOENT) | 
| 215 | mlog_errno(status); | 214 | mlog_errno(status); | 
| @@ -323,7 +322,7 @@ leave: | |||
| 323 | if (handle) | 322 | if (handle) | 
| 324 | ocfs2_commit_trans(osb, handle); | 323 | ocfs2_commit_trans(osb, handle); | 
| 325 | 324 | ||
| 326 | ocfs2_meta_unlock(dir, 1); | 325 | ocfs2_inode_unlock(dir, 1); | 
| 327 | 326 | ||
| 328 | if (status == -ENOSPC) | 327 | if (status == -ENOSPC) | 
| 329 | mlog(0, "Disk is full\n"); | 328 | mlog(0, "Disk is full\n"); | 
| @@ -553,7 +552,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
| 553 | if (S_ISDIR(inode->i_mode)) | 552 | if (S_ISDIR(inode->i_mode)) | 
| 554 | return -EPERM; | 553 | return -EPERM; | 
| 555 | 554 | ||
| 556 | err = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 555 | err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); | 
| 557 | if (err < 0) { | 556 | if (err < 0) { | 
| 558 | if (err != -ENOENT) | 557 | if (err != -ENOENT) | 
| 559 | mlog_errno(err); | 558 | mlog_errno(err); | 
| @@ -578,7 +577,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
| 578 | goto out; | 577 | goto out; | 
| 579 | } | 578 | } | 
| 580 | 579 | ||
| 581 | err = ocfs2_meta_lock(inode, &fe_bh, 1); | 580 | err = ocfs2_inode_lock(inode, &fe_bh, 1); | 
| 582 | if (err < 0) { | 581 | if (err < 0) { | 
| 583 | if (err != -ENOENT) | 582 | if (err != -ENOENT) | 
| 584 | mlog_errno(err); | 583 | mlog_errno(err); | 
| @@ -643,10 +642,10 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
| 643 | out_commit: | 642 | out_commit: | 
| 644 | ocfs2_commit_trans(osb, handle); | 643 | ocfs2_commit_trans(osb, handle); | 
| 645 | out_unlock_inode: | 644 | out_unlock_inode: | 
| 646 | ocfs2_meta_unlock(inode, 1); | 645 | ocfs2_inode_unlock(inode, 1); | 
| 647 | 646 | ||
| 648 | out: | 647 | out: | 
| 649 | ocfs2_meta_unlock(dir, 1); | 648 | ocfs2_inode_unlock(dir, 1); | 
| 650 | 649 | ||
| 651 | if (de_bh) | 650 | if (de_bh) | 
| 652 | brelse(de_bh); | 651 | brelse(de_bh); | 
| @@ -720,7 +719,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 720 | return -EPERM; | 719 | return -EPERM; | 
| 721 | } | 720 | } | 
| 722 | 721 | ||
| 723 | status = ocfs2_meta_lock(dir, &parent_node_bh, 1); | 722 | status = ocfs2_inode_lock(dir, &parent_node_bh, 1); | 
| 724 | if (status < 0) { | 723 | if (status < 0) { | 
| 725 | if (status != -ENOENT) | 724 | if (status != -ENOENT) | 
| 726 | mlog_errno(status); | 725 | mlog_errno(status); | 
| @@ -745,7 +744,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 745 | goto leave; | 744 | goto leave; | 
| 746 | } | 745 | } | 
| 747 | 746 | ||
| 748 | status = ocfs2_meta_lock(inode, &fe_bh, 1); | 747 | status = ocfs2_inode_lock(inode, &fe_bh, 1); | 
| 749 | if (status < 0) { | 748 | if (status < 0) { | 
| 750 | if (status != -ENOENT) | 749 | if (status != -ENOENT) | 
| 751 | mlog_errno(status); | 750 | mlog_errno(status); | 
| @@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 765 | 764 | ||
| 766 | status = ocfs2_remote_dentry_delete(dentry); | 765 | status = ocfs2_remote_dentry_delete(dentry); | 
| 767 | if (status < 0) { | 766 | if (status < 0) { | 
| 768 | /* This vote should succeed under all normal | 767 | /* This remote delete should succeed under all normal | 
| 769 | * circumstances. */ | 768 | * circumstances. */ | 
| 770 | mlog_errno(status); | 769 | mlog_errno(status); | 
| 771 | goto leave; | 770 | goto leave; | 
| @@ -841,13 +840,13 @@ leave: | |||
| 841 | ocfs2_commit_trans(osb, handle); | 840 | ocfs2_commit_trans(osb, handle); | 
| 842 | 841 | ||
| 843 | if (child_locked) | 842 | if (child_locked) | 
| 844 | ocfs2_meta_unlock(inode, 1); | 843 | ocfs2_inode_unlock(inode, 1); | 
| 845 | 844 | ||
| 846 | ocfs2_meta_unlock(dir, 1); | 845 | ocfs2_inode_unlock(dir, 1); | 
| 847 | 846 | ||
| 848 | if (orphan_dir) { | 847 | if (orphan_dir) { | 
| 849 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 848 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 
| 850 | ocfs2_meta_unlock(orphan_dir, 1); | 849 | ocfs2_inode_unlock(orphan_dir, 1); | 
| 851 | mutex_unlock(&orphan_dir->i_mutex); | 850 | mutex_unlock(&orphan_dir->i_mutex); | 
| 852 | iput(orphan_dir); | 851 | iput(orphan_dir); | 
| 853 | } | 852 | } | 
| @@ -908,7 +907,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
| 908 | inode1 = tmpinode; | 907 | inode1 = tmpinode; | 
| 909 | } | 908 | } | 
| 910 | /* lock id2 */ | 909 | /* lock id2 */ | 
| 911 | status = ocfs2_meta_lock(inode2, bh2, 1); | 910 | status = ocfs2_inode_lock(inode2, bh2, 1); | 
| 912 | if (status < 0) { | 911 | if (status < 0) { | 
| 913 | if (status != -ENOENT) | 912 | if (status != -ENOENT) | 
| 914 | mlog_errno(status); | 913 | mlog_errno(status); | 
| @@ -917,14 +916,14 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
| 917 | } | 916 | } | 
| 918 | 917 | ||
| 919 | /* lock id1 */ | 918 | /* lock id1 */ | 
| 920 | status = ocfs2_meta_lock(inode1, bh1, 1); | 919 | status = ocfs2_inode_lock(inode1, bh1, 1); | 
| 921 | if (status < 0) { | 920 | if (status < 0) { | 
| 922 | /* | 921 | /* | 
| 923 | * An error return must mean that no cluster locks | 922 | * An error return must mean that no cluster locks | 
| 924 | * were held on function exit. | 923 | * were held on function exit. | 
| 925 | */ | 924 | */ | 
| 926 | if (oi1->ip_blkno != oi2->ip_blkno) | 925 | if (oi1->ip_blkno != oi2->ip_blkno) | 
| 927 | ocfs2_meta_unlock(inode2, 1); | 926 | ocfs2_inode_unlock(inode2, 1); | 
| 928 | 927 | ||
| 929 | if (status != -ENOENT) | 928 | if (status != -ENOENT) | 
| 930 | mlog_errno(status); | 929 | mlog_errno(status); | 
| @@ -937,10 +936,10 @@ bail: | |||
| 937 | 936 | ||
| 938 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) | 937 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) | 
| 939 | { | 938 | { | 
| 940 | ocfs2_meta_unlock(inode1, 1); | 939 | ocfs2_inode_unlock(inode1, 1); | 
| 941 | 940 | ||
| 942 | if (inode1 != inode2) | 941 | if (inode1 != inode2) | 
| 943 | ocfs2_meta_unlock(inode2, 1); | 942 | ocfs2_inode_unlock(inode2, 1); | 
| 944 | } | 943 | } | 
| 945 | 944 | ||
| 946 | static int ocfs2_rename(struct inode *old_dir, | 945 | static int ocfs2_rename(struct inode *old_dir, | 
| @@ -1031,10 +1030,11 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1031 | 1030 | ||
| 1032 | /* | 1031 | /* | 
| 1033 | * Aside from allowing a meta data update, the locking here | 1032 | * Aside from allowing a meta data update, the locking here | 
| 1034 | * also ensures that the vote thread on other nodes won't have | 1033 | * also ensures that the downconvert thread on other nodes | 
| 1035 | * to concurrently downconvert the inode and the dentry locks. | 1034 | * won't have to concurrently downconvert the inode and the | 
| 1035 | * dentry locks. | ||
| 1036 | */ | 1036 | */ | 
| 1037 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); | 1037 | status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); | 
| 1038 | if (status < 0) { | 1038 | if (status < 0) { | 
| 1039 | if (status != -ENOENT) | 1039 | if (status != -ENOENT) | 
| 1040 | mlog_errno(status); | 1040 | mlog_errno(status); | 
| @@ -1143,7 +1143,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1143 | goto bail; | 1143 | goto bail; | 
| 1144 | } | 1144 | } | 
| 1145 | 1145 | ||
| 1146 | status = ocfs2_meta_lock(new_inode, &newfe_bh, 1); | 1146 | status = ocfs2_inode_lock(new_inode, &newfe_bh, 1); | 
| 1147 | if (status < 0) { | 1147 | if (status < 0) { | 
| 1148 | if (status != -ENOENT) | 1148 | if (status != -ENOENT) | 
| 1149 | mlog_errno(status); | 1149 | mlog_errno(status); | 
| @@ -1355,14 +1355,14 @@ bail: | |||
| 1355 | ocfs2_double_unlock(old_dir, new_dir); | 1355 | ocfs2_double_unlock(old_dir, new_dir); | 
| 1356 | 1356 | ||
| 1357 | if (old_child_locked) | 1357 | if (old_child_locked) | 
| 1358 | ocfs2_meta_unlock(old_inode, 1); | 1358 | ocfs2_inode_unlock(old_inode, 1); | 
| 1359 | 1359 | ||
| 1360 | if (new_child_locked) | 1360 | if (new_child_locked) | 
| 1361 | ocfs2_meta_unlock(new_inode, 1); | 1361 | ocfs2_inode_unlock(new_inode, 1); | 
| 1362 | 1362 | ||
| 1363 | if (orphan_dir) { | 1363 | if (orphan_dir) { | 
| 1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 
| 1365 | ocfs2_meta_unlock(orphan_dir, 1); | 1365 | ocfs2_inode_unlock(orphan_dir, 1); | 
| 1366 | mutex_unlock(&orphan_dir->i_mutex); | 1366 | mutex_unlock(&orphan_dir->i_mutex); | 
| 1367 | iput(orphan_dir); | 1367 | iput(orphan_dir); | 
| 1368 | } | 1368 | } | 
| @@ -1530,7 +1530,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1530 | credits = ocfs2_calc_symlink_credits(sb); | 1530 | credits = ocfs2_calc_symlink_credits(sb); | 
| 1531 | 1531 | ||
| 1532 | /* lock the parent directory */ | 1532 | /* lock the parent directory */ | 
| 1533 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 1533 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); | 
| 1534 | if (status < 0) { | 1534 | if (status < 0) { | 
| 1535 | if (status != -ENOENT) | 1535 | if (status != -ENOENT) | 
| 1536 | mlog_errno(status); | 1536 | mlog_errno(status); | 
| @@ -1657,7 +1657,7 @@ bail: | |||
| 1657 | if (handle) | 1657 | if (handle) | 
| 1658 | ocfs2_commit_trans(osb, handle); | 1658 | ocfs2_commit_trans(osb, handle); | 
| 1659 | 1659 | ||
| 1660 | ocfs2_meta_unlock(dir, 1); | 1660 | ocfs2_inode_unlock(dir, 1); | 
| 1661 | 1661 | ||
| 1662 | if (new_fe_bh) | 1662 | if (new_fe_bh) | 
| 1663 | brelse(new_fe_bh); | 1663 | brelse(new_fe_bh); | 
| @@ -1735,7 +1735,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
| 1735 | 1735 | ||
| 1736 | mutex_lock(&orphan_dir_inode->i_mutex); | 1736 | mutex_lock(&orphan_dir_inode->i_mutex); | 
| 1737 | 1737 | ||
| 1738 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1738 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 
| 1739 | if (status < 0) { | 1739 | if (status < 0) { | 
| 1740 | mlog_errno(status); | 1740 | mlog_errno(status); | 
| 1741 | goto leave; | 1741 | goto leave; | 
| @@ -1745,7 +1745,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
| 1745 | orphan_dir_bh, name, | 1745 | orphan_dir_bh, name, | 
| 1746 | OCFS2_ORPHAN_NAMELEN, de_bh); | 1746 | OCFS2_ORPHAN_NAMELEN, de_bh); | 
| 1747 | if (status < 0) { | 1747 | if (status < 0) { | 
| 1748 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 1748 | ocfs2_inode_unlock(orphan_dir_inode, 1); | 
| 1749 | 1749 | ||
| 1750 | mlog_errno(status); | 1750 | mlog_errno(status); | 
| 1751 | goto leave; | 1751 | goto leave; | 
| diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 60a23e1906b0..d08480580470 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -101,6 +101,7 @@ enum ocfs2_unlock_action { | |||
| 101 | * about to be | 101 | * about to be | 
| 102 | * dropped. */ | 102 | * dropped. */ | 
| 103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 
| 104 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | ||
| 104 | 105 | ||
| 105 | struct ocfs2_lock_res_ops; | 106 | struct ocfs2_lock_res_ops; | 
| 106 | 107 | ||
| @@ -170,6 +171,7 @@ enum ocfs2_mount_options | |||
| 170 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 171 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 
| 171 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 172 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 
| 172 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 173 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 
| 174 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | ||
| 173 | }; | 175 | }; | 
| 174 | 176 | ||
| 175 | #define OCFS2_OSB_SOFT_RO 0x0001 | 177 | #define OCFS2_OSB_SOFT_RO 0x0001 | 
| @@ -189,9 +191,7 @@ struct ocfs2_super | |||
| 189 | struct ocfs2_slot_info *slot_info; | 191 | struct ocfs2_slot_info *slot_info; | 
| 190 | 192 | ||
| 191 | spinlock_t node_map_lock; | 193 | spinlock_t node_map_lock; | 
| 192 | struct ocfs2_node_map mounted_map; | ||
| 193 | struct ocfs2_node_map recovery_map; | 194 | struct ocfs2_node_map recovery_map; | 
| 194 | struct ocfs2_node_map umount_map; | ||
| 195 | 195 | ||
| 196 | u64 root_blkno; | 196 | u64 root_blkno; | 
| 197 | u64 system_dir_blkno; | 197 | u64 system_dir_blkno; | 
| @@ -231,7 +231,9 @@ struct ocfs2_super | |||
| 231 | wait_queue_head_t checkpoint_event; | 231 | wait_queue_head_t checkpoint_event; | 
| 232 | atomic_t needs_checkpoint; | 232 | atomic_t needs_checkpoint; | 
| 233 | struct ocfs2_journal *journal; | 233 | struct ocfs2_journal *journal; | 
| 234 | unsigned long osb_commit_interval; | ||
| 234 | 235 | ||
| 236 | int local_alloc_size; | ||
| 235 | enum ocfs2_local_alloc_state local_alloc_state; | 237 | enum ocfs2_local_alloc_state local_alloc_state; | 
| 236 | struct buffer_head *local_alloc_bh; | 238 | struct buffer_head *local_alloc_bh; | 
| 237 | u64 la_last_gd; | 239 | u64 la_last_gd; | 
| @@ -254,28 +256,21 @@ struct ocfs2_super | |||
| 254 | 256 | ||
| 255 | wait_queue_head_t recovery_event; | 257 | wait_queue_head_t recovery_event; | 
| 256 | 258 | ||
| 257 | spinlock_t vote_task_lock; | 259 | spinlock_t dc_task_lock; | 
| 258 | struct task_struct *vote_task; | 260 | struct task_struct *dc_task; | 
| 259 | wait_queue_head_t vote_event; | 261 | wait_queue_head_t dc_event; | 
| 260 | unsigned long vote_wake_sequence; | 262 | unsigned long dc_wake_sequence; | 
| 261 | unsigned long vote_work_sequence; | 263 | unsigned long dc_work_sequence; | 
| 262 | 264 | ||
| 265 | /* | ||
| 266 | * Any thread can add locks to the list, but the downconvert | ||
| 267 | * thread is the only one allowed to remove locks. Any change | ||
| 268 | * to this rule requires updating | ||
| 269 | * ocfs2_downconvert_thread_do_work(). | ||
| 270 | */ | ||
| 263 | struct list_head blocked_lock_list; | 271 | struct list_head blocked_lock_list; | 
| 264 | unsigned long blocked_lock_count; | 272 | unsigned long blocked_lock_count; | 
| 265 | 273 | ||
| 266 | struct list_head vote_list; | ||
| 267 | int vote_count; | ||
| 268 | |||
| 269 | u32 net_key; | ||
| 270 | spinlock_t net_response_lock; | ||
| 271 | unsigned int net_response_ids; | ||
| 272 | struct list_head net_response_list; | ||
| 273 | |||
| 274 | struct o2hb_callback_func osb_hb_up; | ||
| 275 | struct o2hb_callback_func osb_hb_down; | ||
| 276 | |||
| 277 | struct list_head osb_net_handlers; | ||
| 278 | |||
| 279 | wait_queue_head_t osb_mount_event; | 274 | wait_queue_head_t osb_mount_event; | 
| 280 | 275 | ||
| 281 | /* Truncate log info */ | 276 | /* Truncate log info */ | 
| diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 6ef876759a73..3633edd3982f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -231,6 +231,20 @@ struct ocfs2_space_resv { | |||
| 231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | 231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | 
| 232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | 232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | 
| 233 | 233 | ||
| 234 | /* Used to pass group descriptor data when online resize is done */ | ||
| 235 | struct ocfs2_new_group_input { | ||
| 236 | __u64 group; /* Group descriptor's blkno. */ | ||
| 237 | __u32 clusters; /* Total number of clusters in this group */ | ||
| 238 | __u32 frees; /* Total free clusters in this group */ | ||
| 239 | __u16 chain; /* Chain for this group */ | ||
| 240 | __u16 reserved1; | ||
| 241 | __u32 reserved2; | ||
| 242 | }; | ||
| 243 | |||
| 244 | #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) | ||
| 245 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | ||
| 246 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | ||
| 247 | |||
| 234 | /* | 248 | /* | 
| 235 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 249 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 
| 236 | */ | 250 | */ | 
| @@ -256,6 +270,14 @@ struct ocfs2_space_resv { | |||
| 256 | /* Journal limits (in bytes) */ | 270 | /* Journal limits (in bytes) */ | 
| 257 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 
| 258 | 272 | ||
| 273 | /* | ||
| 274 | * Default local alloc size (in megabytes) | ||
| 275 | * | ||
| 276 | * The value chosen should be such that most allocations, including new | ||
| 277 | * block groups, use local alloc. | ||
| 278 | */ | ||
| 279 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | ||
| 280 | |||
| 259 | struct ocfs2_system_inode_info { | 281 | struct ocfs2_system_inode_info { | 
| 260 | char *si_name; | 282 | char *si_name; | 
| 261 | int si_iflags; | 283 | int si_iflags; | 
| diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 4ca02b1c38ac..86f3e3799c2b 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
| @@ -45,6 +45,7 @@ enum ocfs2_lock_type { | |||
| 45 | OCFS2_LOCK_TYPE_RW, | 45 | OCFS2_LOCK_TYPE_RW, | 
| 46 | OCFS2_LOCK_TYPE_DENTRY, | 46 | OCFS2_LOCK_TYPE_DENTRY, | 
| 47 | OCFS2_LOCK_TYPE_OPEN, | 47 | OCFS2_LOCK_TYPE_OPEN, | 
| 48 | OCFS2_LOCK_TYPE_FLOCK, | ||
| 48 | OCFS2_NUM_LOCK_TYPES | 49 | OCFS2_NUM_LOCK_TYPES | 
| 49 | }; | 50 | }; | 
| 50 | 51 | ||
| @@ -73,6 +74,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
| 73 | case OCFS2_LOCK_TYPE_OPEN: | 74 | case OCFS2_LOCK_TYPE_OPEN: | 
| 74 | c = 'O'; | 75 | c = 'O'; | 
| 75 | break; | 76 | break; | 
| 77 | case OCFS2_LOCK_TYPE_FLOCK: | ||
| 78 | c = 'F'; | ||
| 79 | break; | ||
| 76 | default: | 80 | default: | 
| 77 | c = '\0'; | 81 | c = '\0'; | 
| 78 | } | 82 | } | 
| @@ -90,6 +94,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
| 90 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | 94 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | 
| 91 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | 95 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | 
| 92 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 96 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 
| 97 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | ||
| 93 | }; | 98 | }; | 
| 94 | 99 | ||
| 95 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 
| diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c new file mode 100644 index 000000000000..37835ffcb039 --- /dev/null +++ b/fs/ocfs2/resize.c | |||
| @@ -0,0 +1,634 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * resize.c | ||
| 5 | * | ||
| 6 | * volume resize. | ||
| 7 | * Inspired by ext3/resize.c. | ||
| 8 | * | ||
| 9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public | ||
| 13 | * License as published by the Free Software Foundation; either | ||
| 14 | * version 2 of the License, or (at your option) any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 19 | * General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public | ||
| 22 | * License along with this program; if not, write to the | ||
| 23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 24 | * Boston, MA 021110-1307, USA. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/fs.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | |||
| 30 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | ||
| 31 | #include <cluster/masklog.h> | ||
| 32 | |||
| 33 | #include "ocfs2.h" | ||
| 34 | |||
| 35 | #include "alloc.h" | ||
| 36 | #include "dlmglue.h" | ||
| 37 | #include "inode.h" | ||
| 38 | #include "journal.h" | ||
| 39 | #include "super.h" | ||
| 40 | #include "sysfile.h" | ||
| 41 | #include "uptodate.h" | ||
| 42 | |||
| 43 | #include "buffer_head_io.h" | ||
| 44 | #include "suballoc.h" | ||
| 45 | #include "resize.h" | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Check whether there are new backup superblocks exist | ||
| 49 | * in the last group. If there are some, mark them or clear | ||
| 50 | * them in the bitmap. | ||
| 51 | * | ||
| 52 | * Return how many backups we find in the last group. | ||
| 53 | */ | ||
| 54 | static u16 ocfs2_calc_new_backup_super(struct inode *inode, | ||
| 55 | struct ocfs2_group_desc *gd, | ||
| 56 | int new_clusters, | ||
| 57 | u32 first_new_cluster, | ||
| 58 | u16 cl_cpg, | ||
| 59 | int set) | ||
| 60 | { | ||
| 61 | int i; | ||
| 62 | u16 backups = 0; | ||
| 63 | u32 cluster; | ||
| 64 | u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); | ||
| 65 | |||
| 66 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
| 67 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
| 68 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
| 69 | |||
| 70 | gd_blkno = ocfs2_which_cluster_group(inode, cluster); | ||
| 71 | if (gd_blkno < lgd_blkno) | ||
| 72 | continue; | ||
| 73 | else if (gd_blkno > lgd_blkno) | ||
| 74 | break; | ||
| 75 | |||
| 76 | if (set) | ||
| 77 | ocfs2_set_bit(cluster % cl_cpg, | ||
| 78 | (unsigned long *)gd->bg_bitmap); | ||
| 79 | else | ||
| 80 | ocfs2_clear_bit(cluster % cl_cpg, | ||
| 81 | (unsigned long *)gd->bg_bitmap); | ||
| 82 | backups++; | ||
| 83 | } | ||
| 84 | |||
| 85 | mlog_exit_void(); | ||
| 86 | return backups; | ||
| 87 | } | ||
| 88 | |||
| 89 | static int ocfs2_update_last_group_and_inode(handle_t *handle, | ||
| 90 | struct inode *bm_inode, | ||
| 91 | struct buffer_head *bm_bh, | ||
| 92 | struct buffer_head *group_bh, | ||
| 93 | u32 first_new_cluster, | ||
| 94 | int new_clusters) | ||
| 95 | { | ||
| 96 | int ret = 0; | ||
| 97 | struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb); | ||
| 98 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data; | ||
| 99 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | ||
| 100 | struct ocfs2_chain_rec *cr; | ||
| 101 | struct ocfs2_group_desc *group; | ||
| 102 | u16 chain, num_bits, backups = 0; | ||
| 103 | u16 cl_bpc = le16_to_cpu(cl->cl_bpc); | ||
| 104 | u16 cl_cpg = le16_to_cpu(cl->cl_cpg); | ||
| 105 | |||
| 106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", | ||
| 107 | new_clusters, first_new_cluster); | ||
| 108 | |||
| 109 | ret = ocfs2_journal_access(handle, bm_inode, group_bh, | ||
| 110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 111 | if (ret < 0) { | ||
| 112 | mlog_errno(ret); | ||
| 113 | goto out; | ||
| 114 | } | ||
| 115 | |||
| 116 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
| 117 | |||
| 118 | /* update the group first. */ | ||
| 119 | num_bits = new_clusters * cl_bpc; | ||
| 120 | le16_add_cpu(&group->bg_bits, num_bits); | ||
| 121 | le16_add_cpu(&group->bg_free_bits_count, num_bits); | ||
| 122 | |||
| 123 | /* | ||
| 124 | * check whether there are some new backup superblocks exist in | ||
| 125 | * this group and update the group bitmap accordingly. | ||
| 126 | */ | ||
| 127 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
| 128 | OCFS2_FEATURE_COMPAT_BACKUP_SB)) { | ||
| 129 | backups = ocfs2_calc_new_backup_super(bm_inode, | ||
| 130 | group, | ||
| 131 | new_clusters, | ||
| 132 | first_new_cluster, | ||
| 133 | cl_cpg, 1); | ||
| 134 | le16_add_cpu(&group->bg_free_bits_count, -1 * backups); | ||
| 135 | } | ||
| 136 | |||
| 137 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
| 138 | if (ret < 0) { | ||
| 139 | mlog_errno(ret); | ||
| 140 | goto out_rollback; | ||
| 141 | } | ||
| 142 | |||
| 143 | /* update the inode accordingly. */ | ||
| 144 | ret = ocfs2_journal_access(handle, bm_inode, bm_bh, | ||
| 145 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 146 | if (ret < 0) { | ||
| 147 | mlog_errno(ret); | ||
| 148 | goto out_rollback; | ||
| 149 | } | ||
| 150 | |||
| 151 | chain = le16_to_cpu(group->bg_chain); | ||
| 152 | cr = (&cl->cl_recs[chain]); | ||
| 153 | le32_add_cpu(&cr->c_total, num_bits); | ||
| 154 | le32_add_cpu(&cr->c_free, num_bits); | ||
| 155 | le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits); | ||
| 156 | le32_add_cpu(&fe->i_clusters, new_clusters); | ||
| 157 | |||
| 158 | if (backups) { | ||
| 159 | le32_add_cpu(&cr->c_free, -1 * backups); | ||
| 160 | le32_add_cpu(&fe->id1.bitmap1.i_used, backups); | ||
| 161 | } | ||
| 162 | |||
| 163 | spin_lock(&OCFS2_I(bm_inode)->ip_lock); | ||
| 164 | OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
| 165 | le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); | ||
| 166 | spin_unlock(&OCFS2_I(bm_inode)->ip_lock); | ||
| 167 | i_size_write(bm_inode, le64_to_cpu(fe->i_size)); | ||
| 168 | |||
| 169 | ocfs2_journal_dirty(handle, bm_bh); | ||
| 170 | |||
| 171 | out_rollback: | ||
| 172 | if (ret < 0) { | ||
| 173 | ocfs2_calc_new_backup_super(bm_inode, | ||
| 174 | group, | ||
| 175 | new_clusters, | ||
| 176 | first_new_cluster, | ||
| 177 | cl_cpg, 0); | ||
| 178 | le16_add_cpu(&group->bg_free_bits_count, backups); | ||
| 179 | le16_add_cpu(&group->bg_bits, -1 * num_bits); | ||
| 180 | le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); | ||
| 181 | } | ||
| 182 | out: | ||
| 183 | mlog_exit(ret); | ||
| 184 | return ret; | ||
| 185 | } | ||
| 186 | |||
| 187 | static int update_backups(struct inode * inode, u32 clusters, char *data) | ||
| 188 | { | ||
| 189 | int i, ret = 0; | ||
| 190 | u32 cluster; | ||
| 191 | u64 blkno; | ||
| 192 | struct buffer_head *backup = NULL; | ||
| 193 | struct ocfs2_dinode *backup_di = NULL; | ||
| 194 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 195 | |||
| 196 | /* calculate the real backups we need to update. */ | ||
| 197 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
| 198 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
| 199 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
| 200 | if (cluster > clusters) | ||
| 201 | break; | ||
| 202 | |||
| 203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | ||
| 204 | if (ret < 0) { | ||
| 205 | mlog_errno(ret); | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | |||
| 209 | memcpy(backup->b_data, data, inode->i_sb->s_blocksize); | ||
| 210 | |||
| 211 | backup_di = (struct ocfs2_dinode *)backup->b_data; | ||
| 212 | backup_di->i_blkno = cpu_to_le64(blkno); | ||
| 213 | |||
| 214 | ret = ocfs2_write_super_or_backup(osb, backup); | ||
| 215 | brelse(backup); | ||
| 216 | backup = NULL; | ||
| 217 | if (ret < 0) { | ||
| 218 | mlog_errno(ret); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | return ret; | ||
| 224 | } | ||
| 225 | |||
| 226 | static void ocfs2_update_super_and_backups(struct inode *inode, | ||
| 227 | int new_clusters) | ||
| 228 | { | ||
| 229 | int ret; | ||
| 230 | u32 clusters = 0; | ||
| 231 | struct buffer_head *super_bh = NULL; | ||
| 232 | struct ocfs2_dinode *super_di = NULL; | ||
| 233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 234 | |||
| 235 | /* | ||
| 236 | * update the superblock last. | ||
| 237 | * It doesn't matter if the write failed. | ||
| 238 | */ | ||
| 239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | ||
| 240 | &super_bh, 0, NULL); | ||
| 241 | if (ret < 0) { | ||
| 242 | mlog_errno(ret); | ||
| 243 | goto out; | ||
| 244 | } | ||
| 245 | |||
| 246 | super_di = (struct ocfs2_dinode *)super_bh->b_data; | ||
| 247 | le32_add_cpu(&super_di->i_clusters, new_clusters); | ||
| 248 | clusters = le32_to_cpu(super_di->i_clusters); | ||
| 249 | |||
| 250 | ret = ocfs2_write_super_or_backup(osb, super_bh); | ||
| 251 | if (ret < 0) { | ||
| 252 | mlog_errno(ret); | ||
| 253 | goto out; | ||
| 254 | } | ||
| 255 | |||
| 256 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB)) | ||
| 257 | ret = update_backups(inode, clusters, super_bh->b_data); | ||
| 258 | |||
| 259 | out: | ||
| 260 | brelse(super_bh); | ||
| 261 | if (ret) | ||
| 262 | printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s" | ||
| 263 | " during fs resize. This condition is not fatal," | ||
| 264 | " but fsck.ocfs2 should be run to fix it\n", | ||
| 265 | osb->dev_str); | ||
| 266 | return; | ||
| 267 | } | ||
| 268 | |||
| 269 | /* | ||
| 270 | * Extend the filesystem to the new number of clusters specified. This entry | ||
| 271 | * point is only used to extend the current filesystem to the end of the last | ||
| 272 | * existing group. | ||
| 273 | */ | ||
| 274 | int ocfs2_group_extend(struct inode * inode, int new_clusters) | ||
| 275 | { | ||
| 276 | int ret; | ||
| 277 | handle_t *handle; | ||
| 278 | struct buffer_head *main_bm_bh = NULL; | ||
| 279 | struct buffer_head *group_bh = NULL; | ||
| 280 | struct inode *main_bm_inode = NULL; | ||
| 281 | struct ocfs2_dinode *fe = NULL; | ||
| 282 | struct ocfs2_group_desc *group = NULL; | ||
| 283 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 284 | u16 cl_bpc; | ||
| 285 | u32 first_new_cluster; | ||
| 286 | u64 lgd_blkno; | ||
| 287 | |||
| 288 | mlog_entry_void(); | ||
| 289 | |||
| 290 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 291 | return -EROFS; | ||
| 292 | |||
| 293 | if (new_clusters < 0) | ||
| 294 | return -EINVAL; | ||
| 295 | else if (new_clusters == 0) | ||
| 296 | return 0; | ||
| 297 | |||
| 298 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
| 299 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 300 | OCFS2_INVALID_SLOT); | ||
| 301 | if (!main_bm_inode) { | ||
| 302 | ret = -EINVAL; | ||
| 303 | mlog_errno(ret); | ||
| 304 | goto out; | ||
| 305 | } | ||
| 306 | |||
| 307 | mutex_lock(&main_bm_inode->i_mutex); | ||
| 308 | |||
| 309 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
| 310 | if (ret < 0) { | ||
| 311 | mlog_errno(ret); | ||
| 312 | goto out_mutex; | ||
| 313 | } | ||
| 314 | |||
| 315 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
| 316 | |||
| 317 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
| 318 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
| 319 | mlog(ML_ERROR, "The disk is too old and small. " | ||
| 320 | "Force to do offline resize."); | ||
| 321 | ret = -EINVAL; | ||
| 322 | goto out_unlock; | ||
| 323 | } | ||
| 324 | |||
| 325 | if (!OCFS2_IS_VALID_DINODE(fe)) { | ||
| 326 | OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); | ||
| 327 | ret = -EIO; | ||
| 328 | goto out_unlock; | ||
| 329 | } | ||
| 330 | |||
| 331 | first_new_cluster = le32_to_cpu(fe->i_clusters); | ||
| 332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | ||
| 333 | first_new_cluster - 1); | ||
| 334 | |||
| 335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | ||
| 336 | main_bm_inode); | ||
| 337 | if (ret < 0) { | ||
| 338 | mlog_errno(ret); | ||
| 339 | goto out_unlock; | ||
| 340 | } | ||
| 341 | |||
| 342 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
| 343 | |||
| 344 | ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); | ||
| 345 | if (ret) { | ||
| 346 | mlog_errno(ret); | ||
| 347 | goto out_unlock; | ||
| 348 | } | ||
| 349 | |||
| 350 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
| 351 | if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > | ||
| 352 | le16_to_cpu(fe->id2.i_chain.cl_cpg)) { | ||
| 353 | ret = -EINVAL; | ||
| 354 | goto out_unlock; | ||
| 355 | } | ||
| 356 | |||
| 357 | mlog(0, "extend the last group at %llu, new clusters = %d\n", | ||
| 358 | (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); | ||
| 359 | |||
| 360 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); | ||
| 361 | if (IS_ERR(handle)) { | ||
| 362 | mlog_errno(PTR_ERR(handle)); | ||
| 363 | ret = -EINVAL; | ||
| 364 | goto out_unlock; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* update the last group descriptor and inode. */ | ||
| 368 | ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, | ||
| 369 | main_bm_bh, group_bh, | ||
| 370 | first_new_cluster, | ||
| 371 | new_clusters); | ||
| 372 | if (ret) { | ||
| 373 | mlog_errno(ret); | ||
| 374 | goto out_commit; | ||
| 375 | } | ||
| 376 | |||
| 377 | ocfs2_update_super_and_backups(main_bm_inode, new_clusters); | ||
| 378 | |||
| 379 | out_commit: | ||
| 380 | ocfs2_commit_trans(osb, handle); | ||
| 381 | out_unlock: | ||
| 382 | brelse(group_bh); | ||
| 383 | brelse(main_bm_bh); | ||
| 384 | |||
| 385 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
| 386 | |||
| 387 | out_mutex: | ||
| 388 | mutex_unlock(&main_bm_inode->i_mutex); | ||
| 389 | iput(main_bm_inode); | ||
| 390 | |||
| 391 | out: | ||
| 392 | mlog_exit_void(); | ||
| 393 | return ret; | ||
| 394 | } | ||
| 395 | |||
| 396 | static int ocfs2_check_new_group(struct inode *inode, | ||
| 397 | struct ocfs2_dinode *di, | ||
| 398 | struct ocfs2_new_group_input *input, | ||
| 399 | struct buffer_head *group_bh) | ||
| 400 | { | ||
| 401 | int ret; | ||
| 402 | struct ocfs2_group_desc *gd; | ||
| 403 | u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
| 404 | unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * | ||
| 405 | le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
| 406 | |||
| 407 | |||
| 408 | gd = (struct ocfs2_group_desc *)group_bh->b_data; | ||
| 409 | |||
| 410 | ret = -EIO; | ||
| 411 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) | ||
| 412 | mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", | ||
| 413 | (unsigned long long)le64_to_cpu(gd->bg_blkno)); | ||
| 414 | else if (di->i_blkno != gd->bg_parent_dinode) | ||
| 415 | mlog(ML_ERROR, "Group descriptor # %llu has bad parent " | ||
| 416 | "pointer (%llu, expected %llu)\n", | ||
| 417 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 418 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), | ||
| 419 | (unsigned long long)le64_to_cpu(di->i_blkno)); | ||
| 420 | else if (le16_to_cpu(gd->bg_bits) > max_bits) | ||
| 421 | mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", | ||
| 422 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 423 | le16_to_cpu(gd->bg_bits)); | ||
| 424 | else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) | ||
| 425 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
| 426 | "claims that %u are free\n", | ||
| 427 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 428 | le16_to_cpu(gd->bg_bits), | ||
| 429 | le16_to_cpu(gd->bg_free_bits_count)); | ||
| 430 | else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) | ||
| 431 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
| 432 | "max bitmap bits of %u\n", | ||
| 433 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 434 | le16_to_cpu(gd->bg_bits), | ||
| 435 | 8 * le16_to_cpu(gd->bg_size)); | ||
| 436 | else if (le16_to_cpu(gd->bg_chain) != input->chain) | ||
| 437 | mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " | ||
| 438 | "while input has %u set.\n", | ||
| 439 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 440 | le16_to_cpu(gd->bg_chain), input->chain); | ||
| 441 | else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc) | ||
| 442 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
| 443 | "input has %u clusters set\n", | ||
| 444 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 445 | le16_to_cpu(gd->bg_bits), input->clusters); | ||
| 446 | else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc) | ||
| 447 | mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u " | ||
| 448 | "but it should have %u set\n", | ||
| 449 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 450 | le16_to_cpu(gd->bg_bits), | ||
| 451 | input->frees * cl_bpc); | ||
| 452 | else | ||
| 453 | ret = 0; | ||
| 454 | |||
| 455 | return ret; | ||
| 456 | } | ||
| 457 | |||
| 458 | static int ocfs2_verify_group_and_input(struct inode *inode, | ||
| 459 | struct ocfs2_dinode *di, | ||
| 460 | struct ocfs2_new_group_input *input, | ||
| 461 | struct buffer_head *group_bh) | ||
| 462 | { | ||
| 463 | u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count); | ||
| 464 | u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
| 465 | u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec); | ||
| 466 | u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group); | ||
| 467 | u32 total_clusters = le32_to_cpu(di->i_clusters); | ||
| 468 | int ret = -EINVAL; | ||
| 469 | |||
| 470 | if (cluster < total_clusters) | ||
| 471 | mlog(ML_ERROR, "add a group which is in the current volume.\n"); | ||
| 472 | else if (input->chain >= cl_count) | ||
| 473 | mlog(ML_ERROR, "input chain exceeds the limit.\n"); | ||
| 474 | else if (next_free != cl_count && next_free != input->chain) | ||
| 475 | mlog(ML_ERROR, | ||
| 476 | "the add group should be in chain %u\n", next_free); | ||
| 477 | else if (total_clusters + input->clusters < total_clusters) | ||
| 478 | mlog(ML_ERROR, "add group's clusters overflow.\n"); | ||
| 479 | else if (input->clusters > cl_cpg) | ||
| 480 | mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n"); | ||
| 481 | else if (input->frees > input->clusters) | ||
| 482 | mlog(ML_ERROR, "the free cluster exceeds the total clusters\n"); | ||
| 483 | else if (total_clusters % cl_cpg != 0) | ||
| 484 | mlog(ML_ERROR, | ||
| 485 | "the last group isn't full. Use group extend first.\n"); | ||
| 486 | else if (input->group != ocfs2_which_cluster_group(inode, cluster)) | ||
| 487 | mlog(ML_ERROR, "group blkno is invalid\n"); | ||
| 488 | else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh))) | ||
| 489 | mlog(ML_ERROR, "group descriptor check failed.\n"); | ||
| 490 | else | ||
| 491 | ret = 0; | ||
| 492 | |||
| 493 | return ret; | ||
| 494 | } | ||
| 495 | |||
| 496 | /* Add a new group descriptor to global_bitmap. */ | ||
| 497 | int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | ||
| 498 | { | ||
| 499 | int ret; | ||
| 500 | handle_t *handle; | ||
| 501 | struct buffer_head *main_bm_bh = NULL; | ||
| 502 | struct inode *main_bm_inode = NULL; | ||
| 503 | struct ocfs2_dinode *fe = NULL; | ||
| 504 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 505 | struct buffer_head *group_bh = NULL; | ||
| 506 | struct ocfs2_group_desc *group = NULL; | ||
| 507 | struct ocfs2_chain_list *cl; | ||
| 508 | struct ocfs2_chain_rec *cr; | ||
| 509 | u16 cl_bpc; | ||
| 510 | |||
| 511 | mlog_entry_void(); | ||
| 512 | |||
| 513 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 514 | return -EROFS; | ||
| 515 | |||
| 516 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
| 517 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 518 | OCFS2_INVALID_SLOT); | ||
| 519 | if (!main_bm_inode) { | ||
| 520 | ret = -EINVAL; | ||
| 521 | mlog_errno(ret); | ||
| 522 | goto out; | ||
| 523 | } | ||
| 524 | |||
| 525 | mutex_lock(&main_bm_inode->i_mutex); | ||
| 526 | |||
| 527 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
| 528 | if (ret < 0) { | ||
| 529 | mlog_errno(ret); | ||
| 530 | goto out_mutex; | ||
| 531 | } | ||
| 532 | |||
| 533 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
| 534 | |||
| 535 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
| 536 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
| 537 | mlog(ML_ERROR, "The disk is too old and small." | ||
| 538 | " Force to do offline resize."); | ||
| 539 | ret = -EINVAL; | ||
| 540 | goto out_unlock; | ||
| 541 | } | ||
| 542 | |||
| 543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | ||
| 544 | if (ret < 0) { | ||
| 545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | ||
| 546 | "from the device.", (unsigned long long)input->group); | ||
| 547 | goto out_unlock; | ||
| 548 | } | ||
| 549 | |||
| 550 | ocfs2_set_new_buffer_uptodate(inode, group_bh); | ||
| 551 | |||
| 552 | ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); | ||
| 553 | if (ret) { | ||
| 554 | mlog_errno(ret); | ||
| 555 | goto out_unlock; | ||
| 556 | } | ||
| 557 | |||
| 558 | mlog(0, "Add a new group %llu in chain = %u, length = %u\n", | ||
| 559 | (unsigned long long)input->group, input->chain, input->clusters); | ||
| 560 | |||
| 561 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); | ||
| 562 | if (IS_ERR(handle)) { | ||
| 563 | mlog_errno(PTR_ERR(handle)); | ||
| 564 | ret = -EINVAL; | ||
| 565 | goto out_unlock; | ||
| 566 | } | ||
| 567 | |||
| 568 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
| 569 | cl = &fe->id2.i_chain; | ||
| 570 | cr = &cl->cl_recs[input->chain]; | ||
| 571 | |||
| 572 | ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, | ||
| 573 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 574 | if (ret < 0) { | ||
| 575 | mlog_errno(ret); | ||
| 576 | goto out_commit; | ||
| 577 | } | ||
| 578 | |||
| 579 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
| 580 | group->bg_next_group = cr->c_blkno; | ||
| 581 | |||
| 582 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
| 583 | if (ret < 0) { | ||
| 584 | mlog_errno(ret); | ||
| 585 | goto out_commit; | ||
| 586 | } | ||
| 587 | |||
| 588 | ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, | ||
| 589 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 590 | if (ret < 0) { | ||
| 591 | mlog_errno(ret); | ||
| 592 | goto out_commit; | ||
| 593 | } | ||
| 594 | |||
| 595 | if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) { | ||
| 596 | le16_add_cpu(&cl->cl_next_free_rec, 1); | ||
| 597 | memset(cr, 0, sizeof(struct ocfs2_chain_rec)); | ||
| 598 | } | ||
| 599 | |||
| 600 | cr->c_blkno = le64_to_cpu(input->group); | ||
| 601 | le32_add_cpu(&cr->c_total, input->clusters * cl_bpc); | ||
| 602 | le32_add_cpu(&cr->c_free, input->frees * cl_bpc); | ||
| 603 | |||
| 604 | le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc); | ||
| 605 | le32_add_cpu(&fe->id1.bitmap1.i_used, | ||
| 606 | (input->clusters - input->frees) * cl_bpc); | ||
| 607 | le32_add_cpu(&fe->i_clusters, input->clusters); | ||
| 608 | |||
| 609 | ocfs2_journal_dirty(handle, main_bm_bh); | ||
| 610 | |||
| 611 | spin_lock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
| 612 | OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
| 613 | le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits); | ||
| 614 | spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
| 615 | i_size_write(main_bm_inode, le64_to_cpu(fe->i_size)); | ||
| 616 | |||
| 617 | ocfs2_update_super_and_backups(main_bm_inode, input->clusters); | ||
| 618 | |||
| 619 | out_commit: | ||
| 620 | ocfs2_commit_trans(osb, handle); | ||
| 621 | out_unlock: | ||
| 622 | brelse(group_bh); | ||
| 623 | brelse(main_bm_bh); | ||
| 624 | |||
| 625 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
| 626 | |||
| 627 | out_mutex: | ||
| 628 | mutex_unlock(&main_bm_inode->i_mutex); | ||
| 629 | iput(main_bm_inode); | ||
| 630 | |||
| 631 | out: | ||
| 632 | mlog_exit_void(); | ||
| 633 | return ret; | ||
| 634 | } | ||
| diff --git a/fs/ocfs2/resize.h b/fs/ocfs2/resize.h new file mode 100644 index 000000000000..f38841abf10b --- /dev/null +++ b/fs/ocfs2/resize.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * resize.h | ||
| 5 | * | ||
| 6 | * Function prototypes | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 021110-1307, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #ifndef OCFS2_RESIZE_H | ||
| 27 | #define OCFS2_RESIZE_H | ||
| 28 | |||
| 29 | int ocfs2_group_extend(struct inode * inode, int new_clusters); | ||
| 30 | int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input); | ||
| 31 | |||
| 32 | #endif /* OCFS2_RESIZE_H */ | ||
| diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index af4882b62cfa..3a50ce555e64 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | |||
| 48 | s16 slot_num, | 48 | s16 slot_num, | 
| 49 | s16 node_num); | 49 | s16 node_num); | 
| 50 | 50 | ||
| 51 | /* Use the slot information we've collected to create a map of mounted | ||
| 52 | * nodes. Should be holding an EX on super block. assumes slot info is | ||
| 53 | * up to date. Note that we call this *after* we find a slot, so our | ||
| 54 | * own node should be set in the map too... */ | ||
| 55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb) | ||
| 56 | { | ||
| 57 | int i; | ||
| 58 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 59 | |||
| 60 | spin_lock(&si->si_lock); | ||
| 61 | |||
| 62 | for (i = 0; i < si->si_size; i++) | ||
| 63 | if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT) | ||
| 64 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, | ||
| 65 | si->si_global_node_nums[i]); | ||
| 66 | |||
| 67 | spin_unlock(&si->si_lock); | ||
| 68 | } | ||
| 69 | |||
| 70 | /* post the slot information on disk into our slot_info struct. */ | 51 | /* post the slot information on disk into our slot_info struct. */ | 
| 71 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 
| 72 | { | 53 | { | 
| diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index d8c8ceed031b..1025872aaade 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
| @@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
| 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 
| 53 | s16 slot_num); | 53 | s16 slot_num); | 
| 54 | 54 | ||
| 55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb); | ||
| 56 | |||
| 57 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 
| 58 | int slot_num) | 56 | int slot_num) | 
| 59 | { | 57 | { | 
| diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8f09f5235e3a..7e397e2c25dd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -101,8 +101,6 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
| 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 
| 102 | u64 bg_blkno, | 102 | u64 bg_blkno, | 
| 103 | u16 bg_bit_off); | 103 | u16 bg_bit_off); | 
| 104 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | ||
| 105 | u32 cluster); | ||
| 106 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | 104 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | 
| 107 | u64 data_blkno, | 105 | u64 data_blkno, | 
| 108 | u64 *bg_blkno, | 106 | u64 *bg_blkno, | 
| @@ -114,7 +112,7 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
| 114 | 112 | ||
| 115 | if (inode) { | 113 | if (inode) { | 
| 116 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) | 114 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) | 
| 117 | ocfs2_meta_unlock(inode, 1); | 115 | ocfs2_inode_unlock(inode, 1); | 
| 118 | 116 | ||
| 119 | mutex_unlock(&inode->i_mutex); | 117 | mutex_unlock(&inode->i_mutex); | 
| 120 | 118 | ||
| @@ -131,9 +129,9 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
| 131 | } | 129 | } | 
| 132 | 130 | ||
| 133 | /* somewhat more expensive than our other checks, so use sparingly. */ | 131 | /* somewhat more expensive than our other checks, so use sparingly. */ | 
| 134 | static int ocfs2_check_group_descriptor(struct super_block *sb, | 132 | int ocfs2_check_group_descriptor(struct super_block *sb, | 
| 135 | struct ocfs2_dinode *di, | 133 | struct ocfs2_dinode *di, | 
| 136 | struct ocfs2_group_desc *gd) | 134 | struct ocfs2_group_desc *gd) | 
| 137 | { | 135 | { | 
| 138 | unsigned int max_bits; | 136 | unsigned int max_bits; | 
| 139 | 137 | ||
| @@ -412,7 +410,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 412 | 410 | ||
| 413 | mutex_lock(&alloc_inode->i_mutex); | 411 | mutex_lock(&alloc_inode->i_mutex); | 
| 414 | 412 | ||
| 415 | status = ocfs2_meta_lock(alloc_inode, &bh, 1); | 413 | status = ocfs2_inode_lock(alloc_inode, &bh, 1); | 
| 416 | if (status < 0) { | 414 | if (status < 0) { | 
| 417 | mutex_unlock(&alloc_inode->i_mutex); | 415 | mutex_unlock(&alloc_inode->i_mutex); | 
| 418 | iput(alloc_inode); | 416 | iput(alloc_inode); | 
| @@ -1443,8 +1441,7 @@ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | |||
| 1443 | 1441 | ||
| 1444 | /* given a cluster offset, calculate which block group it belongs to | 1442 | /* given a cluster offset, calculate which block group it belongs to | 
| 1445 | * and return that block offset. */ | 1443 | * and return that block offset. */ | 
| 1446 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | 1444 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster) | 
| 1447 | u32 cluster) | ||
| 1448 | { | 1445 | { | 
| 1449 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1446 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 
| 1450 | u32 group_no; | 1447 | u32 group_no; | 
| @@ -1519,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
| 1519 | if (min_clusters > (osb->bitmap_cpg - 1)) { | 1516 | if (min_clusters > (osb->bitmap_cpg - 1)) { | 
| 1520 | /* The only paths asking for contiguousness | 1517 | /* The only paths asking for contiguousness | 
| 1521 | * should know about this already. */ | 1518 | * should know about this already. */ | 
| 1522 | mlog(ML_ERROR, "minimum allocation requested exceeds " | 1519 | mlog(ML_ERROR, "minimum allocation requested %u exceeds " | 
| 1523 | "group bitmap size!"); | 1520 | "group bitmap size %u!\n", min_clusters, | 
| 1521 | osb->bitmap_cpg); | ||
| 1524 | status = -ENOSPC; | 1522 | status = -ENOSPC; | 
| 1525 | goto bail; | 1523 | goto bail; | 
| 1526 | } | 1524 | } | 
| diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index cafe93703095..8799033bb459 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -147,4 +147,12 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
| 147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 
| 148 | struct ocfs2_alloc_context *ac); | 148 | struct ocfs2_alloc_context *ac); | 
| 149 | 149 | ||
| 150 | /* given a cluster offset, calculate which block group it belongs to | ||
| 151 | * and return that block offset. */ | ||
| 152 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | ||
| 153 | |||
| 154 | /* somewhat more expensive than our other checks, so use sparingly. */ | ||
| 155 | int ocfs2_check_group_descriptor(struct super_block *sb, | ||
| 156 | struct ocfs2_dinode *di, | ||
| 157 | struct ocfs2_group_desc *gd); | ||
| 150 | #endif /* _CHAINALLOC_H_ */ | 158 | #endif /* _CHAINALLOC_H_ */ | 
| diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5ee775420665..01fe40ee5ea9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -65,7 +65,6 @@ | |||
| 65 | #include "sysfile.h" | 65 | #include "sysfile.h" | 
| 66 | #include "uptodate.h" | 66 | #include "uptodate.h" | 
| 67 | #include "ver.h" | 67 | #include "ver.h" | 
| 68 | #include "vote.h" | ||
| 69 | 68 | ||
| 70 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" | 
| 71 | 70 | ||
| @@ -84,9 +83,11 @@ MODULE_LICENSE("GPL"); | |||
| 84 | 83 | ||
| 85 | struct mount_options | 84 | struct mount_options | 
| 86 | { | 85 | { | 
| 86 | unsigned long commit_interval; | ||
| 87 | unsigned long mount_opt; | 87 | unsigned long mount_opt; | 
| 88 | unsigned int atime_quantum; | 88 | unsigned int atime_quantum; | 
| 89 | signed short slot; | 89 | signed short slot; | 
| 90 | unsigned int localalloc_opt; | ||
| 90 | }; | 91 | }; | 
| 91 | 92 | ||
| 92 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 
| @@ -150,6 +151,9 @@ enum { | |||
| 150 | Opt_data_writeback, | 151 | Opt_data_writeback, | 
| 151 | Opt_atime_quantum, | 152 | Opt_atime_quantum, | 
| 152 | Opt_slot, | 153 | Opt_slot, | 
| 154 | Opt_commit, | ||
| 155 | Opt_localalloc, | ||
| 156 | Opt_localflocks, | ||
| 153 | Opt_err, | 157 | Opt_err, | 
| 154 | }; | 158 | }; | 
| 155 | 159 | ||
| @@ -165,6 +169,9 @@ static match_table_t tokens = { | |||
| 165 | {Opt_data_writeback, "data=writeback"}, | 169 | {Opt_data_writeback, "data=writeback"}, | 
| 166 | {Opt_atime_quantum, "atime_quantum=%u"}, | 170 | {Opt_atime_quantum, "atime_quantum=%u"}, | 
| 167 | {Opt_slot, "preferred_slot=%u"}, | 171 | {Opt_slot, "preferred_slot=%u"}, | 
| 172 | {Opt_commit, "commit=%u"}, | ||
| 173 | {Opt_localalloc, "localalloc=%d"}, | ||
| 174 | {Opt_localflocks, "localflocks"}, | ||
| 168 | {Opt_err, NULL} | 175 | {Opt_err, NULL} | 
| 169 | }; | 176 | }; | 
| 170 | 177 | ||
| @@ -213,7 +220,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
| 213 | 220 | ||
| 214 | mlog_entry_void(); | 221 | mlog_entry_void(); | 
| 215 | 222 | ||
| 216 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); | 223 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); | 
| 217 | if (IS_ERR(new)) { | 224 | if (IS_ERR(new)) { | 
| 218 | status = PTR_ERR(new); | 225 | status = PTR_ERR(new); | 
| 219 | mlog_errno(status); | 226 | mlog_errno(status); | 
| @@ -221,7 +228,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
| 221 | } | 228 | } | 
| 222 | osb->root_inode = new; | 229 | osb->root_inode = new; | 
| 223 | 230 | ||
| 224 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); | 231 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); | 
| 225 | if (IS_ERR(new)) { | 232 | if (IS_ERR(new)) { | 
| 226 | status = PTR_ERR(new); | 233 | status = PTR_ERR(new); | 
| 227 | mlog_errno(status); | 234 | mlog_errno(status); | 
| @@ -443,6 +450,8 @@ unlock_osb: | |||
| 443 | osb->s_mount_opt = parsed_options.mount_opt; | 450 | osb->s_mount_opt = parsed_options.mount_opt; | 
| 444 | osb->s_atime_quantum = parsed_options.atime_quantum; | 451 | osb->s_atime_quantum = parsed_options.atime_quantum; | 
| 445 | osb->preferred_slot = parsed_options.slot; | 452 | osb->preferred_slot = parsed_options.slot; | 
| 453 | if (parsed_options.commit_interval) | ||
| 454 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
| 446 | 455 | ||
| 447 | if (!ocfs2_is_hard_readonly(osb)) | 456 | if (!ocfs2_is_hard_readonly(osb)) | 
| 448 | ocfs2_set_journal_params(osb); | 457 | ocfs2_set_journal_params(osb); | 
| @@ -597,6 +606,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 597 | osb->s_mount_opt = parsed_options.mount_opt; | 606 | osb->s_mount_opt = parsed_options.mount_opt; | 
| 598 | osb->s_atime_quantum = parsed_options.atime_quantum; | 607 | osb->s_atime_quantum = parsed_options.atime_quantum; | 
| 599 | osb->preferred_slot = parsed_options.slot; | 608 | osb->preferred_slot = parsed_options.slot; | 
| 609 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
| 610 | osb->local_alloc_size = parsed_options.localalloc_opt; | ||
| 600 | 611 | ||
| 601 | sb->s_magic = OCFS2_SUPER_MAGIC; | 612 | sb->s_magic = OCFS2_SUPER_MAGIC; | 
| 602 | 613 | ||
| @@ -747,9 +758,11 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 747 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 758 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 
| 748 | options ? options : "(none)"); | 759 | options ? options : "(none)"); | 
| 749 | 760 | ||
| 761 | mopt->commit_interval = 0; | ||
| 750 | mopt->mount_opt = 0; | 762 | mopt->mount_opt = 0; | 
| 751 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 
| 752 | mopt->slot = OCFS2_INVALID_SLOT; | 764 | mopt->slot = OCFS2_INVALID_SLOT; | 
| 765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
| 753 | 766 | ||
| 754 | if (!options) { | 767 | if (!options) { | 
| 755 | status = 1; | 768 | status = 1; | 
| @@ -816,6 +829,41 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 816 | if (option) | 829 | if (option) | 
| 817 | mopt->slot = (s16)option; | 830 | mopt->slot = (s16)option; | 
| 818 | break; | 831 | break; | 
| 832 | case Opt_commit: | ||
| 833 | option = 0; | ||
| 834 | if (match_int(&args[0], &option)) { | ||
| 835 | status = 0; | ||
| 836 | goto bail; | ||
| 837 | } | ||
| 838 | if (option < 0) | ||
| 839 | return 0; | ||
| 840 | if (option == 0) | ||
| 841 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | ||
| 842 | mopt->commit_interval = HZ * option; | ||
| 843 | break; | ||
| 844 | case Opt_localalloc: | ||
| 845 | option = 0; | ||
| 846 | if (match_int(&args[0], &option)) { | ||
| 847 | status = 0; | ||
| 848 | goto bail; | ||
| 849 | } | ||
| 850 | if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) | ||
| 851 | mopt->localalloc_opt = option; | ||
| 852 | break; | ||
| 853 | case Opt_localflocks: | ||
| 854 | /* | ||
| 855 | * Changing this during remount could race | ||
| 856 | * flock() requests, or "unbalance" existing | ||
| 857 | * ones (e.g., a lock is taken in one mode but | ||
| 858 | * dropped in the other). If users care enough | ||
| 859 | * to flip locking modes during remount, we | ||
| 860 | * could add a "local" flag to individual | ||
| 861 | * flock structures for proper tracking of | ||
| 862 | * state. | ||
| 863 | */ | ||
| 864 | if (!is_remount) | ||
| 865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | ||
| 866 | break; | ||
| 819 | default: | 867 | default: | 
| 820 | mlog(ML_ERROR, | 868 | mlog(ML_ERROR, | 
| 821 | "Unrecognized mount option \"%s\" " | 869 | "Unrecognized mount option \"%s\" " | 
| @@ -864,6 +912,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 864 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 912 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 
| 865 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 913 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 
| 866 | 914 | ||
| 915 | if (osb->osb_commit_interval) | ||
| 916 | seq_printf(s, ",commit=%u", | ||
| 917 | (unsigned) (osb->osb_commit_interval / HZ)); | ||
| 918 | |||
| 919 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | ||
| 920 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | ||
| 921 | |||
| 922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | ||
| 923 | seq_printf(s, ",localflocks,"); | ||
| 924 | |||
| 867 | return 0; | 925 | return 0; | 
| 868 | } | 926 | } | 
| 869 | 927 | ||
| @@ -965,7 +1023,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 965 | goto bail; | 1023 | goto bail; | 
| 966 | } | 1024 | } | 
| 967 | 1025 | ||
| 968 | status = ocfs2_meta_lock(inode, &bh, 0); | 1026 | status = ocfs2_inode_lock(inode, &bh, 0); | 
| 969 | if (status < 0) { | 1027 | if (status < 0) { | 
| 970 | mlog_errno(status); | 1028 | mlog_errno(status); | 
| 971 | goto bail; | 1029 | goto bail; | 
| @@ -989,7 +1047,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 989 | 1047 | ||
| 990 | brelse(bh); | 1048 | brelse(bh); | 
| 991 | 1049 | ||
| 992 | ocfs2_meta_unlock(inode, 0); | 1050 | ocfs2_inode_unlock(inode, 0); | 
| 993 | status = 0; | 1051 | status = 0; | 
| 994 | bail: | 1052 | bail: | 
| 995 | if (inode) | 1053 | if (inode) | 
| @@ -1020,8 +1078,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) | |||
| 1020 | oi->ip_clusters = 0; | 1078 | oi->ip_clusters = 0; | 
| 1021 | 1079 | ||
| 1022 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); | 1080 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); | 
| 1023 | ocfs2_lock_res_init_once(&oi->ip_meta_lockres); | 1081 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); | 
| 1024 | ocfs2_lock_res_init_once(&oi->ip_data_lockres); | ||
| 1025 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 1082 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 
| 1026 | 1083 | ||
| 1027 | ocfs2_metadata_cache_init(&oi->vfs_inode); | 1084 | ocfs2_metadata_cache_init(&oi->vfs_inode); | 
| @@ -1117,25 +1174,12 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
| 1117 | goto leave; | 1174 | goto leave; | 
| 1118 | } | 1175 | } | 
| 1119 | 1176 | ||
| 1120 | status = ocfs2_register_hb_callbacks(osb); | ||
| 1121 | if (status < 0) { | ||
| 1122 | mlog_errno(status); | ||
| 1123 | goto leave; | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | status = ocfs2_dlm_init(osb); | 1177 | status = ocfs2_dlm_init(osb); | 
| 1127 | if (status < 0) { | 1178 | if (status < 0) { | 
| 1128 | mlog_errno(status); | 1179 | mlog_errno(status); | 
| 1129 | goto leave; | 1180 | goto leave; | 
| 1130 | } | 1181 | } | 
| 1131 | 1182 | ||
| 1132 | /* requires vote_thread to be running. */ | ||
| 1133 | status = ocfs2_register_net_handlers(osb); | ||
| 1134 | if (status < 0) { | ||
| 1135 | mlog_errno(status); | ||
| 1136 | goto leave; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | status = ocfs2_super_lock(osb, 1); | 1183 | status = ocfs2_super_lock(osb, 1); | 
| 1140 | if (status < 0) { | 1184 | if (status < 0) { | 
| 1141 | mlog_errno(status); | 1185 | mlog_errno(status); | 
| @@ -1150,8 +1194,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
| 1150 | goto leave; | 1194 | goto leave; | 
| 1151 | } | 1195 | } | 
| 1152 | 1196 | ||
| 1153 | ocfs2_populate_mounted_map(osb); | ||
| 1154 | |||
| 1155 | /* load all node-local system inodes */ | 1197 | /* load all node-local system inodes */ | 
| 1156 | status = ocfs2_init_local_system_inodes(osb); | 1198 | status = ocfs2_init_local_system_inodes(osb); | 
| 1157 | if (status < 0) { | 1199 | if (status < 0) { | 
| @@ -1174,15 +1216,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
| 1174 | if (ocfs2_mount_local(osb)) | 1216 | if (ocfs2_mount_local(osb)) | 
| 1175 | goto leave; | 1217 | goto leave; | 
| 1176 | 1218 | ||
| 1177 | /* This should be sent *after* we recovered our journal as it | ||
| 1178 | * will cause other nodes to unmark us as needing | ||
| 1179 | * recovery. However, we need to send it *before* dropping the | ||
| 1180 | * super block lock as otherwise their recovery threads might | ||
| 1181 | * try to clean us up while we're live! */ | ||
| 1182 | status = ocfs2_request_mount_vote(osb); | ||
| 1183 | if (status < 0) | ||
| 1184 | mlog_errno(status); | ||
| 1185 | |||
| 1186 | leave: | 1219 | leave: | 
| 1187 | if (unlock_super) | 1220 | if (unlock_super) | 
| 1188 | ocfs2_super_unlock(osb, 1); | 1221 | ocfs2_super_unlock(osb, 1); | 
| @@ -1240,10 +1273,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1240 | mlog_errno(tmp); | 1273 | mlog_errno(tmp); | 
| 1241 | return; | 1274 | return; | 
| 1242 | } | 1275 | } | 
| 1243 | |||
| 1244 | tmp = ocfs2_request_umount_vote(osb); | ||
| 1245 | if (tmp < 0) | ||
| 1246 | mlog_errno(tmp); | ||
| 1247 | } | 1276 | } | 
| 1248 | 1277 | ||
| 1249 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 
| @@ -1254,13 +1283,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1254 | 1283 | ||
| 1255 | ocfs2_release_system_inodes(osb); | 1284 | ocfs2_release_system_inodes(osb); | 
| 1256 | 1285 | ||
| 1257 | if (osb->dlm) { | 1286 | if (osb->dlm) | 
| 1258 | ocfs2_unregister_net_handlers(osb); | ||
| 1259 | |||
| 1260 | ocfs2_dlm_shutdown(osb); | 1287 | ocfs2_dlm_shutdown(osb); | 
| 1261 | } | ||
| 1262 | |||
| 1263 | ocfs2_clear_hb_callbacks(osb); | ||
| 1264 | 1288 | ||
| 1265 | debugfs_remove(osb->osb_debug_root); | 1289 | debugfs_remove(osb->osb_debug_root); | 
| 1266 | 1290 | ||
| @@ -1315,7 +1339,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1315 | int i, cbits, bbits; | 1339 | int i, cbits, bbits; | 
| 1316 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 1340 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 
| 1317 | struct inode *inode = NULL; | 1341 | struct inode *inode = NULL; | 
| 1318 | struct buffer_head *bitmap_bh = NULL; | ||
| 1319 | struct ocfs2_journal *journal; | 1342 | struct ocfs2_journal *journal; | 
| 1320 | __le32 uuid_net_key; | 1343 | __le32 uuid_net_key; | 
| 1321 | struct ocfs2_super *osb; | 1344 | struct ocfs2_super *osb; | 
| @@ -1344,19 +1367,13 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1344 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1367 | osb->s_sectsize_bits = blksize_bits(sector_size); | 
| 1345 | BUG_ON(!osb->s_sectsize_bits); | 1368 | BUG_ON(!osb->s_sectsize_bits); | 
| 1346 | 1369 | ||
| 1347 | osb->net_response_ids = 0; | ||
| 1348 | spin_lock_init(&osb->net_response_lock); | ||
| 1349 | INIT_LIST_HEAD(&osb->net_response_list); | ||
| 1350 | |||
| 1351 | INIT_LIST_HEAD(&osb->osb_net_handlers); | ||
| 1352 | init_waitqueue_head(&osb->recovery_event); | 1370 | init_waitqueue_head(&osb->recovery_event); | 
| 1353 | spin_lock_init(&osb->vote_task_lock); | 1371 | spin_lock_init(&osb->dc_task_lock); | 
| 1354 | init_waitqueue_head(&osb->vote_event); | 1372 | init_waitqueue_head(&osb->dc_event); | 
| 1355 | osb->vote_work_sequence = 0; | 1373 | osb->dc_work_sequence = 0; | 
| 1356 | osb->vote_wake_sequence = 0; | 1374 | osb->dc_wake_sequence = 0; | 
| 1357 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1375 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 
| 1358 | osb->blocked_lock_count = 0; | 1376 | osb->blocked_lock_count = 0; | 
| 1359 | INIT_LIST_HEAD(&osb->vote_list); | ||
| 1360 | spin_lock_init(&osb->osb_lock); | 1377 | spin_lock_init(&osb->osb_lock); | 
| 1361 | 1378 | ||
| 1362 | atomic_set(&osb->alloc_stats.moves, 0); | 1379 | atomic_set(&osb->alloc_stats.moves, 0); | 
| @@ -1496,7 +1513,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1496 | } | 1513 | } | 
| 1497 | 1514 | ||
| 1498 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | 1515 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | 
| 1499 | osb->net_key = le32_to_cpu(uuid_net_key); | ||
| 1500 | 1516 | ||
| 1501 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 1517 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 
| 1502 | osb->vol_label[63] = '\0'; | 1518 | osb->vol_label[63] = '\0'; | 
| @@ -1539,25 +1555,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1539 | } | 1555 | } | 
| 1540 | 1556 | ||
| 1541 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 1557 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 
| 1542 | |||
| 1543 | /* We don't have a cluster lock on the bitmap here because | ||
| 1544 | * we're only interested in static information and the extra | ||
| 1545 | * complexity at mount time isn't worht it. Don't pass the | ||
| 1546 | * inode in to the read function though as we don't want it to | ||
| 1547 | * be put in the cache. */ | ||
| 1548 | status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, | ||
| 1549 | NULL); | ||
| 1550 | iput(inode); | 1558 | iput(inode); | 
| 1551 | if (status < 0) { | ||
| 1552 | mlog_errno(status); | ||
| 1553 | goto bail; | ||
| 1554 | } | ||
| 1555 | 1559 | ||
| 1556 | di = (struct ocfs2_dinode *) bitmap_bh->b_data; | 1560 | osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; | 
| 1557 | osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
| 1558 | brelse(bitmap_bh); | ||
| 1559 | mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", | ||
| 1560 | (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); | ||
| 1561 | 1561 | ||
| 1562 | status = ocfs2_init_slot_info(osb); | 1562 | status = ocfs2_init_slot_info(osb); | 
| 1563 | if (status < 0) { | 1563 | if (status < 0) { | 
| diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fd2e846e3e6f..ab713ebdd546 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
| @@ -112,7 +112,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 112 | goto bail; | 112 | goto bail; | 
| 113 | } | 113 | } | 
| 114 | 114 | ||
| 115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE); | 115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE, type); | 
| 116 | if (IS_ERR(inode)) { | 116 | if (IS_ERR(inode)) { | 
| 117 | mlog_errno(PTR_ERR(inode)); | 117 | mlog_errno(PTR_ERR(inode)); | 
| 118 | inode = NULL; | 118 | inode = NULL; | 
| diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c index 5405ce121c99..e2488f4128a2 100644 --- a/fs/ocfs2/ver.c +++ b/fs/ocfs2/ver.c | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | 29 | ||
| 30 | #include "ver.h" | 30 | #include "ver.h" | 
| 31 | 31 | ||
| 32 | #define OCFS2_BUILD_VERSION "1.3.3" | 32 | #define OCFS2_BUILD_VERSION "1.5.0" | 
| 33 | 33 | ||
| 34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION | 34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION | 
| 35 | 35 | ||
| diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c deleted file mode 100644 index c05358538f2b..000000000000 --- a/fs/ocfs2/vote.c +++ /dev/null | |||
| @@ -1,756 +0,0 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * vote.c | ||
| 5 | * | ||
| 6 | * description here | ||
| 7 | * | ||
| 8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 021110-1307, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/types.h> | ||
| 27 | #include <linux/slab.h> | ||
| 28 | #include <linux/highmem.h> | ||
| 29 | #include <linux/kthread.h> | ||
| 30 | |||
| 31 | #include <cluster/heartbeat.h> | ||
| 32 | #include <cluster/nodemanager.h> | ||
| 33 | #include <cluster/tcp.h> | ||
| 34 | |||
| 35 | #include <dlm/dlmapi.h> | ||
| 36 | |||
| 37 | #define MLOG_MASK_PREFIX ML_VOTE | ||
| 38 | #include <cluster/masklog.h> | ||
| 39 | |||
| 40 | #include "ocfs2.h" | ||
| 41 | |||
| 42 | #include "alloc.h" | ||
| 43 | #include "dlmglue.h" | ||
| 44 | #include "extent_map.h" | ||
| 45 | #include "heartbeat.h" | ||
| 46 | #include "inode.h" | ||
| 47 | #include "journal.h" | ||
| 48 | #include "slot_map.h" | ||
| 49 | #include "vote.h" | ||
| 50 | |||
| 51 | #include "buffer_head_io.h" | ||
| 52 | |||
| 53 | #define OCFS2_MESSAGE_TYPE_VOTE (0x1) | ||
| 54 | #define OCFS2_MESSAGE_TYPE_RESPONSE (0x2) | ||
| 55 | struct ocfs2_msg_hdr | ||
| 56 | { | ||
| 57 | __be32 h_response_id; /* used to lookup message handle on sending | ||
| 58 | * node. */ | ||
| 59 | __be32 h_request; | ||
| 60 | __be64 h_blkno; | ||
| 61 | __be32 h_generation; | ||
| 62 | __be32 h_node_num; /* node sending this particular message. */ | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct ocfs2_vote_msg | ||
| 66 | { | ||
| 67 | struct ocfs2_msg_hdr v_hdr; | ||
| 68 | __be32 v_reserved1; | ||
| 69 | } __attribute__ ((packed)); | ||
| 70 | |||
| 71 | /* Responses are given these values to maintain backwards | ||
| 72 | * compatibility with older ocfs2 versions */ | ||
| 73 | #define OCFS2_RESPONSE_OK (0) | ||
| 74 | #define OCFS2_RESPONSE_BUSY (-16) | ||
| 75 | #define OCFS2_RESPONSE_BAD_MSG (-22) | ||
| 76 | |||
| 77 | struct ocfs2_response_msg | ||
| 78 | { | ||
| 79 | struct ocfs2_msg_hdr r_hdr; | ||
| 80 | __be32 r_response; | ||
| 81 | } __attribute__ ((packed)); | ||
| 82 | |||
| 83 | struct ocfs2_vote_work { | ||
| 84 | struct list_head w_list; | ||
| 85 | struct ocfs2_vote_msg w_msg; | ||
| 86 | }; | ||
| 87 | |||
| 88 | enum ocfs2_vote_request { | ||
| 89 | OCFS2_VOTE_REQ_INVALID = 0, | ||
| 90 | OCFS2_VOTE_REQ_MOUNT, | ||
| 91 | OCFS2_VOTE_REQ_UMOUNT, | ||
| 92 | OCFS2_VOTE_REQ_LAST | ||
| 93 | }; | ||
| 94 | |||
| 95 | static inline int ocfs2_is_valid_vote_request(int request) | ||
| 96 | { | ||
| 97 | return OCFS2_VOTE_REQ_INVALID < request && | ||
| 98 | request < OCFS2_VOTE_REQ_LAST; | ||
| 99 | } | ||
| 100 | |||
| 101 | typedef void (*ocfs2_net_response_callback)(void *priv, | ||
| 102 | struct ocfs2_response_msg *resp); | ||
| 103 | struct ocfs2_net_response_cb { | ||
| 104 | ocfs2_net_response_callback rc_cb; | ||
| 105 | void *rc_priv; | ||
| 106 | }; | ||
| 107 | |||
| 108 | struct ocfs2_net_wait_ctxt { | ||
| 109 | struct list_head n_list; | ||
| 110 | u32 n_response_id; | ||
| 111 | wait_queue_head_t n_event; | ||
| 112 | struct ocfs2_node_map n_node_map; | ||
| 113 | int n_response; /* an agreggate response. 0 if | ||
| 114 | * all nodes are go, < 0 on any | ||
| 115 | * negative response from any | ||
| 116 | * node or network error. */ | ||
| 117 | struct ocfs2_net_response_cb *n_callback; | ||
| 118 | }; | ||
| 119 | |||
| 120 | static void ocfs2_process_mount_request(struct ocfs2_super *osb, | ||
| 121 | unsigned int node_num) | ||
| 122 | { | ||
| 123 | mlog(0, "MOUNT vote from node %u\n", node_num); | ||
| 124 | /* The other node only sends us this message when he has an EX | ||
| 125 | * on the superblock, so our recovery threads (if having been | ||
| 126 | * launched) are waiting on it.*/ | ||
| 127 | ocfs2_recovery_map_clear(osb, node_num); | ||
| 128 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, node_num); | ||
| 129 | |||
| 130 | /* We clear the umount map here because a node may have been | ||
| 131 | * previously mounted, safely unmounted but never stopped | ||
| 132 | * heartbeating - in which case we'd have a stale entry. */ | ||
| 133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
| 134 | } | ||
| 135 | |||
| 136 | static void ocfs2_process_umount_request(struct ocfs2_super *osb, | ||
| 137 | unsigned int node_num) | ||
| 138 | { | ||
| 139 | mlog(0, "UMOUNT vote from node %u\n", node_num); | ||
| 140 | ocfs2_node_map_clear_bit(osb, &osb->mounted_map, node_num); | ||
| 141 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); | ||
| 142 | } | ||
| 143 | |||
| 144 | static void ocfs2_process_vote(struct ocfs2_super *osb, | ||
| 145 | struct ocfs2_vote_msg *msg) | ||
| 146 | { | ||
| 147 | int net_status, vote_response; | ||
| 148 | unsigned int node_num; | ||
| 149 | u64 blkno; | ||
| 150 | enum ocfs2_vote_request request; | ||
| 151 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | ||
| 152 | struct ocfs2_response_msg response; | ||
| 153 | |||
| 154 | /* decode the network mumbo jumbo into local variables. */ | ||
| 155 | request = be32_to_cpu(hdr->h_request); | ||
| 156 | blkno = be64_to_cpu(hdr->h_blkno); | ||
| 157 | node_num = be32_to_cpu(hdr->h_node_num); | ||
| 158 | |||
| 159 | mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n", | ||
| 160 | request, (unsigned long long)blkno, node_num); | ||
| 161 | |||
| 162 | if (!ocfs2_is_valid_vote_request(request)) { | ||
| 163 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", | ||
| 164 | request, node_num); | ||
| 165 | vote_response = OCFS2_RESPONSE_BAD_MSG; | ||
| 166 | goto respond; | ||
| 167 | } | ||
| 168 | |||
| 169 | vote_response = OCFS2_RESPONSE_OK; | ||
| 170 | |||
| 171 | switch (request) { | ||
| 172 | case OCFS2_VOTE_REQ_UMOUNT: | ||
| 173 | ocfs2_process_umount_request(osb, node_num); | ||
| 174 | goto respond; | ||
| 175 | case OCFS2_VOTE_REQ_MOUNT: | ||
| 176 | ocfs2_process_mount_request(osb, node_num); | ||
| 177 | goto respond; | ||
| 178 | default: | ||
| 179 | /* avoids a gcc warning */ | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | |||
| 183 | respond: | ||
| 184 | /* Response structure is small so we just put it on the stack | ||
| 185 | * and stuff it inline. */ | ||
| 186 | memset(&response, 0, sizeof(struct ocfs2_response_msg)); | ||
| 187 | response.r_hdr.h_response_id = hdr->h_response_id; | ||
| 188 | response.r_hdr.h_blkno = hdr->h_blkno; | ||
| 189 | response.r_hdr.h_generation = hdr->h_generation; | ||
| 190 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); | ||
| 191 | response.r_response = cpu_to_be32(vote_response); | ||
| 192 | |||
| 193 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
| 194 | osb->net_key, | ||
| 195 | &response, | ||
| 196 | sizeof(struct ocfs2_response_msg), | ||
| 197 | node_num, | ||
| 198 | NULL); | ||
| 199 | /* We still want to error print for ENOPROTOOPT here. The | ||
| 200 | * sending node shouldn't have unregistered his net handler | ||
| 201 | * without sending an unmount vote 1st */ | ||
| 202 | if (net_status < 0 | ||
| 203 | && net_status != -ETIMEDOUT | ||
| 204 | && net_status != -ENOTCONN) | ||
| 205 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", | ||
| 206 | node_num, net_status); | ||
| 207 | } | ||
| 208 | |||
| 209 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) | ||
| 210 | { | ||
| 211 | unsigned long processed; | ||
| 212 | struct ocfs2_lock_res *lockres; | ||
| 213 | struct ocfs2_vote_work *work; | ||
| 214 | |||
| 215 | mlog_entry_void(); | ||
| 216 | |||
| 217 | spin_lock(&osb->vote_task_lock); | ||
| 218 | /* grab this early so we know to try again if a state change and | ||
| 219 | * wake happens part-way through our work */ | ||
| 220 | osb->vote_work_sequence = osb->vote_wake_sequence; | ||
| 221 | |||
| 222 | processed = osb->blocked_lock_count; | ||
| 223 | while (processed) { | ||
| 224 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
| 225 | |||
| 226 | lockres = list_entry(osb->blocked_lock_list.next, | ||
| 227 | struct ocfs2_lock_res, l_blocked_list); | ||
| 228 | list_del_init(&lockres->l_blocked_list); | ||
| 229 | osb->blocked_lock_count--; | ||
| 230 | spin_unlock(&osb->vote_task_lock); | ||
| 231 | |||
| 232 | BUG_ON(!processed); | ||
| 233 | processed--; | ||
| 234 | |||
| 235 | ocfs2_process_blocked_lock(osb, lockres); | ||
| 236 | |||
| 237 | spin_lock(&osb->vote_task_lock); | ||
| 238 | } | ||
| 239 | |||
| 240 | while (osb->vote_count) { | ||
| 241 | BUG_ON(list_empty(&osb->vote_list)); | ||
| 242 | work = list_entry(osb->vote_list.next, | ||
| 243 | struct ocfs2_vote_work, w_list); | ||
| 244 | list_del(&work->w_list); | ||
| 245 | osb->vote_count--; | ||
| 246 | spin_unlock(&osb->vote_task_lock); | ||
| 247 | |||
| 248 | ocfs2_process_vote(osb, &work->w_msg); | ||
| 249 | kfree(work); | ||
| 250 | |||
| 251 | spin_lock(&osb->vote_task_lock); | ||
| 252 | } | ||
| 253 | spin_unlock(&osb->vote_task_lock); | ||
| 254 | |||
| 255 | mlog_exit_void(); | ||
| 256 | } | ||
| 257 | |||
| 258 | static int ocfs2_vote_thread_lists_empty(struct ocfs2_super *osb) | ||
| 259 | { | ||
| 260 | int empty = 0; | ||
| 261 | |||
| 262 | spin_lock(&osb->vote_task_lock); | ||
| 263 | if (list_empty(&osb->blocked_lock_list) && | ||
| 264 | list_empty(&osb->vote_list)) | ||
| 265 | empty = 1; | ||
| 266 | |||
| 267 | spin_unlock(&osb->vote_task_lock); | ||
| 268 | return empty; | ||
| 269 | } | ||
| 270 | |||
| 271 | static int ocfs2_vote_thread_should_wake(struct ocfs2_super *osb) | ||
| 272 | { | ||
| 273 | int should_wake = 0; | ||
| 274 | |||
| 275 | spin_lock(&osb->vote_task_lock); | ||
| 276 | if (osb->vote_work_sequence != osb->vote_wake_sequence) | ||
| 277 | should_wake = 1; | ||
| 278 | spin_unlock(&osb->vote_task_lock); | ||
| 279 | |||
| 280 | return should_wake; | ||
| 281 | } | ||
| 282 | |||
| 283 | int ocfs2_vote_thread(void *arg) | ||
| 284 | { | ||
| 285 | int status = 0; | ||
| 286 | struct ocfs2_super *osb = arg; | ||
| 287 | |||
| 288 | /* only quit once we've been asked to stop and there is no more | ||
| 289 | * work available */ | ||
| 290 | while (!(kthread_should_stop() && | ||
| 291 | ocfs2_vote_thread_lists_empty(osb))) { | ||
| 292 | |||
| 293 | wait_event_interruptible(osb->vote_event, | ||
| 294 | ocfs2_vote_thread_should_wake(osb) || | ||
| 295 | kthread_should_stop()); | ||
| 296 | |||
| 297 | mlog(0, "vote_thread: awoken\n"); | ||
| 298 | |||
| 299 | ocfs2_vote_thread_do_work(osb); | ||
| 300 | } | ||
| 301 | |||
| 302 | osb->vote_task = NULL; | ||
| 303 | return status; | ||
| 304 | } | ||
| 305 | |||
| 306 | static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id) | ||
| 307 | { | ||
| 308 | struct ocfs2_net_wait_ctxt *w; | ||
| 309 | |||
| 310 | w = kzalloc(sizeof(*w), GFP_NOFS); | ||
| 311 | if (!w) { | ||
| 312 | mlog_errno(-ENOMEM); | ||
| 313 | goto bail; | ||
| 314 | } | ||
| 315 | |||
| 316 | INIT_LIST_HEAD(&w->n_list); | ||
| 317 | init_waitqueue_head(&w->n_event); | ||
| 318 | ocfs2_node_map_init(&w->n_node_map); | ||
| 319 | w->n_response_id = response_id; | ||
| 320 | w->n_callback = NULL; | ||
| 321 | bail: | ||
| 322 | return w; | ||
| 323 | } | ||
| 324 | |||
| 325 | static unsigned int ocfs2_new_response_id(struct ocfs2_super *osb) | ||
| 326 | { | ||
| 327 | unsigned int ret; | ||
| 328 | |||
| 329 | spin_lock(&osb->net_response_lock); | ||
| 330 | ret = ++osb->net_response_ids; | ||
| 331 | spin_unlock(&osb->net_response_lock); | ||
| 332 | |||
| 333 | return ret; | ||
| 334 | } | ||
| 335 | |||
| 336 | static void ocfs2_dequeue_net_wait_ctxt(struct ocfs2_super *osb, | ||
| 337 | struct ocfs2_net_wait_ctxt *w) | ||
| 338 | { | ||
| 339 | spin_lock(&osb->net_response_lock); | ||
| 340 | list_del(&w->n_list); | ||
| 341 | spin_unlock(&osb->net_response_lock); | ||
| 342 | } | ||
| 343 | |||
| 344 | static void ocfs2_queue_net_wait_ctxt(struct ocfs2_super *osb, | ||
| 345 | struct ocfs2_net_wait_ctxt *w) | ||
| 346 | { | ||
| 347 | spin_lock(&osb->net_response_lock); | ||
| 348 | list_add_tail(&w->n_list, | ||
| 349 | &osb->net_response_list); | ||
| 350 | spin_unlock(&osb->net_response_lock); | ||
| 351 | } | ||
| 352 | |||
| 353 | static void __ocfs2_mark_node_responded(struct ocfs2_super *osb, | ||
| 354 | struct ocfs2_net_wait_ctxt *w, | ||
| 355 | int node_num) | ||
| 356 | { | ||
| 357 | assert_spin_locked(&osb->net_response_lock); | ||
| 358 | |||
| 359 | ocfs2_node_map_clear_bit(osb, &w->n_node_map, node_num); | ||
| 360 | if (ocfs2_node_map_is_empty(osb, &w->n_node_map)) | ||
| 361 | wake_up(&w->n_event); | ||
| 362 | } | ||
| 363 | |||
| 364 | /* Intended to be called from the node down callback, we fake remove | ||
| 365 | * the node from all our response contexts */ | ||
| 366 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
| 367 | int node_num) | ||
| 368 | { | ||
| 369 | struct list_head *p; | ||
| 370 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
| 371 | |||
| 372 | spin_lock(&osb->net_response_lock); | ||
| 373 | |||
| 374 | list_for_each(p, &osb->net_response_list) { | ||
| 375 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
| 376 | |||
| 377 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
| 378 | } | ||
| 379 | |||
| 380 | spin_unlock(&osb->net_response_lock); | ||
| 381 | } | ||
| 382 | |||
| 383 | static int ocfs2_broadcast_vote(struct ocfs2_super *osb, | ||
| 384 | struct ocfs2_vote_msg *request, | ||
| 385 | unsigned int response_id, | ||
| 386 | int *response, | ||
| 387 | struct ocfs2_net_response_cb *callback) | ||
| 388 | { | ||
| 389 | int status, i, remote_err; | ||
| 390 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
| 391 | int dequeued = 0; | ||
| 392 | |||
| 393 | mlog_entry_void(); | ||
| 394 | |||
| 395 | w = ocfs2_new_net_wait_ctxt(response_id); | ||
| 396 | if (!w) { | ||
| 397 | status = -ENOMEM; | ||
| 398 | mlog_errno(status); | ||
| 399 | goto bail; | ||
| 400 | } | ||
| 401 | w->n_callback = callback; | ||
| 402 | |||
| 403 | /* we're pretty much ready to go at this point, and this fills | ||
| 404 | * in n_response which we need anyway... */ | ||
| 405 | ocfs2_queue_net_wait_ctxt(osb, w); | ||
| 406 | |||
| 407 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, 0); | ||
| 408 | |||
| 409 | while (i != O2NM_INVALID_NODE_NUM) { | ||
| 410 | if (i != osb->node_num) { | ||
| 411 | mlog(0, "trying to send request to node %i\n", i); | ||
| 412 | ocfs2_node_map_set_bit(osb, &w->n_node_map, i); | ||
| 413 | |||
| 414 | remote_err = 0; | ||
| 415 | status = o2net_send_message(OCFS2_MESSAGE_TYPE_VOTE, | ||
| 416 | osb->net_key, | ||
| 417 | request, | ||
| 418 | sizeof(*request), | ||
| 419 | i, | ||
| 420 | &remote_err); | ||
| 421 | if (status == -ETIMEDOUT) { | ||
| 422 | mlog(0, "remote node %d timed out!\n", i); | ||
| 423 | status = -EAGAIN; | ||
| 424 | goto bail; | ||
| 425 | } | ||
| 426 | if (remote_err < 0) { | ||
| 427 | status = remote_err; | ||
| 428 | mlog(0, "remote error %d on node %d!\n", | ||
| 429 | remote_err, i); | ||
| 430 | mlog_errno(status); | ||
| 431 | goto bail; | ||
| 432 | } | ||
| 433 | if (status < 0) { | ||
| 434 | mlog_errno(status); | ||
| 435 | goto bail; | ||
| 436 | } | ||
| 437 | } | ||
| 438 | i++; | ||
| 439 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, i); | ||
| 440 | mlog(0, "next is %d, i am %d\n", i, osb->node_num); | ||
| 441 | } | ||
| 442 | mlog(0, "done sending, now waiting on responses...\n"); | ||
| 443 | |||
| 444 | wait_event(w->n_event, ocfs2_node_map_is_empty(osb, &w->n_node_map)); | ||
| 445 | |||
| 446 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
| 447 | dequeued = 1; | ||
| 448 | |||
| 449 | *response = w->n_response; | ||
| 450 | status = 0; | ||
| 451 | bail: | ||
| 452 | if (w) { | ||
| 453 | if (!dequeued) | ||
| 454 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
| 455 | kfree(w); | ||
| 456 | } | ||
| 457 | |||
| 458 | mlog_exit(status); | ||
| 459 | return status; | ||
| 460 | } | ||
| 461 | |||
| 462 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | ||
| 463 | u64 blkno, | ||
| 464 | unsigned int generation, | ||
| 465 | enum ocfs2_vote_request type) | ||
| 466 | { | ||
| 467 | struct ocfs2_vote_msg *request; | ||
| 468 | struct ocfs2_msg_hdr *hdr; | ||
| 469 | |||
| 470 | BUG_ON(!ocfs2_is_valid_vote_request(type)); | ||
| 471 | |||
| 472 | request = kzalloc(sizeof(*request), GFP_NOFS); | ||
| 473 | if (!request) { | ||
| 474 | mlog_errno(-ENOMEM); | ||
| 475 | } else { | ||
| 476 | hdr = &request->v_hdr; | ||
| 477 | hdr->h_node_num = cpu_to_be32(osb->node_num); | ||
| 478 | hdr->h_request = cpu_to_be32(type); | ||
| 479 | hdr->h_blkno = cpu_to_be64(blkno); | ||
| 480 | hdr->h_generation = cpu_to_be32(generation); | ||
| 481 | } | ||
| 482 | |||
| 483 | return request; | ||
| 484 | } | ||
| 485 | |||
| 486 | /* Complete the buildup of a new vote request and process the | ||
| 487 | * broadcast return value. */ | ||
| 488 | static int ocfs2_do_request_vote(struct ocfs2_super *osb, | ||
| 489 | struct ocfs2_vote_msg *request, | ||
| 490 | struct ocfs2_net_response_cb *callback) | ||
| 491 | { | ||
| 492 | int status, response = -EBUSY; | ||
| 493 | unsigned int response_id; | ||
| 494 | struct ocfs2_msg_hdr *hdr; | ||
| 495 | |||
| 496 | response_id = ocfs2_new_response_id(osb); | ||
| 497 | |||
| 498 | hdr = &request->v_hdr; | ||
| 499 | hdr->h_response_id = cpu_to_be32(response_id); | ||
| 500 | |||
| 501 | status = ocfs2_broadcast_vote(osb, request, response_id, &response, | ||
| 502 | callback); | ||
| 503 | if (status < 0) { | ||
| 504 | mlog_errno(status); | ||
| 505 | goto bail; | ||
| 506 | } | ||
| 507 | |||
| 508 | status = response; | ||
| 509 | bail: | ||
| 510 | |||
| 511 | return status; | ||
| 512 | } | ||
| 513 | |||
| 514 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | ||
| 515 | { | ||
| 516 | int status; | ||
| 517 | struct ocfs2_vote_msg *request = NULL; | ||
| 518 | |||
| 519 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT); | ||
| 520 | if (!request) { | ||
| 521 | status = -ENOMEM; | ||
| 522 | goto bail; | ||
| 523 | } | ||
| 524 | |||
| 525 | status = -EAGAIN; | ||
| 526 | while (status == -EAGAIN) { | ||
| 527 | if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) && | ||
| 528 | signal_pending(current)) { | ||
| 529 | status = -ERESTARTSYS; | ||
| 530 | goto bail; | ||
| 531 | } | ||
| 532 | |||
| 533 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
| 534 | osb->node_num)) { | ||
| 535 | status = 0; | ||
| 536 | goto bail; | ||
| 537 | } | ||
| 538 | |||
| 539 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
| 540 | } | ||
| 541 | |||
| 542 | bail: | ||
| 543 | kfree(request); | ||
| 544 | return status; | ||
| 545 | } | ||
| 546 | |||
| 547 | int ocfs2_request_umount_vote(struct ocfs2_super *osb) | ||
| 548 | { | ||
| 549 | int status; | ||
| 550 | struct ocfs2_vote_msg *request = NULL; | ||
| 551 | |||
| 552 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT); | ||
| 553 | if (!request) { | ||
| 554 | status = -ENOMEM; | ||
| 555 | goto bail; | ||
| 556 | } | ||
| 557 | |||
| 558 | status = -EAGAIN; | ||
| 559 | while (status == -EAGAIN) { | ||
| 560 | /* Do not check signals on this vote... We really want | ||
| 561 | * this one to go all the way through. */ | ||
| 562 | |||
| 563 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
| 564 | osb->node_num)) { | ||
| 565 | status = 0; | ||
| 566 | goto bail; | ||
| 567 | } | ||
| 568 | |||
| 569 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
| 570 | } | ||
| 571 | |||
| 572 | bail: | ||
| 573 | kfree(request); | ||
| 574 | return status; | ||
| 575 | } | ||
| 576 | |||
| 577 | /* TODO: This should eventually be a hash table! */ | ||
| 578 | static struct ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(struct ocfs2_super *osb, | ||
| 579 | u32 response_id) | ||
| 580 | { | ||
| 581 | struct list_head *p; | ||
| 582 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
| 583 | |||
| 584 | list_for_each(p, &osb->net_response_list) { | ||
| 585 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
| 586 | if (response_id == w->n_response_id) | ||
| 587 | break; | ||
| 588 | w = NULL; | ||
| 589 | } | ||
| 590 | |||
| 591 | return w; | ||
| 592 | } | ||
| 593 | |||
| 594 | /* Translate response codes into local node errno values */ | ||
| 595 | static inline int ocfs2_translate_response(int response) | ||
| 596 | { | ||
| 597 | int ret; | ||
| 598 | |||
| 599 | switch (response) { | ||
| 600 | case OCFS2_RESPONSE_OK: | ||
| 601 | ret = 0; | ||
| 602 | break; | ||
| 603 | |||
| 604 | case OCFS2_RESPONSE_BUSY: | ||
| 605 | ret = -EBUSY; | ||
| 606 | break; | ||
| 607 | |||
| 608 | default: | ||
| 609 | ret = -EINVAL; | ||
| 610 | } | ||
| 611 | |||
| 612 | return ret; | ||
| 613 | } | ||
| 614 | |||
| 615 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | ||
| 616 | u32 len, | ||
| 617 | void *data, void **ret_data) | ||
| 618 | { | ||
| 619 | unsigned int response_id, node_num; | ||
| 620 | int response_status; | ||
| 621 | struct ocfs2_super *osb = data; | ||
| 622 | struct ocfs2_response_msg *resp; | ||
| 623 | struct ocfs2_net_wait_ctxt * w; | ||
| 624 | struct ocfs2_net_response_cb *resp_cb; | ||
| 625 | |||
| 626 | resp = (struct ocfs2_response_msg *) msg->buf; | ||
| 627 | |||
| 628 | response_id = be32_to_cpu(resp->r_hdr.h_response_id); | ||
| 629 | node_num = be32_to_cpu(resp->r_hdr.h_node_num); | ||
| 630 | response_status = | ||
| 631 | ocfs2_translate_response(be32_to_cpu(resp->r_response)); | ||
| 632 | |||
| 633 | mlog(0, "received response message:\n"); | ||
| 634 | mlog(0, "h_response_id = %u\n", response_id); | ||
| 635 | mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request)); | ||
| 636 | mlog(0, "h_blkno = %llu\n", | ||
| 637 | (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno)); | ||
| 638 | mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation)); | ||
| 639 | mlog(0, "h_node_num = %u\n", node_num); | ||
| 640 | mlog(0, "r_response = %d\n", response_status); | ||
| 641 | |||
| 642 | spin_lock(&osb->net_response_lock); | ||
| 643 | w = __ocfs2_find_net_wait_ctxt(osb, response_id); | ||
| 644 | if (!w) { | ||
| 645 | mlog(0, "request not found!\n"); | ||
| 646 | goto bail; | ||
| 647 | } | ||
| 648 | resp_cb = w->n_callback; | ||
| 649 | |||
| 650 | if (response_status && (!w->n_response)) { | ||
| 651 | /* we only really need one negative response so don't | ||
| 652 | * set it twice. */ | ||
| 653 | w->n_response = response_status; | ||
| 654 | } | ||
| 655 | |||
| 656 | if (resp_cb) { | ||
| 657 | spin_unlock(&osb->net_response_lock); | ||
| 658 | |||
| 659 | resp_cb->rc_cb(resp_cb->rc_priv, resp); | ||
| 660 | |||
| 661 | spin_lock(&osb->net_response_lock); | ||
| 662 | } | ||
| 663 | |||
| 664 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
| 665 | bail: | ||
| 666 | spin_unlock(&osb->net_response_lock); | ||
| 667 | |||
| 668 | return 0; | ||
| 669 | } | ||
| 670 | |||
| 671 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | ||
| 672 | u32 len, | ||
| 673 | void *data, void **ret_data) | ||
| 674 | { | ||
| 675 | int status; | ||
| 676 | struct ocfs2_super *osb = data; | ||
| 677 | struct ocfs2_vote_work *work; | ||
| 678 | |||
| 679 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); | ||
| 680 | if (!work) { | ||
| 681 | status = -ENOMEM; | ||
| 682 | mlog_errno(status); | ||
| 683 | goto bail; | ||
| 684 | } | ||
| 685 | |||
| 686 | INIT_LIST_HEAD(&work->w_list); | ||
| 687 | memcpy(&work->w_msg, msg->buf, sizeof(struct ocfs2_vote_msg)); | ||
| 688 | |||
| 689 | mlog(0, "scheduling vote request:\n"); | ||
| 690 | mlog(0, "h_response_id = %u\n", | ||
| 691 | be32_to_cpu(work->w_msg.v_hdr.h_response_id)); | ||
| 692 | mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request)); | ||
| 693 | mlog(0, "h_blkno = %llu\n", | ||
| 694 | (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno)); | ||
| 695 | mlog(0, "h_generation = %u\n", | ||
| 696 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); | ||
| 697 | mlog(0, "h_node_num = %u\n", | ||
| 698 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); | ||
| 699 | |||
| 700 | spin_lock(&osb->vote_task_lock); | ||
| 701 | list_add_tail(&work->w_list, &osb->vote_list); | ||
| 702 | osb->vote_count++; | ||
| 703 | spin_unlock(&osb->vote_task_lock); | ||
| 704 | |||
| 705 | ocfs2_kick_vote_thread(osb); | ||
| 706 | |||
| 707 | status = 0; | ||
| 708 | bail: | ||
| 709 | return status; | ||
| 710 | } | ||
| 711 | |||
| 712 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb) | ||
| 713 | { | ||
| 714 | if (!osb->net_key) | ||
| 715 | return; | ||
| 716 | |||
| 717 | o2net_unregister_handler_list(&osb->osb_net_handlers); | ||
| 718 | |||
| 719 | if (!list_empty(&osb->net_response_list)) | ||
| 720 | mlog(ML_ERROR, "net response list not empty!\n"); | ||
| 721 | |||
| 722 | osb->net_key = 0; | ||
| 723 | } | ||
| 724 | |||
| 725 | int ocfs2_register_net_handlers(struct ocfs2_super *osb) | ||
| 726 | { | ||
| 727 | int status = 0; | ||
| 728 | |||
| 729 | if (ocfs2_mount_local(osb)) | ||
| 730 | return 0; | ||
| 731 | |||
| 732 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
| 733 | osb->net_key, | ||
| 734 | sizeof(struct ocfs2_response_msg), | ||
| 735 | ocfs2_handle_response_message, | ||
| 736 | osb, NULL, &osb->osb_net_handlers); | ||
| 737 | if (status) { | ||
| 738 | mlog_errno(status); | ||
| 739 | goto bail; | ||
| 740 | } | ||
| 741 | |||
| 742 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_VOTE, | ||
| 743 | osb->net_key, | ||
| 744 | sizeof(struct ocfs2_vote_msg), | ||
| 745 | ocfs2_handle_vote_message, | ||
| 746 | osb, NULL, &osb->osb_net_handlers); | ||
| 747 | if (status) { | ||
| 748 | mlog_errno(status); | ||
| 749 | goto bail; | ||
| 750 | } | ||
| 751 | bail: | ||
| 752 | if (status < 0) | ||
| 753 | ocfs2_unregister_net_handlers(osb); | ||
| 754 | |||
| 755 | return status; | ||
| 756 | } | ||
| diff --git a/include/linux/Kbuild b/include/linux/Kbuild index f30fa92a44a1..bd694f779346 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
| @@ -49,6 +49,7 @@ header-y += comstats.h | |||
| 49 | header-y += const.h | 49 | header-y += const.h | 
| 50 | header-y += cgroupstats.h | 50 | header-y += cgroupstats.h | 
| 51 | header-y += cycx_cfm.h | 51 | header-y += cycx_cfm.h | 
| 52 | header-y += dlmconstants.h | ||
| 52 | header-y += dlm_device.h | 53 | header-y += dlm_device.h | 
| 53 | header-y += dlm_netlink.h | 54 | header-y += dlm_netlink.h | 
| 54 | header-y += dm-ioctl.h | 55 | header-y += dm-ioctl.h | 
| diff --git a/include/linux/dlm.h b/include/linux/dlm.h index be9d278761e0..c743fbc769db 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h | |||
| @@ -19,148 +19,12 @@ | |||
| 19 | * routines and structures to use DLM lockspaces | 19 | * routines and structures to use DLM lockspaces | 
| 20 | */ | 20 | */ | 
| 21 | 21 | ||
| 22 | /* | 22 | /* Lock levels and flags are here */ | 
| 23 | * Lock Modes | 23 | #include <linux/dlmconstants.h> | 
| 24 | */ | ||
| 25 | 24 | ||
| 26 | #define DLM_LOCK_IV -1 /* invalid */ | ||
| 27 | #define DLM_LOCK_NL 0 /* null */ | ||
| 28 | #define DLM_LOCK_CR 1 /* concurrent read */ | ||
| 29 | #define DLM_LOCK_CW 2 /* concurrent write */ | ||
| 30 | #define DLM_LOCK_PR 3 /* protected read */ | ||
| 31 | #define DLM_LOCK_PW 4 /* protected write */ | ||
| 32 | #define DLM_LOCK_EX 5 /* exclusive */ | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Maximum size in bytes of a dlm_lock name | ||
| 36 | */ | ||
| 37 | 25 | ||
| 38 | #define DLM_RESNAME_MAXLEN 64 | 26 | #define DLM_RESNAME_MAXLEN 64 | 
| 39 | 27 | ||
| 40 | /* | ||
| 41 | * Flags to dlm_lock | ||
| 42 | * | ||
| 43 | * DLM_LKF_NOQUEUE | ||
| 44 | * | ||
| 45 | * Do not queue the lock request on the wait queue if it cannot be granted | ||
| 46 | * immediately. If the lock cannot be granted because of this flag, DLM will | ||
| 47 | * either return -EAGAIN from the dlm_lock call or will return 0 from | ||
| 48 | * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | ||
| 49 | * | ||
| 50 | * DLM_LKF_CANCEL | ||
| 51 | * | ||
| 52 | * Used to cancel a pending lock request or conversion. A converting lock is | ||
| 53 | * returned to its previously granted mode. | ||
| 54 | * | ||
| 55 | * DLM_LKF_CONVERT | ||
| 56 | * | ||
| 57 | * Indicates a lock conversion request. For conversions the name and namelen | ||
| 58 | * are ignored and the lock ID in the LKSB is used to identify the lock. | ||
| 59 | * | ||
| 60 | * DLM_LKF_VALBLK | ||
| 61 | * | ||
| 62 | * Requests DLM to return the current contents of the lock value block in the | ||
| 63 | * lock status block. When this flag is set in a lock conversion from PW or EX | ||
| 64 | * modes, DLM assigns the value specified in the lock status block to the lock | ||
| 65 | * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | ||
| 66 | * containing application-specific information. | ||
| 67 | * | ||
| 68 | * DLM_LKF_QUECVT | ||
| 69 | * | ||
| 70 | * Force a conversion request to be queued, even if it is compatible with | ||
| 71 | * the granted modes of other locks on the same resource. | ||
| 72 | * | ||
| 73 | * DLM_LKF_IVVALBLK | ||
| 74 | * | ||
| 75 | * Invalidate the lock value block. | ||
| 76 | * | ||
| 77 | * DLM_LKF_CONVDEADLK | ||
| 78 | * | ||
| 79 | * Allows the dlm to resolve conversion deadlocks internally by demoting the | ||
| 80 | * granted mode of a converting lock to NL. The DLM_SBF_DEMOTED flag is | ||
| 81 | * returned for a conversion that's been affected by this. | ||
| 82 | * | ||
| 83 | * DLM_LKF_PERSISTENT | ||
| 84 | * | ||
| 85 | * Only relevant to locks originating in userspace. A persistent lock will not | ||
| 86 | * be removed if the process holding the lock exits. | ||
| 87 | * | ||
| 88 | * DLM_LKF_NODLCKWT | ||
| 89 | * | ||
| 90 | * Do not cancel the lock if it gets into conversion deadlock. | ||
| 91 | * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN. | ||
| 92 | * | ||
| 93 | * DLM_LKF_NODLCKBLK | ||
| 94 | * | ||
| 95 | * not yet implemented | ||
| 96 | * | ||
| 97 | * DLM_LKF_EXPEDITE | ||
| 98 | * | ||
| 99 | * Used only with new requests for NL mode locks. Tells the lock manager | ||
| 100 | * to grant the lock, ignoring other locks in convert and wait queues. | ||
| 101 | * | ||
| 102 | * DLM_LKF_NOQUEUEBAST | ||
| 103 | * | ||
| 104 | * Send blocking AST's before returning -EAGAIN to the caller. It is only | ||
| 105 | * used along with the NOQUEUE flag. Blocking AST's are not sent for failed | ||
| 106 | * NOQUEUE requests otherwise. | ||
| 107 | * | ||
| 108 | * DLM_LKF_HEADQUE | ||
| 109 | * | ||
| 110 | * Add a lock to the head of the convert or wait queue rather than the tail. | ||
| 111 | * | ||
| 112 | * DLM_LKF_NOORDER | ||
| 113 | * | ||
| 114 | * Disregard the standard grant order rules and grant a lock as soon as it | ||
| 115 | * is compatible with other granted locks. | ||
| 116 | * | ||
| 117 | * DLM_LKF_ORPHAN | ||
| 118 | * | ||
| 119 | * not yet implemented | ||
| 120 | * | ||
| 121 | * DLM_LKF_ALTPR | ||
| 122 | * | ||
| 123 | * If the requested mode cannot be granted immediately, try to grant the lock | ||
| 124 | * in PR mode instead. If this alternate mode is granted instead of the | ||
| 125 | * requested mode, DLM_SBF_ALTMODE is returned in the lksb. | ||
| 126 | * | ||
| 127 | * DLM_LKF_ALTCW | ||
| 128 | * | ||
| 129 | * The same as ALTPR, but the alternate mode is CW. | ||
| 130 | * | ||
| 131 | * DLM_LKF_FORCEUNLOCK | ||
| 132 | * | ||
| 133 | * Unlock the lock even if it is converting or waiting or has sublocks. | ||
| 134 | * Only really for use by the userland device.c code. | ||
| 135 | * | ||
| 136 | */ | ||
| 137 | |||
| 138 | #define DLM_LKF_NOQUEUE 0x00000001 | ||
| 139 | #define DLM_LKF_CANCEL 0x00000002 | ||
| 140 | #define DLM_LKF_CONVERT 0x00000004 | ||
| 141 | #define DLM_LKF_VALBLK 0x00000008 | ||
| 142 | #define DLM_LKF_QUECVT 0x00000010 | ||
| 143 | #define DLM_LKF_IVVALBLK 0x00000020 | ||
| 144 | #define DLM_LKF_CONVDEADLK 0x00000040 | ||
| 145 | #define DLM_LKF_PERSISTENT 0x00000080 | ||
| 146 | #define DLM_LKF_NODLCKWT 0x00000100 | ||
| 147 | #define DLM_LKF_NODLCKBLK 0x00000200 | ||
| 148 | #define DLM_LKF_EXPEDITE 0x00000400 | ||
| 149 | #define DLM_LKF_NOQUEUEBAST 0x00000800 | ||
| 150 | #define DLM_LKF_HEADQUE 0x00001000 | ||
| 151 | #define DLM_LKF_NOORDER 0x00002000 | ||
| 152 | #define DLM_LKF_ORPHAN 0x00004000 | ||
| 153 | #define DLM_LKF_ALTPR 0x00008000 | ||
| 154 | #define DLM_LKF_ALTCW 0x00010000 | ||
| 155 | #define DLM_LKF_FORCEUNLOCK 0x00020000 | ||
| 156 | #define DLM_LKF_TIMEOUT 0x00040000 | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Some return codes that are not in errno.h | ||
| 160 | */ | ||
| 161 | |||
| 162 | #define DLM_ECANCEL 0x10001 | ||
| 163 | #define DLM_EUNLOCK 0x10002 | ||
| 164 | 28 | ||
| 165 | typedef void dlm_lockspace_t; | 29 | typedef void dlm_lockspace_t; | 
| 166 | 30 | ||
| diff --git a/include/linux/dlmconstants.h b/include/linux/dlmconstants.h new file mode 100644 index 000000000000..fddb3d3ff321 --- /dev/null +++ b/include/linux/dlmconstants.h | |||
| @@ -0,0 +1,159 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #ifndef __DLMCONSTANTS_DOT_H__ | ||
| 15 | #define __DLMCONSTANTS_DOT_H__ | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Constants used by DLM interface. | ||
| 19 | */ | ||
| 20 | |||
| 21 | /* | ||
| 22 | * Lock Modes | ||
| 23 | */ | ||
| 24 | |||
| 25 | #define DLM_LOCK_IV (-1) /* invalid */ | ||
| 26 | #define DLM_LOCK_NL 0 /* null */ | ||
| 27 | #define DLM_LOCK_CR 1 /* concurrent read */ | ||
| 28 | #define DLM_LOCK_CW 2 /* concurrent write */ | ||
| 29 | #define DLM_LOCK_PR 3 /* protected read */ | ||
| 30 | #define DLM_LOCK_PW 4 /* protected write */ | ||
| 31 | #define DLM_LOCK_EX 5 /* exclusive */ | ||
| 32 | |||
| 33 | |||
| 34 | /* | ||
| 35 | * Flags to dlm_lock | ||
| 36 | * | ||
| 37 | * DLM_LKF_NOQUEUE | ||
| 38 | * | ||
| 39 | * Do not queue the lock request on the wait queue if it cannot be granted | ||
| 40 | * immediately. If the lock cannot be granted because of this flag, DLM will | ||
| 41 | * either return -EAGAIN from the dlm_lock call or will return 0 from | ||
| 42 | * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | ||
| 43 | * | ||
| 44 | * DLM_LKF_CANCEL | ||
| 45 | * | ||
| 46 | * Used to cancel a pending lock request or conversion. A converting lock is | ||
| 47 | * returned to its previously granted mode. | ||
| 48 | * | ||
| 49 | * DLM_LKF_CONVERT | ||
| 50 | * | ||
| 51 | * Indicates a lock conversion request. For conversions the name and namelen | ||
| 52 | * are ignored and the lock ID in the LKSB is used to identify the lock. | ||
| 53 | * | ||
| 54 | * DLM_LKF_VALBLK | ||
| 55 | * | ||
| 56 | * Requests DLM to return the current contents of the lock value block in the | ||
| 57 | * lock status block. When this flag is set in a lock conversion from PW or EX | ||
| 58 | * modes, DLM assigns the value specified in the lock status block to the lock | ||
| 59 | * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | ||
| 60 | * containing application-specific information. | ||
| 61 | * | ||
| 62 | * DLM_LKF_QUECVT | ||
| 63 | * | ||
| 64 | * Force a conversion request to be queued, even if it is compatible with | ||
| 65 | * the granted modes of other locks on the same resource. | ||
| 66 | * | ||
| 67 | * DLM_LKF_IVVALBLK | ||
| 68 | * | ||
| 69 | * Invalidate the lock value block. | ||
| 70 | * | ||
| 71 | * DLM_LKF_CONVDEADLK | ||
| 72 | * | ||
| 73 | * Allows the dlm to resolve conversion deadlocks internally by demoting the | ||
| 74 | * granted mode of a converting lock to NL. The DLM_SBF_DEMOTED flag is | ||
| 75 | * returned for a conversion that's been affected by this. | ||
| 76 | * | ||
| 77 | * DLM_LKF_PERSISTENT | ||
| 78 | * | ||
| 79 | * Only relevant to locks originating in userspace. A persistent lock will not | ||
| 80 | * be removed if the process holding the lock exits. | ||
| 81 | * | ||
| 82 | * DLM_LKF_NODLCKWT | ||
| 83 | * | ||
| 84 | * Do not cancel the lock if it gets into conversion deadlock. | ||
| 85 | * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN. | ||
| 86 | * | ||
| 87 | * DLM_LKF_NODLCKBLK | ||
| 88 | * | ||
| 89 | * not yet implemented | ||
| 90 | * | ||
| 91 | * DLM_LKF_EXPEDITE | ||
| 92 | * | ||
| 93 | * Used only with new requests for NL mode locks. Tells the lock manager | ||
| 94 | * to grant the lock, ignoring other locks in convert and wait queues. | ||
| 95 | * | ||
| 96 | * DLM_LKF_NOQUEUEBAST | ||
| 97 | * | ||
| 98 | * Send blocking ASTs before returning -EAGAIN to the caller. It is only | ||
| 99 | * used along with the NOQUEUE flag. Blocking ASTs are not sent for failed | ||
| 100 | * NOQUEUE requests otherwise. | ||
| 101 | * | ||
| 102 | * DLM_LKF_HEADQUE | ||
| 103 | * | ||
| 104 | * Add a lock to the head of the convert or wait queue rather than the tail. | ||
| 105 | * | ||
| 106 | * DLM_LKF_NOORDER | ||
| 107 | * | ||
| 108 | * Disregard the standard grant order rules and grant a lock as soon as it | ||
| 109 | * is compatible with other granted locks. | ||
| 110 | * | ||
| 111 | * DLM_LKF_ORPHAN | ||
| 112 | * | ||
| 113 | * not yet implemented | ||
| 114 | * | ||
| 115 | * DLM_LKF_ALTPR | ||
| 116 | * | ||
| 117 | * If the requested mode cannot be granted immediately, try to grant the lock | ||
| 118 | * in PR mode instead. If this alternate mode is granted instead of the | ||
| 119 | * requested mode, DLM_SBF_ALTMODE is returned in the lksb. | ||
| 120 | * | ||
| 121 | * DLM_LKF_ALTCW | ||
| 122 | * | ||
| 123 | * The same as ALTPR, but the alternate mode is CW. | ||
| 124 | * | ||
| 125 | * DLM_LKF_FORCEUNLOCK | ||
| 126 | * | ||
| 127 | * Unlock the lock even if it is converting or waiting or has sublocks. | ||
| 128 | * Only really for use by the userland device.c code. | ||
| 129 | * | ||
| 130 | */ | ||
| 131 | |||
| 132 | #define DLM_LKF_NOQUEUE 0x00000001 | ||
| 133 | #define DLM_LKF_CANCEL 0x00000002 | ||
| 134 | #define DLM_LKF_CONVERT 0x00000004 | ||
| 135 | #define DLM_LKF_VALBLK 0x00000008 | ||
| 136 | #define DLM_LKF_QUECVT 0x00000010 | ||
| 137 | #define DLM_LKF_IVVALBLK 0x00000020 | ||
| 138 | #define DLM_LKF_CONVDEADLK 0x00000040 | ||
| 139 | #define DLM_LKF_PERSISTENT 0x00000080 | ||
| 140 | #define DLM_LKF_NODLCKWT 0x00000100 | ||
| 141 | #define DLM_LKF_NODLCKBLK 0x00000200 | ||
| 142 | #define DLM_LKF_EXPEDITE 0x00000400 | ||
| 143 | #define DLM_LKF_NOQUEUEBAST 0x00000800 | ||
| 144 | #define DLM_LKF_HEADQUE 0x00001000 | ||
| 145 | #define DLM_LKF_NOORDER 0x00002000 | ||
| 146 | #define DLM_LKF_ORPHAN 0x00004000 | ||
| 147 | #define DLM_LKF_ALTPR 0x00008000 | ||
| 148 | #define DLM_LKF_ALTCW 0x00010000 | ||
| 149 | #define DLM_LKF_FORCEUNLOCK 0x00020000 | ||
| 150 | #define DLM_LKF_TIMEOUT 0x00040000 | ||
| 151 | |||
| 152 | /* | ||
| 153 | * Some return codes that are not in errno.h | ||
| 154 | */ | ||
| 155 | |||
| 156 | #define DLM_ECANCEL 0x10001 | ||
| 157 | #define DLM_EUNLOCK 0x10002 | ||
| 158 | |||
| 159 | #endif /* __DLMCONSTANTS_DOT_H__ */ | ||
