diff options
| -rw-r--r-- | fs/namei.c | 6 | ||||
| -rw-r--r-- | fs/nfs/dir.c | 3 | ||||
| -rw-r--r-- | fs/nfs/super.c | 10 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 8 | ||||
| -rw-r--r-- | fs/ocfs2/dcache.c | 359 | ||||
| -rw-r--r-- | fs/ocfs2/dcache.h | 27 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmapi.h | 1 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmast.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 1 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 10 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 3 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/userdlm.c | 81 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/userdlm.h | 1 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.c | 1094 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.h | 21 | ||||
| -rw-r--r-- | fs/ocfs2/export.c | 8 | ||||
| -rw-r--r-- | fs/ocfs2/inode.c | 156 | ||||
| -rw-r--r-- | fs/ocfs2/inode.h | 8 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 3 | ||||
| -rw-r--r-- | fs/ocfs2/namei.c | 116 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2_lockid.h | 25 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/sysfile.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/vote.c | 180 | ||||
| -rw-r--r-- | fs/ocfs2/vote.h | 5 | ||||
| -rw-r--r-- | include/linux/fs.h | 7 |
27 files changed, 1245 insertions, 910 deletions
diff --git a/fs/namei.c b/fs/namei.c index 432d6bc6fab..6b591c01b09 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2370 | dput(new_dentry); | 2370 | dput(new_dentry); |
| 2371 | } | 2371 | } |
| 2372 | if (!error) | 2372 | if (!error) |
| 2373 | d_move(old_dentry,new_dentry); | 2373 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
| 2374 | d_move(old_dentry,new_dentry); | ||
| 2374 | return error; | 2375 | return error; |
| 2375 | } | 2376 | } |
| 2376 | 2377 | ||
| @@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2393 | else | 2394 | else |
| 2394 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2395 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
| 2395 | if (!error) { | 2396 | if (!error) { |
| 2396 | /* The following d_move() should become unconditional */ | 2397 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
| 2397 | if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) | ||
| 2398 | d_move(old_dentry, new_dentry); | 2398 | d_move(old_dentry, new_dentry); |
| 2399 | } | 2399 | } |
| 2400 | if (target) | 2400 | if (target) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3419c2da9ba..7432f1a43f3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -1669,8 +1669,7 @@ out: | |||
| 1669 | if (rehash) | 1669 | if (rehash) |
| 1670 | d_rehash(rehash); | 1670 | d_rehash(rehash); |
| 1671 | if (!error) { | 1671 | if (!error) { |
| 1672 | if (!S_ISDIR(old_inode->i_mode)) | 1672 | d_move(old_dentry, new_dentry); |
| 1673 | d_move(old_dentry, new_dentry); | ||
| 1674 | nfs_renew_times(new_dentry); | 1673 | nfs_renew_times(new_dentry); |
| 1675 | nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); | 1674 | nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); |
| 1676 | } | 1675 | } |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b99113b0f65..e8d40030cab 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = { | |||
| 71 | .name = "nfs", | 71 | .name = "nfs", |
| 72 | .get_sb = nfs_get_sb, | 72 | .get_sb = nfs_get_sb, |
| 73 | .kill_sb = nfs_kill_super, | 73 | .kill_sb = nfs_kill_super, |
| 74 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 74 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
| 75 | }; | 75 | }; |
| 76 | 76 | ||
| 77 | struct file_system_type nfs_xdev_fs_type = { | 77 | struct file_system_type nfs_xdev_fs_type = { |
| @@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = { | |||
| 79 | .name = "nfs", | 79 | .name = "nfs", |
| 80 | .get_sb = nfs_xdev_get_sb, | 80 | .get_sb = nfs_xdev_get_sb, |
| 81 | .kill_sb = nfs_kill_super, | 81 | .kill_sb = nfs_kill_super, |
| 82 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 82 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
| 83 | }; | 83 | }; |
| 84 | 84 | ||
| 85 | static struct super_operations nfs_sops = { | 85 | static struct super_operations nfs_sops = { |
| @@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = { | |||
| 107 | .name = "nfs4", | 107 | .name = "nfs4", |
| 108 | .get_sb = nfs4_get_sb, | 108 | .get_sb = nfs4_get_sb, |
| 109 | .kill_sb = nfs4_kill_super, | 109 | .kill_sb = nfs4_kill_super, |
| 110 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 110 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
| 111 | }; | 111 | }; |
| 112 | 112 | ||
| 113 | struct file_system_type nfs4_xdev_fs_type = { | 113 | struct file_system_type nfs4_xdev_fs_type = { |
| @@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = { | |||
| 115 | .name = "nfs4", | 115 | .name = "nfs4", |
| 116 | .get_sb = nfs4_xdev_get_sb, | 116 | .get_sb = nfs4_xdev_get_sb, |
| 117 | .kill_sb = nfs4_kill_super, | 117 | .kill_sb = nfs4_kill_super, |
| 118 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 118 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
| 119 | }; | 119 | }; |
| 120 | 120 | ||
| 121 | struct file_system_type nfs4_referral_fs_type = { | 121 | struct file_system_type nfs4_referral_fs_type = { |
| @@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = { | |||
| 123 | .name = "nfs4", | 123 | .name = "nfs4", |
| 124 | .get_sb = nfs4_referral_get_sb, | 124 | .get_sb = nfs4_referral_get_sb, |
| 125 | .kill_sb = nfs4_kill_super, | 125 | .kill_sb = nfs4_kill_super, |
| 126 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 126 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
| 127 | }; | 127 | }; |
| 128 | 128 | ||
| 129 | static struct super_operations nfs4_sops = { | 129 | static struct super_operations nfs4_sops = { |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index ff9e2e2104c..4b46aac7d24 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
| @@ -44,11 +44,17 @@ | |||
| 44 | * locking semantics of the file system using the protocol. It should | 44 | * locking semantics of the file system using the protocol. It should |
| 45 | * be somewhere else, I'm sure, but right now it isn't. | 45 | * be somewhere else, I'm sure, but right now it isn't. |
| 46 | * | 46 | * |
| 47 | * New in version 4: | ||
| 48 | * - Remove i_generation from lock names for better stat performance. | ||
| 49 | * | ||
| 50 | * New in version 3: | ||
| 51 | * - Replace dentry votes with a cluster lock | ||
| 52 | * | ||
| 47 | * New in version 2: | 53 | * New in version 2: |
| 48 | * - full 64 bit i_size in the metadata lock lvbs | 54 | * - full 64 bit i_size in the metadata lock lvbs |
| 49 | * - introduction of "rw" lock and pushing meta/data locking down | 55 | * - introduction of "rw" lock and pushing meta/data locking down |
| 50 | */ | 56 | */ |
| 51 | #define O2NET_PROTOCOL_VERSION 2ULL | 57 | #define O2NET_PROTOCOL_VERSION 4ULL |
| 52 | struct o2net_handshake { | 58 | struct o2net_handshake { |
| 53 | __be64 protocol_version; | 59 | __be64 protocol_version; |
| 54 | __be64 connector_id; | 60 | __be64 connector_id; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 1a01380e387..014e73978da 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
| @@ -35,15 +35,17 @@ | |||
| 35 | 35 | ||
| 36 | #include "alloc.h" | 36 | #include "alloc.h" |
| 37 | #include "dcache.h" | 37 | #include "dcache.h" |
| 38 | #include "dlmglue.h" | ||
| 38 | #include "file.h" | 39 | #include "file.h" |
| 39 | #include "inode.h" | 40 | #include "inode.h" |
| 40 | 41 | ||
| 42 | |||
| 41 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 43 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
| 42 | struct nameidata *nd) | 44 | struct nameidata *nd) |
| 43 | { | 45 | { |
| 44 | struct inode *inode = dentry->d_inode; | 46 | struct inode *inode = dentry->d_inode; |
| 45 | int ret = 0; /* if all else fails, just return false */ | 47 | int ret = 0; /* if all else fails, just return false */ |
| 46 | struct ocfs2_super *osb; | 48 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 47 | 49 | ||
| 48 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 50 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
| 49 | dentry->d_name.len, dentry->d_name.name); | 51 | dentry->d_name.len, dentry->d_name.name); |
| @@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
| 55 | goto bail; | 57 | goto bail; |
| 56 | } | 58 | } |
| 57 | 59 | ||
| 58 | osb = OCFS2_SB(inode->i_sb); | ||
| 59 | |||
| 60 | BUG_ON(!osb); | 60 | BUG_ON(!osb); |
| 61 | 61 | ||
| 62 | if (inode != osb->root_inode) { | 62 | if (inode == osb->root_inode || is_bad_inode(inode)) |
| 63 | spin_lock(&OCFS2_I(inode)->ip_lock); | 63 | goto bail; |
| 64 | /* did we or someone else delete this inode? */ | 64 | |
| 65 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { | 65 | spin_lock(&OCFS2_I(inode)->ip_lock); |
| 66 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 66 | /* did we or someone else delete this inode? */ |
| 67 | mlog(0, "inode (%llu) deleted, returning false\n", | 67 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { |
| 68 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 69 | goto bail; | ||
| 70 | } | ||
| 71 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 68 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
| 69 | mlog(0, "inode (%llu) deleted, returning false\n", | ||
| 70 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 71 | goto bail; | ||
| 72 | } | ||
| 73 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
| 72 | 74 | ||
| 73 | if (!inode->i_nlink) { | 75 | /* |
| 74 | mlog(0, "Inode %llu orphaned, returning false " | 76 | * We don't need a cluster lock to test this because once an |
| 75 | "dir = %d\n", | 77 | * inode nlink hits zero, it never goes back. |
| 76 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 78 | */ |
| 77 | S_ISDIR(inode->i_mode)); | 79 | if (inode->i_nlink == 0) { |
| 78 | goto bail; | 80 | mlog(0, "Inode %llu orphaned, returning false " |
| 79 | } | 81 | "dir = %d\n", |
| 82 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 83 | S_ISDIR(inode->i_mode)); | ||
| 84 | goto bail; | ||
| 80 | } | 85 | } |
| 81 | 86 | ||
| 82 | ret = 1; | 87 | ret = 1; |
| @@ -87,6 +92,322 @@ bail: | |||
| 87 | return ret; | 92 | return ret; |
| 88 | } | 93 | } |
| 89 | 94 | ||
| 95 | static int ocfs2_match_dentry(struct dentry *dentry, | ||
| 96 | u64 parent_blkno, | ||
| 97 | int skip_unhashed) | ||
| 98 | { | ||
| 99 | struct inode *parent; | ||
| 100 | |||
| 101 | /* | ||
| 102 | * ocfs2_lookup() does a d_splice_alias() _before_ attaching | ||
| 103 | * to the lock data, so we skip those here, otherwise | ||
| 104 | * ocfs2_dentry_attach_lock() will get its original dentry | ||
| 105 | * back. | ||
| 106 | */ | ||
| 107 | if (!dentry->d_fsdata) | ||
| 108 | return 0; | ||
| 109 | |||
| 110 | if (!dentry->d_parent) | ||
| 111 | return 0; | ||
| 112 | |||
| 113 | if (skip_unhashed && d_unhashed(dentry)) | ||
| 114 | return 0; | ||
| 115 | |||
| 116 | parent = dentry->d_parent->d_inode; | ||
| 117 | /* Negative parent dentry? */ | ||
| 118 | if (!parent) | ||
| 119 | return 0; | ||
| 120 | |||
| 121 | /* Name is in a different directory. */ | ||
| 122 | if (OCFS2_I(parent)->ip_blkno != parent_blkno) | ||
| 123 | return 0; | ||
| 124 | |||
| 125 | return 1; | ||
| 126 | } | ||
| 127 | |||
| 128 | /* | ||
| 129 | * Walk the inode alias list, and find a dentry which has a given | ||
| 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | ||
| 131 | * is looking for a dentry_lock reference. The vote thread is looking | ||
| 132 | * to unhash aliases, so we allow it to skip any that already have | ||
| 133 | * that property. | ||
| 134 | */ | ||
| 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | ||
| 136 | u64 parent_blkno, | ||
| 137 | int skip_unhashed) | ||
| 138 | { | ||
| 139 | struct list_head *p; | ||
| 140 | struct dentry *dentry = NULL; | ||
| 141 | |||
| 142 | spin_lock(&dcache_lock); | ||
| 143 | |||
| 144 | list_for_each(p, &inode->i_dentry) { | ||
| 145 | dentry = list_entry(p, struct dentry, d_alias); | ||
| 146 | |||
| 147 | if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { | ||
| 148 | mlog(0, "dentry found: %.*s\n", | ||
| 149 | dentry->d_name.len, dentry->d_name.name); | ||
| 150 | |||
| 151 | dget_locked(dentry); | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | |||
| 155 | dentry = NULL; | ||
| 156 | } | ||
| 157 | |||
| 158 | spin_unlock(&dcache_lock); | ||
| 159 | |||
| 160 | return dentry; | ||
| 161 | } | ||
| 162 | |||
| 163 | DEFINE_SPINLOCK(dentry_attach_lock); | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Attach this dentry to a cluster lock. | ||
| 167 | * | ||
| 168 | * Dentry locks cover all links in a given directory to a particular | ||
| 169 | * inode. We do this so that ocfs2 can build a lock name which all | ||
| 170 | * nodes in the cluster can agree on at all times. Shoving full names | ||
| 171 | * in the cluster lock won't work due to size restrictions. Covering | ||
| 172 | * links inside of a directory is a good compromise because it still | ||
| 173 | * allows us to use the parent directory lock to synchronize | ||
| 174 | * operations. | ||
| 175 | * | ||
| 176 | * Call this function with the parent dir semaphore and the parent dir | ||
| 177 | * cluster lock held. | ||
| 178 | * | ||
| 179 | * The dir semaphore will protect us from having to worry about | ||
| 180 | * concurrent processes on our node trying to attach a lock at the | ||
| 181 | * same time. | ||
| 182 | * | ||
| 183 | * The dir cluster lock (held at either PR or EX mode) protects us | ||
| 184 | * from unlink and rename on other nodes. | ||
| 185 | * | ||
| 186 | * A dput() can happen asynchronously due to pruning, so we cover | ||
| 187 | * attaching and detaching the dentry lock with a | ||
| 188 | * dentry_attach_lock. | ||
| 189 | * | ||
| 190 | * A node which has done lookup on a name retains a protected read | ||
| | | 191 | * lock until final dput. If the user requests an unlink or rename, | ||
| 192 | * the protected read is upgraded to an exclusive lock. Other nodes | ||
| 193 | * who have seen the dentry will then be informed that they need to | ||
| 194 | * downgrade their lock, which will involve d_delete on the | ||
| 195 | * dentry. This happens in ocfs2_dentry_convert_worker(). | ||
| 196 | */ | ||
| 197 | int ocfs2_dentry_attach_lock(struct dentry *dentry, | ||
| 198 | struct inode *inode, | ||
| 199 | u64 parent_blkno) | ||
| 200 | { | ||
| 201 | int ret; | ||
| 202 | struct dentry *alias; | ||
| 203 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
| 204 | |||
| 205 | mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n", | ||
| 206 | dentry->d_name.len, dentry->d_name.name, | ||
| 207 | (unsigned long long)parent_blkno, dl); | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Negative dentry. We ignore these for now. | ||
| 211 | * | ||
| | | 212 | * XXX: Could we improve ocfs2_dentry_revalidate() by | ||
| 213 | * tracking these? | ||
| 214 | */ | ||
| 215 | if (!inode) | ||
| 216 | return 0; | ||
| 217 | |||
| 218 | if (dl) { | ||
| 219 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | ||
| 220 | " \"%.*s\": old parent: %llu, new: %llu\n", | ||
| 221 | dentry->d_name.len, dentry->d_name.name, | ||
| 222 | (unsigned long long)parent_blkno, | ||
| 223 | (unsigned long long)dl->dl_parent_blkno); | ||
| 224 | return 0; | ||
| 225 | } | ||
| 226 | |||
| 227 | alias = ocfs2_find_local_alias(inode, parent_blkno, 0); | ||
| 228 | if (alias) { | ||
| 229 | /* | ||
| 230 | * Great, an alias exists, which means we must have a | ||
| 231 | * dentry lock already. We can just grab the lock off | ||
| 232 | * the alias and add it to the list. | ||
| 233 | * | ||
| 234 | * We're depending here on the fact that this dentry | ||
| 235 | * was found and exists in the dcache and so must have | ||
| 236 | * a reference to the dentry_lock because we can't | ||
| 237 | * race creates. Final dput() cannot happen on it | ||
| 238 | * since we have it pinned, so our reference is safe. | ||
| 239 | */ | ||
| 240 | dl = alias->d_fsdata; | ||
| 241 | mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", | ||
| 242 | (unsigned long long)parent_blkno, | ||
| 243 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 244 | |||
| 245 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | ||
| 246 | " \"%.*s\": old parent: %llu, new: %llu\n", | ||
| 247 | dentry->d_name.len, dentry->d_name.name, | ||
| 248 | (unsigned long long)parent_blkno, | ||
| 249 | (unsigned long long)dl->dl_parent_blkno); | ||
| 250 | |||
| 251 | mlog(0, "Found: %s\n", dl->dl_lockres.l_name); | ||
| 252 | |||
| 253 | goto out_attach; | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | ||
| 257 | * There are no other aliases | ||
| 258 | */ | ||
| 259 | dl = kmalloc(sizeof(*dl), GFP_NOFS); | ||
| 260 | if (!dl) { | ||
| 261 | ret = -ENOMEM; | ||
| 262 | mlog_errno(ret); | ||
| 263 | return ret; | ||
| 264 | } | ||
| 265 | |||
| 266 | dl->dl_count = 0; | ||
| 267 | /* | ||
| 268 | * Does this have to happen below, for all attaches, in case | ||
| 269 | * the struct inode gets blown away by votes? | ||
| 270 | */ | ||
| 271 | dl->dl_inode = igrab(inode); | ||
| 272 | dl->dl_parent_blkno = parent_blkno; | ||
| 273 | ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); | ||
| 274 | |||
| 275 | out_attach: | ||
| 276 | spin_lock(&dentry_attach_lock); | ||
| 277 | dentry->d_fsdata = dl; | ||
| 278 | dl->dl_count++; | ||
| 279 | spin_unlock(&dentry_attach_lock); | ||
| 280 | |||
| 281 | /* | ||
| 282 | * This actually gets us our PRMODE level lock. From now on, | ||
| 283 | * we'll have a notification if one of these names is | ||
| 284 | * destroyed on another node. | ||
| 285 | */ | ||
| 286 | ret = ocfs2_dentry_lock(dentry, 0); | ||
| 287 | if (!ret) | ||
| 288 | ocfs2_dentry_unlock(dentry, 0); | ||
| 289 | else | ||
| 290 | mlog_errno(ret); | ||
| 291 | |||
| 292 | dput(alias); | ||
| 293 | |||
| 294 | return ret; | ||
| 295 | } | ||
| 296 | |||
| 297 | /* | ||
| 298 | * ocfs2_dentry_iput() and friends. | ||
| 299 | * | ||
| 300 | * At this point, our particular dentry is detached from the inodes | ||
| 301 | * alias list, so there's no way that the locking code can find it. | ||
| 302 | * | ||
| 303 | * The interesting stuff happens when we determine that our lock needs | ||
| 304 | * to go away because this is the last subdir alias in the | ||
| 305 | * system. This function needs to handle a couple things: | ||
| 306 | * | ||
| 307 | * 1) Synchronizing lock shutdown with the downconvert threads. This | ||
| 308 | * is already handled for us via the lockres release drop function | ||
| 309 | * called in ocfs2_release_dentry_lock() | ||
| 310 | * | ||
| 311 | * 2) A race may occur when we're doing our lock shutdown and | ||
| 312 | * another process wants to create a new dentry lock. Right now we | ||
| 313 | * let them race, which means that for a very short while, this | ||
| | | 314 | * node might have two locks on a lock resource. This shouldn't be a | ||
| 315 | * problem though because one of them is in the process of being | ||
| 316 | * thrown out. | ||
| 317 | */ | ||
| 318 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, | ||
| 319 | struct ocfs2_dentry_lock *dl) | ||
| 320 | { | ||
| 321 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); | ||
| 322 | ocfs2_lock_res_free(&dl->dl_lockres); | ||
| 323 | iput(dl->dl_inode); | ||
| 324 | kfree(dl); | ||
| 325 | } | ||
| 326 | |||
| 327 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | ||
| 328 | struct ocfs2_dentry_lock *dl) | ||
| 329 | { | ||
| 330 | int unlock = 0; | ||
| 331 | |||
| 332 | BUG_ON(dl->dl_count == 0); | ||
| 333 | |||
| 334 | spin_lock(&dentry_attach_lock); | ||
| 335 | dl->dl_count--; | ||
| 336 | unlock = !dl->dl_count; | ||
| 337 | spin_unlock(&dentry_attach_lock); | ||
| 338 | |||
| 339 | if (unlock) | ||
| 340 | ocfs2_drop_dentry_lock(osb, dl); | ||
| 341 | } | ||
| 342 | |||
| 343 | static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | ||
| 344 | { | ||
| 345 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
| 346 | |||
| 347 | mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED), | ||
| 348 | "dentry: %.*s\n", dentry->d_name.len, | ||
| 349 | dentry->d_name.name); | ||
| 350 | |||
| 351 | if (!dl) | ||
| 352 | goto out; | ||
| 353 | |||
| 354 | mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n", | ||
| 355 | dentry->d_name.len, dentry->d_name.name, | ||
| 356 | dl->dl_count); | ||
| 357 | |||
| 358 | ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); | ||
| 359 | |||
| 360 | out: | ||
| 361 | iput(inode); | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * d_move(), but keep the locks in sync. | ||
| 366 | * | ||
| 367 | * When we are done, "dentry" will have the parent dir and name of | ||
| 368 | * "target", which will be thrown away. | ||
| 369 | * | ||
| 370 | * We manually update the lock of "dentry" if need be. | ||
| 371 | * | ||
| 372 | * "target" doesn't have it's dentry lock touched - we allow the later | ||
| 373 | * dput() to handle this for us. | ||
| 374 | * | ||
| 375 | * This is called during ocfs2_rename(), while holding parent | ||
| 376 | * directory locks. The dentries have already been deleted on other | ||
| 377 | * nodes via ocfs2_remote_dentry_delete(). | ||
| 378 | * | ||
| | | 379 | * Normally, the VFS handles the d_move() for the file system, after | ||
| 380 | * the ->rename() callback. OCFS2 wants to handle this internally, so | ||
| 381 | * the new lock can be created atomically with respect to the cluster. | ||
| 382 | */ | ||
| 383 | void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | ||
| 384 | struct inode *old_dir, struct inode *new_dir) | ||
| 385 | { | ||
| 386 | int ret; | ||
| 387 | struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); | ||
| 388 | struct inode *inode = dentry->d_inode; | ||
| 389 | |||
| 390 | /* | ||
| 391 | * Move within the same directory, so the actual lock info won't | ||
| 392 | * change. | ||
| 393 | * | ||
| 394 | * XXX: Is there any advantage to dropping the lock here? | ||
| 395 | */ | ||
| 396 | if (old_dir == new_dir) | ||
| 397 | goto out_move; | ||
| 398 | |||
| 399 | ocfs2_dentry_lock_put(osb, dentry->d_fsdata); | ||
| 400 | |||
| 401 | dentry->d_fsdata = NULL; | ||
| 402 | ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); | ||
| 403 | if (ret) | ||
| 404 | mlog_errno(ret); | ||
| 405 | |||
| 406 | out_move: | ||
| 407 | d_move(dentry, target); | ||
| 408 | } | ||
| 409 | |||
| 90 | struct dentry_operations ocfs2_dentry_ops = { | 410 | struct dentry_operations ocfs2_dentry_ops = { |
| 91 | .d_revalidate = ocfs2_dentry_revalidate, | 411 | .d_revalidate = ocfs2_dentry_revalidate, |
| 412 | .d_iput = ocfs2_dentry_iput, | ||
| 92 | }; | 413 | }; |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index 90072771114..c091c34d988 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
| @@ -28,4 +28,31 @@ | |||
| 28 | 28 | ||
| 29 | extern struct dentry_operations ocfs2_dentry_ops; | 29 | extern struct dentry_operations ocfs2_dentry_ops; |
| 30 | 30 | ||
| 31 | struct ocfs2_dentry_lock { | ||
| 32 | unsigned int dl_count; | ||
| 33 | u64 dl_parent_blkno; | ||
| 34 | |||
| 35 | /* | ||
| 36 | * The ocfs2_dentry_lock keeps an inode reference until | ||
| 37 | * dl_lockres has been destroyed. This is usually done in | ||
| 38 | * ->d_iput() anyway, so there should be minimal impact. | ||
| 39 | */ | ||
| 40 | struct inode *dl_inode; | ||
| 41 | struct ocfs2_lock_res dl_lockres; | ||
| 42 | }; | ||
| 43 | |||
| 44 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, | ||
| 45 | u64 parent_blkno); | ||
| 46 | |||
| 47 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_dentry_lock *dl); | ||
| 49 | |||
| 50 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, | ||
| 51 | int skip_unhashed); | ||
| 52 | |||
| 53 | void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | ||
| 54 | struct inode *old_dir, struct inode *new_dir); | ||
| 55 | |||
| 56 | extern spinlock_t dentry_attach_lock; | ||
| 57 | |||
| 31 | #endif /* OCFS2_DCACHE_H */ | 58 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index 53652f51c0e..cfd5cb65cab 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h | |||
| @@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, | |||
| 182 | struct dlm_lockstatus *lksb, | 182 | struct dlm_lockstatus *lksb, |
| 183 | int flags, | 183 | int flags, |
| 184 | const char *name, | 184 | const char *name, |
| 185 | int namelen, | ||
| 185 | dlm_astlockfunc_t *ast, | 186 | dlm_astlockfunc_t *ast, |
| 186 | void *data, | 187 | void *data, |
| 187 | dlm_bastlockfunc_t *bast); | 188 | dlm_bastlockfunc_t *bast); |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index f13a4bac41f..681046d5139 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
| @@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
| 320 | 320 | ||
| 321 | res = dlm_lookup_lockres(dlm, name, locklen); | 321 | res = dlm_lookup_lockres(dlm, name, locklen); |
| 322 | if (!res) { | 322 | if (!res) { |
| 323 | mlog(ML_ERROR, "got %sast for unknown lockres! " | 323 | mlog(0, "got %sast for unknown lockres! " |
| 324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", | 324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", |
| 325 | past->type == DLM_AST ? "" : "b", | 325 | past->type == DLM_AST ? "" : "b", |
| 326 | dlm_get_lock_cookie_node(cookie), | 326 | dlm_get_lock_cookie_node(cookie), |
| 327 | dlm_get_lock_cookie_seq(cookie), | 327 | dlm_get_lock_cookie_seq(cookie), |
| @@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
| 462 | mlog(ML_ERROR, "sent AST to node %u, it returned " | 462 | mlog(ML_ERROR, "sent AST to node %u, it returned " |
| 463 | "DLM_MIGRATING!\n", lock->ml.node); | 463 | "DLM_MIGRATING!\n", lock->ml.node); |
| 464 | BUG(); | 464 | BUG(); |
| 465 | } else if (status != DLM_NORMAL) { | 465 | } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) { |
| 466 | mlog(ML_ERROR, "AST to node %u returned %d!\n", | 466 | mlog(ML_ERROR, "AST to node %u returned %d!\n", |
| 467 | lock->ml.node, status); | 467 | lock->ml.node, status); |
| 468 | /* ignore it */ | 468 | /* ignore it */ |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 14530ee7e11..fa968180b07 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm, | |||
| 747 | u8 owner); | 747 | u8 owner); |
| 748 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | 748 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, |
| 749 | const char *lockid, | 749 | const char *lockid, |
| 750 | int namelen, | ||
| 750 | int flags); | 751 | int flags); |
| 751 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | 752 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, |
| 752 | const char *name, | 753 | const char *name, |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 5ca57ec650c..42a1b91979b 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
| @@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie) | |||
| 540 | 540 | ||
| 541 | enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, | 541 | enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, |
| 542 | struct dlm_lockstatus *lksb, int flags, | 542 | struct dlm_lockstatus *lksb, int flags, |
| 543 | const char *name, dlm_astlockfunc_t *ast, void *data, | 543 | const char *name, int namelen, dlm_astlockfunc_t *ast, |
| 544 | dlm_bastlockfunc_t *bast) | 544 | void *data, dlm_bastlockfunc_t *bast) |
| 545 | { | 545 | { |
| 546 | enum dlm_status status; | 546 | enum dlm_status status; |
| 547 | struct dlm_lock_resource *res = NULL; | 547 | struct dlm_lock_resource *res = NULL; |
| @@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, | |||
| 571 | recovery = (flags & LKM_RECOVERY); | 571 | recovery = (flags & LKM_RECOVERY); |
| 572 | 572 | ||
| 573 | if (recovery && | 573 | if (recovery && |
| 574 | (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) { | 574 | (!dlm_is_recovery_lock(name, namelen) || convert) ) { |
| 575 | dlm_error(status); | 575 | dlm_error(status); |
| 576 | goto error; | 576 | goto error; |
| 577 | } | 577 | } |
| @@ -643,7 +643,7 @@ retry_convert: | |||
| 643 | } | 643 | } |
| 644 | 644 | ||
| 645 | status = DLM_IVBUFLEN; | 645 | status = DLM_IVBUFLEN; |
| 646 | if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) { | 646 | if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) { |
| 647 | dlm_error(status); | 647 | dlm_error(status); |
| 648 | goto error; | 648 | goto error; |
| 649 | } | 649 | } |
| @@ -659,7 +659,7 @@ retry_convert: | |||
| 659 | dlm_wait_for_recovery(dlm); | 659 | dlm_wait_for_recovery(dlm); |
| 660 | 660 | ||
| 661 | /* find or create the lock resource */ | 661 | /* find or create the lock resource */ |
| 662 | res = dlm_get_lock_resource(dlm, name, flags); | 662 | res = dlm_get_lock_resource(dlm, name, namelen, flags); |
| 663 | if (!res) { | 663 | if (!res) { |
| 664 | status = DLM_IVLOCKID; | 664 | status = DLM_IVLOCKID; |
| 665 | dlm_error(status); | 665 | dlm_error(status); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9503240ef0e..f784177b624 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
| 740 | */ | 740 | */ |
| 741 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | 741 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, |
| 742 | const char *lockid, | 742 | const char *lockid, |
| 743 | int namelen, | ||
| 743 | int flags) | 744 | int flags) |
| 744 | { | 745 | { |
| 745 | struct dlm_lock_resource *tmpres=NULL, *res=NULL; | 746 | struct dlm_lock_resource *tmpres=NULL, *res=NULL; |
| @@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
| 748 | int blocked = 0; | 749 | int blocked = 0; |
| 749 | int ret, nodenum; | 750 | int ret, nodenum; |
| 750 | struct dlm_node_iter iter; | 751 | struct dlm_node_iter iter; |
| 751 | unsigned int namelen, hash; | 752 | unsigned int hash; |
| 752 | int tries = 0; | 753 | int tries = 0; |
| 753 | int bit, wait_on_recovery = 0; | 754 | int bit, wait_on_recovery = 0; |
| 754 | 755 | ||
| 755 | BUG_ON(!lockid); | 756 | BUG_ON(!lockid); |
| 756 | 757 | ||
| 757 | namelen = strlen(lockid); | ||
| 758 | hash = dlm_lockid_hash(lockid, namelen); | 758 | hash = dlm_lockid_hash(lockid, namelen); |
| 759 | 759 | ||
| 760 | mlog(0, "get lockres %s (len %d)\n", lockid, namelen); | 760 | mlog(0, "get lockres %s (len %d)\n", lockid, namelen); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 594745fab0b..9d950d7cea3 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -2285,7 +2285,8 @@ again: | |||
| 2285 | memset(&lksb, 0, sizeof(lksb)); | 2285 | memset(&lksb, 0, sizeof(lksb)); |
| 2286 | 2286 | ||
| 2287 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, | 2287 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, |
| 2288 | DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); | 2288 | DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN, |
| 2289 | dlm_reco_ast, dlm, dlm_reco_bast); | ||
| 2289 | 2290 | ||
| 2290 | mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", | 2291 | mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", |
| 2291 | dlm->name, ret, lksb.status); | 2292 | dlm->name, ret, lksb.status); |
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c index e641b084b34..eead48bbfac 100644 --- a/fs/ocfs2/dlm/userdlm.c +++ b/fs/ocfs2/dlm/userdlm.c | |||
| @@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | |||
| 102 | spin_unlock(&lockres->l_lock); | 102 | spin_unlock(&lockres->l_lock); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ | 105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ |
| 106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ |
| 107 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 107 | "resource %.*s: %s\n", dlm_errname(_stat), _func, \ |
| 108 | _lockres->l_name, dlm_errmsg(_stat)); \ | 108 | _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \ |
| 109 | } while (0) | 109 | } while (0) |
| 110 | 110 | ||
| 111 | /* WARNING: This function lives in a world where the only three lock | 111 | /* WARNING: This function lives in a world where the only three lock |
| @@ -127,21 +127,22 @@ static void user_ast(void *opaque) | |||
| 127 | struct user_lock_res *lockres = opaque; | 127 | struct user_lock_res *lockres = opaque; |
| 128 | struct dlm_lockstatus *lksb; | 128 | struct dlm_lockstatus *lksb; |
| 129 | 129 | ||
| 130 | mlog(0, "AST fired for lockres %s\n", lockres->l_name); | 130 | mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen, |
| 131 | lockres->l_name); | ||
| 131 | 132 | ||
| 132 | spin_lock(&lockres->l_lock); | 133 | spin_lock(&lockres->l_lock); |
| 133 | 134 | ||
| 134 | lksb = &(lockres->l_lksb); | 135 | lksb = &(lockres->l_lksb); |
| 135 | if (lksb->status != DLM_NORMAL) { | 136 | if (lksb->status != DLM_NORMAL) { |
| 136 | mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", | 137 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", |
| 137 | lksb->status, lockres->l_name); | 138 | lksb->status, lockres->l_namelen, lockres->l_name); |
| 138 | spin_unlock(&lockres->l_lock); | 139 | spin_unlock(&lockres->l_lock); |
| 139 | return; | 140 | return; |
| 140 | } | 141 | } |
| 141 | 142 | ||
| 142 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, | 143 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, |
| 143 | "Lockres %s, requested ivmode. flags 0x%x\n", | 144 | "Lockres %.*s, requested ivmode. flags 0x%x\n", |
| 144 | lockres->l_name, lockres->l_flags); | 145 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
| 145 | 146 | ||
| 146 | /* we're downconverting. */ | 147 | /* we're downconverting. */ |
| 147 | if (lockres->l_requested < lockres->l_level) { | 148 | if (lockres->l_requested < lockres->l_level) { |
| @@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level) | |||
| 213 | { | 214 | { |
| 214 | struct user_lock_res *lockres = opaque; | 215 | struct user_lock_res *lockres = opaque; |
| 215 | 216 | ||
| 216 | mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", | 217 | mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n", |
| 217 | lockres->l_name, level); | 218 | lockres->l_namelen, lockres->l_name, level); |
| 218 | 219 | ||
| 219 | spin_lock(&lockres->l_lock); | 220 | spin_lock(&lockres->l_lock); |
| 220 | lockres->l_flags |= USER_LOCK_BLOCKED; | 221 | lockres->l_flags |= USER_LOCK_BLOCKED; |
| @@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
| 231 | { | 232 | { |
| 232 | struct user_lock_res *lockres = opaque; | 233 | struct user_lock_res *lockres = opaque; |
| 233 | 234 | ||
| 234 | mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); | 235 | mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen, |
| 236 | lockres->l_name); | ||
| 235 | 237 | ||
| 236 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) | 238 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) |
| 237 | mlog(ML_ERROR, "Dlm returns status %d\n", status); | 239 | mlog(ML_ERROR, "Dlm returns status %d\n", status); |
| @@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
| 244 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { | 246 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { |
| 245 | lockres->l_level = LKM_IVMODE; | 247 | lockres->l_level = LKM_IVMODE; |
| 246 | } else if (status == DLM_CANCELGRANT) { | 248 | } else if (status == DLM_CANCELGRANT) { |
| 247 | mlog(0, "Lock %s, cancel fails, flags 0x%x\n", | ||
| 248 | lockres->l_name, lockres->l_flags); | ||
| 249 | /* We tried to cancel a convert request, but it was | 249 | /* We tried to cancel a convert request, but it was |
| 250 | * already granted. Don't clear the busy flag - the | 250 | * already granted. Don't clear the busy flag - the |
| 251 | * ast should've done this already. */ | 251 | * ast should've done this already. */ |
| @@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
| 255 | } else { | 255 | } else { |
| 256 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | 256 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); |
| 257 | /* Cancel succeeded, we want to re-queue */ | 257 | /* Cancel succeeded, we want to re-queue */ |
| 258 | mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n", | ||
| 259 | lockres->l_name, lockres->l_flags); | ||
| 260 | lockres->l_requested = LKM_IVMODE; /* cancel an | 258 | lockres->l_requested = LKM_IVMODE; /* cancel an |
| 261 | * upconvert | 259 | * upconvert |
| 262 | * request. */ | 260 | * request. */ |
| @@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque) | |||
| 287 | struct user_lock_res *lockres = (struct user_lock_res *) opaque; | 285 | struct user_lock_res *lockres = (struct user_lock_res *) opaque; |
| 288 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 286 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); |
| 289 | 287 | ||
| 290 | mlog(0, "processing lockres %s\n", lockres->l_name); | 288 | mlog(0, "processing lockres %.*s\n", lockres->l_namelen, |
| 289 | lockres->l_name); | ||
| 291 | 290 | ||
| 292 | spin_lock(&lockres->l_lock); | 291 | spin_lock(&lockres->l_lock); |
| 293 | 292 | ||
| 294 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), | 293 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), |
| 295 | "Lockres %s, flags 0x%x\n", | 294 | "Lockres %.*s, flags 0x%x\n", |
| 296 | lockres->l_name, lockres->l_flags); | 295 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
| 297 | 296 | ||
| 298 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's | 297 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's |
| 299 | * set, we want user_ast clear it. */ | 298 | * set, we want user_ast clear it. */ |
| @@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque) | |||
| 305 | * flag, and finally we might get another bast which re-queues | 304 | * flag, and finally we might get another bast which re-queues |
| 306 | * us before our ast for the downconvert is called. */ | 305 | * us before our ast for the downconvert is called. */ |
| 307 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { | 306 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { |
| 308 | mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n", | ||
| 309 | lockres->l_name, lockres->l_flags); | ||
| 310 | spin_unlock(&lockres->l_lock); | 307 | spin_unlock(&lockres->l_lock); |
| 311 | goto drop_ref; | 308 | goto drop_ref; |
| 312 | } | 309 | } |
| 313 | 310 | ||
| 314 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 311 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
| 315 | mlog(0, "lock is in teardown so we do nothing\n"); | ||
| 316 | spin_unlock(&lockres->l_lock); | 312 | spin_unlock(&lockres->l_lock); |
| 317 | goto drop_ref; | 313 | goto drop_ref; |
| 318 | } | 314 | } |
| 319 | 315 | ||
| 320 | if (lockres->l_flags & USER_LOCK_BUSY) { | 316 | if (lockres->l_flags & USER_LOCK_BUSY) { |
| 321 | mlog(0, "Cancel lock %s, flags 0x%x\n", | ||
| 322 | lockres->l_name, lockres->l_flags); | ||
| 323 | |||
| 324 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | 317 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { |
| 325 | spin_unlock(&lockres->l_lock); | 318 | spin_unlock(&lockres->l_lock); |
| 326 | goto drop_ref; | 319 | goto drop_ref; |
| @@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque) | |||
| 372 | &lockres->l_lksb, | 365 | &lockres->l_lksb, |
| 373 | LKM_CONVERT|LKM_VALBLK, | 366 | LKM_CONVERT|LKM_VALBLK, |
| 374 | lockres->l_name, | 367 | lockres->l_name, |
| 368 | lockres->l_namelen, | ||
| 375 | user_ast, | 369 | user_ast, |
| 376 | lockres, | 370 | lockres, |
| 377 | user_bast); | 371 | user_bast); |
| @@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres, | |||
| 420 | 414 | ||
| 421 | if (level != LKM_EXMODE && | 415 | if (level != LKM_EXMODE && |
| 422 | level != LKM_PRMODE) { | 416 | level != LKM_PRMODE) { |
| 423 | mlog(ML_ERROR, "lockres %s: invalid request!\n", | 417 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
| 424 | lockres->l_name); | 418 | lockres->l_namelen, lockres->l_name); |
| 425 | status = -EINVAL; | 419 | status = -EINVAL; |
| 426 | goto bail; | 420 | goto bail; |
| 427 | } | 421 | } |
| 428 | 422 | ||
| 429 | mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", | 423 | mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n", |
| 430 | lockres->l_name, | 424 | lockres->l_namelen, lockres->l_name, |
| 431 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", | 425 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", |
| 432 | lkm_flags); | 426 | lkm_flags); |
| 433 | 427 | ||
| 434 | again: | 428 | again: |
| 435 | if (signal_pending(current)) { | 429 | if (signal_pending(current)) { |
| @@ -474,15 +468,13 @@ again: | |||
| 474 | BUG_ON(level == LKM_IVMODE); | 468 | BUG_ON(level == LKM_IVMODE); |
| 475 | BUG_ON(level == LKM_NLMODE); | 469 | BUG_ON(level == LKM_NLMODE); |
| 476 | 470 | ||
| 477 | mlog(0, "lock %s, get lock from %d to level = %d\n", | ||
| 478 | lockres->l_name, lockres->l_level, level); | ||
| 479 | |||
| 480 | /* call dlm_lock to upgrade lock now */ | 471 | /* call dlm_lock to upgrade lock now */ |
| 481 | status = dlmlock(dlm, | 472 | status = dlmlock(dlm, |
| 482 | level, | 473 | level, |
| 483 | &lockres->l_lksb, | 474 | &lockres->l_lksb, |
| 484 | local_flags, | 475 | local_flags, |
| 485 | lockres->l_name, | 476 | lockres->l_name, |
| 477 | lockres->l_namelen, | ||
| 486 | user_ast, | 478 | user_ast, |
| 487 | lockres, | 479 | lockres, |
| 488 | user_bast); | 480 | user_bast); |
| @@ -498,9 +490,6 @@ again: | |||
| 498 | goto bail; | 490 | goto bail; |
| 499 | } | 491 | } |
| 500 | 492 | ||
| 501 | mlog(0, "lock %s, successfull return from dlmlock\n", | ||
| 502 | lockres->l_name); | ||
| 503 | |||
| 504 | user_wait_on_busy_lock(lockres); | 493 | user_wait_on_busy_lock(lockres); |
| 505 | goto again; | 494 | goto again; |
| 506 | } | 495 | } |
| @@ -508,9 +497,6 @@ again: | |||
| 508 | user_dlm_inc_holders(lockres, level); | 497 | user_dlm_inc_holders(lockres, level); |
| 509 | spin_unlock(&lockres->l_lock); | 498 | spin_unlock(&lockres->l_lock); |
| 510 | 499 | ||
| 511 | mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name, | ||
| 512 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
| 513 | |||
| 514 | status = 0; | 500 | status = 0; |
| 515 | bail: | 501 | bail: |
| 516 | return status; | 502 | return status; |
| @@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres, | |||
| 538 | { | 524 | { |
| 539 | if (level != LKM_EXMODE && | 525 | if (level != LKM_EXMODE && |
| 540 | level != LKM_PRMODE) { | 526 | level != LKM_PRMODE) { |
| 541 | mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); | 527 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
| 528 | lockres->l_namelen, lockres->l_name); | ||
| 542 | return; | 529 | return; |
| 543 | } | 530 | } |
| 544 | 531 | ||
| 545 | mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name, | ||
| 546 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
| 547 | |||
| 548 | spin_lock(&lockres->l_lock); | 532 | spin_lock(&lockres->l_lock); |
| 549 | user_dlm_dec_holders(lockres, level); | 533 | user_dlm_dec_holders(lockres, level); |
| 550 | __user_dlm_cond_queue_lockres(lockres); | 534 | __user_dlm_cond_queue_lockres(lockres); |
| @@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres, | |||
| 602 | memcpy(lockres->l_name, | 586 | memcpy(lockres->l_name, |
| 603 | dentry->d_name.name, | 587 | dentry->d_name.name, |
| 604 | dentry->d_name.len); | 588 | dentry->d_name.len); |
| 589 | lockres->l_namelen = dentry->d_name.len; | ||
| 605 | } | 590 | } |
| 606 | 591 | ||
| 607 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | 592 | int user_dlm_destroy_lock(struct user_lock_res *lockres) |
| @@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
| 609 | int status = -EBUSY; | 594 | int status = -EBUSY; |
| 610 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 595 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); |
| 611 | 596 | ||
| 612 | mlog(0, "asked to destroy %s\n", lockres->l_name); | 597 | mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name); |
| 613 | 598 | ||
| 614 | spin_lock(&lockres->l_lock); | 599 | spin_lock(&lockres->l_lock); |
| 615 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 600 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
| 616 | mlog(0, "Lock is already torn down\n"); | ||
| 617 | spin_unlock(&lockres->l_lock); | 601 | spin_unlock(&lockres->l_lock); |
| 618 | return 0; | 602 | return 0; |
| 619 | } | 603 | } |
| @@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
| 623 | while (lockres->l_flags & USER_LOCK_BUSY) { | 607 | while (lockres->l_flags & USER_LOCK_BUSY) { |
| 624 | spin_unlock(&lockres->l_lock); | 608 | spin_unlock(&lockres->l_lock); |
| 625 | 609 | ||
| 626 | mlog(0, "lock %s is busy\n", lockres->l_name); | ||
| 627 | |||
| 628 | user_wait_on_busy_lock(lockres); | 610 | user_wait_on_busy_lock(lockres); |
| 629 | 611 | ||
| 630 | spin_lock(&lockres->l_lock); | 612 | spin_lock(&lockres->l_lock); |
| @@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
| 632 | 614 | ||
| 633 | if (lockres->l_ro_holders || lockres->l_ex_holders) { | 615 | if (lockres->l_ro_holders || lockres->l_ex_holders) { |
| 634 | spin_unlock(&lockres->l_lock); | 616 | spin_unlock(&lockres->l_lock); |
| 635 | mlog(0, "lock %s has holders\n", lockres->l_name); | ||
| 636 | goto bail; | 617 | goto bail; |
| 637 | } | 618 | } |
| 638 | 619 | ||
| 639 | status = 0; | 620 | status = 0; |
| 640 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { | 621 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { |
| 641 | spin_unlock(&lockres->l_lock); | 622 | spin_unlock(&lockres->l_lock); |
| 642 | mlog(0, "lock %s is not attached\n", lockres->l_name); | ||
| 643 | goto bail; | 623 | goto bail; |
| 644 | } | 624 | } |
| 645 | 625 | ||
| @@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
| 647 | lockres->l_flags |= USER_LOCK_BUSY; | 627 | lockres->l_flags |= USER_LOCK_BUSY; |
| 648 | spin_unlock(&lockres->l_lock); | 628 | spin_unlock(&lockres->l_lock); |
| 649 | 629 | ||
| 650 | mlog(0, "unlocking lockres %s\n", lockres->l_name); | ||
| 651 | status = dlmunlock(dlm, | 630 | status = dlmunlock(dlm, |
| 652 | &lockres->l_lksb, | 631 | &lockres->l_lksb, |
| 653 | LKM_VALBLK, | 632 | LKM_VALBLK, |
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index 04178bc40b7..c400e93bbf7 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h | |||
| @@ -53,6 +53,7 @@ struct user_lock_res { | |||
| 53 | 53 | ||
| 54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 | 54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 |
| 55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; | 55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; |
| 56 | int l_namelen; | ||
| 56 | int l_level; | 57 | int l_level; |
| 57 | unsigned int l_ro_holders; | 58 | unsigned int l_ro_holders; |
| 58 | unsigned int l_ex_holders; | 59 | unsigned int l_ex_holders; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 151b41781ea..de887063dcf 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include "ocfs2.h" | 46 | #include "ocfs2.h" |
| 47 | 47 | ||
| 48 | #include "alloc.h" | 48 | #include "alloc.h" |
| 49 | #include "dcache.h" | ||
| 49 | #include "dlmglue.h" | 50 | #include "dlmglue.h" |
| 50 | #include "extent_map.h" | 51 | #include "extent_map.h" |
| 51 | #include "heartbeat.h" | 52 | #include "heartbeat.h" |
| @@ -66,78 +67,161 @@ struct ocfs2_mask_waiter { | |||
| 66 | unsigned long mw_goal; | 67 | unsigned long mw_goal; |
| 67 | }; | 68 | }; |
| 68 | 69 | ||
| 69 | static void ocfs2_inode_ast_func(void *opaque); | 70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
| 70 | static void ocfs2_inode_bast_func(void *opaque, | 71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
| 71 | int level); | ||
| 72 | static void ocfs2_super_ast_func(void *opaque); | ||
| 73 | static void ocfs2_super_bast_func(void *opaque, | ||
| 74 | int level); | ||
| 75 | static void ocfs2_rename_ast_func(void *opaque); | ||
| 76 | static void ocfs2_rename_bast_func(void *opaque, | ||
| 77 | int level); | ||
| 78 | |||
| 79 | /* so far, all locks have gotten along with the same unlock ast */ | ||
| 80 | static void ocfs2_unlock_ast_func(void *opaque, | ||
| 81 | enum dlm_status status); | ||
| 82 | static int ocfs2_do_unblock_meta(struct inode *inode, | ||
| 83 | int *requeue); | ||
| 84 | static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, | ||
| 85 | int *requeue); | ||
| 86 | static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, | ||
| 87 | int *requeue); | ||
| 88 | static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, | ||
| 89 | int *requeue); | ||
| 90 | static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, | ||
| 91 | int *requeue); | ||
| 92 | typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int); | ||
| 93 | static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | ||
| 94 | struct ocfs2_lock_res *lockres, | ||
| 95 | int *requeue, | ||
| 96 | ocfs2_convert_worker_t *worker); | ||
| 97 | 72 | ||
| 73 | /* | ||
| 74 | * Return value from ->downconvert_worker functions. | ||
| 75 | * | ||
| 76 | * These control the precise actions of ocfs2_unblock_lock() | ||
| 77 | * and ocfs2_process_blocked_lock() | ||
| 78 | * | ||
| 79 | */ | ||
| 80 | enum ocfs2_unblock_action { | ||
| 81 | UNBLOCK_CONTINUE = 0, /* Continue downconvert */ | ||
| 82 | UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire | ||
| 83 | * ->post_unlock callback */ | ||
| 84 | UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire | ||
| 85 | * ->post_unlock() callback. */ | ||
| 86 | }; | ||
| 87 | |||
| 88 | struct ocfs2_unblock_ctl { | ||
| 89 | int requeue; | ||
| 90 | enum ocfs2_unblock_action unblock_action; | ||
| 91 | }; | ||
| 92 | |||
| 93 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | ||
| 94 | int new_level); | ||
| 95 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); | ||
| 96 | |||
| 97 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | ||
| 98 | int blocking); | ||
| 99 | |||
| 100 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | ||
| 101 | int blocking); | ||
| 102 | |||
| 103 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | ||
| 104 | struct ocfs2_lock_res *lockres); | ||
| 105 | |||
| 106 | /* | ||
| 107 | * OCFS2 Lock Resource Operations | ||
| 108 | * | ||
| 109 | * These fine tune the behavior of the generic dlmglue locking infrastructure. | ||
| 110 | * | ||
| 111 | * The most basic of lock types can point ->l_priv to their respective | ||
| 112 | * struct ocfs2_super and allow the default actions to manage things. | ||
| 113 | * | ||
| 114 | * Right now, each lock type also needs to implement an init function, | ||
| 115 | * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() | ||
| 116 | * should be called when the lock is no longer needed (i.e., object | ||
| 117 | * destruction time). | ||
| 118 | */ | ||
| 98 | struct ocfs2_lock_res_ops { | 119 | struct ocfs2_lock_res_ops { |
| 99 | void (*ast)(void *); | 120 | /* |
| 100 | void (*bast)(void *, int); | 121 | * Translate an ocfs2_lock_res * into an ocfs2_super *. Define |
| 101 | void (*unlock_ast)(void *, enum dlm_status); | 122 | * this callback if ->l_priv is not an ocfs2_super pointer |
| 102 | int (*unblock)(struct ocfs2_lock_res *, int *); | 123 | */ |
| 124 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Optionally called in the downconvert (or "vote") thread | ||
| 128 | * after a successful downconvert. The lockres will not be | ||
| 129 | * referenced after this callback is called, so it is safe to | ||
| 130 | * free memory, etc. | ||
| 131 | * | ||
| 132 | * The exact semantics of when this is called are controlled | ||
| 133 | * by ->downconvert_worker() | ||
| 134 | */ | ||
| 135 | void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Allow a lock type to add checks to determine whether it is | ||
| 139 | * safe to downconvert a lock. Return 0 to re-queue the | ||
| 140 | * downconvert at a later time, nonzero to continue. | ||
| 141 | * | ||
| 142 | * For most locks, the default checks that there are no | ||
| 143 | * incompatible holders are sufficient. | ||
| 144 | * | ||
| 145 | * Called with the lockres spinlock held. | ||
| 146 | */ | ||
| 147 | int (*check_downconvert)(struct ocfs2_lock_res *, int); | ||
| 148 | |||
| 149 | /* | ||
| 150 | * Allows a lock type to populate the lock value block. This | ||
| 151 | * is called on downconvert, and when we drop a lock. | ||
| 152 | * | ||
| 153 | * Locks that want to use this should set LOCK_TYPE_USES_LVB | ||
| 154 | * in the flags field. | ||
| 155 | * | ||
| 156 | * Called with the lockres spinlock held. | ||
| 157 | */ | ||
| 158 | void (*set_lvb)(struct ocfs2_lock_res *); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Called from the downconvert thread when it is determined | ||
| 162 | * that a lock will be downconverted. This is called without | ||
| 163 | * any locks held so the function can do work that might | ||
| 164 | * schedule (syncing out data, etc). | ||
| 165 | * | ||
| 166 | * This should return any one of the ocfs2_unblock_action | ||
| 167 | * values, depending on what it wants the thread to do. | ||
| 168 | */ | ||
| 169 | int (*downconvert_worker)(struct ocfs2_lock_res *, int); | ||
| 170 | |||
| 171 | /* | ||
| 172 | * LOCK_TYPE_* flags which describe the specific requirements | ||
| 173 | * of a lock type. Descriptions of each individual flag follow. | ||
| 174 | */ | ||
| 175 | int flags; | ||
| 103 | }; | 176 | }; |
| 104 | 177 | ||
| 178 | /* | ||
| 179 | * Some locks want to "refresh" potentially stale data when a | ||
| 180 | * meaningful (PRMODE or EXMODE) lock level is first obtained. If this | ||
| 181 | * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the | ||
| 182 | * individual lockres l_flags member from the ast function. It is | ||
| 183 | * expected that the locking wrapper will clear the | ||
| 184 | * OCFS2_LOCK_NEEDS_REFRESH flag when done. | ||
| 185 | */ | ||
| 186 | #define LOCK_TYPE_REQUIRES_REFRESH 0x1 | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Indicate that a lock type makes use of the lock value block. The | ||
| 190 | * ->set_lvb lock type callback must be defined. | ||
| 191 | */ | ||
| 192 | #define LOCK_TYPE_USES_LVB 0x2 | ||
| 193 | |||
| 105 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 194 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { |
| 106 | .ast = ocfs2_inode_ast_func, | 195 | .get_osb = ocfs2_get_inode_osb, |
| 107 | .bast = ocfs2_inode_bast_func, | 196 | .flags = 0, |
| 108 | .unlock_ast = ocfs2_unlock_ast_func, | ||
| 109 | .unblock = ocfs2_unblock_inode_lock, | ||
| 110 | }; | 197 | }; |
| 111 | 198 | ||
| 112 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 199 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { |
| 113 | .ast = ocfs2_inode_ast_func, | 200 | .get_osb = ocfs2_get_inode_osb, |
| 114 | .bast = ocfs2_inode_bast_func, | 201 | .check_downconvert = ocfs2_check_meta_downconvert, |
| 115 | .unlock_ast = ocfs2_unlock_ast_func, | 202 | .set_lvb = ocfs2_set_meta_lvb, |
| 116 | .unblock = ocfs2_unblock_meta, | 203 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
| 117 | }; | 204 | }; |
| 118 | 205 | ||
| 119 | static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | ||
| 120 | int blocking); | ||
| 121 | |||
| 122 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | 206 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { |
| 123 | .ast = ocfs2_inode_ast_func, | 207 | .get_osb = ocfs2_get_inode_osb, |
| 124 | .bast = ocfs2_inode_bast_func, | 208 | .downconvert_worker = ocfs2_data_convert_worker, |
| 125 | .unlock_ast = ocfs2_unlock_ast_func, | 209 | .flags = 0, |
| 126 | .unblock = ocfs2_unblock_data, | ||
| 127 | }; | 210 | }; |
| 128 | 211 | ||
| 129 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 212 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
| 130 | .ast = ocfs2_super_ast_func, | 213 | .flags = LOCK_TYPE_REQUIRES_REFRESH, |
| 131 | .bast = ocfs2_super_bast_func, | ||
| 132 | .unlock_ast = ocfs2_unlock_ast_func, | ||
| 133 | .unblock = ocfs2_unblock_osb_lock, | ||
| 134 | }; | 214 | }; |
| 135 | 215 | ||
| 136 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 216 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { |
| 137 | .ast = ocfs2_rename_ast_func, | 217 | .flags = 0, |
| 138 | .bast = ocfs2_rename_bast_func, | 218 | }; |
| 139 | .unlock_ast = ocfs2_unlock_ast_func, | 219 | |
| 140 | .unblock = ocfs2_unblock_osb_lock, | 220 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { |
| 221 | .get_osb = ocfs2_get_dentry_osb, | ||
| 222 | .post_unlock = ocfs2_dentry_post_unlock, | ||
| 223 | .downconvert_worker = ocfs2_dentry_convert_worker, | ||
| 224 | .flags = 0, | ||
| 141 | }; | 225 | }; |
| 142 | 226 | ||
| 143 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 227 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
| @@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | |||
| 147 | lockres->l_type == OCFS2_LOCK_TYPE_RW; | 231 | lockres->l_type == OCFS2_LOCK_TYPE_RW; |
| 148 | } | 232 | } |
| 149 | 233 | ||
| 150 | static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) | 234 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) |
| 151 | { | 235 | { |
| 152 | return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; | 236 | BUG_ON(!ocfs2_is_inode_lock(lockres)); |
| 153 | } | ||
| 154 | 237 | ||
| 155 | static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) | 238 | return (struct inode *) lockres->l_priv; |
| 156 | { | ||
| 157 | return lockres->l_type == OCFS2_LOCK_TYPE_RENAME; | ||
| 158 | } | 239 | } |
| 159 | 240 | ||
| 160 | static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) | 241 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) |
| 161 | { | 242 | { |
| 162 | BUG_ON(!ocfs2_is_super_lock(lockres) | 243 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); |
| 163 | && !ocfs2_is_rename_lock(lockres)); | ||
| 164 | 244 | ||
| 165 | return (struct ocfs2_super *) lockres->l_priv; | 245 | return (struct ocfs2_dentry_lock *)lockres->l_priv; |
| 166 | } | 246 | } |
| 167 | 247 | ||
| 168 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 248 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) |
| 169 | { | 249 | { |
| 170 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 250 | if (lockres->l_ops->get_osb) |
| 251 | return lockres->l_ops->get_osb(lockres); | ||
| 171 | 252 | ||
| 172 | return (struct inode *) lockres->l_priv; | 253 | return (struct ocfs2_super *)lockres->l_priv; |
| 173 | } | 254 | } |
| 174 | 255 | ||
| 175 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 256 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
| @@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode, | |||
| 200 | struct buffer_head **bh); | 281 | struct buffer_head **bh); |
| 201 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 282 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
| 202 | static inline int ocfs2_highest_compat_lock_level(int level); | 283 | static inline int ocfs2_highest_compat_lock_level(int level); |
| 203 | static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, | ||
| 204 | struct ocfs2_lock_res *lockres, | ||
| 205 | int new_level); | ||
| 206 | |||
| 207 | static char *ocfs2_lock_type_strings[] = { | ||
| 208 | [OCFS2_LOCK_TYPE_META] = "Meta", | ||
| 209 | [OCFS2_LOCK_TYPE_DATA] = "Data", | ||
| 210 | [OCFS2_LOCK_TYPE_SUPER] = "Super", | ||
| 211 | [OCFS2_LOCK_TYPE_RENAME] = "Rename", | ||
| 212 | /* Need to differntiate from [R]ename.. serializing writes is the | ||
| 213 | * important job it does, anyway. */ | ||
| 214 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | ||
| 215 | }; | ||
| 216 | |||
| 217 | static char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | ||
| 218 | { | ||
| 219 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | ||
| 220 | return ocfs2_lock_type_strings[type]; | ||
| 221 | } | ||
| 222 | 284 | ||
| 223 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 285 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
| 224 | u64 blkno, | 286 | u64 blkno, |
| @@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | |||
| 265 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 327 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, |
| 266 | struct ocfs2_lock_res *res, | 328 | struct ocfs2_lock_res *res, |
| 267 | enum ocfs2_lock_type type, | 329 | enum ocfs2_lock_type type, |
| 268 | u64 blkno, | ||
| 269 | u32 generation, | ||
| 270 | struct ocfs2_lock_res_ops *ops, | 330 | struct ocfs2_lock_res_ops *ops, |
| 271 | void *priv) | 331 | void *priv) |
| 272 | { | 332 | { |
| 273 | ocfs2_build_lock_name(type, blkno, generation, res->l_name); | ||
| 274 | |||
| 275 | res->l_type = type; | 333 | res->l_type = type; |
| 276 | res->l_ops = ops; | 334 | res->l_ops = ops; |
| 277 | res->l_priv = priv; | 335 | res->l_priv = priv; |
| @@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | |||
| 299 | 357 | ||
| 300 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 358 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
| 301 | enum ocfs2_lock_type type, | 359 | enum ocfs2_lock_type type, |
| 360 | unsigned int generation, | ||
| 302 | struct inode *inode) | 361 | struct inode *inode) |
| 303 | { | 362 | { |
| 304 | struct ocfs2_lock_res_ops *ops; | 363 | struct ocfs2_lock_res_ops *ops; |
| @@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
| 319 | break; | 378 | break; |
| 320 | }; | 379 | }; |
| 321 | 380 | ||
| 322 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, | 381 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, |
| 323 | OCFS2_I(inode)->ip_blkno, | 382 | generation, res->l_name); |
| 324 | inode->i_generation, ops, inode); | 383 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); |
| 384 | } | ||
| 385 | |||
| 386 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | ||
| 387 | { | ||
| 388 | struct inode *inode = ocfs2_lock_res_inode(lockres); | ||
| 389 | |||
| 390 | return OCFS2_SB(inode->i_sb); | ||
| 391 | } | ||
| 392 | |||
| 393 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | ||
| 394 | { | ||
| 395 | __be64 inode_blkno_be; | ||
| 396 | |||
| 397 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | ||
| 398 | sizeof(__be64)); | ||
| 399 | |||
| 400 | return be64_to_cpu(inode_blkno_be); | ||
| 401 | } | ||
| 402 | |||
| 403 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | ||
| 404 | { | ||
| 405 | struct ocfs2_dentry_lock *dl = lockres->l_priv; | ||
| 406 | |||
| 407 | return OCFS2_SB(dl->dl_inode->i_sb); | ||
| 408 | } | ||
| 409 | |||
| 410 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | ||
| 411 | u64 parent, struct inode *inode) | ||
| 412 | { | ||
| 413 | int len; | ||
| 414 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | ||
| 415 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); | ||
| 416 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; | ||
| 417 | |||
| 418 | ocfs2_lock_res_init_once(lockres); | ||
| 419 | |||
| 420 | /* | ||
| 421 | * Unfortunately, the standard lock naming scheme won't work | ||
| 422 | * here because we have two 16 byte values to use. Instead, | ||
| 423 | * we'll stuff the inode number as a binary value. We still | ||
| 424 | * want error prints to show something without garbling the | ||
| 425 | * display, so drop a null byte in there before the inode | ||
| 426 | * number. A future version of OCFS2 will likely use all | ||
| 427 | * binary lock names. The stringified names have been a | ||
| 428 | * tremendous aid in debugging, but now that the debugfs | ||
| 429 | * interface exists, we can mangle things there if need be. | ||
| 430 | * | ||
| 431 | * NOTE: We also drop the standard "pad" value (the total lock | ||
| 432 | * name size stays the same though - the last part is all | ||
| 433 | * zeros due to the memset in ocfs2_lock_res_init_once() | ||
| 434 | */ | ||
| 435 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | ||
| 436 | "%c%016llx", | ||
| 437 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | ||
| 438 | (long long)parent); | ||
| 439 | |||
| 440 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | ||
| 441 | |||
| 442 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | ||
| 443 | sizeof(__be64)); | ||
| 444 | |||
| 445 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
| 446 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | ||
| 447 | dl); | ||
| 325 | } | 448 | } |
| 326 | 449 | ||
| 327 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 450 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, |
| @@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | |||
| 330 | /* Superblock lockres doesn't come from a slab so we call init | 453 | /* Superblock lockres doesn't come from a slab so we call init |
| 331 | * once on it manually. */ | 454 | * once on it manually. */ |
| 332 | ocfs2_lock_res_init_once(res); | 455 | ocfs2_lock_res_init_once(res); |
| 456 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | ||
| 457 | 0, res->l_name); | ||
| 333 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 458 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, |
| 334 | OCFS2_SUPER_BLOCK_BLKNO, 0, | ||
| 335 | &ocfs2_super_lops, osb); | 459 | &ocfs2_super_lops, osb); |
| 336 | } | 460 | } |
| 337 | 461 | ||
| @@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
| 341 | /* Rename lockres doesn't come from a slab so we call init | 465 | /* Rename lockres doesn't come from a slab so we call init |
| 342 | * once on it manually. */ | 466 | * once on it manually. */ |
| 343 | ocfs2_lock_res_init_once(res); | 467 | ocfs2_lock_res_init_once(res); |
| 344 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, | 468 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); |
| 469 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | ||
| 345 | &ocfs2_rename_lops, osb); | 470 | &ocfs2_rename_lops, osb); |
| 346 | } | 471 | } |
| 347 | 472 | ||
| @@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
| 495 | * information is already up to data. Convert from NL to | 620 | * information is already up to data. Convert from NL to |
| 496 | * *anything* however should mark ourselves as needing an | 621 | * *anything* however should mark ourselves as needing an |
| 497 | * update */ | 622 | * update */ |
| 498 | if (lockres->l_level == LKM_NLMODE) | 623 | if (lockres->l_level == LKM_NLMODE && |
| 624 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
| 499 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 625 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| 500 | 626 | ||
| 501 | lockres->l_level = lockres->l_requested; | 627 | lockres->l_level = lockres->l_requested; |
| @@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
| 512 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 638 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| 513 | 639 | ||
| 514 | if (lockres->l_requested > LKM_NLMODE && | 640 | if (lockres->l_requested > LKM_NLMODE && |
| 515 | !(lockres->l_flags & OCFS2_LOCK_LOCAL)) | 641 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && |
| 642 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
| 516 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 643 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| 517 | 644 | ||
| 518 | lockres->l_level = lockres->l_requested; | 645 | lockres->l_level = lockres->l_requested; |
| @@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
| 522 | mlog_exit_void(); | 649 | mlog_exit_void(); |
| 523 | } | 650 | } |
| 524 | 651 | ||
| 525 | static void ocfs2_inode_ast_func(void *opaque) | ||
| 526 | { | ||
| 527 | struct ocfs2_lock_res *lockres = opaque; | ||
| 528 | struct inode *inode; | ||
| 529 | struct dlm_lockstatus *lksb; | ||
| 530 | unsigned long flags; | ||
| 531 | |||
| 532 | mlog_entry_void(); | ||
| 533 | |||
| 534 | inode = ocfs2_lock_res_inode(lockres); | ||
| 535 | |||
| 536 | mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n", | ||
| 537 | (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action, | ||
| 538 | ocfs2_lock_type_string(lockres->l_type)); | ||
| 539 | |||
| 540 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | ||
| 541 | |||
| 542 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 543 | |||
| 544 | lksb = &(lockres->l_lksb); | ||
| 545 | if (lksb->status != DLM_NORMAL) { | ||
| 546 | mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u " | ||
| 547 | "on inode %llu\n", lksb->status, | ||
| 548 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 549 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 550 | mlog_exit_void(); | ||
| 551 | return; | ||
| 552 | } | ||
| 553 | |||
| 554 | switch(lockres->l_action) { | ||
| 555 | case OCFS2_AST_ATTACH: | ||
| 556 | ocfs2_generic_handle_attach_action(lockres); | ||
| 557 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | ||
| 558 | break; | ||
| 559 | case OCFS2_AST_CONVERT: | ||
| 560 | ocfs2_generic_handle_convert_action(lockres); | ||
| 561 | break; | ||
| 562 | case OCFS2_AST_DOWNCONVERT: | ||
| 563 | ocfs2_generic_handle_downconvert_action(lockres); | ||
| 564 | break; | ||
| 565 | default: | ||
| 566 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | ||
| 567 | "lockres flags = 0x%lx, unlock action: %u\n", | ||
| 568 | lockres->l_name, lockres->l_action, lockres->l_flags, | ||
| 569 | lockres->l_unlock_action); | ||
| 570 | |||
| 571 | BUG(); | ||
| 572 | } | ||
| 573 | |||
| 574 | /* data and rw locking ignores refresh flag for now. */ | ||
| 575 | if (lockres->l_type != OCFS2_LOCK_TYPE_META) | ||
| 576 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | ||
| 577 | |||
| 578 | /* set it to something invalid so if we get called again we | ||
| 579 | * can catch it. */ | ||
| 580 | lockres->l_action = OCFS2_AST_INVALID; | ||
| 581 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 582 | wake_up(&lockres->l_event); | ||
| 583 | |||
| 584 | mlog_exit_void(); | ||
| 585 | } | ||
| 586 | |||
| 587 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 652 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, |
| 588 | int level) | 653 | int level) |
| 589 | { | 654 | { |
| @@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
| 610 | return needs_downconvert; | 675 | return needs_downconvert; |
| 611 | } | 676 | } |
| 612 | 677 | ||
| 613 | static void ocfs2_generic_bast_func(struct ocfs2_super *osb, | 678 | static void ocfs2_blocking_ast(void *opaque, int level) |
| 614 | struct ocfs2_lock_res *lockres, | ||
| 615 | int level) | ||
| 616 | { | 679 | { |
| 680 | struct ocfs2_lock_res *lockres = opaque; | ||
| 681 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | ||
| 617 | int needs_downconvert; | 682 | int needs_downconvert; |
| 618 | unsigned long flags; | 683 | unsigned long flags; |
| 619 | 684 | ||
| 620 | mlog_entry_void(); | ||
| 621 | |||
| 622 | BUG_ON(level <= LKM_NLMODE); | 685 | BUG_ON(level <= LKM_NLMODE); |
| 623 | 686 | ||
| 687 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | ||
| 688 | lockres->l_name, level, lockres->l_level, | ||
| 689 | ocfs2_lock_type_string(lockres->l_type)); | ||
| 690 | |||
| 624 | spin_lock_irqsave(&lockres->l_lock, flags); | 691 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 625 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 692 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
| 626 | if (needs_downconvert) | 693 | if (needs_downconvert) |
| 627 | ocfs2_schedule_blocked_lock(osb, lockres); | 694 | ocfs2_schedule_blocked_lock(osb, lockres); |
| 628 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 695 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 629 | 696 | ||
| 630 | ocfs2_kick_vote_thread(osb); | ||
| 631 | |||
| 632 | wake_up(&lockres->l_event); | 697 | wake_up(&lockres->l_event); |
| 633 | mlog_exit_void(); | ||
| 634 | } | ||
| 635 | |||
| 636 | static void ocfs2_inode_bast_func(void *opaque, int level) | ||
| 637 | { | ||
| 638 | struct ocfs2_lock_res *lockres = opaque; | ||
| 639 | struct inode *inode; | ||
| 640 | struct ocfs2_super *osb; | ||
| 641 | 698 | ||
| 642 | mlog_entry_void(); | 699 | ocfs2_kick_vote_thread(osb); |
| 643 | |||
| 644 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | ||
| 645 | |||
| 646 | inode = ocfs2_lock_res_inode(lockres); | ||
| 647 | osb = OCFS2_SB(inode->i_sb); | ||
| 648 | |||
| 649 | mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n", | ||
| 650 | (unsigned long long)OCFS2_I(inode)->ip_blkno, level, | ||
| 651 | lockres->l_level, ocfs2_lock_type_string(lockres->l_type)); | ||
| 652 | |||
| 653 | ocfs2_generic_bast_func(osb, lockres, level); | ||
| 654 | |||
| 655 | mlog_exit_void(); | ||
| 656 | } | 700 | } |
| 657 | 701 | ||
| 658 | static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | 702 | static void ocfs2_locking_ast(void *opaque) |
| 659 | int ignore_refresh) | ||
| 660 | { | 703 | { |
| 704 | struct ocfs2_lock_res *lockres = opaque; | ||
| 661 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 705 | struct dlm_lockstatus *lksb = &lockres->l_lksb; |
| 662 | unsigned long flags; | 706 | unsigned long flags; |
| 663 | 707 | ||
| @@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | |||
| 673 | switch(lockres->l_action) { | 717 | switch(lockres->l_action) { |
| 674 | case OCFS2_AST_ATTACH: | 718 | case OCFS2_AST_ATTACH: |
| 675 | ocfs2_generic_handle_attach_action(lockres); | 719 | ocfs2_generic_handle_attach_action(lockres); |
| 720 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | ||
| 676 | break; | 721 | break; |
| 677 | case OCFS2_AST_CONVERT: | 722 | case OCFS2_AST_CONVERT: |
| 678 | ocfs2_generic_handle_convert_action(lockres); | 723 | ocfs2_generic_handle_convert_action(lockres); |
| @@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | |||
| 681 | ocfs2_generic_handle_downconvert_action(lockres); | 726 | ocfs2_generic_handle_downconvert_action(lockres); |
| 682 | break; | 727 | break; |
| 683 | default: | 728 | default: |
| 729 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | ||
| 730 | "lockres flags = 0x%lx, unlock action: %u\n", | ||
| 731 | lockres->l_name, lockres->l_action, lockres->l_flags, | ||
| 732 | lockres->l_unlock_action); | ||
| 684 | BUG(); | 733 | BUG(); |
| 685 | } | 734 | } |
| 686 | 735 | ||
| 687 | if (ignore_refresh) | ||
| 688 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | ||
| 689 | |||
| 690 | /* set it to something invalid so if we get called again we | 736 | /* set it to something invalid so if we get called again we |
| 691 | * can catch it. */ | 737 | * can catch it. */ |
| 692 | lockres->l_action = OCFS2_AST_INVALID; | 738 | lockres->l_action = OCFS2_AST_INVALID; |
| 693 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 694 | 739 | ||
| 695 | wake_up(&lockres->l_event); | 740 | wake_up(&lockres->l_event); |
| 696 | } | 741 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 697 | |||
| 698 | static void ocfs2_super_ast_func(void *opaque) | ||
| 699 | { | ||
| 700 | struct ocfs2_lock_res *lockres = opaque; | ||
| 701 | |||
| 702 | mlog_entry_void(); | ||
| 703 | mlog(0, "Superblock AST fired\n"); | ||
| 704 | |||
| 705 | BUG_ON(!ocfs2_is_super_lock(lockres)); | ||
| 706 | ocfs2_generic_ast_func(lockres, 0); | ||
| 707 | |||
| 708 | mlog_exit_void(); | ||
| 709 | } | ||
| 710 | |||
| 711 | static void ocfs2_super_bast_func(void *opaque, | ||
| 712 | int level) | ||
| 713 | { | ||
| 714 | struct ocfs2_lock_res *lockres = opaque; | ||
| 715 | struct ocfs2_super *osb; | ||
| 716 | |||
| 717 | mlog_entry_void(); | ||
| 718 | mlog(0, "Superblock BAST fired\n"); | ||
| 719 | |||
| 720 | BUG_ON(!ocfs2_is_super_lock(lockres)); | ||
| 721 | osb = ocfs2_lock_res_super(lockres); | ||
| 722 | ocfs2_generic_bast_func(osb, lockres, level); | ||
| 723 | |||
| 724 | mlog_exit_void(); | ||
| 725 | } | ||
| 726 | |||
| 727 | static void ocfs2_rename_ast_func(void *opaque) | ||
| 728 | { | ||
| 729 | struct ocfs2_lock_res *lockres = opaque; | ||
| 730 | |||
| 731 | mlog_entry_void(); | ||
| 732 | |||
| 733 | mlog(0, "Rename AST fired\n"); | ||
| 734 | |||
| 735 | BUG_ON(!ocfs2_is_rename_lock(lockres)); | ||
| 736 | |||
| 737 | ocfs2_generic_ast_func(lockres, 1); | ||
| 738 | |||
| 739 | mlog_exit_void(); | ||
| 740 | } | ||
| 741 | |||
| 742 | static void ocfs2_rename_bast_func(void *opaque, | ||
| 743 | int level) | ||
| 744 | { | ||
| 745 | struct ocfs2_lock_res *lockres = opaque; | ||
| 746 | struct ocfs2_super *osb; | ||
| 747 | |||
| 748 | mlog_entry_void(); | ||
| 749 | |||
| 750 | mlog(0, "Rename BAST fired\n"); | ||
| 751 | |||
| 752 | BUG_ON(!ocfs2_is_rename_lock(lockres)); | ||
| 753 | |||
| 754 | osb = ocfs2_lock_res_super(lockres); | ||
| 755 | ocfs2_generic_bast_func(osb, lockres, level); | ||
| 756 | |||
| 757 | mlog_exit_void(); | ||
| 758 | } | 742 | } |
| 759 | 743 | ||
| 760 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 744 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
| @@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
| 810 | &lockres->l_lksb, | 794 | &lockres->l_lksb, |
| 811 | dlm_flags, | 795 | dlm_flags, |
| 812 | lockres->l_name, | 796 | lockres->l_name, |
| 813 | lockres->l_ops->ast, | 797 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 798 | ocfs2_locking_ast, | ||
| 814 | lockres, | 799 | lockres, |
| 815 | lockres->l_ops->bast); | 800 | ocfs2_blocking_ast); |
| 816 | if (status != DLM_NORMAL) { | 801 | if (status != DLM_NORMAL) { |
| 817 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 802 | ocfs2_log_dlm_error("dlmlock", status, lockres); |
| 818 | ret = -EINVAL; | 803 | ret = -EINVAL; |
| @@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
| 930 | 915 | ||
| 931 | ocfs2_init_mask_waiter(&mw); | 916 | ocfs2_init_mask_waiter(&mw); |
| 932 | 917 | ||
| 918 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | ||
| 919 | lkm_flags |= LKM_VALBLK; | ||
| 920 | |||
| 933 | again: | 921 | again: |
| 934 | wait = 0; | 922 | wait = 0; |
| 935 | 923 | ||
| @@ -997,11 +985,12 @@ again: | |||
| 997 | status = dlmlock(osb->dlm, | 985 | status = dlmlock(osb->dlm, |
| 998 | level, | 986 | level, |
| 999 | &lockres->l_lksb, | 987 | &lockres->l_lksb, |
| 1000 | lkm_flags|LKM_CONVERT|LKM_VALBLK, | 988 | lkm_flags|LKM_CONVERT, |
| 1001 | lockres->l_name, | 989 | lockres->l_name, |
| 1002 | lockres->l_ops->ast, | 990 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 991 | ocfs2_locking_ast, | ||
| 1003 | lockres, | 992 | lockres, |
| 1004 | lockres->l_ops->bast); | 993 | ocfs2_blocking_ast); |
| 1005 | if (status != DLM_NORMAL) { | 994 | if (status != DLM_NORMAL) { |
| 1006 | if ((lkm_flags & LKM_NOQUEUE) && | 995 | if ((lkm_flags & LKM_NOQUEUE) && |
| 1007 | (status == DLM_NOTQUEUED)) | 996 | (status == DLM_NOTQUEUED)) |
| @@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
| 1074 | mlog_exit_void(); | 1063 | mlog_exit_void(); |
| 1075 | } | 1064 | } |
| 1076 | 1065 | ||
| 1077 | static int ocfs2_create_new_inode_lock(struct inode *inode, | 1066 | int ocfs2_create_new_lock(struct ocfs2_super *osb, |
| 1078 | struct ocfs2_lock_res *lockres) | 1067 | struct ocfs2_lock_res *lockres, |
| 1068 | int ex, | ||
| 1069 | int local) | ||
| 1079 | { | 1070 | { |
| 1080 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1071 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
| 1081 | unsigned long flags; | 1072 | unsigned long flags; |
| 1073 | int lkm_flags = local ? LKM_LOCAL : 0; | ||
| 1082 | 1074 | ||
| 1083 | spin_lock_irqsave(&lockres->l_lock, flags); | 1075 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 1084 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1076 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| 1085 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 1077 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); |
| 1086 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1078 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1087 | 1079 | ||
| 1088 | return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); | 1080 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); |
| 1089 | } | 1081 | } |
| 1090 | 1082 | ||
| 1091 | /* Grants us an EX lock on the data and metadata resources, skipping | 1083 | /* Grants us an EX lock on the data and metadata resources, skipping |
| @@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode, | |||
| 1097 | int ocfs2_create_new_inode_locks(struct inode *inode) | 1089 | int ocfs2_create_new_inode_locks(struct inode *inode) |
| 1098 | { | 1090 | { |
| 1099 | int ret; | 1091 | int ret; |
| 1092 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1100 | 1093 | ||
| 1101 | BUG_ON(!inode); | 1094 | BUG_ON(!inode); |
| 1102 | BUG_ON(!ocfs2_inode_is_new(inode)); | 1095 | BUG_ON(!ocfs2_inode_is_new(inode)); |
| @@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
| 1113 | * on a resource which has an invalid one -- we'll set it | 1106 | * on a resource which has an invalid one -- we'll set it |
| 1114 | * valid when we release the EX. */ | 1107 | * valid when we release the EX. */ |
| 1115 | 1108 | ||
| 1116 | ret = ocfs2_create_new_inode_lock(inode, | 1109 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); |
| 1117 | &OCFS2_I(inode)->ip_rw_lockres); | ||
| 1118 | if (ret) { | 1110 | if (ret) { |
| 1119 | mlog_errno(ret); | 1111 | mlog_errno(ret); |
| 1120 | goto bail; | 1112 | goto bail; |
| 1121 | } | 1113 | } |
| 1122 | 1114 | ||
| 1123 | ret = ocfs2_create_new_inode_lock(inode, | 1115 | /* |
| 1124 | &OCFS2_I(inode)->ip_meta_lockres); | 1116 | * We don't want to use LKM_LOCAL on a meta data lock as they |
| 1117 | * don't use a generation in their lock names. | ||
| 1118 | */ | ||
| 1119 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | ||
| 1125 | if (ret) { | 1120 | if (ret) { |
| 1126 | mlog_errno(ret); | 1121 | mlog_errno(ret); |
| 1127 | goto bail; | 1122 | goto bail; |
| 1128 | } | 1123 | } |
| 1129 | 1124 | ||
| 1130 | ret = ocfs2_create_new_inode_lock(inode, | 1125 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); |
| 1131 | &OCFS2_I(inode)->ip_data_lockres); | ||
| 1132 | if (ret) { | 1126 | if (ret) { |
| 1133 | mlog_errno(ret); | 1127 | mlog_errno(ret); |
| 1134 | goto bail; | 1128 | goto bail; |
| @@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
| 1317 | 1311 | ||
| 1318 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1312 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; |
| 1319 | 1313 | ||
| 1320 | lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); | 1314 | /* |
| 1315 | * Invalidate the LVB of a deleted inode - this way other | ||
| 1316 | * nodes are forced to go to disk and discover the new inode | ||
| 1317 | * status. | ||
| 1318 | */ | ||
| 1319 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | ||
| 1320 | lvb->lvb_version = 0; | ||
| 1321 | goto out; | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | lvb->lvb_version = OCFS2_LVB_VERSION; | ||
| 1321 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); | 1325 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); |
| 1322 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 1326 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); |
| 1323 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); | 1327 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); |
| @@ -1331,7 +1335,9 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
| 1331 | lvb->lvb_imtime_packed = | 1335 | lvb->lvb_imtime_packed = |
| 1332 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 1336 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); |
| 1333 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); | 1337 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); |
| 1338 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | ||
| 1334 | 1339 | ||
| 1340 | out: | ||
| 1335 | mlog_meta_lvb(0, lockres); | 1341 | mlog_meta_lvb(0, lockres); |
| 1336 | 1342 | ||
| 1337 | mlog_exit_void(); | 1343 | mlog_exit_void(); |
| @@ -1386,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
| 1386 | mlog_exit_void(); | 1392 | mlog_exit_void(); |
| 1387 | } | 1393 | } |
| 1388 | 1394 | ||
| 1389 | static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) | 1395 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
| 1396 | struct ocfs2_lock_res *lockres) | ||
| 1390 | { | 1397 | { |
| 1391 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1398 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; |
| 1392 | 1399 | ||
| 1393 | if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) | 1400 | if (lvb->lvb_version == OCFS2_LVB_VERSION |
| 1401 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | ||
| 1394 | return 1; | 1402 | return 1; |
| 1395 | return 0; | 1403 | return 0; |
| 1396 | } | 1404 | } |
| @@ -1487,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode, | |||
| 1487 | * map (directories, bitmap files, etc) */ | 1495 | * map (directories, bitmap files, etc) */ |
| 1488 | ocfs2_extent_map_trunc(inode, 0); | 1496 | ocfs2_extent_map_trunc(inode, 0); |
| 1489 | 1497 | ||
| 1490 | if (ocfs2_meta_lvb_is_trustable(lockres)) { | 1498 | if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { |
| 1491 | mlog(0, "Trusting LVB on inode %llu\n", | 1499 | mlog(0, "Trusting LVB on inode %llu\n", |
| 1492 | (unsigned long long)oi->ip_blkno); | 1500 | (unsigned long long)oi->ip_blkno); |
| 1493 | ocfs2_refresh_inode_from_lvb(inode); | 1501 | ocfs2_refresh_inode_from_lvb(inode); |
| @@ -1628,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
| 1628 | wait_event(osb->recovery_event, | 1636 | wait_event(osb->recovery_event, |
| 1629 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1637 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
| 1630 | 1638 | ||
| 1639 | /* | ||
| 1640 | * We only see this flag if we're being called from | ||
| 1641 | * ocfs2_read_locked_inode(). It means we're locking an inode | ||
| 1642 | * which hasn't been populated yet, so clear the refresh flag | ||
| 1643 | * and let the caller handle it. | ||
| 1644 | */ | ||
| 1645 | if (inode->i_state & I_NEW) { | ||
| 1646 | status = 0; | ||
| 1647 | ocfs2_complete_lock_res_refresh(lockres, 0); | ||
| 1648 | goto bail; | ||
| 1649 | } | ||
| 1650 | |||
| 1631 | /* This is fun. The caller may want a bh back, or it may | 1651 | /* This is fun. The caller may want a bh back, or it may |
| 1632 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1652 | * not. ocfs2_meta_lock_update definitely wants one in, but |
| 1633 | * may or may not read one, depending on what's in the | 1653 | * may or may not read one, depending on what's in the |
| @@ -1807,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
| 1807 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 1827 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); |
| 1808 | } | 1828 | } |
| 1809 | 1829 | ||
| 1830 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | ||
| 1831 | { | ||
| 1832 | int ret; | ||
| 1833 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
| 1834 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
| 1835 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | ||
| 1836 | |||
| 1837 | BUG_ON(!dl); | ||
| 1838 | |||
| 1839 | if (ocfs2_is_hard_readonly(osb)) | ||
| 1840 | return -EROFS; | ||
| 1841 | |||
| 1842 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | ||
| 1843 | if (ret < 0) | ||
| 1844 | mlog_errno(ret); | ||
| 1845 | |||
| 1846 | return ret; | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | ||
| 1850 | { | ||
| 1851 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
| 1852 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
| 1853 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | ||
| 1854 | |||
| 1855 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | ||
| 1856 | } | ||
| 1857 | |||
| 1810 | /* Reference counting of the dlm debug structure. We want this because | 1858 | /* Reference counting of the dlm debug structure. We want this because |
| 1811 | * open references on the debug inodes can live on after a mount, so | 1859 | * open references on the debug inodes can live on after a mount, so |
| 1812 | * we can't rely on the ocfs2_super to always exist. */ | 1860 | * we can't rely on the ocfs2_super to always exist. */ |
| @@ -1937,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
| 1937 | if (!lockres) | 1985 | if (!lockres) |
| 1938 | return -EINVAL; | 1986 | return -EINVAL; |
| 1939 | 1987 | ||
| 1940 | seq_printf(m, "0x%x\t" | 1988 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); |
| 1941 | "%.*s\t" | 1989 | |
| 1942 | "%d\t" | 1990 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) |
| 1991 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | ||
| 1992 | lockres->l_name, | ||
| 1993 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | ||
| 1994 | else | ||
| 1995 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | ||
| 1996 | |||
| 1997 | seq_printf(m, "%d\t" | ||
| 1943 | "0x%lx\t" | 1998 | "0x%lx\t" |
| 1944 | "0x%x\t" | 1999 | "0x%x\t" |
| 1945 | "0x%x\t" | 2000 | "0x%x\t" |
| @@ -1947,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
| 1947 | "%u\t" | 2002 | "%u\t" |
| 1948 | "%d\t" | 2003 | "%d\t" |
| 1949 | "%d\t", | 2004 | "%d\t", |
| 1950 | OCFS2_DLM_DEBUG_STR_VERSION, | ||
| 1951 | OCFS2_LOCK_ID_MAX_LEN, lockres->l_name, | ||
| 1952 | lockres->l_level, | 2005 | lockres->l_level, |
| 1953 | lockres->l_flags, | 2006 | lockres->l_flags, |
| 1954 | lockres->l_action, | 2007 | lockres->l_action, |
| @@ -2138,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
| 2138 | mlog_exit_void(); | 2191 | mlog_exit_void(); |
| 2139 | } | 2192 | } |
| 2140 | 2193 | ||
| 2141 | static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) | 2194 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) |
| 2142 | { | 2195 | { |
| 2143 | struct ocfs2_lock_res *lockres = opaque; | 2196 | struct ocfs2_lock_res *lockres = opaque; |
| 2144 | unsigned long flags; | 2197 | unsigned long flags; |
| @@ -2194,24 +2247,20 @@ complete_unlock: | |||
| 2194 | mlog_exit_void(); | 2247 | mlog_exit_void(); |
| 2195 | } | 2248 | } |
| 2196 | 2249 | ||
| 2197 | typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *); | ||
| 2198 | |||
| 2199 | struct drop_lock_cb { | ||
| 2200 | ocfs2_pre_drop_cb_t *drop_func; | ||
| 2201 | void *drop_data; | ||
| 2202 | }; | ||
| 2203 | |||
| 2204 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 2250 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
| 2205 | struct ocfs2_lock_res *lockres, | 2251 | struct ocfs2_lock_res *lockres) |
| 2206 | struct drop_lock_cb *dcb) | ||
| 2207 | { | 2252 | { |
| 2208 | enum dlm_status status; | 2253 | enum dlm_status status; |
| 2209 | unsigned long flags; | 2254 | unsigned long flags; |
| 2255 | int lkm_flags = 0; | ||
| 2210 | 2256 | ||
| 2211 | /* We didn't get anywhere near actually using this lockres. */ | 2257 | /* We didn't get anywhere near actually using this lockres. */ |
| 2212 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 2258 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
| 2213 | goto out; | 2259 | goto out; |
| 2214 | 2260 | ||
| 2261 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | ||
| 2262 | lkm_flags |= LKM_VALBLK; | ||
| 2263 | |||
| 2215 | spin_lock_irqsave(&lockres->l_lock, flags); | 2264 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2216 | 2265 | ||
| 2217 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 2266 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), |
| @@ -2234,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2234 | spin_lock_irqsave(&lockres->l_lock, flags); | 2283 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2235 | } | 2284 | } |
| 2236 | 2285 | ||
| 2237 | if (dcb) | 2286 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
| 2238 | dcb->drop_func(lockres, dcb->drop_data); | 2287 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
| 2288 | lockres->l_level == LKM_EXMODE && | ||
| 2289 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
| 2290 | lockres->l_ops->set_lvb(lockres); | ||
| 2291 | } | ||
| 2239 | 2292 | ||
| 2240 | if (lockres->l_flags & OCFS2_LOCK_BUSY) | 2293 | if (lockres->l_flags & OCFS2_LOCK_BUSY) |
| 2241 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 2294 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", |
| @@ -2261,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2261 | 2314 | ||
| 2262 | mlog(0, "lock %s\n", lockres->l_name); | 2315 | mlog(0, "lock %s\n", lockres->l_name); |
| 2263 | 2316 | ||
| 2264 | status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, | 2317 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, |
| 2265 | lockres->l_ops->unlock_ast, lockres); | 2318 | ocfs2_unlock_ast, lockres); |
| 2266 | if (status != DLM_NORMAL) { | 2319 | if (status != DLM_NORMAL) { |
| 2267 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2320 | ocfs2_log_dlm_error("dlmunlock", status, lockres); |
| 2268 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 2321 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
| @@ -2309,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | |||
| 2309 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2362 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2310 | } | 2363 | } |
| 2311 | 2364 | ||
| 2312 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | 2365 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
| 2366 | struct ocfs2_lock_res *lockres) | ||
| 2313 | { | 2367 | { |
| 2314 | int status; | 2368 | int ret; |
| 2315 | |||
| 2316 | mlog_entry_void(); | ||
| 2317 | |||
| 2318 | ocfs2_mark_lockres_freeing(&osb->osb_super_lockres); | ||
| 2319 | |||
| 2320 | status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL); | ||
| 2321 | if (status < 0) | ||
| 2322 | mlog_errno(status); | ||
| 2323 | |||
| 2324 | ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres); | ||
| 2325 | |||
| 2326 | status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL); | ||
| 2327 | if (status < 0) | ||
| 2328 | mlog_errno(status); | ||
| 2329 | 2369 | ||
| 2330 | mlog_exit(status); | 2370 | ocfs2_mark_lockres_freeing(lockres); |
| 2371 | ret = ocfs2_drop_lock(osb, lockres); | ||
| 2372 | if (ret) | ||
| 2373 | mlog_errno(ret); | ||
| 2331 | } | 2374 | } |
| 2332 | 2375 | ||
| 2333 | static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) | 2376 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) |
| 2334 | { | 2377 | { |
| 2335 | struct inode *inode = data; | 2378 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); |
| 2336 | 2379 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | |
| 2337 | /* the metadata lock requires a bit more work as we have an | ||
| 2338 | * LVB to worry about. */ | ||
| 2339 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | ||
| 2340 | lockres->l_level == LKM_EXMODE && | ||
| 2341 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
| 2342 | __ocfs2_stuff_meta_lvb(inode); | ||
| 2343 | } | 2380 | } |
| 2344 | 2381 | ||
| 2345 | int ocfs2_drop_inode_locks(struct inode *inode) | 2382 | int ocfs2_drop_inode_locks(struct inode *inode) |
| 2346 | { | 2383 | { |
| 2347 | int status, err; | 2384 | int status, err; |
| 2348 | struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, }; | ||
| 2349 | 2385 | ||
| 2350 | mlog_entry_void(); | 2386 | mlog_entry_void(); |
| 2351 | 2387 | ||
| @@ -2353,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
| 2353 | * ocfs2_clear_inode has done it for us. */ | 2389 | * ocfs2_clear_inode has done it for us. */ |
| 2354 | 2390 | ||
| 2355 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2391 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
| 2356 | &OCFS2_I(inode)->ip_data_lockres, | 2392 | &OCFS2_I(inode)->ip_data_lockres); |
| 2357 | NULL); | ||
| 2358 | if (err < 0) | 2393 | if (err < 0) |
| 2359 | mlog_errno(err); | 2394 | mlog_errno(err); |
| 2360 | 2395 | ||
| 2361 | status = err; | 2396 | status = err; |
| 2362 | 2397 | ||
| 2363 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2398 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
| 2364 | &OCFS2_I(inode)->ip_meta_lockres, | 2399 | &OCFS2_I(inode)->ip_meta_lockres); |
| 2365 | &meta_dcb); | ||
| 2366 | if (err < 0) | 2400 | if (err < 0) |
| 2367 | mlog_errno(err); | 2401 | mlog_errno(err); |
| 2368 | if (err < 0 && !status) | 2402 | if (err < 0 && !status) |
| 2369 | status = err; | 2403 | status = err; |
| 2370 | 2404 | ||
| 2371 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2405 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
| 2372 | &OCFS2_I(inode)->ip_rw_lockres, | 2406 | &OCFS2_I(inode)->ip_rw_lockres); |
| 2373 | NULL); | ||
| 2374 | if (err < 0) | 2407 | if (err < 0) |
| 2375 | mlog_errno(err); | 2408 | mlog_errno(err); |
| 2376 | if (err < 0 && !status) | 2409 | if (err < 0 && !status) |
| @@ -2419,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
| 2419 | &lockres->l_lksb, | 2452 | &lockres->l_lksb, |
| 2420 | dlm_flags, | 2453 | dlm_flags, |
| 2421 | lockres->l_name, | 2454 | lockres->l_name, |
| 2422 | lockres->l_ops->ast, | 2455 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 2456 | ocfs2_locking_ast, | ||
| 2423 | lockres, | 2457 | lockres, |
| 2424 | lockres->l_ops->bast); | 2458 | ocfs2_blocking_ast); |
| 2425 | if (status != DLM_NORMAL) { | 2459 | if (status != DLM_NORMAL) { |
| 2426 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 2460 | ocfs2_log_dlm_error("dlmlock", status, lockres); |
| 2427 | ret = -EINVAL; | 2461 | ret = -EINVAL; |
| @@ -2480,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
| 2480 | status = dlmunlock(osb->dlm, | 2514 | status = dlmunlock(osb->dlm, |
| 2481 | &lockres->l_lksb, | 2515 | &lockres->l_lksb, |
| 2482 | LKM_CANCEL, | 2516 | LKM_CANCEL, |
| 2483 | lockres->l_ops->unlock_ast, | 2517 | ocfs2_unlock_ast, |
| 2484 | lockres); | 2518 | lockres); |
| 2485 | if (status != DLM_NORMAL) { | 2519 | if (status != DLM_NORMAL) { |
| 2486 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2520 | ocfs2_log_dlm_error("dlmunlock", status, lockres); |
| @@ -2494,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
| 2494 | return ret; | 2528 | return ret; |
| 2495 | } | 2529 | } |
| 2496 | 2530 | ||
| 2497 | static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, | 2531 | static int ocfs2_unblock_lock(struct ocfs2_super *osb, |
| 2498 | struct ocfs2_lock_res *lockres, | 2532 | struct ocfs2_lock_res *lockres, |
| 2499 | int new_level) | 2533 | struct ocfs2_unblock_ctl *ctl) |
| 2500 | { | ||
| 2501 | int ret; | ||
| 2502 | |||
| 2503 | mlog_entry_void(); | ||
| 2504 | |||
| 2505 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | ||
| 2506 | |||
| 2507 | if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { | ||
| 2508 | ret = 0; | ||
| 2509 | mlog(0, "lockres %s currently being refreshed -- backing " | ||
| 2510 | "off!\n", lockres->l_name); | ||
| 2511 | } else if (new_level == LKM_PRMODE) | ||
| 2512 | ret = !lockres->l_ex_holders && | ||
| 2513 | ocfs2_inode_fully_checkpointed(inode); | ||
| 2514 | else /* Must be NLMODE we're converting to. */ | ||
| 2515 | ret = !lockres->l_ro_holders && !lockres->l_ex_holders && | ||
| 2516 | ocfs2_inode_fully_checkpointed(inode); | ||
| 2517 | |||
| 2518 | mlog_exit(ret); | ||
| 2519 | return ret; | ||
| 2520 | } | ||
| 2521 | |||
| 2522 | static int ocfs2_do_unblock_meta(struct inode *inode, | ||
| 2523 | int *requeue) | ||
| 2524 | { | ||
| 2525 | int new_level; | ||
| 2526 | int set_lvb = 0; | ||
| 2527 | int ret = 0; | ||
| 2528 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | ||
| 2529 | unsigned long flags; | ||
| 2530 | |||
| 2531 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 2532 | |||
| 2533 | mlog_entry_void(); | ||
| 2534 | |||
| 2535 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 2536 | |||
| 2537 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | ||
| 2538 | |||
| 2539 | mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level, | ||
| 2540 | lockres->l_blocking); | ||
| 2541 | |||
| 2542 | BUG_ON(lockres->l_level != LKM_EXMODE && | ||
| 2543 | lockres->l_level != LKM_PRMODE); | ||
| 2544 | |||
| 2545 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
| 2546 | *requeue = 1; | ||
| 2547 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
| 2548 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 2549 | if (ret) { | ||
| 2550 | ret = ocfs2_cancel_convert(osb, lockres); | ||
| 2551 | if (ret < 0) | ||
| 2552 | mlog_errno(ret); | ||
| 2553 | } | ||
| 2554 | goto leave; | ||
| 2555 | } | ||
| 2556 | |||
| 2557 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | ||
| 2558 | |||
| 2559 | mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n", | ||
| 2560 | lockres->l_level, lockres->l_blocking, new_level); | ||
| 2561 | |||
| 2562 | if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) { | ||
| 2563 | if (lockres->l_level == LKM_EXMODE) | ||
| 2564 | set_lvb = 1; | ||
| 2565 | |||
| 2566 | /* If the lock hasn't been refreshed yet (rare), then | ||
| 2567 | * our memory inode values are old and we skip | ||
| 2568 | * stuffing the lvb. There's no need to actually clear | ||
| 2569 | * out the lvb here as it's value is still valid. */ | ||
| 2570 | if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { | ||
| 2571 | if (set_lvb) | ||
| 2572 | __ocfs2_stuff_meta_lvb(inode); | ||
| 2573 | } else | ||
| 2574 | mlog(0, "lockres %s: downconverting stale lock!\n", | ||
| 2575 | lockres->l_name); | ||
| 2576 | |||
| 2577 | mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, " | ||
| 2578 | "l_blocking=%d, new_level=%d\n", | ||
| 2579 | lockres->l_level, lockres->l_blocking, new_level); | ||
| 2580 | |||
| 2581 | ocfs2_prepare_downconvert(lockres, new_level); | ||
| 2582 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 2583 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | ||
| 2584 | goto leave; | ||
| 2585 | } | ||
| 2586 | if (!ocfs2_inode_fully_checkpointed(inode)) | ||
| 2587 | ocfs2_start_checkpoint(osb); | ||
| 2588 | |||
| 2589 | *requeue = 1; | ||
| 2590 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 2591 | ret = 0; | ||
| 2592 | leave: | ||
| 2593 | mlog_exit(ret); | ||
| 2594 | return ret; | ||
| 2595 | } | ||
| 2596 | |||
| 2597 | static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | ||
| 2598 | struct ocfs2_lock_res *lockres, | ||
| 2599 | int *requeue, | ||
| 2600 | ocfs2_convert_worker_t *worker) | ||
| 2601 | { | 2534 | { |
| 2602 | unsigned long flags; | 2535 | unsigned long flags; |
| 2603 | int blocking; | 2536 | int blocking; |
| 2604 | int new_level; | 2537 | int new_level; |
| 2605 | int ret = 0; | 2538 | int ret = 0; |
| 2539 | int set_lvb = 0; | ||
| 2606 | 2540 | ||
| 2607 | mlog_entry_void(); | 2541 | mlog_entry_void(); |
| 2608 | 2542 | ||
| @@ -2612,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | |||
| 2612 | 2546 | ||
| 2613 | recheck: | 2547 | recheck: |
| 2614 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 2548 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
| 2615 | *requeue = 1; | 2549 | ctl->requeue = 1; |
| 2616 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 2550 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
| 2617 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2551 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2618 | if (ret) { | 2552 | if (ret) { |
| @@ -2626,27 +2560,33 @@ recheck: | |||
| 2626 | /* if we're blocking an exclusive and we have *any* holders, | 2560 | /* if we're blocking an exclusive and we have *any* holders, |
| 2627 | * then requeue. */ | 2561 | * then requeue. */ |
| 2628 | if ((lockres->l_blocking == LKM_EXMODE) | 2562 | if ((lockres->l_blocking == LKM_EXMODE) |
| 2629 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | 2563 | && (lockres->l_ex_holders || lockres->l_ro_holders)) |
| 2630 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2564 | goto leave_requeue; |
| 2631 | *requeue = 1; | ||
| 2632 | ret = 0; | ||
| 2633 | goto leave; | ||
| 2634 | } | ||
| 2635 | 2565 | ||
| 2636 | /* If it's a PR we're blocking, then only | 2566 | /* If it's a PR we're blocking, then only |
| 2637 | * requeue if we've got any EX holders */ | 2567 | * requeue if we've got any EX holders */ |
| 2638 | if (lockres->l_blocking == LKM_PRMODE && | 2568 | if (lockres->l_blocking == LKM_PRMODE && |
| 2639 | lockres->l_ex_holders) { | 2569 | lockres->l_ex_holders) |
| 2640 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2570 | goto leave_requeue; |
| 2641 | *requeue = 1; | 2571 | |
| 2642 | ret = 0; | 2572 | /* |
| 2643 | goto leave; | 2573 | * Can we get a lock in this state if the holder counts are |
| 2644 | } | 2574 | * zero? The meta data unblock code used to check this. |
| 2575 | */ | ||
| 2576 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
| 2577 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) | ||
| 2578 | goto leave_requeue; | ||
| 2579 | |||
| 2580 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | ||
| 2581 | |||
| 2582 | if (lockres->l_ops->check_downconvert | ||
| 2583 | && !lockres->l_ops->check_downconvert(lockres, new_level)) | ||
| 2584 | goto leave_requeue; | ||
| 2645 | 2585 | ||
| 2646 | /* If we get here, then we know that there are no more | 2586 | /* If we get here, then we know that there are no more |
| 2647 | * incompatible holders (and anyone asking for an incompatible | 2587 | * incompatible holders (and anyone asking for an incompatible |
| 2648 | * lock is blocked). We can now downconvert the lock */ | 2588 | * lock is blocked). We can now downconvert the lock */ |
| 2649 | if (!worker) | 2589 | if (!lockres->l_ops->downconvert_worker) |
| 2650 | goto downconvert; | 2590 | goto downconvert; |
| 2651 | 2591 | ||
| 2652 | /* Some lockres types want to do a bit of work before | 2592 | /* Some lockres types want to do a bit of work before |
| @@ -2656,7 +2596,10 @@ recheck: | |||
| 2656 | blocking = lockres->l_blocking; | 2596 | blocking = lockres->l_blocking; |
| 2657 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2597 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2658 | 2598 | ||
| 2659 | worker(lockres, blocking); | 2599 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); |
| 2600 | |||
| 2601 | if (ctl->unblock_action == UNBLOCK_STOP_POST) | ||
| 2602 | goto leave; | ||
| 2660 | 2603 | ||
| 2661 | spin_lock_irqsave(&lockres->l_lock, flags); | 2604 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2662 | if (blocking != lockres->l_blocking) { | 2605 | if (blocking != lockres->l_blocking) { |
| @@ -2666,25 +2609,43 @@ recheck: | |||
| 2666 | } | 2609 | } |
| 2667 | 2610 | ||
| 2668 | downconvert: | 2611 | downconvert: |
| 2669 | *requeue = 0; | 2612 | ctl->requeue = 0; |
| 2670 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 2613 | |
| 2614 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | ||
| 2615 | if (lockres->l_level == LKM_EXMODE) | ||
| 2616 | set_lvb = 1; | ||
| 2617 | |||
| 2618 | /* | ||
| 2619 | * We only set the lvb if the lock has been fully | ||
| 2620 | * refreshed - otherwise we risk setting stale | ||
| 2621 | * data. Otherwise, there's no need to actually clear | ||
| 2622 | * out the lvb here as it's value is still valid. | ||
| 2623 | */ | ||
| 2624 | if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
| 2625 | lockres->l_ops->set_lvb(lockres); | ||
| 2626 | } | ||
| 2671 | 2627 | ||
| 2672 | ocfs2_prepare_downconvert(lockres, new_level); | 2628 | ocfs2_prepare_downconvert(lockres, new_level); |
| 2673 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2629 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2674 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); | 2630 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); |
| 2675 | leave: | 2631 | leave: |
| 2676 | mlog_exit(ret); | 2632 | mlog_exit(ret); |
| 2677 | return ret; | 2633 | return ret; |
| 2634 | |||
| 2635 | leave_requeue: | ||
| 2636 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 2637 | ctl->requeue = 1; | ||
| 2638 | |||
| 2639 | mlog_exit(0); | ||
| 2640 | return 0; | ||
| 2678 | } | 2641 | } |
| 2679 | 2642 | ||
| 2680 | static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 2643 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, |
| 2681 | int blocking) | 2644 | int blocking) |
| 2682 | { | 2645 | { |
| 2683 | struct inode *inode; | 2646 | struct inode *inode; |
| 2684 | struct address_space *mapping; | 2647 | struct address_space *mapping; |
| 2685 | 2648 | ||
| 2686 | mlog_entry_void(); | ||
| 2687 | |||
| 2688 | inode = ocfs2_lock_res_inode(lockres); | 2649 | inode = ocfs2_lock_res_inode(lockres); |
| 2689 | mapping = inode->i_mapping; | 2650 | mapping = inode->i_mapping; |
| 2690 | 2651 | ||
| @@ -2705,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 2705 | filemap_fdatawait(mapping); | 2666 | filemap_fdatawait(mapping); |
| 2706 | } | 2667 | } |
| 2707 | 2668 | ||
| 2708 | mlog_exit_void(); | 2669 | return UNBLOCK_CONTINUE; |
| 2709 | } | 2670 | } |
| 2710 | 2671 | ||
| 2711 | int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, | 2672 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, |
| 2712 | int *requeue) | 2673 | int new_level) |
| 2713 | { | 2674 | { |
| 2714 | int status; | 2675 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
| 2715 | struct inode *inode; | 2676 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); |
| 2716 | struct ocfs2_super *osb; | ||
| 2717 | |||
| 2718 | mlog_entry_void(); | ||
| 2719 | |||
| 2720 | inode = ocfs2_lock_res_inode(lockres); | ||
| 2721 | osb = OCFS2_SB(inode->i_sb); | ||
| 2722 | |||
| 2723 | mlog(0, "unblock inode %llu\n", | ||
| 2724 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 2725 | 2677 | ||
| 2726 | status = ocfs2_generic_unblock_lock(osb, | 2678 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); |
| 2727 | lockres, | 2679 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); |
| 2728 | requeue, | ||
| 2729 | ocfs2_data_convert_worker); | ||
| 2730 | if (status < 0) | ||
| 2731 | mlog_errno(status); | ||
| 2732 | 2680 | ||
| 2733 | mlog(0, "inode %llu, requeue = %d\n", | 2681 | if (checkpointed) |
| 2734 | (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); | 2682 | return 1; |
| 2735 | 2683 | ||
| 2736 | mlog_exit(status); | 2684 | ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); |
| 2737 | return status; | 2685 | return 0; |
| 2738 | } | 2686 | } |
| 2739 | 2687 | ||
| 2740 | static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, | 2688 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) |
| 2741 | int *requeue) | ||
| 2742 | { | 2689 | { |
| 2743 | int status; | 2690 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
| 2744 | struct inode *inode; | ||
| 2745 | |||
| 2746 | mlog_entry_void(); | ||
| 2747 | |||
| 2748 | mlog(0, "Unblock lockres %s\n", lockres->l_name); | ||
| 2749 | |||
| 2750 | inode = ocfs2_lock_res_inode(lockres); | ||
| 2751 | 2691 | ||
| 2752 | status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), | 2692 | __ocfs2_stuff_meta_lvb(inode); |
| 2753 | lockres, | ||
| 2754 | requeue, | ||
| 2755 | NULL); | ||
| 2756 | if (status < 0) | ||
| 2757 | mlog_errno(status); | ||
| 2758 | |||
| 2759 | mlog_exit(status); | ||
| 2760 | return status; | ||
| 2761 | } | 2693 | } |
| 2762 | 2694 | ||
| 2763 | 2695 | /* | |
| 2764 | int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, | 2696 | * Does the final reference drop on our dentry lock. Right now this |
| 2765 | int *requeue) | 2697 | * happens in the vote thread, but we could choose to simplify the |
| 2698 | * dlmglue API and push these off to the ocfs2_wq in the future. | ||
| 2699 | */ | ||
| 2700 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | ||
| 2701 | struct ocfs2_lock_res *lockres) | ||
| 2766 | { | 2702 | { |
| 2767 | int status; | 2703 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); |
| 2768 | struct inode *inode; | 2704 | ocfs2_dentry_lock_put(osb, dl); |
| 2769 | 2705 | } | |
| 2770 | mlog_entry_void(); | ||
| 2771 | 2706 | ||
| 2772 | inode = ocfs2_lock_res_inode(lockres); | 2707 | /* |
| 2708 | * d_delete() matching dentries before the lock downconvert. | ||
| 2709 | * | ||
| 2710 | * At this point, any process waiting to destroy the | ||
| 2711 | * dentry_lock due to last ref count is stopped by the | ||
| 2712 | * OCFS2_LOCK_QUEUED flag. | ||
| 2713 | * | ||
| 2714 | * We have two potential problems | ||
| 2715 | * | ||
| 2716 | * 1) If we do the last reference drop on our dentry_lock (via dput) | ||
| 2717 | * we'll wind up in ocfs2_release_dentry_lock(), waiting on | ||
| 2718 | * the downconvert to finish. Instead we take an elevated | ||
| 2719 | * reference and push the drop until after we've completed our | ||
| 2720 | * unblock processing. | ||
| 2721 | * | ||
| 2722 | * 2) There might be another process with a final reference, | ||
| 2723 | * waiting on us to finish processing. If this is the case, we | ||
| 2724 | * detect it and exit out - there's no more dentries anyway. | ||
| 2725 | */ | ||
| 2726 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | ||
| 2727 | int blocking) | ||
| 2728 | { | ||
| 2729 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); | ||
| 2730 | struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); | ||
| 2731 | struct dentry *dentry; | ||
| 2732 | unsigned long flags; | ||
| 2733 | int extra_ref = 0; | ||
| 2773 | 2734 | ||
| 2774 | mlog(0, "unblock inode %llu\n", | 2735 | /* |
| 2775 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 2736 | * This node is blocking another node from getting a read |
| 2737 | * lock. This happens when we've renamed within a | ||
| 2738 | * directory. We've forced the other nodes to d_delete(), but | ||
| 2739 | * we never actually dropped our lock because it's still | ||
| 2740 | * valid. The downconvert code will retain a PR for this node, | ||
| 2741 | * so there's no further work to do. | ||
| 2742 | */ | ||
| 2743 | if (blocking == LKM_PRMODE) | ||
| 2744 | return UNBLOCK_CONTINUE; | ||
| 2776 | 2745 | ||
| 2777 | status = ocfs2_do_unblock_meta(inode, requeue); | 2746 | /* |
| 2778 | if (status < 0) | 2747 | * Mark this inode as potentially orphaned. The code in |
| 2779 | mlog_errno(status); | 2748 | * ocfs2_delete_inode() will figure out whether it actually |
| 2749 | * needs to be freed or not. | ||
| 2750 | */ | ||
| 2751 | spin_lock(&oi->ip_lock); | ||
| 2752 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
| 2753 | spin_unlock(&oi->ip_lock); | ||
| 2780 | 2754 | ||
| 2781 | mlog(0, "inode %llu, requeue = %d\n", | 2755 | /* |
| 2782 | (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); | 2756 | * Yuck. We need to make sure however that the check of |
| 2757 | * OCFS2_LOCK_FREEING and the extra reference are atomic with | ||
| 2758 | * respect to a reference decrement or the setting of that | ||
| 2759 | * flag. | ||
| 2760 | */ | ||
| 2761 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 2762 | spin_lock(&dentry_attach_lock); | ||
| 2763 | if (!(lockres->l_flags & OCFS2_LOCK_FREEING) | ||
| 2764 | && dl->dl_count) { | ||
| 2765 | dl->dl_count++; | ||
| 2766 | extra_ref = 1; | ||
| 2767 | } | ||
| 2768 | spin_unlock(&dentry_attach_lock); | ||
| 2769 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 2783 | 2770 | ||
| 2784 | mlog_exit(status); | 2771 | mlog(0, "extra_ref = %d\n", extra_ref); |
| 2785 | return status; | ||
| 2786 | } | ||
| 2787 | 2772 | ||
| 2788 | /* Generic unblock function for any lockres whose private data is an | 2773 | /* |
| 2789 | * ocfs2_super pointer. */ | 2774 | * We have a process waiting on us in ocfs2_dentry_iput(), |
| 2790 | static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, | 2775 | * which means we can't have any more outstanding |
| 2791 | int *requeue) | 2776 | * aliases. There's no need to do any more work. |
| 2792 | { | 2777 | */ |
| 2793 | int status; | 2778 | if (!extra_ref) |
| 2794 | struct ocfs2_super *osb; | 2779 | return UNBLOCK_CONTINUE; |
| 2780 | |||
| 2781 | spin_lock(&dentry_attach_lock); | ||
| 2782 | while (1) { | ||
| 2783 | dentry = ocfs2_find_local_alias(dl->dl_inode, | ||
| 2784 | dl->dl_parent_blkno, 1); | ||
| 2785 | if (!dentry) | ||
| 2786 | break; | ||
| 2787 | spin_unlock(&dentry_attach_lock); | ||
| 2795 | 2788 | ||
| 2796 | mlog_entry_void(); | 2789 | mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, |
| 2790 | dentry->d_name.name); | ||
| 2797 | 2791 | ||
| 2798 | mlog(0, "Unblock lockres %s\n", lockres->l_name); | 2792 | /* |
| 2793 | * The following dcache calls may do an | ||
| 2794 | * iput(). Normally we don't want that from the | ||
| 2795 | * downconverting thread, but in this case it's ok | ||
| 2796 | * because the requesting node already has an | ||
| 2797 | * exclusive lock on the inode, so it can't be queued | ||
| 2798 | * for a downconvert. | ||
| 2799 | */ | ||
| 2800 | d_delete(dentry); | ||
| 2801 | dput(dentry); | ||
| 2799 | 2802 | ||
| 2800 | osb = ocfs2_lock_res_super(lockres); | 2803 | spin_lock(&dentry_attach_lock); |
| 2804 | } | ||
| 2805 | spin_unlock(&dentry_attach_lock); | ||
| 2801 | 2806 | ||
| 2802 | status = ocfs2_generic_unblock_lock(osb, | 2807 | /* |
| 2803 | lockres, | 2808 | * If we are the last holder of this dentry lock, there is no |
| 2804 | requeue, | 2809 | * reason to downconvert so skip straight to the unlock. |
| 2805 | NULL); | 2810 | */ |
| 2806 | if (status < 0) | 2811 | if (dl->dl_count == 1) |
| 2807 | mlog_errno(status); | 2812 | return UNBLOCK_STOP_POST; |
| 2808 | 2813 | ||
| 2809 | mlog_exit(status); | 2814 | return UNBLOCK_CONTINUE_POST; |
| 2810 | return status; | ||
| 2811 | } | 2815 | } |
| 2812 | 2816 | ||
| 2813 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 2817 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
| 2814 | struct ocfs2_lock_res *lockres) | 2818 | struct ocfs2_lock_res *lockres) |
| 2815 | { | 2819 | { |
| 2816 | int status; | 2820 | int status; |
| 2817 | int requeue = 0; | 2821 | struct ocfs2_unblock_ctl ctl = {0, 0,}; |
| 2818 | unsigned long flags; | 2822 | unsigned long flags; |
| 2819 | 2823 | ||
| 2820 | /* Our reference to the lockres in this function can be | 2824 | /* Our reference to the lockres in this function can be |
| @@ -2825,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
| 2825 | 2829 | ||
| 2826 | BUG_ON(!lockres); | 2830 | BUG_ON(!lockres); |
| 2827 | BUG_ON(!lockres->l_ops); | 2831 | BUG_ON(!lockres->l_ops); |
| 2828 | BUG_ON(!lockres->l_ops->unblock); | ||
| 2829 | 2832 | ||
| 2830 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 2833 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
| 2831 | 2834 | ||
| @@ -2839,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
| 2839 | goto unqueue; | 2842 | goto unqueue; |
| 2840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2843 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2841 | 2844 | ||
| 2842 | status = lockres->l_ops->unblock(lockres, &requeue); | 2845 | status = ocfs2_unblock_lock(osb, lockres, &ctl); |
| 2843 | if (status < 0) | 2846 | if (status < 0) |
| 2844 | mlog_errno(status); | 2847 | mlog_errno(status); |
| 2845 | 2848 | ||
| 2846 | spin_lock_irqsave(&lockres->l_lock, flags); | 2849 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2847 | unqueue: | 2850 | unqueue: |
| 2848 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { | 2851 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { |
| 2849 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); | 2852 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); |
| 2850 | } else | 2853 | } else |
| 2851 | ocfs2_schedule_blocked_lock(osb, lockres); | 2854 | ocfs2_schedule_blocked_lock(osb, lockres); |
| 2852 | 2855 | ||
| 2853 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, | 2856 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, |
| 2854 | requeue ? "yes" : "no"); | 2857 | ctl.requeue ? "yes" : "no"); |
| 2855 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2858 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2856 | 2859 | ||
| 2860 | if (ctl.unblock_action != UNBLOCK_CONTINUE | ||
| 2861 | && lockres->l_ops->post_unlock) | ||
| 2862 | lockres->l_ops->post_unlock(osb, lockres); | ||
| 2863 | |||
| 2857 | mlog_exit_void(); | 2864 | mlog_exit_void(); |
| 2858 | } | 2865 | } |
| 2859 | 2866 | ||
| @@ -2896,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level, | |||
| 2896 | 2903 | ||
| 2897 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 2904 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
| 2898 | lockres->l_name, function, line); | 2905 | lockres->l_name, function, line); |
| 2899 | mlog(level, "version: %u, clusters: %u\n", | 2906 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", |
| 2900 | be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); | 2907 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), |
| 2908 | be32_to_cpu(lvb->lvb_igeneration)); | ||
| 2901 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 2909 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", |
| 2902 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 2910 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), |
| 2903 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 2911 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 243ae862ece..4a276938722 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
| @@ -27,10 +27,14 @@ | |||
| 27 | #ifndef DLMGLUE_H | 27 | #ifndef DLMGLUE_H |
| 28 | #define DLMGLUE_H | 28 | #define DLMGLUE_H |
| 29 | 29 | ||
| 30 | #define OCFS2_LVB_VERSION 3 | 30 | #include "dcache.h" |
| 31 | |||
| 32 | #define OCFS2_LVB_VERSION 4 | ||
| 31 | 33 | ||
| 32 | struct ocfs2_meta_lvb { | 34 | struct ocfs2_meta_lvb { |
| 33 | __be32 lvb_version; | 35 | __u8 lvb_version; |
| 36 | __u8 lvb_reserved0; | ||
| 37 | __be16 lvb_reserved1; | ||
| 34 | __be32 lvb_iclusters; | 38 | __be32 lvb_iclusters; |
| 35 | __be32 lvb_iuid; | 39 | __be32 lvb_iuid; |
| 36 | __be32 lvb_igid; | 40 | __be32 lvb_igid; |
| @@ -41,7 +45,8 @@ struct ocfs2_meta_lvb { | |||
| 41 | __be16 lvb_imode; | 45 | __be16 lvb_imode; |
| 42 | __be16 lvb_inlink; | 46 | __be16 lvb_inlink; |
| 43 | __be32 lvb_iattr; | 47 | __be32 lvb_iattr; |
| 44 | __be32 lvb_reserved[2]; | 48 | __be32 lvb_igeneration; |
| 49 | __be32 lvb_reserved2; | ||
| 45 | }; | 50 | }; |
| 46 | 51 | ||
| 47 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ | 52 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ |
| @@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb); | |||
| 57 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); | 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); |
| 58 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
| 59 | enum ocfs2_lock_type type, | 64 | enum ocfs2_lock_type type, |
| 65 | unsigned int generation, | ||
| 60 | struct inode *inode); | 66 | struct inode *inode); |
| 67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | ||
| 68 | u64 parent, struct inode *inode); | ||
| 61 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 69 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); |
| 62 | int ocfs2_create_new_inode_locks(struct inode *inode); | 70 | int ocfs2_create_new_inode_locks(struct inode *inode); |
| 71 | int ocfs2_create_new_lock(struct ocfs2_super *osb, | ||
| 72 | struct ocfs2_lock_res *lockres, int ex, int local); | ||
| 63 | int ocfs2_drop_inode_locks(struct inode *inode); | 73 | int ocfs2_drop_inode_locks(struct inode *inode); |
| 64 | int ocfs2_data_lock_full(struct inode *inode, | 74 | int ocfs2_data_lock_full(struct inode *inode, |
| 65 | int write, | 75 | int write, |
| @@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, | |||
| 93 | int ex); | 103 | int ex); |
| 94 | int ocfs2_rename_lock(struct ocfs2_super *osb); | 104 | int ocfs2_rename_lock(struct ocfs2_super *osb); |
| 95 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 105 | void ocfs2_rename_unlock(struct ocfs2_super *osb); |
| 106 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | ||
| 107 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | ||
| 108 | |||
| 96 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 109 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); |
| 110 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | ||
| 111 | struct ocfs2_lock_res *lockres); | ||
| 97 | 112 | ||
| 98 | /* for the vote thread */ | 113 | /* for the vote thread */ |
| 99 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 114 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index ec55ab3c121..fb91089a60a 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | 33 | ||
| 34 | #include "dir.h" | 34 | #include "dir.h" |
| 35 | #include "dlmglue.h" | 35 | #include "dlmglue.h" |
| 36 | #include "dcache.h" | ||
| 36 | #include "export.h" | 37 | #include "export.h" |
| 37 | #include "inode.h" | 38 | #include "inode.h" |
| 38 | 39 | ||
| @@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | |||
| 57 | return ERR_PTR(-ESTALE); | 58 | return ERR_PTR(-ESTALE); |
| 58 | } | 59 | } |
| 59 | 60 | ||
| 60 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); | 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); |
| 61 | 62 | ||
| 62 | if (IS_ERR(inode)) { | 63 | if (IS_ERR(inode)) { |
| 63 | mlog_errno(PTR_ERR(inode)); | 64 | mlog_errno(PTR_ERR(inode)); |
| @@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | |||
| 77 | mlog_errno(-ENOMEM); | 78 | mlog_errno(-ENOMEM); |
| 78 | return ERR_PTR(-ENOMEM); | 79 | return ERR_PTR(-ENOMEM); |
| 79 | } | 80 | } |
| 81 | result->d_op = &ocfs2_dentry_ops; | ||
| 80 | 82 | ||
| 81 | mlog_exit_ptr(result); | 83 | mlog_exit_ptr(result); |
| 82 | return result; | 84 | return result; |
| @@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
| 113 | goto bail_unlock; | 115 | goto bail_unlock; |
| 114 | } | 116 | } |
| 115 | 117 | ||
| 116 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); | 118 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); |
| 117 | if (IS_ERR(inode)) { | 119 | if (IS_ERR(inode)) { |
| 118 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 120 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
| 119 | (unsigned long long)blkno); | 121 | (unsigned long long)blkno); |
| @@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
| 127 | parent = ERR_PTR(-ENOMEM); | 129 | parent = ERR_PTR(-ENOMEM); |
| 128 | } | 130 | } |
| 129 | 131 | ||
| 132 | parent->d_op = &ocfs2_dentry_ops; | ||
| 133 | |||
| 130 | bail_unlock: | 134 | bail_unlock: |
| 131 | ocfs2_meta_unlock(dir, 0); | 135 | ocfs2_meta_unlock(dir, 0); |
| 132 | 136 | ||
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7bcf6915459..69d3db56916 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -54,8 +54,6 @@ | |||
| 54 | 54 | ||
| 55 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
| 56 | 56 | ||
| 57 | #define OCFS2_FI_FLAG_NOWAIT 0x1 | ||
| 58 | #define OCFS2_FI_FLAG_DELETE 0x2 | ||
| 59 | struct ocfs2_find_inode_args | 57 | struct ocfs2_find_inode_args |
| 60 | { | 58 | { |
| 61 | u64 fi_blkno; | 59 | u64 fi_blkno; |
| @@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | |||
| 109 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); | 107 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); |
| 110 | } | 108 | } |
| 111 | 109 | ||
| 112 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | 110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) |
| 113 | { | 111 | { |
| 114 | struct inode *inode = NULL; | 112 | struct inode *inode = NULL; |
| 115 | struct super_block *sb = osb->sb; | 113 | struct super_block *sb = osb->sb; |
| @@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | |||
| 127 | } | 125 | } |
| 128 | 126 | ||
| 129 | args.fi_blkno = blkno; | 127 | args.fi_blkno = blkno; |
| 130 | args.fi_flags = 0; | 128 | args.fi_flags = flags; |
| 131 | args.fi_ino = ino_from_blkno(sb, blkno); | 129 | args.fi_ino = ino_from_blkno(sb, blkno); |
| 132 | 130 | ||
| 133 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, |
| @@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 297 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; | 295 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; |
| 298 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); | 296 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); |
| 299 | 297 | ||
| 300 | if (create_ino) | ||
| 301 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
| 302 | le64_to_cpu(fe->i_blkno)); | ||
| 303 | |||
| 304 | mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n", | ||
| 305 | (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false"); | ||
| 306 | |||
| 307 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 298 | inode->i_nlink = le16_to_cpu(fe->i_links_count); |
| 308 | 299 | ||
| 300 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | ||
| 301 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
| 302 | |||
| 309 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { | 303 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { |
| 310 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; | 304 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; |
| 311 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); | 305 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); |
| @@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 343 | break; | 337 | break; |
| 344 | } | 338 | } |
| 345 | 339 | ||
| 340 | if (create_ino) { | ||
| 341 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
| 342 | le64_to_cpu(fe->i_blkno)); | ||
| 343 | |||
| 344 | /* | ||
| 345 | * If we ever want to create system files from kernel, | ||
| 346 | * the generation argument to | ||
| 347 | * ocfs2_inode_lock_res_init() will have to change. | ||
| 348 | */ | ||
| 349 | BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)); | ||
| 350 | |||
| 351 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
| 352 | OCFS2_LOCK_TYPE_META, 0, inode); | ||
| 353 | } | ||
| 354 | |||
| 346 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, | 355 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, |
| 347 | OCFS2_LOCK_TYPE_RW, inode); | 356 | OCFS2_LOCK_TYPE_RW, inode->i_generation, |
| 348 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 357 | inode); |
| 349 | OCFS2_LOCK_TYPE_META, inode); | 358 | |
| 350 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | 359 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, |
| 351 | OCFS2_LOCK_TYPE_DATA, inode); | 360 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, |
| 361 | inode); | ||
| 352 | 362 | ||
| 353 | ocfs2_set_inode_flags(inode); | 363 | ocfs2_set_inode_flags(inode); |
| 354 | inode->i_flags |= S_NOATIME; | 364 | inode->i_flags |= S_NOATIME; |
| @@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 366 | struct ocfs2_super *osb; | 376 | struct ocfs2_super *osb; |
| 367 | struct ocfs2_dinode *fe; | 377 | struct ocfs2_dinode *fe; |
| 368 | struct buffer_head *bh = NULL; | 378 | struct buffer_head *bh = NULL; |
| 369 | int status; | 379 | int status, can_lock; |
| 370 | int sysfile = 0; | 380 | u32 generation = 0; |
| 371 | 381 | ||
| 372 | mlog_entry("(0x%p, 0x%p)\n", inode, args); | 382 | mlog_entry("(0x%p, 0x%p)\n", inode, args); |
| 373 | 383 | ||
| 374 | status = -EINVAL; | 384 | status = -EINVAL; |
| 375 | if (inode == NULL || inode->i_sb == NULL) { | 385 | if (inode == NULL || inode->i_sb == NULL) { |
| 376 | mlog(ML_ERROR, "bad inode\n"); | 386 | mlog(ML_ERROR, "bad inode\n"); |
| 377 | goto bail; | 387 | return status; |
| 378 | } | 388 | } |
| 379 | sb = inode->i_sb; | 389 | sb = inode->i_sb; |
| 380 | osb = OCFS2_SB(sb); | 390 | osb = OCFS2_SB(sb); |
| @@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 382 | if (!args) { | 392 | if (!args) { |
| 383 | mlog(ML_ERROR, "bad inode args\n"); | 393 | mlog(ML_ERROR, "bad inode args\n"); |
| 384 | make_bad_inode(inode); | 394 | make_bad_inode(inode); |
| 385 | goto bail; | 395 | return status; |
| 396 | } | ||
| 397 | |||
| 398 | /* | ||
| 399 | * To improve performance of cold-cache inode stats, we take | ||
| 400 | * the cluster lock here if possible. | ||
| 401 | * | ||
| 402 | * Generally, OCFS2 never trusts the contents of an inode | ||
| 403 | * unless it's holding a cluster lock, so taking it here isn't | ||
| 404 | * a correctness issue as much as it is a performance | ||
| 405 | * improvement. | ||
| 406 | * | ||
| 407 | * There are three times when taking the lock is not a good idea: | ||
| 408 | * | ||
| 409 | * 1) During startup, before we have initialized the DLM. | ||
| 410 | * | ||
| 411 | * 2) If we are reading certain system files which never get | ||
| 412 | * cluster locks (local alloc, truncate log). | ||
| 413 | * | ||
| 414 | * 3) If the process doing the iget() is responsible for | ||
| 415 | * orphan dir recovery. We're holding the orphan dir lock and | ||
| 416 | * can get into a deadlock with another process on another | ||
| 417 | * node in ->delete_inode(). | ||
| 418 | * | ||
| 419 | * #1 and #2 can be simply solved by never taking the lock | ||
| 420 | * here for system files (which are the only type we read | ||
| 421 | * during mount). It's a heavier approach, but our main | ||
| 422 | * concern is user-accessible files anyway. | ||
| 423 | * | ||
| 424 | * #3 works itself out because we'll eventually take the | ||
| 425 | * cluster lock before trusting anything anyway. | ||
| 426 | */ | ||
| 427 | can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
| 428 | && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); | ||
| 429 | |||
| 430 | /* | ||
| 431 | * To maintain backwards compatibility with older versions of | ||
| 432 | * ocfs2-tools, we still store the generation value for system | ||
| 433 | * files. The only ones that actually matter to userspace are | ||
| 434 | * the journals, but it's easier and inexpensive to just flag | ||
| 435 | * all system files similarly. | ||
| 436 | */ | ||
| 437 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
| 438 | generation = osb->fs_generation; | ||
| 439 | |||
| 440 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
| 441 | OCFS2_LOCK_TYPE_META, | ||
| 442 | generation, inode); | ||
| 443 | |||
| 444 | if (can_lock) { | ||
| 445 | status = ocfs2_meta_lock(inode, NULL, NULL, 0); | ||
| 446 | if (status) { | ||
| 447 | make_bad_inode(inode); | ||
| 448 | mlog_errno(status); | ||
| 449 | return status; | ||
| 450 | } | ||
| 386 | } | 451 | } |
| 387 | 452 | ||
| 388 | /* Read the FE off disk. This is safe because the kernel only | 453 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, |
| 389 | * does one read_inode2 for a new inode, and if it doesn't | 454 | can_lock ? inode : NULL); |
| 390 | * exist yet then nobody can be working on it! */ | ||
| 391 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL); | ||
| 392 | if (status < 0) { | 455 | if (status < 0) { |
| 393 | mlog_errno(status); | 456 | mlog_errno(status); |
| 394 | make_bad_inode(inode); | ||
| 395 | goto bail; | 457 | goto bail; |
| 396 | } | 458 | } |
| 397 | 459 | ||
| 460 | status = -EINVAL; | ||
| 398 | fe = (struct ocfs2_dinode *) bh->b_data; | 461 | fe = (struct ocfs2_dinode *) bh->b_data; |
| 399 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 462 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
| 400 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", | 463 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", |
| 401 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); | 464 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); |
| 402 | make_bad_inode(inode); | ||
| 403 | goto bail; | 465 | goto bail; |
| 404 | } | 466 | } |
| 405 | 467 | ||
| 406 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | 468 | /* |
| 407 | sysfile = 1; | 469 | * This is a code bug. Right now the caller needs to |
| 470 | * understand whether it is asking for a system file inode or | ||
| 471 | * not so the proper lock names can be built. | ||
| 472 | */ | ||
| 473 | mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != | ||
| 474 | !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), | ||
| 475 | "Inode %llu: system file state is ambigous\n", | ||
| 476 | (unsigned long long)args->fi_blkno); | ||
| 408 | 477 | ||
| 409 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || | 478 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || |
| 410 | S_ISBLK(le16_to_cpu(fe->i_mode))) | 479 | S_ISBLK(le16_to_cpu(fe->i_mode))) |
| 411 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); | 480 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); |
| 412 | 481 | ||
| 413 | status = -EINVAL; | ||
| 414 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { | 482 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { |
| 415 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", | 483 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", |
| 416 | (unsigned long long)fe->i_blkno, inode->i_ino); | 484 | (unsigned long long)fe->i_blkno, inode->i_ino); |
| 417 | make_bad_inode(inode); | ||
| 418 | goto bail; | 485 | goto bail; |
| 419 | } | 486 | } |
| 420 | 487 | ||
| 421 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); | 488 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); |
| 422 | 489 | ||
| 423 | if (sysfile) | ||
| 424 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
| 425 | |||
| 426 | status = 0; | 490 | status = 0; |
| 427 | 491 | ||
| 428 | bail: | 492 | bail: |
| 493 | if (can_lock) | ||
| 494 | ocfs2_meta_unlock(inode, 0); | ||
| 495 | |||
| 496 | if (status < 0) | ||
| 497 | make_bad_inode(inode); | ||
| 498 | |||
| 429 | if (args && bh) | 499 | if (args && bh) |
| 430 | brelse(bh); | 500 | brelse(bh); |
| 431 | 501 | ||
| @@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 898 | goto bail_unlock_inode; | 968 | goto bail_unlock_inode; |
| 899 | } | 969 | } |
| 900 | 970 | ||
| 901 | /* Mark the inode as successfully deleted. This is important | 971 | /* |
| 902 | * for ocfs2_clear_inode as it will check this flag and skip | 972 | * Mark the inode as successfully deleted. |
| 903 | * any checkpointing work */ | 973 | * |
| 974 | * This is important for ocfs2_clear_inode() as it will check | ||
| 975 | * this flag and skip any checkpointing work | ||
| 976 | * | ||
| 977 | * ocfs2_stuff_meta_lvb() also uses this flag to invalidate | ||
| 978 | * the LVB for other nodes. | ||
| 979 | */ | ||
| 904 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 980 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; |
| 905 | 981 | ||
| 906 | bail_unlock_inode: | 982 | bail_unlock_inode: |
| @@ -1025,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode) | |||
| 1025 | /* Testing ip_orphaned_slot here wouldn't work because we may | 1101 | /* Testing ip_orphaned_slot here wouldn't work because we may |
| 1026 | * not have gotten a delete_inode vote from any other nodes | 1102 | * not have gotten a delete_inode vote from any other nodes |
| 1027 | * yet. */ | 1103 | * yet. */ |
| 1028 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { | 1104 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) |
| 1029 | mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); | 1105 | generic_delete_inode(inode); |
| 1030 | inode->i_nlink = 0; | 1106 | else |
| 1031 | } | 1107 | generic_drop_inode(inode); |
| 1032 | |||
| 1033 | generic_drop_inode(inode); | ||
| 1034 | 1108 | ||
| 1035 | mlog_exit_void(); | 1109 | mlog_exit_void(); |
| 1036 | } | 1110 | } |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 4d1e5399256..9957810fdf8 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block, | |||
| 122 | void ocfs2_clear_inode(struct inode *inode); | 122 | void ocfs2_clear_inode(struct inode *inode); |
| 123 | void ocfs2_delete_inode(struct inode *inode); | 123 | void ocfs2_delete_inode(struct inode *inode); |
| 124 | void ocfs2_drop_inode(struct inode *inode); | 124 | void ocfs2_drop_inode(struct inode *inode); |
| 125 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); | 125 | |
| 126 | /* Flags for ocfs2_iget() */ | ||
| 127 | #define OCFS2_FI_FLAG_NOWAIT 0x1 | ||
| 128 | #define OCFS2_FI_FLAG_DELETE 0x2 | ||
| 129 | #define OCFS2_FI_FLAG_SYSFILE 0x4 | ||
| 130 | #define OCFS2_FI_FLAG_NOLOCK 0x8 | ||
| 131 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); | ||
| 126 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | 132 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, |
| 127 | u64 blkno, | 133 | u64 blkno, |
| 128 | int delete_vote); | 134 | int delete_vote); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f92bf1dd379..fd9734def55 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
| 1493 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) | 1493 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) |
| 1494 | continue; | 1494 | continue; |
| 1495 | 1495 | ||
| 1496 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); | 1496 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode), |
| 1497 | OCFS2_FI_FLAG_NOLOCK); | ||
| 1497 | if (IS_ERR(iter)) | 1498 | if (IS_ERR(iter)) |
| 1498 | continue; | 1499 | continue; |
| 1499 | 1500 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 0d3e939b1f5..849c3b4bb94 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
| 179 | if (status < 0) | 179 | if (status < 0) |
| 180 | goto bail_add; | 180 | goto bail_add; |
| 181 | 181 | ||
| 182 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); | 182 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); |
| 183 | if (IS_ERR(inode)) { | 183 | if (IS_ERR(inode)) { |
| 184 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 184 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
| 185 | (unsigned long long)blkno); | 185 | (unsigned long long)blkno); |
| @@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
| 199 | spin_unlock(&oi->ip_lock); | 199 | spin_unlock(&oi->ip_lock); |
| 200 | 200 | ||
| 201 | bail_add: | 201 | bail_add: |
| 202 | |||
| 203 | dentry->d_op = &ocfs2_dentry_ops; | 202 | dentry->d_op = &ocfs2_dentry_ops; |
| 204 | ret = d_splice_alias(inode, dentry); | 203 | ret = d_splice_alias(inode, dentry); |
| 205 | 204 | ||
| 205 | if (inode) { | ||
| 206 | /* | ||
| 207 | * If d_splice_alias() finds a DCACHE_DISCONNECTED | ||
| 208 | * dentry, it will d_move() it on top of ours. The | ||
| 209 | * return value will indicate this however, so in | ||
| 210 | * those cases, we switch them around for the locking | ||
| 211 | * code. | ||
| 212 | * | ||
| 213 | * NOTE: This dentry already has ->d_op set from | ||
| 214 | * ocfs2_get_parent() and ocfs2_get_dentry() | ||
| 215 | */ | ||
| 216 | if (ret) | ||
| 217 | dentry = ret; | ||
| 218 | |||
| 219 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
| 220 | OCFS2_I(dir)->ip_blkno); | ||
| 221 | if (status) { | ||
| 222 | mlog_errno(status); | ||
| 223 | ret = ERR_PTR(status); | ||
| 224 | goto bail_unlock; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 206 | bail_unlock: | 228 | bail_unlock: |
| 207 | /* Don't drop the cluster lock until *after* the d_add -- | 229 | /* Don't drop the cluster lock until *after* the d_add -- |
| 208 | * unlink on another node will message us to remove that | 230 | * unlink on another node will message us to remove that |
| @@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir, | |||
| 418 | goto leave; | 440 | goto leave; |
| 419 | } | 441 | } |
| 420 | 442 | ||
| 443 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
| 444 | OCFS2_I(dir)->ip_blkno); | ||
| 445 | if (status) { | ||
| 446 | mlog_errno(status); | ||
| 447 | goto leave; | ||
| 448 | } | ||
| 449 | |||
| 421 | insert_inode_hash(inode); | 450 | insert_inode_hash(inode); |
| 422 | dentry->d_op = &ocfs2_dentry_ops; | 451 | dentry->d_op = &ocfs2_dentry_ops; |
| 423 | d_instantiate(dentry, inode); | 452 | d_instantiate(dentry, inode); |
| @@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
| 725 | goto bail; | 754 | goto bail; |
| 726 | } | 755 | } |
| 727 | 756 | ||
| 757 | err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
| 758 | if (err) { | ||
| 759 | mlog_errno(err); | ||
| 760 | goto bail; | ||
| 761 | } | ||
| 762 | |||
| 728 | atomic_inc(&inode->i_count); | 763 | atomic_inc(&inode->i_count); |
| 729 | dentry->d_op = &ocfs2_dentry_ops; | 764 | dentry->d_op = &ocfs2_dentry_ops; |
| 730 | d_instantiate(dentry, inode); | 765 | d_instantiate(dentry, inode); |
| @@ -743,6 +778,23 @@ bail: | |||
| 743 | return err; | 778 | return err; |
| 744 | } | 779 | } |
| 745 | 780 | ||
| 781 | /* | ||
| 782 | * Takes and drops an exclusive lock on the given dentry. This will | ||
| 783 | * force other nodes to drop it. | ||
| 784 | */ | ||
| 785 | static int ocfs2_remote_dentry_delete(struct dentry *dentry) | ||
| 786 | { | ||
| 787 | int ret; | ||
| 788 | |||
| 789 | ret = ocfs2_dentry_lock(dentry, 1); | ||
| 790 | if (ret) | ||
| 791 | mlog_errno(ret); | ||
| 792 | else | ||
| 793 | ocfs2_dentry_unlock(dentry, 1); | ||
| 794 | |||
| 795 | return ret; | ||
| 796 | } | ||
| 797 | |||
| 746 | static int ocfs2_unlink(struct inode *dir, | 798 | static int ocfs2_unlink(struct inode *dir, |
| 747 | struct dentry *dentry) | 799 | struct dentry *dentry) |
| 748 | { | 800 | { |
| @@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
| 832 | else | 884 | else |
| 833 | inode->i_nlink--; | 885 | inode->i_nlink--; |
| 834 | 886 | ||
| 835 | status = ocfs2_request_unlink_vote(inode, dentry, | 887 | status = ocfs2_remote_dentry_delete(dentry); |
| 836 | (unsigned int) inode->i_nlink); | ||
| 837 | if (status < 0) { | 888 | if (status < 0) { |
| 838 | /* This vote should succeed under all normal | 889 | /* This vote should succeed under all normal |
| 839 | * circumstances. */ | 890 | * circumstances. */ |
| @@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1019 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, | 1070 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, |
| 1020 | // this is the 1st dirent bh | 1071 | // this is the 1st dirent bh |
| 1021 | nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; | 1072 | nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; |
| 1022 | unsigned int links_count; | ||
| 1023 | 1073 | ||
| 1024 | /* At some point it might be nice to break this function up a | 1074 | /* At some point it might be nice to break this function up a |
| 1025 | * bit. */ | 1075 | * bit. */ |
| @@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1093 | } | 1143 | } |
| 1094 | } | 1144 | } |
| 1095 | 1145 | ||
| 1096 | if (S_ISDIR(old_inode->i_mode)) { | 1146 | /* |
| 1097 | /* Directories actually require metadata updates to | 1147 | * Though we don't require an inode meta data update if |
| 1098 | * the directory info so we can't get away with not | 1148 | * old_inode is not a directory, we lock anyway here to ensure |
| 1099 | * doing node locking on it. */ | 1149 | * the vote thread on other nodes won't have to concurrently |
| 1100 | status = ocfs2_meta_lock(old_inode, handle, NULL, 1); | 1150 | * downconvert the inode and the dentry locks. |
| 1101 | if (status < 0) { | 1151 | */ |
| 1102 | if (status != -ENOENT) | 1152 | status = ocfs2_meta_lock(old_inode, handle, NULL, 1); |
| 1103 | mlog_errno(status); | 1153 | if (status < 0) { |
| 1104 | goto bail; | 1154 | if (status != -ENOENT) |
| 1105 | } | ||
| 1106 | |||
| 1107 | status = ocfs2_request_rename_vote(old_inode, old_dentry); | ||
| 1108 | if (status < 0) { | ||
| 1109 | mlog_errno(status); | 1155 | mlog_errno(status); |
| 1110 | goto bail; | 1156 | goto bail; |
| 1111 | } | 1157 | } |
| 1158 | |||
| 1159 | status = ocfs2_remote_dentry_delete(old_dentry); | ||
| 1160 | if (status < 0) { | ||
| 1161 | mlog_errno(status); | ||
| 1162 | goto bail; | ||
| 1163 | } | ||
| 1112 | 1164 | ||
| 1165 | if (S_ISDIR(old_inode->i_mode)) { | ||
| 1113 | status = -EIO; | 1166 | status = -EIO; |
| 1114 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); | 1167 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); |
| 1115 | if (!old_inode_de_bh) | 1168 | if (!old_inode_de_bh) |
| @@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1123 | if (!new_inode && new_dir!=old_dir && | 1176 | if (!new_inode && new_dir!=old_dir && |
| 1124 | new_dir->i_nlink >= OCFS2_LINK_MAX) | 1177 | new_dir->i_nlink >= OCFS2_LINK_MAX) |
| 1125 | goto bail; | 1178 | goto bail; |
| 1126 | } else { | ||
| 1127 | /* Ah, the simple case - we're a file so just send a | ||
| 1128 | * message. */ | ||
| 1129 | status = ocfs2_request_rename_vote(old_inode, old_dentry); | ||
| 1130 | if (status < 0) { | ||
| 1131 | mlog_errno(status); | ||
| 1132 | goto bail; | ||
| 1133 | } | ||
| 1134 | } | 1179 | } |
| 1135 | 1180 | ||
| 1136 | status = -ENOENT; | 1181 | status = -ENOENT; |
| @@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1202 | goto bail; | 1247 | goto bail; |
| 1203 | } | 1248 | } |
| 1204 | 1249 | ||
| 1205 | if (S_ISDIR(new_inode->i_mode)) | 1250 | status = ocfs2_remote_dentry_delete(new_dentry); |
| 1206 | links_count = 0; | ||
| 1207 | else | ||
| 1208 | links_count = (unsigned int) (new_inode->i_nlink - 1); | ||
| 1209 | |||
| 1210 | status = ocfs2_request_unlink_vote(new_inode, new_dentry, | ||
| 1211 | links_count); | ||
| 1212 | if (status < 0) { | 1251 | if (status < 0) { |
| 1213 | mlog_errno(status); | 1252 | mlog_errno(status); |
| 1214 | goto bail; | 1253 | goto bail; |
| @@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1387 | } | 1426 | } |
| 1388 | } | 1427 | } |
| 1389 | 1428 | ||
| 1429 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); | ||
| 1390 | status = 0; | 1430 | status = 0; |
| 1391 | bail: | 1431 | bail: |
| 1392 | if (rename_lock) | 1432 | if (rename_lock) |
| @@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1675 | goto bail; | 1715 | goto bail; |
| 1676 | } | 1716 | } |
| 1677 | 1717 | ||
| 1718 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
| 1719 | if (status) { | ||
| 1720 | mlog_errno(status); | ||
| 1721 | goto bail; | ||
| 1722 | } | ||
| 1723 | |||
| 1678 | insert_inode_hash(inode); | 1724 | insert_inode_hash(inode); |
| 1679 | dentry->d_op = &ocfs2_dentry_ops; | 1725 | dentry->d_op = &ocfs2_dentry_ops; |
| 1680 | d_instantiate(dentry, inode); | 1726 | d_instantiate(dentry, inode); |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 7dd9e1e705b..4d5d5655c18 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
| @@ -35,12 +35,15 @@ | |||
| 35 | #define OCFS2_LOCK_ID_MAX_LEN 32 | 35 | #define OCFS2_LOCK_ID_MAX_LEN 32 |
| 36 | #define OCFS2_LOCK_ID_PAD "000000" | 36 | #define OCFS2_LOCK_ID_PAD "000000" |
| 37 | 37 | ||
| 38 | #define OCFS2_DENTRY_LOCK_INO_START 18 | ||
| 39 | |||
| 38 | enum ocfs2_lock_type { | 40 | enum ocfs2_lock_type { |
| 39 | OCFS2_LOCK_TYPE_META = 0, | 41 | OCFS2_LOCK_TYPE_META = 0, |
| 40 | OCFS2_LOCK_TYPE_DATA, | 42 | OCFS2_LOCK_TYPE_DATA, |
| 41 | OCFS2_LOCK_TYPE_SUPER, | 43 | OCFS2_LOCK_TYPE_SUPER, |
| 42 | OCFS2_LOCK_TYPE_RENAME, | 44 | OCFS2_LOCK_TYPE_RENAME, |
| 43 | OCFS2_LOCK_TYPE_RW, | 45 | OCFS2_LOCK_TYPE_RW, |
| 46 | OCFS2_LOCK_TYPE_DENTRY, | ||
| 44 | OCFS2_NUM_LOCK_TYPES | 47 | OCFS2_NUM_LOCK_TYPES |
| 45 | }; | 48 | }; |
| 46 | 49 | ||
| @@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
| 63 | case OCFS2_LOCK_TYPE_RW: | 66 | case OCFS2_LOCK_TYPE_RW: |
| 64 | c = 'W'; | 67 | c = 'W'; |
| 65 | break; | 68 | break; |
| 69 | case OCFS2_LOCK_TYPE_DENTRY: | ||
| 70 | c = 'N'; | ||
| 71 | break; | ||
| 66 | default: | 72 | default: |
| 67 | c = '\0'; | 73 | c = '\0'; |
| 68 | } | 74 | } |
| @@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
| 70 | return c; | 76 | return c; |
| 71 | } | 77 | } |
| 72 | 78 | ||
| 79 | static char *ocfs2_lock_type_strings[] = { | ||
| 80 | [OCFS2_LOCK_TYPE_META] = "Meta", | ||
| 81 | [OCFS2_LOCK_TYPE_DATA] = "Data", | ||
| 82 | [OCFS2_LOCK_TYPE_SUPER] = "Super", | ||
| 83 | [OCFS2_LOCK_TYPE_RENAME] = "Rename", | ||
| 84 | /* Need to differentiate from [R]ename.. serializing writes is the | ||
| 85 | * important job it does, anyway. */ | ||
| 86 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | ||
| 87 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | ||
| 88 | }; | ||
| 89 | |||
| 90 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | ||
| 91 | { | ||
| 92 | #ifdef __KERNEL__ | ||
| 93 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | ||
| 94 | #endif | ||
| 95 | return ocfs2_lock_type_strings[type]; | ||
| 96 | } | ||
| 97 | |||
| 73 | #endif /* OCFS2_LOCKID_H */ | 98 | #endif /* OCFS2_LOCKID_H */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d17e33e66a1..4c29cd7cc8e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
| 202 | 202 | ||
| 203 | mlog_entry_void(); | 203 | mlog_entry_void(); |
| 204 | 204 | ||
| 205 | new = ocfs2_iget(osb, osb->root_blkno); | 205 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); |
| 206 | if (IS_ERR(new)) { | 206 | if (IS_ERR(new)) { |
| 207 | status = PTR_ERR(new); | 207 | status = PTR_ERR(new); |
| 208 | mlog_errno(status); | 208 | mlog_errno(status); |
| @@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
| 210 | } | 210 | } |
| 211 | osb->root_inode = new; | 211 | osb->root_inode = new; |
| 212 | 212 | ||
| 213 | new = ocfs2_iget(osb, osb->system_dir_blkno); | 213 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); |
| 214 | if (IS_ERR(new)) { | 214 | if (IS_ERR(new)) { |
| 215 | status = PTR_ERR(new); | 215 | status = PTR_ERR(new); |
| 216 | mlog_errno(status); | 216 | mlog_errno(status); |
| @@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = { | |||
| 682 | .kill_sb = kill_block_super, /* set to the generic one | 682 | .kill_sb = kill_block_super, /* set to the generic one |
| 683 | * right now, but do we | 683 | * right now, but do we |
| 684 | * need to change that? */ | 684 | * need to change that? */ |
| 685 | .fs_flags = FS_REQUIRES_DEV, | 685 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, |
| 686 | .next = NULL | 686 | .next = NULL |
| 687 | }; | 687 | }; |
| 688 | 688 | ||
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fc29cb7a437..5df6e35d09b 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
| @@ -28,11 +28,11 @@ | |||
| 28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
| 29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 30 | 30 | ||
| 31 | #include "ocfs2.h" | ||
| 32 | |||
| 33 | #define MLOG_MASK_PREFIX ML_INODE | 31 | #define MLOG_MASK_PREFIX ML_INODE |
| 34 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
| 35 | 33 | ||
| 34 | #include "ocfs2.h" | ||
| 35 | |||
| 36 | #include "alloc.h" | 36 | #include "alloc.h" |
| 37 | #include "dir.h" | 37 | #include "dir.h" |
| 38 | #include "inode.h" | 38 | #include "inode.h" |
| @@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 115 | goto bail; | 115 | goto bail; |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | inode = ocfs2_iget(osb, blkno); | 118 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE); |
| 119 | if (IS_ERR(inode)) { | 119 | if (IS_ERR(inode)) { |
| 120 | mlog_errno(PTR_ERR(inode)); | 120 | mlog_errno(PTR_ERR(inode)); |
| 121 | inode = NULL; | 121 | inode = NULL; |
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index cf70fe2075b..5b4dca79990 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
| @@ -74,9 +74,6 @@ struct ocfs2_vote_msg | |||
| 74 | __be32 v_orphaned_slot; /* Used during delete votes */ | 74 | __be32 v_orphaned_slot; /* Used during delete votes */ |
| 75 | __be32 v_nlink; /* Used during unlink votes */ | 75 | __be32 v_nlink; /* Used during unlink votes */ |
| 76 | } md1; /* Message type dependant 1 */ | 76 | } md1; /* Message type dependant 1 */ |
| 77 | __be32 v_unlink_namelen; | ||
| 78 | __be64 v_unlink_parent; | ||
| 79 | u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN]; | ||
| 80 | }; | 77 | }; |
| 81 | 78 | ||
| 82 | /* Responses are given these values to maintain backwards | 79 | /* Responses are given these values to maintain backwards |
| @@ -100,8 +97,6 @@ struct ocfs2_vote_work { | |||
| 100 | enum ocfs2_vote_request { | 97 | enum ocfs2_vote_request { |
| 101 | OCFS2_VOTE_REQ_INVALID = 0, | 98 | OCFS2_VOTE_REQ_INVALID = 0, |
| 102 | OCFS2_VOTE_REQ_DELETE, | 99 | OCFS2_VOTE_REQ_DELETE, |
| 103 | OCFS2_VOTE_REQ_UNLINK, | ||
| 104 | OCFS2_VOTE_REQ_RENAME, | ||
| 105 | OCFS2_VOTE_REQ_MOUNT, | 100 | OCFS2_VOTE_REQ_MOUNT, |
| 106 | OCFS2_VOTE_REQ_UMOUNT, | 101 | OCFS2_VOTE_REQ_UMOUNT, |
| 107 | OCFS2_VOTE_REQ_LAST | 102 | OCFS2_VOTE_REQ_LAST |
| @@ -261,103 +256,13 @@ done: | |||
| 261 | return response; | 256 | return response; |
| 262 | } | 257 | } |
| 263 | 258 | ||
| 264 | static int ocfs2_match_dentry(struct dentry *dentry, | ||
| 265 | u64 parent_blkno, | ||
| 266 | unsigned int namelen, | ||
| 267 | const char *name) | ||
| 268 | { | ||
| 269 | struct inode *parent; | ||
| 270 | |||
| 271 | if (!dentry->d_parent) { | ||
| 272 | mlog(0, "Detached from parent.\n"); | ||
| 273 | return 0; | ||
| 274 | } | ||
| 275 | |||
| 276 | parent = dentry->d_parent->d_inode; | ||
| 277 | /* Negative parent dentry? */ | ||
| 278 | if (!parent) | ||
| 279 | return 0; | ||
| 280 | |||
| 281 | /* Name is in a different directory. */ | ||
| 282 | if (OCFS2_I(parent)->ip_blkno != parent_blkno) | ||
| 283 | return 0; | ||
| 284 | |||
| 285 | if (dentry->d_name.len != namelen) | ||
| 286 | return 0; | ||
| 287 | |||
| 288 | /* comparison above guarantees this is safe. */ | ||
| 289 | if (memcmp(dentry->d_name.name, name, namelen)) | ||
| 290 | return 0; | ||
| 291 | |||
| 292 | return 1; | ||
| 293 | } | ||
| 294 | |||
| 295 | static void ocfs2_process_dentry_request(struct inode *inode, | ||
| 296 | int rename, | ||
| 297 | unsigned int new_nlink, | ||
| 298 | u64 parent_blkno, | ||
| 299 | unsigned int namelen, | ||
| 300 | const char *name) | ||
| 301 | { | ||
| 302 | struct dentry *dentry = NULL; | ||
| 303 | struct list_head *p; | ||
| 304 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 305 | |||
| 306 | mlog(0, "parent %llu, namelen = %u, name = %.*s\n", | ||
| 307 | (unsigned long long)parent_blkno, namelen, namelen, name); | ||
| 308 | |||
| 309 | spin_lock(&dcache_lock); | ||
| 310 | |||
| 311 | /* Another node is removing this name from the system. It is | ||
| 312 | * up to us to find the corresponding dentry and if it exists, | ||
| 313 | * unhash it from the dcache. */ | ||
| 314 | list_for_each(p, &inode->i_dentry) { | ||
| 315 | dentry = list_entry(p, struct dentry, d_alias); | ||
| 316 | |||
| 317 | if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) { | ||
| 318 | mlog(0, "dentry found: %.*s\n", | ||
| 319 | dentry->d_name.len, dentry->d_name.name); | ||
| 320 | |||
| 321 | dget_locked(dentry); | ||
| 322 | break; | ||
| 323 | } | ||
| 324 | |||
| 325 | dentry = NULL; | ||
| 326 | } | ||
| 327 | |||
| 328 | spin_unlock(&dcache_lock); | ||
| 329 | |||
| 330 | if (dentry) { | ||
| 331 | d_delete(dentry); | ||
| 332 | dput(dentry); | ||
| 333 | } | ||
| 334 | |||
| 335 | /* rename votes don't send link counts */ | ||
| 336 | if (!rename) { | ||
| 337 | mlog(0, "new_nlink = %u\n", new_nlink); | ||
| 338 | |||
| 339 | /* We don't have the proper locks here to directly | ||
| 340 | * change i_nlink and besides, the vote is sent | ||
| 341 | * *before* the operation so it may have failed on the | ||
| 342 | * other node. This passes a hint to ocfs2_drop_inode | ||
| 343 | * to force ocfs2_delete_inode, who will take the | ||
| 344 | * proper cluster locks to sort things out. */ | ||
| 345 | if (new_nlink == 0) { | ||
| 346 | spin_lock(&oi->ip_lock); | ||
| 347 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
| 348 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | static void ocfs2_process_vote(struct ocfs2_super *osb, | 259 | static void ocfs2_process_vote(struct ocfs2_super *osb, |
| 354 | struct ocfs2_vote_msg *msg) | 260 | struct ocfs2_vote_msg *msg) |
| 355 | { | 261 | { |
| 356 | int net_status, vote_response; | 262 | int net_status, vote_response; |
| 357 | int orphaned_slot = 0; | 263 | int orphaned_slot = 0; |
| 358 | int rename = 0; | 264 | unsigned int node_num, generation; |
| 359 | unsigned int node_num, generation, new_nlink, namelen; | 265 | u64 blkno; |
| 360 | u64 blkno, parent_blkno; | ||
| 361 | enum ocfs2_vote_request request; | 266 | enum ocfs2_vote_request request; |
| 362 | struct inode *inode = NULL; | 267 | struct inode *inode = NULL; |
| 363 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | 268 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; |
| @@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb, | |||
| 437 | vote_response = ocfs2_process_delete_request(inode, | 342 | vote_response = ocfs2_process_delete_request(inode, |
| 438 | &orphaned_slot); | 343 | &orphaned_slot); |
| 439 | break; | 344 | break; |
| 440 | case OCFS2_VOTE_REQ_RENAME: | ||
| 441 | rename = 1; | ||
| 442 | /* fall through */ | ||
| 443 | case OCFS2_VOTE_REQ_UNLINK: | ||
| 444 | parent_blkno = be64_to_cpu(msg->v_unlink_parent); | ||
| 445 | namelen = be32_to_cpu(msg->v_unlink_namelen); | ||
| 446 | /* new_nlink will be ignored in case of a rename vote */ | ||
| 447 | new_nlink = be32_to_cpu(msg->md1.v_nlink); | ||
| 448 | ocfs2_process_dentry_request(inode, rename, new_nlink, | ||
| 449 | parent_blkno, namelen, | ||
| 450 | msg->v_unlink_dirent); | ||
| 451 | break; | ||
| 452 | default: | 345 | default: |
| 453 | mlog(ML_ERROR, "node %u, invalid request: %u\n", | 346 | mlog(ML_ERROR, "node %u, invalid request: %u\n", |
| 454 | node_num, request); | 347 | node_num, request); |
| @@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode) | |||
| 889 | return status; | 782 | return status; |
| 890 | } | 783 | } |
| 891 | 784 | ||
| 892 | static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request, | ||
| 893 | struct dentry *dentry) | ||
| 894 | { | ||
| 895 | struct inode *parent = dentry->d_parent->d_inode; | ||
| 896 | |||
| 897 | /* We need some values which will uniquely identify a dentry | ||
| 898 | * on the other nodes so that they can find it and run | ||
| 899 | * d_delete against it. Parent directory block and full name | ||
| 900 | * should suffice. */ | ||
| 901 | |||
| 902 | mlog(0, "unlink/rename request: parent: %llu name: %.*s\n", | ||
| 903 | (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len, | ||
| 904 | dentry->d_name.name); | ||
| 905 | |||
| 906 | request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno); | ||
| 907 | request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len); | ||
| 908 | memcpy(request->v_unlink_dirent, dentry->d_name.name, | ||
| 909 | dentry->d_name.len); | ||
| 910 | } | ||
| 911 | |||
| 912 | int ocfs2_request_unlink_vote(struct inode *inode, | ||
| 913 | struct dentry *dentry, | ||
| 914 | unsigned int nlink) | ||
| 915 | { | ||
| 916 | int status; | ||
| 917 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 918 | struct ocfs2_vote_msg *request; | ||
| 919 | |||
| 920 | if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN) | ||
| 921 | return -ENAMETOOLONG; | ||
| 922 | |||
| 923 | status = -ENOMEM; | ||
| 924 | request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, | ||
| 925 | inode->i_generation, | ||
| 926 | OCFS2_VOTE_REQ_UNLINK, nlink); | ||
| 927 | if (request) { | ||
| 928 | ocfs2_setup_unlink_vote(request, dentry); | ||
| 929 | |||
| 930 | status = ocfs2_request_vote(inode, request, NULL); | ||
| 931 | |||
| 932 | kfree(request); | ||
| 933 | } | ||
| 934 | return status; | ||
| 935 | } | ||
| 936 | |||
| 937 | int ocfs2_request_rename_vote(struct inode *inode, | ||
| 938 | struct dentry *dentry) | ||
| 939 | { | ||
| 940 | int status; | ||
| 941 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 942 | struct ocfs2_vote_msg *request; | ||
| 943 | |||
| 944 | if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN) | ||
| 945 | return -ENAMETOOLONG; | ||
| 946 | |||
| 947 | status = -ENOMEM; | ||
| 948 | request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, | ||
| 949 | inode->i_generation, | ||
| 950 | OCFS2_VOTE_REQ_RENAME, 0); | ||
| 951 | if (request) { | ||
| 952 | ocfs2_setup_unlink_vote(request, dentry); | ||
| 953 | |||
| 954 | status = ocfs2_request_vote(inode, request, NULL); | ||
| 955 | |||
| 956 | kfree(request); | ||
| 957 | } | ||
| 958 | return status; | ||
| 959 | } | ||
| 960 | |||
| 961 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | 785 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) |
| 962 | { | 786 | { |
| 963 | int status; | 787 | int status; |
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h index 9cce6070346..53ebc1c69e5 100644 --- a/fs/ocfs2/vote.h +++ b/fs/ocfs2/vote.h | |||
| @@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | int ocfs2_request_delete_vote(struct inode *inode); | 41 | int ocfs2_request_delete_vote(struct inode *inode); |
| 42 | int ocfs2_request_unlink_vote(struct inode *inode, | ||
| 43 | struct dentry *dentry, | ||
| 44 | unsigned int nlink); | ||
| 45 | int ocfs2_request_rename_vote(struct inode *inode, | ||
| 46 | struct dentry *dentry); | ||
| 47 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | 42 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); |
| 48 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | 43 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); |
| 49 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | 44 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 555bc195c42..1d3e601ece7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -92,9 +92,10 @@ extern int dir_notify_enable; | |||
| 92 | #define FS_REQUIRES_DEV 1 | 92 | #define FS_REQUIRES_DEV 1 |
| 93 | #define FS_BINARY_MOUNTDATA 2 | 93 | #define FS_BINARY_MOUNTDATA 2 |
| 94 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 94 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
| 95 | #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon | 95 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() |
| 96 | * as nfs_rename() will be cleaned up | 96 | * during rename() internally. |
| 97 | */ | 97 | */ |
| 98 | |||
| 98 | /* | 99 | /* |
| 99 | * These are the fs-independent mount-flags: up to 32 flags are supported | 100 | * These are the fs-independent mount-flags: up to 32 flags are supported |
| 100 | */ | 101 | */ |
