diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/namei.c | 6 | ||||
-rw-r--r-- | fs/nfs/dir.c | 3 | ||||
-rw-r--r-- | fs/nfs/super.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/dcache.c | 359 | ||||
-rw-r--r-- | fs/ocfs2/dcache.h | 27 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmapi.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmast.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlm/userdlm.c | 81 | ||||
-rw-r--r-- | fs/ocfs2/dlm/userdlm.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 1094 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.h | 21 | ||||
-rw-r--r-- | fs/ocfs2/export.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 156 | ||||
-rw-r--r-- | fs/ocfs2/inode.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 116 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_lockid.h | 25 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/sysfile.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/vote.c | 180 | ||||
-rw-r--r-- | fs/ocfs2/vote.h | 5 |
26 files changed, 1241 insertions, 907 deletions
diff --git a/fs/namei.c b/fs/namei.c index 432d6bc6fab0..6b591c01b09f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2370 | dput(new_dentry); | 2370 | dput(new_dentry); |
2371 | } | 2371 | } |
2372 | if (!error) | 2372 | if (!error) |
2373 | d_move(old_dentry,new_dentry); | 2373 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
2374 | d_move(old_dentry,new_dentry); | ||
2374 | return error; | 2375 | return error; |
2375 | } | 2376 | } |
2376 | 2377 | ||
@@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | |||
2393 | else | 2394 | else |
2394 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2395 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
2395 | if (!error) { | 2396 | if (!error) { |
2396 | /* The following d_move() should become unconditional */ | 2397 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
2397 | if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) | ||
2398 | d_move(old_dentry, new_dentry); | 2398 | d_move(old_dentry, new_dentry); |
2399 | } | 2399 | } |
2400 | if (target) | 2400 | if (target) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3419c2da9ba9..7432f1a43f3d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1669,8 +1669,7 @@ out: | |||
1669 | if (rehash) | 1669 | if (rehash) |
1670 | d_rehash(rehash); | 1670 | d_rehash(rehash); |
1671 | if (!error) { | 1671 | if (!error) { |
1672 | if (!S_ISDIR(old_inode->i_mode)) | 1672 | d_move(old_dentry, new_dentry); |
1673 | d_move(old_dentry, new_dentry); | ||
1674 | nfs_renew_times(new_dentry); | 1673 | nfs_renew_times(new_dentry); |
1675 | nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); | 1674 | nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); |
1676 | } | 1675 | } |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b99113b0f65f..e8d40030cab4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = { | |||
71 | .name = "nfs", | 71 | .name = "nfs", |
72 | .get_sb = nfs_get_sb, | 72 | .get_sb = nfs_get_sb, |
73 | .kill_sb = nfs_kill_super, | 73 | .kill_sb = nfs_kill_super, |
74 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 74 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
75 | }; | 75 | }; |
76 | 76 | ||
77 | struct file_system_type nfs_xdev_fs_type = { | 77 | struct file_system_type nfs_xdev_fs_type = { |
@@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = { | |||
79 | .name = "nfs", | 79 | .name = "nfs", |
80 | .get_sb = nfs_xdev_get_sb, | 80 | .get_sb = nfs_xdev_get_sb, |
81 | .kill_sb = nfs_kill_super, | 81 | .kill_sb = nfs_kill_super, |
82 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 82 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
83 | }; | 83 | }; |
84 | 84 | ||
85 | static struct super_operations nfs_sops = { | 85 | static struct super_operations nfs_sops = { |
@@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = { | |||
107 | .name = "nfs4", | 107 | .name = "nfs4", |
108 | .get_sb = nfs4_get_sb, | 108 | .get_sb = nfs4_get_sb, |
109 | .kill_sb = nfs4_kill_super, | 109 | .kill_sb = nfs4_kill_super, |
110 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 110 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
111 | }; | 111 | }; |
112 | 112 | ||
113 | struct file_system_type nfs4_xdev_fs_type = { | 113 | struct file_system_type nfs4_xdev_fs_type = { |
@@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = { | |||
115 | .name = "nfs4", | 115 | .name = "nfs4", |
116 | .get_sb = nfs4_xdev_get_sb, | 116 | .get_sb = nfs4_xdev_get_sb, |
117 | .kill_sb = nfs4_kill_super, | 117 | .kill_sb = nfs4_kill_super, |
118 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 118 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
119 | }; | 119 | }; |
120 | 120 | ||
121 | struct file_system_type nfs4_referral_fs_type = { | 121 | struct file_system_type nfs4_referral_fs_type = { |
@@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = { | |||
123 | .name = "nfs4", | 123 | .name = "nfs4", |
124 | .get_sb = nfs4_referral_get_sb, | 124 | .get_sb = nfs4_referral_get_sb, |
125 | .kill_sb = nfs4_kill_super, | 125 | .kill_sb = nfs4_kill_super, |
126 | .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, | 126 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, |
127 | }; | 127 | }; |
128 | 128 | ||
129 | static struct super_operations nfs4_sops = { | 129 | static struct super_operations nfs4_sops = { |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index ff9e2e2104c2..4b46aac7d243 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -44,11 +44,17 @@ | |||
44 | * locking semantics of the file system using the protocol. It should | 44 | * locking semantics of the file system using the protocol. It should |
45 | * be somewhere else, I'm sure, but right now it isn't. | 45 | * be somewhere else, I'm sure, but right now it isn't. |
46 | * | 46 | * |
47 | * New in version 4: | ||
48 | * - Remove i_generation from lock names for better stat performance. | ||
49 | * | ||
50 | * New in version 3: | ||
51 | * - Replace dentry votes with a cluster lock | ||
52 | * | ||
47 | * New in version 2: | 53 | * New in version 2: |
48 | * - full 64 bit i_size in the metadata lock lvbs | 54 | * - full 64 bit i_size in the metadata lock lvbs |
49 | * - introduction of "rw" lock and pushing meta/data locking down | 55 | * - introduction of "rw" lock and pushing meta/data locking down |
50 | */ | 56 | */ |
51 | #define O2NET_PROTOCOL_VERSION 2ULL | 57 | #define O2NET_PROTOCOL_VERSION 4ULL |
52 | struct o2net_handshake { | 58 | struct o2net_handshake { |
53 | __be64 protocol_version; | 59 | __be64 protocol_version; |
54 | __be64 connector_id; | 60 | __be64 connector_id; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 1a01380e3878..014e73978dac 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -35,15 +35,17 @@ | |||
35 | 35 | ||
36 | #include "alloc.h" | 36 | #include "alloc.h" |
37 | #include "dcache.h" | 37 | #include "dcache.h" |
38 | #include "dlmglue.h" | ||
38 | #include "file.h" | 39 | #include "file.h" |
39 | #include "inode.h" | 40 | #include "inode.h" |
40 | 41 | ||
42 | |||
41 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 43 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
42 | struct nameidata *nd) | 44 | struct nameidata *nd) |
43 | { | 45 | { |
44 | struct inode *inode = dentry->d_inode; | 46 | struct inode *inode = dentry->d_inode; |
45 | int ret = 0; /* if all else fails, just return false */ | 47 | int ret = 0; /* if all else fails, just return false */ |
46 | struct ocfs2_super *osb; | 48 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
47 | 49 | ||
48 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 50 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
49 | dentry->d_name.len, dentry->d_name.name); | 51 | dentry->d_name.len, dentry->d_name.name); |
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
55 | goto bail; | 57 | goto bail; |
56 | } | 58 | } |
57 | 59 | ||
58 | osb = OCFS2_SB(inode->i_sb); | ||
59 | |||
60 | BUG_ON(!osb); | 60 | BUG_ON(!osb); |
61 | 61 | ||
62 | if (inode != osb->root_inode) { | 62 | if (inode == osb->root_inode || is_bad_inode(inode)) |
63 | spin_lock(&OCFS2_I(inode)->ip_lock); | 63 | goto bail; |
64 | /* did we or someone else delete this inode? */ | 64 | |
65 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { | 65 | spin_lock(&OCFS2_I(inode)->ip_lock); |
66 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 66 | /* did we or someone else delete this inode? */ |
67 | mlog(0, "inode (%llu) deleted, returning false\n", | 67 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { |
68 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
69 | goto bail; | ||
70 | } | ||
71 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 68 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
69 | mlog(0, "inode (%llu) deleted, returning false\n", | ||
70 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
71 | goto bail; | ||
72 | } | ||
73 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
72 | 74 | ||
73 | if (!inode->i_nlink) { | 75 | /* |
74 | mlog(0, "Inode %llu orphaned, returning false " | 76 | * We don't need a cluster lock to test this because once an |
75 | "dir = %d\n", | 77 | * inode nlink hits zero, it never goes back. |
76 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 78 | */ |
77 | S_ISDIR(inode->i_mode)); | 79 | if (inode->i_nlink == 0) { |
78 | goto bail; | 80 | mlog(0, "Inode %llu orphaned, returning false " |
79 | } | 81 | "dir = %d\n", |
82 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
83 | S_ISDIR(inode->i_mode)); | ||
84 | goto bail; | ||
80 | } | 85 | } |
81 | 86 | ||
82 | ret = 1; | 87 | ret = 1; |
@@ -87,6 +92,322 @@ bail: | |||
87 | return ret; | 92 | return ret; |
88 | } | 93 | } |
89 | 94 | ||
95 | static int ocfs2_match_dentry(struct dentry *dentry, | ||
96 | u64 parent_blkno, | ||
97 | int skip_unhashed) | ||
98 | { | ||
99 | struct inode *parent; | ||
100 | |||
101 | /* | ||
102 | * ocfs2_lookup() does a d_splice_alias() _before_ attaching | ||
103 | * to the lock data, so we skip those here, otherwise | ||
104 | * ocfs2_dentry_attach_lock() will get its original dentry | ||
105 | * back. | ||
106 | */ | ||
107 | if (!dentry->d_fsdata) | ||
108 | return 0; | ||
109 | |||
110 | if (!dentry->d_parent) | ||
111 | return 0; | ||
112 | |||
113 | if (skip_unhashed && d_unhashed(dentry)) | ||
114 | return 0; | ||
115 | |||
116 | parent = dentry->d_parent->d_inode; | ||
117 | /* Negative parent dentry? */ | ||
118 | if (!parent) | ||
119 | return 0; | ||
120 | |||
121 | /* Name is in a different directory. */ | ||
122 | if (OCFS2_I(parent)->ip_blkno != parent_blkno) | ||
123 | return 0; | ||
124 | |||
125 | return 1; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Walk the inode alias list, and find a dentry which has a given | ||
130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | ||
131 | * is looking for a dentry_lock reference. The vote thread is looking | ||
132 | * to unhash aliases, so we allow it to skip any that already have | ||
133 | * that property. | ||
134 | */ | ||
135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | ||
136 | u64 parent_blkno, | ||
137 | int skip_unhashed) | ||
138 | { | ||
139 | struct list_head *p; | ||
140 | struct dentry *dentry = NULL; | ||
141 | |||
142 | spin_lock(&dcache_lock); | ||
143 | |||
144 | list_for_each(p, &inode->i_dentry) { | ||
145 | dentry = list_entry(p, struct dentry, d_alias); | ||
146 | |||
147 | if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { | ||
148 | mlog(0, "dentry found: %.*s\n", | ||
149 | dentry->d_name.len, dentry->d_name.name); | ||
150 | |||
151 | dget_locked(dentry); | ||
152 | break; | ||
153 | } | ||
154 | |||
155 | dentry = NULL; | ||
156 | } | ||
157 | |||
158 | spin_unlock(&dcache_lock); | ||
159 | |||
160 | return dentry; | ||
161 | } | ||
162 | |||
163 | DEFINE_SPINLOCK(dentry_attach_lock); | ||
164 | |||
165 | /* | ||
166 | * Attach this dentry to a cluster lock. | ||
167 | * | ||
168 | * Dentry locks cover all links in a given directory to a particular | ||
169 | * inode. We do this so that ocfs2 can build a lock name which all | ||
170 | * nodes in the cluster can agree on at all times. Shoving full names | ||
171 | * in the cluster lock won't work due to size restrictions. Covering | ||
172 | * links inside of a directory is a good compromise because it still | ||
173 | * allows us to use the parent directory lock to synchronize | ||
174 | * operations. | ||
175 | * | ||
176 | * Call this function with the parent dir semaphore and the parent dir | ||
177 | * cluster lock held. | ||
178 | * | ||
179 | * The dir semaphore will protect us from having to worry about | ||
180 | * concurrent processes on our node trying to attach a lock at the | ||
181 | * same time. | ||
182 | * | ||
183 | * The dir cluster lock (held at either PR or EX mode) protects us | ||
184 | * from unlink and rename on other nodes. | ||
185 | * | ||
186 | * A dput() can happen asynchronously due to pruning, so we cover | ||
187 | * attaching and detaching the dentry lock with a | ||
188 | * dentry_attach_lock. | ||
189 | * | ||
190 | * A node which has done lookup on a name retains a protected read | ||
191 | * lock until final dput. If the user requests an unlink or rename, | ||
192 | * the protected read is upgraded to an exclusive lock. Other nodes | ||
193 | * who have seen the dentry will then be informed that they need to | ||
194 | * downgrade their lock, which will involve d_delete on the | ||
195 | * dentry. This happens in ocfs2_dentry_convert_worker(). | ||
196 | */ | ||
197 | int ocfs2_dentry_attach_lock(struct dentry *dentry, | ||
198 | struct inode *inode, | ||
199 | u64 parent_blkno) | ||
200 | { | ||
201 | int ret; | ||
202 | struct dentry *alias; | ||
203 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
204 | |||
205 | mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n", | ||
206 | dentry->d_name.len, dentry->d_name.name, | ||
207 | (unsigned long long)parent_blkno, dl); | ||
208 | |||
209 | /* | ||
210 | * Negative dentry. We ignore these for now. | ||
211 | * | ||
212 | * XXX: Could we improve ocfs2_dentry_revalidate() by | ||
213 | * tracking these? | ||
214 | */ | ||
215 | if (!inode) | ||
216 | return 0; | ||
217 | |||
218 | if (dl) { | ||
219 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | ||
220 | " \"%.*s\": old parent: %llu, new: %llu\n", | ||
221 | dentry->d_name.len, dentry->d_name.name, | ||
222 | (unsigned long long)parent_blkno, | ||
223 | (unsigned long long)dl->dl_parent_blkno); | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | alias = ocfs2_find_local_alias(inode, parent_blkno, 0); | ||
228 | if (alias) { | ||
229 | /* | ||
230 | * Great, an alias exists, which means we must have a | ||
231 | * dentry lock already. We can just grab the lock off | ||
232 | * the alias and add it to the list. | ||
233 | * | ||
234 | * We're depending here on the fact that this dentry | ||
235 | * was found and exists in the dcache and so must have | ||
236 | * a reference to the dentry_lock because we can't | ||
237 | * race creates. Final dput() cannot happen on it | ||
238 | * since we have it pinned, so our reference is safe. | ||
239 | */ | ||
240 | dl = alias->d_fsdata; | ||
241 | mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", | ||
242 | (unsigned long long)parent_blkno, | ||
243 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
244 | |||
245 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | ||
246 | " \"%.*s\": old parent: %llu, new: %llu\n", | ||
247 | dentry->d_name.len, dentry->d_name.name, | ||
248 | (unsigned long long)parent_blkno, | ||
249 | (unsigned long long)dl->dl_parent_blkno); | ||
250 | |||
251 | mlog(0, "Found: %s\n", dl->dl_lockres.l_name); | ||
252 | |||
253 | goto out_attach; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * There are no other aliases | ||
258 | */ | ||
259 | dl = kmalloc(sizeof(*dl), GFP_NOFS); | ||
260 | if (!dl) { | ||
261 | ret = -ENOMEM; | ||
262 | mlog_errno(ret); | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | dl->dl_count = 0; | ||
267 | /* | ||
268 | * Does this have to happen below, for all attaches, in case | ||
269 | * the struct inode gets blown away by votes? | ||
270 | */ | ||
271 | dl->dl_inode = igrab(inode); | ||
272 | dl->dl_parent_blkno = parent_blkno; | ||
273 | ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); | ||
274 | |||
275 | out_attach: | ||
276 | spin_lock(&dentry_attach_lock); | ||
277 | dentry->d_fsdata = dl; | ||
278 | dl->dl_count++; | ||
279 | spin_unlock(&dentry_attach_lock); | ||
280 | |||
281 | /* | ||
282 | * This actually gets us our PRMODE level lock. From now on, | ||
283 | * we'll have a notification if one of these names is | ||
284 | * destroyed on another node. | ||
285 | */ | ||
286 | ret = ocfs2_dentry_lock(dentry, 0); | ||
287 | if (!ret) | ||
288 | ocfs2_dentry_unlock(dentry, 0); | ||
289 | else | ||
290 | mlog_errno(ret); | ||
291 | |||
292 | dput(alias); | ||
293 | |||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * ocfs2_dentry_iput() and friends. | ||
299 | * | ||
300 | * At this point, our particular dentry is detached from the inodes | ||
301 | * alias list, so there's no way that the locking code can find it. | ||
302 | * | ||
303 | * The interesting stuff happens when we determine that our lock needs | ||
304 | * to go away because this is the last subdir alias in the | ||
305 | * system. This function needs to handle a couple things: | ||
306 | * | ||
307 | * 1) Synchronizing lock shutdown with the downconvert threads. This | ||
308 | * is already handled for us via the lockres release drop function | ||
309 | * called in ocfs2_release_dentry_lock() | ||
310 | * | ||
311 | * 2) A race may occur when we're doing our lock shutdown and | ||
312 | * another process wants to create a new dentry lock. Right now we | ||
313 | * let them race, which means that for a very short while, this | ||
314 | * node might have two locks on a lock resource. This shouldn't be a | ||
315 | * problem though because one of them is in the process of being | ||
316 | * thrown out. | ||
317 | */ | ||
318 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, | ||
319 | struct ocfs2_dentry_lock *dl) | ||
320 | { | ||
321 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); | ||
322 | ocfs2_lock_res_free(&dl->dl_lockres); | ||
323 | iput(dl->dl_inode); | ||
324 | kfree(dl); | ||
325 | } | ||
326 | |||
327 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | ||
328 | struct ocfs2_dentry_lock *dl) | ||
329 | { | ||
330 | int unlock = 0; | ||
331 | |||
332 | BUG_ON(dl->dl_count == 0); | ||
333 | |||
334 | spin_lock(&dentry_attach_lock); | ||
335 | dl->dl_count--; | ||
336 | unlock = !dl->dl_count; | ||
337 | spin_unlock(&dentry_attach_lock); | ||
338 | |||
339 | if (unlock) | ||
340 | ocfs2_drop_dentry_lock(osb, dl); | ||
341 | } | ||
342 | |||
343 | static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | ||
344 | { | ||
345 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
346 | |||
347 | mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED), | ||
348 | "dentry: %.*s\n", dentry->d_name.len, | ||
349 | dentry->d_name.name); | ||
350 | |||
351 | if (!dl) | ||
352 | goto out; | ||
353 | |||
354 | mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n", | ||
355 | dentry->d_name.len, dentry->d_name.name, | ||
356 | dl->dl_count); | ||
357 | |||
358 | ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); | ||
359 | |||
360 | out: | ||
361 | iput(inode); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * d_move(), but keep the locks in sync. | ||
366 | * | ||
367 | * When we are done, "dentry" will have the parent dir and name of | ||
368 | * "target", which will be thrown away. | ||
369 | * | ||
370 | * We manually update the lock of "dentry" if need be. | ||
371 | * | ||
372 | * "target" doesn't have its dentry lock touched - we allow the later | ||
373 | * dput() to handle this for us. | ||
374 | * | ||
375 | * This is called during ocfs2_rename(), while holding parent | ||
376 | * directory locks. The dentries have already been deleted on other | ||
377 | * nodes via ocfs2_remote_dentry_delete(). | ||
378 | * | ||
379 | * Normally, the VFS handles the d_move() for the file system, after | ||
380 | * the ->rename() callback. OCFS2 wants to handle this internally, so | ||
381 | * the new lock can be created atomically with respect to the cluster. | ||
382 | */ | ||
383 | void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | ||
384 | struct inode *old_dir, struct inode *new_dir) | ||
385 | { | ||
386 | int ret; | ||
387 | struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); | ||
388 | struct inode *inode = dentry->d_inode; | ||
389 | |||
390 | /* | ||
391 | * Move within the same directory, so the actual lock info won't | ||
392 | * change. | ||
393 | * | ||
394 | * XXX: Is there any advantage to dropping the lock here? | ||
395 | */ | ||
396 | if (old_dir == new_dir) | ||
397 | goto out_move; | ||
398 | |||
399 | ocfs2_dentry_lock_put(osb, dentry->d_fsdata); | ||
400 | |||
401 | dentry->d_fsdata = NULL; | ||
402 | ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); | ||
403 | if (ret) | ||
404 | mlog_errno(ret); | ||
405 | |||
406 | out_move: | ||
407 | d_move(dentry, target); | ||
408 | } | ||
409 | |||
90 | struct dentry_operations ocfs2_dentry_ops = { | 410 | struct dentry_operations ocfs2_dentry_ops = { |
91 | .d_revalidate = ocfs2_dentry_revalidate, | 411 | .d_revalidate = ocfs2_dentry_revalidate, |
412 | .d_iput = ocfs2_dentry_iput, | ||
92 | }; | 413 | }; |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index 90072771114b..c091c34d9883 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -28,4 +28,31 @@ | |||
28 | 28 | ||
29 | extern struct dentry_operations ocfs2_dentry_ops; | 29 | extern struct dentry_operations ocfs2_dentry_ops; |
30 | 30 | ||
31 | struct ocfs2_dentry_lock { | ||
32 | unsigned int dl_count; | ||
33 | u64 dl_parent_blkno; | ||
34 | |||
35 | /* | ||
36 | * The ocfs2_dentry_lock keeps an inode reference until | ||
37 | * dl_lockres has been destroyed. This is usually done in | ||
38 | * ->d_iput() anyway, so there should be minimal impact. | ||
39 | */ | ||
40 | struct inode *dl_inode; | ||
41 | struct ocfs2_lock_res dl_lockres; | ||
42 | }; | ||
43 | |||
44 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, | ||
45 | u64 parent_blkno); | ||
46 | |||
47 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | ||
48 | struct ocfs2_dentry_lock *dl); | ||
49 | |||
50 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, | ||
51 | int skip_unhashed); | ||
52 | |||
53 | void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | ||
54 | struct inode *old_dir, struct inode *new_dir); | ||
55 | |||
56 | extern spinlock_t dentry_attach_lock; | ||
57 | |||
31 | #endif /* OCFS2_DCACHE_H */ | 58 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index 53652f51c0e1..cfd5cb65cab0 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h | |||
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, | |||
182 | struct dlm_lockstatus *lksb, | 182 | struct dlm_lockstatus *lksb, |
183 | int flags, | 183 | int flags, |
184 | const char *name, | 184 | const char *name, |
185 | int namelen, | ||
185 | dlm_astlockfunc_t *ast, | 186 | dlm_astlockfunc_t *ast, |
186 | void *data, | 187 | void *data, |
187 | dlm_bastlockfunc_t *bast); | 188 | dlm_bastlockfunc_t *bast); |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index f13a4bac41f0..681046d51393 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
320 | 320 | ||
321 | res = dlm_lookup_lockres(dlm, name, locklen); | 321 | res = dlm_lookup_lockres(dlm, name, locklen); |
322 | if (!res) { | 322 | if (!res) { |
323 | mlog(ML_ERROR, "got %sast for unknown lockres! " | 323 | mlog(0, "got %sast for unknown lockres! " |
324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", | 324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", |
325 | past->type == DLM_AST ? "" : "b", | 325 | past->type == DLM_AST ? "" : "b", |
326 | dlm_get_lock_cookie_node(cookie), | 326 | dlm_get_lock_cookie_node(cookie), |
327 | dlm_get_lock_cookie_seq(cookie), | 327 | dlm_get_lock_cookie_seq(cookie), |
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
462 | mlog(ML_ERROR, "sent AST to node %u, it returned " | 462 | mlog(ML_ERROR, "sent AST to node %u, it returned " |
463 | "DLM_MIGRATING!\n", lock->ml.node); | 463 | "DLM_MIGRATING!\n", lock->ml.node); |
464 | BUG(); | 464 | BUG(); |
465 | } else if (status != DLM_NORMAL) { | 465 | } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) { |
466 | mlog(ML_ERROR, "AST to node %u returned %d!\n", | 466 | mlog(ML_ERROR, "AST to node %u returned %d!\n", |
467 | lock->ml.node, status); | 467 | lock->ml.node, status); |
468 | /* ignore it */ | 468 | /* ignore it */ |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 14530ee7e11d..fa968180b072 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm, | |||
747 | u8 owner); | 747 | u8 owner); |
748 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | 748 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, |
749 | const char *lockid, | 749 | const char *lockid, |
750 | int namelen, | ||
750 | int flags); | 751 | int flags); |
751 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | 752 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, |
752 | const char *name, | 753 | const char *name, |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 5ca57ec650c7..42a1b91979b5 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie) | |||
540 | 540 | ||
541 | enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, | 541 | enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, |
542 | struct dlm_lockstatus *lksb, int flags, | 542 | struct dlm_lockstatus *lksb, int flags, |
543 | const char *name, dlm_astlockfunc_t *ast, void *data, | 543 | const char *name, int namelen, dlm_astlockfunc_t *ast, |
544 | dlm_bastlockfunc_t *bast) | 544 | void *data, dlm_bastlockfunc_t *bast) |
545 | { | 545 | { |
546 | enum dlm_status status; | 546 | enum dlm_status status; |
547 | struct dlm_lock_resource *res = NULL; | 547 | struct dlm_lock_resource *res = NULL; |
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, | |||
571 | recovery = (flags & LKM_RECOVERY); | 571 | recovery = (flags & LKM_RECOVERY); |
572 | 572 | ||
573 | if (recovery && | 573 | if (recovery && |
574 | (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) { | 574 | (!dlm_is_recovery_lock(name, namelen) || convert) ) { |
575 | dlm_error(status); | 575 | dlm_error(status); |
576 | goto error; | 576 | goto error; |
577 | } | 577 | } |
@@ -643,7 +643,7 @@ retry_convert: | |||
643 | } | 643 | } |
644 | 644 | ||
645 | status = DLM_IVBUFLEN; | 645 | status = DLM_IVBUFLEN; |
646 | if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) { | 646 | if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) { |
647 | dlm_error(status); | 647 | dlm_error(status); |
648 | goto error; | 648 | goto error; |
649 | } | 649 | } |
@@ -659,7 +659,7 @@ retry_convert: | |||
659 | dlm_wait_for_recovery(dlm); | 659 | dlm_wait_for_recovery(dlm); |
660 | 660 | ||
661 | /* find or create the lock resource */ | 661 | /* find or create the lock resource */ |
662 | res = dlm_get_lock_resource(dlm, name, flags); | 662 | res = dlm_get_lock_resource(dlm, name, namelen, flags); |
663 | if (!res) { | 663 | if (!res) { |
664 | status = DLM_IVLOCKID; | 664 | status = DLM_IVLOCKID; |
665 | dlm_error(status); | 665 | dlm_error(status); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9503240ef0e5..f784177b6241 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
740 | */ | 740 | */ |
741 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | 741 | struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, |
742 | const char *lockid, | 742 | const char *lockid, |
743 | int namelen, | ||
743 | int flags) | 744 | int flags) |
744 | { | 745 | { |
745 | struct dlm_lock_resource *tmpres=NULL, *res=NULL; | 746 | struct dlm_lock_resource *tmpres=NULL, *res=NULL; |
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
748 | int blocked = 0; | 749 | int blocked = 0; |
749 | int ret, nodenum; | 750 | int ret, nodenum; |
750 | struct dlm_node_iter iter; | 751 | struct dlm_node_iter iter; |
751 | unsigned int namelen, hash; | 752 | unsigned int hash; |
752 | int tries = 0; | 753 | int tries = 0; |
753 | int bit, wait_on_recovery = 0; | 754 | int bit, wait_on_recovery = 0; |
754 | 755 | ||
755 | BUG_ON(!lockid); | 756 | BUG_ON(!lockid); |
756 | 757 | ||
757 | namelen = strlen(lockid); | ||
758 | hash = dlm_lockid_hash(lockid, namelen); | 758 | hash = dlm_lockid_hash(lockid, namelen); |
759 | 759 | ||
760 | mlog(0, "get lockres %s (len %d)\n", lockid, namelen); | 760 | mlog(0, "get lockres %s (len %d)\n", lockid, namelen); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 594745fab0b5..9d950d7cea38 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -2285,7 +2285,8 @@ again: | |||
2285 | memset(&lksb, 0, sizeof(lksb)); | 2285 | memset(&lksb, 0, sizeof(lksb)); |
2286 | 2286 | ||
2287 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, | 2287 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, |
2288 | DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); | 2288 | DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN, |
2289 | dlm_reco_ast, dlm, dlm_reco_bast); | ||
2289 | 2290 | ||
2290 | mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", | 2291 | mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", |
2291 | dlm->name, ret, lksb.status); | 2292 | dlm->name, ret, lksb.status); |
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c index e641b084b343..eead48bbfac6 100644 --- a/fs/ocfs2/dlm/userdlm.c +++ b/fs/ocfs2/dlm/userdlm.c | |||
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | |||
102 | spin_unlock(&lockres->l_lock); | 102 | spin_unlock(&lockres->l_lock); |
103 | } | 103 | } |
104 | 104 | ||
105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ | 105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ |
106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ |
107 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 107 | "resource %.*s: %s\n", dlm_errname(_stat), _func, \ |
108 | _lockres->l_name, dlm_errmsg(_stat)); \ | 108 | _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \ |
109 | } while (0) | 109 | } while (0) |
110 | 110 | ||
111 | /* WARNING: This function lives in a world where the only three lock | 111 | /* WARNING: This function lives in a world where the only three lock |
@@ -127,21 +127,22 @@ static void user_ast(void *opaque) | |||
127 | struct user_lock_res *lockres = opaque; | 127 | struct user_lock_res *lockres = opaque; |
128 | struct dlm_lockstatus *lksb; | 128 | struct dlm_lockstatus *lksb; |
129 | 129 | ||
130 | mlog(0, "AST fired for lockres %s\n", lockres->l_name); | 130 | mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen, |
131 | lockres->l_name); | ||
131 | 132 | ||
132 | spin_lock(&lockres->l_lock); | 133 | spin_lock(&lockres->l_lock); |
133 | 134 | ||
134 | lksb = &(lockres->l_lksb); | 135 | lksb = &(lockres->l_lksb); |
135 | if (lksb->status != DLM_NORMAL) { | 136 | if (lksb->status != DLM_NORMAL) { |
136 | mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", | 137 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", |
137 | lksb->status, lockres->l_name); | 138 | lksb->status, lockres->l_namelen, lockres->l_name); |
138 | spin_unlock(&lockres->l_lock); | 139 | spin_unlock(&lockres->l_lock); |
139 | return; | 140 | return; |
140 | } | 141 | } |
141 | 142 | ||
142 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, | 143 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, |
143 | "Lockres %s, requested ivmode. flags 0x%x\n", | 144 | "Lockres %.*s, requested ivmode. flags 0x%x\n", |
144 | lockres->l_name, lockres->l_flags); | 145 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
145 | 146 | ||
146 | /* we're downconverting. */ | 147 | /* we're downconverting. */ |
147 | if (lockres->l_requested < lockres->l_level) { | 148 | if (lockres->l_requested < lockres->l_level) { |
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level) | |||
213 | { | 214 | { |
214 | struct user_lock_res *lockres = opaque; | 215 | struct user_lock_res *lockres = opaque; |
215 | 216 | ||
216 | mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", | 217 | mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n", |
217 | lockres->l_name, level); | 218 | lockres->l_namelen, lockres->l_name, level); |
218 | 219 | ||
219 | spin_lock(&lockres->l_lock); | 220 | spin_lock(&lockres->l_lock); |
220 | lockres->l_flags |= USER_LOCK_BLOCKED; | 221 | lockres->l_flags |= USER_LOCK_BLOCKED; |
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
231 | { | 232 | { |
232 | struct user_lock_res *lockres = opaque; | 233 | struct user_lock_res *lockres = opaque; |
233 | 234 | ||
234 | mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); | 235 | mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen, |
236 | lockres->l_name); | ||
235 | 237 | ||
236 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) | 238 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) |
237 | mlog(ML_ERROR, "Dlm returns status %d\n", status); | 239 | mlog(ML_ERROR, "Dlm returns status %d\n", status); |
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
244 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { | 246 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { |
245 | lockres->l_level = LKM_IVMODE; | 247 | lockres->l_level = LKM_IVMODE; |
246 | } else if (status == DLM_CANCELGRANT) { | 248 | } else if (status == DLM_CANCELGRANT) { |
247 | mlog(0, "Lock %s, cancel fails, flags 0x%x\n", | ||
248 | lockres->l_name, lockres->l_flags); | ||
249 | /* We tried to cancel a convert request, but it was | 249 | /* We tried to cancel a convert request, but it was |
250 | * already granted. Don't clear the busy flag - the | 250 | * already granted. Don't clear the busy flag - the |
251 | * ast should've done this already. */ | 251 | * ast should've done this already. */ |
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
255 | } else { | 255 | } else { |
256 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | 256 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); |
257 | /* Cancel succeeded, we want to re-queue */ | 257 | /* Cancel succeeded, we want to re-queue */ |
258 | mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n", | ||
259 | lockres->l_name, lockres->l_flags); | ||
260 | lockres->l_requested = LKM_IVMODE; /* cancel an | 258 | lockres->l_requested = LKM_IVMODE; /* cancel an |
261 | * upconvert | 259 | * upconvert |
262 | * request. */ | 260 | * request. */ |
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque) | |||
287 | struct user_lock_res *lockres = (struct user_lock_res *) opaque; | 285 | struct user_lock_res *lockres = (struct user_lock_res *) opaque; |
288 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 286 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); |
289 | 287 | ||
290 | mlog(0, "processing lockres %s\n", lockres->l_name); | 288 | mlog(0, "processing lockres %.*s\n", lockres->l_namelen, |
289 | lockres->l_name); | ||
291 | 290 | ||
292 | spin_lock(&lockres->l_lock); | 291 | spin_lock(&lockres->l_lock); |
293 | 292 | ||
294 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), | 293 | mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), |
295 | "Lockres %s, flags 0x%x\n", | 294 | "Lockres %.*s, flags 0x%x\n", |
296 | lockres->l_name, lockres->l_flags); | 295 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
297 | 296 | ||
298 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's | 297 | /* notice that we don't clear USER_LOCK_BLOCKED here. If it's |
299 | * set, we want user_ast clear it. */ | 298 | * set, we want user_ast clear it. */ |
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque) | |||
305 | * flag, and finally we might get another bast which re-queues | 304 | * flag, and finally we might get another bast which re-queues |
306 | * us before our ast for the downconvert is called. */ | 305 | * us before our ast for the downconvert is called. */ |
307 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { | 306 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { |
308 | mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n", | ||
309 | lockres->l_name, lockres->l_flags); | ||
310 | spin_unlock(&lockres->l_lock); | 307 | spin_unlock(&lockres->l_lock); |
311 | goto drop_ref; | 308 | goto drop_ref; |
312 | } | 309 | } |
313 | 310 | ||
314 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 311 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
315 | mlog(0, "lock is in teardown so we do nothing\n"); | ||
316 | spin_unlock(&lockres->l_lock); | 312 | spin_unlock(&lockres->l_lock); |
317 | goto drop_ref; | 313 | goto drop_ref; |
318 | } | 314 | } |
319 | 315 | ||
320 | if (lockres->l_flags & USER_LOCK_BUSY) { | 316 | if (lockres->l_flags & USER_LOCK_BUSY) { |
321 | mlog(0, "Cancel lock %s, flags 0x%x\n", | ||
322 | lockres->l_name, lockres->l_flags); | ||
323 | |||
324 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | 317 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { |
325 | spin_unlock(&lockres->l_lock); | 318 | spin_unlock(&lockres->l_lock); |
326 | goto drop_ref; | 319 | goto drop_ref; |
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque) | |||
372 | &lockres->l_lksb, | 365 | &lockres->l_lksb, |
373 | LKM_CONVERT|LKM_VALBLK, | 366 | LKM_CONVERT|LKM_VALBLK, |
374 | lockres->l_name, | 367 | lockres->l_name, |
368 | lockres->l_namelen, | ||
375 | user_ast, | 369 | user_ast, |
376 | lockres, | 370 | lockres, |
377 | user_bast); | 371 | user_bast); |
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres, | |||
420 | 414 | ||
421 | if (level != LKM_EXMODE && | 415 | if (level != LKM_EXMODE && |
422 | level != LKM_PRMODE) { | 416 | level != LKM_PRMODE) { |
423 | mlog(ML_ERROR, "lockres %s: invalid request!\n", | 417 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
424 | lockres->l_name); | 418 | lockres->l_namelen, lockres->l_name); |
425 | status = -EINVAL; | 419 | status = -EINVAL; |
426 | goto bail; | 420 | goto bail; |
427 | } | 421 | } |
428 | 422 | ||
429 | mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", | 423 | mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n", |
430 | lockres->l_name, | 424 | lockres->l_namelen, lockres->l_name, |
431 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", | 425 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", |
432 | lkm_flags); | 426 | lkm_flags); |
433 | 427 | ||
434 | again: | 428 | again: |
435 | if (signal_pending(current)) { | 429 | if (signal_pending(current)) { |
@@ -474,15 +468,13 @@ again: | |||
474 | BUG_ON(level == LKM_IVMODE); | 468 | BUG_ON(level == LKM_IVMODE); |
475 | BUG_ON(level == LKM_NLMODE); | 469 | BUG_ON(level == LKM_NLMODE); |
476 | 470 | ||
477 | mlog(0, "lock %s, get lock from %d to level = %d\n", | ||
478 | lockres->l_name, lockres->l_level, level); | ||
479 | |||
480 | /* call dlm_lock to upgrade lock now */ | 471 | /* call dlm_lock to upgrade lock now */ |
481 | status = dlmlock(dlm, | 472 | status = dlmlock(dlm, |
482 | level, | 473 | level, |
483 | &lockres->l_lksb, | 474 | &lockres->l_lksb, |
484 | local_flags, | 475 | local_flags, |
485 | lockres->l_name, | 476 | lockres->l_name, |
477 | lockres->l_namelen, | ||
486 | user_ast, | 478 | user_ast, |
487 | lockres, | 479 | lockres, |
488 | user_bast); | 480 | user_bast); |
@@ -498,9 +490,6 @@ again: | |||
498 | goto bail; | 490 | goto bail; |
499 | } | 491 | } |
500 | 492 | ||
501 | mlog(0, "lock %s, successfull return from dlmlock\n", | ||
502 | lockres->l_name); | ||
503 | |||
504 | user_wait_on_busy_lock(lockres); | 493 | user_wait_on_busy_lock(lockres); |
505 | goto again; | 494 | goto again; |
506 | } | 495 | } |
@@ -508,9 +497,6 @@ again: | |||
508 | user_dlm_inc_holders(lockres, level); | 497 | user_dlm_inc_holders(lockres, level); |
509 | spin_unlock(&lockres->l_lock); | 498 | spin_unlock(&lockres->l_lock); |
510 | 499 | ||
511 | mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name, | ||
512 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
513 | |||
514 | status = 0; | 500 | status = 0; |
515 | bail: | 501 | bail: |
516 | return status; | 502 | return status; |
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres, | |||
538 | { | 524 | { |
539 | if (level != LKM_EXMODE && | 525 | if (level != LKM_EXMODE && |
540 | level != LKM_PRMODE) { | 526 | level != LKM_PRMODE) { |
541 | mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); | 527 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
528 | lockres->l_namelen, lockres->l_name); | ||
542 | return; | 529 | return; |
543 | } | 530 | } |
544 | 531 | ||
545 | mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name, | ||
546 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE"); | ||
547 | |||
548 | spin_lock(&lockres->l_lock); | 532 | spin_lock(&lockres->l_lock); |
549 | user_dlm_dec_holders(lockres, level); | 533 | user_dlm_dec_holders(lockres, level); |
550 | __user_dlm_cond_queue_lockres(lockres); | 534 | __user_dlm_cond_queue_lockres(lockres); |
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres, | |||
602 | memcpy(lockres->l_name, | 586 | memcpy(lockres->l_name, |
603 | dentry->d_name.name, | 587 | dentry->d_name.name, |
604 | dentry->d_name.len); | 588 | dentry->d_name.len); |
589 | lockres->l_namelen = dentry->d_name.len; | ||
605 | } | 590 | } |
606 | 591 | ||
607 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | 592 | int user_dlm_destroy_lock(struct user_lock_res *lockres) |
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
609 | int status = -EBUSY; | 594 | int status = -EBUSY; |
610 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 595 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); |
611 | 596 | ||
612 | mlog(0, "asked to destroy %s\n", lockres->l_name); | 597 | mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name); |
613 | 598 | ||
614 | spin_lock(&lockres->l_lock); | 599 | spin_lock(&lockres->l_lock); |
615 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 600 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
616 | mlog(0, "Lock is already torn down\n"); | ||
617 | spin_unlock(&lockres->l_lock); | 601 | spin_unlock(&lockres->l_lock); |
618 | return 0; | 602 | return 0; |
619 | } | 603 | } |
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
623 | while (lockres->l_flags & USER_LOCK_BUSY) { | 607 | while (lockres->l_flags & USER_LOCK_BUSY) { |
624 | spin_unlock(&lockres->l_lock); | 608 | spin_unlock(&lockres->l_lock); |
625 | 609 | ||
626 | mlog(0, "lock %s is busy\n", lockres->l_name); | ||
627 | |||
628 | user_wait_on_busy_lock(lockres); | 610 | user_wait_on_busy_lock(lockres); |
629 | 611 | ||
630 | spin_lock(&lockres->l_lock); | 612 | spin_lock(&lockres->l_lock); |
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
632 | 614 | ||
633 | if (lockres->l_ro_holders || lockres->l_ex_holders) { | 615 | if (lockres->l_ro_holders || lockres->l_ex_holders) { |
634 | spin_unlock(&lockres->l_lock); | 616 | spin_unlock(&lockres->l_lock); |
635 | mlog(0, "lock %s has holders\n", lockres->l_name); | ||
636 | goto bail; | 617 | goto bail; |
637 | } | 618 | } |
638 | 619 | ||
639 | status = 0; | 620 | status = 0; |
640 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { | 621 | if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { |
641 | spin_unlock(&lockres->l_lock); | 622 | spin_unlock(&lockres->l_lock); |
642 | mlog(0, "lock %s is not attached\n", lockres->l_name); | ||
643 | goto bail; | 623 | goto bail; |
644 | } | 624 | } |
645 | 625 | ||
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
647 | lockres->l_flags |= USER_LOCK_BUSY; | 627 | lockres->l_flags |= USER_LOCK_BUSY; |
648 | spin_unlock(&lockres->l_lock); | 628 | spin_unlock(&lockres->l_lock); |
649 | 629 | ||
650 | mlog(0, "unlocking lockres %s\n", lockres->l_name); | ||
651 | status = dlmunlock(dlm, | 630 | status = dlmunlock(dlm, |
652 | &lockres->l_lksb, | 631 | &lockres->l_lksb, |
653 | LKM_VALBLK, | 632 | LKM_VALBLK, |
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index 04178bc40b76..c400e93bbf79 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h | |||
@@ -53,6 +53,7 @@ struct user_lock_res { | |||
53 | 53 | ||
54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 | 54 | #define USER_DLM_LOCK_ID_MAX_LEN 32 |
55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; | 55 | char l_name[USER_DLM_LOCK_ID_MAX_LEN]; |
56 | int l_namelen; | ||
56 | int l_level; | 57 | int l_level; |
57 | unsigned int l_ro_holders; | 58 | unsigned int l_ro_holders; |
58 | unsigned int l_ex_holders; | 59 | unsigned int l_ex_holders; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 151b41781eab..de887063dcfc 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "ocfs2.h" | 46 | #include "ocfs2.h" |
47 | 47 | ||
48 | #include "alloc.h" | 48 | #include "alloc.h" |
49 | #include "dcache.h" | ||
49 | #include "dlmglue.h" | 50 | #include "dlmglue.h" |
50 | #include "extent_map.h" | 51 | #include "extent_map.h" |
51 | #include "heartbeat.h" | 52 | #include "heartbeat.h" |
@@ -66,78 +67,161 @@ struct ocfs2_mask_waiter { | |||
66 | unsigned long mw_goal; | 67 | unsigned long mw_goal; |
67 | }; | 68 | }; |
68 | 69 | ||
69 | static void ocfs2_inode_ast_func(void *opaque); | 70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
70 | static void ocfs2_inode_bast_func(void *opaque, | 71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | int level); | ||
72 | static void ocfs2_super_ast_func(void *opaque); | ||
73 | static void ocfs2_super_bast_func(void *opaque, | ||
74 | int level); | ||
75 | static void ocfs2_rename_ast_func(void *opaque); | ||
76 | static void ocfs2_rename_bast_func(void *opaque, | ||
77 | int level); | ||
78 | |||
79 | /* so far, all locks have gotten along with the same unlock ast */ | ||
80 | static void ocfs2_unlock_ast_func(void *opaque, | ||
81 | enum dlm_status status); | ||
82 | static int ocfs2_do_unblock_meta(struct inode *inode, | ||
83 | int *requeue); | ||
84 | static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, | ||
85 | int *requeue); | ||
86 | static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, | ||
87 | int *requeue); | ||
88 | static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, | ||
89 | int *requeue); | ||
90 | static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, | ||
91 | int *requeue); | ||
92 | typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int); | ||
93 | static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | ||
94 | struct ocfs2_lock_res *lockres, | ||
95 | int *requeue, | ||
96 | ocfs2_convert_worker_t *worker); | ||
97 | 72 | ||
73 | /* | ||
74 | * Return value from ->downconvert_worker functions. | ||
75 | * | ||
76 | * These control the precise actions of ocfs2_unblock_lock() | ||
77 | * and ocfs2_process_blocked_lock() | ||
78 | * | ||
79 | */ | ||
80 | enum ocfs2_unblock_action { | ||
81 | UNBLOCK_CONTINUE = 0, /* Continue downconvert */ | ||
82 | UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire | ||
83 | * ->post_unlock callback */ | ||
84 | UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire | ||
85 | * ->post_unlock() callback. */ | ||
86 | }; | ||
87 | |||
88 | struct ocfs2_unblock_ctl { | ||
89 | int requeue; | ||
90 | enum ocfs2_unblock_action unblock_action; | ||
91 | }; | ||
92 | |||
93 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | ||
94 | int new_level); | ||
95 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); | ||
96 | |||
97 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | ||
98 | int blocking); | ||
99 | |||
100 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | ||
101 | int blocking); | ||
102 | |||
103 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | ||
104 | struct ocfs2_lock_res *lockres); | ||
105 | |||
106 | /* | ||
107 | * OCFS2 Lock Resource Operations | ||
108 | * | ||
109 | * These fine tune the behavior of the generic dlmglue locking infrastructure. | ||
110 | * | ||
111 | * The most basic of lock types can point ->l_priv to their respective | ||
112 | * struct ocfs2_super and allow the default actions to manage things. | ||
113 | * | ||
114 | * Right now, each lock type also needs to implement an init function, | ||
115 | * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() | ||
116 | * should be called when the lock is no longer needed (i.e., object | ||
117 | * destruction time). | ||
118 | */ | ||
98 | struct ocfs2_lock_res_ops { | 119 | struct ocfs2_lock_res_ops { |
99 | void (*ast)(void *); | 120 | /* |
100 | void (*bast)(void *, int); | 121 | * Translate an ocfs2_lock_res * into an ocfs2_super *. Define |
101 | void (*unlock_ast)(void *, enum dlm_status); | 122 | * this callback if ->l_priv is not an ocfs2_super pointer |
102 | int (*unblock)(struct ocfs2_lock_res *, int *); | 123 | */ |
124 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | ||
125 | |||
126 | /* | ||
127 | * Optionally called in the downconvert (or "vote") thread | ||
128 | * after a successful downconvert. The lockres will not be | ||
129 | * referenced after this callback is called, so it is safe to | ||
130 | * free memory, etc. | ||
131 | * | ||
132 | * The exact semantics of when this is called are controlled | ||
133 | * by ->downconvert_worker() | ||
134 | */ | ||
135 | void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); | ||
136 | |||
137 | /* | ||
138 | * Allow a lock type to add checks to determine whether it is | ||
139 | * safe to downconvert a lock. Return 0 to re-queue the | ||
140 | * downconvert at a later time, nonzero to continue. | ||
141 | * | ||
142 | * For most locks, the default checks that there are no | ||
143 | * incompatible holders are sufficient. | ||
144 | * | ||
145 | * Called with the lockres spinlock held. | ||
146 | */ | ||
147 | int (*check_downconvert)(struct ocfs2_lock_res *, int); | ||
148 | |||
149 | /* | ||
150 | * Allows a lock type to populate the lock value block. This | ||
151 | * is called on downconvert, and when we drop a lock. | ||
152 | * | ||
153 | * Locks that want to use this should set LOCK_TYPE_USES_LVB | ||
154 | * in the flags field. | ||
155 | * | ||
156 | * Called with the lockres spinlock held. | ||
157 | */ | ||
158 | void (*set_lvb)(struct ocfs2_lock_res *); | ||
159 | |||
160 | /* | ||
161 | * Called from the downconvert thread when it is determined | ||
162 | * that a lock will be downconverted. This is called without | ||
163 | * any locks held so the function can do work that might | ||
164 | * schedule (syncing out data, etc). | ||
165 | * | ||
166 | * This should return any one of the ocfs2_unblock_action | ||
167 | * values, depending on what it wants the thread to do. | ||
168 | */ | ||
169 | int (*downconvert_worker)(struct ocfs2_lock_res *, int); | ||
170 | |||
171 | /* | ||
172 | * LOCK_TYPE_* flags which describe the specific requirements | ||
173 | * of a lock type. Descriptions of each individual flag follow. | ||
174 | */ | ||
175 | int flags; | ||
103 | }; | 176 | }; |
104 | 177 | ||
178 | /* | ||
179 | * Some locks want to "refresh" potentially stale data when a | ||
180 | * meaningful (PRMODE or EXMODE) lock level is first obtained. If this | ||
181 | * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the | ||
182 | * individual lockres l_flags member from the ast function. It is | ||
183 | * expected that the locking wrapper will clear the | ||
184 | * OCFS2_LOCK_NEEDS_REFRESH flag when done. | ||
185 | */ | ||
186 | #define LOCK_TYPE_REQUIRES_REFRESH 0x1 | ||
187 | |||
188 | /* | ||
189 | * Indicate that a lock type makes use of the lock value block. The | ||
190 | * ->set_lvb lock type callback must be defined. | ||
191 | */ | ||
192 | #define LOCK_TYPE_USES_LVB 0x2 | ||
193 | |||
105 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 194 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { |
106 | .ast = ocfs2_inode_ast_func, | 195 | .get_osb = ocfs2_get_inode_osb, |
107 | .bast = ocfs2_inode_bast_func, | 196 | .flags = 0, |
108 | .unlock_ast = ocfs2_unlock_ast_func, | ||
109 | .unblock = ocfs2_unblock_inode_lock, | ||
110 | }; | 197 | }; |
111 | 198 | ||
112 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 199 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { |
113 | .ast = ocfs2_inode_ast_func, | 200 | .get_osb = ocfs2_get_inode_osb, |
114 | .bast = ocfs2_inode_bast_func, | 201 | .check_downconvert = ocfs2_check_meta_downconvert, |
115 | .unlock_ast = ocfs2_unlock_ast_func, | 202 | .set_lvb = ocfs2_set_meta_lvb, |
116 | .unblock = ocfs2_unblock_meta, | 203 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
117 | }; | 204 | }; |
118 | 205 | ||
119 | static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | ||
120 | int blocking); | ||
121 | |||
122 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | 206 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { |
123 | .ast = ocfs2_inode_ast_func, | 207 | .get_osb = ocfs2_get_inode_osb, |
124 | .bast = ocfs2_inode_bast_func, | 208 | .downconvert_worker = ocfs2_data_convert_worker, |
125 | .unlock_ast = ocfs2_unlock_ast_func, | 209 | .flags = 0, |
126 | .unblock = ocfs2_unblock_data, | ||
127 | }; | 210 | }; |
128 | 211 | ||
129 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 212 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
130 | .ast = ocfs2_super_ast_func, | 213 | .flags = LOCK_TYPE_REQUIRES_REFRESH, |
131 | .bast = ocfs2_super_bast_func, | ||
132 | .unlock_ast = ocfs2_unlock_ast_func, | ||
133 | .unblock = ocfs2_unblock_osb_lock, | ||
134 | }; | 214 | }; |
135 | 215 | ||
136 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 216 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { |
137 | .ast = ocfs2_rename_ast_func, | 217 | .flags = 0, |
138 | .bast = ocfs2_rename_bast_func, | 218 | }; |
139 | .unlock_ast = ocfs2_unlock_ast_func, | 219 | |
140 | .unblock = ocfs2_unblock_osb_lock, | 220 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { |
221 | .get_osb = ocfs2_get_dentry_osb, | ||
222 | .post_unlock = ocfs2_dentry_post_unlock, | ||
223 | .downconvert_worker = ocfs2_dentry_convert_worker, | ||
224 | .flags = 0, | ||
141 | }; | 225 | }; |
142 | 226 | ||
143 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 227 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
@@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | |||
147 | lockres->l_type == OCFS2_LOCK_TYPE_RW; | 231 | lockres->l_type == OCFS2_LOCK_TYPE_RW; |
148 | } | 232 | } |
149 | 233 | ||
150 | static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) | 234 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) |
151 | { | 235 | { |
152 | return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; | 236 | BUG_ON(!ocfs2_is_inode_lock(lockres)); |
153 | } | ||
154 | 237 | ||
155 | static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) | 238 | return (struct inode *) lockres->l_priv; |
156 | { | ||
157 | return lockres->l_type == OCFS2_LOCK_TYPE_RENAME; | ||
158 | } | 239 | } |
159 | 240 | ||
160 | static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) | 241 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) |
161 | { | 242 | { |
162 | BUG_ON(!ocfs2_is_super_lock(lockres) | 243 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); |
163 | && !ocfs2_is_rename_lock(lockres)); | ||
164 | 244 | ||
165 | return (struct ocfs2_super *) lockres->l_priv; | 245 | return (struct ocfs2_dentry_lock *)lockres->l_priv; |
166 | } | 246 | } |
167 | 247 | ||
168 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 248 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) |
169 | { | 249 | { |
170 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 250 | if (lockres->l_ops->get_osb) |
251 | return lockres->l_ops->get_osb(lockres); | ||
171 | 252 | ||
172 | return (struct inode *) lockres->l_priv; | 253 | return (struct ocfs2_super *)lockres->l_priv; |
173 | } | 254 | } |
174 | 255 | ||
175 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 256 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
@@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode, | |||
200 | struct buffer_head **bh); | 281 | struct buffer_head **bh); |
201 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 282 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
202 | static inline int ocfs2_highest_compat_lock_level(int level); | 283 | static inline int ocfs2_highest_compat_lock_level(int level); |
203 | static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, | ||
204 | struct ocfs2_lock_res *lockres, | ||
205 | int new_level); | ||
206 | |||
207 | static char *ocfs2_lock_type_strings[] = { | ||
208 | [OCFS2_LOCK_TYPE_META] = "Meta", | ||
209 | [OCFS2_LOCK_TYPE_DATA] = "Data", | ||
210 | [OCFS2_LOCK_TYPE_SUPER] = "Super", | ||
211 | [OCFS2_LOCK_TYPE_RENAME] = "Rename", | ||
212 | /* Need to differntiate from [R]ename.. serializing writes is the | ||
213 | * important job it does, anyway. */ | ||
214 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | ||
215 | }; | ||
216 | |||
217 | static char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | ||
218 | { | ||
219 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | ||
220 | return ocfs2_lock_type_strings[type]; | ||
221 | } | ||
222 | 284 | ||
223 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 285 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
224 | u64 blkno, | 286 | u64 blkno, |
@@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | |||
265 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 327 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, |
266 | struct ocfs2_lock_res *res, | 328 | struct ocfs2_lock_res *res, |
267 | enum ocfs2_lock_type type, | 329 | enum ocfs2_lock_type type, |
268 | u64 blkno, | ||
269 | u32 generation, | ||
270 | struct ocfs2_lock_res_ops *ops, | 330 | struct ocfs2_lock_res_ops *ops, |
271 | void *priv) | 331 | void *priv) |
272 | { | 332 | { |
273 | ocfs2_build_lock_name(type, blkno, generation, res->l_name); | ||
274 | |||
275 | res->l_type = type; | 333 | res->l_type = type; |
276 | res->l_ops = ops; | 334 | res->l_ops = ops; |
277 | res->l_priv = priv; | 335 | res->l_priv = priv; |
@@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | |||
299 | 357 | ||
300 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 358 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
301 | enum ocfs2_lock_type type, | 359 | enum ocfs2_lock_type type, |
360 | unsigned int generation, | ||
302 | struct inode *inode) | 361 | struct inode *inode) |
303 | { | 362 | { |
304 | struct ocfs2_lock_res_ops *ops; | 363 | struct ocfs2_lock_res_ops *ops; |
@@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
319 | break; | 378 | break; |
320 | }; | 379 | }; |
321 | 380 | ||
322 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, | 381 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, |
323 | OCFS2_I(inode)->ip_blkno, | 382 | generation, res->l_name); |
324 | inode->i_generation, ops, inode); | 383 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); |
384 | } | ||
385 | |||
386 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | ||
387 | { | ||
388 | struct inode *inode = ocfs2_lock_res_inode(lockres); | ||
389 | |||
390 | return OCFS2_SB(inode->i_sb); | ||
391 | } | ||
392 | |||
393 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | ||
394 | { | ||
395 | __be64 inode_blkno_be; | ||
396 | |||
397 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | ||
398 | sizeof(__be64)); | ||
399 | |||
400 | return be64_to_cpu(inode_blkno_be); | ||
401 | } | ||
402 | |||
403 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | ||
404 | { | ||
405 | struct ocfs2_dentry_lock *dl = lockres->l_priv; | ||
406 | |||
407 | return OCFS2_SB(dl->dl_inode->i_sb); | ||
408 | } | ||
409 | |||
410 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | ||
411 | u64 parent, struct inode *inode) | ||
412 | { | ||
413 | int len; | ||
414 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | ||
415 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); | ||
416 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; | ||
417 | |||
418 | ocfs2_lock_res_init_once(lockres); | ||
419 | |||
420 | /* | ||
421 | * Unfortunately, the standard lock naming scheme won't work | ||
422 | * here because we have two 16 byte values to use. Instead, | ||
423 | * we'll stuff the inode number as a binary value. We still | ||
424 | * want error prints to show something without garbling the | ||
425 | * display, so drop a null byte in there before the inode | ||
426 | * number. A future version of OCFS2 will likely use all | ||
427 | * binary lock names. The stringified names have been a | ||
428 | * tremendous aid in debugging, but now that the debugfs | ||
429 | * interface exists, we can mangle things there if need be. | ||
430 | * | ||
431 | * NOTE: We also drop the standard "pad" value (the total lock | ||
432 | * name size stays the same though - the last part is all | ||
433 | * zeros due to the memset in ocfs2_lock_res_init_once() | ||
434 | */ | ||
435 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | ||
436 | "%c%016llx", | ||
437 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | ||
438 | (long long)parent); | ||
439 | |||
440 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | ||
441 | |||
442 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | ||
443 | sizeof(__be64)); | ||
444 | |||
445 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
446 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | ||
447 | dl); | ||
325 | } | 448 | } |
326 | 449 | ||
327 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 450 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, |
@@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | |||
330 | /* Superblock lockres doesn't come from a slab so we call init | 453 | /* Superblock lockres doesn't come from a slab so we call init |
331 | * once on it manually. */ | 454 | * once on it manually. */ |
332 | ocfs2_lock_res_init_once(res); | 455 | ocfs2_lock_res_init_once(res); |
456 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | ||
457 | 0, res->l_name); | ||
333 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 458 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, |
334 | OCFS2_SUPER_BLOCK_BLKNO, 0, | ||
335 | &ocfs2_super_lops, osb); | 459 | &ocfs2_super_lops, osb); |
336 | } | 460 | } |
337 | 461 | ||
@@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
341 | /* Rename lockres doesn't come from a slab so we call init | 465 | /* Rename lockres doesn't come from a slab so we call init |
342 | * once on it manually. */ | 466 | * once on it manually. */ |
343 | ocfs2_lock_res_init_once(res); | 467 | ocfs2_lock_res_init_once(res); |
344 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, | 468 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); |
469 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | ||
345 | &ocfs2_rename_lops, osb); | 470 | &ocfs2_rename_lops, osb); |
346 | } | 471 | } |
347 | 472 | ||
@@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
495 | * information is already up to date. Convert from NL to | 620 | * information is already up to date. Convert from NL to |
496 | * *anything* however should mark ourselves as needing an | 621 | * *anything* however should mark ourselves as needing an |
497 | * update */ | 622 | * update */ |
498 | if (lockres->l_level == LKM_NLMODE) | 623 | if (lockres->l_level == LKM_NLMODE && |
624 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
499 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 625 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
500 | 626 | ||
501 | lockres->l_level = lockres->l_requested; | 627 | lockres->l_level = lockres->l_requested; |
@@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
512 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 638 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
513 | 639 | ||
514 | if (lockres->l_requested > LKM_NLMODE && | 640 | if (lockres->l_requested > LKM_NLMODE && |
515 | !(lockres->l_flags & OCFS2_LOCK_LOCAL)) | 641 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && |
642 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
516 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 643 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
517 | 644 | ||
518 | lockres->l_level = lockres->l_requested; | 645 | lockres->l_level = lockres->l_requested; |
@@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
522 | mlog_exit_void(); | 649 | mlog_exit_void(); |
523 | } | 650 | } |
524 | 651 | ||
525 | static void ocfs2_inode_ast_func(void *opaque) | ||
526 | { | ||
527 | struct ocfs2_lock_res *lockres = opaque; | ||
528 | struct inode *inode; | ||
529 | struct dlm_lockstatus *lksb; | ||
530 | unsigned long flags; | ||
531 | |||
532 | mlog_entry_void(); | ||
533 | |||
534 | inode = ocfs2_lock_res_inode(lockres); | ||
535 | |||
536 | mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n", | ||
537 | (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action, | ||
538 | ocfs2_lock_type_string(lockres->l_type)); | ||
539 | |||
540 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | ||
541 | |||
542 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
543 | |||
544 | lksb = &(lockres->l_lksb); | ||
545 | if (lksb->status != DLM_NORMAL) { | ||
546 | mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u " | ||
547 | "on inode %llu\n", lksb->status, | ||
548 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
549 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
550 | mlog_exit_void(); | ||
551 | return; | ||
552 | } | ||
553 | |||
554 | switch(lockres->l_action) { | ||
555 | case OCFS2_AST_ATTACH: | ||
556 | ocfs2_generic_handle_attach_action(lockres); | ||
557 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | ||
558 | break; | ||
559 | case OCFS2_AST_CONVERT: | ||
560 | ocfs2_generic_handle_convert_action(lockres); | ||
561 | break; | ||
562 | case OCFS2_AST_DOWNCONVERT: | ||
563 | ocfs2_generic_handle_downconvert_action(lockres); | ||
564 | break; | ||
565 | default: | ||
566 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | ||
567 | "lockres flags = 0x%lx, unlock action: %u\n", | ||
568 | lockres->l_name, lockres->l_action, lockres->l_flags, | ||
569 | lockres->l_unlock_action); | ||
570 | |||
571 | BUG(); | ||
572 | } | ||
573 | |||
574 | /* data and rw locking ignores refresh flag for now. */ | ||
575 | if (lockres->l_type != OCFS2_LOCK_TYPE_META) | ||
576 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | ||
577 | |||
578 | /* set it to something invalid so if we get called again we | ||
579 | * can catch it. */ | ||
580 | lockres->l_action = OCFS2_AST_INVALID; | ||
581 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
582 | wake_up(&lockres->l_event); | ||
583 | |||
584 | mlog_exit_void(); | ||
585 | } | ||
586 | |||
587 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 652 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, |
588 | int level) | 653 | int level) |
589 | { | 654 | { |
@@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
610 | return needs_downconvert; | 675 | return needs_downconvert; |
611 | } | 676 | } |
612 | 677 | ||
613 | static void ocfs2_generic_bast_func(struct ocfs2_super *osb, | 678 | static void ocfs2_blocking_ast(void *opaque, int level) |
614 | struct ocfs2_lock_res *lockres, | ||
615 | int level) | ||
616 | { | 679 | { |
680 | struct ocfs2_lock_res *lockres = opaque; | ||
681 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | ||
617 | int needs_downconvert; | 682 | int needs_downconvert; |
618 | unsigned long flags; | 683 | unsigned long flags; |
619 | 684 | ||
620 | mlog_entry_void(); | ||
621 | |||
622 | BUG_ON(level <= LKM_NLMODE); | 685 | BUG_ON(level <= LKM_NLMODE); |
623 | 686 | ||
687 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | ||
688 | lockres->l_name, level, lockres->l_level, | ||
689 | ocfs2_lock_type_string(lockres->l_type)); | ||
690 | |||
624 | spin_lock_irqsave(&lockres->l_lock, flags); | 691 | spin_lock_irqsave(&lockres->l_lock, flags); |
625 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 692 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
626 | if (needs_downconvert) | 693 | if (needs_downconvert) |
627 | ocfs2_schedule_blocked_lock(osb, lockres); | 694 | ocfs2_schedule_blocked_lock(osb, lockres); |
628 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 695 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
629 | 696 | ||
630 | ocfs2_kick_vote_thread(osb); | ||
631 | |||
632 | wake_up(&lockres->l_event); | 697 | wake_up(&lockres->l_event); |
633 | mlog_exit_void(); | ||
634 | } | ||
635 | |||
636 | static void ocfs2_inode_bast_func(void *opaque, int level) | ||
637 | { | ||
638 | struct ocfs2_lock_res *lockres = opaque; | ||
639 | struct inode *inode; | ||
640 | struct ocfs2_super *osb; | ||
641 | 698 | ||
642 | mlog_entry_void(); | 699 | ocfs2_kick_vote_thread(osb); |
643 | |||
644 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | ||
645 | |||
646 | inode = ocfs2_lock_res_inode(lockres); | ||
647 | osb = OCFS2_SB(inode->i_sb); | ||
648 | |||
649 | mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n", | ||
650 | (unsigned long long)OCFS2_I(inode)->ip_blkno, level, | ||
651 | lockres->l_level, ocfs2_lock_type_string(lockres->l_type)); | ||
652 | |||
653 | ocfs2_generic_bast_func(osb, lockres, level); | ||
654 | |||
655 | mlog_exit_void(); | ||
656 | } | 700 | } |
657 | 701 | ||
658 | static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | 702 | static void ocfs2_locking_ast(void *opaque) |
659 | int ignore_refresh) | ||
660 | { | 703 | { |
704 | struct ocfs2_lock_res *lockres = opaque; | ||
661 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 705 | struct dlm_lockstatus *lksb = &lockres->l_lksb; |
662 | unsigned long flags; | 706 | unsigned long flags; |
663 | 707 | ||
@@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | |||
673 | switch(lockres->l_action) { | 717 | switch(lockres->l_action) { |
674 | case OCFS2_AST_ATTACH: | 718 | case OCFS2_AST_ATTACH: |
675 | ocfs2_generic_handle_attach_action(lockres); | 719 | ocfs2_generic_handle_attach_action(lockres); |
720 | lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); | ||
676 | break; | 721 | break; |
677 | case OCFS2_AST_CONVERT: | 722 | case OCFS2_AST_CONVERT: |
678 | ocfs2_generic_handle_convert_action(lockres); | 723 | ocfs2_generic_handle_convert_action(lockres); |
@@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, | |||
681 | ocfs2_generic_handle_downconvert_action(lockres); | 726 | ocfs2_generic_handle_downconvert_action(lockres); |
682 | break; | 727 | break; |
683 | default: | 728 | default: |
729 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | ||
730 | "lockres flags = 0x%lx, unlock action: %u\n", | ||
731 | lockres->l_name, lockres->l_action, lockres->l_flags, | ||
732 | lockres->l_unlock_action); | ||
684 | BUG(); | 733 | BUG(); |
685 | } | 734 | } |
686 | 735 | ||
687 | if (ignore_refresh) | ||
688 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | ||
689 | |||
690 | /* set it to something invalid so if we get called again we | 736 | /* set it to something invalid so if we get called again we |
691 | * can catch it. */ | 737 | * can catch it. */ |
692 | lockres->l_action = OCFS2_AST_INVALID; | 738 | lockres->l_action = OCFS2_AST_INVALID; |
693 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
694 | 739 | ||
695 | wake_up(&lockres->l_event); | 740 | wake_up(&lockres->l_event); |
696 | } | 741 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
697 | |||
698 | static void ocfs2_super_ast_func(void *opaque) | ||
699 | { | ||
700 | struct ocfs2_lock_res *lockres = opaque; | ||
701 | |||
702 | mlog_entry_void(); | ||
703 | mlog(0, "Superblock AST fired\n"); | ||
704 | |||
705 | BUG_ON(!ocfs2_is_super_lock(lockres)); | ||
706 | ocfs2_generic_ast_func(lockres, 0); | ||
707 | |||
708 | mlog_exit_void(); | ||
709 | } | ||
710 | |||
711 | static void ocfs2_super_bast_func(void *opaque, | ||
712 | int level) | ||
713 | { | ||
714 | struct ocfs2_lock_res *lockres = opaque; | ||
715 | struct ocfs2_super *osb; | ||
716 | |||
717 | mlog_entry_void(); | ||
718 | mlog(0, "Superblock BAST fired\n"); | ||
719 | |||
720 | BUG_ON(!ocfs2_is_super_lock(lockres)); | ||
721 | osb = ocfs2_lock_res_super(lockres); | ||
722 | ocfs2_generic_bast_func(osb, lockres, level); | ||
723 | |||
724 | mlog_exit_void(); | ||
725 | } | ||
726 | |||
727 | static void ocfs2_rename_ast_func(void *opaque) | ||
728 | { | ||
729 | struct ocfs2_lock_res *lockres = opaque; | ||
730 | |||
731 | mlog_entry_void(); | ||
732 | |||
733 | mlog(0, "Rename AST fired\n"); | ||
734 | |||
735 | BUG_ON(!ocfs2_is_rename_lock(lockres)); | ||
736 | |||
737 | ocfs2_generic_ast_func(lockres, 1); | ||
738 | |||
739 | mlog_exit_void(); | ||
740 | } | ||
741 | |||
742 | static void ocfs2_rename_bast_func(void *opaque, | ||
743 | int level) | ||
744 | { | ||
745 | struct ocfs2_lock_res *lockres = opaque; | ||
746 | struct ocfs2_super *osb; | ||
747 | |||
748 | mlog_entry_void(); | ||
749 | |||
750 | mlog(0, "Rename BAST fired\n"); | ||
751 | |||
752 | BUG_ON(!ocfs2_is_rename_lock(lockres)); | ||
753 | |||
754 | osb = ocfs2_lock_res_super(lockres); | ||
755 | ocfs2_generic_bast_func(osb, lockres, level); | ||
756 | |||
757 | mlog_exit_void(); | ||
758 | } | 742 | } |
759 | 743 | ||
760 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 744 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
@@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
810 | &lockres->l_lksb, | 794 | &lockres->l_lksb, |
811 | dlm_flags, | 795 | dlm_flags, |
812 | lockres->l_name, | 796 | lockres->l_name, |
813 | lockres->l_ops->ast, | 797 | OCFS2_LOCK_ID_MAX_LEN - 1, |
798 | ocfs2_locking_ast, | ||
814 | lockres, | 799 | lockres, |
815 | lockres->l_ops->bast); | 800 | ocfs2_blocking_ast); |
816 | if (status != DLM_NORMAL) { | 801 | if (status != DLM_NORMAL) { |
817 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 802 | ocfs2_log_dlm_error("dlmlock", status, lockres); |
818 | ret = -EINVAL; | 803 | ret = -EINVAL; |
@@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
930 | 915 | ||
931 | ocfs2_init_mask_waiter(&mw); | 916 | ocfs2_init_mask_waiter(&mw); |
932 | 917 | ||
918 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | ||
919 | lkm_flags |= LKM_VALBLK; | ||
920 | |||
933 | again: | 921 | again: |
934 | wait = 0; | 922 | wait = 0; |
935 | 923 | ||
@@ -997,11 +985,12 @@ again: | |||
997 | status = dlmlock(osb->dlm, | 985 | status = dlmlock(osb->dlm, |
998 | level, | 986 | level, |
999 | &lockres->l_lksb, | 987 | &lockres->l_lksb, |
1000 | lkm_flags|LKM_CONVERT|LKM_VALBLK, | 988 | lkm_flags|LKM_CONVERT, |
1001 | lockres->l_name, | 989 | lockres->l_name, |
1002 | lockres->l_ops->ast, | 990 | OCFS2_LOCK_ID_MAX_LEN - 1, |
991 | ocfs2_locking_ast, | ||
1003 | lockres, | 992 | lockres, |
1004 | lockres->l_ops->bast); | 993 | ocfs2_blocking_ast); |
1005 | if (status != DLM_NORMAL) { | 994 | if (status != DLM_NORMAL) { |
1006 | if ((lkm_flags & LKM_NOQUEUE) && | 995 | if ((lkm_flags & LKM_NOQUEUE) && |
1007 | (status == DLM_NOTQUEUED)) | 996 | (status == DLM_NOTQUEUED)) |
@@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1074 | mlog_exit_void(); | 1063 | mlog_exit_void(); |
1075 | } | 1064 | } |
1076 | 1065 | ||
1077 | static int ocfs2_create_new_inode_lock(struct inode *inode, | 1066 | int ocfs2_create_new_lock(struct ocfs2_super *osb, |
1078 | struct ocfs2_lock_res *lockres) | 1067 | struct ocfs2_lock_res *lockres, |
1068 | int ex, | ||
1069 | int local) | ||
1079 | { | 1070 | { |
1080 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1071 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1081 | unsigned long flags; | 1072 | unsigned long flags; |
1073 | int lkm_flags = local ? LKM_LOCAL : 0; | ||
1082 | 1074 | ||
1083 | spin_lock_irqsave(&lockres->l_lock, flags); | 1075 | spin_lock_irqsave(&lockres->l_lock, flags); |
1084 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1076 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
1085 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 1077 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); |
1086 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1078 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1087 | 1079 | ||
1088 | return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); | 1080 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); |
1089 | } | 1081 | } |
1090 | 1082 | ||
1091 | /* Grants us an EX lock on the data and metadata resources, skipping | 1083 | /* Grants us an EX lock on the data and metadata resources, skipping |
@@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode, | |||
1097 | int ocfs2_create_new_inode_locks(struct inode *inode) | 1089 | int ocfs2_create_new_inode_locks(struct inode *inode) |
1098 | { | 1090 | { |
1099 | int ret; | 1091 | int ret; |
1092 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1100 | 1093 | ||
1101 | BUG_ON(!inode); | 1094 | BUG_ON(!inode); |
1102 | BUG_ON(!ocfs2_inode_is_new(inode)); | 1095 | BUG_ON(!ocfs2_inode_is_new(inode)); |
@@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1113 | * on a resource which has an invalid one -- we'll set it | 1106 | * on a resource which has an invalid one -- we'll set it |
1114 | * valid when we release the EX. */ | 1107 | * valid when we release the EX. */ |
1115 | 1108 | ||
1116 | ret = ocfs2_create_new_inode_lock(inode, | 1109 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); |
1117 | &OCFS2_I(inode)->ip_rw_lockres); | ||
1118 | if (ret) { | 1110 | if (ret) { |
1119 | mlog_errno(ret); | 1111 | mlog_errno(ret); |
1120 | goto bail; | 1112 | goto bail; |
1121 | } | 1113 | } |
1122 | 1114 | ||
1123 | ret = ocfs2_create_new_inode_lock(inode, | 1115 | /* |
1124 | &OCFS2_I(inode)->ip_meta_lockres); | 1116 | * We don't want to use LKM_LOCAL on a meta data lock as they |
1117 | * don't use a generation in their lock names. | ||
1118 | */ | ||
1119 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | ||
1125 | if (ret) { | 1120 | if (ret) { |
1126 | mlog_errno(ret); | 1121 | mlog_errno(ret); |
1127 | goto bail; | 1122 | goto bail; |
1128 | } | 1123 | } |
1129 | 1124 | ||
1130 | ret = ocfs2_create_new_inode_lock(inode, | 1125 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); |
1131 | &OCFS2_I(inode)->ip_data_lockres); | ||
1132 | if (ret) { | 1126 | if (ret) { |
1133 | mlog_errno(ret); | 1127 | mlog_errno(ret); |
1134 | goto bail; | 1128 | goto bail; |
@@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
1317 | 1311 | ||
1318 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1312 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; |
1319 | 1313 | ||
1320 | lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); | 1314 | /* |
1315 | * Invalidate the LVB of a deleted inode - this way other | ||
1316 | * nodes are forced to go to disk and discover the new inode | ||
1317 | * status. | ||
1318 | */ | ||
1319 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | ||
1320 | lvb->lvb_version = 0; | ||
1321 | goto out; | ||
1322 | } | ||
1323 | |||
1324 | lvb->lvb_version = OCFS2_LVB_VERSION; | ||
1321 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); | 1325 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); |
1322 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 1326 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); |
1323 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); | 1327 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); |
@@ -1331,7 +1335,9 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
1331 | lvb->lvb_imtime_packed = | 1335 | lvb->lvb_imtime_packed = |
1332 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 1336 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); |
1333 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); | 1337 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); |
1338 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | ||
1334 | 1339 | ||
1340 | out: | ||
1335 | mlog_meta_lvb(0, lockres); | 1341 | mlog_meta_lvb(0, lockres); |
1336 | 1342 | ||
1337 | mlog_exit_void(); | 1343 | mlog_exit_void(); |
@@ -1386,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
1386 | mlog_exit_void(); | 1392 | mlog_exit_void(); |
1387 | } | 1393 | } |
1388 | 1394 | ||
1389 | static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) | 1395 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
1396 | struct ocfs2_lock_res *lockres) | ||
1390 | { | 1397 | { |
1391 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1398 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; |
1392 | 1399 | ||
1393 | if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) | 1400 | if (lvb->lvb_version == OCFS2_LVB_VERSION |
1401 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | ||
1394 | return 1; | 1402 | return 1; |
1395 | return 0; | 1403 | return 0; |
1396 | } | 1404 | } |
@@ -1487,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode, | |||
1487 | * map (directories, bitmap files, etc) */ | 1495 | * map (directories, bitmap files, etc) */ |
1488 | ocfs2_extent_map_trunc(inode, 0); | 1496 | ocfs2_extent_map_trunc(inode, 0); |
1489 | 1497 | ||
1490 | if (ocfs2_meta_lvb_is_trustable(lockres)) { | 1498 | if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { |
1491 | mlog(0, "Trusting LVB on inode %llu\n", | 1499 | mlog(0, "Trusting LVB on inode %llu\n", |
1492 | (unsigned long long)oi->ip_blkno); | 1500 | (unsigned long long)oi->ip_blkno); |
1493 | ocfs2_refresh_inode_from_lvb(inode); | 1501 | ocfs2_refresh_inode_from_lvb(inode); |
@@ -1628,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1628 | wait_event(osb->recovery_event, | 1636 | wait_event(osb->recovery_event, |
1629 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1637 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1630 | 1638 | ||
1639 | /* | ||
1640 | * We only see this flag if we're being called from | ||
1641 | * ocfs2_read_locked_inode(). It means we're locking an inode | ||
1642 | * which hasn't been populated yet, so clear the refresh flag | ||
1643 | * and let the caller handle it. | ||
1644 | */ | ||
1645 | if (inode->i_state & I_NEW) { | ||
1646 | status = 0; | ||
1647 | ocfs2_complete_lock_res_refresh(lockres, 0); | ||
1648 | goto bail; | ||
1649 | } | ||
1650 | |||
1631 | /* This is fun. The caller may want a bh back, or it may | 1651 | /* This is fun. The caller may want a bh back, or it may |
1632 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1652 | * not. ocfs2_meta_lock_update definitely wants one in, but |
1633 | * may or may not read one, depending on what's in the | 1653 | * may or may not read one, depending on what's in the |
@@ -1807,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
1807 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 1827 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); |
1808 | } | 1828 | } |
1809 | 1829 | ||
1830 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | ||
1831 | { | ||
1832 | int ret; | ||
1833 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1834 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
1835 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | ||
1836 | |||
1837 | BUG_ON(!dl); | ||
1838 | |||
1839 | if (ocfs2_is_hard_readonly(osb)) | ||
1840 | return -EROFS; | ||
1841 | |||
1842 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | ||
1843 | if (ret < 0) | ||
1844 | mlog_errno(ret); | ||
1845 | |||
1846 | return ret; | ||
1847 | } | ||
1848 | |||
1849 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | ||
1850 | { | ||
1851 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1852 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
1853 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | ||
1854 | |||
1855 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | ||
1856 | } | ||
1857 | |||
1810 | /* Reference counting of the dlm debug structure. We want this because | 1858 | /* Reference counting of the dlm debug structure. We want this because |
1811 | * open references on the debug inodes can live on after a mount, so | 1859 | * open references on the debug inodes can live on after a mount, so |
1812 | * we can't rely on the ocfs2_super to always exist. */ | 1860 | * we can't rely on the ocfs2_super to always exist. */ |
@@ -1937,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
1937 | if (!lockres) | 1985 | if (!lockres) |
1938 | return -EINVAL; | 1986 | return -EINVAL; |
1939 | 1987 | ||
1940 | seq_printf(m, "0x%x\t" | 1988 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); |
1941 | "%.*s\t" | 1989 | |
1942 | "%d\t" | 1990 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) |
1991 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | ||
1992 | lockres->l_name, | ||
1993 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | ||
1994 | else | ||
1995 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | ||
1996 | |||
1997 | seq_printf(m, "%d\t" | ||
1943 | "0x%lx\t" | 1998 | "0x%lx\t" |
1944 | "0x%x\t" | 1999 | "0x%x\t" |
1945 | "0x%x\t" | 2000 | "0x%x\t" |
@@ -1947,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
1947 | "%u\t" | 2002 | "%u\t" |
1948 | "%d\t" | 2003 | "%d\t" |
1949 | "%d\t", | 2004 | "%d\t", |
1950 | OCFS2_DLM_DEBUG_STR_VERSION, | ||
1951 | OCFS2_LOCK_ID_MAX_LEN, lockres->l_name, | ||
1952 | lockres->l_level, | 2005 | lockres->l_level, |
1953 | lockres->l_flags, | 2006 | lockres->l_flags, |
1954 | lockres->l_action, | 2007 | lockres->l_action, |
@@ -2138,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2138 | mlog_exit_void(); | 2191 | mlog_exit_void(); |
2139 | } | 2192 | } |
2140 | 2193 | ||
2141 | static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) | 2194 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) |
2142 | { | 2195 | { |
2143 | struct ocfs2_lock_res *lockres = opaque; | 2196 | struct ocfs2_lock_res *lockres = opaque; |
2144 | unsigned long flags; | 2197 | unsigned long flags; |
@@ -2194,24 +2247,20 @@ complete_unlock: | |||
2194 | mlog_exit_void(); | 2247 | mlog_exit_void(); |
2195 | } | 2248 | } |
2196 | 2249 | ||
2197 | typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *); | ||
2198 | |||
2199 | struct drop_lock_cb { | ||
2200 | ocfs2_pre_drop_cb_t *drop_func; | ||
2201 | void *drop_data; | ||
2202 | }; | ||
2203 | |||
2204 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 2250 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
2205 | struct ocfs2_lock_res *lockres, | 2251 | struct ocfs2_lock_res *lockres) |
2206 | struct drop_lock_cb *dcb) | ||
2207 | { | 2252 | { |
2208 | enum dlm_status status; | 2253 | enum dlm_status status; |
2209 | unsigned long flags; | 2254 | unsigned long flags; |
2255 | int lkm_flags = 0; | ||
2210 | 2256 | ||
2211 | /* We didn't get anywhere near actually using this lockres. */ | 2257 | /* We didn't get anywhere near actually using this lockres. */ |
2212 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 2258 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
2213 | goto out; | 2259 | goto out; |
2214 | 2260 | ||
2261 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | ||
2262 | lkm_flags |= LKM_VALBLK; | ||
2263 | |||
2215 | spin_lock_irqsave(&lockres->l_lock, flags); | 2264 | spin_lock_irqsave(&lockres->l_lock, flags); |
2216 | 2265 | ||
2217 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 2266 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), |
@@ -2234,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
2234 | spin_lock_irqsave(&lockres->l_lock, flags); | 2283 | spin_lock_irqsave(&lockres->l_lock, flags); |
2235 | } | 2284 | } |
2236 | 2285 | ||
2237 | if (dcb) | 2286 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
2238 | dcb->drop_func(lockres, dcb->drop_data); | 2287 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
2288 | lockres->l_level == LKM_EXMODE && | ||
2289 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
2290 | lockres->l_ops->set_lvb(lockres); | ||
2291 | } | ||
2239 | 2292 | ||
2240 | if (lockres->l_flags & OCFS2_LOCK_BUSY) | 2293 | if (lockres->l_flags & OCFS2_LOCK_BUSY) |
2241 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 2294 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", |
@@ -2261,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
2261 | 2314 | ||
2262 | mlog(0, "lock %s\n", lockres->l_name); | 2315 | mlog(0, "lock %s\n", lockres->l_name); |
2263 | 2316 | ||
2264 | status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, | 2317 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, |
2265 | lockres->l_ops->unlock_ast, lockres); | 2318 | ocfs2_unlock_ast, lockres); |
2266 | if (status != DLM_NORMAL) { | 2319 | if (status != DLM_NORMAL) { |
2267 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2320 | ocfs2_log_dlm_error("dlmunlock", status, lockres); |
2268 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 2321 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
@@ -2309,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | |||
2309 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2362 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2310 | } | 2363 | } |
2311 | 2364 | ||
2312 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) | 2365 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
2366 | struct ocfs2_lock_res *lockres) | ||
2313 | { | 2367 | { |
2314 | int status; | 2368 | int ret; |
2315 | |||
2316 | mlog_entry_void(); | ||
2317 | |||
2318 | ocfs2_mark_lockres_freeing(&osb->osb_super_lockres); | ||
2319 | |||
2320 | status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL); | ||
2321 | if (status < 0) | ||
2322 | mlog_errno(status); | ||
2323 | |||
2324 | ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres); | ||
2325 | |||
2326 | status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL); | ||
2327 | if (status < 0) | ||
2328 | mlog_errno(status); | ||
2329 | 2369 | ||
2330 | mlog_exit(status); | 2370 | ocfs2_mark_lockres_freeing(lockres); |
2371 | ret = ocfs2_drop_lock(osb, lockres); | ||
2372 | if (ret) | ||
2373 | mlog_errno(ret); | ||
2331 | } | 2374 | } |
2332 | 2375 | ||
2333 | static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) | 2376 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) |
2334 | { | 2377 | { |
2335 | struct inode *inode = data; | 2378 | ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); |
2336 | 2379 | ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); | |
2337 | /* the metadata lock requires a bit more work as we have an | ||
2338 | * LVB to worry about. */ | ||
2339 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | ||
2340 | lockres->l_level == LKM_EXMODE && | ||
2341 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
2342 | __ocfs2_stuff_meta_lvb(inode); | ||
2343 | } | 2380 | } |
2344 | 2381 | ||
2345 | int ocfs2_drop_inode_locks(struct inode *inode) | 2382 | int ocfs2_drop_inode_locks(struct inode *inode) |
2346 | { | 2383 | { |
2347 | int status, err; | 2384 | int status, err; |
2348 | struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, }; | ||
2349 | 2385 | ||
2350 | mlog_entry_void(); | 2386 | mlog_entry_void(); |
2351 | 2387 | ||
@@ -2353,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2353 | * ocfs2_clear_inode has done it for us. */ | 2389 | * ocfs2_clear_inode has done it for us. */ |
2354 | 2390 | ||
2355 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2391 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2356 | &OCFS2_I(inode)->ip_data_lockres, | 2392 | &OCFS2_I(inode)->ip_data_lockres); |
2357 | NULL); | ||
2358 | if (err < 0) | 2393 | if (err < 0) |
2359 | mlog_errno(err); | 2394 | mlog_errno(err); |
2360 | 2395 | ||
2361 | status = err; | 2396 | status = err; |
2362 | 2397 | ||
2363 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2398 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2364 | &OCFS2_I(inode)->ip_meta_lockres, | 2399 | &OCFS2_I(inode)->ip_meta_lockres); |
2365 | &meta_dcb); | ||
2366 | if (err < 0) | 2400 | if (err < 0) |
2367 | mlog_errno(err); | 2401 | mlog_errno(err); |
2368 | if (err < 0 && !status) | 2402 | if (err < 0 && !status) |
2369 | status = err; | 2403 | status = err; |
2370 | 2404 | ||
2371 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2405 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2372 | &OCFS2_I(inode)->ip_rw_lockres, | 2406 | &OCFS2_I(inode)->ip_rw_lockres); |
2373 | NULL); | ||
2374 | if (err < 0) | 2407 | if (err < 0) |
2375 | mlog_errno(err); | 2408 | mlog_errno(err); |
2376 | if (err < 0 && !status) | 2409 | if (err < 0 && !status) |
@@ -2419,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
2419 | &lockres->l_lksb, | 2452 | &lockres->l_lksb, |
2420 | dlm_flags, | 2453 | dlm_flags, |
2421 | lockres->l_name, | 2454 | lockres->l_name, |
2422 | lockres->l_ops->ast, | 2455 | OCFS2_LOCK_ID_MAX_LEN - 1, |
2456 | ocfs2_locking_ast, | ||
2423 | lockres, | 2457 | lockres, |
2424 | lockres->l_ops->bast); | 2458 | ocfs2_blocking_ast); |
2425 | if (status != DLM_NORMAL) { | 2459 | if (status != DLM_NORMAL) { |
2426 | ocfs2_log_dlm_error("dlmlock", status, lockres); | 2460 | ocfs2_log_dlm_error("dlmlock", status, lockres); |
2427 | ret = -EINVAL; | 2461 | ret = -EINVAL; |
@@ -2480,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
2480 | status = dlmunlock(osb->dlm, | 2514 | status = dlmunlock(osb->dlm, |
2481 | &lockres->l_lksb, | 2515 | &lockres->l_lksb, |
2482 | LKM_CANCEL, | 2516 | LKM_CANCEL, |
2483 | lockres->l_ops->unlock_ast, | 2517 | ocfs2_unlock_ast, |
2484 | lockres); | 2518 | lockres); |
2485 | if (status != DLM_NORMAL) { | 2519 | if (status != DLM_NORMAL) { |
2486 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2520 | ocfs2_log_dlm_error("dlmunlock", status, lockres); |
@@ -2494,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
2494 | return ret; | 2528 | return ret; |
2495 | } | 2529 | } |
2496 | 2530 | ||
2497 | static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, | 2531 | static int ocfs2_unblock_lock(struct ocfs2_super *osb, |
2498 | struct ocfs2_lock_res *lockres, | 2532 | struct ocfs2_lock_res *lockres, |
2499 | int new_level) | 2533 | struct ocfs2_unblock_ctl *ctl) |
2500 | { | ||
2501 | int ret; | ||
2502 | |||
2503 | mlog_entry_void(); | ||
2504 | |||
2505 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | ||
2506 | |||
2507 | if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { | ||
2508 | ret = 0; | ||
2509 | mlog(0, "lockres %s currently being refreshed -- backing " | ||
2510 | "off!\n", lockres->l_name); | ||
2511 | } else if (new_level == LKM_PRMODE) | ||
2512 | ret = !lockres->l_ex_holders && | ||
2513 | ocfs2_inode_fully_checkpointed(inode); | ||
2514 | else /* Must be NLMODE we're converting to. */ | ||
2515 | ret = !lockres->l_ro_holders && !lockres->l_ex_holders && | ||
2516 | ocfs2_inode_fully_checkpointed(inode); | ||
2517 | |||
2518 | mlog_exit(ret); | ||
2519 | return ret; | ||
2520 | } | ||
2521 | |||
2522 | static int ocfs2_do_unblock_meta(struct inode *inode, | ||
2523 | int *requeue) | ||
2524 | { | ||
2525 | int new_level; | ||
2526 | int set_lvb = 0; | ||
2527 | int ret = 0; | ||
2528 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | ||
2529 | unsigned long flags; | ||
2530 | |||
2531 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2532 | |||
2533 | mlog_entry_void(); | ||
2534 | |||
2535 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
2536 | |||
2537 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | ||
2538 | |||
2539 | mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level, | ||
2540 | lockres->l_blocking); | ||
2541 | |||
2542 | BUG_ON(lockres->l_level != LKM_EXMODE && | ||
2543 | lockres->l_level != LKM_PRMODE); | ||
2544 | |||
2545 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
2546 | *requeue = 1; | ||
2547 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
2548 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
2549 | if (ret) { | ||
2550 | ret = ocfs2_cancel_convert(osb, lockres); | ||
2551 | if (ret < 0) | ||
2552 | mlog_errno(ret); | ||
2553 | } | ||
2554 | goto leave; | ||
2555 | } | ||
2556 | |||
2557 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | ||
2558 | |||
2559 | mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n", | ||
2560 | lockres->l_level, lockres->l_blocking, new_level); | ||
2561 | |||
2562 | if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) { | ||
2563 | if (lockres->l_level == LKM_EXMODE) | ||
2564 | set_lvb = 1; | ||
2565 | |||
2566 | /* If the lock hasn't been refreshed yet (rare), then | ||
2567 | * our memory inode values are old and we skip | ||
2568 | * stuffing the lvb. There's no need to actually clear | ||
2569 | * out the lvb here as it's value is still valid. */ | ||
2570 | if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { | ||
2571 | if (set_lvb) | ||
2572 | __ocfs2_stuff_meta_lvb(inode); | ||
2573 | } else | ||
2574 | mlog(0, "lockres %s: downconverting stale lock!\n", | ||
2575 | lockres->l_name); | ||
2576 | |||
2577 | mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, " | ||
2578 | "l_blocking=%d, new_level=%d\n", | ||
2579 | lockres->l_level, lockres->l_blocking, new_level); | ||
2580 | |||
2581 | ocfs2_prepare_downconvert(lockres, new_level); | ||
2582 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
2583 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | ||
2584 | goto leave; | ||
2585 | } | ||
2586 | if (!ocfs2_inode_fully_checkpointed(inode)) | ||
2587 | ocfs2_start_checkpoint(osb); | ||
2588 | |||
2589 | *requeue = 1; | ||
2590 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
2591 | ret = 0; | ||
2592 | leave: | ||
2593 | mlog_exit(ret); | ||
2594 | return ret; | ||
2595 | } | ||
2596 | |||
2597 | static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | ||
2598 | struct ocfs2_lock_res *lockres, | ||
2599 | int *requeue, | ||
2600 | ocfs2_convert_worker_t *worker) | ||
2601 | { | 2534 | { |
2602 | unsigned long flags; | 2535 | unsigned long flags; |
2603 | int blocking; | 2536 | int blocking; |
2604 | int new_level; | 2537 | int new_level; |
2605 | int ret = 0; | 2538 | int ret = 0; |
2539 | int set_lvb = 0; | ||
2606 | 2540 | ||
2607 | mlog_entry_void(); | 2541 | mlog_entry_void(); |
2608 | 2542 | ||
@@ -2612,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, | |||
2612 | 2546 | ||
2613 | recheck: | 2547 | recheck: |
2614 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 2548 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
2615 | *requeue = 1; | 2549 | ctl->requeue = 1; |
2616 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 2550 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
2617 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2551 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2618 | if (ret) { | 2552 | if (ret) { |
@@ -2626,27 +2560,33 @@ recheck: | |||
2626 | /* if we're blocking an exclusive and we have *any* holders, | 2560 | /* if we're blocking an exclusive and we have *any* holders, |
2627 | * then requeue. */ | 2561 | * then requeue. */ |
2628 | if ((lockres->l_blocking == LKM_EXMODE) | 2562 | if ((lockres->l_blocking == LKM_EXMODE) |
2629 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | 2563 | && (lockres->l_ex_holders || lockres->l_ro_holders)) |
2630 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2564 | goto leave_requeue; |
2631 | *requeue = 1; | ||
2632 | ret = 0; | ||
2633 | goto leave; | ||
2634 | } | ||
2635 | 2565 | ||
2636 | /* If it's a PR we're blocking, then only | 2566 | /* If it's a PR we're blocking, then only |
2637 | * requeue if we've got any EX holders */ | 2567 | * requeue if we've got any EX holders */ |
2638 | if (lockres->l_blocking == LKM_PRMODE && | 2568 | if (lockres->l_blocking == LKM_PRMODE && |
2639 | lockres->l_ex_holders) { | 2569 | lockres->l_ex_holders) |
2640 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2570 | goto leave_requeue; |
2641 | *requeue = 1; | 2571 | |
2642 | ret = 0; | 2572 | /* |
2643 | goto leave; | 2573 | * Can we get a lock in this state if the holder counts are |
2644 | } | 2574 | * zero? The meta data unblock code used to check this. |
2575 | */ | ||
2576 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | ||
2577 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) | ||
2578 | goto leave_requeue; | ||
2579 | |||
2580 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | ||
2581 | |||
2582 | if (lockres->l_ops->check_downconvert | ||
2583 | && !lockres->l_ops->check_downconvert(lockres, new_level)) | ||
2584 | goto leave_requeue; | ||
2645 | 2585 | ||
2646 | /* If we get here, then we know that there are no more | 2586 | /* If we get here, then we know that there are no more |
2647 | * incompatible holders (and anyone asking for an incompatible | 2587 | * incompatible holders (and anyone asking for an incompatible |
2648 | * lock is blocked). We can now downconvert the lock */ | 2588 | * lock is blocked). We can now downconvert the lock */ |
2649 | if (!worker) | 2589 | if (!lockres->l_ops->downconvert_worker) |
2650 | goto downconvert; | 2590 | goto downconvert; |
2651 | 2591 | ||
2652 | /* Some lockres types want to do a bit of work before | 2592 | /* Some lockres types want to do a bit of work before |
@@ -2656,7 +2596,10 @@ recheck: | |||
2656 | blocking = lockres->l_blocking; | 2596 | blocking = lockres->l_blocking; |
2657 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2597 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2658 | 2598 | ||
2659 | worker(lockres, blocking); | 2599 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); |
2600 | |||
2601 | if (ctl->unblock_action == UNBLOCK_STOP_POST) | ||
2602 | goto leave; | ||
2660 | 2603 | ||
2661 | spin_lock_irqsave(&lockres->l_lock, flags); | 2604 | spin_lock_irqsave(&lockres->l_lock, flags); |
2662 | if (blocking != lockres->l_blocking) { | 2605 | if (blocking != lockres->l_blocking) { |
@@ -2666,25 +2609,43 @@ recheck: | |||
2666 | } | 2609 | } |
2667 | 2610 | ||
2668 | downconvert: | 2611 | downconvert: |
2669 | *requeue = 0; | 2612 | ctl->requeue = 0; |
2670 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 2613 | |
2614 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | ||
2615 | if (lockres->l_level == LKM_EXMODE) | ||
2616 | set_lvb = 1; | ||
2617 | |||
2618 | /* | ||
2619 | * We only set the lvb if the lock has been fully | ||
2620 | * refreshed - otherwise we risk setting stale | ||
2621 | * data. Otherwise, there's no need to actually clear | ||
2622 | * out the lvb here as it's value is still valid. | ||
2623 | */ | ||
2624 | if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | ||
2625 | lockres->l_ops->set_lvb(lockres); | ||
2626 | } | ||
2671 | 2627 | ||
2672 | ocfs2_prepare_downconvert(lockres, new_level); | 2628 | ocfs2_prepare_downconvert(lockres, new_level); |
2673 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2629 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2674 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); | 2630 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); |
2675 | leave: | 2631 | leave: |
2676 | mlog_exit(ret); | 2632 | mlog_exit(ret); |
2677 | return ret; | 2633 | return ret; |
2634 | |||
2635 | leave_requeue: | ||
2636 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
2637 | ctl->requeue = 1; | ||
2638 | |||
2639 | mlog_exit(0); | ||
2640 | return 0; | ||
2678 | } | 2641 | } |
2679 | 2642 | ||
2680 | static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 2643 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, |
2681 | int blocking) | 2644 | int blocking) |
2682 | { | 2645 | { |
2683 | struct inode *inode; | 2646 | struct inode *inode; |
2684 | struct address_space *mapping; | 2647 | struct address_space *mapping; |
2685 | 2648 | ||
2686 | mlog_entry_void(); | ||
2687 | |||
2688 | inode = ocfs2_lock_res_inode(lockres); | 2649 | inode = ocfs2_lock_res_inode(lockres); |
2689 | mapping = inode->i_mapping; | 2650 | mapping = inode->i_mapping; |
2690 | 2651 | ||
@@ -2705,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2705 | filemap_fdatawait(mapping); | 2666 | filemap_fdatawait(mapping); |
2706 | } | 2667 | } |
2707 | 2668 | ||
2708 | mlog_exit_void(); | 2669 | return UNBLOCK_CONTINUE; |
2709 | } | 2670 | } |
2710 | 2671 | ||
2711 | int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, | 2672 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, |
2712 | int *requeue) | 2673 | int new_level) |
2713 | { | 2674 | { |
2714 | int status; | 2675 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
2715 | struct inode *inode; | 2676 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); |
2716 | struct ocfs2_super *osb; | ||
2717 | |||
2718 | mlog_entry_void(); | ||
2719 | |||
2720 | inode = ocfs2_lock_res_inode(lockres); | ||
2721 | osb = OCFS2_SB(inode->i_sb); | ||
2722 | |||
2723 | mlog(0, "unblock inode %llu\n", | ||
2724 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
2725 | 2677 | ||
2726 | status = ocfs2_generic_unblock_lock(osb, | 2678 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); |
2727 | lockres, | 2679 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); |
2728 | requeue, | ||
2729 | ocfs2_data_convert_worker); | ||
2730 | if (status < 0) | ||
2731 | mlog_errno(status); | ||
2732 | 2680 | ||
2733 | mlog(0, "inode %llu, requeue = %d\n", | 2681 | if (checkpointed) |
2734 | (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); | 2682 | return 1; |
2735 | 2683 | ||
2736 | mlog_exit(status); | 2684 | ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); |
2737 | return status; | 2685 | return 0; |
2738 | } | 2686 | } |
2739 | 2687 | ||
2740 | static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, | 2688 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) |
2741 | int *requeue) | ||
2742 | { | 2689 | { |
2743 | int status; | 2690 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
2744 | struct inode *inode; | ||
2745 | |||
2746 | mlog_entry_void(); | ||
2747 | |||
2748 | mlog(0, "Unblock lockres %s\n", lockres->l_name); | ||
2749 | |||
2750 | inode = ocfs2_lock_res_inode(lockres); | ||
2751 | 2691 | ||
2752 | status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), | 2692 | __ocfs2_stuff_meta_lvb(inode); |
2753 | lockres, | ||
2754 | requeue, | ||
2755 | NULL); | ||
2756 | if (status < 0) | ||
2757 | mlog_errno(status); | ||
2758 | |||
2759 | mlog_exit(status); | ||
2760 | return status; | ||
2761 | } | 2693 | } |
2762 | 2694 | ||
2763 | 2695 | /* | |
2764 | int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, | 2696 | * Does the final reference drop on our dentry lock. Right now this |
2765 | int *requeue) | 2697 | * happens in the vote thread, but we could choose to simplify the |
2698 | * dlmglue API and push these off to the ocfs2_wq in the future. | ||
2699 | */ | ||
2700 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | ||
2701 | struct ocfs2_lock_res *lockres) | ||
2766 | { | 2702 | { |
2767 | int status; | 2703 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); |
2768 | struct inode *inode; | 2704 | ocfs2_dentry_lock_put(osb, dl); |
2769 | 2705 | } | |
2770 | mlog_entry_void(); | ||
2771 | 2706 | ||
2772 | inode = ocfs2_lock_res_inode(lockres); | 2707 | /* |
2708 | * d_delete() matching dentries before the lock downconvert. | ||
2709 | * | ||
2710 | * At this point, any process waiting to destroy the | ||
2711 | * dentry_lock due to last ref count is stopped by the | ||
2712 | * OCFS2_LOCK_QUEUED flag. | ||
2713 | * | ||
2714 | * We have two potential problems | ||
2715 | * | ||
2716 | * 1) If we do the last reference drop on our dentry_lock (via dput) | ||
2717 | * we'll wind up in ocfs2_release_dentry_lock(), waiting on | ||
2718 | * the downconvert to finish. Instead we take an elevated | ||
2719 | * reference and push the drop until after we've completed our | ||
2720 | * unblock processing. | ||
2721 | * | ||
2722 | * 2) There might be another process with a final reference, | ||
2723 | * waiting on us to finish processing. If this is the case, we | ||
2724 | * detect it and exit out - there's no more dentries anyway. | ||
2725 | */ | ||
2726 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | ||
2727 | int blocking) | ||
2728 | { | ||
2729 | struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); | ||
2730 | struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); | ||
2731 | struct dentry *dentry; | ||
2732 | unsigned long flags; | ||
2733 | int extra_ref = 0; | ||
2773 | 2734 | ||
2774 | mlog(0, "unblock inode %llu\n", | 2735 | /* |
2775 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 2736 | * This node is blocking another node from getting a read |
2737 | * lock. This happens when we've renamed within a | ||
2738 | * directory. We've forced the other nodes to d_delete(), but | ||
2739 | * we never actually dropped our lock because it's still | ||
2740 | * valid. The downconvert code will retain a PR for this node, | ||
2741 | * so there's no further work to do. | ||
2742 | */ | ||
2743 | if (blocking == LKM_PRMODE) | ||
2744 | return UNBLOCK_CONTINUE; | ||
2776 | 2745 | ||
2777 | status = ocfs2_do_unblock_meta(inode, requeue); | 2746 | /* |
2778 | if (status < 0) | 2747 | * Mark this inode as potentially orphaned. The code in |
2779 | mlog_errno(status); | 2748 | * ocfs2_delete_inode() will figure out whether it actually |
2749 | * needs to be freed or not. | ||
2750 | */ | ||
2751 | spin_lock(&oi->ip_lock); | ||
2752 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
2753 | spin_unlock(&oi->ip_lock); | ||
2780 | 2754 | ||
2781 | mlog(0, "inode %llu, requeue = %d\n", | 2755 | /* |
2782 | (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); | 2756 | * Yuck. We need to make sure however that the check of |
2757 | * OCFS2_LOCK_FREEING and the extra reference are atomic with | ||
2758 | * respect to a reference decrement or the setting of that | ||
2759 | * flag. | ||
2760 | */ | ||
2761 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
2762 | spin_lock(&dentry_attach_lock); | ||
2763 | if (!(lockres->l_flags & OCFS2_LOCK_FREEING) | ||
2764 | && dl->dl_count) { | ||
2765 | dl->dl_count++; | ||
2766 | extra_ref = 1; | ||
2767 | } | ||
2768 | spin_unlock(&dentry_attach_lock); | ||
2769 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
2783 | 2770 | ||
2784 | mlog_exit(status); | 2771 | mlog(0, "extra_ref = %d\n", extra_ref); |
2785 | return status; | ||
2786 | } | ||
2787 | 2772 | ||
2788 | /* Generic unblock function for any lockres whose private data is an | 2773 | /* |
2789 | * ocfs2_super pointer. */ | 2774 | * We have a process waiting on us in ocfs2_dentry_iput(), |
2790 | static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, | 2775 | * which means we can't have any more outstanding |
2791 | int *requeue) | 2776 | * aliases. There's no need to do any more work. |
2792 | { | 2777 | */ |
2793 | int status; | 2778 | if (!extra_ref) |
2794 | struct ocfs2_super *osb; | 2779 | return UNBLOCK_CONTINUE; |
2780 | |||
2781 | spin_lock(&dentry_attach_lock); | ||
2782 | while (1) { | ||
2783 | dentry = ocfs2_find_local_alias(dl->dl_inode, | ||
2784 | dl->dl_parent_blkno, 1); | ||
2785 | if (!dentry) | ||
2786 | break; | ||
2787 | spin_unlock(&dentry_attach_lock); | ||
2795 | 2788 | ||
2796 | mlog_entry_void(); | 2789 | mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, |
2790 | dentry->d_name.name); | ||
2797 | 2791 | ||
2798 | mlog(0, "Unblock lockres %s\n", lockres->l_name); | 2792 | /* |
2793 | * The following dcache calls may do an | ||
2794 | * iput(). Normally we don't want that from the | ||
2795 | * downconverting thread, but in this case it's ok | ||
2796 | * because the requesting node already has an | ||
2797 | * exclusive lock on the inode, so it can't be queued | ||
2798 | * for a downconvert. | ||
2799 | */ | ||
2800 | d_delete(dentry); | ||
2801 | dput(dentry); | ||
2799 | 2802 | ||
2800 | osb = ocfs2_lock_res_super(lockres); | 2803 | spin_lock(&dentry_attach_lock); |
2804 | } | ||
2805 | spin_unlock(&dentry_attach_lock); | ||
2801 | 2806 | ||
2802 | status = ocfs2_generic_unblock_lock(osb, | 2807 | /* |
2803 | lockres, | 2808 | * If we are the last holder of this dentry lock, there is no |
2804 | requeue, | 2809 | * reason to downconvert so skip straight to the unlock. |
2805 | NULL); | 2810 | */ |
2806 | if (status < 0) | 2811 | if (dl->dl_count == 1) |
2807 | mlog_errno(status); | 2812 | return UNBLOCK_STOP_POST; |
2808 | 2813 | ||
2809 | mlog_exit(status); | 2814 | return UNBLOCK_CONTINUE_POST; |
2810 | return status; | ||
2811 | } | 2815 | } |
2812 | 2816 | ||
2813 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 2817 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
2814 | struct ocfs2_lock_res *lockres) | 2818 | struct ocfs2_lock_res *lockres) |
2815 | { | 2819 | { |
2816 | int status; | 2820 | int status; |
2817 | int requeue = 0; | 2821 | struct ocfs2_unblock_ctl ctl = {0, 0,}; |
2818 | unsigned long flags; | 2822 | unsigned long flags; |
2819 | 2823 | ||
2820 | /* Our reference to the lockres in this function can be | 2824 | /* Our reference to the lockres in this function can be |
@@ -2825,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
2825 | 2829 | ||
2826 | BUG_ON(!lockres); | 2830 | BUG_ON(!lockres); |
2827 | BUG_ON(!lockres->l_ops); | 2831 | BUG_ON(!lockres->l_ops); |
2828 | BUG_ON(!lockres->l_ops->unblock); | ||
2829 | 2832 | ||
2830 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 2833 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
2831 | 2834 | ||
@@ -2839,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
2839 | goto unqueue; | 2842 | goto unqueue; |
2840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2843 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2841 | 2844 | ||
2842 | status = lockres->l_ops->unblock(lockres, &requeue); | 2845 | status = ocfs2_unblock_lock(osb, lockres, &ctl); |
2843 | if (status < 0) | 2846 | if (status < 0) |
2844 | mlog_errno(status); | 2847 | mlog_errno(status); |
2845 | 2848 | ||
2846 | spin_lock_irqsave(&lockres->l_lock, flags); | 2849 | spin_lock_irqsave(&lockres->l_lock, flags); |
2847 | unqueue: | 2850 | unqueue: |
2848 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { | 2851 | if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { |
2849 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); | 2852 | lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); |
2850 | } else | 2853 | } else |
2851 | ocfs2_schedule_blocked_lock(osb, lockres); | 2854 | ocfs2_schedule_blocked_lock(osb, lockres); |
2852 | 2855 | ||
2853 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, | 2856 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, |
2854 | requeue ? "yes" : "no"); | 2857 | ctl.requeue ? "yes" : "no"); |
2855 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2858 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2856 | 2859 | ||
2860 | if (ctl.unblock_action != UNBLOCK_CONTINUE | ||
2861 | && lockres->l_ops->post_unlock) | ||
2862 | lockres->l_ops->post_unlock(osb, lockres); | ||
2863 | |||
2857 | mlog_exit_void(); | 2864 | mlog_exit_void(); |
2858 | } | 2865 | } |
2859 | 2866 | ||
@@ -2896,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level, | |||
2896 | 2903 | ||
2897 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 2904 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
2898 | lockres->l_name, function, line); | 2905 | lockres->l_name, function, line); |
2899 | mlog(level, "version: %u, clusters: %u\n", | 2906 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", |
2900 | be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); | 2907 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), |
2908 | be32_to_cpu(lvb->lvb_igeneration)); | ||
2901 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 2909 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", |
2902 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 2910 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), |
2903 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 2911 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 243ae862ece5..4a2769387229 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -27,10 +27,14 @@ | |||
27 | #ifndef DLMGLUE_H | 27 | #ifndef DLMGLUE_H |
28 | #define DLMGLUE_H | 28 | #define DLMGLUE_H |
29 | 29 | ||
30 | #define OCFS2_LVB_VERSION 3 | 30 | #include "dcache.h" |
31 | |||
32 | #define OCFS2_LVB_VERSION 4 | ||
31 | 33 | ||
32 | struct ocfs2_meta_lvb { | 34 | struct ocfs2_meta_lvb { |
33 | __be32 lvb_version; | 35 | __u8 lvb_version; |
36 | __u8 lvb_reserved0; | ||
37 | __be16 lvb_reserved1; | ||
34 | __be32 lvb_iclusters; | 38 | __be32 lvb_iclusters; |
35 | __be32 lvb_iuid; | 39 | __be32 lvb_iuid; |
36 | __be32 lvb_igid; | 40 | __be32 lvb_igid; |
@@ -41,7 +45,8 @@ struct ocfs2_meta_lvb { | |||
41 | __be16 lvb_imode; | 45 | __be16 lvb_imode; |
42 | __be16 lvb_inlink; | 46 | __be16 lvb_inlink; |
43 | __be32 lvb_iattr; | 47 | __be32 lvb_iattr; |
44 | __be32 lvb_reserved[2]; | 48 | __be32 lvb_igeneration; |
49 | __be32 lvb_reserved2; | ||
45 | }; | 50 | }; |
46 | 51 | ||
47 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ | 52 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ |
@@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb); | |||
57 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); | 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); |
58 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
59 | enum ocfs2_lock_type type, | 64 | enum ocfs2_lock_type type, |
65 | unsigned int generation, | ||
60 | struct inode *inode); | 66 | struct inode *inode); |
67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | ||
68 | u64 parent, struct inode *inode); | ||
61 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 69 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); |
62 | int ocfs2_create_new_inode_locks(struct inode *inode); | 70 | int ocfs2_create_new_inode_locks(struct inode *inode); |
71 | int ocfs2_create_new_lock(struct ocfs2_super *osb, | ||
72 | struct ocfs2_lock_res *lockres, int ex, int local); | ||
63 | int ocfs2_drop_inode_locks(struct inode *inode); | 73 | int ocfs2_drop_inode_locks(struct inode *inode); |
64 | int ocfs2_data_lock_full(struct inode *inode, | 74 | int ocfs2_data_lock_full(struct inode *inode, |
65 | int write, | 75 | int write, |
@@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, | |||
93 | int ex); | 103 | int ex); |
94 | int ocfs2_rename_lock(struct ocfs2_super *osb); | 104 | int ocfs2_rename_lock(struct ocfs2_super *osb); |
95 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 105 | void ocfs2_rename_unlock(struct ocfs2_super *osb); |
106 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | ||
107 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | ||
108 | |||
96 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 109 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); |
110 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | ||
111 | struct ocfs2_lock_res *lockres); | ||
97 | 112 | ||
98 | /* for the vote thread */ | 113 | /* for the vote thread */ |
99 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 114 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index ec55ab3c1214..fb91089a60a7 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | #include "dir.h" | 34 | #include "dir.h" |
35 | #include "dlmglue.h" | 35 | #include "dlmglue.h" |
36 | #include "dcache.h" | ||
36 | #include "export.h" | 37 | #include "export.h" |
37 | #include "inode.h" | 38 | #include "inode.h" |
38 | 39 | ||
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | |||
57 | return ERR_PTR(-ESTALE); | 58 | return ERR_PTR(-ESTALE); |
58 | } | 59 | } |
59 | 60 | ||
60 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); | 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); |
61 | 62 | ||
62 | if (IS_ERR(inode)) { | 63 | if (IS_ERR(inode)) { |
63 | mlog_errno(PTR_ERR(inode)); | 64 | mlog_errno(PTR_ERR(inode)); |
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) | |||
77 | mlog_errno(-ENOMEM); | 78 | mlog_errno(-ENOMEM); |
78 | return ERR_PTR(-ENOMEM); | 79 | return ERR_PTR(-ENOMEM); |
79 | } | 80 | } |
81 | result->d_op = &ocfs2_dentry_ops; | ||
80 | 82 | ||
81 | mlog_exit_ptr(result); | 83 | mlog_exit_ptr(result); |
82 | return result; | 84 | return result; |
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
113 | goto bail_unlock; | 115 | goto bail_unlock; |
114 | } | 116 | } |
115 | 117 | ||
116 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); | 118 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); |
117 | if (IS_ERR(inode)) { | 119 | if (IS_ERR(inode)) { |
118 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 120 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
119 | (unsigned long long)blkno); | 121 | (unsigned long long)blkno); |
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
127 | parent = ERR_PTR(-ENOMEM); | 129 | parent = ERR_PTR(-ENOMEM); |
128 | } | 130 | } |
129 | 131 | ||
132 | parent->d_op = &ocfs2_dentry_ops; | ||
133 | |||
130 | bail_unlock: | 134 | bail_unlock: |
131 | ocfs2_meta_unlock(dir, 0); | 135 | ocfs2_meta_unlock(dir, 0); |
132 | 136 | ||
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7bcf69154592..69d3db569166 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -54,8 +54,6 @@ | |||
54 | 54 | ||
55 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
56 | 56 | ||
57 | #define OCFS2_FI_FLAG_NOWAIT 0x1 | ||
58 | #define OCFS2_FI_FLAG_DELETE 0x2 | ||
59 | struct ocfs2_find_inode_args | 57 | struct ocfs2_find_inode_args |
60 | { | 58 | { |
61 | u64 fi_blkno; | 59 | u64 fi_blkno; |
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | |||
109 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); | 107 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); |
110 | } | 108 | } |
111 | 109 | ||
112 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | 110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) |
113 | { | 111 | { |
114 | struct inode *inode = NULL; | 112 | struct inode *inode = NULL; |
115 | struct super_block *sb = osb->sb; | 113 | struct super_block *sb = osb->sb; |
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | |||
127 | } | 125 | } |
128 | 126 | ||
129 | args.fi_blkno = blkno; | 127 | args.fi_blkno = blkno; |
130 | args.fi_flags = 0; | 128 | args.fi_flags = flags; |
131 | args.fi_ino = ino_from_blkno(sb, blkno); | 129 | args.fi_ino = ino_from_blkno(sb, blkno); |
132 | 130 | ||
133 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, |
@@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
297 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; | 295 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; |
298 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); | 296 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); |
299 | 297 | ||
300 | if (create_ino) | ||
301 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
302 | le64_to_cpu(fe->i_blkno)); | ||
303 | |||
304 | mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n", | ||
305 | (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false"); | ||
306 | |||
307 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 298 | inode->i_nlink = le16_to_cpu(fe->i_links_count); |
308 | 299 | ||
300 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | ||
301 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
302 | |||
309 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { | 303 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { |
310 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; | 304 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; |
311 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); | 305 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); |
@@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
343 | break; | 337 | break; |
344 | } | 338 | } |
345 | 339 | ||
340 | if (create_ino) { | ||
341 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
342 | le64_to_cpu(fe->i_blkno)); | ||
343 | |||
344 | /* | ||
345 | * If we ever want to create system files from kernel, | ||
346 | * the generation argument to | ||
347 | * ocfs2_inode_lock_res_init() will have to change. | ||
348 | */ | ||
349 | BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)); | ||
350 | |||
351 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
352 | OCFS2_LOCK_TYPE_META, 0, inode); | ||
353 | } | ||
354 | |||
346 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, | 355 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, |
347 | OCFS2_LOCK_TYPE_RW, inode); | 356 | OCFS2_LOCK_TYPE_RW, inode->i_generation, |
348 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 357 | inode); |
349 | OCFS2_LOCK_TYPE_META, inode); | 358 | |
350 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | 359 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, |
351 | OCFS2_LOCK_TYPE_DATA, inode); | 360 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, |
361 | inode); | ||
352 | 362 | ||
353 | ocfs2_set_inode_flags(inode); | 363 | ocfs2_set_inode_flags(inode); |
354 | inode->i_flags |= S_NOATIME; | 364 | inode->i_flags |= S_NOATIME; |
@@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
366 | struct ocfs2_super *osb; | 376 | struct ocfs2_super *osb; |
367 | struct ocfs2_dinode *fe; | 377 | struct ocfs2_dinode *fe; |
368 | struct buffer_head *bh = NULL; | 378 | struct buffer_head *bh = NULL; |
369 | int status; | 379 | int status, can_lock; |
370 | int sysfile = 0; | 380 | u32 generation = 0; |
371 | 381 | ||
372 | mlog_entry("(0x%p, 0x%p)\n", inode, args); | 382 | mlog_entry("(0x%p, 0x%p)\n", inode, args); |
373 | 383 | ||
374 | status = -EINVAL; | 384 | status = -EINVAL; |
375 | if (inode == NULL || inode->i_sb == NULL) { | 385 | if (inode == NULL || inode->i_sb == NULL) { |
376 | mlog(ML_ERROR, "bad inode\n"); | 386 | mlog(ML_ERROR, "bad inode\n"); |
377 | goto bail; | 387 | return status; |
378 | } | 388 | } |
379 | sb = inode->i_sb; | 389 | sb = inode->i_sb; |
380 | osb = OCFS2_SB(sb); | 390 | osb = OCFS2_SB(sb); |
@@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
382 | if (!args) { | 392 | if (!args) { |
383 | mlog(ML_ERROR, "bad inode args\n"); | 393 | mlog(ML_ERROR, "bad inode args\n"); |
384 | make_bad_inode(inode); | 394 | make_bad_inode(inode); |
385 | goto bail; | 395 | return status; |
396 | } | ||
397 | |||
398 | /* | ||
399 | * To improve performance of cold-cache inode stats, we take | ||
400 | * the cluster lock here if possible. | ||
401 | * | ||
402 | * Generally, OCFS2 never trusts the contents of an inode | ||
403 | * unless it's holding a cluster lock, so taking it here isn't | ||
404 | * a correctness issue as much as it is a performance | ||
405 | * improvement. | ||
406 | * | ||
407 | * There are three times when taking the lock is not a good idea: | ||
408 | * | ||
409 | * 1) During startup, before we have initialized the DLM. | ||
410 | * | ||
411 | * 2) If we are reading certain system files which never get | ||
412 | * cluster locks (local alloc, truncate log). | ||
413 | * | ||
414 | * 3) If the process doing the iget() is responsible for | ||
415 | * orphan dir recovery. We're holding the orphan dir lock and | ||
416 | * can get into a deadlock with another process on another | ||
417 | * node in ->delete_inode(). | ||
418 | * | ||
419 | * #1 and #2 can be simply solved by never taking the lock | ||
420 | * here for system files (which are the only type we read | ||
421 | * during mount). It's a heavier approach, but our main | ||
422 | * concern is user-accessible files anyway. | ||
423 | * | ||
424 | * #3 works itself out because we'll eventually take the | ||
425 | * cluster lock before trusting anything anyway. | ||
426 | */ | ||
427 | can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
428 | && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); | ||
429 | |||
430 | /* | ||
431 | * To maintain backwards compatibility with older versions of | ||
432 | * ocfs2-tools, we still store the generation value for system | ||
433 | * files. The only ones that actually matter to userspace are | ||
434 | * the journals, but it's easier and inexpensive to just flag | ||
435 | * all system files similarly. | ||
436 | */ | ||
437 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
438 | generation = osb->fs_generation; | ||
439 | |||
440 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
441 | OCFS2_LOCK_TYPE_META, | ||
442 | generation, inode); | ||
443 | |||
444 | if (can_lock) { | ||
445 | status = ocfs2_meta_lock(inode, NULL, NULL, 0); | ||
446 | if (status) { | ||
447 | make_bad_inode(inode); | ||
448 | mlog_errno(status); | ||
449 | return status; | ||
450 | } | ||
386 | } | 451 | } |
387 | 452 | ||
388 | /* Read the FE off disk. This is safe because the kernel only | 453 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, |
389 | * does one read_inode2 for a new inode, and if it doesn't | 454 | can_lock ? inode : NULL); |
390 | * exist yet then nobody can be working on it! */ | ||
391 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL); | ||
392 | if (status < 0) { | 455 | if (status < 0) { |
393 | mlog_errno(status); | 456 | mlog_errno(status); |
394 | make_bad_inode(inode); | ||
395 | goto bail; | 457 | goto bail; |
396 | } | 458 | } |
397 | 459 | ||
460 | status = -EINVAL; | ||
398 | fe = (struct ocfs2_dinode *) bh->b_data; | 461 | fe = (struct ocfs2_dinode *) bh->b_data; |
399 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 462 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
400 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", | 463 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", |
401 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); | 464 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); |
402 | make_bad_inode(inode); | ||
403 | goto bail; | 465 | goto bail; |
404 | } | 466 | } |
405 | 467 | ||
406 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | 468 | /* |
407 | sysfile = 1; | 469 | * This is a code bug. Right now the caller needs to |
470 | * understand whether it is asking for a system file inode or | ||
471 | * not so the proper lock names can be built. | ||
472 | */ | ||
473 | mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != | ||
474 | !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), | ||
475 | "Inode %llu: system file state is ambigous\n", | ||
476 | (unsigned long long)args->fi_blkno); | ||
408 | 477 | ||
409 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || | 478 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || |
410 | S_ISBLK(le16_to_cpu(fe->i_mode))) | 479 | S_ISBLK(le16_to_cpu(fe->i_mode))) |
411 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); | 480 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); |
412 | 481 | ||
413 | status = -EINVAL; | ||
414 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { | 482 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { |
415 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", | 483 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", |
416 | (unsigned long long)fe->i_blkno, inode->i_ino); | 484 | (unsigned long long)fe->i_blkno, inode->i_ino); |
417 | make_bad_inode(inode); | ||
418 | goto bail; | 485 | goto bail; |
419 | } | 486 | } |
420 | 487 | ||
421 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); | 488 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); |
422 | 489 | ||
423 | if (sysfile) | ||
424 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
425 | |||
426 | status = 0; | 490 | status = 0; |
427 | 491 | ||
428 | bail: | 492 | bail: |
493 | if (can_lock) | ||
494 | ocfs2_meta_unlock(inode, 0); | ||
495 | |||
496 | if (status < 0) | ||
497 | make_bad_inode(inode); | ||
498 | |||
429 | if (args && bh) | 499 | if (args && bh) |
430 | brelse(bh); | 500 | brelse(bh); |
431 | 501 | ||
@@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
898 | goto bail_unlock_inode; | 968 | goto bail_unlock_inode; |
899 | } | 969 | } |
900 | 970 | ||
901 | /* Mark the inode as successfully deleted. This is important | 971 | /* |
902 | * for ocfs2_clear_inode as it will check this flag and skip | 972 | * Mark the inode as successfully deleted. |
903 | * any checkpointing work */ | 973 | * |
974 | * This is important for ocfs2_clear_inode() as it will check | ||
975 | * this flag and skip any checkpointing work | ||
976 | * | ||
977 | * ocfs2_stuff_meta_lvb() also uses this flag to invalidate | ||
978 | * the LVB for other nodes. | ||
979 | */ | ||
904 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 980 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; |
905 | 981 | ||
906 | bail_unlock_inode: | 982 | bail_unlock_inode: |
@@ -1025,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1025 | /* Testing ip_orphaned_slot here wouldn't work because we may | 1101 | /* Testing ip_orphaned_slot here wouldn't work because we may |
1026 | * not have gotten a delete_inode vote from any other nodes | 1102 | * not have gotten a delete_inode vote from any other nodes |
1027 | * yet. */ | 1103 | * yet. */ |
1028 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { | 1104 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) |
1029 | mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); | 1105 | generic_delete_inode(inode); |
1030 | inode->i_nlink = 0; | 1106 | else |
1031 | } | 1107 | generic_drop_inode(inode); |
1032 | |||
1033 | generic_drop_inode(inode); | ||
1034 | 1108 | ||
1035 | mlog_exit_void(); | 1109 | mlog_exit_void(); |
1036 | } | 1110 | } |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 4d1e53992566..9957810fdf85 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block, | |||
122 | void ocfs2_clear_inode(struct inode *inode); | 122 | void ocfs2_clear_inode(struct inode *inode); |
123 | void ocfs2_delete_inode(struct inode *inode); | 123 | void ocfs2_delete_inode(struct inode *inode); |
124 | void ocfs2_drop_inode(struct inode *inode); | 124 | void ocfs2_drop_inode(struct inode *inode); |
125 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); | 125 | |
126 | /* Flags for ocfs2_iget() */ | ||
127 | #define OCFS2_FI_FLAG_NOWAIT 0x1 | ||
128 | #define OCFS2_FI_FLAG_DELETE 0x2 | ||
129 | #define OCFS2_FI_FLAG_SYSFILE 0x4 | ||
130 | #define OCFS2_FI_FLAG_NOLOCK 0x8 | ||
131 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); | ||
126 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | 132 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, |
127 | u64 blkno, | 133 | u64 blkno, |
128 | int delete_vote); | 134 | int delete_vote); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f92bf1dd379a..fd9734def551 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1493 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) | 1493 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) |
1494 | continue; | 1494 | continue; |
1495 | 1495 | ||
1496 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); | 1496 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode), |
1497 | OCFS2_FI_FLAG_NOLOCK); | ||
1497 | if (IS_ERR(iter)) | 1498 | if (IS_ERR(iter)) |
1498 | continue; | 1499 | continue; |
1499 | 1500 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 0d3e939b1f56..849c3b4bb94a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
179 | if (status < 0) | 179 | if (status < 0) |
180 | goto bail_add; | 180 | goto bail_add; |
181 | 181 | ||
182 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); | 182 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); |
183 | if (IS_ERR(inode)) { | 183 | if (IS_ERR(inode)) { |
184 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 184 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
185 | (unsigned long long)blkno); | 185 | (unsigned long long)blkno); |
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
199 | spin_unlock(&oi->ip_lock); | 199 | spin_unlock(&oi->ip_lock); |
200 | 200 | ||
201 | bail_add: | 201 | bail_add: |
202 | |||
203 | dentry->d_op = &ocfs2_dentry_ops; | 202 | dentry->d_op = &ocfs2_dentry_ops; |
204 | ret = d_splice_alias(inode, dentry); | 203 | ret = d_splice_alias(inode, dentry); |
205 | 204 | ||
205 | if (inode) { | ||
206 | /* | ||
207 | * If d_splice_alias() finds a DCACHE_DISCONNECTED | ||
208 | * dentry, it will d_move() it on top of ours. The | ||
209 | * return value will indicate this however, so in | ||
210 | * those cases, we switch them around for the locking | ||
211 | * code. | ||
212 | * | ||
213 | * NOTE: This dentry already has ->d_op set from | ||
214 | * ocfs2_get_parent() and ocfs2_get_dentry() | ||
215 | */ | ||
216 | if (ret) | ||
217 | dentry = ret; | ||
218 | |||
219 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
220 | OCFS2_I(dir)->ip_blkno); | ||
221 | if (status) { | ||
222 | mlog_errno(status); | ||
223 | ret = ERR_PTR(status); | ||
224 | goto bail_unlock; | ||
225 | } | ||
226 | } | ||
227 | |||
206 | bail_unlock: | 228 | bail_unlock: |
207 | /* Don't drop the cluster lock until *after* the d_add -- | 229 | /* Don't drop the cluster lock until *after* the d_add -- |
208 | * unlink on another node will message us to remove that | 230 | * unlink on another node will message us to remove that |
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir, | |||
418 | goto leave; | 440 | goto leave; |
419 | } | 441 | } |
420 | 442 | ||
443 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
444 | OCFS2_I(dir)->ip_blkno); | ||
445 | if (status) { | ||
446 | mlog_errno(status); | ||
447 | goto leave; | ||
448 | } | ||
449 | |||
421 | insert_inode_hash(inode); | 450 | insert_inode_hash(inode); |
422 | dentry->d_op = &ocfs2_dentry_ops; | 451 | dentry->d_op = &ocfs2_dentry_ops; |
423 | d_instantiate(dentry, inode); | 452 | d_instantiate(dentry, inode); |
@@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
725 | goto bail; | 754 | goto bail; |
726 | } | 755 | } |
727 | 756 | ||
757 | err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
758 | if (err) { | ||
759 | mlog_errno(err); | ||
760 | goto bail; | ||
761 | } | ||
762 | |||
728 | atomic_inc(&inode->i_count); | 763 | atomic_inc(&inode->i_count); |
729 | dentry->d_op = &ocfs2_dentry_ops; | 764 | dentry->d_op = &ocfs2_dentry_ops; |
730 | d_instantiate(dentry, inode); | 765 | d_instantiate(dentry, inode); |
@@ -743,6 +778,23 @@ bail: | |||
743 | return err; | 778 | return err; |
744 | } | 779 | } |
745 | 780 | ||
781 | /* | ||
782 | * Takes and drops an exclusive lock on the given dentry. This will | ||
783 | * force other nodes to drop it. | ||
784 | */ | ||
785 | static int ocfs2_remote_dentry_delete(struct dentry *dentry) | ||
786 | { | ||
787 | int ret; | ||
788 | |||
789 | ret = ocfs2_dentry_lock(dentry, 1); | ||
790 | if (ret) | ||
791 | mlog_errno(ret); | ||
792 | else | ||
793 | ocfs2_dentry_unlock(dentry, 1); | ||
794 | |||
795 | return ret; | ||
796 | } | ||
797 | |||
746 | static int ocfs2_unlink(struct inode *dir, | 798 | static int ocfs2_unlink(struct inode *dir, |
747 | struct dentry *dentry) | 799 | struct dentry *dentry) |
748 | { | 800 | { |
@@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
832 | else | 884 | else |
833 | inode->i_nlink--; | 885 | inode->i_nlink--; |
834 | 886 | ||
835 | status = ocfs2_request_unlink_vote(inode, dentry, | 887 | status = ocfs2_remote_dentry_delete(dentry); |
836 | (unsigned int) inode->i_nlink); | ||
837 | if (status < 0) { | 888 | if (status < 0) { |
838 | /* This vote should succeed under all normal | 889 | /* This vote should succeed under all normal |
839 | * circumstances. */ | 890 | * circumstances. */ |
@@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1019 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, | 1070 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, |
1020 | // this is the 1st dirent bh | 1071 | // this is the 1st dirent bh |
1021 | nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; | 1072 | nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; |
1022 | unsigned int links_count; | ||
1023 | 1073 | ||
1024 | /* At some point it might be nice to break this function up a | 1074 | /* At some point it might be nice to break this function up a |
1025 | * bit. */ | 1075 | * bit. */ |
@@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1093 | } | 1143 | } |
1094 | } | 1144 | } |
1095 | 1145 | ||
1096 | if (S_ISDIR(old_inode->i_mode)) { | 1146 | /* |
1097 | /* Directories actually require metadata updates to | 1147 | * Though we don't require an inode meta data update if |
1098 | * the directory info so we can't get away with not | 1148 | * old_inode is not a directory, we lock anyway here to ensure |
1099 | * doing node locking on it. */ | 1149 | * the vote thread on other nodes won't have to concurrently |
1100 | status = ocfs2_meta_lock(old_inode, handle, NULL, 1); | 1150 | * downconvert the inode and the dentry locks. |
1101 | if (status < 0) { | 1151 | */ |
1102 | if (status != -ENOENT) | 1152 | status = ocfs2_meta_lock(old_inode, handle, NULL, 1); |
1103 | mlog_errno(status); | 1153 | if (status < 0) { |
1104 | goto bail; | 1154 | if (status != -ENOENT) |
1105 | } | ||
1106 | |||
1107 | status = ocfs2_request_rename_vote(old_inode, old_dentry); | ||
1108 | if (status < 0) { | ||
1109 | mlog_errno(status); | 1155 | mlog_errno(status); |
1110 | goto bail; | 1156 | goto bail; |
1111 | } | 1157 | } |
1158 | |||
1159 | status = ocfs2_remote_dentry_delete(old_dentry); | ||
1160 | if (status < 0) { | ||
1161 | mlog_errno(status); | ||
1162 | goto bail; | ||
1163 | } | ||
1112 | 1164 | ||
1165 | if (S_ISDIR(old_inode->i_mode)) { | ||
1113 | status = -EIO; | 1166 | status = -EIO; |
1114 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); | 1167 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); |
1115 | if (!old_inode_de_bh) | 1168 | if (!old_inode_de_bh) |
@@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1123 | if (!new_inode && new_dir!=old_dir && | 1176 | if (!new_inode && new_dir!=old_dir && |
1124 | new_dir->i_nlink >= OCFS2_LINK_MAX) | 1177 | new_dir->i_nlink >= OCFS2_LINK_MAX) |
1125 | goto bail; | 1178 | goto bail; |
1126 | } else { | ||
1127 | /* Ah, the simple case - we're a file so just send a | ||
1128 | * message. */ | ||
1129 | status = ocfs2_request_rename_vote(old_inode, old_dentry); | ||
1130 | if (status < 0) { | ||
1131 | mlog_errno(status); | ||
1132 | goto bail; | ||
1133 | } | ||
1134 | } | 1179 | } |
1135 | 1180 | ||
1136 | status = -ENOENT; | 1181 | status = -ENOENT; |
@@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1202 | goto bail; | 1247 | goto bail; |
1203 | } | 1248 | } |
1204 | 1249 | ||
1205 | if (S_ISDIR(new_inode->i_mode)) | 1250 | status = ocfs2_remote_dentry_delete(new_dentry); |
1206 | links_count = 0; | ||
1207 | else | ||
1208 | links_count = (unsigned int) (new_inode->i_nlink - 1); | ||
1209 | |||
1210 | status = ocfs2_request_unlink_vote(new_inode, new_dentry, | ||
1211 | links_count); | ||
1212 | if (status < 0) { | 1251 | if (status < 0) { |
1213 | mlog_errno(status); | 1252 | mlog_errno(status); |
1214 | goto bail; | 1253 | goto bail; |
@@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1387 | } | 1426 | } |
1388 | } | 1427 | } |
1389 | 1428 | ||
1429 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); | ||
1390 | status = 0; | 1430 | status = 0; |
1391 | bail: | 1431 | bail: |
1392 | if (rename_lock) | 1432 | if (rename_lock) |
@@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir, | |||
1675 | goto bail; | 1715 | goto bail; |
1676 | } | 1716 | } |
1677 | 1717 | ||
1718 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
1719 | if (status) { | ||
1720 | mlog_errno(status); | ||
1721 | goto bail; | ||
1722 | } | ||
1723 | |||
1678 | insert_inode_hash(inode); | 1724 | insert_inode_hash(inode); |
1679 | dentry->d_op = &ocfs2_dentry_ops; | 1725 | dentry->d_op = &ocfs2_dentry_ops; |
1680 | d_instantiate(dentry, inode); | 1726 | d_instantiate(dentry, inode); |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 7dd9e1e705b0..4d5d5655c185 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -35,12 +35,15 @@ | |||
35 | #define OCFS2_LOCK_ID_MAX_LEN 32 | 35 | #define OCFS2_LOCK_ID_MAX_LEN 32 |
36 | #define OCFS2_LOCK_ID_PAD "000000" | 36 | #define OCFS2_LOCK_ID_PAD "000000" |
37 | 37 | ||
38 | #define OCFS2_DENTRY_LOCK_INO_START 18 | ||
39 | |||
38 | enum ocfs2_lock_type { | 40 | enum ocfs2_lock_type { |
39 | OCFS2_LOCK_TYPE_META = 0, | 41 | OCFS2_LOCK_TYPE_META = 0, |
40 | OCFS2_LOCK_TYPE_DATA, | 42 | OCFS2_LOCK_TYPE_DATA, |
41 | OCFS2_LOCK_TYPE_SUPER, | 43 | OCFS2_LOCK_TYPE_SUPER, |
42 | OCFS2_LOCK_TYPE_RENAME, | 44 | OCFS2_LOCK_TYPE_RENAME, |
43 | OCFS2_LOCK_TYPE_RW, | 45 | OCFS2_LOCK_TYPE_RW, |
46 | OCFS2_LOCK_TYPE_DENTRY, | ||
44 | OCFS2_NUM_LOCK_TYPES | 47 | OCFS2_NUM_LOCK_TYPES |
45 | }; | 48 | }; |
46 | 49 | ||
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
63 | case OCFS2_LOCK_TYPE_RW: | 66 | case OCFS2_LOCK_TYPE_RW: |
64 | c = 'W'; | 67 | c = 'W'; |
65 | break; | 68 | break; |
69 | case OCFS2_LOCK_TYPE_DENTRY: | ||
70 | c = 'N'; | ||
71 | break; | ||
66 | default: | 72 | default: |
67 | c = '\0'; | 73 | c = '\0'; |
68 | } | 74 | } |
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
70 | return c; | 76 | return c; |
71 | } | 77 | } |
72 | 78 | ||
79 | static char *ocfs2_lock_type_strings[] = { | ||
80 | [OCFS2_LOCK_TYPE_META] = "Meta", | ||
81 | [OCFS2_LOCK_TYPE_DATA] = "Data", | ||
82 | [OCFS2_LOCK_TYPE_SUPER] = "Super", | ||
83 | [OCFS2_LOCK_TYPE_RENAME] = "Rename", | ||
84 | /* Need to differentiate from [R]ename.. serializing writes is the | ||
85 | * important job it does, anyway. */ | ||
86 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | ||
87 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | ||
88 | }; | ||
89 | |||
90 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | ||
91 | { | ||
92 | #ifdef __KERNEL__ | ||
93 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | ||
94 | #endif | ||
95 | return ocfs2_lock_type_strings[type]; | ||
96 | } | ||
97 | |||
73 | #endif /* OCFS2_LOCKID_H */ | 98 | #endif /* OCFS2_LOCKID_H */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d17e33e66a1e..4c29cd7cc8e6 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
202 | 202 | ||
203 | mlog_entry_void(); | 203 | mlog_entry_void(); |
204 | 204 | ||
205 | new = ocfs2_iget(osb, osb->root_blkno); | 205 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); |
206 | if (IS_ERR(new)) { | 206 | if (IS_ERR(new)) { |
207 | status = PTR_ERR(new); | 207 | status = PTR_ERR(new); |
208 | mlog_errno(status); | 208 | mlog_errno(status); |
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
210 | } | 210 | } |
211 | osb->root_inode = new; | 211 | osb->root_inode = new; |
212 | 212 | ||
213 | new = ocfs2_iget(osb, osb->system_dir_blkno); | 213 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); |
214 | if (IS_ERR(new)) { | 214 | if (IS_ERR(new)) { |
215 | status = PTR_ERR(new); | 215 | status = PTR_ERR(new); |
216 | mlog_errno(status); | 216 | mlog_errno(status); |
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = { | |||
682 | .kill_sb = kill_block_super, /* set to the generic one | 682 | .kill_sb = kill_block_super, /* set to the generic one |
683 | * right now, but do we | 683 | * right now, but do we |
684 | * need to change that? */ | 684 | * need to change that? */ |
685 | .fs_flags = FS_REQUIRES_DEV, | 685 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, |
686 | .next = NULL | 686 | .next = NULL |
687 | }; | 687 | }; |
688 | 688 | ||
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fc29cb7a437d..5df6e35d09b1 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -28,11 +28,11 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | 30 | ||
31 | #include "ocfs2.h" | ||
32 | |||
33 | #define MLOG_MASK_PREFIX ML_INODE | 31 | #define MLOG_MASK_PREFIX ML_INODE |
34 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
35 | 33 | ||
34 | #include "ocfs2.h" | ||
35 | |||
36 | #include "alloc.h" | 36 | #include "alloc.h" |
37 | #include "dir.h" | 37 | #include "dir.h" |
38 | #include "inode.h" | 38 | #include "inode.h" |
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
115 | goto bail; | 115 | goto bail; |
116 | } | 116 | } |
117 | 117 | ||
118 | inode = ocfs2_iget(osb, blkno); | 118 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE); |
119 | if (IS_ERR(inode)) { | 119 | if (IS_ERR(inode)) { |
120 | mlog_errno(PTR_ERR(inode)); | 120 | mlog_errno(PTR_ERR(inode)); |
121 | inode = NULL; | 121 | inode = NULL; |
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index cf70fe2075b8..5b4dca79990b 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg | |||
74 | __be32 v_orphaned_slot; /* Used during delete votes */ | 74 | __be32 v_orphaned_slot; /* Used during delete votes */ |
75 | __be32 v_nlink; /* Used during unlink votes */ | 75 | __be32 v_nlink; /* Used during unlink votes */ |
76 | } md1; /* Message type dependant 1 */ | 76 | } md1; /* Message type dependant 1 */ |
77 | __be32 v_unlink_namelen; | ||
78 | __be64 v_unlink_parent; | ||
79 | u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN]; | ||
80 | }; | 77 | }; |
81 | 78 | ||
82 | /* Responses are given these values to maintain backwards | 79 | /* Responses are given these values to maintain backwards |
@@ -100,8 +97,6 @@ struct ocfs2_vote_work { | |||
100 | enum ocfs2_vote_request { | 97 | enum ocfs2_vote_request { |
101 | OCFS2_VOTE_REQ_INVALID = 0, | 98 | OCFS2_VOTE_REQ_INVALID = 0, |
102 | OCFS2_VOTE_REQ_DELETE, | 99 | OCFS2_VOTE_REQ_DELETE, |
103 | OCFS2_VOTE_REQ_UNLINK, | ||
104 | OCFS2_VOTE_REQ_RENAME, | ||
105 | OCFS2_VOTE_REQ_MOUNT, | 100 | OCFS2_VOTE_REQ_MOUNT, |
106 | OCFS2_VOTE_REQ_UMOUNT, | 101 | OCFS2_VOTE_REQ_UMOUNT, |
107 | OCFS2_VOTE_REQ_LAST | 102 | OCFS2_VOTE_REQ_LAST |
@@ -261,103 +256,13 @@ done: | |||
261 | return response; | 256 | return response; |
262 | } | 257 | } |
263 | 258 | ||
264 | static int ocfs2_match_dentry(struct dentry *dentry, | ||
265 | u64 parent_blkno, | ||
266 | unsigned int namelen, | ||
267 | const char *name) | ||
268 | { | ||
269 | struct inode *parent; | ||
270 | |||
271 | if (!dentry->d_parent) { | ||
272 | mlog(0, "Detached from parent.\n"); | ||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | parent = dentry->d_parent->d_inode; | ||
277 | /* Negative parent dentry? */ | ||
278 | if (!parent) | ||
279 | return 0; | ||
280 | |||
281 | /* Name is in a different directory. */ | ||
282 | if (OCFS2_I(parent)->ip_blkno != parent_blkno) | ||
283 | return 0; | ||
284 | |||
285 | if (dentry->d_name.len != namelen) | ||
286 | return 0; | ||
287 | |||
288 | /* comparison above guarantees this is safe. */ | ||
289 | if (memcmp(dentry->d_name.name, name, namelen)) | ||
290 | return 0; | ||
291 | |||
292 | return 1; | ||
293 | } | ||
294 | |||
295 | static void ocfs2_process_dentry_request(struct inode *inode, | ||
296 | int rename, | ||
297 | unsigned int new_nlink, | ||
298 | u64 parent_blkno, | ||
299 | unsigned int namelen, | ||
300 | const char *name) | ||
301 | { | ||
302 | struct dentry *dentry = NULL; | ||
303 | struct list_head *p; | ||
304 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
305 | |||
306 | mlog(0, "parent %llu, namelen = %u, name = %.*s\n", | ||
307 | (unsigned long long)parent_blkno, namelen, namelen, name); | ||
308 | |||
309 | spin_lock(&dcache_lock); | ||
310 | |||
311 | /* Another node is removing this name from the system. It is | ||
312 | * up to us to find the corresponding dentry and if it exists, | ||
313 | * unhash it from the dcache. */ | ||
314 | list_for_each(p, &inode->i_dentry) { | ||
315 | dentry = list_entry(p, struct dentry, d_alias); | ||
316 | |||
317 | if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) { | ||
318 | mlog(0, "dentry found: %.*s\n", | ||
319 | dentry->d_name.len, dentry->d_name.name); | ||
320 | |||
321 | dget_locked(dentry); | ||
322 | break; | ||
323 | } | ||
324 | |||
325 | dentry = NULL; | ||
326 | } | ||
327 | |||
328 | spin_unlock(&dcache_lock); | ||
329 | |||
330 | if (dentry) { | ||
331 | d_delete(dentry); | ||
332 | dput(dentry); | ||
333 | } | ||
334 | |||
335 | /* rename votes don't send link counts */ | ||
336 | if (!rename) { | ||
337 | mlog(0, "new_nlink = %u\n", new_nlink); | ||
338 | |||
339 | /* We don't have the proper locks here to directly | ||
340 | * change i_nlink and besides, the vote is sent | ||
341 | * *before* the operation so it may have failed on the | ||
342 | * other node. This passes a hint to ocfs2_drop_inode | ||
343 | * to force ocfs2_delete_inode, who will take the | ||
344 | * proper cluster locks to sort things out. */ | ||
345 | if (new_nlink == 0) { | ||
346 | spin_lock(&oi->ip_lock); | ||
347 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
348 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static void ocfs2_process_vote(struct ocfs2_super *osb, | 259 | static void ocfs2_process_vote(struct ocfs2_super *osb, |
354 | struct ocfs2_vote_msg *msg) | 260 | struct ocfs2_vote_msg *msg) |
355 | { | 261 | { |
356 | int net_status, vote_response; | 262 | int net_status, vote_response; |
357 | int orphaned_slot = 0; | 263 | int orphaned_slot = 0; |
358 | int rename = 0; | 264 | unsigned int node_num, generation; |
359 | unsigned int node_num, generation, new_nlink, namelen; | 265 | u64 blkno; |
360 | u64 blkno, parent_blkno; | ||
361 | enum ocfs2_vote_request request; | 266 | enum ocfs2_vote_request request; |
362 | struct inode *inode = NULL; | 267 | struct inode *inode = NULL; |
363 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | 268 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; |
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb, | |||
437 | vote_response = ocfs2_process_delete_request(inode, | 342 | vote_response = ocfs2_process_delete_request(inode, |
438 | &orphaned_slot); | 343 | &orphaned_slot); |
439 | break; | 344 | break; |
440 | case OCFS2_VOTE_REQ_RENAME: | ||
441 | rename = 1; | ||
442 | /* fall through */ | ||
443 | case OCFS2_VOTE_REQ_UNLINK: | ||
444 | parent_blkno = be64_to_cpu(msg->v_unlink_parent); | ||
445 | namelen = be32_to_cpu(msg->v_unlink_namelen); | ||
446 | /* new_nlink will be ignored in case of a rename vote */ | ||
447 | new_nlink = be32_to_cpu(msg->md1.v_nlink); | ||
448 | ocfs2_process_dentry_request(inode, rename, new_nlink, | ||
449 | parent_blkno, namelen, | ||
450 | msg->v_unlink_dirent); | ||
451 | break; | ||
452 | default: | 345 | default: |
453 | mlog(ML_ERROR, "node %u, invalid request: %u\n", | 346 | mlog(ML_ERROR, "node %u, invalid request: %u\n", |
454 | node_num, request); | 347 | node_num, request); |
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode) | |||
889 | return status; | 782 | return status; |
890 | } | 783 | } |
891 | 784 | ||
892 | static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request, | ||
893 | struct dentry *dentry) | ||
894 | { | ||
895 | struct inode *parent = dentry->d_parent->d_inode; | ||
896 | |||
897 | /* We need some values which will uniquely identify a dentry | ||
898 | * on the other nodes so that they can find it and run | ||
899 | * d_delete against it. Parent directory block and full name | ||
900 | * should suffice. */ | ||
901 | |||
902 | mlog(0, "unlink/rename request: parent: %llu name: %.*s\n", | ||
903 | (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len, | ||
904 | dentry->d_name.name); | ||
905 | |||
906 | request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno); | ||
907 | request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len); | ||
908 | memcpy(request->v_unlink_dirent, dentry->d_name.name, | ||
909 | dentry->d_name.len); | ||
910 | } | ||
911 | |||
912 | int ocfs2_request_unlink_vote(struct inode *inode, | ||
913 | struct dentry *dentry, | ||
914 | unsigned int nlink) | ||
915 | { | ||
916 | int status; | ||
917 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
918 | struct ocfs2_vote_msg *request; | ||
919 | |||
920 | if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN) | ||
921 | return -ENAMETOOLONG; | ||
922 | |||
923 | status = -ENOMEM; | ||
924 | request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, | ||
925 | inode->i_generation, | ||
926 | OCFS2_VOTE_REQ_UNLINK, nlink); | ||
927 | if (request) { | ||
928 | ocfs2_setup_unlink_vote(request, dentry); | ||
929 | |||
930 | status = ocfs2_request_vote(inode, request, NULL); | ||
931 | |||
932 | kfree(request); | ||
933 | } | ||
934 | return status; | ||
935 | } | ||
936 | |||
937 | int ocfs2_request_rename_vote(struct inode *inode, | ||
938 | struct dentry *dentry) | ||
939 | { | ||
940 | int status; | ||
941 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
942 | struct ocfs2_vote_msg *request; | ||
943 | |||
944 | if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN) | ||
945 | return -ENAMETOOLONG; | ||
946 | |||
947 | status = -ENOMEM; | ||
948 | request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, | ||
949 | inode->i_generation, | ||
950 | OCFS2_VOTE_REQ_RENAME, 0); | ||
951 | if (request) { | ||
952 | ocfs2_setup_unlink_vote(request, dentry); | ||
953 | |||
954 | status = ocfs2_request_vote(inode, request, NULL); | ||
955 | |||
956 | kfree(request); | ||
957 | } | ||
958 | return status; | ||
959 | } | ||
960 | |||
961 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | 785 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) |
962 | { | 786 | { |
963 | int status; | 787 | int status; |
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h index 9cce60703466..53ebc1c69e56 100644 --- a/fs/ocfs2/vote.h +++ b/fs/ocfs2/vote.h | |||
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | |||
39 | } | 39 | } |
40 | 40 | ||
41 | int ocfs2_request_delete_vote(struct inode *inode); | 41 | int ocfs2_request_delete_vote(struct inode *inode); |
42 | int ocfs2_request_unlink_vote(struct inode *inode, | ||
43 | struct dentry *dentry, | ||
44 | unsigned int nlink); | ||
45 | int ocfs2_request_rename_vote(struct inode *inode, | ||
46 | struct dentry *dentry); | ||
47 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | 42 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); |
48 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | 43 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); |
49 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | 44 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); |