diff options
author | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400 |
---|---|---|
committer | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400 |
commit | ada47b5fe13d89735805b566185f4885f5a3f750 (patch) | |
tree | 644b88f8a71896307d71438e9b3af49126ffb22b /fs/ocfs2 | |
parent | 43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff) | |
parent | 3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff) |
Merge branch 'wip-2.6.34' into old-private-masterarchived-private-master
Diffstat (limited to 'fs/ocfs2')
70 files changed, 3092 insertions, 2260 deletions
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index 701b7a3a872e..0d840669698e 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig | |||
@@ -6,6 +6,7 @@ config OCFS2_FS | |||
6 | select CRC32 | 6 | select CRC32 |
7 | select QUOTA | 7 | select QUOTA |
8 | select QUOTA_TREE | 8 | select QUOTA_TREE |
9 | select FS_POSIX_ACL | ||
9 | help | 10 | help |
10 | OCFS2 is a general purpose extent based shared disk cluster file | 11 | OCFS2 is a general purpose extent based shared disk cluster file |
11 | system with many similarities to ext3. It supports 64 bit inode | 12 | system with many similarities to ext3. It supports 64 bit inode |
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS | |||
74 | This option will enable expensive consistency checks. Enable | 75 | This option will enable expensive consistency checks. Enable |
75 | this option for debugging only as it is likely to decrease | 76 | this option for debugging only as it is likely to decrease |
76 | performance of the filesystem. | 77 | performance of the filesystem. |
77 | |||
78 | config OCFS2_FS_POSIX_ACL | ||
79 | bool "OCFS2 POSIX Access Control Lists" | ||
80 | depends on OCFS2_FS | ||
81 | select FS_POSIX_ACL | ||
82 | default n | ||
83 | help | ||
84 | Posix Access Control Lists (ACLs) support permissions for users and | ||
85 | groups beyond the owner/group/world scheme. | ||
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 31f25ce32c97..791c0886c060 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -39,16 +39,14 @@ ocfs2-objs := \ | |||
39 | ver.o \ | 39 | ver.o \ |
40 | quota_local.o \ | 40 | quota_local.o \ |
41 | quota_global.o \ | 41 | quota_global.o \ |
42 | xattr.o | 42 | xattr.o \ |
43 | 43 | acl.o | |
44 | ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y) | ||
45 | ocfs2-objs += acl.o | ||
46 | endif | ||
47 | 44 | ||
48 | ocfs2_stackglue-objs := stackglue.o | 45 | ocfs2_stackglue-objs := stackglue.o |
49 | ocfs2_stack_o2cb-objs := stack_o2cb.o | 46 | ocfs2_stack_o2cb-objs := stack_o2cb.o |
50 | ocfs2_stack_user-objs := stack_user.o | 47 | ocfs2_stack_user-objs := stack_user.o |
51 | 48 | ||
49 | obj-$(CONFIG_OCFS2_FS) += dlmfs/ | ||
52 | # cluster/ is always needed when OCFS2_FS for masklog support | 50 | # cluster/ is always needed when OCFS2_FS for masklog support |
53 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 51 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
54 | obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ | 52 | obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index fbeaec762103..e13fc9e8fcdc 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/string.h> | 25 | #include <linux/string.h> |
25 | 26 | ||
26 | #define MLOG_MASK_PREFIX ML_INODE | 27 | #define MLOG_MASK_PREFIX ML_INODE |
@@ -30,6 +31,8 @@ | |||
30 | #include "alloc.h" | 31 | #include "alloc.h" |
31 | #include "dlmglue.h" | 32 | #include "dlmglue.h" |
32 | #include "file.h" | 33 | #include "file.h" |
34 | #include "inode.h" | ||
35 | #include "journal.h" | ||
33 | #include "ocfs2_fs.h" | 36 | #include "ocfs2_fs.h" |
34 | 37 | ||
35 | #include "xattr.h" | 38 | #include "xattr.h" |
@@ -98,15 +101,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode, | |||
98 | int type, | 101 | int type, |
99 | struct buffer_head *di_bh) | 102 | struct buffer_head *di_bh) |
100 | { | 103 | { |
101 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
102 | int name_index; | 104 | int name_index; |
103 | char *value = NULL; | 105 | char *value = NULL; |
104 | struct posix_acl *acl; | 106 | struct posix_acl *acl; |
105 | int retval; | 107 | int retval; |
106 | 108 | ||
107 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | ||
108 | return NULL; | ||
109 | |||
110 | switch (type) { | 109 | switch (type) { |
111 | case ACL_TYPE_ACCESS: | 110 | case ACL_TYPE_ACCESS: |
112 | name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; | 111 | name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; |
@@ -170,6 +169,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) | |||
170 | } | 169 | } |
171 | 170 | ||
172 | /* | 171 | /* |
172 | * Helper function to set i_mode in memory and disk. Some call paths | ||
173 | * will not have di_bh or a journal handle to pass, in which case it | ||
174 | * will create it's own. | ||
175 | */ | ||
176 | static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | ||
177 | handle_t *handle, umode_t new_mode) | ||
178 | { | ||
179 | int ret, commit_handle = 0; | ||
180 | struct ocfs2_dinode *di; | ||
181 | |||
182 | if (di_bh == NULL) { | ||
183 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
184 | if (ret) { | ||
185 | mlog_errno(ret); | ||
186 | goto out; | ||
187 | } | ||
188 | } else | ||
189 | get_bh(di_bh); | ||
190 | |||
191 | if (handle == NULL) { | ||
192 | handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), | ||
193 | OCFS2_INODE_UPDATE_CREDITS); | ||
194 | if (IS_ERR(handle)) { | ||
195 | ret = PTR_ERR(handle); | ||
196 | mlog_errno(ret); | ||
197 | goto out_brelse; | ||
198 | } | ||
199 | |||
200 | commit_handle = 1; | ||
201 | } | ||
202 | |||
203 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
204 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
205 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
206 | if (ret) { | ||
207 | mlog_errno(ret); | ||
208 | goto out_commit; | ||
209 | } | ||
210 | |||
211 | inode->i_mode = new_mode; | ||
212 | di->i_mode = cpu_to_le16(inode->i_mode); | ||
213 | |||
214 | ocfs2_journal_dirty(handle, di_bh); | ||
215 | |||
216 | out_commit: | ||
217 | if (commit_handle) | ||
218 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
219 | out_brelse: | ||
220 | brelse(di_bh); | ||
221 | out: | ||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | /* | ||
173 | * Set the access or default ACL of an inode. | 226 | * Set the access or default ACL of an inode. |
174 | */ | 227 | */ |
175 | static int ocfs2_set_acl(handle_t *handle, | 228 | static int ocfs2_set_acl(handle_t *handle, |
@@ -197,9 +250,14 @@ static int ocfs2_set_acl(handle_t *handle, | |||
197 | if (ret < 0) | 250 | if (ret < 0) |
198 | return ret; | 251 | return ret; |
199 | else { | 252 | else { |
200 | inode->i_mode = mode; | ||
201 | if (ret == 0) | 253 | if (ret == 0) |
202 | acl = NULL; | 254 | acl = NULL; |
255 | |||
256 | ret = ocfs2_acl_set_mode(inode, di_bh, | ||
257 | handle, mode); | ||
258 | if (ret) | ||
259 | return ret; | ||
260 | |||
203 | } | 261 | } |
204 | } | 262 | } |
205 | break; | 263 | break; |
@@ -287,6 +345,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
287 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 345 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
288 | struct posix_acl *acl = NULL; | 346 | struct posix_acl *acl = NULL; |
289 | int ret = 0; | 347 | int ret = 0; |
348 | mode_t mode; | ||
290 | 349 | ||
291 | if (!S_ISLNK(inode->i_mode)) { | 350 | if (!S_ISLNK(inode->i_mode)) { |
292 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { | 351 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { |
@@ -295,12 +354,17 @@ int ocfs2_init_acl(handle_t *handle, | |||
295 | if (IS_ERR(acl)) | 354 | if (IS_ERR(acl)) |
296 | return PTR_ERR(acl); | 355 | return PTR_ERR(acl); |
297 | } | 356 | } |
298 | if (!acl) | 357 | if (!acl) { |
299 | inode->i_mode &= ~current_umask(); | 358 | mode = inode->i_mode & ~current_umask(); |
359 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | ||
360 | if (ret) { | ||
361 | mlog_errno(ret); | ||
362 | goto cleanup; | ||
363 | } | ||
364 | } | ||
300 | } | 365 | } |
301 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { | 366 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { |
302 | struct posix_acl *clone; | 367 | struct posix_acl *clone; |
303 | mode_t mode; | ||
304 | 368 | ||
305 | if (S_ISDIR(inode->i_mode)) { | 369 | if (S_ISDIR(inode->i_mode)) { |
306 | ret = ocfs2_set_acl(handle, inode, di_bh, | 370 | ret = ocfs2_set_acl(handle, inode, di_bh, |
@@ -317,7 +381,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
317 | mode = inode->i_mode; | 381 | mode = inode->i_mode; |
318 | ret = posix_acl_create_masq(clone, &mode); | 382 | ret = posix_acl_create_masq(clone, &mode); |
319 | if (ret >= 0) { | 383 | if (ret >= 0) { |
320 | inode->i_mode = mode; | 384 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
321 | if (ret > 0) { | 385 | if (ret > 0) { |
322 | ret = ocfs2_set_acl(handle, inode, | 386 | ret = ocfs2_set_acl(handle, inode, |
323 | di_bh, ACL_TYPE_ACCESS, | 387 | di_bh, ACL_TYPE_ACCESS, |
@@ -331,13 +395,14 @@ cleanup: | |||
331 | return ret; | 395 | return ret; |
332 | } | 396 | } |
333 | 397 | ||
334 | static size_t ocfs2_xattr_list_acl_access(struct inode *inode, | 398 | static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry, |
335 | char *list, | 399 | char *list, |
336 | size_t list_len, | 400 | size_t list_len, |
337 | const char *name, | 401 | const char *name, |
338 | size_t name_len) | 402 | size_t name_len, |
403 | int type) | ||
339 | { | 404 | { |
340 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 405 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
341 | const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); | 406 | const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); |
342 | 407 | ||
343 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | 408 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
@@ -348,13 +413,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode, | |||
348 | return size; | 413 | return size; |
349 | } | 414 | } |
350 | 415 | ||
351 | static size_t ocfs2_xattr_list_acl_default(struct inode *inode, | 416 | static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry, |
352 | char *list, | 417 | char *list, |
353 | size_t list_len, | 418 | size_t list_len, |
354 | const char *name, | 419 | const char *name, |
355 | size_t name_len) | 420 | size_t name_len, |
421 | int type) | ||
356 | { | 422 | { |
357 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 423 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
358 | const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); | 424 | const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); |
359 | 425 | ||
360 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | 426 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
@@ -365,19 +431,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode, | |||
365 | return size; | 431 | return size; |
366 | } | 432 | } |
367 | 433 | ||
368 | static int ocfs2_xattr_get_acl(struct inode *inode, | 434 | static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name, |
369 | int type, | 435 | void *buffer, size_t size, int type) |
370 | void *buffer, | ||
371 | size_t size) | ||
372 | { | 436 | { |
373 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 437 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
374 | struct posix_acl *acl; | 438 | struct posix_acl *acl; |
375 | int ret; | 439 | int ret; |
376 | 440 | ||
441 | if (strcmp(name, "") != 0) | ||
442 | return -EINVAL; | ||
377 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | 443 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
378 | return -EOPNOTSUPP; | 444 | return -EOPNOTSUPP; |
379 | 445 | ||
380 | acl = ocfs2_get_acl(inode, type); | 446 | acl = ocfs2_get_acl(dentry->d_inode, type); |
381 | if (IS_ERR(acl)) | 447 | if (IS_ERR(acl)) |
382 | return PTR_ERR(acl); | 448 | return PTR_ERR(acl); |
383 | if (acl == NULL) | 449 | if (acl == NULL) |
@@ -388,35 +454,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode, | |||
388 | return ret; | 454 | return ret; |
389 | } | 455 | } |
390 | 456 | ||
391 | static int ocfs2_xattr_get_acl_access(struct inode *inode, | 457 | static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, |
392 | const char *name, | 458 | const void *value, size_t size, int flags, int type) |
393 | void *buffer, | ||
394 | size_t size) | ||
395 | { | ||
396 | if (strcmp(name, "") != 0) | ||
397 | return -EINVAL; | ||
398 | return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); | ||
399 | } | ||
400 | |||
401 | static int ocfs2_xattr_get_acl_default(struct inode *inode, | ||
402 | const char *name, | ||
403 | void *buffer, | ||
404 | size_t size) | ||
405 | { | ||
406 | if (strcmp(name, "") != 0) | ||
407 | return -EINVAL; | ||
408 | return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); | ||
409 | } | ||
410 | |||
411 | static int ocfs2_xattr_set_acl(struct inode *inode, | ||
412 | int type, | ||
413 | const void *value, | ||
414 | size_t size) | ||
415 | { | 459 | { |
460 | struct inode *inode = dentry->d_inode; | ||
416 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 461 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
417 | struct posix_acl *acl; | 462 | struct posix_acl *acl; |
418 | int ret = 0; | 463 | int ret = 0; |
419 | 464 | ||
465 | if (strcmp(name, "") != 0) | ||
466 | return -EINVAL; | ||
420 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | 467 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
421 | return -EOPNOTSUPP; | 468 | return -EOPNOTSUPP; |
422 | 469 | ||
@@ -442,38 +489,18 @@ cleanup: | |||
442 | return ret; | 489 | return ret; |
443 | } | 490 | } |
444 | 491 | ||
445 | static int ocfs2_xattr_set_acl_access(struct inode *inode, | ||
446 | const char *name, | ||
447 | const void *value, | ||
448 | size_t size, | ||
449 | int flags) | ||
450 | { | ||
451 | if (strcmp(name, "") != 0) | ||
452 | return -EINVAL; | ||
453 | return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); | ||
454 | } | ||
455 | |||
456 | static int ocfs2_xattr_set_acl_default(struct inode *inode, | ||
457 | const char *name, | ||
458 | const void *value, | ||
459 | size_t size, | ||
460 | int flags) | ||
461 | { | ||
462 | if (strcmp(name, "") != 0) | ||
463 | return -EINVAL; | ||
464 | return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); | ||
465 | } | ||
466 | |||
467 | struct xattr_handler ocfs2_xattr_acl_access_handler = { | 492 | struct xattr_handler ocfs2_xattr_acl_access_handler = { |
468 | .prefix = POSIX_ACL_XATTR_ACCESS, | 493 | .prefix = POSIX_ACL_XATTR_ACCESS, |
494 | .flags = ACL_TYPE_ACCESS, | ||
469 | .list = ocfs2_xattr_list_acl_access, | 495 | .list = ocfs2_xattr_list_acl_access, |
470 | .get = ocfs2_xattr_get_acl_access, | 496 | .get = ocfs2_xattr_get_acl, |
471 | .set = ocfs2_xattr_set_acl_access, | 497 | .set = ocfs2_xattr_set_acl, |
472 | }; | 498 | }; |
473 | 499 | ||
474 | struct xattr_handler ocfs2_xattr_acl_default_handler = { | 500 | struct xattr_handler ocfs2_xattr_acl_default_handler = { |
475 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 501 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
502 | .flags = ACL_TYPE_DEFAULT, | ||
476 | .list = ocfs2_xattr_list_acl_default, | 503 | .list = ocfs2_xattr_list_acl_default, |
477 | .get = ocfs2_xattr_get_acl_default, | 504 | .get = ocfs2_xattr_get_acl, |
478 | .set = ocfs2_xattr_set_acl_default, | 505 | .set = ocfs2_xattr_set_acl, |
479 | }; | 506 | }; |
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 8f6389ed4da5..5c5d31f05853 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h | |||
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry { | |||
26 | __le32 e_id; | 26 | __le32 e_id; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
30 | |||
31 | extern int ocfs2_check_acl(struct inode *, int); | 29 | extern int ocfs2_check_acl(struct inode *, int); |
32 | extern int ocfs2_acl_chmod(struct inode *); | 30 | extern int ocfs2_acl_chmod(struct inode *); |
33 | extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, | 31 | extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, |
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, | |||
35 | struct ocfs2_alloc_context *, | 33 | struct ocfs2_alloc_context *, |
36 | struct ocfs2_alloc_context *); | 34 | struct ocfs2_alloc_context *); |
37 | 35 | ||
38 | #else /* CONFIG_OCFS2_FS_POSIX_ACL*/ | ||
39 | |||
40 | #define ocfs2_check_acl NULL | ||
41 | static inline int ocfs2_acl_chmod(struct inode *inode) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int ocfs2_init_acl(handle_t *handle, | ||
46 | struct inode *inode, | ||
47 | struct inode *dir, | ||
48 | struct buffer_head *di_bh, | ||
49 | struct buffer_head *dir_bh, | ||
50 | struct ocfs2_alloc_context *meta_ac, | ||
51 | struct ocfs2_alloc_context *data_ac) | ||
52 | { | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | #endif /* CONFIG_OCFS2_FS_POSIX_ACL*/ | ||
57 | |||
58 | #endif /* OCFS2_ACL_H */ | 36 | #endif /* OCFS2_ACL_H */ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 38a42f5d59ff..9f8bd913c51e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -1050,7 +1050,8 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, | |||
1050 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); | 1050 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); |
1051 | eb->h_blkno = cpu_to_le64(first_blkno); | 1051 | eb->h_blkno = cpu_to_le64(first_blkno); |
1052 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); | 1052 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); |
1053 | eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); | 1053 | eb->h_suballoc_slot = |
1054 | cpu_to_le16(meta_ac->ac_alloc_slot); | ||
1054 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 1055 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
1055 | eb->h_list.l_count = | 1056 | eb->h_list.l_count = |
1056 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); | 1057 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); |
@@ -1765,9 +1766,9 @@ set_and_inc: | |||
1765 | * | 1766 | * |
1766 | * The array index of the subtree root is passed back. | 1767 | * The array index of the subtree root is passed back. |
1767 | */ | 1768 | */ |
1768 | static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | 1769 | int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, |
1769 | struct ocfs2_path *left, | 1770 | struct ocfs2_path *left, |
1770 | struct ocfs2_path *right) | 1771 | struct ocfs2_path *right) |
1771 | { | 1772 | { |
1772 | int i = 0; | 1773 | int i = 0; |
1773 | 1774 | ||
@@ -2398,7 +2399,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) | |||
2398 | * | 2399 | * |
2399 | * The array is assumed to be large enough to hold an entire path (tree depth). | 2400 | * The array is assumed to be large enough to hold an entire path (tree depth). |
2400 | * | 2401 | * |
2401 | * Upon succesful return from this function: | 2402 | * Upon successful return from this function: |
2402 | * | 2403 | * |
2403 | * - The 'right_path' array will contain a path to the leaf block | 2404 | * - The 'right_path' array will contain a path to the leaf block |
2404 | * whose range contains e_cpos. | 2405 | * whose range contains e_cpos. |
@@ -2872,8 +2873,8 @@ out: | |||
2872 | * This looks similar, but is subtly different to | 2873 | * This looks similar, but is subtly different to |
2873 | * ocfs2_find_cpos_for_left_leaf(). | 2874 | * ocfs2_find_cpos_for_left_leaf(). |
2874 | */ | 2875 | */ |
2875 | static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | 2876 | int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, |
2876 | struct ocfs2_path *path, u32 *cpos) | 2877 | struct ocfs2_path *path, u32 *cpos) |
2877 | { | 2878 | { |
2878 | int i, j, ret = 0; | 2879 | int i, j, ret = 0; |
2879 | u64 blkno; | 2880 | u64 blkno; |
@@ -5712,7 +5713,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5712 | goto out; | 5713 | goto out; |
5713 | } | 5714 | } |
5714 | 5715 | ||
5715 | vfs_dq_free_space_nodirty(inode, | 5716 | dquot_free_space_nodirty(inode, |
5716 | ocfs2_clusters_to_bytes(inode->i_sb, len)); | 5717 | ocfs2_clusters_to_bytes(inode->i_sb, len)); |
5717 | 5718 | ||
5718 | ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); | 5719 | ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); |
@@ -6037,7 +6038,7 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) | |||
6037 | if (status < 0) | 6038 | if (status < 0) |
6038 | mlog_errno(status); | 6039 | mlog_errno(status); |
6039 | else | 6040 | else |
6040 | ocfs2_init_inode_steal_slot(osb); | 6041 | ocfs2_init_steal_slots(osb); |
6041 | 6042 | ||
6042 | mlog_exit(status); | 6043 | mlog_exit(status); |
6043 | } | 6044 | } |
@@ -6935,7 +6936,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6935 | goto bail; | 6936 | goto bail; |
6936 | } | 6937 | } |
6937 | 6938 | ||
6938 | vfs_dq_free_space_nodirty(inode, | 6939 | dquot_free_space_nodirty(inode, |
6939 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); | 6940 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); |
6940 | spin_lock(&OCFS2_I(inode)->ip_lock); | 6941 | spin_lock(&OCFS2_I(inode)->ip_lock); |
6941 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - | 6942 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - |
@@ -7190,8 +7191,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, | |||
7190 | * wait on them - the truncate_inode_pages() call later will | 7191 | * wait on them - the truncate_inode_pages() call later will |
7191 | * do that for us. | 7192 | * do that for us. |
7192 | */ | 7193 | */ |
7193 | ret = do_sync_mapping_range(inode->i_mapping, range_start, | 7194 | ret = filemap_fdatawrite_range(inode->i_mapping, range_start, |
7194 | range_end - 1, SYNC_FILE_RANGE_WRITE); | 7195 | range_end - 1); |
7195 | if (ret) | 7196 | if (ret) |
7196 | mlog_errno(ret); | 7197 | mlog_errno(ret); |
7197 | 7198 | ||
@@ -7300,11 +7301,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
7300 | unsigned int page_end; | 7301 | unsigned int page_end; |
7301 | u64 phys; | 7302 | u64 phys; |
7302 | 7303 | ||
7303 | if (vfs_dq_alloc_space_nodirty(inode, | 7304 | ret = dquot_alloc_space_nodirty(inode, |
7304 | ocfs2_clusters_to_bytes(osb->sb, 1))) { | 7305 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
7305 | ret = -EDQUOT; | 7306 | if (ret) |
7306 | goto out_commit; | 7307 | goto out_commit; |
7307 | } | ||
7308 | did_quota = 1; | 7308 | did_quota = 1; |
7309 | 7309 | ||
7310 | ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, | 7310 | ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, |
@@ -7380,7 +7380,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
7380 | 7380 | ||
7381 | out_commit: | 7381 | out_commit: |
7382 | if (ret < 0 && did_quota) | 7382 | if (ret < 0 && did_quota) |
7383 | vfs_dq_free_space_nodirty(inode, | 7383 | dquot_free_space_nodirty(inode, |
7384 | ocfs2_clusters_to_bytes(osb->sb, 1)); | 7384 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
7385 | 7385 | ||
7386 | ocfs2_commit_trans(osb, handle); | 7386 | ocfs2_commit_trans(osb, handle); |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 9c122d574464..1db4359ccb90 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle, | |||
317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, | 317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, |
318 | handle_t *handle, | 318 | handle_t *handle, |
319 | struct ocfs2_path *path); | 319 | struct ocfs2_path *path); |
320 | int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | ||
321 | struct ocfs2_path *path, u32 *cpos); | ||
322 | int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, | ||
323 | struct ocfs2_path *left, | ||
324 | struct ocfs2_path *right); | ||
320 | #endif /* OCFS2_ALLOC_H */ | 325 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index deb2b132ae5e..21441ddb5506 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -547,6 +547,9 @@ bail: | |||
547 | * | 547 | * |
548 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 548 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
549 | * fs_count, map_bh, dio->rw == WRITE); | 549 | * fs_count, map_bh, dio->rw == WRITE); |
550 | * | ||
551 | * Note that we never bother to allocate blocks here, and thus ignore the | ||
552 | * create argument. | ||
550 | */ | 553 | */ |
551 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | 554 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, |
552 | struct buffer_head *bh_result, int create) | 555 | struct buffer_head *bh_result, int create) |
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
563 | 566 | ||
564 | inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | 567 | inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); |
565 | 568 | ||
566 | /* | ||
567 | * Any write past EOF is not allowed because we'd be extending. | ||
568 | */ | ||
569 | if (create && (iblock + max_blocks) > inode_blocks) { | ||
570 | ret = -EIO; | ||
571 | goto bail; | ||
572 | } | ||
573 | |||
574 | /* This figures out the size of the next contiguous block, and | 569 | /* This figures out the size of the next contiguous block, and |
575 | * our logical offset */ | 570 | * our logical offset */ |
576 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | 571 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, |
@@ -582,17 +577,9 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
582 | goto bail; | 577 | goto bail; |
583 | } | 578 | } |
584 | 579 | ||
585 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { | 580 | /* We should already CoW the refcounted extent in case of create. */ |
586 | ocfs2_error(inode->i_sb, | 581 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); |
587 | "Inode %llu has a hole at block %llu\n", | ||
588 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
589 | (unsigned long long)iblock); | ||
590 | ret = -EROFS; | ||
591 | goto bail; | ||
592 | } | ||
593 | 582 | ||
594 | /* We should already CoW the refcounted extent. */ | ||
595 | BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); | ||
596 | /* | 583 | /* |
597 | * get_more_blocks() expects us to describe a hole by clearing | 584 | * get_more_blocks() expects us to describe a hole by clearing |
598 | * the mapped bit on bh_result(). | 585 | * the mapped bit on bh_result(). |
@@ -601,20 +588,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
601 | */ | 588 | */ |
602 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 589 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
603 | map_bh(bh_result, inode->i_sb, p_blkno); | 590 | map_bh(bh_result, inode->i_sb, p_blkno); |
604 | else { | 591 | else |
605 | /* | ||
606 | * ocfs2_prepare_inode_for_write() should have caught | ||
607 | * the case where we'd be filling a hole and triggered | ||
608 | * a buffered write instead. | ||
609 | */ | ||
610 | if (create) { | ||
611 | ret = -EIO; | ||
612 | mlog_errno(ret); | ||
613 | goto bail; | ||
614 | } | ||
615 | |||
616 | clear_buffer_mapped(bh_result); | 592 | clear_buffer_mapped(bh_result); |
617 | } | ||
618 | 593 | ||
619 | /* make sure we don't map more than max_blocks blocks here as | 594 | /* make sure we don't map more than max_blocks blocks here as |
620 | that's all the kernel will handle at this point. */ | 595 | that's all the kernel will handle at this point. */ |
@@ -625,7 +600,7 @@ bail: | |||
625 | return ret; | 600 | return ret; |
626 | } | 601 | } |
627 | 602 | ||
628 | /* | 603 | /* |
629 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | 604 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're |
630 | * particularly interested in the aio/dio case. Like the core uses | 605 | * particularly interested in the aio/dio case. Like the core uses |
631 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from | 606 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from |
@@ -696,7 +671,7 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
696 | 671 | ||
697 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 672 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
698 | inode->i_sb->s_bdev, iov, offset, | 673 | inode->i_sb->s_bdev, iov, offset, |
699 | nr_segs, | 674 | nr_segs, |
700 | ocfs2_direct_IO_get_blocks, | 675 | ocfs2_direct_IO_get_blocks, |
701 | ocfs2_dio_end_io); | 676 | ocfs2_dio_end_io); |
702 | 677 | ||
@@ -1789,10 +1764,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1789 | 1764 | ||
1790 | wc->w_handle = handle; | 1765 | wc->w_handle = handle; |
1791 | 1766 | ||
1792 | if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, | 1767 | if (clusters_to_alloc) { |
1793 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { | 1768 | ret = dquot_alloc_space_nodirty(inode, |
1794 | ret = -EDQUOT; | 1769 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); |
1795 | goto out_commit; | 1770 | if (ret) |
1771 | goto out_commit; | ||
1796 | } | 1772 | } |
1797 | /* | 1773 | /* |
1798 | * We don't want this to fail in ocfs2_write_end(), so do it | 1774 | * We don't want this to fail in ocfs2_write_end(), so do it |
@@ -1835,7 +1811,7 @@ success: | |||
1835 | return 0; | 1811 | return 0; |
1836 | out_quota: | 1812 | out_quota: |
1837 | if (clusters_to_alloc) | 1813 | if (clusters_to_alloc) |
1838 | vfs_dq_free_space(inode, | 1814 | dquot_free_space(inode, |
1839 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); | 1815 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); |
1840 | out_commit: | 1816 | out_commit: |
1841 | ocfs2_commit_trans(osb, handle); | 1817 | ocfs2_commit_trans(osb, handle); |
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index a1163b8b417c..b7428c5d0d3b 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -47,7 +47,7 @@ | |||
47 | * Calculate the bit offset in the hamming code buffer based on the bit's | 47 | * Calculate the bit offset in the hamming code buffer based on the bit's |
48 | * offset in the data buffer. Since the hamming code reserves all | 48 | * offset in the data buffer. Since the hamming code reserves all |
49 | * power-of-two bits for parity, the data bit number and the code bit | 49 | * power-of-two bits for parity, the data bit number and the code bit |
50 | * number are offest by all the parity bits beforehand. | 50 | * number are offset by all the parity bits beforehand. |
51 | * | 51 | * |
52 | * Recall that bit numbers in hamming code are 1-based. This function | 52 | * Recall that bit numbers in hamming code are 1-based. This function |
53 | * takes the 0-based data bit from the caller. | 53 | * takes the 0-based data bit from the caller. |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index d43d34a1dd31..f9d5d3ffc75a 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -25,7 +25,6 @@ | |||
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
30 | 29 | ||
31 | #include <cluster/masklog.h> | 30 | #include <cluster/masklog.h> |
@@ -368,7 +367,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, | |||
368 | } | 367 | } |
369 | ocfs2_metadata_cache_io_unlock(ci); | 368 | ocfs2_metadata_cache_io_unlock(ci); |
370 | 369 | ||
371 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", | 370 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
372 | (unsigned long long)block, nr, | 371 | (unsigned long long)block, nr, |
373 | ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", | 372 | ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", |
374 | flags); | 373 | flags); |
@@ -407,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
407 | struct buffer_head *bh) | 406 | struct buffer_head *bh) |
408 | { | 407 | { |
409 | int ret = 0; | 408 | int ret = 0; |
409 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | ||
410 | 410 | ||
411 | mlog_entry_void(); | 411 | mlog_entry_void(); |
412 | 412 | ||
@@ -426,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
426 | 426 | ||
427 | get_bh(bh); /* for end_buffer_write_sync() */ | 427 | get_bh(bh); /* for end_buffer_write_sync() */ |
428 | bh->b_end_io = end_buffer_write_sync; | 428 | bh->b_end_io = end_buffer_write_sync; |
429 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); | ||
429 | submit_bh(WRITE, bh); | 430 | submit_bh(WRITE, bh); |
430 | 431 | ||
431 | wait_on_buffer(bh); | 432 | wait_on_buffer(bh); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index c452d116b892..41d5f1f92d56 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/crc32.h> | 34 | #include <linux/crc32.h> |
35 | #include <linux/time.h> | 35 | #include <linux/time.h> |
36 | #include <linux/debugfs.h> | 36 | #include <linux/debugfs.h> |
37 | #include <linux/slab.h> | ||
37 | 38 | ||
38 | #include "heartbeat.h" | 39 | #include "heartbeat.h" |
39 | #include "tcp.h" | 40 | #include "tcp.h" |
@@ -78,7 +79,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
78 | 79 | ||
79 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
80 | 81 | ||
81 | /* Only sets a new threshold if there are no active regions. | 82 | /* Only sets a new threshold if there are no active regions. |
82 | * | 83 | * |
83 | * No locking or otherwise interesting code is required for reading | 84 | * No locking or otherwise interesting code is required for reading |
84 | * o2hb_dead_threshold as it can't change once regions are active and | 85 | * o2hb_dead_threshold as it can't change once regions are active and |
@@ -170,13 +171,14 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
170 | 171 | ||
171 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
172 | "milliseconds\n", reg->hr_dev_name, | 173 | "milliseconds\n", reg->hr_dev_name, |
173 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
174 | o2quo_disk_timeout(); | 175 | o2quo_disk_timeout(); |
175 | } | 176 | } |
176 | 177 | ||
177 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) | 178 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) |
178 | { | 179 | { |
179 | mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); | 180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
181 | O2HB_MAX_WRITE_TIMEOUT_MS); | ||
180 | 182 | ||
181 | cancel_delayed_work(®->hr_write_timeout_work); | 183 | cancel_delayed_work(®->hr_write_timeout_work); |
182 | reg->hr_last_timeout_start = jiffies; | 184 | reg->hr_last_timeout_start = jiffies; |
@@ -623,7 +625,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
623 | "seq %llu last %llu changed %u equal %u\n", | 625 | "seq %llu last %llu changed %u equal %u\n", |
624 | slot->ds_node_num, (long long)slot->ds_last_generation, | 626 | slot->ds_node_num, (long long)slot->ds_last_generation, |
625 | le32_to_cpu(hb_block->hb_cksum), | 627 | le32_to_cpu(hb_block->hb_cksum), |
626 | (unsigned long long)le64_to_cpu(hb_block->hb_seq), | 628 | (unsigned long long)le64_to_cpu(hb_block->hb_seq), |
627 | (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, | 629 | (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, |
628 | slot->ds_equal_samples); | 630 | slot->ds_equal_samples); |
629 | 631 | ||
@@ -874,7 +876,8 @@ static int o2hb_thread(void *data) | |||
874 | do_gettimeofday(&after_hb); | 876 | do_gettimeofday(&after_hb); |
875 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); | 877 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); |
876 | 878 | ||
877 | mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", | 879 | mlog(ML_HEARTBEAT, |
880 | "start = %lu.%lu, end = %lu.%lu, msec = %u\n", | ||
878 | before_hb.tv_sec, (unsigned long) before_hb.tv_usec, | 881 | before_hb.tv_sec, (unsigned long) before_hb.tv_usec, |
879 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, | 882 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, |
880 | elapsed_msec); | 883 | elapsed_msec); |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 1cd2934de615..3bb928a2bf7d 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -112,6 +112,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
112 | define_mask(XATTR), | 112 | define_mask(XATTR), |
113 | define_mask(QUOTA), | 113 | define_mask(QUOTA), |
114 | define_mask(REFCOUNT), | 114 | define_mask(REFCOUNT), |
115 | define_mask(BASTS), | ||
115 | define_mask(ERROR), | 116 | define_mask(ERROR), |
116 | define_mask(NOTICE), | 117 | define_mask(NOTICE), |
117 | define_mask(KTHREAD), | 118 | define_mask(KTHREAD), |
@@ -135,7 +136,7 @@ static ssize_t mlog_store(struct kobject *obj, struct attribute *attr, | |||
135 | return mlog_mask_store(mlog_attr->mask, buf, count); | 136 | return mlog_mask_store(mlog_attr->mask, buf, count); |
136 | } | 137 | } |
137 | 138 | ||
138 | static struct sysfs_ops mlog_attr_ops = { | 139 | static const struct sysfs_ops mlog_attr_ops = { |
139 | .show = mlog_show, | 140 | .show = mlog_show, |
140 | .store = mlog_store, | 141 | .store = mlog_store, |
141 | }; | 142 | }; |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 9b4d11726cf2..3dfddbec32f2 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -114,6 +114,7 @@ | |||
114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | 114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ |
115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ | 115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ |
116 | #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ | 116 | #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ |
117 | #define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ | ||
117 | /* bits that are infrequently given and frequently matched in the high word */ | 118 | /* bits that are infrequently given and frequently matched in the high word */ |
118 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
119 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
@@ -194,9 +195,9 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; | |||
194 | * previous token if args expands to nothing. | 195 | * previous token if args expands to nothing. |
195 | */ | 196 | */ |
196 | #define __mlog_printk(level, fmt, args...) \ | 197 | #define __mlog_printk(level, fmt, args...) \ |
197 | printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current), \ | 198 | printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm, \ |
198 | __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ , \ | 199 | task_pid_nr(current), __mlog_cpu_guess, \ |
199 | ##args) | 200 | __PRETTY_FUNCTION__, __LINE__ , ##args) |
200 | 201 | ||
201 | #define mlog(mask, fmt, args...) do { \ | 202 | #define mlog(mask, fmt, args...) do { \ |
202 | u64 __m = MLOG_MASK_PREFIX | (mask); \ | 203 | u64 __m = MLOG_MASK_PREFIX | (mask); \ |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index da794bc07a6c..a3f150e52b02 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v) | |||
294 | if (sc->sc_sock) { | 294 | if (sc->sc_sock) { |
295 | inet = inet_sk(sc->sc_sock->sk); | 295 | inet = inet_sk(sc->sc_sock->sk); |
296 | /* the stack's structs aren't sparse endian clean */ | 296 | /* the stack's structs aren't sparse endian clean */ |
297 | saddr = (__force __be32)inet->saddr; | 297 | saddr = (__force __be32)inet->inet_saddr; |
298 | daddr = (__force __be32)inet->daddr; | 298 | daddr = (__force __be32)inet->inet_daddr; |
299 | sport = (__force __be16)inet->sport; | 299 | sport = (__force __be16)inet->inet_sport; |
300 | dport = (__force __be16)inet->dport; | 300 | dport = (__force __be16)inet->inet_dport; |
301 | } | 301 | } |
302 | 302 | ||
303 | /* XXX sigh, inet-> doesn't have sparse annotation so any | 303 | /* XXX sigh, inet-> doesn't have sparse annotation so any |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 7ee6188bc79a..ed0c9f367fed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -19,6 +19,7 @@ | |||
19 | * Boston, MA 021110-1307, USA. | 19 | * Boston, MA 021110-1307, USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/slab.h> | ||
22 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
24 | #include <linux/configfs.h> | 25 | #include <linux/configfs.h> |
@@ -35,6 +36,10 @@ | |||
35 | * cluster references throughout where nodes are looked up */ | 36 | * cluster references throughout where nodes are looked up */ |
36 | struct o2nm_cluster *o2nm_single_cluster = NULL; | 37 | struct o2nm_cluster *o2nm_single_cluster = NULL; |
37 | 38 | ||
39 | char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { | ||
40 | "reset", /* O2NM_FENCE_RESET */ | ||
41 | "panic", /* O2NM_FENCE_PANIC */ | ||
42 | }; | ||
38 | 43 | ||
39 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 44 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
40 | { | 45 | { |
@@ -579,6 +584,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( | |||
579 | return o2nm_cluster_attr_write(page, count, | 584 | return o2nm_cluster_attr_write(page, count, |
580 | &cluster->cl_reconnect_delay_ms); | 585 | &cluster->cl_reconnect_delay_ms); |
581 | } | 586 | } |
587 | |||
588 | static ssize_t o2nm_cluster_attr_fence_method_read( | ||
589 | struct o2nm_cluster *cluster, char *page) | ||
590 | { | ||
591 | ssize_t ret = 0; | ||
592 | |||
593 | if (cluster) | ||
594 | ret = sprintf(page, "%s\n", | ||
595 | o2nm_fence_method_desc[cluster->cl_fence_method]); | ||
596 | return ret; | ||
597 | } | ||
598 | |||
599 | static ssize_t o2nm_cluster_attr_fence_method_write( | ||
600 | struct o2nm_cluster *cluster, const char *page, size_t count) | ||
601 | { | ||
602 | unsigned int i; | ||
603 | |||
604 | if (page[count - 1] != '\n') | ||
605 | goto bail; | ||
606 | |||
607 | for (i = 0; i < O2NM_FENCE_METHODS; ++i) { | ||
608 | if (count != strlen(o2nm_fence_method_desc[i]) + 1) | ||
609 | continue; | ||
610 | if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) | ||
611 | continue; | ||
612 | if (cluster->cl_fence_method != i) { | ||
613 | printk(KERN_INFO "ocfs2: Changing fence method to %s\n", | ||
614 | o2nm_fence_method_desc[i]); | ||
615 | cluster->cl_fence_method = i; | ||
616 | } | ||
617 | return count; | ||
618 | } | ||
619 | |||
620 | bail: | ||
621 | return -EINVAL; | ||
622 | } | ||
623 | |||
582 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { | 624 | static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { |
583 | .attr = { .ca_owner = THIS_MODULE, | 625 | .attr = { .ca_owner = THIS_MODULE, |
584 | .ca_name = "idle_timeout_ms", | 626 | .ca_name = "idle_timeout_ms", |
@@ -603,10 +645,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { | |||
603 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, | 645 | .store = o2nm_cluster_attr_reconnect_delay_ms_write, |
604 | }; | 646 | }; |
605 | 647 | ||
648 | static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = { | ||
649 | .attr = { .ca_owner = THIS_MODULE, | ||
650 | .ca_name = "fence_method", | ||
651 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
652 | .show = o2nm_cluster_attr_fence_method_read, | ||
653 | .store = o2nm_cluster_attr_fence_method_write, | ||
654 | }; | ||
655 | |||
606 | static struct configfs_attribute *o2nm_cluster_attrs[] = { | 656 | static struct configfs_attribute *o2nm_cluster_attrs[] = { |
607 | &o2nm_cluster_attr_idle_timeout_ms.attr, | 657 | &o2nm_cluster_attr_idle_timeout_ms.attr, |
608 | &o2nm_cluster_attr_keepalive_delay_ms.attr, | 658 | &o2nm_cluster_attr_keepalive_delay_ms.attr, |
609 | &o2nm_cluster_attr_reconnect_delay_ms.attr, | 659 | &o2nm_cluster_attr_reconnect_delay_ms.attr, |
660 | &o2nm_cluster_attr_fence_method.attr, | ||
610 | NULL, | 661 | NULL, |
611 | }; | 662 | }; |
612 | static ssize_t o2nm_cluster_show(struct config_item *item, | 663 | static ssize_t o2nm_cluster_show(struct config_item *item, |
@@ -778,6 +829,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g | |||
778 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; | 829 | cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; |
779 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; | 830 | cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; |
780 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; | 831 | cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; |
832 | cluster->cl_fence_method = O2NM_FENCE_RESET; | ||
781 | 833 | ||
782 | ret = &cluster->cl_group; | 834 | ret = &cluster->cl_group; |
783 | o2nm_single_cluster = cluster; | 835 | o2nm_single_cluster = cluster; |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index c992ea0da4ad..09ea2d388bbb 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
@@ -33,6 +33,12 @@ | |||
33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
34 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
35 | 35 | ||
36 | enum o2nm_fence_method { | ||
37 | O2NM_FENCE_RESET = 0, | ||
38 | O2NM_FENCE_PANIC, | ||
39 | O2NM_FENCE_METHODS, /* Number of fence methods */ | ||
40 | }; | ||
41 | |||
36 | struct o2nm_node { | 42 | struct o2nm_node { |
37 | spinlock_t nd_lock; | 43 | spinlock_t nd_lock; |
38 | struct config_item nd_item; | 44 | struct config_item nd_item; |
@@ -58,6 +64,7 @@ struct o2nm_cluster { | |||
58 | unsigned int cl_idle_timeout_ms; | 64 | unsigned int cl_idle_timeout_ms; |
59 | unsigned int cl_keepalive_delay_ms; | 65 | unsigned int cl_keepalive_delay_ms; |
60 | unsigned int cl_reconnect_delay_ms; | 66 | unsigned int cl_reconnect_delay_ms; |
67 | enum o2nm_fence_method cl_fence_method; | ||
61 | 68 | ||
62 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ | 69 | /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ |
63 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 70 | unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index bbacf7da48a4..cf3e16696216 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -44,7 +44,6 @@ | |||
44 | * and if they're the last, they fire off the decision. | 44 | * and if they're the last, they fire off the decision. |
45 | */ | 45 | */ |
46 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
47 | #include <linux/slab.h> | ||
48 | #include <linux/workqueue.h> | 47 | #include <linux/workqueue.h> |
49 | #include <linux/reboot.h> | 48 | #include <linux/reboot.h> |
50 | 49 | ||
@@ -74,8 +73,20 @@ static void o2quo_fence_self(void) | |||
74 | * threads can still schedule, etc, etc */ | 73 | * threads can still schedule, etc, etc */ |
75 | o2hb_stop_all_regions(); | 74 | o2hb_stop_all_regions(); |
76 | 75 | ||
77 | printk("ocfs2 is very sorry to be fencing this system by restarting\n"); | 76 | switch (o2nm_single_cluster->cl_fence_method) { |
78 | emergency_restart(); | 77 | case O2NM_FENCE_PANIC: |
78 | panic("*** ocfs2 is very sorry to be fencing this system by " | ||
79 | "panicing ***\n"); | ||
80 | break; | ||
81 | default: | ||
82 | WARN_ON(o2nm_single_cluster->cl_fence_method >= | ||
83 | O2NM_FENCE_METHODS); | ||
84 | case O2NM_FENCE_RESET: | ||
85 | printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this " | ||
86 | "system by restarting ***\n"); | ||
87 | emergency_restart(); | ||
88 | break; | ||
89 | }; | ||
79 | } | 90 | } |
80 | 91 | ||
81 | /* Indicate that a timeout occured on a hearbeat region write. The | 92 | /* Indicate that a timeout occured on a hearbeat region write. The |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 334f231a422c..73e743eea2c8 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -72,9 +72,9 @@ | |||
72 | 72 | ||
73 | #include "tcp_internal.h" | 73 | #include "tcp_internal.h" |
74 | 74 | ||
75 | #define SC_NODEF_FMT "node %s (num %u) at %u.%u.%u.%u:%u" | 75 | #define SC_NODEF_FMT "node %s (num %u) at %pI4:%u" |
76 | #define SC_NODEF_ARGS(sc) sc->sc_node->nd_name, sc->sc_node->nd_num, \ | 76 | #define SC_NODEF_ARGS(sc) sc->sc_node->nd_name, sc->sc_node->nd_num, \ |
77 | NIPQUAD(sc->sc_node->nd_ipv4_address), \ | 77 | &sc->sc_node->nd_ipv4_address, \ |
78 | ntohs(sc->sc_node->nd_ipv4_port) | 78 | ntohs(sc->sc_node->nd_ipv4_port) |
79 | 79 | ||
80 | /* | 80 | /* |
@@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
485 | } | 485 | } |
486 | 486 | ||
487 | if (was_valid && !valid) { | 487 | if (was_valid && !valid) { |
488 | printk(KERN_INFO "o2net: no longer connected to " | 488 | printk(KERN_NOTICE "o2net: no longer connected to " |
489 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); | 489 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); |
490 | o2net_complete_nodes_nsw(nn); | 490 | o2net_complete_nodes_nsw(nn); |
491 | } | 491 | } |
@@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
493 | if (!was_valid && valid) { | 493 | if (!was_valid && valid) { |
494 | o2quo_conn_up(o2net_num_from_nn(nn)); | 494 | o2quo_conn_up(o2net_num_from_nn(nn)); |
495 | cancel_delayed_work(&nn->nn_connect_expired); | 495 | cancel_delayed_work(&nn->nn_connect_expired); |
496 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 496 | printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", |
497 | o2nm_this_node() > sc->sc_node->nd_num ? | 497 | o2nm_this_node() > sc->sc_node->nd_num ? |
498 | "connected to" : "accepted connection from", | 498 | "connected to" : "accepted connection from", |
499 | SC_NODEF_ARGS(sc)); | 499 | SC_NODEF_ARGS(sc)); |
@@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc, | |||
930 | cond_resched(); | 930 | cond_resched(); |
931 | continue; | 931 | continue; |
932 | } | 932 | } |
933 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT | 933 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT |
934 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); | 934 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); |
935 | o2net_ensure_shutdown(nn, sc, 0); | 935 | o2net_ensure_shutdown(nn, sc, 0); |
936 | break; | 936 | break; |
@@ -1476,14 +1476,14 @@ static void o2net_idle_timer(unsigned long data) | |||
1476 | 1476 | ||
1477 | do_gettimeofday(&now); | 1477 | do_gettimeofday(&now); |
1478 | 1478 | ||
1479 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1479 | printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
1480 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1480 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
1481 | o2net_idle_timeout() / 1000, | 1481 | o2net_idle_timeout() / 1000, |
1482 | o2net_idle_timeout() % 1000); | 1482 | o2net_idle_timeout() % 1000); |
1483 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1483 | mlog(ML_NOTICE, "here are some times that might help debug the " |
1484 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1484 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
1485 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1485 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
1486 | sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, | 1486 | sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, |
1487 | now.tv_sec, (long) now.tv_usec, | 1487 | now.tv_sec, (long) now.tv_usec, |
1488 | sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, | 1488 | sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, |
1489 | sc->sc_tv_advance_start.tv_sec, | 1489 | sc->sc_tv_advance_start.tv_sec, |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 8d58cfe410b1..96fa7ebc530c 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -32,10 +32,10 @@ | |||
32 | * on their number */ | 32 | * on their number */ |
33 | #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) | 33 | #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * This version number represents quite a lot, unfortunately. It not | 36 | * This version number represents quite a lot, unfortunately. It not |
37 | * only represents the raw network message protocol on the wire but also | 37 | * only represents the raw network message protocol on the wire but also |
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * With version 11, we separate out the filesystem locking portion. The | 41 | * With version 11, we separate out the filesystem locking portion. The |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 28c3ec238796..efd77d071c80 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -2439,7 +2439,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | |||
2439 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | 2439 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; |
2440 | memset(dx_root, 0, osb->sb->s_blocksize); | 2440 | memset(dx_root, 0, osb->sb->s_blocksize); |
2441 | strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); | 2441 | strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); |
2442 | dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num); | 2442 | dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); |
2443 | dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); | 2443 | dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); |
2444 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); | 2444 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); |
2445 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); | 2445 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); |
@@ -2964,12 +2964,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2964 | goto out; | 2964 | goto out; |
2965 | } | 2965 | } |
2966 | 2966 | ||
2967 | if (vfs_dq_alloc_space_nodirty(dir, | 2967 | ret = dquot_alloc_space_nodirty(dir, |
2968 | ocfs2_clusters_to_bytes(osb->sb, | 2968 | ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc)); |
2969 | alloc + dx_alloc))) { | 2969 | if (ret) |
2970 | ret = -EDQUOT; | ||
2971 | goto out_commit; | 2970 | goto out_commit; |
2972 | } | ||
2973 | did_quota = 1; | 2971 | did_quota = 1; |
2974 | 2972 | ||
2975 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | 2973 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { |
@@ -3178,7 +3176,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3178 | 3176 | ||
3179 | out_commit: | 3177 | out_commit: |
3180 | if (ret < 0 && did_quota) | 3178 | if (ret < 0 && did_quota) |
3181 | vfs_dq_free_space_nodirty(dir, bytes_allocated); | 3179 | dquot_free_space_nodirty(dir, bytes_allocated); |
3182 | 3180 | ||
3183 | ocfs2_commit_trans(osb, handle); | 3181 | ocfs2_commit_trans(osb, handle); |
3184 | 3182 | ||
@@ -3221,11 +3219,10 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
3221 | if (extend) { | 3219 | if (extend) { |
3222 | u32 offset = OCFS2_I(dir)->ip_clusters; | 3220 | u32 offset = OCFS2_I(dir)->ip_clusters; |
3223 | 3221 | ||
3224 | if (vfs_dq_alloc_space_nodirty(dir, | 3222 | status = dquot_alloc_space_nodirty(dir, |
3225 | ocfs2_clusters_to_bytes(sb, 1))) { | 3223 | ocfs2_clusters_to_bytes(sb, 1)); |
3226 | status = -EDQUOT; | 3224 | if (status) |
3227 | goto bail; | 3225 | goto bail; |
3228 | } | ||
3229 | did_quota = 1; | 3226 | did_quota = 1; |
3230 | 3227 | ||
3231 | status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, | 3228 | status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, |
@@ -3254,7 +3251,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
3254 | status = 0; | 3251 | status = 0; |
3255 | bail: | 3252 | bail: |
3256 | if (did_quota && status < 0) | 3253 | if (did_quota && status < 0) |
3257 | vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); | 3254 | dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); |
3258 | mlog_exit(status); | 3255 | mlog_exit(status); |
3259 | return status; | 3256 | return status; |
3260 | } | 3257 | } |
@@ -3889,11 +3886,10 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3889 | goto out; | 3886 | goto out; |
3890 | } | 3887 | } |
3891 | 3888 | ||
3892 | if (vfs_dq_alloc_space_nodirty(dir, | 3889 | ret = dquot_alloc_space_nodirty(dir, |
3893 | ocfs2_clusters_to_bytes(dir->i_sb, 1))) { | 3890 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); |
3894 | ret = -EDQUOT; | 3891 | if (ret) |
3895 | goto out_commit; | 3892 | goto out_commit; |
3896 | } | ||
3897 | did_quota = 1; | 3893 | did_quota = 1; |
3898 | 3894 | ||
3899 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, | 3895 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, |
@@ -3983,7 +3979,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3983 | 3979 | ||
3984 | out_commit: | 3980 | out_commit: |
3985 | if (ret < 0 && did_quota) | 3981 | if (ret < 0 && did_quota) |
3986 | vfs_dq_free_space_nodirty(dir, | 3982 | dquot_free_space_nodirty(dir, |
3987 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | 3983 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); |
3988 | 3984 | ||
3989 | ocfs2_commit_trans(osb, handle); | 3985 | ocfs2_commit_trans(osb, handle); |
@@ -4165,11 +4161,10 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
4165 | goto out; | 4161 | goto out; |
4166 | } | 4162 | } |
4167 | 4163 | ||
4168 | if (vfs_dq_alloc_space_nodirty(dir, | 4164 | ret = dquot_alloc_space_nodirty(dir, |
4169 | ocfs2_clusters_to_bytes(osb->sb, 1))) { | 4165 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
4170 | ret = -EDQUOT; | 4166 | if (ret) |
4171 | goto out_commit; | 4167 | goto out_commit; |
4172 | } | ||
4173 | did_quota = 1; | 4168 | did_quota = 1; |
4174 | 4169 | ||
4175 | /* | 4170 | /* |
@@ -4229,7 +4224,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
4229 | 4224 | ||
4230 | out_commit: | 4225 | out_commit: |
4231 | if (ret < 0 && did_quota) | 4226 | if (ret < 0 && did_quota) |
4232 | vfs_dq_free_space_nodirty(dir, | 4227 | dquot_free_space_nodirty(dir, |
4233 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | 4228 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); |
4234 | 4229 | ||
4235 | ocfs2_commit_trans(osb, handle); | 4230 | ocfs2_commit_trans(osb, handle); |
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index 190361375700..dcebf0d920fa 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile | |||
@@ -1,8 +1,7 @@ | |||
1 | EXTRA_CFLAGS += -Ifs/ocfs2 | 1 | EXTRA_CFLAGS += -Ifs/ocfs2 |
2 | 2 | ||
3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o | 3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o |
4 | 4 | ||
5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ | 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ |
6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o | 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o |
7 | 7 | ||
8 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o | ||
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index b5786a787fab..3cfa114aa391 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h | |||
@@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err); | |||
95 | mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ | 95 | mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ |
96 | } while (0) | 96 | } while (0) |
97 | 97 | ||
98 | #define DLM_LKSB_UNUSED1 0x01 | 98 | #define DLM_LKSB_UNUSED1 0x01 |
99 | #define DLM_LKSB_PUT_LVB 0x02 | 99 | #define DLM_LKSB_PUT_LVB 0x02 |
100 | #define DLM_LKSB_GET_LVB 0x04 | 100 | #define DLM_LKSB_GET_LVB 0x04 |
101 | #define DLM_LKSB_UNUSED2 0x08 | 101 | #define DLM_LKSB_UNUSED2 0x08 |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 01cf8cc3d286..12d5eb78a11a 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | ||
32 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
33 | #include <linux/init.h> | 32 | #include <linux/init.h> |
34 | #include <linux/sysctl.h> | 33 | #include <linux/sysctl.h> |
@@ -123,7 +122,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) | |||
123 | dlm_lock_put(lock); | 122 | dlm_lock_put(lock); |
124 | /* free up the reserved bast that we are cancelling. | 123 | /* free up the reserved bast that we are cancelling. |
125 | * guaranteed that this will not be the last reserved | 124 | * guaranteed that this will not be the last reserved |
126 | * ast because *both* an ast and a bast were reserved | 125 | * ast because *both* an ast and a bast were reserved |
127 | * to get to this point. the res->spinlock will not be | 126 | * to get to this point. the res->spinlock will not be |
128 | * taken here */ | 127 | * taken here */ |
129 | dlm_lockres_release_ast(dlm, res); | 128 | dlm_lockres_release_ast(dlm, res); |
@@ -185,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
185 | BUG_ON(!lksb); | 184 | BUG_ON(!lksb); |
186 | 185 | ||
187 | /* only updates if this node masters the lockres */ | 186 | /* only updates if this node masters the lockres */ |
187 | spin_lock(&res->spinlock); | ||
188 | if (res->owner == dlm->node_num) { | 188 | if (res->owner == dlm->node_num) { |
189 | |||
190 | spin_lock(&res->spinlock); | ||
191 | /* check the lksb flags for the direction */ | 189 | /* check the lksb flags for the direction */ |
192 | if (lksb->flags & DLM_LKSB_GET_LVB) { | 190 | if (lksb->flags & DLM_LKSB_GET_LVB) { |
193 | mlog(0, "getting lvb from lockres for %s node\n", | 191 | mlog(0, "getting lvb from lockres for %s node\n", |
@@ -202,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
202 | * here. In the future we might want to clear it at the time | 200 | * here. In the future we might want to clear it at the time |
203 | * the put is actually done. | 201 | * the put is actually done. |
204 | */ | 202 | */ |
205 | spin_unlock(&res->spinlock); | ||
206 | } | 203 | } |
204 | spin_unlock(&res->spinlock); | ||
207 | 205 | ||
208 | /* reset any lvb flags on the lksb */ | 206 | /* reset any lvb flags on the lksb */ |
209 | lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); | 207 | lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index ca96bce50e18..90803b47cd8c 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | ||
32 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
33 | #include <linux/init.h> | 32 | #include <linux/init.h> |
34 | #include <linux/sysctl.h> | 33 | #include <linux/sysctl.h> |
@@ -396,7 +395,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, | |||
396 | /* instead of logging the same network error over | 395 | /* instead of logging the same network error over |
397 | * and over, sleep here and wait for the heartbeat | 396 | * and over, sleep here and wait for the heartbeat |
398 | * to notice the node is dead. times out after 5s. */ | 397 | * to notice the node is dead. times out after 5s. */ |
399 | dlm_wait_for_node_death(dlm, res->owner, | 398 | dlm_wait_for_node_death(dlm, res->owner, |
400 | DLM_NODE_DEATH_WAIT_MAX); | 399 | DLM_NODE_DEATH_WAIT_MAX); |
401 | ret = DLM_RECOVERING; | 400 | ret = DLM_RECOVERING; |
402 | mlog(0, "node %u died so returning DLM_RECOVERING " | 401 | mlog(0, "node %u died so returning DLM_RECOVERING " |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 42b0bad7a612..0cd24cf54396 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
102 | assert_spin_locked(&res->spinlock); | 102 | assert_spin_locked(&res->spinlock); |
103 | 103 | ||
104 | stringify_lockname(res->lockname.name, res->lockname.len, | 104 | stringify_lockname(res->lockname.name, res->lockname.len, |
105 | buf, sizeof(buf) - 1); | 105 | buf, sizeof(buf)); |
106 | printk("lockres: %s, owner=%u, state=%u\n", | 106 | printk("lockres: %s, owner=%u, state=%u\n", |
107 | buf, res->owner, res->state); | 107 | buf, res->owner, res->state); |
108 | printk(" last used: %lu, refcnt: %u, on purge list: %s\n", | 108 | printk(" last used: %lu, refcnt: %u, on purge list: %s\n", |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0334000676d3..988c9055fd4e 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | |||
816 | } | 816 | } |
817 | 817 | ||
818 | /* Once the dlm ctxt is marked as leaving then we don't want | 818 | /* Once the dlm ctxt is marked as leaving then we don't want |
819 | * to be put in someone's domain map. | 819 | * to be put in someone's domain map. |
820 | * Also, explicitly disallow joining at certain troublesome | 820 | * Also, explicitly disallow joining at certain troublesome |
821 | * times (ie. during recovery). */ | 821 | * times (ie. during recovery). */ |
822 | if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { | 822 | if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 437698e9465f..733337772671 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, | |||
269 | } | 269 | } |
270 | dlm_revert_pending_lock(res, lock); | 270 | dlm_revert_pending_lock(res, lock); |
271 | dlm_lock_put(lock); | 271 | dlm_lock_put(lock); |
272 | } else if (dlm_is_recovery_lock(res->lockname.name, | 272 | } else if (dlm_is_recovery_lock(res->lockname.name, |
273 | res->lockname.len)) { | 273 | res->lockname.len)) { |
274 | /* special case for the $RECOVERY lock. | 274 | /* special case for the $RECOVERY lock. |
275 | * there will never be an AST delivered to put | 275 | * there will never be an AST delivered to put |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 83bcaf266b35..9289b4357d27 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) | |||
366 | struct dlm_master_list_entry *mle; | 366 | struct dlm_master_list_entry *mle; |
367 | 367 | ||
368 | assert_spin_locked(&dlm->spinlock); | 368 | assert_spin_locked(&dlm->spinlock); |
369 | 369 | ||
370 | list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { | 370 | list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { |
371 | if (node_up) | 371 | if (node_up) |
372 | dlm_mle_node_up(dlm, mle, NULL, idx); | 372 | dlm_mle_node_up(dlm, mle, NULL, idx); |
@@ -833,7 +833,7 @@ lookup: | |||
833 | __dlm_insert_mle(dlm, mle); | 833 | __dlm_insert_mle(dlm, mle); |
834 | 834 | ||
835 | /* still holding the dlm spinlock, check the recovery map | 835 | /* still holding the dlm spinlock, check the recovery map |
836 | * to see if there are any nodes that still need to be | 836 | * to see if there are any nodes that still need to be |
837 | * considered. these will not appear in the mle nodemap | 837 | * considered. these will not appear in the mle nodemap |
838 | * but they might own this lockres. wait on them. */ | 838 | * but they might own this lockres. wait on them. */ |
839 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); | 839 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); |
@@ -883,7 +883,7 @@ redo_request: | |||
883 | msleep(500); | 883 | msleep(500); |
884 | } | 884 | } |
885 | continue; | 885 | continue; |
886 | } | 886 | } |
887 | 887 | ||
888 | dlm_kick_recovery_thread(dlm); | 888 | dlm_kick_recovery_thread(dlm); |
889 | msleep(1000); | 889 | msleep(1000); |
@@ -939,8 +939,8 @@ wait: | |||
939 | res->lockname.name, blocked); | 939 | res->lockname.name, blocked); |
940 | if (++tries > 20) { | 940 | if (++tries > 20) { |
941 | mlog(ML_ERROR, "%s:%.*s: spinning on " | 941 | mlog(ML_ERROR, "%s:%.*s: spinning on " |
942 | "dlm_wait_for_lock_mastery, blocked=%d\n", | 942 | "dlm_wait_for_lock_mastery, blocked=%d\n", |
943 | dlm->name, res->lockname.len, | 943 | dlm->name, res->lockname.len, |
944 | res->lockname.name, blocked); | 944 | res->lockname.name, blocked); |
945 | dlm_print_one_lock_resource(res); | 945 | dlm_print_one_lock_resource(res); |
946 | dlm_print_one_mle(mle); | 946 | dlm_print_one_mle(mle); |
@@ -1029,7 +1029,7 @@ recheck: | |||
1029 | ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); | 1029 | ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); |
1030 | b = (mle->type == DLM_MLE_BLOCK); | 1030 | b = (mle->type == DLM_MLE_BLOCK); |
1031 | if ((*blocked && !b) || (!*blocked && b)) { | 1031 | if ((*blocked && !b) || (!*blocked && b)) { |
1032 | mlog(0, "%s:%.*s: status change: old=%d new=%d\n", | 1032 | mlog(0, "%s:%.*s: status change: old=%d new=%d\n", |
1033 | dlm->name, res->lockname.len, res->lockname.name, | 1033 | dlm->name, res->lockname.len, res->lockname.name, |
1034 | *blocked, b); | 1034 | *blocked, b); |
1035 | *blocked = b; | 1035 | *blocked = b; |
@@ -1602,7 +1602,7 @@ send_response: | |||
1602 | } | 1602 | } |
1603 | mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", | 1603 | mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", |
1604 | dlm->node_num, res->lockname.len, res->lockname.name); | 1604 | dlm->node_num, res->lockname.len, res->lockname.name); |
1605 | ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, | 1605 | ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, |
1606 | DLM_ASSERT_MASTER_MLE_CLEANUP); | 1606 | DLM_ASSERT_MASTER_MLE_CLEANUP); |
1607 | if (ret < 0) { | 1607 | if (ret < 0) { |
1608 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); | 1608 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); |
@@ -1701,7 +1701,7 @@ again: | |||
1701 | 1701 | ||
1702 | if (r & DLM_ASSERT_RESPONSE_REASSERT) { | 1702 | if (r & DLM_ASSERT_RESPONSE_REASSERT) { |
1703 | mlog(0, "%.*s: node %u create mles on other " | 1703 | mlog(0, "%.*s: node %u create mles on other " |
1704 | "nodes and requests a re-assert\n", | 1704 | "nodes and requests a re-assert\n", |
1705 | namelen, lockname, to); | 1705 | namelen, lockname, to); |
1706 | reassert = 1; | 1706 | reassert = 1; |
1707 | } | 1707 | } |
@@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1812 | spin_unlock(&dlm->master_lock); | 1812 | spin_unlock(&dlm->master_lock); |
1813 | spin_unlock(&dlm->spinlock); | 1813 | spin_unlock(&dlm->spinlock); |
1814 | goto done; | 1814 | goto done; |
1815 | } | 1815 | } |
1816 | } | 1816 | } |
1817 | } | 1817 | } |
1818 | spin_unlock(&dlm->master_lock); | 1818 | spin_unlock(&dlm->master_lock); |
@@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1875 | ok: | 1875 | ok: |
1876 | spin_unlock(&res->spinlock); | 1876 | spin_unlock(&res->spinlock); |
1877 | } | 1877 | } |
1878 | spin_unlock(&dlm->spinlock); | ||
1879 | 1878 | ||
1880 | // mlog(0, "woo! got an assert_master from node %u!\n", | 1879 | // mlog(0, "woo! got an assert_master from node %u!\n", |
1881 | // assert->node_idx); | 1880 | // assert->node_idx); |
@@ -1883,7 +1882,7 @@ ok: | |||
1883 | int extra_ref = 0; | 1882 | int extra_ref = 0; |
1884 | int nn = -1; | 1883 | int nn = -1; |
1885 | int rr, err = 0; | 1884 | int rr, err = 0; |
1886 | 1885 | ||
1887 | spin_lock(&mle->spinlock); | 1886 | spin_lock(&mle->spinlock); |
1888 | if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) | 1887 | if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) |
1889 | extra_ref = 1; | 1888 | extra_ref = 1; |
@@ -1891,7 +1890,7 @@ ok: | |||
1891 | /* MASTER mle: if any bits set in the response map | 1890 | /* MASTER mle: if any bits set in the response map |
1892 | * then the calling node needs to re-assert to clear | 1891 | * then the calling node needs to re-assert to clear |
1893 | * up nodes that this node contacted */ | 1892 | * up nodes that this node contacted */ |
1894 | while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, | 1893 | while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, |
1895 | nn+1)) < O2NM_MAX_NODES) { | 1894 | nn+1)) < O2NM_MAX_NODES) { |
1896 | if (nn != dlm->node_num && nn != assert->node_idx) | 1895 | if (nn != dlm->node_num && nn != assert->node_idx) |
1897 | master_request = 1; | 1896 | master_request = 1; |
@@ -1926,7 +1925,6 @@ ok: | |||
1926 | /* master is known, detach if not already detached. | 1925 | /* master is known, detach if not already detached. |
1927 | * ensures that only one assert_master call will happen | 1926 | * ensures that only one assert_master call will happen |
1928 | * on this mle. */ | 1927 | * on this mle. */ |
1929 | spin_lock(&dlm->spinlock); | ||
1930 | spin_lock(&dlm->master_lock); | 1928 | spin_lock(&dlm->master_lock); |
1931 | 1929 | ||
1932 | rr = atomic_read(&mle->mle_refs.refcount); | 1930 | rr = atomic_read(&mle->mle_refs.refcount); |
@@ -1959,7 +1957,6 @@ ok: | |||
1959 | __dlm_put_mle(mle); | 1957 | __dlm_put_mle(mle); |
1960 | } | 1958 | } |
1961 | spin_unlock(&dlm->master_lock); | 1959 | spin_unlock(&dlm->master_lock); |
1962 | spin_unlock(&dlm->spinlock); | ||
1963 | } else if (res) { | 1960 | } else if (res) { |
1964 | if (res->owner != assert->node_idx) { | 1961 | if (res->owner != assert->node_idx) { |
1965 | mlog(0, "assert_master from %u, but current " | 1962 | mlog(0, "assert_master from %u, but current " |
@@ -1967,6 +1964,7 @@ ok: | |||
1967 | res->owner, namelen, name); | 1964 | res->owner, namelen, name); |
1968 | } | 1965 | } |
1969 | } | 1966 | } |
1967 | spin_unlock(&dlm->spinlock); | ||
1970 | 1968 | ||
1971 | done: | 1969 | done: |
1972 | ret = 0; | 1970 | ret = 0; |
@@ -2002,7 +2000,7 @@ kill: | |||
2002 | __dlm_print_one_lock_resource(res); | 2000 | __dlm_print_one_lock_resource(res); |
2003 | spin_unlock(&res->spinlock); | 2001 | spin_unlock(&res->spinlock); |
2004 | spin_unlock(&dlm->spinlock); | 2002 | spin_unlock(&dlm->spinlock); |
2005 | *ret_data = (void *)res; | 2003 | *ret_data = (void *)res; |
2006 | dlm_put(dlm); | 2004 | dlm_put(dlm); |
2007 | return -EINVAL; | 2005 | return -EINVAL; |
2008 | } | 2006 | } |
@@ -2040,10 +2038,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, | |||
2040 | item->u.am.request_from = request_from; | 2038 | item->u.am.request_from = request_from; |
2041 | item->u.am.flags = flags; | 2039 | item->u.am.flags = flags; |
2042 | 2040 | ||
2043 | if (ignore_higher) | 2041 | if (ignore_higher) |
2044 | mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, | 2042 | mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, |
2045 | res->lockname.name); | 2043 | res->lockname.name); |
2046 | 2044 | ||
2047 | spin_lock(&dlm->work_lock); | 2045 | spin_lock(&dlm->work_lock); |
2048 | list_add_tail(&item->list, &dlm->work_list); | 2046 | list_add_tail(&item->list, &dlm->work_list); |
2049 | spin_unlock(&dlm->work_lock); | 2047 | spin_unlock(&dlm->work_lock); |
@@ -2133,7 +2131,7 @@ put: | |||
2133 | * think that $RECOVERY is currently mastered by a dead node. If so, | 2131 | * think that $RECOVERY is currently mastered by a dead node. If so, |
2134 | * we wait a short time to allow that node to get notified by its own | 2132 | * we wait a short time to allow that node to get notified by its own |
2135 | * heartbeat stack, then check again. All $RECOVERY lock resources | 2133 | * heartbeat stack, then check again. All $RECOVERY lock resources |
2136 | * mastered by dead nodes are purged when the hearbeat callback is | 2134 | * mastered by dead nodes are purged when the hearbeat callback is |
2137 | * fired, so we can know for sure that it is safe to continue once | 2135 | * fired, so we can know for sure that it is safe to continue once |
2138 | * the node returns a live node or no node. */ | 2136 | * the node returns a live node or no node. */ |
2139 | static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, | 2137 | static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, |
@@ -2174,7 +2172,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, | |||
2174 | ret = -EAGAIN; | 2172 | ret = -EAGAIN; |
2175 | } | 2173 | } |
2176 | spin_unlock(&dlm->spinlock); | 2174 | spin_unlock(&dlm->spinlock); |
2177 | mlog(0, "%s: reco lock master is %u\n", dlm->name, | 2175 | mlog(0, "%s: reco lock master is %u\n", dlm->name, |
2178 | master); | 2176 | master); |
2179 | break; | 2177 | break; |
2180 | } | 2178 | } |
@@ -2586,7 +2584,7 @@ fail: | |||
2586 | * is complete everywhere. if the target dies while this is | 2584 | * is complete everywhere. if the target dies while this is |
2587 | * going on, some nodes could potentially see the target as the | 2585 | * going on, some nodes could potentially see the target as the |
2588 | * master, so it is important that my recovery finds the migration | 2586 | * master, so it is important that my recovery finds the migration |
2589 | * mle and sets the master to UNKNONWN. */ | 2587 | * mle and sets the master to UNKNOWN. */ |
2590 | 2588 | ||
2591 | 2589 | ||
2592 | /* wait for new node to assert master */ | 2590 | /* wait for new node to assert master */ |
@@ -2602,7 +2600,7 @@ fail: | |||
2602 | 2600 | ||
2603 | mlog(0, "%s:%.*s: timed out during migration\n", | 2601 | mlog(0, "%s:%.*s: timed out during migration\n", |
2604 | dlm->name, res->lockname.len, res->lockname.name); | 2602 | dlm->name, res->lockname.len, res->lockname.name); |
2605 | /* avoid hang during shutdown when migrating lockres | 2603 | /* avoid hang during shutdown when migrating lockres |
2606 | * to a node which also goes down */ | 2604 | * to a node which also goes down */ |
2607 | if (dlm_is_node_dead(dlm, target)) { | 2605 | if (dlm_is_node_dead(dlm, target)) { |
2608 | mlog(0, "%s:%.*s: expected migration " | 2606 | mlog(0, "%s:%.*s: expected migration " |
@@ -2738,7 +2736,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, | |||
2738 | can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); | 2736 | can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); |
2739 | spin_unlock(&res->spinlock); | 2737 | spin_unlock(&res->spinlock); |
2740 | 2738 | ||
2741 | /* target has died, so make the caller break out of the | 2739 | /* target has died, so make the caller break out of the |
2742 | * wait_event, but caller must recheck the domain_map */ | 2740 | * wait_event, but caller must recheck the domain_map */ |
2743 | spin_lock(&dlm->spinlock); | 2741 | spin_lock(&dlm->spinlock); |
2744 | if (!test_bit(mig_target, dlm->domain_map)) | 2742 | if (!test_bit(mig_target, dlm->domain_map)) |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index d9fa3d22e17c..b4f99de2caf3 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -310,7 +310,7 @@ static int dlm_recovery_thread(void *data) | |||
310 | mlog(0, "dlm thread running for %s...\n", dlm->name); | 310 | mlog(0, "dlm thread running for %s...\n", dlm->name); |
311 | 311 | ||
312 | while (!kthread_should_stop()) { | 312 | while (!kthread_should_stop()) { |
313 | if (dlm_joined(dlm)) { | 313 | if (dlm_domain_fully_joined(dlm)) { |
314 | status = dlm_do_recovery(dlm); | 314 | status = dlm_do_recovery(dlm); |
315 | if (status == -EAGAIN) { | 315 | if (status == -EAGAIN) { |
316 | /* do not sleep, recheck immediately. */ | 316 | /* do not sleep, recheck immediately. */ |
@@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, | |||
1050 | if (lock->ml.node == dead_node) { | 1050 | if (lock->ml.node == dead_node) { |
1051 | mlog(0, "AHA! there was " | 1051 | mlog(0, "AHA! there was " |
1052 | "a $RECOVERY lock for dead " | 1052 | "a $RECOVERY lock for dead " |
1053 | "node %u (%s)!\n", | 1053 | "node %u (%s)!\n", |
1054 | dead_node, dlm->name); | 1054 | dead_node, dlm->name); |
1055 | list_del_init(&lock->list); | 1055 | list_del_init(&lock->list); |
1056 | dlm_lock_put(lock); | 1056 | dlm_lock_put(lock); |
@@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, | |||
1164 | mres->master = master; | 1164 | mres->master = master; |
1165 | } | 1165 | } |
1166 | 1166 | ||
1167 | static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock, | ||
1168 | struct dlm_migratable_lockres *mres, | ||
1169 | int queue) | ||
1170 | { | ||
1171 | if (!lock->lksb) | ||
1172 | return; | ||
1173 | |||
1174 | /* Ignore lvb in all locks in the blocked list */ | ||
1175 | if (queue == DLM_BLOCKED_LIST) | ||
1176 | return; | ||
1177 | |||
1178 | /* Only consider lvbs in locks with granted EX or PR lock levels */ | ||
1179 | if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE) | ||
1180 | return; | ||
1181 | |||
1182 | if (dlm_lvb_is_empty(mres->lvb)) { | ||
1183 | memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); | ||
1184 | return; | ||
1185 | } | ||
1186 | |||
1187 | /* Ensure the lvb copied for migration matches in other valid locks */ | ||
1188 | if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN)) | ||
1189 | return; | ||
1190 | |||
1191 | mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, " | ||
1192 | "node=%u\n", | ||
1193 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
1194 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
1195 | lock->lockres->lockname.len, lock->lockres->lockname.name, | ||
1196 | lock->ml.node); | ||
1197 | dlm_print_one_lock_resource(lock->lockres); | ||
1198 | BUG(); | ||
1199 | } | ||
1167 | 1200 | ||
1168 | /* returns 1 if this lock fills the network structure, | 1201 | /* returns 1 if this lock fills the network structure, |
1169 | * 0 otherwise */ | 1202 | * 0 otherwise */ |
@@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock, | |||
1181 | ml->list = queue; | 1214 | ml->list = queue; |
1182 | if (lock->lksb) { | 1215 | if (lock->lksb) { |
1183 | ml->flags = lock->lksb->flags; | 1216 | ml->flags = lock->lksb->flags; |
1184 | /* send our current lvb */ | 1217 | dlm_prepare_lvb_for_migration(lock, mres, queue); |
1185 | if (ml->type == LKM_EXMODE || | ||
1186 | ml->type == LKM_PRMODE) { | ||
1187 | /* if it is already set, this had better be a PR | ||
1188 | * and it has to match */ | ||
1189 | if (!dlm_lvb_is_empty(mres->lvb) && | ||
1190 | (ml->type == LKM_EXMODE || | ||
1191 | memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { | ||
1192 | mlog(ML_ERROR, "mismatched lvbs!\n"); | ||
1193 | dlm_print_one_lock_resource(lock->lockres); | ||
1194 | BUG(); | ||
1195 | } | ||
1196 | memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); | ||
1197 | } | ||
1198 | } | 1218 | } |
1199 | ml->node = lock->ml.node; | 1219 | ml->node = lock->ml.node; |
1200 | mres->num_locks++; | 1220 | mres->num_locks++; |
@@ -1730,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1730 | struct dlm_lock *lock = NULL; | 1750 | struct dlm_lock *lock = NULL; |
1731 | u8 from = O2NM_MAX_NODES; | 1751 | u8 from = O2NM_MAX_NODES; |
1732 | unsigned int added = 0; | 1752 | unsigned int added = 0; |
1753 | __be64 c; | ||
1733 | 1754 | ||
1734 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); | 1755 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); |
1735 | for (i=0; i<mres->num_locks; i++) { | 1756 | for (i=0; i<mres->num_locks; i++) { |
@@ -1777,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1777 | /* lock is always created locally first, and | 1798 | /* lock is always created locally first, and |
1778 | * destroyed locally last. it must be on the list */ | 1799 | * destroyed locally last. it must be on the list */ |
1779 | if (!lock) { | 1800 | if (!lock) { |
1780 | __be64 c = ml->cookie; | 1801 | c = ml->cookie; |
1781 | mlog(ML_ERROR, "could not find local lock " | 1802 | mlog(ML_ERROR, "Could not find local lock " |
1782 | "with cookie %u:%llu!\n", | 1803 | "with cookie %u:%llu, node %u, " |
1804 | "list %u, flags 0x%x, type %d, " | ||
1805 | "conv %d, highest blocked %d\n", | ||
1783 | dlm_get_lock_cookie_node(be64_to_cpu(c)), | 1806 | dlm_get_lock_cookie_node(be64_to_cpu(c)), |
1784 | dlm_get_lock_cookie_seq(be64_to_cpu(c))); | 1807 | dlm_get_lock_cookie_seq(be64_to_cpu(c)), |
1808 | ml->node, ml->list, ml->flags, ml->type, | ||
1809 | ml->convert_type, ml->highest_blocked); | ||
1810 | __dlm_print_one_lock_resource(res); | ||
1811 | BUG(); | ||
1812 | } | ||
1813 | |||
1814 | if (lock->ml.node != ml->node) { | ||
1815 | c = lock->ml.cookie; | ||
1816 | mlog(ML_ERROR, "Mismatched node# in lock " | ||
1817 | "cookie %u:%llu, name %.*s, node %u\n", | ||
1818 | dlm_get_lock_cookie_node(be64_to_cpu(c)), | ||
1819 | dlm_get_lock_cookie_seq(be64_to_cpu(c)), | ||
1820 | res->lockname.len, res->lockname.name, | ||
1821 | lock->ml.node); | ||
1822 | c = ml->cookie; | ||
1823 | mlog(ML_ERROR, "Migrate lock cookie %u:%llu, " | ||
1824 | "node %u, list %u, flags 0x%x, type %d, " | ||
1825 | "conv %d, highest blocked %d\n", | ||
1826 | dlm_get_lock_cookie_node(be64_to_cpu(c)), | ||
1827 | dlm_get_lock_cookie_seq(be64_to_cpu(c)), | ||
1828 | ml->node, ml->list, ml->flags, ml->type, | ||
1829 | ml->convert_type, ml->highest_blocked); | ||
1785 | __dlm_print_one_lock_resource(res); | 1830 | __dlm_print_one_lock_resource(res); |
1786 | BUG(); | 1831 | BUG(); |
1787 | } | 1832 | } |
1788 | BUG_ON(lock->ml.node != ml->node); | ||
1789 | 1833 | ||
1790 | if (tmpq != queue) { | 1834 | if (tmpq != queue) { |
1791 | mlog(0, "lock was on %u instead of %u for %.*s\n", | 1835 | c = ml->cookie; |
1792 | j, ml->list, res->lockname.len, res->lockname.name); | 1836 | mlog(0, "Lock cookie %u:%llu was on list %u " |
1837 | "instead of list %u for %.*s\n", | ||
1838 | dlm_get_lock_cookie_node(be64_to_cpu(c)), | ||
1839 | dlm_get_lock_cookie_seq(be64_to_cpu(c)), | ||
1840 | j, ml->list, res->lockname.len, | ||
1841 | res->lockname.name); | ||
1842 | __dlm_print_one_lock_resource(res); | ||
1793 | spin_unlock(&res->spinlock); | 1843 | spin_unlock(&res->spinlock); |
1794 | continue; | 1844 | continue; |
1795 | } | 1845 | } |
@@ -1839,7 +1889,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1839 | * the lvb. */ | 1889 | * the lvb. */ |
1840 | memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); | 1890 | memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); |
1841 | } else { | 1891 | } else { |
1842 | /* otherwise, the node is sending its | 1892 | /* otherwise, the node is sending its |
1843 | * most recent valid lvb info */ | 1893 | * most recent valid lvb info */ |
1844 | BUG_ON(ml->type != LKM_EXMODE && | 1894 | BUG_ON(ml->type != LKM_EXMODE && |
1845 | ml->type != LKM_PRMODE); | 1895 | ml->type != LKM_PRMODE); |
@@ -1886,7 +1936,7 @@ skip_lvb: | |||
1886 | spin_lock(&res->spinlock); | 1936 | spin_lock(&res->spinlock); |
1887 | list_for_each_entry(lock, queue, list) { | 1937 | list_for_each_entry(lock, queue, list) { |
1888 | if (lock->ml.cookie == ml->cookie) { | 1938 | if (lock->ml.cookie == ml->cookie) { |
1889 | __be64 c = lock->ml.cookie; | 1939 | c = lock->ml.cookie; |
1890 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " | 1940 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " |
1891 | "exists on this lockres!\n", dlm->name, | 1941 | "exists on this lockres!\n", dlm->name, |
1892 | res->lockname.len, res->lockname.name, | 1942 | res->lockname.len, res->lockname.name, |
@@ -2114,7 +2164,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | |||
2114 | assert_spin_locked(&res->spinlock); | 2164 | assert_spin_locked(&res->spinlock); |
2115 | 2165 | ||
2116 | if (res->owner == dlm->node_num) | 2166 | if (res->owner == dlm->node_num) |
2117 | /* if this node owned the lockres, and if the dead node | 2167 | /* if this node owned the lockres, and if the dead node |
2118 | * had an EX when he died, blank out the lvb */ | 2168 | * had an EX when he died, blank out the lvb */ |
2119 | search_node = dead_node; | 2169 | search_node = dead_node; |
2120 | else { | 2170 | else { |
@@ -2152,7 +2202,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2152 | 2202 | ||
2153 | /* this node is the lockres master: | 2203 | /* this node is the lockres master: |
2154 | * 1) remove any stale locks for the dead node | 2204 | * 1) remove any stale locks for the dead node |
2155 | * 2) if the dead node had an EX when he died, blank out the lvb | 2205 | * 2) if the dead node had an EX when he died, blank out the lvb |
2156 | */ | 2206 | */ |
2157 | assert_spin_locked(&dlm->spinlock); | 2207 | assert_spin_locked(&dlm->spinlock); |
2158 | assert_spin_locked(&res->spinlock); | 2208 | assert_spin_locked(&res->spinlock); |
@@ -2193,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2193 | mlog(0, "%s:%.*s: freed %u locks for dead node %u, " | 2243 | mlog(0, "%s:%.*s: freed %u locks for dead node %u, " |
2194 | "dropping ref from lockres\n", dlm->name, | 2244 | "dropping ref from lockres\n", dlm->name, |
2195 | res->lockname.len, res->lockname.name, freed, dead_node); | 2245 | res->lockname.len, res->lockname.name, freed, dead_node); |
2196 | BUG_ON(!test_bit(dead_node, res->refmap)); | 2246 | if(!test_bit(dead_node, res->refmap)) { |
2247 | mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, " | ||
2248 | "but ref was not set\n", dlm->name, | ||
2249 | res->lockname.len, res->lockname.name, freed, dead_node); | ||
2250 | __dlm_print_one_lock_resource(res); | ||
2251 | } | ||
2197 | dlm_lockres_clear_refmap_bit(dead_node, res); | 2252 | dlm_lockres_clear_refmap_bit(dead_node, res); |
2198 | } else if (test_bit(dead_node, res->refmap)) { | 2253 | } else if (test_bit(dead_node, res->refmap)) { |
2199 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | 2254 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " |
@@ -2260,7 +2315,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2260 | } | 2315 | } |
2261 | spin_unlock(&res->spinlock); | 2316 | spin_unlock(&res->spinlock); |
2262 | continue; | 2317 | continue; |
2263 | } | 2318 | } |
2264 | spin_lock(&res->spinlock); | 2319 | spin_lock(&res->spinlock); |
2265 | /* zero the lvb if necessary */ | 2320 | /* zero the lvb if necessary */ |
2266 | dlm_revalidate_lvb(dlm, res, dead_node); | 2321 | dlm_revalidate_lvb(dlm, res, dead_node); |
@@ -2411,7 +2466,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st) | |||
2411 | * this function on each node racing to become the recovery | 2466 | * this function on each node racing to become the recovery |
2412 | * master will not stop attempting this until either: | 2467 | * master will not stop attempting this until either: |
2413 | * a) this node gets the EX (and becomes the recovery master), | 2468 | * a) this node gets the EX (and becomes the recovery master), |
2414 | * or b) dlm->reco.new_master gets set to some nodenum | 2469 | * or b) dlm->reco.new_master gets set to some nodenum |
2415 | * != O2NM_INVALID_NODE_NUM (another node will do the reco). | 2470 | * != O2NM_INVALID_NODE_NUM (another node will do the reco). |
2416 | * so each time a recovery master is needed, the entire cluster | 2471 | * so each time a recovery master is needed, the entire cluster |
2417 | * will sync at this point. if the new master dies, that will | 2472 | * will sync at this point. if the new master dies, that will |
@@ -2424,7 +2479,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) | |||
2424 | 2479 | ||
2425 | mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", | 2480 | mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", |
2426 | dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); | 2481 | dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); |
2427 | again: | 2482 | again: |
2428 | memset(&lksb, 0, sizeof(lksb)); | 2483 | memset(&lksb, 0, sizeof(lksb)); |
2429 | 2484 | ||
2430 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, | 2485 | ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, |
@@ -2437,8 +2492,8 @@ again: | |||
2437 | if (ret == DLM_NORMAL) { | 2492 | if (ret == DLM_NORMAL) { |
2438 | mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", | 2493 | mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", |
2439 | dlm->name, dlm->node_num); | 2494 | dlm->name, dlm->node_num); |
2440 | 2495 | ||
2441 | /* got the EX lock. check to see if another node | 2496 | /* got the EX lock. check to see if another node |
2442 | * just became the reco master */ | 2497 | * just became the reco master */ |
2443 | if (dlm_reco_master_ready(dlm)) { | 2498 | if (dlm_reco_master_ready(dlm)) { |
2444 | mlog(0, "%s: got reco EX lock, but %u will " | 2499 | mlog(0, "%s: got reco EX lock, but %u will " |
@@ -2451,12 +2506,12 @@ again: | |||
2451 | /* see if recovery was already finished elsewhere */ | 2506 | /* see if recovery was already finished elsewhere */ |
2452 | spin_lock(&dlm->spinlock); | 2507 | spin_lock(&dlm->spinlock); |
2453 | if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { | 2508 | if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { |
2454 | status = -EINVAL; | 2509 | status = -EINVAL; |
2455 | mlog(0, "%s: got reco EX lock, but " | 2510 | mlog(0, "%s: got reco EX lock, but " |
2456 | "node got recovered already\n", dlm->name); | 2511 | "node got recovered already\n", dlm->name); |
2457 | if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { | 2512 | if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { |
2458 | mlog(ML_ERROR, "%s: new master is %u " | 2513 | mlog(ML_ERROR, "%s: new master is %u " |
2459 | "but no dead node!\n", | 2514 | "but no dead node!\n", |
2460 | dlm->name, dlm->reco.new_master); | 2515 | dlm->name, dlm->reco.new_master); |
2461 | BUG(); | 2516 | BUG(); |
2462 | } | 2517 | } |
@@ -2468,7 +2523,7 @@ again: | |||
2468 | * set the master and send the messages to begin recovery */ | 2523 | * set the master and send the messages to begin recovery */ |
2469 | if (!status) { | 2524 | if (!status) { |
2470 | mlog(0, "%s: dead=%u, this=%u, sending " | 2525 | mlog(0, "%s: dead=%u, this=%u, sending " |
2471 | "begin_reco now\n", dlm->name, | 2526 | "begin_reco now\n", dlm->name, |
2472 | dlm->reco.dead_node, dlm->node_num); | 2527 | dlm->reco.dead_node, dlm->node_num); |
2473 | status = dlm_send_begin_reco_message(dlm, | 2528 | status = dlm_send_begin_reco_message(dlm, |
2474 | dlm->reco.dead_node); | 2529 | dlm->reco.dead_node); |
@@ -2501,7 +2556,7 @@ again: | |||
2501 | mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", | 2556 | mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", |
2502 | dlm->name, dlm->node_num); | 2557 | dlm->name, dlm->node_num); |
2503 | /* another node is master. wait on | 2558 | /* another node is master. wait on |
2504 | * reco.new_master != O2NM_INVALID_NODE_NUM | 2559 | * reco.new_master != O2NM_INVALID_NODE_NUM |
2505 | * for at most one second */ | 2560 | * for at most one second */ |
2506 | wait_event_timeout(dlm->dlm_reco_thread_wq, | 2561 | wait_event_timeout(dlm->dlm_reco_thread_wq, |
2507 | dlm_reco_master_ready(dlm), | 2562 | dlm_reco_master_ready(dlm), |
@@ -2589,9 +2644,23 @@ retry: | |||
2589 | "begin reco msg (%d)\n", dlm->name, nodenum, ret); | 2644 | "begin reco msg (%d)\n", dlm->name, nodenum, ret); |
2590 | ret = 0; | 2645 | ret = 0; |
2591 | } | 2646 | } |
2647 | |||
2648 | /* | ||
2649 | * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8, | ||
2650 | * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN. | ||
2651 | * We are handling both for compatibility reasons. | ||
2652 | */ | ||
2653 | if (ret == -EAGAIN || ret == EAGAIN) { | ||
2654 | mlog(0, "%s: trying to start recovery of node " | ||
2655 | "%u, but node %u is waiting for last recovery " | ||
2656 | "to complete, backoff for a bit\n", dlm->name, | ||
2657 | dead_node, nodenum); | ||
2658 | msleep(100); | ||
2659 | goto retry; | ||
2660 | } | ||
2592 | if (ret < 0) { | 2661 | if (ret < 0) { |
2593 | struct dlm_lock_resource *res; | 2662 | struct dlm_lock_resource *res; |
2594 | /* this is now a serious problem, possibly ENOMEM | 2663 | /* this is now a serious problem, possibly ENOMEM |
2595 | * in the network stack. must retry */ | 2664 | * in the network stack. must retry */ |
2596 | mlog_errno(ret); | 2665 | mlog_errno(ret); |
2597 | mlog(ML_ERROR, "begin reco of dlm %s to node %u " | 2666 | mlog(ML_ERROR, "begin reco of dlm %s to node %u " |
@@ -2604,18 +2673,10 @@ retry: | |||
2604 | } else { | 2673 | } else { |
2605 | mlog(ML_ERROR, "recovery lock not found\n"); | 2674 | mlog(ML_ERROR, "recovery lock not found\n"); |
2606 | } | 2675 | } |
2607 | /* sleep for a bit in hopes that we can avoid | 2676 | /* sleep for a bit in hopes that we can avoid |
2608 | * another ENOMEM */ | 2677 | * another ENOMEM */ |
2609 | msleep(100); | 2678 | msleep(100); |
2610 | goto retry; | 2679 | goto retry; |
2611 | } else if (ret == EAGAIN) { | ||
2612 | mlog(0, "%s: trying to start recovery of node " | ||
2613 | "%u, but node %u is waiting for last recovery " | ||
2614 | "to complete, backoff for a bit\n", dlm->name, | ||
2615 | dead_node, nodenum); | ||
2616 | /* TODO Look into replacing msleep with cond_resched() */ | ||
2617 | msleep(100); | ||
2618 | goto retry; | ||
2619 | } | 2680 | } |
2620 | } | 2681 | } |
2621 | 2682 | ||
@@ -2639,7 +2700,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2639 | dlm->name, br->node_idx, br->dead_node, | 2700 | dlm->name, br->node_idx, br->dead_node, |
2640 | dlm->reco.dead_node, dlm->reco.new_master); | 2701 | dlm->reco.dead_node, dlm->reco.new_master); |
2641 | spin_unlock(&dlm->spinlock); | 2702 | spin_unlock(&dlm->spinlock); |
2642 | return EAGAIN; | 2703 | return -EAGAIN; |
2643 | } | 2704 | } |
2644 | spin_unlock(&dlm->spinlock); | 2705 | spin_unlock(&dlm->spinlock); |
2645 | 2706 | ||
@@ -2664,7 +2725,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2664 | } | 2725 | } |
2665 | if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { | 2726 | if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { |
2666 | mlog(ML_NOTICE, "%s: dead_node previously set to %u, " | 2727 | mlog(ML_NOTICE, "%s: dead_node previously set to %u, " |
2667 | "node %u changing it to %u\n", dlm->name, | 2728 | "node %u changing it to %u\n", dlm->name, |
2668 | dlm->reco.dead_node, br->node_idx, br->dead_node); | 2729 | dlm->reco.dead_node, br->node_idx, br->dead_node); |
2669 | } | 2730 | } |
2670 | dlm_set_reco_master(dlm, br->node_idx); | 2731 | dlm_set_reco_master(dlm, br->node_idx); |
@@ -2730,8 +2791,8 @@ stage2: | |||
2730 | if (ret < 0) { | 2791 | if (ret < 0) { |
2731 | mlog_errno(ret); | 2792 | mlog_errno(ret); |
2732 | if (dlm_is_host_down(ret)) { | 2793 | if (dlm_is_host_down(ret)) { |
2733 | /* this has no effect on this recovery | 2794 | /* this has no effect on this recovery |
2734 | * session, so set the status to zero to | 2795 | * session, so set the status to zero to |
2735 | * finish out the last recovery */ | 2796 | * finish out the last recovery */ |
2736 | mlog(ML_ERROR, "node %u went down after this " | 2797 | mlog(ML_ERROR, "node %u went down after this " |
2737 | "node finished recovery.\n", nodenum); | 2798 | "node finished recovery.\n", nodenum); |
@@ -2768,7 +2829,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2768 | mlog(0, "%s: node %u finalizing recovery stage%d of " | 2829 | mlog(0, "%s: node %u finalizing recovery stage%d of " |
2769 | "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, | 2830 | "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, |
2770 | fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); | 2831 | fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); |
2771 | 2832 | ||
2772 | spin_lock(&dlm->spinlock); | 2833 | spin_lock(&dlm->spinlock); |
2773 | 2834 | ||
2774 | if (dlm->reco.new_master != fr->node_idx) { | 2835 | if (dlm->reco.new_master != fr->node_idx) { |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 52ec020ea78b..11a6d1fd1d35 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | ||
32 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
33 | #include <linux/init.h> | 32 | #include <linux/init.h> |
34 | #include <linux/sysctl.h> | 33 | #include <linux/sysctl.h> |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 00f53b2aea76..b47c1b92b82b 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | ||
32 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
33 | #include <linux/init.h> | 32 | #include <linux/init.h> |
34 | #include <linux/sysctl.h> | 33 | #include <linux/sysctl.h> |
@@ -190,8 +189,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
190 | actions &= ~(DLM_UNLOCK_REMOVE_LOCK| | 189 | actions &= ~(DLM_UNLOCK_REMOVE_LOCK| |
191 | DLM_UNLOCK_REGRANT_LOCK| | 190 | DLM_UNLOCK_REGRANT_LOCK| |
192 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); | 191 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); |
193 | } else if (status == DLM_RECOVERING || | 192 | } else if (status == DLM_RECOVERING || |
194 | status == DLM_MIGRATING || | 193 | status == DLM_MIGRATING || |
195 | status == DLM_FORWARD) { | 194 | status == DLM_FORWARD) { |
196 | /* must clear the actions because this unlock | 195 | /* must clear the actions because this unlock |
197 | * is about to be retried. cannot free or do | 196 | * is about to be retried. cannot free or do |
@@ -661,14 +660,14 @@ retry: | |||
661 | if (call_ast) { | 660 | if (call_ast) { |
662 | mlog(0, "calling unlockast(%p, %d)\n", data, status); | 661 | mlog(0, "calling unlockast(%p, %d)\n", data, status); |
663 | if (is_master) { | 662 | if (is_master) { |
664 | /* it is possible that there is one last bast | 663 | /* it is possible that there is one last bast |
665 | * pending. make sure it is flushed, then | 664 | * pending. make sure it is flushed, then |
666 | * call the unlockast. | 665 | * call the unlockast. |
667 | * not an issue if this is a mastered remotely, | 666 | * not an issue if this is a mastered remotely, |
668 | * since this lock has been removed from the | 667 | * since this lock has been removed from the |
669 | * lockres queues and cannot be found. */ | 668 | * lockres queues and cannot be found. */ |
670 | dlm_kick_thread(dlm, NULL); | 669 | dlm_kick_thread(dlm, NULL); |
671 | wait_event(dlm->ast_wq, | 670 | wait_event(dlm->ast_wq, |
672 | dlm_lock_basts_flushed(dlm, lock)); | 671 | dlm_lock_basts_flushed(dlm, lock)); |
673 | } | 672 | } |
674 | (*unlockast)(data, status); | 673 | (*unlockast)(data, status); |
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile new file mode 100644 index 000000000000..df69b4856d0d --- /dev/null +++ b/fs/ocfs2/dlmfs/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | EXTRA_CFLAGS += -Ifs/ocfs2 | ||
2 | |||
3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o | ||
4 | |||
5 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o | ||
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 02bf17808bdc..b83d6107a1f5 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -43,24 +43,17 @@ | |||
43 | #include <linux/init.h> | 43 | #include <linux/init.h> |
44 | #include <linux/string.h> | 44 | #include <linux/string.h> |
45 | #include <linux/backing-dev.h> | 45 | #include <linux/backing-dev.h> |
46 | #include <linux/poll.h> | ||
46 | 47 | ||
47 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
48 | 49 | ||
49 | 50 | #include "stackglue.h" | |
50 | #include "cluster/nodemanager.h" | ||
51 | #include "cluster/heartbeat.h" | ||
52 | #include "cluster/tcp.h" | ||
53 | |||
54 | #include "dlmapi.h" | ||
55 | |||
56 | #include "userdlm.h" | 51 | #include "userdlm.h" |
57 | |||
58 | #include "dlmfsver.h" | 52 | #include "dlmfsver.h" |
59 | 53 | ||
60 | #define MLOG_MASK_PREFIX ML_DLMFS | 54 | #define MLOG_MASK_PREFIX ML_DLMFS |
61 | #include "cluster/masklog.h" | 55 | #include "cluster/masklog.h" |
62 | 56 | ||
63 | #include "ocfs2_lockingver.h" | ||
64 | 57 | ||
65 | static const struct super_operations dlmfs_ops; | 58 | static const struct super_operations dlmfs_ops; |
66 | static const struct file_operations dlmfs_file_operations; | 59 | static const struct file_operations dlmfs_file_operations; |
@@ -71,15 +64,46 @@ static struct kmem_cache *dlmfs_inode_cache; | |||
71 | 64 | ||
72 | struct workqueue_struct *user_dlm_worker; | 65 | struct workqueue_struct *user_dlm_worker; |
73 | 66 | ||
67 | |||
68 | |||
74 | /* | 69 | /* |
75 | * This is the userdlmfs locking protocol version. | 70 | * These are the ABI capabilities of dlmfs. |
71 | * | ||
72 | * Over time, dlmfs has added some features that were not part of the | ||
73 | * initial ABI. Unfortunately, some of these features are not detectable | ||
74 | * via standard usage. For example, Linux's default poll always returns | ||
75 | * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs | ||
76 | * added poll support. Instead, we provide this list of new capabilities. | ||
77 | * | ||
78 | * Capabilities is a read-only attribute. We do it as a module parameter | ||
79 | * so we can discover it whether dlmfs is built in, loaded, or even not | ||
80 | * loaded. | ||
76 | * | 81 | * |
77 | * See fs/ocfs2/dlmglue.c for more details on locking versions. | 82 | * The ABI features are local to this machine's dlmfs mount. This is |
83 | * distinct from the locking protocol, which is concerned with inter-node | ||
84 | * interaction. | ||
85 | * | ||
86 | * Capabilities: | ||
87 | * - bast : POLLIN against the file descriptor of a held lock | ||
88 | * signifies a bast fired on the lock. | ||
78 | */ | 89 | */ |
79 | static const struct dlm_protocol_version user_locking_protocol = { | 90 | #define DLMFS_CAPABILITIES "bast stackglue" |
80 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | 91 | extern int param_set_dlmfs_capabilities(const char *val, |
81 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | 92 | struct kernel_param *kp) |
82 | }; | 93 | { |
94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | static int param_get_dlmfs_capabilities(char *buffer, | ||
98 | struct kernel_param *kp) | ||
99 | { | ||
100 | return strlcpy(buffer, DLMFS_CAPABILITIES, | ||
101 | strlen(DLMFS_CAPABILITIES) + 1); | ||
102 | } | ||
103 | module_param_call(capabilities, param_set_dlmfs_capabilities, | ||
104 | param_get_dlmfs_capabilities, NULL, 0444); | ||
105 | MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); | ||
106 | |||
83 | 107 | ||
84 | /* | 108 | /* |
85 | * decodes a set of open flags into a valid lock level and a set of flags. | 109 | * decodes a set of open flags into a valid lock level and a set of flags. |
@@ -88,20 +112,20 @@ static const struct dlm_protocol_version user_locking_protocol = { | |||
88 | * O_RDONLY -> PRMODE level | 112 | * O_RDONLY -> PRMODE level |
89 | * O_WRONLY -> EXMODE level | 113 | * O_WRONLY -> EXMODE level |
90 | * | 114 | * |
91 | * O_NONBLOCK -> LKM_NOQUEUE | 115 | * O_NONBLOCK -> NOQUEUE |
92 | */ | 116 | */ |
93 | static int dlmfs_decode_open_flags(int open_flags, | 117 | static int dlmfs_decode_open_flags(int open_flags, |
94 | int *level, | 118 | int *level, |
95 | int *flags) | 119 | int *flags) |
96 | { | 120 | { |
97 | if (open_flags & (O_WRONLY|O_RDWR)) | 121 | if (open_flags & (O_WRONLY|O_RDWR)) |
98 | *level = LKM_EXMODE; | 122 | *level = DLM_LOCK_EX; |
99 | else | 123 | else |
100 | *level = LKM_PRMODE; | 124 | *level = DLM_LOCK_PR; |
101 | 125 | ||
102 | *flags = 0; | 126 | *flags = 0; |
103 | if (open_flags & O_NONBLOCK) | 127 | if (open_flags & O_NONBLOCK) |
104 | *flags |= LKM_NOQUEUE; | 128 | *flags |= DLM_LKF_NOQUEUE; |
105 | 129 | ||
106 | return 0; | 130 | return 0; |
107 | } | 131 | } |
@@ -142,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode, | |||
142 | * to be able userspace to be able to distinguish a | 166 | * to be able userspace to be able to distinguish a |
143 | * valid lock request from one that simply couldn't be | 167 | * valid lock request from one that simply couldn't be |
144 | * granted. */ | 168 | * granted. */ |
145 | if (flags & LKM_NOQUEUE && status == -EAGAIN) | 169 | if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN) |
146 | status = -ETXTBSY; | 170 | status = -ETXTBSY; |
147 | kfree(fp); | 171 | kfree(fp); |
148 | goto bail; | 172 | goto bail; |
@@ -169,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode, | |||
169 | status = 0; | 193 | status = 0; |
170 | if (fp) { | 194 | if (fp) { |
171 | level = fp->fp_lock_level; | 195 | level = fp->fp_lock_level; |
172 | if (level != LKM_IVMODE) | 196 | if (level != DLM_LOCK_IV) |
173 | user_dlm_cluster_unlock(&ip->ip_lockres, level); | 197 | user_dlm_cluster_unlock(&ip->ip_lockres, level); |
174 | 198 | ||
175 | kfree(fp); | 199 | kfree(fp); |
@@ -179,13 +203,46 @@ static int dlmfs_file_release(struct inode *inode, | |||
179 | return 0; | 203 | return 0; |
180 | } | 204 | } |
181 | 205 | ||
206 | /* | ||
207 | * We do ->setattr() just to override size changes. Our size is the size | ||
208 | * of the LVB and nothing else. | ||
209 | */ | ||
210 | static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) | ||
211 | { | ||
212 | int error; | ||
213 | struct inode *inode = dentry->d_inode; | ||
214 | |||
215 | attr->ia_valid &= ~ATTR_SIZE; | ||
216 | error = inode_change_ok(inode, attr); | ||
217 | if (!error) | ||
218 | error = inode_setattr(inode, attr); | ||
219 | |||
220 | return error; | ||
221 | } | ||
222 | |||
223 | static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) | ||
224 | { | ||
225 | int event = 0; | ||
226 | struct inode *inode = file->f_path.dentry->d_inode; | ||
227 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | ||
228 | |||
229 | poll_wait(file, &ip->ip_lockres.l_event, wait); | ||
230 | |||
231 | spin_lock(&ip->ip_lockres.l_lock); | ||
232 | if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) | ||
233 | event = POLLIN | POLLRDNORM; | ||
234 | spin_unlock(&ip->ip_lockres.l_lock); | ||
235 | |||
236 | return event; | ||
237 | } | ||
238 | |||
182 | static ssize_t dlmfs_file_read(struct file *filp, | 239 | static ssize_t dlmfs_file_read(struct file *filp, |
183 | char __user *buf, | 240 | char __user *buf, |
184 | size_t count, | 241 | size_t count, |
185 | loff_t *ppos) | 242 | loff_t *ppos) |
186 | { | 243 | { |
187 | int bytes_left; | 244 | int bytes_left; |
188 | ssize_t readlen; | 245 | ssize_t readlen, got; |
189 | char *lvb_buf; | 246 | char *lvb_buf; |
190 | struct inode *inode = filp->f_path.dentry->d_inode; | 247 | struct inode *inode = filp->f_path.dentry->d_inode; |
191 | 248 | ||
@@ -205,15 +262,19 @@ static ssize_t dlmfs_file_read(struct file *filp, | |||
205 | if ((count + *ppos) > i_size_read(inode)) | 262 | if ((count + *ppos) > i_size_read(inode)) |
206 | readlen = i_size_read(inode) - *ppos; | 263 | readlen = i_size_read(inode) - *ppos; |
207 | else | 264 | else |
208 | readlen = count - *ppos; | 265 | readlen = count; |
209 | 266 | ||
210 | lvb_buf = kmalloc(readlen, GFP_NOFS); | 267 | lvb_buf = kmalloc(readlen, GFP_NOFS); |
211 | if (!lvb_buf) | 268 | if (!lvb_buf) |
212 | return -ENOMEM; | 269 | return -ENOMEM; |
213 | 270 | ||
214 | user_dlm_read_lvb(inode, lvb_buf, readlen); | 271 | got = user_dlm_read_lvb(inode, lvb_buf, readlen); |
215 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | 272 | if (got) { |
216 | readlen -= bytes_left; | 273 | BUG_ON(got != readlen); |
274 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | ||
275 | readlen -= bytes_left; | ||
276 | } else | ||
277 | readlen = 0; | ||
217 | 278 | ||
218 | kfree(lvb_buf); | 279 | kfree(lvb_buf); |
219 | 280 | ||
@@ -272,7 +333,7 @@ static void dlmfs_init_once(void *foo) | |||
272 | struct dlmfs_inode_private *ip = | 333 | struct dlmfs_inode_private *ip = |
273 | (struct dlmfs_inode_private *) foo; | 334 | (struct dlmfs_inode_private *) foo; |
274 | 335 | ||
275 | ip->ip_dlm = NULL; | 336 | ip->ip_conn = NULL; |
276 | ip->ip_parent = NULL; | 337 | ip->ip_parent = NULL; |
277 | 338 | ||
278 | inode_init_once(&ip->ip_vfs_inode); | 339 | inode_init_once(&ip->ip_vfs_inode); |
@@ -314,14 +375,14 @@ static void dlmfs_clear_inode(struct inode *inode) | |||
314 | goto clear_fields; | 375 | goto clear_fields; |
315 | } | 376 | } |
316 | 377 | ||
317 | mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); | 378 | mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn); |
318 | /* we must be a directory. If required, lets unregister the | 379 | /* we must be a directory. If required, lets unregister the |
319 | * dlm context now. */ | 380 | * dlm context now. */ |
320 | if (ip->ip_dlm) | 381 | if (ip->ip_conn) |
321 | user_dlm_unregister_context(ip->ip_dlm); | 382 | user_dlm_unregister(ip->ip_conn); |
322 | clear_fields: | 383 | clear_fields: |
323 | ip->ip_parent = NULL; | 384 | ip->ip_parent = NULL; |
324 | ip->ip_dlm = NULL; | 385 | ip->ip_conn = NULL; |
325 | } | 386 | } |
326 | 387 | ||
327 | static struct backing_dev_info dlmfs_backing_dev_info = { | 388 | static struct backing_dev_info dlmfs_backing_dev_info = { |
@@ -371,7 +432,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, | |||
371 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 432 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
372 | 433 | ||
373 | ip = DLMFS_I(inode); | 434 | ip = DLMFS_I(inode); |
374 | ip->ip_dlm = DLMFS_I(parent)->ip_dlm; | 435 | ip->ip_conn = DLMFS_I(parent)->ip_conn; |
375 | 436 | ||
376 | switch (mode & S_IFMT) { | 437 | switch (mode & S_IFMT) { |
377 | default: | 438 | default: |
@@ -425,13 +486,12 @@ static int dlmfs_mkdir(struct inode * dir, | |||
425 | struct inode *inode = NULL; | 486 | struct inode *inode = NULL; |
426 | struct qstr *domain = &dentry->d_name; | 487 | struct qstr *domain = &dentry->d_name; |
427 | struct dlmfs_inode_private *ip; | 488 | struct dlmfs_inode_private *ip; |
428 | struct dlm_ctxt *dlm; | 489 | struct ocfs2_cluster_connection *conn; |
429 | struct dlm_protocol_version proto = user_locking_protocol; | ||
430 | 490 | ||
431 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); | 491 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); |
432 | 492 | ||
433 | /* verify that we have a proper domain */ | 493 | /* verify that we have a proper domain */ |
434 | if (domain->len >= O2NM_MAX_NAME_LEN) { | 494 | if (domain->len >= GROUP_NAME_MAX) { |
435 | status = -EINVAL; | 495 | status = -EINVAL; |
436 | mlog(ML_ERROR, "invalid domain name for directory.\n"); | 496 | mlog(ML_ERROR, "invalid domain name for directory.\n"); |
437 | goto bail; | 497 | goto bail; |
@@ -446,14 +506,14 @@ static int dlmfs_mkdir(struct inode * dir, | |||
446 | 506 | ||
447 | ip = DLMFS_I(inode); | 507 | ip = DLMFS_I(inode); |
448 | 508 | ||
449 | dlm = user_dlm_register_context(domain, &proto); | 509 | conn = user_dlm_register(domain); |
450 | if (IS_ERR(dlm)) { | 510 | if (IS_ERR(conn)) { |
451 | status = PTR_ERR(dlm); | 511 | status = PTR_ERR(conn); |
452 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", | 512 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", |
453 | status, domain->len, domain->name); | 513 | status, domain->len, domain->name); |
454 | goto bail; | 514 | goto bail; |
455 | } | 515 | } |
456 | ip->ip_dlm = dlm; | 516 | ip->ip_conn = conn; |
457 | 517 | ||
458 | inc_nlink(dir); | 518 | inc_nlink(dir); |
459 | d_instantiate(dentry, inode); | 519 | d_instantiate(dentry, inode); |
@@ -549,6 +609,7 @@ static int dlmfs_fill_super(struct super_block * sb, | |||
549 | static const struct file_operations dlmfs_file_operations = { | 609 | static const struct file_operations dlmfs_file_operations = { |
550 | .open = dlmfs_file_open, | 610 | .open = dlmfs_file_open, |
551 | .release = dlmfs_file_release, | 611 | .release = dlmfs_file_release, |
612 | .poll = dlmfs_file_poll, | ||
552 | .read = dlmfs_file_read, | 613 | .read = dlmfs_file_read, |
553 | .write = dlmfs_file_write, | 614 | .write = dlmfs_file_write, |
554 | }; | 615 | }; |
@@ -576,6 +637,7 @@ static const struct super_operations dlmfs_ops = { | |||
576 | 637 | ||
577 | static const struct inode_operations dlmfs_file_inode_operations = { | 638 | static const struct inode_operations dlmfs_file_inode_operations = { |
578 | .getattr = simple_getattr, | 639 | .getattr = simple_getattr, |
640 | .setattr = dlmfs_file_setattr, | ||
579 | }; | 641 | }; |
580 | 642 | ||
581 | static int dlmfs_get_sb(struct file_system_type *fs_type, | 643 | static int dlmfs_get_sb(struct file_system_type *fs_type, |
@@ -620,6 +682,7 @@ static int __init init_dlmfs_fs(void) | |||
620 | } | 682 | } |
621 | cleanup_worker = 1; | 683 | cleanup_worker = 1; |
622 | 684 | ||
685 | user_dlm_set_locking_protocol(); | ||
623 | status = register_filesystem(&dlmfs_fs_type); | 686 | status = register_filesystem(&dlmfs_fs_type); |
624 | bail: | 687 | bail: |
625 | if (status) { | 688 | if (status) { |
diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c index a733b3321f83..a733b3321f83 100644 --- a/fs/ocfs2/dlm/dlmfsver.c +++ b/fs/ocfs2/dlmfs/dlmfsver.c | |||
diff --git a/fs/ocfs2/dlm/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h index f35eadbed25c..f35eadbed25c 100644 --- a/fs/ocfs2/dlm/dlmfsver.h +++ b/fs/ocfs2/dlmfs/dlmfsver.h | |||
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index 4cb1d3dae250..0499e3fb7bdb 100644 --- a/fs/ocfs2/dlm/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c | |||
@@ -34,18 +34,19 @@ | |||
34 | #include <linux/types.h> | 34 | #include <linux/types.h> |
35 | #include <linux/crc32.h> | 35 | #include <linux/crc32.h> |
36 | 36 | ||
37 | 37 | #include "ocfs2_lockingver.h" | |
38 | #include "cluster/nodemanager.h" | 38 | #include "stackglue.h" |
39 | #include "cluster/heartbeat.h" | ||
40 | #include "cluster/tcp.h" | ||
41 | |||
42 | #include "dlmapi.h" | ||
43 | |||
44 | #include "userdlm.h" | 39 | #include "userdlm.h" |
45 | 40 | ||
46 | #define MLOG_MASK_PREFIX ML_DLMFS | 41 | #define MLOG_MASK_PREFIX ML_DLMFS |
47 | #include "cluster/masklog.h" | 42 | #include "cluster/masklog.h" |
48 | 43 | ||
44 | |||
45 | static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | ||
46 | { | ||
47 | return container_of(lksb, struct user_lock_res, l_lksb); | ||
48 | } | ||
49 | |||
49 | static inline int user_check_wait_flag(struct user_lock_res *lockres, | 50 | static inline int user_check_wait_flag(struct user_lock_res *lockres, |
50 | int flag) | 51 | int flag) |
51 | { | 52 | { |
@@ -73,15 +74,15 @@ static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres) | |||
73 | } | 74 | } |
74 | 75 | ||
75 | /* I heart container_of... */ | 76 | /* I heart container_of... */ |
76 | static inline struct dlm_ctxt * | 77 | static inline struct ocfs2_cluster_connection * |
77 | dlm_ctxt_from_user_lockres(struct user_lock_res *lockres) | 78 | cluster_connection_from_user_lockres(struct user_lock_res *lockres) |
78 | { | 79 | { |
79 | struct dlmfs_inode_private *ip; | 80 | struct dlmfs_inode_private *ip; |
80 | 81 | ||
81 | ip = container_of(lockres, | 82 | ip = container_of(lockres, |
82 | struct dlmfs_inode_private, | 83 | struct dlmfs_inode_private, |
83 | ip_lockres); | 84 | ip_lockres); |
84 | return ip->ip_dlm; | 85 | return ip->ip_conn; |
85 | } | 86 | } |
86 | 87 | ||
87 | static struct inode * | 88 | static struct inode * |
@@ -103,9 +104,9 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | |||
103 | } | 104 | } |
104 | 105 | ||
105 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ | 106 | #define user_log_dlm_error(_func, _stat, _lockres) do { \ |
106 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 107 | mlog(ML_ERROR, "Dlm error %d while calling %s on " \ |
107 | "resource %.*s: %s\n", dlm_errname(_stat), _func, \ | 108 | "resource %.*s\n", _stat, _func, \ |
108 | _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \ | 109 | _lockres->l_namelen, _lockres->l_name); \ |
109 | } while (0) | 110 | } while (0) |
110 | 111 | ||
111 | /* WARNING: This function lives in a world where the only three lock | 112 | /* WARNING: This function lives in a world where the only three lock |
@@ -113,34 +114,35 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) | |||
113 | * lock types are added. */ | 114 | * lock types are added. */ |
114 | static inline int user_highest_compat_lock_level(int level) | 115 | static inline int user_highest_compat_lock_level(int level) |
115 | { | 116 | { |
116 | int new_level = LKM_EXMODE; | 117 | int new_level = DLM_LOCK_EX; |
117 | 118 | ||
118 | if (level == LKM_EXMODE) | 119 | if (level == DLM_LOCK_EX) |
119 | new_level = LKM_NLMODE; | 120 | new_level = DLM_LOCK_NL; |
120 | else if (level == LKM_PRMODE) | 121 | else if (level == DLM_LOCK_PR) |
121 | new_level = LKM_PRMODE; | 122 | new_level = DLM_LOCK_PR; |
122 | return new_level; | 123 | return new_level; |
123 | } | 124 | } |
124 | 125 | ||
125 | static void user_ast(void *opaque) | 126 | static void user_ast(struct ocfs2_dlm_lksb *lksb) |
126 | { | 127 | { |
127 | struct user_lock_res *lockres = opaque; | 128 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); |
128 | struct dlm_lockstatus *lksb; | 129 | int status; |
129 | 130 | ||
130 | mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen, | 131 | mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n", |
131 | lockres->l_name); | 132 | lockres->l_namelen, lockres->l_name, lockres->l_level, |
133 | lockres->l_requested); | ||
132 | 134 | ||
133 | spin_lock(&lockres->l_lock); | 135 | spin_lock(&lockres->l_lock); |
134 | 136 | ||
135 | lksb = &(lockres->l_lksb); | 137 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); |
136 | if (lksb->status != DLM_NORMAL) { | 138 | if (status) { |
137 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", | 139 | mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", |
138 | lksb->status, lockres->l_namelen, lockres->l_name); | 140 | status, lockres->l_namelen, lockres->l_name); |
139 | spin_unlock(&lockres->l_lock); | 141 | spin_unlock(&lockres->l_lock); |
140 | return; | 142 | return; |
141 | } | 143 | } |
142 | 144 | ||
143 | mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, | 145 | mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV, |
144 | "Lockres %.*s, requested ivmode. flags 0x%x\n", | 146 | "Lockres %.*s, requested ivmode. flags 0x%x\n", |
145 | lockres->l_namelen, lockres->l_name, lockres->l_flags); | 147 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
146 | 148 | ||
@@ -148,13 +150,13 @@ static void user_ast(void *opaque) | |||
148 | if (lockres->l_requested < lockres->l_level) { | 150 | if (lockres->l_requested < lockres->l_level) { |
149 | if (lockres->l_requested <= | 151 | if (lockres->l_requested <= |
150 | user_highest_compat_lock_level(lockres->l_blocking)) { | 152 | user_highest_compat_lock_level(lockres->l_blocking)) { |
151 | lockres->l_blocking = LKM_NLMODE; | 153 | lockres->l_blocking = DLM_LOCK_NL; |
152 | lockres->l_flags &= ~USER_LOCK_BLOCKED; | 154 | lockres->l_flags &= ~USER_LOCK_BLOCKED; |
153 | } | 155 | } |
154 | } | 156 | } |
155 | 157 | ||
156 | lockres->l_level = lockres->l_requested; | 158 | lockres->l_level = lockres->l_requested; |
157 | lockres->l_requested = LKM_IVMODE; | 159 | lockres->l_requested = DLM_LOCK_IV; |
158 | lockres->l_flags |= USER_LOCK_ATTACHED; | 160 | lockres->l_flags |= USER_LOCK_ATTACHED; |
159 | lockres->l_flags &= ~USER_LOCK_BUSY; | 161 | lockres->l_flags &= ~USER_LOCK_BUSY; |
160 | 162 | ||
@@ -193,11 +195,11 @@ static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) | |||
193 | return; | 195 | return; |
194 | 196 | ||
195 | switch (lockres->l_blocking) { | 197 | switch (lockres->l_blocking) { |
196 | case LKM_EXMODE: | 198 | case DLM_LOCK_EX: |
197 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 199 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) |
198 | queue = 1; | 200 | queue = 1; |
199 | break; | 201 | break; |
200 | case LKM_PRMODE: | 202 | case DLM_LOCK_PR: |
201 | if (!lockres->l_ex_holders) | 203 | if (!lockres->l_ex_holders) |
202 | queue = 1; | 204 | queue = 1; |
203 | break; | 205 | break; |
@@ -209,12 +211,12 @@ static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) | |||
209 | __user_dlm_queue_lockres(lockres); | 211 | __user_dlm_queue_lockres(lockres); |
210 | } | 212 | } |
211 | 213 | ||
212 | static void user_bast(void *opaque, int level) | 214 | static void user_bast(struct ocfs2_dlm_lksb *lksb, int level) |
213 | { | 215 | { |
214 | struct user_lock_res *lockres = opaque; | 216 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); |
215 | 217 | ||
216 | mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n", | 218 | mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n", |
217 | lockres->l_namelen, lockres->l_name, level); | 219 | lockres->l_namelen, lockres->l_name, level, lockres->l_level); |
218 | 220 | ||
219 | spin_lock(&lockres->l_lock); | 221 | spin_lock(&lockres->l_lock); |
220 | lockres->l_flags |= USER_LOCK_BLOCKED; | 222 | lockres->l_flags |= USER_LOCK_BLOCKED; |
@@ -227,15 +229,15 @@ static void user_bast(void *opaque, int level) | |||
227 | wake_up(&lockres->l_event); | 229 | wake_up(&lockres->l_event); |
228 | } | 230 | } |
229 | 231 | ||
230 | static void user_unlock_ast(void *opaque, enum dlm_status status) | 232 | static void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status) |
231 | { | 233 | { |
232 | struct user_lock_res *lockres = opaque; | 234 | struct user_lock_res *lockres = user_lksb_to_lock_res(lksb); |
233 | 235 | ||
234 | mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen, | 236 | mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n", |
235 | lockres->l_name); | 237 | lockres->l_namelen, lockres->l_name, lockres->l_flags); |
236 | 238 | ||
237 | if (status != DLM_NORMAL && status != DLM_CANCELGRANT) | 239 | if (status) |
238 | mlog(ML_ERROR, "Dlm returns status %d\n", status); | 240 | mlog(ML_ERROR, "dlm returns status %d\n", status); |
239 | 241 | ||
240 | spin_lock(&lockres->l_lock); | 242 | spin_lock(&lockres->l_lock); |
241 | /* The teardown flag gets set early during the unlock process, | 243 | /* The teardown flag gets set early during the unlock process, |
@@ -243,7 +245,7 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
243 | * for a concurrent cancel. */ | 245 | * for a concurrent cancel. */ |
244 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN | 246 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN |
245 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { | 247 | && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { |
246 | lockres->l_level = LKM_IVMODE; | 248 | lockres->l_level = DLM_LOCK_IV; |
247 | } else if (status == DLM_CANCELGRANT) { | 249 | } else if (status == DLM_CANCELGRANT) { |
248 | /* We tried to cancel a convert request, but it was | 250 | /* We tried to cancel a convert request, but it was |
249 | * already granted. Don't clear the busy flag - the | 251 | * already granted. Don't clear the busy flag - the |
@@ -254,7 +256,7 @@ static void user_unlock_ast(void *opaque, enum dlm_status status) | |||
254 | } else { | 256 | } else { |
255 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); | 257 | BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); |
256 | /* Cancel succeeded, we want to re-queue */ | 258 | /* Cancel succeeded, we want to re-queue */ |
257 | lockres->l_requested = LKM_IVMODE; /* cancel an | 259 | lockres->l_requested = DLM_LOCK_IV; /* cancel an |
258 | * upconvert | 260 | * upconvert |
259 | * request. */ | 261 | * request. */ |
260 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; | 262 | lockres->l_flags &= ~USER_LOCK_IN_CANCEL; |
@@ -271,6 +273,21 @@ out_noclear: | |||
271 | wake_up(&lockres->l_event); | 273 | wake_up(&lockres->l_event); |
272 | } | 274 | } |
273 | 275 | ||
276 | /* | ||
277 | * This is the userdlmfs locking protocol version. | ||
278 | * | ||
279 | * See fs/ocfs2/dlmglue.c for more details on locking versions. | ||
280 | */ | ||
281 | static struct ocfs2_locking_protocol user_dlm_lproto = { | ||
282 | .lp_max_version = { | ||
283 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
284 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
285 | }, | ||
286 | .lp_lock_ast = user_ast, | ||
287 | .lp_blocking_ast = user_bast, | ||
288 | .lp_unlock_ast = user_unlock_ast, | ||
289 | }; | ||
290 | |||
274 | static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres) | 291 | static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres) |
275 | { | 292 | { |
276 | struct inode *inode; | 293 | struct inode *inode; |
@@ -283,10 +300,10 @@ static void user_dlm_unblock_lock(struct work_struct *work) | |||
283 | int new_level, status; | 300 | int new_level, status; |
284 | struct user_lock_res *lockres = | 301 | struct user_lock_res *lockres = |
285 | container_of(work, struct user_lock_res, l_work); | 302 | container_of(work, struct user_lock_res, l_work); |
286 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 303 | struct ocfs2_cluster_connection *conn = |
304 | cluster_connection_from_user_lockres(lockres); | ||
287 | 305 | ||
288 | mlog(0, "processing lockres %.*s\n", lockres->l_namelen, | 306 | mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name); |
289 | lockres->l_name); | ||
290 | 307 | ||
291 | spin_lock(&lockres->l_lock); | 308 | spin_lock(&lockres->l_lock); |
292 | 309 | ||
@@ -304,17 +321,23 @@ static void user_dlm_unblock_lock(struct work_struct *work) | |||
304 | * flag, and finally we might get another bast which re-queues | 321 | * flag, and finally we might get another bast which re-queues |
305 | * us before our ast for the downconvert is called. */ | 322 | * us before our ast for the downconvert is called. */ |
306 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { | 323 | if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { |
324 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n", | ||
325 | lockres->l_namelen, lockres->l_name); | ||
307 | spin_unlock(&lockres->l_lock); | 326 | spin_unlock(&lockres->l_lock); |
308 | goto drop_ref; | 327 | goto drop_ref; |
309 | } | 328 | } |
310 | 329 | ||
311 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 330 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
331 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n", | ||
332 | lockres->l_namelen, lockres->l_name); | ||
312 | spin_unlock(&lockres->l_lock); | 333 | spin_unlock(&lockres->l_lock); |
313 | goto drop_ref; | 334 | goto drop_ref; |
314 | } | 335 | } |
315 | 336 | ||
316 | if (lockres->l_flags & USER_LOCK_BUSY) { | 337 | if (lockres->l_flags & USER_LOCK_BUSY) { |
317 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { | 338 | if (lockres->l_flags & USER_LOCK_IN_CANCEL) { |
339 | mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n", | ||
340 | lockres->l_namelen, lockres->l_name); | ||
318 | spin_unlock(&lockres->l_lock); | 341 | spin_unlock(&lockres->l_lock); |
319 | goto drop_ref; | 342 | goto drop_ref; |
320 | } | 343 | } |
@@ -322,32 +345,31 @@ static void user_dlm_unblock_lock(struct work_struct *work) | |||
322 | lockres->l_flags |= USER_LOCK_IN_CANCEL; | 345 | lockres->l_flags |= USER_LOCK_IN_CANCEL; |
323 | spin_unlock(&lockres->l_lock); | 346 | spin_unlock(&lockres->l_lock); |
324 | 347 | ||
325 | status = dlmunlock(dlm, | 348 | status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, |
326 | &lockres->l_lksb, | 349 | DLM_LKF_CANCEL); |
327 | LKM_CANCEL, | 350 | if (status) |
328 | user_unlock_ast, | 351 | user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); |
329 | lockres); | ||
330 | if (status != DLM_NORMAL) | ||
331 | user_log_dlm_error("dlmunlock", status, lockres); | ||
332 | goto drop_ref; | 352 | goto drop_ref; |
333 | } | 353 | } |
334 | 354 | ||
335 | /* If there are still incompat holders, we can exit safely | 355 | /* If there are still incompat holders, we can exit safely |
336 | * without worrying about re-queueing this lock as that will | 356 | * without worrying about re-queueing this lock as that will |
337 | * happen on the last call to user_cluster_unlock. */ | 357 | * happen on the last call to user_cluster_unlock. */ |
338 | if ((lockres->l_blocking == LKM_EXMODE) | 358 | if ((lockres->l_blocking == DLM_LOCK_EX) |
339 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { | 359 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { |
340 | spin_unlock(&lockres->l_lock); | 360 | spin_unlock(&lockres->l_lock); |
341 | mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n", | 361 | mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n", |
342 | lockres->l_ro_holders, lockres->l_ex_holders); | 362 | lockres->l_namelen, lockres->l_name, |
363 | lockres->l_ex_holders, lockres->l_ro_holders); | ||
343 | goto drop_ref; | 364 | goto drop_ref; |
344 | } | 365 | } |
345 | 366 | ||
346 | if ((lockres->l_blocking == LKM_PRMODE) | 367 | if ((lockres->l_blocking == DLM_LOCK_PR) |
347 | && lockres->l_ex_holders) { | 368 | && lockres->l_ex_holders) { |
348 | spin_unlock(&lockres->l_lock); | 369 | spin_unlock(&lockres->l_lock); |
349 | mlog(0, "can't downconvert for pr: ex = %u\n", | 370 | mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n", |
350 | lockres->l_ex_holders); | 371 | lockres->l_namelen, lockres->l_name, |
372 | lockres->l_ex_holders); | ||
351 | goto drop_ref; | 373 | goto drop_ref; |
352 | } | 374 | } |
353 | 375 | ||
@@ -355,22 +377,17 @@ static void user_dlm_unblock_lock(struct work_struct *work) | |||
355 | new_level = user_highest_compat_lock_level(lockres->l_blocking); | 377 | new_level = user_highest_compat_lock_level(lockres->l_blocking); |
356 | lockres->l_requested = new_level; | 378 | lockres->l_requested = new_level; |
357 | lockres->l_flags |= USER_LOCK_BUSY; | 379 | lockres->l_flags |= USER_LOCK_BUSY; |
358 | mlog(0, "Downconvert lock from %d to %d\n", | 380 | mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n", |
359 | lockres->l_level, new_level); | 381 | lockres->l_namelen, lockres->l_name, lockres->l_level, new_level); |
360 | spin_unlock(&lockres->l_lock); | 382 | spin_unlock(&lockres->l_lock); |
361 | 383 | ||
362 | /* need lock downconvert request now... */ | 384 | /* need lock downconvert request now... */ |
363 | status = dlmlock(dlm, | 385 | status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb, |
364 | new_level, | 386 | DLM_LKF_CONVERT|DLM_LKF_VALBLK, |
365 | &lockres->l_lksb, | 387 | lockres->l_name, |
366 | LKM_CONVERT|LKM_VALBLK, | 388 | lockres->l_namelen); |
367 | lockres->l_name, | 389 | if (status) { |
368 | lockres->l_namelen, | 390 | user_log_dlm_error("ocfs2_dlm_lock", status, lockres); |
369 | user_ast, | ||
370 | lockres, | ||
371 | user_bast); | ||
372 | if (status != DLM_NORMAL) { | ||
373 | user_log_dlm_error("dlmlock", status, lockres); | ||
374 | user_recover_from_dlm_error(lockres); | 391 | user_recover_from_dlm_error(lockres); |
375 | } | 392 | } |
376 | 393 | ||
@@ -382,10 +399,10 @@ static inline void user_dlm_inc_holders(struct user_lock_res *lockres, | |||
382 | int level) | 399 | int level) |
383 | { | 400 | { |
384 | switch(level) { | 401 | switch(level) { |
385 | case LKM_EXMODE: | 402 | case DLM_LOCK_EX: |
386 | lockres->l_ex_holders++; | 403 | lockres->l_ex_holders++; |
387 | break; | 404 | break; |
388 | case LKM_PRMODE: | 405 | case DLM_LOCK_PR: |
389 | lockres->l_ro_holders++; | 406 | lockres->l_ro_holders++; |
390 | break; | 407 | break; |
391 | default: | 408 | default: |
@@ -410,20 +427,19 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres, | |||
410 | int lkm_flags) | 427 | int lkm_flags) |
411 | { | 428 | { |
412 | int status, local_flags; | 429 | int status, local_flags; |
413 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 430 | struct ocfs2_cluster_connection *conn = |
431 | cluster_connection_from_user_lockres(lockres); | ||
414 | 432 | ||
415 | if (level != LKM_EXMODE && | 433 | if (level != DLM_LOCK_EX && |
416 | level != LKM_PRMODE) { | 434 | level != DLM_LOCK_PR) { |
417 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | 435 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
418 | lockres->l_namelen, lockres->l_name); | 436 | lockres->l_namelen, lockres->l_name); |
419 | status = -EINVAL; | 437 | status = -EINVAL; |
420 | goto bail; | 438 | goto bail; |
421 | } | 439 | } |
422 | 440 | ||
423 | mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n", | 441 | mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n", |
424 | lockres->l_namelen, lockres->l_name, | 442 | lockres->l_namelen, lockres->l_name, level, lkm_flags); |
425 | (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", | ||
426 | lkm_flags); | ||
427 | 443 | ||
428 | again: | 444 | again: |
429 | if (signal_pending(current)) { | 445 | if (signal_pending(current)) { |
@@ -457,35 +473,26 @@ again: | |||
457 | } | 473 | } |
458 | 474 | ||
459 | if (level > lockres->l_level) { | 475 | if (level > lockres->l_level) { |
460 | local_flags = lkm_flags | LKM_VALBLK; | 476 | local_flags = lkm_flags | DLM_LKF_VALBLK; |
461 | if (lockres->l_level != LKM_IVMODE) | 477 | if (lockres->l_level != DLM_LOCK_IV) |
462 | local_flags |= LKM_CONVERT; | 478 | local_flags |= DLM_LKF_CONVERT; |
463 | 479 | ||
464 | lockres->l_requested = level; | 480 | lockres->l_requested = level; |
465 | lockres->l_flags |= USER_LOCK_BUSY; | 481 | lockres->l_flags |= USER_LOCK_BUSY; |
466 | spin_unlock(&lockres->l_lock); | 482 | spin_unlock(&lockres->l_lock); |
467 | 483 | ||
468 | BUG_ON(level == LKM_IVMODE); | 484 | BUG_ON(level == DLM_LOCK_IV); |
469 | BUG_ON(level == LKM_NLMODE); | 485 | BUG_ON(level == DLM_LOCK_NL); |
470 | 486 | ||
471 | /* call dlm_lock to upgrade lock now */ | 487 | /* call dlm_lock to upgrade lock now */ |
472 | status = dlmlock(dlm, | 488 | status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb, |
473 | level, | 489 | local_flags, lockres->l_name, |
474 | &lockres->l_lksb, | 490 | lockres->l_namelen); |
475 | local_flags, | 491 | if (status) { |
476 | lockres->l_name, | 492 | if ((lkm_flags & DLM_LKF_NOQUEUE) && |
477 | lockres->l_namelen, | 493 | (status != -EAGAIN)) |
478 | user_ast, | 494 | user_log_dlm_error("ocfs2_dlm_lock", |
479 | lockres, | 495 | status, lockres); |
480 | user_bast); | ||
481 | if (status != DLM_NORMAL) { | ||
482 | if ((lkm_flags & LKM_NOQUEUE) && | ||
483 | (status == DLM_NOTQUEUED)) | ||
484 | status = -EAGAIN; | ||
485 | else { | ||
486 | user_log_dlm_error("dlmlock", status, lockres); | ||
487 | status = -EINVAL; | ||
488 | } | ||
489 | user_recover_from_dlm_error(lockres); | 496 | user_recover_from_dlm_error(lockres); |
490 | goto bail; | 497 | goto bail; |
491 | } | 498 | } |
@@ -506,11 +513,11 @@ static inline void user_dlm_dec_holders(struct user_lock_res *lockres, | |||
506 | int level) | 513 | int level) |
507 | { | 514 | { |
508 | switch(level) { | 515 | switch(level) { |
509 | case LKM_EXMODE: | 516 | case DLM_LOCK_EX: |
510 | BUG_ON(!lockres->l_ex_holders); | 517 | BUG_ON(!lockres->l_ex_holders); |
511 | lockres->l_ex_holders--; | 518 | lockres->l_ex_holders--; |
512 | break; | 519 | break; |
513 | case LKM_PRMODE: | 520 | case DLM_LOCK_PR: |
514 | BUG_ON(!lockres->l_ro_holders); | 521 | BUG_ON(!lockres->l_ro_holders); |
515 | lockres->l_ro_holders--; | 522 | lockres->l_ro_holders--; |
516 | break; | 523 | break; |
@@ -522,8 +529,8 @@ static inline void user_dlm_dec_holders(struct user_lock_res *lockres, | |||
522 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, | 529 | void user_dlm_cluster_unlock(struct user_lock_res *lockres, |
523 | int level) | 530 | int level) |
524 | { | 531 | { |
525 | if (level != LKM_EXMODE && | 532 | if (level != DLM_LOCK_EX && |
526 | level != LKM_PRMODE) { | 533 | level != DLM_LOCK_PR) { |
527 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", | 534 | mlog(ML_ERROR, "lockres %.*s: invalid request!\n", |
528 | lockres->l_namelen, lockres->l_name); | 535 | lockres->l_namelen, lockres->l_name); |
529 | return; | 536 | return; |
@@ -540,33 +547,40 @@ void user_dlm_write_lvb(struct inode *inode, | |||
540 | unsigned int len) | 547 | unsigned int len) |
541 | { | 548 | { |
542 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | 549 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; |
543 | char *lvb = lockres->l_lksb.lvb; | 550 | char *lvb; |
544 | 551 | ||
545 | BUG_ON(len > DLM_LVB_LEN); | 552 | BUG_ON(len > DLM_LVB_LEN); |
546 | 553 | ||
547 | spin_lock(&lockres->l_lock); | 554 | spin_lock(&lockres->l_lock); |
548 | 555 | ||
549 | BUG_ON(lockres->l_level < LKM_EXMODE); | 556 | BUG_ON(lockres->l_level < DLM_LOCK_EX); |
557 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | ||
550 | memcpy(lvb, val, len); | 558 | memcpy(lvb, val, len); |
551 | 559 | ||
552 | spin_unlock(&lockres->l_lock); | 560 | spin_unlock(&lockres->l_lock); |
553 | } | 561 | } |
554 | 562 | ||
555 | void user_dlm_read_lvb(struct inode *inode, | 563 | ssize_t user_dlm_read_lvb(struct inode *inode, |
556 | char *val, | 564 | char *val, |
557 | unsigned int len) | 565 | unsigned int len) |
558 | { | 566 | { |
559 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; | 567 | struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; |
560 | char *lvb = lockres->l_lksb.lvb; | 568 | char *lvb; |
569 | ssize_t ret = len; | ||
561 | 570 | ||
562 | BUG_ON(len > DLM_LVB_LEN); | 571 | BUG_ON(len > DLM_LVB_LEN); |
563 | 572 | ||
564 | spin_lock(&lockres->l_lock); | 573 | spin_lock(&lockres->l_lock); |
565 | 574 | ||
566 | BUG_ON(lockres->l_level < LKM_PRMODE); | 575 | BUG_ON(lockres->l_level < DLM_LOCK_PR); |
567 | memcpy(val, lvb, len); | 576 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) { |
577 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | ||
578 | memcpy(val, lvb, len); | ||
579 | } else | ||
580 | ret = 0; | ||
568 | 581 | ||
569 | spin_unlock(&lockres->l_lock); | 582 | spin_unlock(&lockres->l_lock); |
583 | return ret; | ||
570 | } | 584 | } |
571 | 585 | ||
572 | void user_dlm_lock_res_init(struct user_lock_res *lockres, | 586 | void user_dlm_lock_res_init(struct user_lock_res *lockres, |
@@ -576,9 +590,9 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres, | |||
576 | 590 | ||
577 | spin_lock_init(&lockres->l_lock); | 591 | spin_lock_init(&lockres->l_lock); |
578 | init_waitqueue_head(&lockres->l_event); | 592 | init_waitqueue_head(&lockres->l_event); |
579 | lockres->l_level = LKM_IVMODE; | 593 | lockres->l_level = DLM_LOCK_IV; |
580 | lockres->l_requested = LKM_IVMODE; | 594 | lockres->l_requested = DLM_LOCK_IV; |
581 | lockres->l_blocking = LKM_IVMODE; | 595 | lockres->l_blocking = DLM_LOCK_IV; |
582 | 596 | ||
583 | /* should have been checked before getting here. */ | 597 | /* should have been checked before getting here. */ |
584 | BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); | 598 | BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); |
@@ -592,9 +606,10 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres, | |||
592 | int user_dlm_destroy_lock(struct user_lock_res *lockres) | 606 | int user_dlm_destroy_lock(struct user_lock_res *lockres) |
593 | { | 607 | { |
594 | int status = -EBUSY; | 608 | int status = -EBUSY; |
595 | struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); | 609 | struct ocfs2_cluster_connection *conn = |
610 | cluster_connection_from_user_lockres(lockres); | ||
596 | 611 | ||
597 | mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name); | 612 | mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name); |
598 | 613 | ||
599 | spin_lock(&lockres->l_lock); | 614 | spin_lock(&lockres->l_lock); |
600 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { | 615 | if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { |
@@ -627,14 +642,9 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) | |||
627 | lockres->l_flags |= USER_LOCK_BUSY; | 642 | lockres->l_flags |= USER_LOCK_BUSY; |
628 | spin_unlock(&lockres->l_lock); | 643 | spin_unlock(&lockres->l_lock); |
629 | 644 | ||
630 | status = dlmunlock(dlm, | 645 | status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); |
631 | &lockres->l_lksb, | 646 | if (status) { |
632 | LKM_VALBLK, | 647 | user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); |
633 | user_unlock_ast, | ||
634 | lockres); | ||
635 | if (status != DLM_NORMAL) { | ||
636 | user_log_dlm_error("dlmunlock", status, lockres); | ||
637 | status = -EINVAL; | ||
638 | goto bail; | 648 | goto bail; |
639 | } | 649 | } |
640 | 650 | ||
@@ -645,32 +655,34 @@ bail: | |||
645 | return status; | 655 | return status; |
646 | } | 656 | } |
647 | 657 | ||
648 | struct dlm_ctxt *user_dlm_register_context(struct qstr *name, | 658 | static void user_dlm_recovery_handler_noop(int node_num, |
649 | struct dlm_protocol_version *proto) | 659 | void *recovery_data) |
650 | { | 660 | { |
651 | struct dlm_ctxt *dlm; | 661 | /* We ignore recovery events */ |
652 | u32 dlm_key; | 662 | return; |
653 | char *domain; | 663 | } |
654 | |||
655 | domain = kmalloc(name->len + 1, GFP_NOFS); | ||
656 | if (!domain) { | ||
657 | mlog_errno(-ENOMEM); | ||
658 | return ERR_PTR(-ENOMEM); | ||
659 | } | ||
660 | 664 | ||
661 | dlm_key = crc32_le(0, name->name, name->len); | 665 | void user_dlm_set_locking_protocol(void) |
666 | { | ||
667 | ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version); | ||
668 | } | ||
662 | 669 | ||
663 | snprintf(domain, name->len + 1, "%.*s", name->len, name->name); | 670 | struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name) |
671 | { | ||
672 | int rc; | ||
673 | struct ocfs2_cluster_connection *conn; | ||
664 | 674 | ||
665 | dlm = dlm_register_domain(domain, dlm_key, proto); | 675 | rc = ocfs2_cluster_connect_agnostic(name->name, name->len, |
666 | if (IS_ERR(dlm)) | 676 | &user_dlm_lproto, |
667 | mlog_errno(PTR_ERR(dlm)); | 677 | user_dlm_recovery_handler_noop, |
678 | NULL, &conn); | ||
679 | if (rc) | ||
680 | mlog_errno(rc); | ||
668 | 681 | ||
669 | kfree(domain); | 682 | return rc ? ERR_PTR(rc) : conn; |
670 | return dlm; | ||
671 | } | 683 | } |
672 | 684 | ||
673 | void user_dlm_unregister_context(struct dlm_ctxt *dlm) | 685 | void user_dlm_unregister(struct ocfs2_cluster_connection *conn) |
674 | { | 686 | { |
675 | dlm_unregister_domain(dlm); | 687 | ocfs2_cluster_disconnect(conn, 0); |
676 | } | 688 | } |
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlmfs/userdlm.h index 0c3cc03c61fa..3b42d79531d7 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlmfs/userdlm.h | |||
@@ -57,7 +57,7 @@ struct user_lock_res { | |||
57 | int l_level; | 57 | int l_level; |
58 | unsigned int l_ro_holders; | 58 | unsigned int l_ro_holders; |
59 | unsigned int l_ex_holders; | 59 | unsigned int l_ex_holders; |
60 | struct dlm_lockstatus l_lksb; | 60 | struct ocfs2_dlm_lksb l_lksb; |
61 | 61 | ||
62 | int l_requested; | 62 | int l_requested; |
63 | int l_blocking; | 63 | int l_blocking; |
@@ -80,15 +80,15 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres, | |||
80 | void user_dlm_write_lvb(struct inode *inode, | 80 | void user_dlm_write_lvb(struct inode *inode, |
81 | const char *val, | 81 | const char *val, |
82 | unsigned int len); | 82 | unsigned int len); |
83 | void user_dlm_read_lvb(struct inode *inode, | 83 | ssize_t user_dlm_read_lvb(struct inode *inode, |
84 | char *val, | 84 | char *val, |
85 | unsigned int len); | 85 | unsigned int len); |
86 | struct dlm_ctxt *user_dlm_register_context(struct qstr *name, | 86 | struct ocfs2_cluster_connection *user_dlm_register(struct qstr *name); |
87 | struct dlm_protocol_version *proto); | 87 | void user_dlm_unregister(struct ocfs2_cluster_connection *conn); |
88 | void user_dlm_unregister_context(struct dlm_ctxt *dlm); | 88 | void user_dlm_set_locking_protocol(void); |
89 | 89 | ||
90 | struct dlmfs_inode_private { | 90 | struct dlmfs_inode_private { |
91 | struct dlm_ctxt *ip_dlm; | 91 | struct ocfs2_cluster_connection *ip_conn; |
92 | 92 | ||
93 | struct user_lock_res ip_lockres; /* unused for directories. */ | 93 | struct user_lock_res ip_lockres; /* unused for directories. */ |
94 | struct inode *ip_parent; | 94 | struct inode *ip_parent; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0d38d67194cb..50c4ee805da4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -297,6 +297,11 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | |||
297 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 297 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
298 | } | 298 | } |
299 | 299 | ||
300 | static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | ||
301 | { | ||
302 | return container_of(lksb, struct ocfs2_lock_res, l_lksb); | ||
303 | } | ||
304 | |||
300 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 305 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) |
301 | { | 306 | { |
302 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 307 | BUG_ON(!ocfs2_is_inode_lock(lockres)); |
@@ -875,6 +880,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
875 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 880 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
876 | 881 | ||
877 | lockres->l_level = lockres->l_requested; | 882 | lockres->l_level = lockres->l_requested; |
883 | |||
884 | /* | ||
885 | * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing | ||
886 | * the OCFS2_LOCK_BUSY flag to prevent the dc thread from | ||
887 | * downconverting the lock before the upconvert has fully completed. | ||
888 | */ | ||
889 | lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
890 | |||
878 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 891 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
879 | 892 | ||
880 | mlog_exit_void(); | 893 | mlog_exit_void(); |
@@ -907,8 +920,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
907 | 920 | ||
908 | assert_spin_locked(&lockres->l_lock); | 921 | assert_spin_locked(&lockres->l_lock); |
909 | 922 | ||
910 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
911 | |||
912 | if (level > lockres->l_blocking) { | 923 | if (level > lockres->l_blocking) { |
913 | /* only schedule a downconvert if we haven't already scheduled | 924 | /* only schedule a downconvert if we haven't already scheduled |
914 | * one that goes low enough to satisfy the level we're | 925 | * one that goes low enough to satisfy the level we're |
@@ -921,6 +932,13 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
921 | lockres->l_blocking = level; | 932 | lockres->l_blocking = level; |
922 | } | 933 | } |
923 | 934 | ||
935 | mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", | ||
936 | lockres->l_name, level, lockres->l_level, lockres->l_blocking, | ||
937 | needs_downconvert); | ||
938 | |||
939 | if (needs_downconvert) | ||
940 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
941 | |||
924 | mlog_exit(needs_downconvert); | 942 | mlog_exit(needs_downconvert); |
925 | return needs_downconvert; | 943 | return needs_downconvert; |
926 | } | 944 | } |
@@ -1031,18 +1049,17 @@ static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | |||
1031 | return lockres->l_pending_gen; | 1049 | return lockres->l_pending_gen; |
1032 | } | 1050 | } |
1033 | 1051 | ||
1034 | 1052 | static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) | |
1035 | static void ocfs2_blocking_ast(void *opaque, int level) | ||
1036 | { | 1053 | { |
1037 | struct ocfs2_lock_res *lockres = opaque; | 1054 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); |
1038 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 1055 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1039 | int needs_downconvert; | 1056 | int needs_downconvert; |
1040 | unsigned long flags; | 1057 | unsigned long flags; |
1041 | 1058 | ||
1042 | BUG_ON(level <= DLM_LOCK_NL); | 1059 | BUG_ON(level <= DLM_LOCK_NL); |
1043 | 1060 | ||
1044 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 1061 | mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " |
1045 | lockres->l_name, level, lockres->l_level, | 1062 | "type %s\n", lockres->l_name, level, lockres->l_level, |
1046 | ocfs2_lock_type_string(lockres->l_type)); | 1063 | ocfs2_lock_type_string(lockres->l_type)); |
1047 | 1064 | ||
1048 | /* | 1065 | /* |
@@ -1063,9 +1080,9 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
1063 | ocfs2_wake_downconvert_thread(osb); | 1080 | ocfs2_wake_downconvert_thread(osb); |
1064 | } | 1081 | } |
1065 | 1082 | ||
1066 | static void ocfs2_locking_ast(void *opaque) | 1083 | static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) |
1067 | { | 1084 | { |
1068 | struct ocfs2_lock_res *lockres = opaque; | 1085 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); |
1069 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 1086 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1070 | unsigned long flags; | 1087 | unsigned long flags; |
1071 | int status; | 1088 | int status; |
@@ -1086,6 +1103,10 @@ static void ocfs2_locking_ast(void *opaque) | |||
1086 | return; | 1103 | return; |
1087 | } | 1104 | } |
1088 | 1105 | ||
1106 | mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " | ||
1107 | "level %d => %d\n", lockres->l_name, lockres->l_action, | ||
1108 | lockres->l_unlock_action, lockres->l_level, lockres->l_requested); | ||
1109 | |||
1089 | switch(lockres->l_action) { | 1110 | switch(lockres->l_action) { |
1090 | case OCFS2_AST_ATTACH: | 1111 | case OCFS2_AST_ATTACH: |
1091 | ocfs2_generic_handle_attach_action(lockres); | 1112 | ocfs2_generic_handle_attach_action(lockres); |
@@ -1098,8 +1119,8 @@ static void ocfs2_locking_ast(void *opaque) | |||
1098 | ocfs2_generic_handle_downconvert_action(lockres); | 1119 | ocfs2_generic_handle_downconvert_action(lockres); |
1099 | break; | 1120 | break; |
1100 | default: | 1121 | default: |
1101 | mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " | 1122 | mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " |
1102 | "lockres flags = 0x%lx, unlock action: %u\n", | 1123 | "flags 0x%lx, unlock: %u\n", |
1103 | lockres->l_name, lockres->l_action, lockres->l_flags, | 1124 | lockres->l_name, lockres->l_action, lockres->l_flags, |
1104 | lockres->l_unlock_action); | 1125 | lockres->l_unlock_action); |
1105 | BUG(); | 1126 | BUG(); |
@@ -1125,6 +1146,88 @@ out: | |||
1125 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1146 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1126 | } | 1147 | } |
1127 | 1148 | ||
1149 | static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) | ||
1150 | { | ||
1151 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | ||
1152 | unsigned long flags; | ||
1153 | |||
1154 | mlog_entry_void(); | ||
1155 | |||
1156 | mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", | ||
1157 | lockres->l_name, lockres->l_unlock_action); | ||
1158 | |||
1159 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1160 | if (error) { | ||
1161 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " | ||
1162 | "unlock_action %d\n", error, lockres->l_name, | ||
1163 | lockres->l_unlock_action); | ||
1164 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1165 | mlog_exit_void(); | ||
1166 | return; | ||
1167 | } | ||
1168 | |||
1169 | switch(lockres->l_unlock_action) { | ||
1170 | case OCFS2_UNLOCK_CANCEL_CONVERT: | ||
1171 | mlog(0, "Cancel convert success for %s\n", lockres->l_name); | ||
1172 | lockres->l_action = OCFS2_AST_INVALID; | ||
1173 | /* Downconvert thread may have requeued this lock, we | ||
1174 | * need to wake it. */ | ||
1175 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | ||
1176 | ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); | ||
1177 | break; | ||
1178 | case OCFS2_UNLOCK_DROP_LOCK: | ||
1179 | lockres->l_level = DLM_LOCK_IV; | ||
1180 | break; | ||
1181 | default: | ||
1182 | BUG(); | ||
1183 | } | ||
1184 | |||
1185 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | ||
1186 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | ||
1187 | wake_up(&lockres->l_event); | ||
1188 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1189 | |||
1190 | mlog_exit_void(); | ||
1191 | } | ||
1192 | |||
1193 | /* | ||
1194 | * This is the filesystem locking protocol. It provides the lock handling | ||
1195 | * hooks for the underlying DLM. It has a maximum version number. | ||
1196 | * The version number allows interoperability with systems running at | ||
1197 | * the same major number and an equal or smaller minor number. | ||
1198 | * | ||
1199 | * Whenever the filesystem does new things with locks (adds or removes a | ||
1200 | * lock, orders them differently, does different things underneath a lock), | ||
1201 | * the version must be changed. The protocol is negotiated when joining | ||
1202 | * the dlm domain. A node may join the domain if its major version is | ||
1203 | * identical to all other nodes and its minor version is greater than | ||
1204 | * or equal to all other nodes. When its minor version is greater than | ||
1205 | * the other nodes, it will run at the minor version specified by the | ||
1206 | * other nodes. | ||
1207 | * | ||
1208 | * If a locking change is made that will not be compatible with older | ||
1209 | * versions, the major number must be increased and the minor version set | ||
1210 | * to zero. If a change merely adds a behavior that can be disabled when | ||
1211 | * speaking to older versions, the minor version must be increased. If a | ||
1212 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
1213 | * are just ignored by older versions), the version does not need to be | ||
1214 | * updated. | ||
1215 | */ | ||
1216 | static struct ocfs2_locking_protocol lproto = { | ||
1217 | .lp_max_version = { | ||
1218 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
1219 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
1220 | }, | ||
1221 | .lp_lock_ast = ocfs2_locking_ast, | ||
1222 | .lp_blocking_ast = ocfs2_blocking_ast, | ||
1223 | .lp_unlock_ast = ocfs2_unlock_ast, | ||
1224 | }; | ||
1225 | |||
1226 | void ocfs2_set_locking_protocol(void) | ||
1227 | { | ||
1228 | ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version); | ||
1229 | } | ||
1230 | |||
1128 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 1231 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
1129 | int convert) | 1232 | int convert) |
1130 | { | 1233 | { |
@@ -1133,6 +1236,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
1133 | mlog_entry_void(); | 1236 | mlog_entry_void(); |
1134 | spin_lock_irqsave(&lockres->l_lock, flags); | 1237 | spin_lock_irqsave(&lockres->l_lock, flags); |
1135 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 1238 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
1239 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
1136 | if (convert) | 1240 | if (convert) |
1137 | lockres->l_action = OCFS2_AST_INVALID; | 1241 | lockres->l_action = OCFS2_AST_INVALID; |
1138 | else | 1242 | else |
@@ -1179,8 +1283,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
1179 | &lockres->l_lksb, | 1283 | &lockres->l_lksb, |
1180 | dlm_flags, | 1284 | dlm_flags, |
1181 | lockres->l_name, | 1285 | lockres->l_name, |
1182 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1286 | OCFS2_LOCK_ID_MAX_LEN - 1); |
1183 | lockres); | ||
1184 | lockres_clear_pending(lockres, gen, osb); | 1287 | lockres_clear_pending(lockres, gen, osb); |
1185 | if (ret) { | 1288 | if (ret) { |
1186 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 1289 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
@@ -1323,13 +1426,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
1323 | again: | 1426 | again: |
1324 | wait = 0; | 1427 | wait = 0; |
1325 | 1428 | ||
1429 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1430 | |||
1326 | if (catch_signals && signal_pending(current)) { | 1431 | if (catch_signals && signal_pending(current)) { |
1327 | ret = -ERESTARTSYS; | 1432 | ret = -ERESTARTSYS; |
1328 | goto out; | 1433 | goto unlock; |
1329 | } | 1434 | } |
1330 | 1435 | ||
1331 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1332 | |||
1333 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, | 1436 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, |
1334 | "Cluster lock called on freeing lockres %s! flags " | 1437 | "Cluster lock called on freeing lockres %s! flags " |
1335 | "0x%lx\n", lockres->l_name, lockres->l_flags); | 1438 | "0x%lx\n", lockres->l_name, lockres->l_flags); |
@@ -1346,6 +1449,25 @@ again: | |||
1346 | goto unlock; | 1449 | goto unlock; |
1347 | } | 1450 | } |
1348 | 1451 | ||
1452 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { | ||
1453 | /* | ||
1454 | * We've upconverted. If the lock now has a level we can | ||
1455 | * work with, we take it. If, however, the lock is not at the | ||
1456 | * required level, we go thru the full cycle. One way this could | ||
1457 | * happen is if a process requesting an upconvert to PR is | ||
1458 | * closely followed by another requesting upconvert to an EX. | ||
1459 | * If the process requesting EX lands here, we want it to | ||
1460 | * continue attempting to upconvert and let the process | ||
1461 | * requesting PR take the lock. | ||
1462 | * If multiple processes request upconvert to PR, the first one | ||
1463 | * here will take the lock. The others will have to go thru the | ||
1464 | * OCFS2_LOCK_BLOCKED check to ensure that there is no pending | ||
1465 | * downconvert request. | ||
1466 | */ | ||
1467 | if (level <= lockres->l_level) | ||
1468 | goto update_holders; | ||
1469 | } | ||
1470 | |||
1349 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 1471 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
1350 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 1472 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
1351 | /* is the lock is currently blocked on behalf of | 1473 | /* is the lock is currently blocked on behalf of |
@@ -1383,7 +1505,7 @@ again: | |||
1383 | BUG_ON(level == DLM_LOCK_IV); | 1505 | BUG_ON(level == DLM_LOCK_IV); |
1384 | BUG_ON(level == DLM_LOCK_NL); | 1506 | BUG_ON(level == DLM_LOCK_NL); |
1385 | 1507 | ||
1386 | mlog(0, "lock %s, convert from %d to level = %d\n", | 1508 | mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", |
1387 | lockres->l_name, lockres->l_level, level); | 1509 | lockres->l_name, lockres->l_level, level); |
1388 | 1510 | ||
1389 | /* call dlm_lock to upgrade lock now */ | 1511 | /* call dlm_lock to upgrade lock now */ |
@@ -1392,8 +1514,7 @@ again: | |||
1392 | &lockres->l_lksb, | 1514 | &lockres->l_lksb, |
1393 | lkm_flags, | 1515 | lkm_flags, |
1394 | lockres->l_name, | 1516 | lockres->l_name, |
1395 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1517 | OCFS2_LOCK_ID_MAX_LEN - 1); |
1396 | lockres); | ||
1397 | lockres_clear_pending(lockres, gen, osb); | 1518 | lockres_clear_pending(lockres, gen, osb); |
1398 | if (ret) { | 1519 | if (ret) { |
1399 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || | 1520 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || |
@@ -1416,11 +1537,14 @@ again: | |||
1416 | goto again; | 1537 | goto again; |
1417 | } | 1538 | } |
1418 | 1539 | ||
1540 | update_holders: | ||
1419 | /* Ok, if we get here then we're good to go. */ | 1541 | /* Ok, if we get here then we're good to go. */ |
1420 | ocfs2_inc_holders(lockres, level); | 1542 | ocfs2_inc_holders(lockres, level); |
1421 | 1543 | ||
1422 | ret = 0; | 1544 | ret = 0; |
1423 | unlock: | 1545 | unlock: |
1546 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | ||
1547 | |||
1424 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1548 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1425 | out: | 1549 | out: |
1426 | /* | 1550 | /* |
@@ -1757,7 +1881,7 @@ out: | |||
1757 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | 1881 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of |
1758 | * flock() calls. The locking approach this requires is sufficiently | 1882 | * flock() calls. The locking approach this requires is sufficiently |
1759 | * different from all other cluster lock types that we implement a | 1883 | * different from all other cluster lock types that we implement a |
1760 | * seperate path to the "low-level" dlm calls. In particular: | 1884 | * separate path to the "low-level" dlm calls. In particular: |
1761 | * | 1885 | * |
1762 | * - No optimization of lock levels is done - we take at exactly | 1886 | * - No optimization of lock levels is done - we take at exactly |
1763 | * what's been requested. | 1887 | * what's been requested. |
@@ -1827,8 +1951,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1827 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1951 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1828 | 1952 | ||
1829 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, | 1953 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, |
1830 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | 1954 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); |
1831 | lockres); | ||
1832 | if (ret) { | 1955 | if (ret) { |
1833 | if (!trylock || (ret != -EAGAIN)) { | 1956 | if (!trylock || (ret != -EAGAIN)) { |
1834 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 1957 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
@@ -1855,7 +1978,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1855 | * outstanding lock request, so a cancel convert is | 1978 | * outstanding lock request, so a cancel convert is |
1856 | * required. We intentionally overwrite 'ret' - if the | 1979 | * required. We intentionally overwrite 'ret' - if the |
1857 | * cancel fails and the lock was granted, it's easier | 1980 | * cancel fails and the lock was granted, it's easier |
1858 | * to just bubble sucess back up to the user. | 1981 | * to just bubble success back up to the user. |
1859 | */ | 1982 | */ |
1860 | ret = ocfs2_flock_handle_signal(lockres, level); | 1983 | ret = ocfs2_flock_handle_signal(lockres, level); |
1861 | } else if (!ret && (level > lockres->l_level)) { | 1984 | } else if (!ret && (level > lockres->l_level)) { |
@@ -2957,7 +3080,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2957 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, | 3080 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
2958 | osb->uuid_str, | 3081 | osb->uuid_str, |
2959 | strlen(osb->uuid_str), | 3082 | strlen(osb->uuid_str), |
2960 | ocfs2_do_node_down, osb, | 3083 | &lproto, ocfs2_do_node_down, osb, |
2961 | &conn); | 3084 | &conn); |
2962 | if (status) { | 3085 | if (status) { |
2963 | mlog_errno(status); | 3086 | mlog_errno(status); |
@@ -3024,50 +3147,6 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, | |||
3024 | mlog_exit_void(); | 3147 | mlog_exit_void(); |
3025 | } | 3148 | } |
3026 | 3149 | ||
3027 | static void ocfs2_unlock_ast(void *opaque, int error) | ||
3028 | { | ||
3029 | struct ocfs2_lock_res *lockres = opaque; | ||
3030 | unsigned long flags; | ||
3031 | |||
3032 | mlog_entry_void(); | ||
3033 | |||
3034 | mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, | ||
3035 | lockres->l_unlock_action); | ||
3036 | |||
3037 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
3038 | if (error) { | ||
3039 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " | ||
3040 | "unlock_action %d\n", error, lockres->l_name, | ||
3041 | lockres->l_unlock_action); | ||
3042 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
3043 | mlog_exit_void(); | ||
3044 | return; | ||
3045 | } | ||
3046 | |||
3047 | switch(lockres->l_unlock_action) { | ||
3048 | case OCFS2_UNLOCK_CANCEL_CONVERT: | ||
3049 | mlog(0, "Cancel convert success for %s\n", lockres->l_name); | ||
3050 | lockres->l_action = OCFS2_AST_INVALID; | ||
3051 | /* Downconvert thread may have requeued this lock, we | ||
3052 | * need to wake it. */ | ||
3053 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | ||
3054 | ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); | ||
3055 | break; | ||
3056 | case OCFS2_UNLOCK_DROP_LOCK: | ||
3057 | lockres->l_level = DLM_LOCK_IV; | ||
3058 | break; | ||
3059 | default: | ||
3060 | BUG(); | ||
3061 | } | ||
3062 | |||
3063 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | ||
3064 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | ||
3065 | wake_up(&lockres->l_event); | ||
3066 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
3067 | |||
3068 | mlog_exit_void(); | ||
3069 | } | ||
3070 | |||
3071 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 3150 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
3072 | struct ocfs2_lock_res *lockres) | 3151 | struct ocfs2_lock_res *lockres) |
3073 | { | 3152 | { |
@@ -3135,8 +3214,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
3135 | 3214 | ||
3136 | mlog(0, "lock %s\n", lockres->l_name); | 3215 | mlog(0, "lock %s\n", lockres->l_name); |
3137 | 3216 | ||
3138 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, | 3217 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); |
3139 | lockres); | ||
3140 | if (ret) { | 3218 | if (ret) { |
3141 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 3219 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
3142 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 3220 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
@@ -3155,7 +3233,7 @@ out: | |||
3155 | /* Mark the lockres as being dropped. It will no longer be | 3233 | /* Mark the lockres as being dropped. It will no longer be |
3156 | * queued if blocking, but we still may have to wait on it | 3234 | * queued if blocking, but we still may have to wait on it |
3157 | * being dequeued from the downconvert thread before we can consider | 3235 | * being dequeued from the downconvert thread before we can consider |
3158 | * it safe to drop. | 3236 | * it safe to drop. |
3159 | * | 3237 | * |
3160 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 3238 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
3161 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 3239 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) |
@@ -3244,13 +3322,20 @@ static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | |||
3244 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 3322 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
3245 | 3323 | ||
3246 | if (lockres->l_level <= new_level) { | 3324 | if (lockres->l_level <= new_level) { |
3247 | mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", | 3325 | mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " |
3248 | lockres->l_level, new_level); | 3326 | "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " |
3327 | "block %d, pgen %d\n", lockres->l_name, lockres->l_level, | ||
3328 | new_level, list_empty(&lockres->l_blocked_list), | ||
3329 | list_empty(&lockres->l_mask_waiters), lockres->l_type, | ||
3330 | lockres->l_flags, lockres->l_ro_holders, | ||
3331 | lockres->l_ex_holders, lockres->l_action, | ||
3332 | lockres->l_unlock_action, lockres->l_requested, | ||
3333 | lockres->l_blocking, lockres->l_pending_gen); | ||
3249 | BUG(); | 3334 | BUG(); |
3250 | } | 3335 | } |
3251 | 3336 | ||
3252 | mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", | 3337 | mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", |
3253 | lockres->l_name, new_level, lockres->l_blocking); | 3338 | lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); |
3254 | 3339 | ||
3255 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 3340 | lockres->l_action = OCFS2_AST_DOWNCONVERT; |
3256 | lockres->l_requested = new_level; | 3341 | lockres->l_requested = new_level; |
@@ -3269,6 +3354,9 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
3269 | 3354 | ||
3270 | mlog_entry_void(); | 3355 | mlog_entry_void(); |
3271 | 3356 | ||
3357 | mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, | ||
3358 | lockres->l_level, new_level); | ||
3359 | |||
3272 | if (lvb) | 3360 | if (lvb) |
3273 | dlm_flags |= DLM_LKF_VALBLK; | 3361 | dlm_flags |= DLM_LKF_VALBLK; |
3274 | 3362 | ||
@@ -3277,8 +3365,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | |||
3277 | &lockres->l_lksb, | 3365 | &lockres->l_lksb, |
3278 | dlm_flags, | 3366 | dlm_flags, |
3279 | lockres->l_name, | 3367 | lockres->l_name, |
3280 | OCFS2_LOCK_ID_MAX_LEN - 1, | 3368 | OCFS2_LOCK_ID_MAX_LEN - 1); |
3281 | lockres); | ||
3282 | lockres_clear_pending(lockres, generation, osb); | 3369 | lockres_clear_pending(lockres, generation, osb); |
3283 | if (ret) { | 3370 | if (ret) { |
3284 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 3371 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
@@ -3299,14 +3386,12 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | |||
3299 | assert_spin_locked(&lockres->l_lock); | 3386 | assert_spin_locked(&lockres->l_lock); |
3300 | 3387 | ||
3301 | mlog_entry_void(); | 3388 | mlog_entry_void(); |
3302 | mlog(0, "lock %s\n", lockres->l_name); | ||
3303 | 3389 | ||
3304 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | 3390 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { |
3305 | /* If we're already trying to cancel a lock conversion | 3391 | /* If we're already trying to cancel a lock conversion |
3306 | * then just drop the spinlock and allow the caller to | 3392 | * then just drop the spinlock and allow the caller to |
3307 | * requeue this lock. */ | 3393 | * requeue this lock. */ |
3308 | 3394 | mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); | |
3309 | mlog(0, "Lockres %s, skip convert\n", lockres->l_name); | ||
3310 | return 0; | 3395 | return 0; |
3311 | } | 3396 | } |
3312 | 3397 | ||
@@ -3321,6 +3406,8 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | |||
3321 | "lock %s, invalid flags: 0x%lx\n", | 3406 | "lock %s, invalid flags: 0x%lx\n", |
3322 | lockres->l_name, lockres->l_flags); | 3407 | lockres->l_name, lockres->l_flags); |
3323 | 3408 | ||
3409 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | ||
3410 | |||
3324 | return 1; | 3411 | return 1; |
3325 | } | 3412 | } |
3326 | 3413 | ||
@@ -3330,16 +3417,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
3330 | int ret; | 3417 | int ret; |
3331 | 3418 | ||
3332 | mlog_entry_void(); | 3419 | mlog_entry_void(); |
3333 | mlog(0, "lock %s\n", lockres->l_name); | ||
3334 | 3420 | ||
3335 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, | 3421 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
3336 | DLM_LKF_CANCEL, lockres); | 3422 | DLM_LKF_CANCEL); |
3337 | if (ret) { | 3423 | if (ret) { |
3338 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 3424 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
3339 | ocfs2_recover_from_dlm_error(lockres, 0); | 3425 | ocfs2_recover_from_dlm_error(lockres, 0); |
3340 | } | 3426 | } |
3341 | 3427 | ||
3342 | mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); | 3428 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); |
3343 | 3429 | ||
3344 | mlog_exit(ret); | 3430 | mlog_exit(ret); |
3345 | return ret; | 3431 | return ret; |
@@ -3352,6 +3438,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
3352 | unsigned long flags; | 3438 | unsigned long flags; |
3353 | int blocking; | 3439 | int blocking; |
3354 | int new_level; | 3440 | int new_level; |
3441 | int level; | ||
3355 | int ret = 0; | 3442 | int ret = 0; |
3356 | int set_lvb = 0; | 3443 | int set_lvb = 0; |
3357 | unsigned int gen; | 3444 | unsigned int gen; |
@@ -3360,9 +3447,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
3360 | 3447 | ||
3361 | spin_lock_irqsave(&lockres->l_lock, flags); | 3448 | spin_lock_irqsave(&lockres->l_lock, flags); |
3362 | 3449 | ||
3363 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | ||
3364 | |||
3365 | recheck: | 3450 | recheck: |
3451 | /* | ||
3452 | * Is it still blocking? If not, we have no more work to do. | ||
3453 | */ | ||
3454 | if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { | ||
3455 | BUG_ON(lockres->l_blocking != DLM_LOCK_NL); | ||
3456 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
3457 | ret = 0; | ||
3458 | goto leave; | ||
3459 | } | ||
3460 | |||
3366 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 3461 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
3367 | /* XXX | 3462 | /* XXX |
3368 | * This is a *big* race. The OCFS2_LOCK_PENDING flag | 3463 | * This is a *big* race. The OCFS2_LOCK_PENDING flag |
@@ -3387,8 +3482,11 @@ recheck: | |||
3387 | * at the same time they set OCFS2_DLM_BUSY. They must | 3482 | * at the same time they set OCFS2_DLM_BUSY. They must |
3388 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. | 3483 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. |
3389 | */ | 3484 | */ |
3390 | if (lockres->l_flags & OCFS2_LOCK_PENDING) | 3485 | if (lockres->l_flags & OCFS2_LOCK_PENDING) { |
3486 | mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", | ||
3487 | lockres->l_name); | ||
3391 | goto leave_requeue; | 3488 | goto leave_requeue; |
3489 | } | ||
3392 | 3490 | ||
3393 | ctl->requeue = 1; | 3491 | ctl->requeue = 1; |
3394 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 3492 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
@@ -3401,31 +3499,70 @@ recheck: | |||
3401 | goto leave; | 3499 | goto leave; |
3402 | } | 3500 | } |
3403 | 3501 | ||
3502 | /* | ||
3503 | * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is | ||
3504 | * set when the ast is received for an upconvert just before the | ||
3505 | * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast | ||
3506 | * on the heels of the ast, we want to delay the downconvert just | ||
3507 | * enough to allow the up requestor to do its task. Because this | ||
3508 | * lock is in the blocked queue, the lock will be downconverted | ||
3509 | * as soon as the requestor is done with the lock. | ||
3510 | */ | ||
3511 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) | ||
3512 | goto leave_requeue; | ||
3513 | |||
3514 | /* | ||
3515 | * How can we block and yet be at NL? We were trying to upconvert | ||
3516 | * from NL and got canceled. The code comes back here, and now | ||
3517 | * we notice and clear BLOCKING. | ||
3518 | */ | ||
3519 | if (lockres->l_level == DLM_LOCK_NL) { | ||
3520 | BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); | ||
3521 | mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); | ||
3522 | lockres->l_blocking = DLM_LOCK_NL; | ||
3523 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
3524 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
3525 | goto leave; | ||
3526 | } | ||
3527 | |||
3404 | /* if we're blocking an exclusive and we have *any* holders, | 3528 | /* if we're blocking an exclusive and we have *any* holders, |
3405 | * then requeue. */ | 3529 | * then requeue. */ |
3406 | if ((lockres->l_blocking == DLM_LOCK_EX) | 3530 | if ((lockres->l_blocking == DLM_LOCK_EX) |
3407 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 3531 | && (lockres->l_ex_holders || lockres->l_ro_holders)) { |
3532 | mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", | ||
3533 | lockres->l_name, lockres->l_ex_holders, | ||
3534 | lockres->l_ro_holders); | ||
3408 | goto leave_requeue; | 3535 | goto leave_requeue; |
3536 | } | ||
3409 | 3537 | ||
3410 | /* If it's a PR we're blocking, then only | 3538 | /* If it's a PR we're blocking, then only |
3411 | * requeue if we've got any EX holders */ | 3539 | * requeue if we've got any EX holders */ |
3412 | if (lockres->l_blocking == DLM_LOCK_PR && | 3540 | if (lockres->l_blocking == DLM_LOCK_PR && |
3413 | lockres->l_ex_holders) | 3541 | lockres->l_ex_holders) { |
3542 | mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", | ||
3543 | lockres->l_name, lockres->l_ex_holders); | ||
3414 | goto leave_requeue; | 3544 | goto leave_requeue; |
3545 | } | ||
3415 | 3546 | ||
3416 | /* | 3547 | /* |
3417 | * Can we get a lock in this state if the holder counts are | 3548 | * Can we get a lock in this state if the holder counts are |
3418 | * zero? The meta data unblock code used to check this. | 3549 | * zero? The meta data unblock code used to check this. |
3419 | */ | 3550 | */ |
3420 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 3551 | if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
3421 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) | 3552 | && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { |
3553 | mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", | ||
3554 | lockres->l_name); | ||
3422 | goto leave_requeue; | 3555 | goto leave_requeue; |
3556 | } | ||
3423 | 3557 | ||
3424 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); | 3558 | new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); |
3425 | 3559 | ||
3426 | if (lockres->l_ops->check_downconvert | 3560 | if (lockres->l_ops->check_downconvert |
3427 | && !lockres->l_ops->check_downconvert(lockres, new_level)) | 3561 | && !lockres->l_ops->check_downconvert(lockres, new_level)) { |
3562 | mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", | ||
3563 | lockres->l_name); | ||
3428 | goto leave_requeue; | 3564 | goto leave_requeue; |
3565 | } | ||
3429 | 3566 | ||
3430 | /* If we get here, then we know that there are no more | 3567 | /* If we get here, then we know that there are no more |
3431 | * incompatible holders (and anyone asking for an incompatible | 3568 | * incompatible holders (and anyone asking for an incompatible |
@@ -3438,17 +3575,24 @@ recheck: | |||
3438 | * may sleep, so we save off a copy of what we're blocking as | 3575 | * may sleep, so we save off a copy of what we're blocking as |
3439 | * it may change while we're not holding the spin lock. */ | 3576 | * it may change while we're not holding the spin lock. */ |
3440 | blocking = lockres->l_blocking; | 3577 | blocking = lockres->l_blocking; |
3578 | level = lockres->l_level; | ||
3441 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3579 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3442 | 3580 | ||
3443 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); | 3581 | ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); |
3444 | 3582 | ||
3445 | if (ctl->unblock_action == UNBLOCK_STOP_POST) | 3583 | if (ctl->unblock_action == UNBLOCK_STOP_POST) { |
3584 | mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", | ||
3585 | lockres->l_name); | ||
3446 | goto leave; | 3586 | goto leave; |
3587 | } | ||
3447 | 3588 | ||
3448 | spin_lock_irqsave(&lockres->l_lock, flags); | 3589 | spin_lock_irqsave(&lockres->l_lock, flags); |
3449 | if (blocking != lockres->l_blocking) { | 3590 | if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { |
3450 | /* If this changed underneath us, then we can't drop | 3591 | /* If this changed underneath us, then we can't drop |
3451 | * it just yet. */ | 3592 | * it just yet. */ |
3593 | mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " | ||
3594 | "Recheck\n", lockres->l_name, blocking, | ||
3595 | lockres->l_blocking, level, lockres->l_level); | ||
3452 | goto recheck; | 3596 | goto recheck; |
3453 | } | 3597 | } |
3454 | 3598 | ||
@@ -3843,45 +3987,6 @@ void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) | |||
3843 | ocfs2_cluster_unlock(osb, lockres, level); | 3987 | ocfs2_cluster_unlock(osb, lockres, level); |
3844 | } | 3988 | } |
3845 | 3989 | ||
3846 | /* | ||
3847 | * This is the filesystem locking protocol. It provides the lock handling | ||
3848 | * hooks for the underlying DLM. It has a maximum version number. | ||
3849 | * The version number allows interoperability with systems running at | ||
3850 | * the same major number and an equal or smaller minor number. | ||
3851 | * | ||
3852 | * Whenever the filesystem does new things with locks (adds or removes a | ||
3853 | * lock, orders them differently, does different things underneath a lock), | ||
3854 | * the version must be changed. The protocol is negotiated when joining | ||
3855 | * the dlm domain. A node may join the domain if its major version is | ||
3856 | * identical to all other nodes and its minor version is greater than | ||
3857 | * or equal to all other nodes. When its minor version is greater than | ||
3858 | * the other nodes, it will run at the minor version specified by the | ||
3859 | * other nodes. | ||
3860 | * | ||
3861 | * If a locking change is made that will not be compatible with older | ||
3862 | * versions, the major number must be increased and the minor version set | ||
3863 | * to zero. If a change merely adds a behavior that can be disabled when | ||
3864 | * speaking to older versions, the minor version must be increased. If a | ||
3865 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
3866 | * are just ignored by older versions), the version does not need to be | ||
3867 | * updated. | ||
3868 | */ | ||
3869 | static struct ocfs2_locking_protocol lproto = { | ||
3870 | .lp_max_version = { | ||
3871 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
3872 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
3873 | }, | ||
3874 | .lp_lock_ast = ocfs2_locking_ast, | ||
3875 | .lp_blocking_ast = ocfs2_blocking_ast, | ||
3876 | .lp_unlock_ast = ocfs2_unlock_ast, | ||
3877 | }; | ||
3878 | |||
3879 | void ocfs2_set_locking_protocol(void) | ||
3880 | { | ||
3881 | ocfs2_stack_glue_set_locking_protocol(&lproto); | ||
3882 | } | ||
3883 | |||
3884 | |||
3885 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 3990 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
3886 | struct ocfs2_lock_res *lockres) | 3991 | struct ocfs2_lock_res *lockres) |
3887 | { | 3992 | { |
@@ -3898,7 +4003,7 @@ static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3898 | BUG_ON(!lockres); | 4003 | BUG_ON(!lockres); |
3899 | BUG_ON(!lockres->l_ops); | 4004 | BUG_ON(!lockres->l_ops); |
3900 | 4005 | ||
3901 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 4006 | mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); |
3902 | 4007 | ||
3903 | /* Detect whether a lock has been marked as going away while | 4008 | /* Detect whether a lock has been marked as going away while |
3904 | * the downconvert thread was processing other things. A lock can | 4009 | * the downconvert thread was processing other things. A lock can |
@@ -3921,7 +4026,7 @@ unqueue: | |||
3921 | } else | 4026 | } else |
3922 | ocfs2_schedule_blocked_lock(osb, lockres); | 4027 | ocfs2_schedule_blocked_lock(osb, lockres); |
3923 | 4028 | ||
3924 | mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, | 4029 | mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, |
3925 | ctl.requeue ? "yes" : "no"); | 4030 | ctl.requeue ? "yes" : "no"); |
3926 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 4031 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3927 | 4032 | ||
@@ -3943,7 +4048,7 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3943 | /* Do not schedule a lock for downconvert when it's on | 4048 | /* Do not schedule a lock for downconvert when it's on |
3944 | * the way to destruction - any nodes wanting access | 4049 | * the way to destruction - any nodes wanting access |
3945 | * to the resource will get it soon. */ | 4050 | * to the resource will get it soon. */ |
3946 | mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", | 4051 | mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", |
3947 | lockres->l_name, lockres->l_flags); | 4052 | lockres->l_name, lockres->l_flags); |
3948 | return; | 4053 | return; |
3949 | } | 4054 | } |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 15713cbb865c..19ad145d2af3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, | |||
239 | mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", | 239 | mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", |
240 | (unsigned long long)blkno, generation); | 240 | (unsigned long long)blkno, generation); |
241 | } | 241 | } |
242 | 242 | ||
243 | *max_len = len; | 243 | *max_len = len; |
244 | 244 | ||
245 | bail: | 245 | bail: |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 843db64e9d4a..09e3fdfa6d33 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/slab.h> | ||
27 | #include <linux/types.h> | 28 | #include <linux/types.h> |
28 | #include <linux/fiemap.h> | 29 | #include <linux/fiemap.h> |
29 | 30 | ||
@@ -37,6 +38,7 @@ | |||
37 | #include "extent_map.h" | 38 | #include "extent_map.h" |
38 | #include "inode.h" | 39 | #include "inode.h" |
39 | #include "super.h" | 40 | #include "super.h" |
41 | #include "symlink.h" | ||
40 | 42 | ||
41 | #include "buffer_head_io.h" | 43 | #include "buffer_head_io.h" |
42 | 44 | ||
@@ -191,7 +193,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, | |||
191 | emi->ei_clusters += ins->ei_clusters; | 193 | emi->ei_clusters += ins->ei_clusters; |
192 | return 1; | 194 | return 1; |
193 | } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && | 195 | } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && |
194 | (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys && | 196 | (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && |
195 | ins->ei_flags == emi->ei_flags) { | 197 | ins->ei_flags == emi->ei_flags) { |
196 | emi->ei_phys = ins->ei_phys; | 198 | emi->ei_phys = ins->ei_phys; |
197 | emi->ei_cpos = ins->ei_cpos; | 199 | emi->ei_cpos = ins->ei_cpos; |
@@ -452,7 +454,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, | |||
452 | if (i == -1) { | 454 | if (i == -1) { |
453 | /* | 455 | /* |
454 | * Holes can be larger than the maximum size of an | 456 | * Holes can be larger than the maximum size of an |
455 | * extent, so we return their lengths in a seperate | 457 | * extent, so we return their lengths in a separate |
456 | * field. | 458 | * field. |
457 | */ | 459 | */ |
458 | if (hole_len) { | 460 | if (hole_len) { |
@@ -703,6 +705,12 @@ out: | |||
703 | return ret; | 705 | return ret; |
704 | } | 706 | } |
705 | 707 | ||
708 | /* | ||
709 | * The ocfs2_fiemap_inline() may be a little bit misleading, since | ||
710 | * it not only handles the fiemap for inlined files, but also deals | ||
711 | * with the fast symlink, cause they have no difference for extent | ||
712 | * mapping per se. | ||
713 | */ | ||
706 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | 714 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, |
707 | struct fiemap_extent_info *fieinfo, | 715 | struct fiemap_extent_info *fieinfo, |
708 | u64 map_start) | 716 | u64 map_start) |
@@ -715,11 +723,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | |||
715 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 723 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
716 | 724 | ||
717 | di = (struct ocfs2_dinode *)di_bh->b_data; | 725 | di = (struct ocfs2_dinode *)di_bh->b_data; |
718 | id_count = le16_to_cpu(di->id2.i_data.id_count); | 726 | if (ocfs2_inode_is_fast_symlink(inode)) |
727 | id_count = ocfs2_fast_symlink_chars(inode->i_sb); | ||
728 | else | ||
729 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
719 | 730 | ||
720 | if (map_start < id_count) { | 731 | if (map_start < id_count) { |
721 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | 732 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; |
722 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | 733 | if (ocfs2_inode_is_fast_symlink(inode)) |
734 | phys += offsetof(struct ocfs2_dinode, id2.i_symlink); | ||
735 | else | ||
736 | phys += offsetof(struct ocfs2_dinode, | ||
737 | id2.i_data.id_data); | ||
723 | 738 | ||
724 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | 739 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, |
725 | flags); | 740 | flags); |
@@ -756,9 +771,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
756 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 771 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
757 | 772 | ||
758 | /* | 773 | /* |
759 | * Handle inline-data separately. | 774 | * Handle inline-data and fast symlink separately. |
760 | */ | 775 | */ |
761 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 776 | if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || |
777 | ocfs2_inode_is_fast_symlink(inode)) { | ||
762 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | 778 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); |
763 | goto out_unlock; | 779 | goto out_unlock; |
764 | } | 780 | } |
@@ -786,6 +802,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
786 | fe_flags = 0; | 802 | fe_flags = 0; |
787 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | 803 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) |
788 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | 804 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; |
805 | if (rec.e_flags & OCFS2_EXT_REFCOUNTED) | ||
806 | fe_flags |= FIEMAP_EXTENT_SHARED; | ||
789 | if (is_last) | 807 | if (is_last) |
790 | fe_flags |= FIEMAP_EXTENT_LAST; | 808 | fe_flags |= FIEMAP_EXTENT_LAST; |
791 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | 809 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de059f490586..a5fbd9cea968 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -107,6 +107,9 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
107 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, | 107 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, |
108 | file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); | 108 | file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); |
109 | 109 | ||
110 | if (file->f_mode & FMODE_WRITE) | ||
111 | dquot_initialize(inode); | ||
112 | |||
110 | spin_lock(&oi->ip_lock); | 113 | spin_lock(&oi->ip_lock); |
111 | 114 | ||
112 | /* Check that the inode hasn't been wiped from disk by another | 115 | /* Check that the inode hasn't been wiped from disk by another |
@@ -629,11 +632,10 @@ restart_all: | |||
629 | } | 632 | } |
630 | 633 | ||
631 | restarted_transaction: | 634 | restarted_transaction: |
632 | if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, | 635 | status = dquot_alloc_space_nodirty(inode, |
633 | clusters_to_add))) { | 636 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
634 | status = -EDQUOT; | 637 | if (status) |
635 | goto leave; | 638 | goto leave; |
636 | } | ||
637 | did_quota = 1; | 639 | did_quota = 1; |
638 | 640 | ||
639 | /* reserve a write to the file entry early on - that we if we | 641 | /* reserve a write to the file entry early on - that we if we |
@@ -674,7 +676,7 @@ restarted_transaction: | |||
674 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); | 676 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); |
675 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 677 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
676 | /* Release unused quota reservation */ | 678 | /* Release unused quota reservation */ |
677 | vfs_dq_free_space(inode, | 679 | dquot_free_space(inode, |
678 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); | 680 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
679 | did_quota = 0; | 681 | did_quota = 0; |
680 | 682 | ||
@@ -682,6 +684,7 @@ restarted_transaction: | |||
682 | if (why == RESTART_META) { | 684 | if (why == RESTART_META) { |
683 | mlog(0, "restarting function.\n"); | 685 | mlog(0, "restarting function.\n"); |
684 | restart_func = 1; | 686 | restart_func = 1; |
687 | status = 0; | ||
685 | } else { | 688 | } else { |
686 | BUG_ON(why != RESTART_TRANS); | 689 | BUG_ON(why != RESTART_TRANS); |
687 | 690 | ||
@@ -710,7 +713,7 @@ restarted_transaction: | |||
710 | 713 | ||
711 | leave: | 714 | leave: |
712 | if (status < 0 && did_quota) | 715 | if (status < 0 && did_quota) |
713 | vfs_dq_free_space(inode, | 716 | dquot_free_space(inode, |
714 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); | 717 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
715 | if (handle) { | 718 | if (handle) { |
716 | ocfs2_commit_trans(osb, handle); | 719 | ocfs2_commit_trans(osb, handle); |
@@ -749,7 +752,7 @@ static int ocfs2_write_zero_page(struct inode *inode, | |||
749 | int ret; | 752 | int ret; |
750 | 753 | ||
751 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ | 754 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ |
752 | /* ugh. in prepare/commit_write, if from==to==start of block, we | 755 | /* ugh. in prepare/commit_write, if from==to==start of block, we |
753 | ** skip the prepare. make sure we never send an offset for the start | 756 | ** skip the prepare. make sure we never send an offset for the start |
754 | ** of a block | 757 | ** of a block |
755 | */ | 758 | */ |
@@ -978,6 +981,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
978 | 981 | ||
979 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; | 982 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; |
980 | if (size_change) { | 983 | if (size_change) { |
984 | dquot_initialize(inode); | ||
985 | |||
981 | status = ocfs2_rw_lock(inode, 1); | 986 | status = ocfs2_rw_lock(inode, 1); |
982 | if (status < 0) { | 987 | if (status < 0) { |
983 | mlog_errno(status); | 988 | mlog_errno(status); |
@@ -993,10 +998,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
993 | } | 998 | } |
994 | 999 | ||
995 | if (size_change && attr->ia_size != i_size_read(inode)) { | 1000 | if (size_change && attr->ia_size != i_size_read(inode)) { |
996 | if (attr->ia_size > sb->s_maxbytes) { | 1001 | status = inode_newsize_ok(inode, attr->ia_size); |
997 | status = -EFBIG; | 1002 | if (status) |
998 | goto bail_unlock; | 1003 | goto bail_unlock; |
999 | } | ||
1000 | 1004 | ||
1001 | if (i_size_read(inode) > attr->ia_size) { | 1005 | if (i_size_read(inode) > attr->ia_size) { |
1002 | if (ocfs2_should_order_data(inode)) { | 1006 | if (ocfs2_should_order_data(inode)) { |
@@ -1021,7 +1025,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1021 | /* | 1025 | /* |
1022 | * Gather pointers to quota structures so that allocation / | 1026 | * Gather pointers to quota structures so that allocation / |
1023 | * freeing of quota structures happens here and not inside | 1027 | * freeing of quota structures happens here and not inside |
1024 | * vfs_dq_transfer() where we have problems with lock ordering | 1028 | * dquot_transfer() where we have problems with lock ordering |
1025 | */ | 1029 | */ |
1026 | if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid | 1030 | if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid |
1027 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, | 1031 | && OCFS2_HAS_RO_COMPAT_FEATURE(sb, |
@@ -1054,7 +1058,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1054 | mlog_errno(status); | 1058 | mlog_errno(status); |
1055 | goto bail_unlock; | 1059 | goto bail_unlock; |
1056 | } | 1060 | } |
1057 | status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; | 1061 | status = dquot_transfer(inode, attr); |
1058 | if (status < 0) | 1062 | if (status < 0) |
1059 | goto bail_commit; | 1063 | goto bail_commit; |
1060 | } else { | 1064 | } else { |
@@ -1772,13 +1776,14 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1772 | loff_t *ppos, | 1776 | loff_t *ppos, |
1773 | size_t count, | 1777 | size_t count, |
1774 | int appending, | 1778 | int appending, |
1775 | int *direct_io) | 1779 | int *direct_io, |
1780 | int *has_refcount) | ||
1776 | { | 1781 | { |
1777 | int ret = 0, meta_level = 0; | 1782 | int ret = 0, meta_level = 0; |
1778 | struct inode *inode = dentry->d_inode; | 1783 | struct inode *inode = dentry->d_inode; |
1779 | loff_t saved_pos, end; | 1784 | loff_t saved_pos, end; |
1780 | 1785 | ||
1781 | /* | 1786 | /* |
1782 | * We start with a read level meta lock and only jump to an ex | 1787 | * We start with a read level meta lock and only jump to an ex |
1783 | * if we need to make modifications here. | 1788 | * if we need to make modifications here. |
1784 | */ | 1789 | */ |
@@ -1833,6 +1838,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1833 | saved_pos, | 1838 | saved_pos, |
1834 | count, | 1839 | count, |
1835 | &meta_level); | 1840 | &meta_level); |
1841 | if (has_refcount) | ||
1842 | *has_refcount = 1; | ||
1843 | if (direct_io) | ||
1844 | *direct_io = 0; | ||
1836 | } | 1845 | } |
1837 | 1846 | ||
1838 | if (ret < 0) { | 1847 | if (ret < 0) { |
@@ -1899,7 +1908,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
1899 | loff_t pos) | 1908 | loff_t pos) |
1900 | { | 1909 | { |
1901 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; | 1910 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; |
1902 | int can_do_direct; | 1911 | int can_do_direct, has_refcount = 0; |
1903 | ssize_t written = 0; | 1912 | ssize_t written = 0; |
1904 | size_t ocount; /* original count */ | 1913 | size_t ocount; /* original count */ |
1905 | size_t count; /* after file limit checks */ | 1914 | size_t count; /* after file limit checks */ |
@@ -1942,7 +1951,7 @@ relock: | |||
1942 | can_do_direct = direct_io; | 1951 | can_do_direct = direct_io; |
1943 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, | 1952 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, |
1944 | iocb->ki_left, appending, | 1953 | iocb->ki_left, appending, |
1945 | &can_do_direct); | 1954 | &can_do_direct, &has_refcount); |
1946 | if (ret < 0) { | 1955 | if (ret < 0) { |
1947 | mlog_errno(ret); | 1956 | mlog_errno(ret); |
1948 | goto out; | 1957 | goto out; |
@@ -1973,18 +1982,18 @@ relock: | |||
1973 | /* communicate with ocfs2_dio_end_io */ | 1982 | /* communicate with ocfs2_dio_end_io */ |
1974 | ocfs2_iocb_set_rw_locked(iocb, rw_level); | 1983 | ocfs2_iocb_set_rw_locked(iocb, rw_level); |
1975 | 1984 | ||
1976 | if (direct_io) { | 1985 | ret = generic_segment_checks(iov, &nr_segs, &ocount, |
1977 | ret = generic_segment_checks(iov, &nr_segs, &ocount, | 1986 | VERIFY_READ); |
1978 | VERIFY_READ); | 1987 | if (ret) |
1979 | if (ret) | 1988 | goto out_dio; |
1980 | goto out_dio; | ||
1981 | 1989 | ||
1982 | count = ocount; | 1990 | count = ocount; |
1983 | ret = generic_write_checks(file, ppos, &count, | 1991 | ret = generic_write_checks(file, ppos, &count, |
1984 | S_ISBLK(inode->i_mode)); | 1992 | S_ISBLK(inode->i_mode)); |
1985 | if (ret) | 1993 | if (ret) |
1986 | goto out_dio; | 1994 | goto out_dio; |
1987 | 1995 | ||
1996 | if (direct_io) { | ||
1988 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 1997 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
1989 | ppos, count, ocount); | 1998 | ppos, count, ocount); |
1990 | if (written < 0) { | 1999 | if (written < 0) { |
@@ -1999,21 +2008,26 @@ relock: | |||
1999 | goto out_dio; | 2008 | goto out_dio; |
2000 | } | 2009 | } |
2001 | } else { | 2010 | } else { |
2002 | written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); | 2011 | current->backing_dev_info = file->f_mapping->backing_dev_info; |
2012 | written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, | ||
2013 | ppos, count, 0); | ||
2014 | current->backing_dev_info = NULL; | ||
2003 | } | 2015 | } |
2004 | 2016 | ||
2005 | out_dio: | 2017 | out_dio: |
2006 | /* buffered aio wouldn't have proper lock coverage today */ | 2018 | /* buffered aio wouldn't have proper lock coverage today */ |
2007 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2019 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2008 | 2020 | ||
2009 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { | 2021 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
2022 | ((file->f_flags & O_DIRECT) && has_refcount)) { | ||
2010 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2023 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2011 | pos + count - 1); | 2024 | pos + count - 1); |
2012 | if (ret < 0) | 2025 | if (ret < 0) |
2013 | written = ret; | 2026 | written = ret; |
2014 | 2027 | ||
2015 | if (!ret && (old_size != i_size_read(inode) || | 2028 | if (!ret && ((old_size != i_size_read(inode)) || |
2016 | old_clusters != OCFS2_I(inode)->ip_clusters)) { | 2029 | (old_clusters != OCFS2_I(inode)->ip_clusters) || |
2030 | has_refcount)) { | ||
2017 | ret = jbd2_journal_force_commit(osb->journal->j_journal); | 2031 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
2018 | if (ret < 0) | 2032 | if (ret < 0) |
2019 | written = ret; | 2033 | written = ret; |
@@ -2024,7 +2038,7 @@ out_dio: | |||
2024 | pos + count - 1); | 2038 | pos + count - 1); |
2025 | } | 2039 | } |
2026 | 2040 | ||
2027 | /* | 2041 | /* |
2028 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 2042 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
2029 | * function pointer which is called when o_direct io completes so that | 2043 | * function pointer which is called when o_direct io completes so that |
2030 | * it can unlock our rw lock. (it's the clustered equivalent of | 2044 | * it can unlock our rw lock. (it's the clustered equivalent of |
@@ -2034,7 +2048,7 @@ out_dio: | |||
2034 | * async dio is going to do it in the future or an end_io after an | 2048 | * async dio is going to do it in the future or an end_io after an |
2035 | * error has already done it. | 2049 | * error has already done it. |
2036 | */ | 2050 | */ |
2037 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { | 2051 | if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { |
2038 | rw_level = -1; | 2052 | rw_level = -1; |
2039 | have_alloc_sem = 0; | 2053 | have_alloc_sem = 0; |
2040 | } | 2054 | } |
@@ -2062,7 +2076,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |||
2062 | int ret; | 2076 | int ret; |
2063 | 2077 | ||
2064 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | 2078 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, |
2065 | sd->total_len, 0, NULL); | 2079 | sd->total_len, 0, NULL, NULL); |
2066 | if (ret < 0) { | 2080 | if (ret < 0) { |
2067 | mlog_errno(ret); | 2081 | mlog_errno(ret); |
2068 | return ret; | 2082 | return ret; |
@@ -2189,7 +2203,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2189 | goto bail; | 2203 | goto bail; |
2190 | } | 2204 | } |
2191 | 2205 | ||
2192 | /* | 2206 | /* |
2193 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads | 2207 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads |
2194 | * need locks to protect pending reads from racing with truncate. | 2208 | * need locks to protect pending reads from racing with truncate. |
2195 | */ | 2209 | */ |
@@ -2211,10 +2225,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2211 | * We're fine letting folks race truncates and extending | 2225 | * We're fine letting folks race truncates and extending |
2212 | * writes with read across the cluster, just like they can | 2226 | * writes with read across the cluster, just like they can |
2213 | * locally. Hence no rw_lock during read. | 2227 | * locally. Hence no rw_lock during read. |
2214 | * | 2228 | * |
2215 | * Take and drop the meta data lock to update inode fields | 2229 | * Take and drop the meta data lock to update inode fields |
2216 | * like i_size. This allows the checks down below | 2230 | * like i_size. This allows the checks down below |
2217 | * generic_file_aio_read() a chance of actually working. | 2231 | * generic_file_aio_read() a chance of actually working. |
2218 | */ | 2232 | */ |
2219 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 2233 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
2220 | if (ret < 0) { | 2234 | if (ret < 0) { |
@@ -2239,7 +2253,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2239 | bail: | 2253 | bail: |
2240 | if (have_alloc_sem) | 2254 | if (have_alloc_sem) |
2241 | up_read(&inode->i_alloc_sem); | 2255 | up_read(&inode->i_alloc_sem); |
2242 | if (rw_level != -1) | 2256 | if (rw_level != -1) |
2243 | ocfs2_rw_unlock(inode, rw_level); | 2257 | ocfs2_rw_unlock(inode, rw_level); |
2244 | mlog_exit(ret); | 2258 | mlog_exit(ret); |
2245 | 2259 | ||
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index c6e7213db868..1aa863dd901f 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -26,7 +26,6 @@ | |||
26 | 26 | ||
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | ||
30 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
31 | 30 | ||
32 | #define MLOG_MASK_PREFIX ML_SUPER | 31 | #define MLOG_MASK_PREFIX ML_SUPER |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0297fb8982b8..af189887201c 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -25,7 +25,6 @@ | |||
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
30 | #include <linux/pagemap.h> | 29 | #include <linux/pagemap.h> |
31 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
@@ -475,7 +474,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
475 | if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { | 474 | if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { |
476 | status = ocfs2_try_open_lock(inode, 0); | 475 | status = ocfs2_try_open_lock(inode, 0); |
477 | if (status) { | 476 | if (status) { |
478 | make_bad_inode(inode); | 477 | make_bad_inode(inode); |
479 | return status; | 478 | return status; |
480 | } | 479 | } |
481 | } | 480 | } |
@@ -559,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
559 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 558 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
560 | if (IS_ERR(handle)) { | 559 | if (IS_ERR(handle)) { |
561 | status = PTR_ERR(handle); | 560 | status = PTR_ERR(handle); |
561 | handle = NULL; | ||
562 | mlog_errno(status); | 562 | mlog_errno(status); |
563 | goto out; | 563 | goto out; |
564 | } | 564 | } |
@@ -640,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
640 | goto bail_unlock; | 640 | goto bail_unlock; |
641 | } | 641 | } |
642 | 642 | ||
643 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, | 643 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { |
644 | orphan_dir_bh); | 644 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, |
645 | if (status < 0) { | 645 | orphan_dir_bh); |
646 | mlog_errno(status); | 646 | if (status < 0) { |
647 | goto bail_commit; | 647 | mlog_errno(status); |
648 | goto bail_commit; | ||
649 | } | ||
648 | } | 650 | } |
649 | 651 | ||
650 | /* set the inodes dtime */ | 652 | /* set the inodes dtime */ |
@@ -665,7 +667,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
665 | } | 667 | } |
666 | 668 | ||
667 | ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); | 669 | ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); |
668 | vfs_dq_free_inode(inode); | 670 | dquot_free_inode(inode); |
669 | 671 | ||
670 | status = ocfs2_free_dinode(handle, inode_alloc_inode, | 672 | status = ocfs2_free_dinode(handle, inode_alloc_inode, |
671 | inode_alloc_bh, di); | 673 | inode_alloc_bh, di); |
@@ -684,7 +686,7 @@ bail: | |||
684 | return status; | 686 | return status; |
685 | } | 687 | } |
686 | 688 | ||
687 | /* | 689 | /* |
688 | * Serialize with orphan dir recovery. If the process doing | 690 | * Serialize with orphan dir recovery. If the process doing |
689 | * recovery on this orphan dir does an iget() with the dir | 691 | * recovery on this orphan dir does an iget() with the dir |
690 | * i_mutex held, we'll deadlock here. Instead we detect this | 692 | * i_mutex held, we'll deadlock here. Instead we detect this |
@@ -723,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, | |||
723 | static int ocfs2_wipe_inode(struct inode *inode, | 725 | static int ocfs2_wipe_inode(struct inode *inode, |
724 | struct buffer_head *di_bh) | 726 | struct buffer_head *di_bh) |
725 | { | 727 | { |
726 | int status, orphaned_slot; | 728 | int status, orphaned_slot = -1; |
727 | struct inode *orphan_dir_inode = NULL; | 729 | struct inode *orphan_dir_inode = NULL; |
728 | struct buffer_head *orphan_dir_bh = NULL; | 730 | struct buffer_head *orphan_dir_bh = NULL; |
729 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 731 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
730 | struct ocfs2_dinode *di; | 732 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; |
731 | 733 | ||
732 | di = (struct ocfs2_dinode *) di_bh->b_data; | 734 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { |
733 | orphaned_slot = le16_to_cpu(di->i_orphaned_slot); | 735 | orphaned_slot = le16_to_cpu(di->i_orphaned_slot); |
734 | 736 | ||
735 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); | 737 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); |
736 | if (status) | 738 | if (status) |
737 | return status; | 739 | return status; |
738 | 740 | ||
739 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 741 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
740 | ORPHAN_DIR_SYSTEM_INODE, | 742 | ORPHAN_DIR_SYSTEM_INODE, |
741 | orphaned_slot); | 743 | orphaned_slot); |
742 | if (!orphan_dir_inode) { | 744 | if (!orphan_dir_inode) { |
743 | status = -EEXIST; | 745 | status = -EEXIST; |
744 | mlog_errno(status); | 746 | mlog_errno(status); |
745 | goto bail; | 747 | goto bail; |
746 | } | 748 | } |
747 | 749 | ||
748 | /* Lock the orphan dir. The lock will be held for the entire | 750 | /* Lock the orphan dir. The lock will be held for the entire |
749 | * delete_inode operation. We do this now to avoid races with | 751 | * delete_inode operation. We do this now to avoid races with |
750 | * recovery completion on other nodes. */ | 752 | * recovery completion on other nodes. */ |
751 | mutex_lock(&orphan_dir_inode->i_mutex); | 753 | mutex_lock(&orphan_dir_inode->i_mutex); |
752 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 754 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
753 | if (status < 0) { | 755 | if (status < 0) { |
754 | mutex_unlock(&orphan_dir_inode->i_mutex); | 756 | mutex_unlock(&orphan_dir_inode->i_mutex); |
755 | 757 | ||
756 | mlog_errno(status); | 758 | mlog_errno(status); |
757 | goto bail; | 759 | goto bail; |
760 | } | ||
758 | } | 761 | } |
759 | 762 | ||
760 | /* we do this while holding the orphan dir lock because we | 763 | /* we do this while holding the orphan dir lock because we |
@@ -795,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
795 | mlog_errno(status); | 798 | mlog_errno(status); |
796 | 799 | ||
797 | bail_unlock_dir: | 800 | bail_unlock_dir: |
801 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR) | ||
802 | return status; | ||
803 | |||
798 | ocfs2_inode_unlock(orphan_dir_inode, 1); | 804 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
799 | mutex_unlock(&orphan_dir_inode->i_mutex); | 805 | mutex_unlock(&orphan_dir_inode->i_mutex); |
800 | brelse(orphan_dir_bh); | 806 | brelse(orphan_dir_bh); |
@@ -890,7 +896,23 @@ static int ocfs2_query_inode_wipe(struct inode *inode, | |||
890 | 896 | ||
891 | /* Do some basic inode verification... */ | 897 | /* Do some basic inode verification... */ |
892 | di = (struct ocfs2_dinode *) di_bh->b_data; | 898 | di = (struct ocfs2_dinode *) di_bh->b_data; |
893 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { | 899 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && |
900 | !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { | ||
901 | /* | ||
902 | * Inodes in the orphan dir must have ORPHANED_FL. The only | ||
903 | * inodes that come back out of the orphan dir are reflink | ||
904 | * targets. A reflink target may be moved out of the orphan | ||
905 | * dir between the time we scan the directory and the time we | ||
906 | * process it. This would lead to HAS_REFCOUNT_FL being set but | ||
907 | * ORPHANED_FL not. | ||
908 | */ | ||
909 | if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { | ||
910 | mlog(0, "Reflinked inode %llu is no longer orphaned. " | ||
911 | "it shouldn't be deleted\n", | ||
912 | (unsigned long long)oi->ip_blkno); | ||
913 | goto bail; | ||
914 | } | ||
915 | |||
894 | /* for lack of a better error? */ | 916 | /* for lack of a better error? */ |
895 | status = -EEXIST; | 917 | status = -EEXIST; |
896 | mlog(ML_ERROR, | 918 | mlog(ML_ERROR, |
@@ -971,6 +993,8 @@ void ocfs2_delete_inode(struct inode *inode) | |||
971 | goto bail; | 993 | goto bail; |
972 | } | 994 | } |
973 | 995 | ||
996 | dquot_initialize(inode); | ||
997 | |||
974 | if (!ocfs2_inode_is_valid_to_delete(inode)) { | 998 | if (!ocfs2_inode_is_valid_to_delete(inode)) { |
975 | /* It's probably not necessary to truncate_inode_pages | 999 | /* It's probably not necessary to truncate_inode_pages |
976 | * here but we do it for safety anyway (it will most | 1000 | * here but we do it for safety anyway (it will most |
@@ -1087,6 +1111,8 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1087 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1111 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, |
1088 | "Inode=%lu\n", inode->i_ino); | 1112 | "Inode=%lu\n", inode->i_ino); |
1089 | 1113 | ||
1114 | dquot_drop(inode); | ||
1115 | |||
1090 | /* To preven remote deletes we hold open lock before, now it | 1116 | /* To preven remote deletes we hold open lock before, now it |
1091 | * is time to unlock PR and EX open locks. */ | 1117 | * is time to unlock PR and EX open locks. */ |
1092 | ocfs2_open_unlock(inode); | 1118 | ocfs2_open_unlock(inode); |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ba4fe07b293c..0b28e1921a39 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -100,6 +100,8 @@ struct ocfs2_inode_info | |||
100 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 | 100 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 |
101 | /* Does someone have the file open O_DIRECT */ | 101 | /* Does someone have the file open O_DIRECT */ |
102 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 | 102 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 |
103 | /* Tell the inode wipe code it's not in orphan dir */ | ||
104 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 | ||
103 | 105 | ||
104 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | 106 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) |
105 | { | 107 | { |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 31fbb0619510..7d9d9c132cef 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/compat.h> | ||
10 | 11 | ||
11 | #define MLOG_MASK_PREFIX ML_INODE | 12 | #define MLOG_MASK_PREFIX ML_INODE |
12 | #include <cluster/masklog.h> | 13 | #include <cluster/masklog.h> |
@@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
181 | #ifdef CONFIG_COMPAT | 182 | #ifdef CONFIG_COMPAT |
182 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 183 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
183 | { | 184 | { |
185 | bool preserve; | ||
186 | struct reflink_arguments args; | ||
187 | struct inode *inode = file->f_path.dentry->d_inode; | ||
188 | |||
184 | switch (cmd) { | 189 | switch (cmd) { |
185 | case OCFS2_IOC32_GETFLAGS: | 190 | case OCFS2_IOC32_GETFLAGS: |
186 | cmd = OCFS2_IOC_GETFLAGS; | 191 | cmd = OCFS2_IOC_GETFLAGS; |
@@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
195 | case OCFS2_IOC_GROUP_EXTEND: | 200 | case OCFS2_IOC_GROUP_EXTEND: |
196 | case OCFS2_IOC_GROUP_ADD: | 201 | case OCFS2_IOC_GROUP_ADD: |
197 | case OCFS2_IOC_GROUP_ADD64: | 202 | case OCFS2_IOC_GROUP_ADD64: |
198 | case OCFS2_IOC_REFLINK: | ||
199 | break; | 203 | break; |
204 | case OCFS2_IOC_REFLINK: | ||
205 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | ||
206 | sizeof(args))) | ||
207 | return -EFAULT; | ||
208 | preserve = (args.preserve != 0); | ||
209 | |||
210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | ||
211 | compat_ptr(args.new_path), preserve); | ||
200 | default: | 212 | default: |
201 | return -ENOIOCTLCMD; | 213 | return -ENOIOCTLCMD; |
202 | } | 214 | } |
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index cf9a5ee30fef..0cd5323bd3f0 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h | |||
@@ -7,10 +7,10 @@ | |||
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #ifndef OCFS2_IOCTL_H | 10 | #ifndef OCFS2_IOCTL_PROTO_H |
11 | #define OCFS2_IOCTL_H | 11 | #define OCFS2_IOCTL_PROTO_H |
12 | 12 | ||
13 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | 13 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
14 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); | 14 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); |
15 | 15 | ||
16 | #endif /* OCFS2_IOCTL_H */ | 16 | #endif /* OCFS2_IOCTL_PROTO_H */ |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 54c16b66327e..9336c60e3a36 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle, | |||
659 | 659 | ||
660 | default: | 660 | default: |
661 | status = -EINVAL; | 661 | status = -EINVAL; |
662 | mlog(ML_ERROR, "Uknown access type!\n"); | 662 | mlog(ML_ERROR, "Unknown access type!\n"); |
663 | } | 663 | } |
664 | if (!status && ocfs2_meta_ecc(osb) && triggers) | 664 | if (!status && ocfs2_meta_ecc(osb) && triggers) |
665 | jbd2_journal_set_triggers(bh, &triggers->ot_triggers); | 665 | jbd2_journal_set_triggers(bh, &triggers->ot_triggers); |
@@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
2034 | status = -ENOENT; | 2034 | status = -ENOENT; |
2035 | mlog_errno(status); | 2035 | mlog_errno(status); |
2036 | return status; | 2036 | return status; |
2037 | } | 2037 | } |
2038 | 2038 | ||
2039 | mutex_lock(&orphan_dir_inode->i_mutex); | 2039 | mutex_lock(&orphan_dir_inode->i_mutex); |
2040 | status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); | 2040 | status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ac10f83edb95..c983715d8d8c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -476,7 +476,7 @@ out_mutex: | |||
476 | 476 | ||
477 | out: | 477 | out: |
478 | if (!status) | 478 | if (!status) |
479 | ocfs2_init_inode_steal_slot(osb); | 479 | ocfs2_init_steal_slots(osb); |
480 | mlog_exit(status); | 480 | mlog_exit(status); |
481 | return status; | 481 | return status; |
482 | } | 482 | } |
@@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, | |||
872 | (unsigned long long)la_start_blk, | 872 | (unsigned long long)la_start_blk, |
873 | (unsigned long long)blkno); | 873 | (unsigned long long)blkno); |
874 | 874 | ||
875 | status = ocfs2_free_clusters(handle, main_bm_inode, | 875 | status = ocfs2_release_clusters(handle, |
876 | main_bm_bh, blkno, count); | 876 | main_bm_inode, |
877 | main_bm_bh, blkno, | ||
878 | count); | ||
877 | if (status < 0) { | 879 | if (status < 0) { |
878 | mlog_errno(status); | 880 | mlog_errno(status); |
879 | goto bail; | 881 | goto bail; |
@@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
984 | } | 986 | } |
985 | 987 | ||
986 | retry_enospc: | 988 | retry_enospc: |
987 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | 989 | (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; |
988 | |||
989 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 990 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
990 | if (status == -ENOSPC) { | 991 | if (status == -ENOSPC) { |
991 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | 992 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == |
@@ -1061,6 +1062,7 @@ retry_enospc: | |||
1061 | OCFS2_LA_DISABLED) | 1062 | OCFS2_LA_DISABLED) |
1062 | goto bail; | 1063 | goto bail; |
1063 | 1064 | ||
1065 | ac->ac_bits_wanted = osb->local_alloc_default_bits; | ||
1064 | status = ocfs2_claim_clusters(osb, handle, ac, | 1066 | status = ocfs2_claim_clusters(osb, handle, ac, |
1065 | osb->local_alloc_bits, | 1067 | osb->local_alloc_bits, |
1066 | &cluster_off, | 1068 | &cluster_off, |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 544ac6245175..b5cb3ede9408 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) | |||
133 | 133 | ||
134 | if (!(fl->fl_flags & FL_POSIX)) | 134 | if (!(fl->fl_flags & FL_POSIX)) |
135 | return -ENOLCK; | 135 | return -ENOLCK; |
136 | if (__mandatory_lock(inode)) | 136 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
137 | return -ENOLCK; | 137 | return -ENOLCK; |
138 | 138 | ||
139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); | 139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 39737613424a..7898bd3a99f5 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -25,7 +25,6 @@ | |||
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
30 | #include <linux/pagemap.h> | 29 | #include <linux/pagemap.h> |
31 | #include <linux/uio.h> | 30 | #include <linux/uio.h> |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f010b22b1c44..4cbb18f26c5f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
84 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 84 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
85 | handle_t *handle, | 85 | handle_t *handle, |
86 | struct inode *inode, | 86 | struct inode *inode, |
87 | struct ocfs2_dinode *fe, | 87 | struct buffer_head *fe_bh, |
88 | char *name, | 88 | char *name, |
89 | struct ocfs2_dir_lookup_result *lookup, | 89 | struct ocfs2_dir_lookup_result *lookup, |
90 | struct inode *orphan_dir_inode); | 90 | struct inode *orphan_dir_inode); |
@@ -212,7 +212,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) | |||
212 | } else | 212 | } else |
213 | inode->i_gid = current_fsgid(); | 213 | inode->i_gid = current_fsgid(); |
214 | inode->i_mode = mode; | 214 | inode->i_mode = mode; |
215 | vfs_dq_init(inode); | 215 | dquot_initialize(inode); |
216 | return inode; | 216 | return inode; |
217 | } | 217 | } |
218 | 218 | ||
@@ -244,6 +244,8 @@ static int ocfs2_mknod(struct inode *dir, | |||
244 | (unsigned long)dev, dentry->d_name.len, | 244 | (unsigned long)dev, dentry->d_name.len, |
245 | dentry->d_name.name); | 245 | dentry->d_name.name); |
246 | 246 | ||
247 | dquot_initialize(dir); | ||
248 | |||
247 | /* get our super block */ | 249 | /* get our super block */ |
248 | osb = OCFS2_SB(dir->i_sb); | 250 | osb = OCFS2_SB(dir->i_sb); |
249 | 251 | ||
@@ -348,13 +350,9 @@ static int ocfs2_mknod(struct inode *dir, | |||
348 | goto leave; | 350 | goto leave; |
349 | } | 351 | } |
350 | 352 | ||
351 | /* We don't use standard VFS wrapper because we don't want vfs_dq_init | 353 | status = dquot_alloc_inode(inode); |
352 | * to be called. */ | 354 | if (status) |
353 | if (sb_any_quota_active(osb->sb) && | ||
354 | osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { | ||
355 | status = -EDQUOT; | ||
356 | goto leave; | 355 | goto leave; |
357 | } | ||
358 | did_quota_inode = 1; | 356 | did_quota_inode = 1; |
359 | 357 | ||
360 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, | 358 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, |
@@ -410,28 +408,33 @@ static int ocfs2_mknod(struct inode *dir, | |||
410 | } | 408 | } |
411 | } | 409 | } |
412 | 410 | ||
413 | status = ocfs2_add_entry(handle, dentry, inode, | 411 | /* |
414 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, | 412 | * Do this before adding the entry to the directory. We add |
415 | &lookup); | 413 | * also set d_op after success so that ->d_iput() will cleanup |
416 | if (status < 0) { | 414 | * the dentry lock even if ocfs2_add_entry() fails below. |
415 | */ | ||
416 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
417 | OCFS2_I(dir)->ip_blkno); | ||
418 | if (status) { | ||
417 | mlog_errno(status); | 419 | mlog_errno(status); |
418 | goto leave; | 420 | goto leave; |
419 | } | 421 | } |
422 | dentry->d_op = &ocfs2_dentry_ops; | ||
420 | 423 | ||
421 | status = ocfs2_dentry_attach_lock(dentry, inode, | 424 | status = ocfs2_add_entry(handle, dentry, inode, |
422 | OCFS2_I(dir)->ip_blkno); | 425 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, |
423 | if (status) { | 426 | &lookup); |
427 | if (status < 0) { | ||
424 | mlog_errno(status); | 428 | mlog_errno(status); |
425 | goto leave; | 429 | goto leave; |
426 | } | 430 | } |
427 | 431 | ||
428 | insert_inode_hash(inode); | 432 | insert_inode_hash(inode); |
429 | dentry->d_op = &ocfs2_dentry_ops; | ||
430 | d_instantiate(dentry, inode); | 433 | d_instantiate(dentry, inode); |
431 | status = 0; | 434 | status = 0; |
432 | leave: | 435 | leave: |
433 | if (status < 0 && did_quota_inode) | 436 | if (status < 0 && did_quota_inode) |
434 | vfs_dq_free_inode(inode); | 437 | dquot_free_inode(inode); |
435 | if (handle) | 438 | if (handle) |
436 | ocfs2_commit_trans(osb, handle); | 439 | ocfs2_commit_trans(osb, handle); |
437 | 440 | ||
@@ -447,11 +450,6 @@ leave: | |||
447 | 450 | ||
448 | ocfs2_free_dir_lookup_result(&lookup); | 451 | ocfs2_free_dir_lookup_result(&lookup); |
449 | 452 | ||
450 | if ((status < 0) && inode) { | ||
451 | clear_nlink(inode); | ||
452 | iput(inode); | ||
453 | } | ||
454 | |||
455 | if (inode_ac) | 453 | if (inode_ac) |
456 | ocfs2_free_alloc_context(inode_ac); | 454 | ocfs2_free_alloc_context(inode_ac); |
457 | 455 | ||
@@ -461,6 +459,17 @@ leave: | |||
461 | if (meta_ac) | 459 | if (meta_ac) |
462 | ocfs2_free_alloc_context(meta_ac); | 460 | ocfs2_free_alloc_context(meta_ac); |
463 | 461 | ||
462 | /* | ||
463 | * We should call iput after the i_mutex of the bitmap been | ||
464 | * unlocked in ocfs2_free_alloc_context, or the | ||
465 | * ocfs2_delete_inode will mutex_lock again. | ||
466 | */ | ||
467 | if ((status < 0) && inode) { | ||
468 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
469 | clear_nlink(inode); | ||
470 | iput(inode); | ||
471 | } | ||
472 | |||
464 | mlog_exit(status); | 473 | mlog_exit(status); |
465 | 474 | ||
466 | return status; | 475 | return status; |
@@ -636,6 +645,8 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
636 | if (S_ISDIR(inode->i_mode)) | 645 | if (S_ISDIR(inode->i_mode)) |
637 | return -EPERM; | 646 | return -EPERM; |
638 | 647 | ||
648 | dquot_initialize(dir); | ||
649 | |||
639 | err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); | 650 | err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); |
640 | if (err < 0) { | 651 | if (err < 0) { |
641 | if (err != -ENOENT) | 652 | if (err != -ENOENT) |
@@ -791,6 +802,8 @@ static int ocfs2_unlink(struct inode *dir, | |||
791 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, | 802 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, |
792 | dentry->d_name.len, dentry->d_name.name); | 803 | dentry->d_name.len, dentry->d_name.name); |
793 | 804 | ||
805 | dquot_initialize(dir); | ||
806 | |||
794 | BUG_ON(dentry->d_parent->d_inode != dir); | 807 | BUG_ON(dentry->d_parent->d_inode != dir); |
795 | 808 | ||
796 | mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 809 | mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); |
@@ -877,7 +890,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
877 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 890 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
878 | 891 | ||
879 | if (inode_is_unlinkable(inode)) { | 892 | if (inode_is_unlinkable(inode)) { |
880 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, | 893 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, |
881 | &orphan_insert, orphan_dir); | 894 | &orphan_insert, orphan_dir); |
882 | if (status < 0) { | 895 | if (status < 0) { |
883 | mlog_errno(status); | 896 | mlog_errno(status); |
@@ -1051,6 +1064,9 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1051 | old_dentry->d_name.len, old_dentry->d_name.name, | 1064 | old_dentry->d_name.len, old_dentry->d_name.name, |
1052 | new_dentry->d_name.len, new_dentry->d_name.name); | 1065 | new_dentry->d_name.len, new_dentry->d_name.name); |
1053 | 1066 | ||
1067 | dquot_initialize(old_dir); | ||
1068 | dquot_initialize(new_dir); | ||
1069 | |||
1054 | osb = OCFS2_SB(old_dir->i_sb); | 1070 | osb = OCFS2_SB(old_dir->i_sb); |
1055 | 1071 | ||
1056 | if (new_inode) { | 1072 | if (new_inode) { |
@@ -1295,7 +1311,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1295 | if (S_ISDIR(new_inode->i_mode) || | 1311 | if (S_ISDIR(new_inode->i_mode) || |
1296 | (ocfs2_read_links_count(newfe) == 1)) { | 1312 | (ocfs2_read_links_count(newfe) == 1)) { |
1297 | status = ocfs2_orphan_add(osb, handle, new_inode, | 1313 | status = ocfs2_orphan_add(osb, handle, new_inode, |
1298 | newfe, orphan_name, | 1314 | newfe_bh, orphan_name, |
1299 | &orphan_insert, orphan_dir); | 1315 | &orphan_insert, orphan_dir); |
1300 | if (status < 0) { | 1316 | if (status < 0) { |
1301 | mlog_errno(status); | 1317 | mlog_errno(status); |
@@ -1599,6 +1615,8 @@ static int ocfs2_symlink(struct inode *dir, | |||
1599 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, | 1615 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, |
1600 | dentry, symname, dentry->d_name.len, dentry->d_name.name); | 1616 | dentry, symname, dentry->d_name.len, dentry->d_name.name); |
1601 | 1617 | ||
1618 | dquot_initialize(dir); | ||
1619 | |||
1602 | sb = dir->i_sb; | 1620 | sb = dir->i_sb; |
1603 | osb = OCFS2_SB(sb); | 1621 | osb = OCFS2_SB(sb); |
1604 | 1622 | ||
@@ -1688,13 +1706,9 @@ static int ocfs2_symlink(struct inode *dir, | |||
1688 | goto bail; | 1706 | goto bail; |
1689 | } | 1707 | } |
1690 | 1708 | ||
1691 | /* We don't use standard VFS wrapper because we don't want vfs_dq_init | 1709 | status = dquot_alloc_inode(inode); |
1692 | * to be called. */ | 1710 | if (status) |
1693 | if (sb_any_quota_active(osb->sb) && | ||
1694 | osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { | ||
1695 | status = -EDQUOT; | ||
1696 | goto bail; | 1711 | goto bail; |
1697 | } | ||
1698 | did_quota_inode = 1; | 1712 | did_quota_inode = 1; |
1699 | 1713 | ||
1700 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, | 1714 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, |
@@ -1716,11 +1730,10 @@ static int ocfs2_symlink(struct inode *dir, | |||
1716 | u32 offset = 0; | 1730 | u32 offset = 0; |
1717 | 1731 | ||
1718 | inode->i_op = &ocfs2_symlink_inode_operations; | 1732 | inode->i_op = &ocfs2_symlink_inode_operations; |
1719 | if (vfs_dq_alloc_space_nodirty(inode, | 1733 | status = dquot_alloc_space_nodirty(inode, |
1720 | ocfs2_clusters_to_bytes(osb->sb, 1))) { | 1734 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
1721 | status = -EDQUOT; | 1735 | if (status) |
1722 | goto bail; | 1736 | goto bail; |
1723 | } | ||
1724 | did_quota = 1; | 1737 | did_quota = 1; |
1725 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, | 1738 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, |
1726 | new_fe_bh, | 1739 | new_fe_bh, |
@@ -1769,29 +1782,34 @@ static int ocfs2_symlink(struct inode *dir, | |||
1769 | } | 1782 | } |
1770 | } | 1783 | } |
1771 | 1784 | ||
1772 | status = ocfs2_add_entry(handle, dentry, inode, | 1785 | /* |
1773 | le64_to_cpu(fe->i_blkno), parent_fe_bh, | 1786 | * Do this before adding the entry to the directory. We add |
1774 | &lookup); | 1787 | * also set d_op after success so that ->d_iput() will cleanup |
1775 | if (status < 0) { | 1788 | * the dentry lock even if ocfs2_add_entry() fails below. |
1789 | */ | ||
1790 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
1791 | if (status) { | ||
1776 | mlog_errno(status); | 1792 | mlog_errno(status); |
1777 | goto bail; | 1793 | goto bail; |
1778 | } | 1794 | } |
1795 | dentry->d_op = &ocfs2_dentry_ops; | ||
1779 | 1796 | ||
1780 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | 1797 | status = ocfs2_add_entry(handle, dentry, inode, |
1781 | if (status) { | 1798 | le64_to_cpu(fe->i_blkno), parent_fe_bh, |
1799 | &lookup); | ||
1800 | if (status < 0) { | ||
1782 | mlog_errno(status); | 1801 | mlog_errno(status); |
1783 | goto bail; | 1802 | goto bail; |
1784 | } | 1803 | } |
1785 | 1804 | ||
1786 | insert_inode_hash(inode); | 1805 | insert_inode_hash(inode); |
1787 | dentry->d_op = &ocfs2_dentry_ops; | ||
1788 | d_instantiate(dentry, inode); | 1806 | d_instantiate(dentry, inode); |
1789 | bail: | 1807 | bail: |
1790 | if (status < 0 && did_quota) | 1808 | if (status < 0 && did_quota) |
1791 | vfs_dq_free_space_nodirty(inode, | 1809 | dquot_free_space_nodirty(inode, |
1792 | ocfs2_clusters_to_bytes(osb->sb, 1)); | 1810 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
1793 | if (status < 0 && did_quota_inode) | 1811 | if (status < 0 && did_quota_inode) |
1794 | vfs_dq_free_inode(inode); | 1812 | dquot_free_inode(inode); |
1795 | if (handle) | 1813 | if (handle) |
1796 | ocfs2_commit_trans(osb, handle); | 1814 | ocfs2_commit_trans(osb, handle); |
1797 | 1815 | ||
@@ -1809,6 +1827,7 @@ bail: | |||
1809 | if (xattr_ac) | 1827 | if (xattr_ac) |
1810 | ocfs2_free_alloc_context(xattr_ac); | 1828 | ocfs2_free_alloc_context(xattr_ac); |
1811 | if ((status < 0) && inode) { | 1829 | if ((status < 0) && inode) { |
1830 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
1812 | clear_nlink(inode); | 1831 | clear_nlink(inode); |
1813 | iput(inode); | 1832 | iput(inode); |
1814 | } | 1833 | } |
@@ -1909,7 +1928,7 @@ leave: | |||
1909 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 1928 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
1910 | handle_t *handle, | 1929 | handle_t *handle, |
1911 | struct inode *inode, | 1930 | struct inode *inode, |
1912 | struct ocfs2_dinode *fe, | 1931 | struct buffer_head *fe_bh, |
1913 | char *name, | 1932 | char *name, |
1914 | struct ocfs2_dir_lookup_result *lookup, | 1933 | struct ocfs2_dir_lookup_result *lookup, |
1915 | struct inode *orphan_dir_inode) | 1934 | struct inode *orphan_dir_inode) |
@@ -1917,6 +1936,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1917 | struct buffer_head *orphan_dir_bh = NULL; | 1936 | struct buffer_head *orphan_dir_bh = NULL; |
1918 | int status = 0; | 1937 | int status = 0; |
1919 | struct ocfs2_dinode *orphan_fe; | 1938 | struct ocfs2_dinode *orphan_fe; |
1939 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
1920 | 1940 | ||
1921 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 1941 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
1922 | 1942 | ||
@@ -1957,13 +1977,31 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1957 | goto leave; | 1977 | goto leave; |
1958 | } | 1978 | } |
1959 | 1979 | ||
1980 | /* | ||
1981 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
1982 | * It's safe anyway, though some callers may duplicate the journaling. | ||
1983 | * Journaling within the func just make the logic look more | ||
1984 | * straightforward. | ||
1985 | */ | ||
1986 | status = ocfs2_journal_access_di(handle, | ||
1987 | INODE_CACHE(inode), | ||
1988 | fe_bh, | ||
1989 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1990 | if (status < 0) { | ||
1991 | mlog_errno(status); | ||
1992 | goto leave; | ||
1993 | } | ||
1994 | |||
1960 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 1995 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); |
1996 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
1961 | 1997 | ||
1962 | /* Record which orphan dir our inode now resides | 1998 | /* Record which orphan dir our inode now resides |
1963 | * in. delete_inode will use this to determine which orphan | 1999 | * in. delete_inode will use this to determine which orphan |
1964 | * dir to lock. */ | 2000 | * dir to lock. */ |
1965 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); | 2001 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); |
1966 | 2002 | ||
2003 | ocfs2_journal_dirty(handle, fe_bh); | ||
2004 | |||
1967 | mlog(0, "Inode %llu orphaned in slot %d\n", | 2005 | mlog(0, "Inode %llu orphaned in slot %d\n", |
1968 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 2006 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
1969 | 2007 | ||
@@ -2099,15 +2137,12 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2099 | goto leave; | 2137 | goto leave; |
2100 | } | 2138 | } |
2101 | 2139 | ||
2102 | /* We don't use standard VFS wrapper because we don't want vfs_dq_init | 2140 | status = dquot_alloc_inode(inode); |
2103 | * to be called. */ | 2141 | if (status) |
2104 | if (sb_any_quota_active(osb->sb) && | ||
2105 | osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { | ||
2106 | status = -EDQUOT; | ||
2107 | goto leave; | 2142 | goto leave; |
2108 | } | ||
2109 | did_quota_inode = 1; | 2143 | did_quota_inode = 1; |
2110 | 2144 | ||
2145 | inode->i_nlink = 0; | ||
2111 | /* do the real work now. */ | 2146 | /* do the real work now. */ |
2112 | status = ocfs2_mknod_locked(osb, dir, inode, | 2147 | status = ocfs2_mknod_locked(osb, dir, inode, |
2113 | 0, &new_di_bh, parent_di_bh, handle, | 2148 | 0, &new_di_bh, parent_di_bh, handle, |
@@ -2124,7 +2159,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2124 | } | 2159 | } |
2125 | 2160 | ||
2126 | di = (struct ocfs2_dinode *)new_di_bh->b_data; | 2161 | di = (struct ocfs2_dinode *)new_di_bh->b_data; |
2127 | status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, | 2162 | status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, |
2128 | &orphan_insert, orphan_dir); | 2163 | &orphan_insert, orphan_dir); |
2129 | if (status < 0) { | 2164 | if (status < 0) { |
2130 | mlog_errno(status); | 2165 | mlog_errno(status); |
@@ -2136,9 +2171,10 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2136 | if (status < 0) | 2171 | if (status < 0) |
2137 | mlog_errno(status); | 2172 | mlog_errno(status); |
2138 | 2173 | ||
2174 | insert_inode_hash(inode); | ||
2139 | leave: | 2175 | leave: |
2140 | if (status < 0 && did_quota_inode) | 2176 | if (status < 0 && did_quota_inode) |
2141 | vfs_dq_free_inode(inode); | 2177 | dquot_free_inode(inode); |
2142 | if (handle) | 2178 | if (handle) |
2143 | ocfs2_commit_trans(osb, handle); | 2179 | ocfs2_commit_trans(osb, handle); |
2144 | 2180 | ||
@@ -2267,6 +2303,8 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2267 | di = (struct ocfs2_dinode *)di_bh->b_data; | 2303 | di = (struct ocfs2_dinode *)di_bh->b_data; |
2268 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); | 2304 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); |
2269 | di->i_orphaned_slot = 0; | 2305 | di->i_orphaned_slot = 0; |
2306 | inode->i_nlink = 1; | ||
2307 | ocfs2_set_links_count(di, inode->i_nlink); | ||
2270 | ocfs2_journal_dirty(handle, di_bh); | 2308 | ocfs2_journal_dirty(handle, di_bh); |
2271 | 2309 | ||
2272 | status = ocfs2_add_entry(handle, dentry, inode, | 2310 | status = ocfs2_add_entry(handle, dentry, inode, |
@@ -2284,7 +2322,6 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2284 | goto out_commit; | 2322 | goto out_commit; |
2285 | } | 2323 | } |
2286 | 2324 | ||
2287 | insert_inode_hash(inode); | ||
2288 | dentry->d_op = &ocfs2_dentry_ops; | 2325 | dentry->d_op = &ocfs2_dentry_ops; |
2289 | d_instantiate(dentry, inode); | 2326 | d_instantiate(dentry, inode); |
2290 | status = 0; | 2327 | status = 0; |
@@ -2326,4 +2363,5 @@ const struct inode_operations ocfs2_dir_iops = { | |||
2326 | .getxattr = generic_getxattr, | 2363 | .getxattr = generic_getxattr, |
2327 | .listxattr = ocfs2_listxattr, | 2364 | .listxattr = ocfs2_listxattr, |
2328 | .removexattr = generic_removexattr, | 2365 | .removexattr = generic_removexattr, |
2366 | .fiemap = ocfs2_fiemap, | ||
2329 | }; | 2367 | }; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d963d8638709..adf5e2ebc2c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -42,6 +42,7 @@ | |||
42 | 42 | ||
43 | #include "ocfs2_fs.h" | 43 | #include "ocfs2_fs.h" |
44 | #include "ocfs2_lockid.h" | 44 | #include "ocfs2_lockid.h" |
45 | #include "ocfs2_ioctl.h" | ||
45 | 46 | ||
46 | /* For struct ocfs2_blockcheck_stats */ | 47 | /* For struct ocfs2_blockcheck_stats */ |
47 | #include "blockcheck.h" | 48 | #include "blockcheck.h" |
@@ -136,6 +137,10 @@ enum ocfs2_unlock_action { | |||
136 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | 137 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a |
137 | call to dlm_lock. Only | 138 | call to dlm_lock. Only |
138 | exists with BUSY set. */ | 139 | exists with BUSY set. */ |
140 | #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread | ||
141 | * from downconverting | ||
142 | * before the upconvert | ||
143 | * has completed */ | ||
139 | 144 | ||
140 | struct ocfs2_lock_res_ops; | 145 | struct ocfs2_lock_res_ops; |
141 | 146 | ||
@@ -155,7 +160,7 @@ struct ocfs2_lock_res { | |||
155 | int l_level; | 160 | int l_level; |
156 | unsigned int l_ro_holders; | 161 | unsigned int l_ro_holders; |
157 | unsigned int l_ex_holders; | 162 | unsigned int l_ex_holders; |
158 | union ocfs2_dlm_lksb l_lksb; | 163 | struct ocfs2_dlm_lksb l_lksb; |
159 | 164 | ||
160 | /* used from AST/BAST funcs. */ | 165 | /* used from AST/BAST funcs. */ |
161 | enum ocfs2_ast_action l_action; | 166 | enum ocfs2_ast_action l_action; |
@@ -245,9 +250,11 @@ enum ocfs2_mount_options | |||
245 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | 250 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ |
246 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ | 251 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ |
247 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ | 252 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ |
248 | OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ | 253 | OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */ |
249 | OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ | 254 | OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access |
250 | OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ | 255 | control lists */ |
256 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | ||
257 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | ||
251 | }; | 258 | }; |
252 | 259 | ||
253 | #define OCFS2_OSB_SOFT_RO 0x0001 | 260 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -299,7 +306,9 @@ struct ocfs2_super | |||
299 | u32 s_next_generation; | 306 | u32 s_next_generation; |
300 | unsigned long osb_flags; | 307 | unsigned long osb_flags; |
301 | s16 s_inode_steal_slot; | 308 | s16 s_inode_steal_slot; |
309 | s16 s_meta_steal_slot; | ||
302 | atomic_t s_num_inodes_stolen; | 310 | atomic_t s_num_inodes_stolen; |
311 | atomic_t s_num_meta_stolen; | ||
303 | 312 | ||
304 | unsigned long s_mount_opt; | 313 | unsigned long s_mount_opt; |
305 | unsigned int s_atime_quantum; | 314 | unsigned int s_atime_quantum; |
@@ -754,35 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | |||
754 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | 763 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); |
755 | } | 764 | } |
756 | 765 | ||
757 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | 766 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) |
758 | { | 767 | { |
759 | spin_lock(&osb->osb_lock); | 768 | ext2_set_bit(bit, bitmap); |
760 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
761 | spin_unlock(&osb->osb_lock); | ||
762 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
763 | } | 769 | } |
770 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) | ||
764 | 771 | ||
765 | static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, | 772 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) |
766 | s16 slot) | ||
767 | { | 773 | { |
768 | spin_lock(&osb->osb_lock); | 774 | ext2_clear_bit(bit, bitmap); |
769 | osb->s_inode_steal_slot = slot; | ||
770 | spin_unlock(&osb->osb_lock); | ||
771 | } | ||
772 | |||
773 | static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
774 | { | ||
775 | s16 slot; | ||
776 | |||
777 | spin_lock(&osb->osb_lock); | ||
778 | slot = osb->s_inode_steal_slot; | ||
779 | spin_unlock(&osb->osb_lock); | ||
780 | |||
781 | return slot; | ||
782 | } | 775 | } |
776 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) | ||
783 | 777 | ||
784 | #define ocfs2_set_bit ext2_set_bit | ||
785 | #define ocfs2_clear_bit ext2_clear_bit | ||
786 | #define ocfs2_test_bit ext2_test_bit | 778 | #define ocfs2_test_bit ext2_test_bit |
787 | #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit | 779 | #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit |
788 | #define ocfs2_find_next_bit ext2_find_next_bit | 780 | #define ocfs2_find_next_bit ext2_find_next_bit |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index e9431e4a5e7c..bb37218a7978 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -254,63 +254,6 @@ | |||
254 | * refcount tree */ | 254 | * refcount tree */ |
255 | 255 | ||
256 | /* | 256 | /* |
257 | * ioctl commands | ||
258 | */ | ||
259 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | ||
260 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | ||
261 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | ||
262 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | ||
263 | |||
264 | /* | ||
265 | * Space reservation / allocation / free ioctls and argument structure | ||
266 | * are designed to be compatible with XFS. | ||
267 | * | ||
268 | * ALLOCSP* and FREESP* are not and will never be supported, but are | ||
269 | * included here for completeness. | ||
270 | */ | ||
271 | struct ocfs2_space_resv { | ||
272 | __s16 l_type; | ||
273 | __s16 l_whence; | ||
274 | __s64 l_start; | ||
275 | __s64 l_len; /* len == 0 means until end of file */ | ||
276 | __s32 l_sysid; | ||
277 | __u32 l_pid; | ||
278 | __s32 l_pad[4]; /* reserve area */ | ||
279 | }; | ||
280 | |||
281 | #define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) | ||
282 | #define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) | ||
283 | #define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) | ||
284 | #define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) | ||
285 | #define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) | ||
286 | #define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) | ||
287 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | ||
288 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | ||
289 | |||
290 | /* Used to pass group descriptor data when online resize is done */ | ||
291 | struct ocfs2_new_group_input { | ||
292 | __u64 group; /* Group descriptor's blkno. */ | ||
293 | __u32 clusters; /* Total number of clusters in this group */ | ||
294 | __u32 frees; /* Total free clusters in this group */ | ||
295 | __u16 chain; /* Chain for this group */ | ||
296 | __u16 reserved1; | ||
297 | __u32 reserved2; | ||
298 | }; | ||
299 | |||
300 | #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) | ||
301 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | ||
302 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | ||
303 | |||
304 | /* Used to pass 2 file names to reflink. */ | ||
305 | struct reflink_arguments { | ||
306 | __u64 old_path; | ||
307 | __u64 new_path; | ||
308 | __u64 preserve; | ||
309 | }; | ||
310 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | ||
311 | |||
312 | |||
313 | /* | ||
314 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 257 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
315 | */ | 258 | */ |
316 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ | 259 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ |
@@ -1202,7 +1145,7 @@ struct ocfs2_local_disk_dqinfo { | |||
1202 | /* Header of one chunk of a quota file */ | 1145 | /* Header of one chunk of a quota file */ |
1203 | struct ocfs2_local_disk_chunk { | 1146 | struct ocfs2_local_disk_chunk { |
1204 | __le32 dqc_free; /* Number of free entries in the bitmap */ | 1147 | __le32 dqc_free; /* Number of free entries in the bitmap */ |
1205 | u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding | 1148 | __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding |
1206 | * chunk of quota file */ | 1149 | * chunk of quota file */ |
1207 | }; | 1150 | }; |
1208 | 1151 | ||
@@ -1417,9 +1360,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize) | |||
1417 | return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); | 1360 | return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); |
1418 | } | 1361 | } |
1419 | 1362 | ||
1420 | static inline int ocfs2_max_inline_data(int blocksize) | 1363 | static inline int ocfs2_max_inline_data_with_xattr(int blocksize, |
1364 | struct ocfs2_dinode *di) | ||
1421 | { | 1365 | { |
1422 | return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data); | 1366 | if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL)) |
1367 | return blocksize - | ||
1368 | offsetof(struct ocfs2_dinode, id2.i_data.id_data) - | ||
1369 | di->i_xattr_inline_size; | ||
1370 | else | ||
1371 | return blocksize - | ||
1372 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
1423 | } | 1373 | } |
1424 | 1374 | ||
1425 | static inline int ocfs2_extent_recs_per_inode(int blocksize) | 1375 | static inline int ocfs2_extent_recs_per_inode(int blocksize) |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h new file mode 100644 index 000000000000..2d3420af1a83 --- /dev/null +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -0,0 +1,79 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ocfs2_ioctl.h | ||
5 | * | ||
6 | * Defines OCFS2 ioctls. | ||
7 | * | ||
8 | * Copyright (C) 2010 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License, version 2, as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #ifndef OCFS2_IOCTL_H | ||
21 | #define OCFS2_IOCTL_H | ||
22 | |||
23 | /* | ||
24 | * ioctl commands | ||
25 | */ | ||
26 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | ||
27 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | ||
28 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | ||
29 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | ||
30 | |||
31 | /* | ||
32 | * Space reservation / allocation / free ioctls and argument structure | ||
33 | * are designed to be compatible with XFS. | ||
34 | * | ||
35 | * ALLOCSP* and FREESP* are not and will never be supported, but are | ||
36 | * included here for completeness. | ||
37 | */ | ||
38 | struct ocfs2_space_resv { | ||
39 | __s16 l_type; | ||
40 | __s16 l_whence; | ||
41 | __s64 l_start; | ||
42 | __s64 l_len; /* len == 0 means until end of file */ | ||
43 | __s32 l_sysid; | ||
44 | __u32 l_pid; | ||
45 | __s32 l_pad[4]; /* reserve area */ | ||
46 | }; | ||
47 | |||
48 | #define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) | ||
49 | #define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) | ||
50 | #define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) | ||
51 | #define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) | ||
52 | #define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) | ||
53 | #define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) | ||
54 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | ||
55 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | ||
56 | |||
57 | /* Used to pass group descriptor data when online resize is done */ | ||
58 | struct ocfs2_new_group_input { | ||
59 | __u64 group; /* Group descriptor's blkno. */ | ||
60 | __u32 clusters; /* Total number of clusters in this group */ | ||
61 | __u32 frees; /* Total free clusters in this group */ | ||
62 | __u16 chain; /* Chain for this group */ | ||
63 | __u16 reserved1; | ||
64 | __u32 reserved2; | ||
65 | }; | ||
66 | |||
67 | #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) | ||
68 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | ||
69 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | ||
70 | |||
71 | /* Used to pass 2 file names to reflink. */ | ||
72 | struct reflink_arguments { | ||
73 | __u64 old_path; | ||
74 | __u64 new_path; | ||
75 | __u64 preserve; | ||
76 | }; | ||
77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | ||
78 | |||
79 | #endif /* OCFS2_IOCTL_H */ | ||
diff --git a/fs/ocfs2/ocfs2_lockingver.h b/fs/ocfs2/ocfs2_lockingver.h index 82d5eeac0fff..2e45c8d2ea7e 100644 --- a/fs/ocfs2/ocfs2_lockingver.h +++ b/fs/ocfs2/ocfs2_lockingver.h | |||
@@ -23,6 +23,8 @@ | |||
23 | /* | 23 | /* |
24 | * The protocol version for ocfs2 cluster locking. See dlmglue.c for | 24 | * The protocol version for ocfs2 cluster locking. See dlmglue.c for |
25 | * more details. | 25 | * more details. |
26 | * | ||
27 | * 1.0 - Initial locking version from ocfs2 1.4. | ||
26 | */ | 28 | */ |
27 | #define OCFS2_LOCKING_PROTOCOL_MAJOR 1 | 29 | #define OCFS2_LOCKING_PROTOCOL_MAJOR 1 |
28 | #define OCFS2_LOCKING_PROTOCOL_MINOR 0 | 30 | #define OCFS2_LOCKING_PROTOCOL_MINOR 0 |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index e5df9d170b0c..123bc520a2c0 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -17,10 +17,6 @@ | |||
17 | 17 | ||
18 | #include "ocfs2.h" | 18 | #include "ocfs2.h" |
19 | 19 | ||
20 | /* Common stuff */ | ||
21 | /* id number of quota format */ | ||
22 | #define QFMT_OCFS2 3 | ||
23 | |||
24 | /* | 20 | /* |
25 | * In-memory structures | 21 | * In-memory structures |
26 | */ | 22 | */ |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b437dc0c4cad..ab42a74c7539 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -3,6 +3,7 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/fs.h> | 5 | #include <linux/fs.h> |
6 | #include <linux/slab.h> | ||
6 | #include <linux/quota.h> | 7 | #include <linux/quota.h> |
7 | #include <linux/quotaops.h> | 8 | #include <linux/quotaops.h> |
8 | #include <linux/dqblk_qtree.h> | 9 | #include <linux/dqblk_qtree.h> |
@@ -851,13 +852,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) | |||
851 | } | 852 | } |
852 | 853 | ||
853 | const struct dquot_operations ocfs2_quota_operations = { | 854 | const struct dquot_operations ocfs2_quota_operations = { |
854 | .initialize = dquot_initialize, | ||
855 | .drop = dquot_drop, | ||
856 | .alloc_space = dquot_alloc_space, | ||
857 | .alloc_inode = dquot_alloc_inode, | ||
858 | .free_space = dquot_free_space, | ||
859 | .free_inode = dquot_free_inode, | ||
860 | .transfer = dquot_transfer, | ||
861 | .write_dquot = ocfs2_write_dquot, | 855 | .write_dquot = ocfs2_write_dquot, |
862 | .acquire_dquot = ocfs2_acquire_dquot, | 856 | .acquire_dquot = ocfs2_acquire_dquot, |
863 | .release_dquot = ocfs2_release_dquot, | 857 | .release_dquot = ocfs2_release_dquot, |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 1a2c50a759fa..9ad49305f450 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -3,6 +3,7 @@ | |||
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/fs.h> | 5 | #include <linux/fs.h> |
6 | #include <linux/slab.h> | ||
6 | #include <linux/quota.h> | 7 | #include <linux/quota.h> |
7 | #include <linux/quotaops.h> | 8 | #include <linux/quotaops.h> |
8 | #include <linux/module.h> | 9 | #include <linux/module.h> |
@@ -457,7 +458,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, | |||
457 | break; | 458 | break; |
458 | } | 459 | } |
459 | dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; | 460 | dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; |
460 | for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { | 461 | for_each_set_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { |
461 | qbh = NULL; | 462 | qbh = NULL; |
462 | status = ocfs2_read_quota_block(lqinode, | 463 | status = ocfs2_read_quota_block(lqinode, |
463 | ol_dqblk_block(sb, chunk, bit), | 464 | ol_dqblk_block(sb, chunk, bit), |
@@ -1325,7 +1326,7 @@ out: | |||
1325 | return status; | 1326 | return status; |
1326 | } | 1327 | } |
1327 | 1328 | ||
1328 | static struct quota_format_ops ocfs2_format_ops = { | 1329 | static const struct quota_format_ops ocfs2_format_ops = { |
1329 | .check_quota_file = ocfs2_local_check_quota_file, | 1330 | .check_quota_file = ocfs2_local_check_quota_file, |
1330 | .read_file_info = ocfs2_local_read_info, | 1331 | .read_file_info = ocfs2_local_read_info, |
1331 | .write_file_info = ocfs2_global_write_info, | 1332 | .write_file_info = ocfs2_global_write_info, |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3a0df7a1b810..5cbcd0f008fc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -37,7 +37,6 @@ | |||
37 | 37 | ||
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/blkdev.h> | 39 | #include <linux/blkdev.h> |
40 | #include <linux/gfp.h> | ||
41 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
42 | #include <linux/writeback.h> | 41 | #include <linux/writeback.h> |
43 | #include <linux/pagevec.h> | 42 | #include <linux/pagevec.h> |
@@ -276,7 +275,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, | |||
276 | spin_unlock(&osb->osb_lock); | 275 | spin_unlock(&osb->osb_lock); |
277 | } | 276 | } |
278 | 277 | ||
279 | void ocfs2_kref_remove_refcount_tree(struct kref *kref) | 278 | static void ocfs2_kref_remove_refcount_tree(struct kref *kref) |
280 | { | 279 | { |
281 | struct ocfs2_refcount_tree *tree = | 280 | struct ocfs2_refcount_tree *tree = |
282 | container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); | 281 | container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); |
@@ -524,23 +523,6 @@ out: | |||
524 | return ret; | 523 | return ret; |
525 | } | 524 | } |
526 | 525 | ||
527 | int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, | ||
528 | struct ocfs2_refcount_tree **ret_tree, | ||
529 | struct buffer_head **ref_bh) | ||
530 | { | ||
531 | int ret; | ||
532 | u64 ref_blkno; | ||
533 | |||
534 | ret = ocfs2_get_refcount_block(inode, &ref_blkno); | ||
535 | if (ret) { | ||
536 | mlog_errno(ret); | ||
537 | return ret; | ||
538 | } | ||
539 | |||
540 | return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, | ||
541 | rw, ret_tree, ref_bh); | ||
542 | } | ||
543 | |||
544 | void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, | 526 | void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, |
545 | struct ocfs2_refcount_tree *tree, int rw) | 527 | struct ocfs2_refcount_tree *tree, int rw) |
546 | { | 528 | { |
@@ -643,7 +625,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, | |||
643 | rb = (struct ocfs2_refcount_block *)new_bh->b_data; | 625 | rb = (struct ocfs2_refcount_block *)new_bh->b_data; |
644 | memset(rb, 0, inode->i_sb->s_blocksize); | 626 | memset(rb, 0, inode->i_sb->s_blocksize); |
645 | strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); | 627 | strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); |
646 | rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); | 628 | rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); |
647 | rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 629 | rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
648 | rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); | 630 | rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); |
649 | rb->rf_blkno = cpu_to_le64(first_blkno); | 631 | rb->rf_blkno = cpu_to_le64(first_blkno); |
@@ -969,6 +951,103 @@ out: | |||
969 | } | 951 | } |
970 | 952 | ||
971 | /* | 953 | /* |
954 | * Find the end range for a leaf refcount block indicated by | ||
955 | * el->l_recs[index].e_blkno. | ||
956 | */ | ||
957 | static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, | ||
958 | struct buffer_head *ref_root_bh, | ||
959 | struct ocfs2_extent_block *eb, | ||
960 | struct ocfs2_extent_list *el, | ||
961 | int index, u32 *cpos_end) | ||
962 | { | ||
963 | int ret, i, subtree_root; | ||
964 | u32 cpos; | ||
965 | u64 blkno; | ||
966 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
967 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | ||
968 | struct ocfs2_extent_tree et; | ||
969 | struct ocfs2_extent_list *tmp_el; | ||
970 | |||
971 | if (index < le16_to_cpu(el->l_next_free_rec) - 1) { | ||
972 | /* | ||
973 | * We have a extent rec after index, so just use the e_cpos | ||
974 | * of the next extent rec. | ||
975 | */ | ||
976 | *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); | ||
977 | return 0; | ||
978 | } | ||
979 | |||
980 | if (!eb || (eb && !eb->h_next_leaf_blk)) { | ||
981 | /* | ||
982 | * We are the last extent rec, so any high cpos should | ||
983 | * be stored in this leaf refcount block. | ||
984 | */ | ||
985 | *cpos_end = UINT_MAX; | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * If the extent block isn't the last one, we have to find | ||
991 | * the subtree root between this extent block and the next | ||
992 | * leaf extent block and get the corresponding e_cpos from | ||
993 | * the subroot. Otherwise we may corrupt the b-tree. | ||
994 | */ | ||
995 | ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); | ||
996 | |||
997 | left_path = ocfs2_new_path_from_et(&et); | ||
998 | if (!left_path) { | ||
999 | ret = -ENOMEM; | ||
1000 | mlog_errno(ret); | ||
1001 | goto out; | ||
1002 | } | ||
1003 | |||
1004 | cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos); | ||
1005 | ret = ocfs2_find_path(ci, left_path, cpos); | ||
1006 | if (ret) { | ||
1007 | mlog_errno(ret); | ||
1008 | goto out; | ||
1009 | } | ||
1010 | |||
1011 | right_path = ocfs2_new_path_from_path(left_path); | ||
1012 | if (!right_path) { | ||
1013 | ret = -ENOMEM; | ||
1014 | mlog_errno(ret); | ||
1015 | goto out; | ||
1016 | } | ||
1017 | |||
1018 | ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); | ||
1019 | if (ret) { | ||
1020 | mlog_errno(ret); | ||
1021 | goto out; | ||
1022 | } | ||
1023 | |||
1024 | ret = ocfs2_find_path(ci, right_path, cpos); | ||
1025 | if (ret) { | ||
1026 | mlog_errno(ret); | ||
1027 | goto out; | ||
1028 | } | ||
1029 | |||
1030 | subtree_root = ocfs2_find_subtree_root(&et, left_path, | ||
1031 | right_path); | ||
1032 | |||
1033 | tmp_el = left_path->p_node[subtree_root].el; | ||
1034 | blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; | ||
1035 | for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { | ||
1036 | if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { | ||
1037 | *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); | ||
1038 | break; | ||
1039 | } | ||
1040 | } | ||
1041 | |||
1042 | BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); | ||
1043 | |||
1044 | out: | ||
1045 | ocfs2_free_path(left_path); | ||
1046 | ocfs2_free_path(right_path); | ||
1047 | return ret; | ||
1048 | } | ||
1049 | |||
1050 | /* | ||
972 | * Given a cpos and len, try to find the refcount record which contains cpos. | 1051 | * Given a cpos and len, try to find the refcount record which contains cpos. |
973 | * 1. If cpos can be found in one refcount record, return the record. | 1052 | * 1. If cpos can be found in one refcount record, return the record. |
974 | * 2. If cpos can't be found, return a fake record which start from cpos | 1053 | * 2. If cpos can't be found, return a fake record which start from cpos |
@@ -983,10 +1062,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, | |||
983 | struct buffer_head **ret_bh) | 1062 | struct buffer_head **ret_bh) |
984 | { | 1063 | { |
985 | int ret = 0, i, found; | 1064 | int ret = 0, i, found; |
986 | u32 low_cpos; | 1065 | u32 low_cpos, uninitialized_var(cpos_end); |
987 | struct ocfs2_extent_list *el; | 1066 | struct ocfs2_extent_list *el; |
988 | struct ocfs2_extent_rec *tmp, *rec = NULL; | 1067 | struct ocfs2_extent_rec *rec = NULL; |
989 | struct ocfs2_extent_block *eb; | 1068 | struct ocfs2_extent_block *eb = NULL; |
990 | struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; | 1069 | struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; |
991 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 1070 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
992 | struct ocfs2_refcount_block *rb = | 1071 | struct ocfs2_refcount_block *rb = |
@@ -1034,12 +1113,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, | |||
1034 | } | 1113 | } |
1035 | } | 1114 | } |
1036 | 1115 | ||
1037 | /* adjust len when we have ocfs2_extent_rec after it. */ | 1116 | if (found) { |
1038 | if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { | 1117 | ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh, |
1039 | tmp = &el->l_recs[i+1]; | 1118 | eb, el, i, &cpos_end); |
1119 | if (ret) { | ||
1120 | mlog_errno(ret); | ||
1121 | goto out; | ||
1122 | } | ||
1040 | 1123 | ||
1041 | if (le32_to_cpu(tmp->e_cpos) < cpos + len) | 1124 | if (cpos_end < low_cpos + len) |
1042 | len = le32_to_cpu(tmp->e_cpos) - cpos; | 1125 | len = cpos_end - low_cpos; |
1043 | } | 1126 | } |
1044 | 1127 | ||
1045 | ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), | 1128 | ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), |
@@ -1246,7 +1329,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, | |||
1246 | memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); | 1329 | memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); |
1247 | 1330 | ||
1248 | new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; | 1331 | new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; |
1249 | new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); | 1332 | new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); |
1250 | new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 1333 | new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
1251 | new_rb->rf_blkno = cpu_to_le64(blkno); | 1334 | new_rb->rf_blkno = cpu_to_le64(blkno); |
1252 | new_rb->rf_cpos = cpu_to_le32(0); | 1335 | new_rb->rf_cpos = cpu_to_le32(0); |
@@ -1418,7 +1501,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, | |||
1418 | 1501 | ||
1419 | /* change old and new rl_used accordingly. */ | 1502 | /* change old and new rl_used accordingly. */ |
1420 | le16_add_cpu(&rl->rl_used, -num_moved); | 1503 | le16_add_cpu(&rl->rl_used, -num_moved); |
1421 | new_rl->rl_used = cpu_to_le32(num_moved); | 1504 | new_rl->rl_used = cpu_to_le16(num_moved); |
1422 | 1505 | ||
1423 | sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), | 1506 | sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), |
1424 | sizeof(struct ocfs2_refcount_rec), | 1507 | sizeof(struct ocfs2_refcount_rec), |
@@ -1492,7 +1575,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, | |||
1492 | new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; | 1575 | new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; |
1493 | memset(new_rb, 0, sb->s_blocksize); | 1576 | memset(new_rb, 0, sb->s_blocksize); |
1494 | strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); | 1577 | strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); |
1495 | new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); | 1578 | new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); |
1496 | new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 1579 | new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
1497 | new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); | 1580 | new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); |
1498 | new_rb->rf_blkno = cpu_to_le64(blkno); | 1581 | new_rb->rf_blkno = cpu_to_le64(blkno); |
@@ -1797,7 +1880,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle, | |||
1797 | recs_need++; | 1880 | recs_need++; |
1798 | 1881 | ||
1799 | /* If the leaf block don't have enough record, expand it. */ | 1882 | /* If the leaf block don't have enough record, expand it. */ |
1800 | if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { | 1883 | if (le16_to_cpu(rf_list->rl_used) + recs_need > |
1884 | le16_to_cpu(rf_list->rl_count)) { | ||
1801 | struct ocfs2_refcount_rec tmp_rec; | 1885 | struct ocfs2_refcount_rec tmp_rec; |
1802 | u64 cpos = le64_to_cpu(orig_rec->r_cpos); | 1886 | u64 cpos = le64_to_cpu(orig_rec->r_cpos); |
1803 | len = le32_to_cpu(orig_rec->r_clusters); | 1887 | len = le32_to_cpu(orig_rec->r_clusters); |
@@ -1859,7 +1943,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle, | |||
1859 | memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); | 1943 | memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); |
1860 | le64_add_cpu(&tail_rec->r_cpos, | 1944 | le64_add_cpu(&tail_rec->r_cpos, |
1861 | le32_to_cpu(tail_rec->r_clusters) - len); | 1945 | le32_to_cpu(tail_rec->r_clusters) - len); |
1862 | tail_rec->r_clusters = le32_to_cpu(len); | 1946 | tail_rec->r_clusters = cpu_to_le32(len); |
1863 | } | 1947 | } |
1864 | 1948 | ||
1865 | /* | 1949 | /* |
@@ -2431,7 +2515,7 @@ out: | |||
2431 | * we gonna touch and whether we need to create new blocks. | 2515 | * we gonna touch and whether we need to create new blocks. |
2432 | * | 2516 | * |
2433 | * Normally the refcount blocks store these refcount should be | 2517 | * Normally the refcount blocks store these refcount should be |
2434 | * continguous also, so that we can get the number easily. | 2518 | * contiguous also, so that we can get the number easily. |
2435 | * As for meta_ac, we will at most add split 2 refcount record and | 2519 | * As for meta_ac, we will at most add split 2 refcount record and |
2436 | * 2 more refcount block, so just check it in a rough way. | 2520 | * 2 more refcount block, so just check it in a rough way. |
2437 | * | 2521 | * |
@@ -2860,7 +2944,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2860 | 2944 | ||
2861 | while (offset < end) { | 2945 | while (offset < end) { |
2862 | page_index = offset >> PAGE_CACHE_SHIFT; | 2946 | page_index = offset >> PAGE_CACHE_SHIFT; |
2863 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | 2947 | map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; |
2864 | if (map_end > end) | 2948 | if (map_end > end) |
2865 | map_end = end; | 2949 | map_end = end; |
2866 | 2950 | ||
@@ -2872,8 +2956,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2872 | 2956 | ||
2873 | page = grab_cache_page(mapping, page_index); | 2957 | page = grab_cache_page(mapping, page_index); |
2874 | 2958 | ||
2875 | /* This page can't be dirtied before we CoW it out. */ | 2959 | /* |
2876 | BUG_ON(PageDirty(page)); | 2960 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page |
2961 | * can't be dirtied before we CoW it out. | ||
2962 | */ | ||
2963 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | ||
2964 | BUG_ON(PageDirty(page)); | ||
2877 | 2965 | ||
2878 | if (!PageUptodate(page)) { | 2966 | if (!PageUptodate(page)) { |
2879 | ret = block_read_full_page(page, ocfs2_get_block); | 2967 | ret = block_read_full_page(page, ocfs2_get_block); |
@@ -3085,7 +3173,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
3085 | 3173 | ||
3086 | while (offset < end) { | 3174 | while (offset < end) { |
3087 | page_index = offset >> PAGE_CACHE_SHIFT; | 3175 | page_index = offset >> PAGE_CACHE_SHIFT; |
3088 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | 3176 | map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; |
3089 | if (map_end > end) | 3177 | if (map_end > end) |
3090 | map_end = end; | 3178 | map_end = end; |
3091 | 3179 | ||
@@ -3840,8 +3928,7 @@ static int ocfs2_add_refcounted_extent(struct inode *inode, | |||
3840 | } | 3928 | } |
3841 | 3929 | ||
3842 | ret = ocfs2_insert_extent(handle, et, cpos, | 3930 | ret = ocfs2_insert_extent(handle, et, cpos, |
3843 | cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, | 3931 | ocfs2_clusters_to_blocks(inode->i_sb, p_cluster), |
3844 | p_cluster)), | ||
3845 | num_clusters, ext_flags, meta_ac); | 3932 | num_clusters, ext_flags, meta_ac); |
3846 | if (ret) { | 3933 | if (ret) { |
3847 | mlog_errno(ret); | 3934 | mlog_errno(ret); |
@@ -3987,6 +4074,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode, | |||
3987 | OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; | 4074 | OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; |
3988 | spin_unlock(&OCFS2_I(t_inode)->ip_lock); | 4075 | spin_unlock(&OCFS2_I(t_inode)->ip_lock); |
3989 | i_size_write(t_inode, size); | 4076 | i_size_write(t_inode, size); |
4077 | t_inode->i_blocks = s_inode->i_blocks; | ||
3990 | 4078 | ||
3991 | di->i_xattr_inline_size = s_di->i_xattr_inline_size; | 4079 | di->i_xattr_inline_size = s_di->i_xattr_inline_size; |
3992 | di->i_clusters = s_di->i_clusters; | 4080 | di->i_clusters = s_di->i_clusters; |
@@ -3995,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode, | |||
3995 | di->i_attr = s_di->i_attr; | 4083 | di->i_attr = s_di->i_attr; |
3996 | 4084 | ||
3997 | if (preserve) { | 4085 | if (preserve) { |
4086 | t_inode->i_uid = s_inode->i_uid; | ||
4087 | t_inode->i_gid = s_inode->i_gid; | ||
4088 | t_inode->i_mode = s_inode->i_mode; | ||
3998 | di->i_uid = s_di->i_uid; | 4089 | di->i_uid = s_di->i_uid; |
3999 | di->i_gid = s_di->i_gid; | 4090 | di->i_gid = s_di->i_gid; |
4000 | di->i_mode = s_di->i_mode; | 4091 | di->i_mode = s_di->i_mode; |
@@ -4253,8 +4344,8 @@ static int ocfs2_user_path_parent(const char __user *path, | |||
4253 | * @new_dentry: target dentry | 4344 | * @new_dentry: target dentry |
4254 | * @preserve: if true, preserve all file attributes | 4345 | * @preserve: if true, preserve all file attributes |
4255 | */ | 4346 | */ |
4256 | int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, | 4347 | static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, |
4257 | struct dentry *new_dentry, bool preserve) | 4348 | struct dentry *new_dentry, bool preserve) |
4258 | { | 4349 | { |
4259 | struct inode *inode = old_dentry->d_inode; | 4350 | struct inode *inode = old_dentry->d_inode; |
4260 | int error; | 4351 | int error; |
@@ -4302,7 +4393,7 @@ int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, | |||
4302 | } | 4393 | } |
4303 | 4394 | ||
4304 | mutex_lock(&inode->i_mutex); | 4395 | mutex_lock(&inode->i_mutex); |
4305 | vfs_dq_init(dir); | 4396 | dquot_initialize(dir); |
4306 | error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); | 4397 | error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); |
4307 | mutex_unlock(&inode->i_mutex); | 4398 | mutex_unlock(&inode->i_mutex); |
4308 | if (!error) | 4399 | if (!error) |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index e49c41050264..0d3049f696c5 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
22 | #include <linux/slab.h> | ||
22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
23 | 24 | ||
24 | /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ | 25 | /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ |
@@ -161,24 +162,23 @@ static int dlm_status_to_errno(enum dlm_status status) | |||
161 | 162 | ||
162 | static void o2dlm_lock_ast_wrapper(void *astarg) | 163 | static void o2dlm_lock_ast_wrapper(void *astarg) |
163 | { | 164 | { |
164 | BUG_ON(o2cb_stack.sp_proto == NULL); | 165 | struct ocfs2_dlm_lksb *lksb = astarg; |
165 | 166 | ||
166 | o2cb_stack.sp_proto->lp_lock_ast(astarg); | 167 | lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); |
167 | } | 168 | } |
168 | 169 | ||
169 | static void o2dlm_blocking_ast_wrapper(void *astarg, int level) | 170 | static void o2dlm_blocking_ast_wrapper(void *astarg, int level) |
170 | { | 171 | { |
171 | BUG_ON(o2cb_stack.sp_proto == NULL); | 172 | struct ocfs2_dlm_lksb *lksb = astarg; |
172 | 173 | ||
173 | o2cb_stack.sp_proto->lp_blocking_ast(astarg, level); | 174 | lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); |
174 | } | 175 | } |
175 | 176 | ||
176 | static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) | 177 | static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) |
177 | { | 178 | { |
179 | struct ocfs2_dlm_lksb *lksb = astarg; | ||
178 | int error = dlm_status_to_errno(status); | 180 | int error = dlm_status_to_errno(status); |
179 | 181 | ||
180 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
181 | |||
182 | /* | 182 | /* |
183 | * In o2dlm, you can get both the lock_ast() for the lock being | 183 | * In o2dlm, you can get both the lock_ast() for the lock being |
184 | * granted and the unlock_ast() for the CANCEL failing. A | 184 | * granted and the unlock_ast() for the CANCEL failing. A |
@@ -193,16 +193,15 @@ static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) | |||
193 | if (status == DLM_CANCELGRANT) | 193 | if (status == DLM_CANCELGRANT) |
194 | return; | 194 | return; |
195 | 195 | ||
196 | o2cb_stack.sp_proto->lp_unlock_ast(astarg, error); | 196 | lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, error); |
197 | } | 197 | } |
198 | 198 | ||
199 | static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, | 199 | static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, |
200 | int mode, | 200 | int mode, |
201 | union ocfs2_dlm_lksb *lksb, | 201 | struct ocfs2_dlm_lksb *lksb, |
202 | u32 flags, | 202 | u32 flags, |
203 | void *name, | 203 | void *name, |
204 | unsigned int namelen, | 204 | unsigned int namelen) |
205 | void *astarg) | ||
206 | { | 205 | { |
207 | enum dlm_status status; | 206 | enum dlm_status status; |
208 | int o2dlm_mode = mode_to_o2dlm(mode); | 207 | int o2dlm_mode = mode_to_o2dlm(mode); |
@@ -211,28 +210,27 @@ static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, | |||
211 | 210 | ||
212 | status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, | 211 | status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, |
213 | o2dlm_flags, name, namelen, | 212 | o2dlm_flags, name, namelen, |
214 | o2dlm_lock_ast_wrapper, astarg, | 213 | o2dlm_lock_ast_wrapper, lksb, |
215 | o2dlm_blocking_ast_wrapper); | 214 | o2dlm_blocking_ast_wrapper); |
216 | ret = dlm_status_to_errno(status); | 215 | ret = dlm_status_to_errno(status); |
217 | return ret; | 216 | return ret; |
218 | } | 217 | } |
219 | 218 | ||
220 | static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, | 219 | static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, |
221 | union ocfs2_dlm_lksb *lksb, | 220 | struct ocfs2_dlm_lksb *lksb, |
222 | u32 flags, | 221 | u32 flags) |
223 | void *astarg) | ||
224 | { | 222 | { |
225 | enum dlm_status status; | 223 | enum dlm_status status; |
226 | int o2dlm_flags = flags_to_o2dlm(flags); | 224 | int o2dlm_flags = flags_to_o2dlm(flags); |
227 | int ret; | 225 | int ret; |
228 | 226 | ||
229 | status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, | 227 | status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, |
230 | o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); | 228 | o2dlm_flags, o2dlm_unlock_ast_wrapper, lksb); |
231 | ret = dlm_status_to_errno(status); | 229 | ret = dlm_status_to_errno(status); |
232 | return ret; | 230 | return ret; |
233 | } | 231 | } |
234 | 232 | ||
235 | static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | 233 | static int o2cb_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) |
236 | { | 234 | { |
237 | return dlm_status_to_errno(lksb->lksb_o2dlm.status); | 235 | return dlm_status_to_errno(lksb->lksb_o2dlm.status); |
238 | } | 236 | } |
@@ -242,17 +240,17 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | |||
242 | * contents, it will zero out the LVB. Thus the caller can always trust | 240 | * contents, it will zero out the LVB. Thus the caller can always trust |
243 | * the contents. | 241 | * the contents. |
244 | */ | 242 | */ |
245 | static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) | 243 | static int o2cb_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) |
246 | { | 244 | { |
247 | return 1; | 245 | return 1; |
248 | } | 246 | } |
249 | 247 | ||
250 | static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) | 248 | static void *o2cb_dlm_lvb(struct ocfs2_dlm_lksb *lksb) |
251 | { | 249 | { |
252 | return (void *)(lksb->lksb_o2dlm.lvb); | 250 | return (void *)(lksb->lksb_o2dlm.lvb); |
253 | } | 251 | } |
254 | 252 | ||
255 | static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb) | 253 | static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb) |
256 | { | 254 | { |
257 | dlm_print_one_lock(lksb->lksb_o2dlm.lockid); | 255 | dlm_print_one_lock(lksb->lksb_o2dlm.lockid); |
258 | } | 256 | } |
@@ -277,10 +275,10 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
277 | u32 dlm_key; | 275 | u32 dlm_key; |
278 | struct dlm_ctxt *dlm; | 276 | struct dlm_ctxt *dlm; |
279 | struct o2dlm_private *priv; | 277 | struct o2dlm_private *priv; |
280 | struct dlm_protocol_version dlm_version; | 278 | struct dlm_protocol_version fs_version; |
281 | 279 | ||
282 | BUG_ON(conn == NULL); | 280 | BUG_ON(conn == NULL); |
283 | BUG_ON(o2cb_stack.sp_proto == NULL); | 281 | BUG_ON(conn->cc_proto == NULL); |
284 | 282 | ||
285 | /* for now we only have one cluster/node, make sure we see it | 283 | /* for now we only have one cluster/node, make sure we see it |
286 | * in the heartbeat universe */ | 284 | * in the heartbeat universe */ |
@@ -304,18 +302,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
304 | /* used by the dlm code to make message headers unique, each | 302 | /* used by the dlm code to make message headers unique, each |
305 | * node in this domain must agree on this. */ | 303 | * node in this domain must agree on this. */ |
306 | dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); | 304 | dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); |
307 | dlm_version.pv_major = conn->cc_version.pv_major; | 305 | fs_version.pv_major = conn->cc_version.pv_major; |
308 | dlm_version.pv_minor = conn->cc_version.pv_minor; | 306 | fs_version.pv_minor = conn->cc_version.pv_minor; |
309 | 307 | ||
310 | dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); | 308 | dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version); |
311 | if (IS_ERR(dlm)) { | 309 | if (IS_ERR(dlm)) { |
312 | rc = PTR_ERR(dlm); | 310 | rc = PTR_ERR(dlm); |
313 | mlog_errno(rc); | 311 | mlog_errno(rc); |
314 | goto out_free; | 312 | goto out_free; |
315 | } | 313 | } |
316 | 314 | ||
317 | conn->cc_version.pv_major = dlm_version.pv_major; | 315 | conn->cc_version.pv_major = fs_version.pv_major; |
318 | conn->cc_version.pv_minor = dlm_version.pv_minor; | 316 | conn->cc_version.pv_minor = fs_version.pv_minor; |
319 | conn->cc_lockspace = dlm; | 317 | conn->cc_lockspace = dlm; |
320 | 318 | ||
321 | dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); | 319 | dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index ff4c798a5635..2dc57bca0688 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -21,11 +21,11 @@ | |||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/miscdevice.h> | 22 | #include <linux/miscdevice.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/smp_lock.h> | 25 | #include <linux/smp_lock.h> |
25 | #include <linux/reboot.h> | 26 | #include <linux/reboot.h> |
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | ||
29 | #include "stackglue.h" | 29 | #include "stackglue.h" |
30 | 30 | ||
31 | #include <linux/dlm_plock.h> | 31 | #include <linux/dlm_plock.h> |
@@ -63,8 +63,8 @@ | |||
63 | * negotiated by the client. The client negotiates based on the maximum | 63 | * negotiated by the client. The client negotiates based on the maximum |
64 | * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major | 64 | * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major |
65 | * number from the "SETV" message must match | 65 | * number from the "SETV" message must match |
66 | * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number | 66 | * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number |
67 | * must be less than or equal to ...->lp_max_version.pv_minor. | 67 | * must be less than or equal to ...sp_max_proto.pv_minor. |
68 | * | 68 | * |
69 | * Once this information has been set, mounts will be allowed. From this | 69 | * Once this information has been set, mounts will be allowed. From this |
70 | * point on, the "DOWN" message can be sent for node down notification. | 70 | * point on, the "DOWN" message can be sent for node down notification. |
@@ -401,7 +401,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file, | |||
401 | char *ptr = NULL; | 401 | char *ptr = NULL; |
402 | struct ocfs2_control_private *p = file->private_data; | 402 | struct ocfs2_control_private *p = file->private_data; |
403 | struct ocfs2_protocol_version *max = | 403 | struct ocfs2_protocol_version *max = |
404 | &ocfs2_user_plugin.sp_proto->lp_max_version; | 404 | &ocfs2_user_plugin.sp_max_proto; |
405 | 405 | ||
406 | if (ocfs2_control_get_handshake_state(file) != | 406 | if (ocfs2_control_get_handshake_state(file) != |
407 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL) | 407 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL) |
@@ -664,18 +664,10 @@ static void ocfs2_control_exit(void) | |||
664 | -rc); | 664 | -rc); |
665 | } | 665 | } |
666 | 666 | ||
667 | static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg) | ||
668 | { | ||
669 | struct ocfs2_lock_res *res = astarg; | ||
670 | return &res->l_lksb.lksb_fsdlm; | ||
671 | } | ||
672 | |||
673 | static void fsdlm_lock_ast_wrapper(void *astarg) | 667 | static void fsdlm_lock_ast_wrapper(void *astarg) |
674 | { | 668 | { |
675 | struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); | 669 | struct ocfs2_dlm_lksb *lksb = astarg; |
676 | int status = lksb->sb_status; | 670 | int status = lksb->lksb_fsdlm.sb_status; |
677 | |||
678 | BUG_ON(ocfs2_user_plugin.sp_proto == NULL); | ||
679 | 671 | ||
680 | /* | 672 | /* |
681 | * For now we're punting on the issue of other non-standard errors | 673 | * For now we're punting on the issue of other non-standard errors |
@@ -688,25 +680,24 @@ static void fsdlm_lock_ast_wrapper(void *astarg) | |||
688 | */ | 680 | */ |
689 | 681 | ||
690 | if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) | 682 | if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) |
691 | ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); | 683 | lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); |
692 | else | 684 | else |
693 | ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); | 685 | lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); |
694 | } | 686 | } |
695 | 687 | ||
696 | static void fsdlm_blocking_ast_wrapper(void *astarg, int level) | 688 | static void fsdlm_blocking_ast_wrapper(void *astarg, int level) |
697 | { | 689 | { |
698 | BUG_ON(ocfs2_user_plugin.sp_proto == NULL); | 690 | struct ocfs2_dlm_lksb *lksb = astarg; |
699 | 691 | ||
700 | ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); | 692 | lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); |
701 | } | 693 | } |
702 | 694 | ||
703 | static int user_dlm_lock(struct ocfs2_cluster_connection *conn, | 695 | static int user_dlm_lock(struct ocfs2_cluster_connection *conn, |
704 | int mode, | 696 | int mode, |
705 | union ocfs2_dlm_lksb *lksb, | 697 | struct ocfs2_dlm_lksb *lksb, |
706 | u32 flags, | 698 | u32 flags, |
707 | void *name, | 699 | void *name, |
708 | unsigned int namelen, | 700 | unsigned int namelen) |
709 | void *astarg) | ||
710 | { | 701 | { |
711 | int ret; | 702 | int ret; |
712 | 703 | ||
@@ -716,36 +707,35 @@ static int user_dlm_lock(struct ocfs2_cluster_connection *conn, | |||
716 | 707 | ||
717 | ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, | 708 | ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, |
718 | flags|DLM_LKF_NODLCKWT, name, namelen, 0, | 709 | flags|DLM_LKF_NODLCKWT, name, namelen, 0, |
719 | fsdlm_lock_ast_wrapper, astarg, | 710 | fsdlm_lock_ast_wrapper, lksb, |
720 | fsdlm_blocking_ast_wrapper); | 711 | fsdlm_blocking_ast_wrapper); |
721 | return ret; | 712 | return ret; |
722 | } | 713 | } |
723 | 714 | ||
724 | static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, | 715 | static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, |
725 | union ocfs2_dlm_lksb *lksb, | 716 | struct ocfs2_dlm_lksb *lksb, |
726 | u32 flags, | 717 | u32 flags) |
727 | void *astarg) | ||
728 | { | 718 | { |
729 | int ret; | 719 | int ret; |
730 | 720 | ||
731 | ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, | 721 | ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, |
732 | flags, &lksb->lksb_fsdlm, astarg); | 722 | flags, &lksb->lksb_fsdlm, lksb); |
733 | return ret; | 723 | return ret; |
734 | } | 724 | } |
735 | 725 | ||
736 | static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | 726 | static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) |
737 | { | 727 | { |
738 | return lksb->lksb_fsdlm.sb_status; | 728 | return lksb->lksb_fsdlm.sb_status; |
739 | } | 729 | } |
740 | 730 | ||
741 | static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) | 731 | static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) |
742 | { | 732 | { |
743 | int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; | 733 | int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; |
744 | 734 | ||
745 | return !invalid; | 735 | return !invalid; |
746 | } | 736 | } |
747 | 737 | ||
748 | static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) | 738 | static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) |
749 | { | 739 | { |
750 | if (!lksb->lksb_fsdlm.sb_lvbptr) | 740 | if (!lksb->lksb_fsdlm.sb_lvbptr) |
751 | lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + | 741 | lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + |
@@ -753,7 +743,7 @@ static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) | |||
753 | return (void *)(lksb->lksb_fsdlm.sb_lvbptr); | 743 | return (void *)(lksb->lksb_fsdlm.sb_lvbptr); |
754 | } | 744 | } |
755 | 745 | ||
756 | static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | 746 | static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) |
757 | { | 747 | { |
758 | } | 748 | } |
759 | 749 | ||
@@ -814,7 +804,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing, | |||
814 | static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | 804 | static int user_cluster_connect(struct ocfs2_cluster_connection *conn) |
815 | { | 805 | { |
816 | dlm_lockspace_t *fsdlm; | 806 | dlm_lockspace_t *fsdlm; |
817 | struct ocfs2_live_connection *control; | 807 | struct ocfs2_live_connection *uninitialized_var(control); |
818 | int rc = 0; | 808 | int rc = 0; |
819 | 809 | ||
820 | BUG_ON(conn == NULL); | 810 | BUG_ON(conn == NULL); |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 3f2f1c45b7b6..39abf89697ed 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -36,7 +36,7 @@ | |||
36 | #define OCFS2_STACK_PLUGIN_USER "user" | 36 | #define OCFS2_STACK_PLUGIN_USER "user" |
37 | #define OCFS2_MAX_HB_CTL_PATH 256 | 37 | #define OCFS2_MAX_HB_CTL_PATH 256 |
38 | 38 | ||
39 | static struct ocfs2_locking_protocol *lproto; | 39 | static struct ocfs2_protocol_version locking_max_version; |
40 | static DEFINE_SPINLOCK(ocfs2_stack_lock); | 40 | static DEFINE_SPINLOCK(ocfs2_stack_lock); |
41 | static LIST_HEAD(ocfs2_stack_list); | 41 | static LIST_HEAD(ocfs2_stack_list); |
42 | static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; | 42 | static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; |
@@ -176,7 +176,7 @@ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) | |||
176 | spin_lock(&ocfs2_stack_lock); | 176 | spin_lock(&ocfs2_stack_lock); |
177 | if (!ocfs2_stack_lookup(plugin->sp_name)) { | 177 | if (!ocfs2_stack_lookup(plugin->sp_name)) { |
178 | plugin->sp_count = 0; | 178 | plugin->sp_count = 0; |
179 | plugin->sp_proto = lproto; | 179 | plugin->sp_max_proto = locking_max_version; |
180 | list_add(&plugin->sp_list, &ocfs2_stack_list); | 180 | list_add(&plugin->sp_list, &ocfs2_stack_list); |
181 | printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", | 181 | printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", |
182 | plugin->sp_name); | 182 | plugin->sp_name); |
@@ -213,77 +213,76 @@ void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) | |||
213 | } | 213 | } |
214 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); | 214 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); |
215 | 215 | ||
216 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) | 216 | void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto) |
217 | { | 217 | { |
218 | struct ocfs2_stack_plugin *p; | 218 | struct ocfs2_stack_plugin *p; |
219 | 219 | ||
220 | BUG_ON(proto == NULL); | ||
221 | |||
222 | spin_lock(&ocfs2_stack_lock); | 220 | spin_lock(&ocfs2_stack_lock); |
223 | BUG_ON(active_stack != NULL); | 221 | if (memcmp(max_proto, &locking_max_version, |
222 | sizeof(struct ocfs2_protocol_version))) { | ||
223 | BUG_ON(locking_max_version.pv_major != 0); | ||
224 | 224 | ||
225 | lproto = proto; | 225 | locking_max_version = *max_proto; |
226 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | 226 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { |
227 | p->sp_proto = lproto; | 227 | p->sp_max_proto = locking_max_version; |
228 | } | ||
228 | } | 229 | } |
229 | |||
230 | spin_unlock(&ocfs2_stack_lock); | 230 | spin_unlock(&ocfs2_stack_lock); |
231 | } | 231 | } |
232 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); | 232 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_max_proto_version); |
233 | 233 | ||
234 | 234 | ||
235 | /* | 235 | /* |
236 | * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take | 236 | * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take no argument |
237 | * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the | 237 | * for the ast and bast functions. They will pass the lksb to the ast |
238 | * underlying stack plugins need to pilfer the lksb off of the lock_res. | 238 | * and bast. The caller can wrap the lksb with their own structure to |
239 | * If some other structure needs to be passed as an astarg, the plugins | 239 | * get more information. |
240 | * will need to be given a different avenue to the lksb. | ||
241 | */ | 240 | */ |
242 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | 241 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, |
243 | int mode, | 242 | int mode, |
244 | union ocfs2_dlm_lksb *lksb, | 243 | struct ocfs2_dlm_lksb *lksb, |
245 | u32 flags, | 244 | u32 flags, |
246 | void *name, | 245 | void *name, |
247 | unsigned int namelen, | 246 | unsigned int namelen) |
248 | struct ocfs2_lock_res *astarg) | ||
249 | { | 247 | { |
250 | BUG_ON(lproto == NULL); | 248 | if (!lksb->lksb_conn) |
251 | 249 | lksb->lksb_conn = conn; | |
250 | else | ||
251 | BUG_ON(lksb->lksb_conn != conn); | ||
252 | return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, | 252 | return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, |
253 | name, namelen, astarg); | 253 | name, namelen); |
254 | } | 254 | } |
255 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); | 255 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); |
256 | 256 | ||
257 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | 257 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, |
258 | union ocfs2_dlm_lksb *lksb, | 258 | struct ocfs2_dlm_lksb *lksb, |
259 | u32 flags, | 259 | u32 flags) |
260 | struct ocfs2_lock_res *astarg) | ||
261 | { | 260 | { |
262 | BUG_ON(lproto == NULL); | 261 | BUG_ON(lksb->lksb_conn == NULL); |
263 | 262 | ||
264 | return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); | 263 | return active_stack->sp_ops->dlm_unlock(conn, lksb, flags); |
265 | } | 264 | } |
266 | EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); | 265 | EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); |
267 | 266 | ||
268 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | 267 | int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) |
269 | { | 268 | { |
270 | return active_stack->sp_ops->lock_status(lksb); | 269 | return active_stack->sp_ops->lock_status(lksb); |
271 | } | 270 | } |
272 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); | 271 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); |
273 | 272 | ||
274 | int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) | 273 | int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) |
275 | { | 274 | { |
276 | return active_stack->sp_ops->lvb_valid(lksb); | 275 | return active_stack->sp_ops->lvb_valid(lksb); |
277 | } | 276 | } |
278 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); | 277 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); |
279 | 278 | ||
280 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) | 279 | void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb) |
281 | { | 280 | { |
282 | return active_stack->sp_ops->lock_lvb(lksb); | 281 | return active_stack->sp_ops->lock_lvb(lksb); |
283 | } | 282 | } |
284 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); | 283 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); |
285 | 284 | ||
286 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | 285 | void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) |
287 | { | 286 | { |
288 | active_stack->sp_ops->dump_lksb(lksb); | 287 | active_stack->sp_ops->dump_lksb(lksb); |
289 | } | 288 | } |
@@ -312,6 +311,7 @@ EXPORT_SYMBOL_GPL(ocfs2_plock); | |||
312 | int ocfs2_cluster_connect(const char *stack_name, | 311 | int ocfs2_cluster_connect(const char *stack_name, |
313 | const char *group, | 312 | const char *group, |
314 | int grouplen, | 313 | int grouplen, |
314 | struct ocfs2_locking_protocol *lproto, | ||
315 | void (*recovery_handler)(int node_num, | 315 | void (*recovery_handler)(int node_num, |
316 | void *recovery_data), | 316 | void *recovery_data), |
317 | void *recovery_data, | 317 | void *recovery_data, |
@@ -329,6 +329,12 @@ int ocfs2_cluster_connect(const char *stack_name, | |||
329 | goto out; | 329 | goto out; |
330 | } | 330 | } |
331 | 331 | ||
332 | if (memcmp(&lproto->lp_max_version, &locking_max_version, | ||
333 | sizeof(struct ocfs2_protocol_version))) { | ||
334 | rc = -EINVAL; | ||
335 | goto out; | ||
336 | } | ||
337 | |||
332 | new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), | 338 | new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), |
333 | GFP_KERNEL); | 339 | GFP_KERNEL); |
334 | if (!new_conn) { | 340 | if (!new_conn) { |
@@ -341,6 +347,7 @@ int ocfs2_cluster_connect(const char *stack_name, | |||
341 | new_conn->cc_recovery_handler = recovery_handler; | 347 | new_conn->cc_recovery_handler = recovery_handler; |
342 | new_conn->cc_recovery_data = recovery_data; | 348 | new_conn->cc_recovery_data = recovery_data; |
343 | 349 | ||
350 | new_conn->cc_proto = lproto; | ||
344 | /* Start the new connection at our maximum compatibility level */ | 351 | /* Start the new connection at our maximum compatibility level */ |
345 | new_conn->cc_version = lproto->lp_max_version; | 352 | new_conn->cc_version = lproto->lp_max_version; |
346 | 353 | ||
@@ -366,6 +373,24 @@ out: | |||
366 | } | 373 | } |
367 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); | 374 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); |
368 | 375 | ||
376 | /* The caller will ensure all nodes have the same cluster stack */ | ||
377 | int ocfs2_cluster_connect_agnostic(const char *group, | ||
378 | int grouplen, | ||
379 | struct ocfs2_locking_protocol *lproto, | ||
380 | void (*recovery_handler)(int node_num, | ||
381 | void *recovery_data), | ||
382 | void *recovery_data, | ||
383 | struct ocfs2_cluster_connection **conn) | ||
384 | { | ||
385 | char *stack_name = NULL; | ||
386 | |||
387 | if (cluster_stack_name[0]) | ||
388 | stack_name = cluster_stack_name; | ||
389 | return ocfs2_cluster_connect(stack_name, group, grouplen, lproto, | ||
390 | recovery_handler, recovery_data, conn); | ||
391 | } | ||
392 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic); | ||
393 | |||
369 | /* If hangup_pending is 0, the stack driver will be dropped */ | 394 | /* If hangup_pending is 0, the stack driver will be dropped */ |
370 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | 395 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, |
371 | int hangup_pending) | 396 | int hangup_pending) |
@@ -453,10 +478,10 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, | |||
453 | ssize_t ret = 0; | 478 | ssize_t ret = 0; |
454 | 479 | ||
455 | spin_lock(&ocfs2_stack_lock); | 480 | spin_lock(&ocfs2_stack_lock); |
456 | if (lproto) | 481 | if (locking_max_version.pv_major) |
457 | ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", | 482 | ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", |
458 | lproto->lp_max_version.pv_major, | 483 | locking_max_version.pv_major, |
459 | lproto->lp_max_version.pv_minor); | 484 | locking_max_version.pv_minor); |
460 | spin_unlock(&ocfs2_stack_lock); | 485 | spin_unlock(&ocfs2_stack_lock); |
461 | 486 | ||
462 | return ret; | 487 | return ret; |
@@ -620,51 +645,46 @@ error: | |||
620 | 645 | ||
621 | static ctl_table ocfs2_nm_table[] = { | 646 | static ctl_table ocfs2_nm_table[] = { |
622 | { | 647 | { |
623 | .ctl_name = 1, | ||
624 | .procname = "hb_ctl_path", | 648 | .procname = "hb_ctl_path", |
625 | .data = ocfs2_hb_ctl_path, | 649 | .data = ocfs2_hb_ctl_path, |
626 | .maxlen = OCFS2_MAX_HB_CTL_PATH, | 650 | .maxlen = OCFS2_MAX_HB_CTL_PATH, |
627 | .mode = 0644, | 651 | .mode = 0644, |
628 | .proc_handler = &proc_dostring, | 652 | .proc_handler = proc_dostring, |
629 | .strategy = &sysctl_string, | ||
630 | }, | 653 | }, |
631 | { .ctl_name = 0 } | 654 | { } |
632 | }; | 655 | }; |
633 | 656 | ||
634 | static ctl_table ocfs2_mod_table[] = { | 657 | static ctl_table ocfs2_mod_table[] = { |
635 | { | 658 | { |
636 | .ctl_name = FS_OCFS2_NM, | ||
637 | .procname = "nm", | 659 | .procname = "nm", |
638 | .data = NULL, | 660 | .data = NULL, |
639 | .maxlen = 0, | 661 | .maxlen = 0, |
640 | .mode = 0555, | 662 | .mode = 0555, |
641 | .child = ocfs2_nm_table | 663 | .child = ocfs2_nm_table |
642 | }, | 664 | }, |
643 | { .ctl_name = 0} | 665 | { } |
644 | }; | 666 | }; |
645 | 667 | ||
646 | static ctl_table ocfs2_kern_table[] = { | 668 | static ctl_table ocfs2_kern_table[] = { |
647 | { | 669 | { |
648 | .ctl_name = FS_OCFS2, | ||
649 | .procname = "ocfs2", | 670 | .procname = "ocfs2", |
650 | .data = NULL, | 671 | .data = NULL, |
651 | .maxlen = 0, | 672 | .maxlen = 0, |
652 | .mode = 0555, | 673 | .mode = 0555, |
653 | .child = ocfs2_mod_table | 674 | .child = ocfs2_mod_table |
654 | }, | 675 | }, |
655 | { .ctl_name = 0} | 676 | { } |
656 | }; | 677 | }; |
657 | 678 | ||
658 | static ctl_table ocfs2_root_table[] = { | 679 | static ctl_table ocfs2_root_table[] = { |
659 | { | 680 | { |
660 | .ctl_name = CTL_FS, | ||
661 | .procname = "fs", | 681 | .procname = "fs", |
662 | .data = NULL, | 682 | .data = NULL, |
663 | .maxlen = 0, | 683 | .maxlen = 0, |
664 | .mode = 0555, | 684 | .mode = 0555, |
665 | .child = ocfs2_kern_table | 685 | .child = ocfs2_kern_table |
666 | }, | 686 | }, |
667 | { .ctl_name = 0 } | 687 | { } |
668 | }; | 688 | }; |
669 | 689 | ||
670 | static struct ctl_table_header *ocfs2_table_header = NULL; | 690 | static struct ctl_table_header *ocfs2_table_header = NULL; |
@@ -690,7 +710,10 @@ static int __init ocfs2_stack_glue_init(void) | |||
690 | 710 | ||
691 | static void __exit ocfs2_stack_glue_exit(void) | 711 | static void __exit ocfs2_stack_glue_exit(void) |
692 | { | 712 | { |
693 | lproto = NULL; | 713 | memset(&locking_max_version, 0, |
714 | sizeof(struct ocfs2_protocol_version)); | ||
715 | locking_max_version.pv_major = 0; | ||
716 | locking_max_version.pv_minor = 0; | ||
694 | ocfs2_sysfs_exit(); | 717 | ocfs2_sysfs_exit(); |
695 | if (ocfs2_table_header) | 718 | if (ocfs2_table_header) |
696 | unregister_sysctl_table(ocfs2_table_header); | 719 | unregister_sysctl_table(ocfs2_table_header); |
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 03a44d60eac9..8ce7398ae1d2 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
@@ -56,17 +56,6 @@ struct ocfs2_protocol_version { | |||
56 | }; | 56 | }; |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. | ||
60 | */ | ||
61 | struct ocfs2_locking_protocol { | ||
62 | struct ocfs2_protocol_version lp_max_version; | ||
63 | void (*lp_lock_ast)(void *astarg); | ||
64 | void (*lp_blocking_ast)(void *astarg, int level); | ||
65 | void (*lp_unlock_ast)(void *astarg, int error); | ||
66 | }; | ||
67 | |||
68 | |||
69 | /* | ||
70 | * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only | 59 | * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only |
71 | * has a pointer to separately allocated lvb space. This struct exists only to | 60 | * has a pointer to separately allocated lvb space. This struct exists only to |
72 | * include in the lksb union to make space for a combined dlm_lksb and lvb. | 61 | * include in the lksb union to make space for a combined dlm_lksb and lvb. |
@@ -81,12 +70,27 @@ struct fsdlm_lksb_plus_lvb { | |||
81 | * size of the union is known. Lock status structures are embedded in | 70 | * size of the union is known. Lock status structures are embedded in |
82 | * ocfs2 inodes. | 71 | * ocfs2 inodes. |
83 | */ | 72 | */ |
84 | union ocfs2_dlm_lksb { | 73 | struct ocfs2_cluster_connection; |
85 | struct dlm_lockstatus lksb_o2dlm; | 74 | struct ocfs2_dlm_lksb { |
86 | struct dlm_lksb lksb_fsdlm; | 75 | union { |
87 | struct fsdlm_lksb_plus_lvb padding; | 76 | struct dlm_lockstatus lksb_o2dlm; |
77 | struct dlm_lksb lksb_fsdlm; | ||
78 | struct fsdlm_lksb_plus_lvb padding; | ||
79 | }; | ||
80 | struct ocfs2_cluster_connection *lksb_conn; | ||
81 | }; | ||
82 | |||
83 | /* | ||
84 | * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. | ||
85 | */ | ||
86 | struct ocfs2_locking_protocol { | ||
87 | struct ocfs2_protocol_version lp_max_version; | ||
88 | void (*lp_lock_ast)(struct ocfs2_dlm_lksb *lksb); | ||
89 | void (*lp_blocking_ast)(struct ocfs2_dlm_lksb *lksb, int level); | ||
90 | void (*lp_unlock_ast)(struct ocfs2_dlm_lksb *lksb, int error); | ||
88 | }; | 91 | }; |
89 | 92 | ||
93 | |||
90 | /* | 94 | /* |
91 | * A cluster connection. Mostly opaque to ocfs2, the connection holds | 95 | * A cluster connection. Mostly opaque to ocfs2, the connection holds |
92 | * state for the underlying stack. ocfs2 does use cc_version to determine | 96 | * state for the underlying stack. ocfs2 does use cc_version to determine |
@@ -96,6 +100,7 @@ struct ocfs2_cluster_connection { | |||
96 | char cc_name[GROUP_NAME_MAX]; | 100 | char cc_name[GROUP_NAME_MAX]; |
97 | int cc_namelen; | 101 | int cc_namelen; |
98 | struct ocfs2_protocol_version cc_version; | 102 | struct ocfs2_protocol_version cc_version; |
103 | struct ocfs2_locking_protocol *cc_proto; | ||
99 | void (*cc_recovery_handler)(int node_num, void *recovery_data); | 104 | void (*cc_recovery_handler)(int node_num, void *recovery_data); |
100 | void *cc_recovery_data; | 105 | void *cc_recovery_data; |
101 | void *cc_lockspace; | 106 | void *cc_lockspace; |
@@ -155,27 +160,29 @@ struct ocfs2_stack_operations { | |||
155 | * | 160 | * |
156 | * ast and bast functions are not part of the call because the | 161 | * ast and bast functions are not part of the call because the |
157 | * stack will likely want to wrap ast and bast calls before passing | 162 | * stack will likely want to wrap ast and bast calls before passing |
158 | * them to stack->sp_proto. | 163 | * them to stack->sp_proto. There is no astarg. The lksb will |
164 | * be passed back to the ast and bast functions. The caller can | ||
165 | * use this to find their object. | ||
159 | */ | 166 | */ |
160 | int (*dlm_lock)(struct ocfs2_cluster_connection *conn, | 167 | int (*dlm_lock)(struct ocfs2_cluster_connection *conn, |
161 | int mode, | 168 | int mode, |
162 | union ocfs2_dlm_lksb *lksb, | 169 | struct ocfs2_dlm_lksb *lksb, |
163 | u32 flags, | 170 | u32 flags, |
164 | void *name, | 171 | void *name, |
165 | unsigned int namelen, | 172 | unsigned int namelen); |
166 | void *astarg); | ||
167 | 173 | ||
168 | /* | 174 | /* |
169 | * Call the underlying dlm unlock function. The ->dlm_unlock() | 175 | * Call the underlying dlm unlock function. The ->dlm_unlock() |
170 | * function should convert the flags as appropriate. | 176 | * function should convert the flags as appropriate. |
171 | * | 177 | * |
172 | * The unlock ast is not passed, as the stack will want to wrap | 178 | * The unlock ast is not passed, as the stack will want to wrap |
173 | * it before calling stack->sp_proto->lp_unlock_ast(). | 179 | * it before calling stack->sp_proto->lp_unlock_ast(). There is |
180 | * no astarg. The lksb will be passed back to the unlock ast | ||
181 | * function. The caller can use this to find their object. | ||
174 | */ | 182 | */ |
175 | int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, | 183 | int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, |
176 | union ocfs2_dlm_lksb *lksb, | 184 | struct ocfs2_dlm_lksb *lksb, |
177 | u32 flags, | 185 | u32 flags); |
178 | void *astarg); | ||
179 | 186 | ||
180 | /* | 187 | /* |
181 | * Return the status of the current lock status block. The fs | 188 | * Return the status of the current lock status block. The fs |
@@ -183,17 +190,17 @@ struct ocfs2_stack_operations { | |||
183 | * callback pulls out the stack-specific lksb, converts the status | 190 | * callback pulls out the stack-specific lksb, converts the status |
184 | * to a proper errno, and returns it. | 191 | * to a proper errno, and returns it. |
185 | */ | 192 | */ |
186 | int (*lock_status)(union ocfs2_dlm_lksb *lksb); | 193 | int (*lock_status)(struct ocfs2_dlm_lksb *lksb); |
187 | 194 | ||
188 | /* | 195 | /* |
189 | * Return non-zero if the LVB is valid. | 196 | * Return non-zero if the LVB is valid. |
190 | */ | 197 | */ |
191 | int (*lvb_valid)(union ocfs2_dlm_lksb *lksb); | 198 | int (*lvb_valid)(struct ocfs2_dlm_lksb *lksb); |
192 | 199 | ||
193 | /* | 200 | /* |
194 | * Pull the lvb pointer off of the stack-specific lksb. | 201 | * Pull the lvb pointer off of the stack-specific lksb. |
195 | */ | 202 | */ |
196 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | 203 | void *(*lock_lvb)(struct ocfs2_dlm_lksb *lksb); |
197 | 204 | ||
198 | /* | 205 | /* |
199 | * Cluster-aware posix locks | 206 | * Cluster-aware posix locks |
@@ -210,7 +217,7 @@ struct ocfs2_stack_operations { | |||
210 | * This is an optional debugging hook. If provided, the | 217 |
211 | * stack can dump debugging information about this lock. | 218 | * stack can dump debugging information about this lock. |
212 | */ | 219 | */ |
213 | void (*dump_lksb)(union ocfs2_dlm_lksb *lksb); | 220 | void (*dump_lksb)(struct ocfs2_dlm_lksb *lksb); |
214 | }; | 221 | }; |
215 | 222 | ||
216 | /* | 223 | /* |
@@ -226,7 +233,7 @@ struct ocfs2_stack_plugin { | |||
226 | /* These are managed by the stackglue code. */ | 233 | /* These are managed by the stackglue code. */ |
227 | struct list_head sp_list; | 234 | struct list_head sp_list; |
228 | unsigned int sp_count; | 235 | unsigned int sp_count; |
229 | struct ocfs2_locking_protocol *sp_proto; | 236 | struct ocfs2_protocol_version sp_max_proto; |
230 | }; | 237 | }; |
231 | 238 | ||
232 | 239 | ||
@@ -234,10 +241,22 @@ struct ocfs2_stack_plugin { | |||
234 | int ocfs2_cluster_connect(const char *stack_name, | 241 | int ocfs2_cluster_connect(const char *stack_name, |
235 | const char *group, | 242 | const char *group, |
236 | int grouplen, | 243 | int grouplen, |
244 | struct ocfs2_locking_protocol *lproto, | ||
237 | void (*recovery_handler)(int node_num, | 245 | void (*recovery_handler)(int node_num, |
238 | void *recovery_data), | 246 | void *recovery_data), |
239 | void *recovery_data, | 247 | void *recovery_data, |
240 | struct ocfs2_cluster_connection **conn); | 248 | struct ocfs2_cluster_connection **conn); |
249 | /* | ||
250 | * Used by callers that don't store their stack name. They must ensure | ||
251 | * all nodes have the same stack. | ||
252 | */ | ||
253 | int ocfs2_cluster_connect_agnostic(const char *group, | ||
254 | int grouplen, | ||
255 | struct ocfs2_locking_protocol *lproto, | ||
256 | void (*recovery_handler)(int node_num, | ||
257 | void *recovery_data), | ||
258 | void *recovery_data, | ||
259 | struct ocfs2_cluster_connection **conn); | ||
241 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | 260 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, |
242 | int hangup_pending); | 261 | int hangup_pending); |
243 | void ocfs2_cluster_hangup(const char *group, int grouplen); | 262 | void ocfs2_cluster_hangup(const char *group, int grouplen); |
@@ -246,26 +265,24 @@ int ocfs2_cluster_this_node(unsigned int *node); | |||
246 | struct ocfs2_lock_res; | 265 | struct ocfs2_lock_res; |
247 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | 266 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, |
248 | int mode, | 267 | int mode, |
249 | union ocfs2_dlm_lksb *lksb, | 268 | struct ocfs2_dlm_lksb *lksb, |
250 | u32 flags, | 269 | u32 flags, |
251 | void *name, | 270 | void *name, |
252 | unsigned int namelen, | 271 | unsigned int namelen); |
253 | struct ocfs2_lock_res *astarg); | ||
254 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | 272 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, |
255 | union ocfs2_dlm_lksb *lksb, | 273 | struct ocfs2_dlm_lksb *lksb, |
256 | u32 flags, | 274 | u32 flags); |
257 | struct ocfs2_lock_res *astarg); | ||
258 | 275 | ||
259 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | 276 | int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb); |
260 | int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb); | 277 | int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb); |
261 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | 278 | void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb); |
262 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | 279 | void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb); |
263 | 280 | ||
264 | int ocfs2_stack_supports_plocks(void); | 281 | int ocfs2_stack_supports_plocks(void); |
265 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | 282 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, |
266 | struct file *file, int cmd, struct file_lock *fl); | 283 | struct file *file, int cmd, struct file_lock *fl); |
267 | 284 | ||
268 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | 285 | void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto); |
269 | 286 | ||
270 | 287 | ||
271 | /* Used by stack plugins */ | 288 | /* Used by stack plugins */ |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c30b644d9572..19ba00f28547 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #define ALLOC_NEW_GROUP 0x1 | 51 | #define ALLOC_NEW_GROUP 0x1 |
52 | #define ALLOC_GROUPS_FROM_GLOBAL 0x2 | 52 | #define ALLOC_GROUPS_FROM_GLOBAL 0x2 |
53 | 53 | ||
54 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | 54 | #define OCFS2_MAX_TO_STEAL 1024 |
55 | 55 | ||
56 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 56 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
57 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 57 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
@@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
95 | struct buffer_head *group_bh, | 95 | struct buffer_head *group_bh, |
96 | unsigned int bit_off, | 96 | unsigned int bit_off, |
97 | unsigned int num_bits); | 97 | unsigned int num_bits); |
98 | static inline int ocfs2_block_group_clear_bits(handle_t *handle, | ||
99 | struct inode *alloc_inode, | ||
100 | struct ocfs2_group_desc *bg, | ||
101 | struct buffer_head *group_bh, | ||
102 | unsigned int bit_off, | ||
103 | unsigned int num_bits); | ||
104 | |||
105 | static int ocfs2_relink_block_group(handle_t *handle, | 98 | static int ocfs2_relink_block_group(handle_t *handle, |
106 | struct inode *alloc_inode, | 99 | struct inode *alloc_inode, |
107 | struct buffer_head *fe_bh, | 100 | struct buffer_head *fe_bh, |
@@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
152 | 145 | ||
153 | #define do_error(fmt, ...) \ | 146 | #define do_error(fmt, ...) \ |
154 | do{ \ | 147 | do{ \ |
155 | if (clean_error) \ | 148 | if (resize) \ |
156 | mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ | 149 | mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ |
157 | else \ | 150 | else \ |
158 | ocfs2_error(sb, fmt, ##__VA_ARGS__); \ | 151 | ocfs2_error(sb, fmt, ##__VA_ARGS__); \ |
@@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
160 | 153 | ||
161 | static int ocfs2_validate_gd_self(struct super_block *sb, | 154 | static int ocfs2_validate_gd_self(struct super_block *sb, |
162 | struct buffer_head *bh, | 155 | struct buffer_head *bh, |
163 | int clean_error) | 156 | int resize) |
164 | { | 157 | { |
165 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | 158 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
166 | 159 | ||
@@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb, | |||
211 | static int ocfs2_validate_gd_parent(struct super_block *sb, | 204 | static int ocfs2_validate_gd_parent(struct super_block *sb, |
212 | struct ocfs2_dinode *di, | 205 | struct ocfs2_dinode *di, |
213 | struct buffer_head *bh, | 206 | struct buffer_head *bh, |
214 | int clean_error) | 207 | int resize) |
215 | { | 208 | { |
216 | unsigned int max_bits; | 209 | unsigned int max_bits; |
217 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | 210 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
@@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, | |||
233 | return -EINVAL; | 226 | return -EINVAL; |
234 | } | 227 | } |
235 | 228 | ||
236 | if (le16_to_cpu(gd->bg_chain) >= | 229 | /* In resize, we may meet the case bg_chain == cl_next_free_rec. */ |
237 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { | 230 | if ((le16_to_cpu(gd->bg_chain) > |
231 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || | ||
232 | ((le16_to_cpu(gd->bg_chain) == | ||
233 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { | ||
238 | do_error("Group descriptor #%llu has bad chain %u", | 234 | do_error("Group descriptor #%llu has bad chain %u", |
239 | (unsigned long long)bh->b_blocknr, | 235 | (unsigned long long)bh->b_blocknr, |
240 | le16_to_cpu(gd->bg_chain)); | 236 | le16_to_cpu(gd->bg_chain)); |
@@ -637,12 +633,113 @@ bail: | |||
637 | return status; | 633 | return status; |
638 | } | 634 | } |
639 | 635 | ||
636 | static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | ||
637 | { | ||
638 | spin_lock(&osb->osb_lock); | ||
639 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
640 | spin_unlock(&osb->osb_lock); | ||
641 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
642 | } | ||
643 | |||
644 | static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb) | ||
645 | { | ||
646 | spin_lock(&osb->osb_lock); | ||
647 | osb->s_meta_steal_slot = OCFS2_INVALID_SLOT; | ||
648 | spin_unlock(&osb->osb_lock); | ||
649 | atomic_set(&osb->s_num_meta_stolen, 0); | ||
650 | } | ||
651 | |||
652 | void ocfs2_init_steal_slots(struct ocfs2_super *osb) | ||
653 | { | ||
654 | ocfs2_init_inode_steal_slot(osb); | ||
655 | ocfs2_init_meta_steal_slot(osb); | ||
656 | } | ||
657 | |||
658 | static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type) | ||
659 | { | ||
660 | spin_lock(&osb->osb_lock); | ||
661 | if (type == INODE_ALLOC_SYSTEM_INODE) | ||
662 | osb->s_inode_steal_slot = slot; | ||
663 | else if (type == EXTENT_ALLOC_SYSTEM_INODE) | ||
664 | osb->s_meta_steal_slot = slot; | ||
665 | spin_unlock(&osb->osb_lock); | ||
666 | } | ||
667 | |||
668 | static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type) | ||
669 | { | ||
670 | int slot = OCFS2_INVALID_SLOT; | ||
671 | |||
672 | spin_lock(&osb->osb_lock); | ||
673 | if (type == INODE_ALLOC_SYSTEM_INODE) | ||
674 | slot = osb->s_inode_steal_slot; | ||
675 | else if (type == EXTENT_ALLOC_SYSTEM_INODE) | ||
676 | slot = osb->s_meta_steal_slot; | ||
677 | spin_unlock(&osb->osb_lock); | ||
678 | |||
679 | return slot; | ||
680 | } | ||
681 | |||
682 | static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
683 | { | ||
684 | return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE); | ||
685 | } | ||
686 | |||
687 | static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb) | ||
688 | { | ||
689 | return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE); | ||
690 | } | ||
691 | |||
692 | static int ocfs2_steal_resource(struct ocfs2_super *osb, | ||
693 | struct ocfs2_alloc_context *ac, | ||
694 | int type) | ||
695 | { | ||
696 | int i, status = -ENOSPC; | ||
697 | int slot = __ocfs2_get_steal_slot(osb, type); | ||
698 | |||
699 | /* Start to steal resource from the first slot after ours. */ | ||
700 | if (slot == OCFS2_INVALID_SLOT) | ||
701 | slot = osb->slot_num + 1; | ||
702 | |||
703 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
704 | if (slot == osb->max_slots) | ||
705 | slot = 0; | ||
706 | |||
707 | if (slot == osb->slot_num) | ||
708 | continue; | ||
709 | |||
710 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
711 | type, | ||
712 | (u32)slot, NULL, | ||
713 | NOT_ALLOC_NEW_GROUP); | ||
714 | if (status >= 0) { | ||
715 | __ocfs2_set_steal_slot(osb, slot, type); | ||
716 | break; | ||
717 | } | ||
718 | |||
719 | ocfs2_free_ac_resource(ac); | ||
720 | } | ||
721 | |||
722 | return status; | ||
723 | } | ||
724 | |||
725 | static int ocfs2_steal_inode(struct ocfs2_super *osb, | ||
726 | struct ocfs2_alloc_context *ac) | ||
727 | { | ||
728 | return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE); | ||
729 | } | ||
730 | |||
731 | static int ocfs2_steal_meta(struct ocfs2_super *osb, | ||
732 | struct ocfs2_alloc_context *ac) | ||
733 | { | ||
734 | return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE); | ||
735 | } | ||
736 | |||
640 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | 737 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, |
641 | int blocks, | 738 | int blocks, |
642 | struct ocfs2_alloc_context **ac) | 739 | struct ocfs2_alloc_context **ac) |
643 | { | 740 | { |
644 | int status; | 741 | int status; |
645 | u32 slot; | 742 | int slot = ocfs2_get_meta_steal_slot(osb); |
646 | 743 | ||
647 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 744 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
648 | if (!(*ac)) { | 745 | if (!(*ac)) { |
@@ -653,12 +750,34 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | |||
653 | 750 | ||
654 | (*ac)->ac_bits_wanted = blocks; | 751 | (*ac)->ac_bits_wanted = blocks; |
655 | (*ac)->ac_which = OCFS2_AC_USE_META; | 752 | (*ac)->ac_which = OCFS2_AC_USE_META; |
656 | slot = osb->slot_num; | ||
657 | (*ac)->ac_group_search = ocfs2_block_group_search; | 753 | (*ac)->ac_group_search = ocfs2_block_group_search; |
658 | 754 | ||
755 | if (slot != OCFS2_INVALID_SLOT && | ||
756 | atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL) | ||
757 | goto extent_steal; | ||
758 | |||
759 | atomic_set(&osb->s_num_meta_stolen, 0); | ||
659 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 760 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
660 | EXTENT_ALLOC_SYSTEM_INODE, | 761 | EXTENT_ALLOC_SYSTEM_INODE, |
661 | slot, NULL, ALLOC_NEW_GROUP); | 762 | (u32)osb->slot_num, NULL, |
763 | ALLOC_NEW_GROUP); | ||
764 | |||
765 | |||
766 | if (status >= 0) { | ||
767 | status = 0; | ||
768 | if (slot != OCFS2_INVALID_SLOT) | ||
769 | ocfs2_init_meta_steal_slot(osb); | ||
770 | goto bail; | ||
771 | } else if (status < 0 && status != -ENOSPC) { | ||
772 | mlog_errno(status); | ||
773 | goto bail; | ||
774 | } | ||
775 | |||
776 | ocfs2_free_ac_resource(*ac); | ||
777 | |||
778 | extent_steal: | ||
779 | status = ocfs2_steal_meta(osb, *ac); | ||
780 | atomic_inc(&osb->s_num_meta_stolen); | ||
662 | if (status < 0) { | 781 | if (status < 0) { |
663 | if (status != -ENOSPC) | 782 | if (status != -ENOSPC) |
664 | mlog_errno(status); | 783 | mlog_errno(status); |
@@ -685,43 +804,11 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
685 | ac); | 804 | ac); |
686 | } | 805 | } |
687 | 806 | ||
688 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | ||
689 | struct ocfs2_alloc_context *ac) | ||
690 | { | ||
691 | int i, status = -ENOSPC; | ||
692 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
693 | |||
694 | /* Start to steal inodes from the first slot after ours. */ | ||
695 | if (slot == OCFS2_INVALID_SLOT) | ||
696 | slot = osb->slot_num + 1; | ||
697 | |||
698 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
699 | if (slot == osb->max_slots) | ||
700 | slot = 0; | ||
701 | |||
702 | if (slot == osb->slot_num) | ||
703 | continue; | ||
704 | |||
705 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
706 | INODE_ALLOC_SYSTEM_INODE, | ||
707 | slot, NULL, | ||
708 | NOT_ALLOC_NEW_GROUP); | ||
709 | if (status >= 0) { | ||
710 | ocfs2_set_inode_steal_slot(osb, slot); | ||
711 | break; | ||
712 | } | ||
713 | |||
714 | ocfs2_free_ac_resource(ac); | ||
715 | } | ||
716 | |||
717 | return status; | ||
718 | } | ||
719 | |||
720 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 807 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
721 | struct ocfs2_alloc_context **ac) | 808 | struct ocfs2_alloc_context **ac) |
722 | { | 809 | { |
723 | int status; | 810 | int status; |
724 | s16 slot = ocfs2_get_inode_steal_slot(osb); | 811 | int slot = ocfs2_get_inode_steal_slot(osb); |
725 | u64 alloc_group; | 812 | u64 alloc_group; |
726 | 813 | ||
727 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 814 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
@@ -754,14 +841,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
754 | * need to check our slots to see whether there is some space for us. | 841 | * need to check our slots to see whether there is some space for us. |
755 | */ | 842 | */ |
756 | if (slot != OCFS2_INVALID_SLOT && | 843 | if (slot != OCFS2_INVALID_SLOT && |
757 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) | 844 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL) |
758 | goto inode_steal; | 845 | goto inode_steal; |
759 | 846 | ||
760 | atomic_set(&osb->s_num_inodes_stolen, 0); | 847 | atomic_set(&osb->s_num_inodes_stolen, 0); |
761 | alloc_group = osb->osb_inode_alloc_group; | 848 | alloc_group = osb->osb_inode_alloc_group; |
762 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 849 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
763 | INODE_ALLOC_SYSTEM_INODE, | 850 | INODE_ALLOC_SYSTEM_INODE, |
764 | osb->slot_num, | 851 | (u32)osb->slot_num, |
765 | &alloc_group, | 852 | &alloc_group, |
766 | ALLOC_NEW_GROUP | | 853 | ALLOC_NEW_GROUP | |
767 | ALLOC_GROUPS_FROM_GLOBAL); | 854 | ALLOC_GROUPS_FROM_GLOBAL); |
@@ -789,7 +876,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
789 | ocfs2_free_ac_resource(*ac); | 876 | ocfs2_free_ac_resource(*ac); |
790 | 877 | ||
791 | inode_steal: | 878 | inode_steal: |
792 | status = ocfs2_steal_inode_from_other_nodes(osb, *ac); | 879 | status = ocfs2_steal_inode(osb, *ac); |
793 | atomic_inc(&osb->s_num_inodes_stolen); | 880 | atomic_inc(&osb->s_num_inodes_stolen); |
794 | if (status < 0) { | 881 | if (status < 0) { |
795 | if (status != -ENOSPC) | 882 | if (status != -ENOSPC) |
@@ -1884,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
1884 | bits_wanted, cluster_start, num_clusters); | 1971 | bits_wanted, cluster_start, num_clusters); |
1885 | } | 1972 | } |
1886 | 1973 | ||
1887 | static inline int ocfs2_block_group_clear_bits(handle_t *handle, | 1974 | static int ocfs2_block_group_clear_bits(handle_t *handle, |
1888 | struct inode *alloc_inode, | 1975 | struct inode *alloc_inode, |
1889 | struct ocfs2_group_desc *bg, | 1976 | struct ocfs2_group_desc *bg, |
1890 | struct buffer_head *group_bh, | 1977 | struct buffer_head *group_bh, |
1891 | unsigned int bit_off, | 1978 | unsigned int bit_off, |
1892 | unsigned int num_bits) | 1979 | unsigned int num_bits, |
1980 | void (*undo_fn)(unsigned int bit, | ||
1981 | unsigned long *bmap)) | ||
1893 | { | 1982 | { |
1894 | int status; | 1983 | int status; |
1895 | unsigned int tmp; | 1984 | unsigned int tmp; |
1896 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
1897 | struct ocfs2_group_desc *undo_bg = NULL; | 1985 | struct ocfs2_group_desc *undo_bg = NULL; |
1898 | int cluster_bitmap = 0; | ||
1899 | 1986 | ||
1900 | mlog_entry_void(); | 1987 | mlog_entry_void(); |
1901 | 1988 | ||
@@ -1905,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, | |||
1905 | 1992 | ||
1906 | mlog(0, "off = %u, num = %u\n", bit_off, num_bits); | 1993 | mlog(0, "off = %u, num = %u\n", bit_off, num_bits); |
1907 | 1994 | ||
1908 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | 1995 | BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); |
1909 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
1910 | |||
1911 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1996 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1912 | group_bh, journal_type); | 1997 | group_bh, |
1998 | undo_fn ? | ||
1999 | OCFS2_JOURNAL_ACCESS_UNDO : | ||
2000 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1913 | if (status < 0) { | 2001 | if (status < 0) { |
1914 | mlog_errno(status); | 2002 | mlog_errno(status); |
1915 | goto bail; | 2003 | goto bail; |
1916 | } | 2004 | } |
1917 | 2005 | ||
1918 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | 2006 | if (undo_fn) { |
1919 | cluster_bitmap = 1; | ||
1920 | |||
1921 | if (cluster_bitmap) { | ||
1922 | jbd_lock_bh_state(group_bh); | 2007 | jbd_lock_bh_state(group_bh); |
1923 | undo_bg = (struct ocfs2_group_desc *) | 2008 | undo_bg = (struct ocfs2_group_desc *) |
1924 | bh2jh(group_bh)->b_committed_data; | 2009 | bh2jh(group_bh)->b_committed_data; |
@@ -1929,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, | |||
1929 | while(tmp--) { | 2014 | while(tmp--) { |
1930 | ocfs2_clear_bit((bit_off + tmp), | 2015 | ocfs2_clear_bit((bit_off + tmp), |
1931 | (unsigned long *) bg->bg_bitmap); | 2016 | (unsigned long *) bg->bg_bitmap); |
1932 | if (cluster_bitmap) | 2017 | if (undo_fn) |
1933 | ocfs2_set_bit(bit_off + tmp, | 2018 | undo_fn(bit_off + tmp, |
1934 | (unsigned long *) undo_bg->bg_bitmap); | 2019 | (unsigned long *) undo_bg->bg_bitmap); |
1935 | } | 2020 | } |
1936 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2021 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
1937 | 2022 | ||
1938 | if (cluster_bitmap) | 2023 | if (undo_fn) |
1939 | jbd_unlock_bh_state(group_bh); | 2024 | jbd_unlock_bh_state(group_bh); |
1940 | 2025 | ||
1941 | status = ocfs2_journal_dirty(handle, group_bh); | 2026 | status = ocfs2_journal_dirty(handle, group_bh); |
@@ -1948,12 +2033,14 @@ bail: | |||
1948 | /* | 2033 | /* |
1949 | * expects the suballoc inode to already be locked. | 2034 | * expects the suballoc inode to already be locked. |
1950 | */ | 2035 | */ |
1951 | int ocfs2_free_suballoc_bits(handle_t *handle, | 2036 | static int _ocfs2_free_suballoc_bits(handle_t *handle, |
1952 | struct inode *alloc_inode, | 2037 | struct inode *alloc_inode, |
1953 | struct buffer_head *alloc_bh, | 2038 | struct buffer_head *alloc_bh, |
1954 | unsigned int start_bit, | 2039 | unsigned int start_bit, |
1955 | u64 bg_blkno, | 2040 | u64 bg_blkno, |
1956 | unsigned int count) | 2041 | unsigned int count, |
2042 | void (*undo_fn)(unsigned int bit, | ||
2043 | unsigned long *bitmap)) | ||
1957 | { | 2044 | { |
1958 | int status = 0; | 2045 | int status = 0; |
1959 | u32 tmp_used; | 2046 | u32 tmp_used; |
@@ -1988,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1988 | 2075 | ||
1989 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, | 2076 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, |
1990 | group, group_bh, | 2077 | group, group_bh, |
1991 | start_bit, count); | 2078 | start_bit, count, undo_fn); |
1992 | if (status < 0) { | 2079 | if (status < 0) { |
1993 | mlog_errno(status); | 2080 | mlog_errno(status); |
1994 | goto bail; | 2081 | goto bail; |
@@ -2019,6 +2106,17 @@ bail: | |||
2019 | return status; | 2106 | return status; |
2020 | } | 2107 | } |
2021 | 2108 | ||
2109 | int ocfs2_free_suballoc_bits(handle_t *handle, | ||
2110 | struct inode *alloc_inode, | ||
2111 | struct buffer_head *alloc_bh, | ||
2112 | unsigned int start_bit, | ||
2113 | u64 bg_blkno, | ||
2114 | unsigned int count) | ||
2115 | { | ||
2116 | return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, | ||
2117 | start_bit, bg_blkno, count, NULL); | ||
2118 | } | ||
2119 | |||
2022 | int ocfs2_free_dinode(handle_t *handle, | 2120 | int ocfs2_free_dinode(handle_t *handle, |
2023 | struct inode *inode_alloc_inode, | 2121 | struct inode *inode_alloc_inode, |
2024 | struct buffer_head *inode_alloc_bh, | 2122 | struct buffer_head *inode_alloc_bh, |
@@ -2032,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle, | |||
2032 | inode_alloc_bh, bit, bg_blkno, 1); | 2130 | inode_alloc_bh, bit, bg_blkno, 1); |
2033 | } | 2131 | } |
2034 | 2132 | ||
2035 | int ocfs2_free_clusters(handle_t *handle, | 2133 | static int _ocfs2_free_clusters(handle_t *handle, |
2036 | struct inode *bitmap_inode, | 2134 | struct inode *bitmap_inode, |
2037 | struct buffer_head *bitmap_bh, | 2135 | struct buffer_head *bitmap_bh, |
2038 | u64 start_blk, | 2136 | u64 start_blk, |
2039 | unsigned int num_clusters) | 2137 | unsigned int num_clusters, |
2138 | void (*undo_fn)(unsigned int bit, | ||
2139 | unsigned long *bitmap)) | ||
2040 | { | 2140 | { |
2041 | int status; | 2141 | int status; |
2042 | u16 bg_start_bit; | 2142 | u16 bg_start_bit; |
@@ -2063,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle, | |||
2063 | mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", | 2163 | mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", |
2064 | (unsigned long long)bg_blkno, bg_start_bit); | 2164 | (unsigned long long)bg_blkno, bg_start_bit); |
2065 | 2165 | ||
2066 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | 2166 | status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, |
2067 | bg_start_bit, bg_blkno, | 2167 | bg_start_bit, bg_blkno, |
2068 | num_clusters); | 2168 | num_clusters, undo_fn); |
2069 | if (status < 0) { | 2169 | if (status < 0) { |
2070 | mlog_errno(status); | 2170 | mlog_errno(status); |
2071 | goto out; | 2171 | goto out; |
@@ -2079,6 +2179,32 @@ out: | |||
2079 | return status; | 2179 | return status; |
2080 | } | 2180 | } |
2081 | 2181 | ||
2182 | int ocfs2_free_clusters(handle_t *handle, | ||
2183 | struct inode *bitmap_inode, | ||
2184 | struct buffer_head *bitmap_bh, | ||
2185 | u64 start_blk, | ||
2186 | unsigned int num_clusters) | ||
2187 | { | ||
2188 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, | ||
2189 | start_blk, num_clusters, | ||
2190 | _ocfs2_set_bit); | ||
2191 | } | ||
2192 | |||
2193 | /* | ||
2194 | * Give never-used clusters back to the global bitmap. We don't need | ||
2195 | * to protect these bits in the undo buffer. | ||
2196 | */ | ||
2197 | int ocfs2_release_clusters(handle_t *handle, | ||
2198 | struct inode *bitmap_inode, | ||
2199 | struct buffer_head *bitmap_bh, | ||
2200 | u64 start_blk, | ||
2201 | unsigned int num_clusters) | ||
2202 | { | ||
2203 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, | ||
2204 | start_blk, num_clusters, | ||
2205 | _ocfs2_clear_bit); | ||
2206 | } | ||
2207 | |||
2082 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) | 2208 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) |
2083 | { | 2209 | { |
2084 | printk("Block Group:\n"); | 2210 | printk("Block Group:\n"); |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 8c9a78a43164..e0f46df357e6 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -56,6 +56,7 @@ struct ocfs2_alloc_context { | |||
56 | is the same as ~0 - unlimited */ | 56 | is the same as ~0 - unlimited */ |
57 | }; | 57 | }; |
58 | 58 | ||
59 | void ocfs2_init_steal_slots(struct ocfs2_super *osb); | ||
59 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); | 60 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |
60 | static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) | 61 | static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) |
61 | { | 62 | { |
@@ -126,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle, | |||
126 | struct buffer_head *bitmap_bh, | 127 | struct buffer_head *bitmap_bh, |
127 | u64 start_blk, | 128 | u64 start_blk, |
128 | unsigned int num_clusters); | 129 | unsigned int num_clusters); |
130 | int ocfs2_release_clusters(handle_t *handle, | ||
131 | struct inode *bitmap_inode, | ||
132 | struct buffer_head *bitmap_bh, | ||
133 | u64 start_blk, | ||
134 | unsigned int num_clusters); | ||
129 | 135 | ||
130 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | 136 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) |
131 | { | 137 | { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 14f47d2bfe02..dee03197a494 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include "xattr.h" | 69 | #include "xattr.h" |
70 | #include "quota.h" | 70 | #include "quota.h" |
71 | #include "refcounttree.h" | 71 | #include "refcounttree.h" |
72 | #include "suballoc.h" | ||
72 | 73 | ||
73 | #include "buffer_head_io.h" | 74 | #include "buffer_head_io.h" |
74 | 75 | ||
@@ -100,6 +101,8 @@ struct mount_options | |||
100 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 101 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
101 | struct mount_options *mopt, | 102 | struct mount_options *mopt, |
102 | int is_remount); | 103 | int is_remount); |
104 | static int ocfs2_check_set_options(struct super_block *sb, | ||
105 | struct mount_options *options); | ||
103 | static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); | 106 | static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); |
104 | static void ocfs2_put_super(struct super_block *sb); | 107 | static void ocfs2_put_super(struct super_block *sb); |
105 | static int ocfs2_mount_volume(struct super_block *sb); | 108 | static int ocfs2_mount_volume(struct super_block *sb); |
@@ -299,9 +302,12 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
299 | 302 | ||
300 | spin_lock(&osb->osb_lock); | 303 | spin_lock(&osb->osb_lock); |
301 | out += snprintf(buf + out, len - out, | 304 | out += snprintf(buf + out, len - out, |
302 | "%10s => Slot: %d NumStolen: %d\n", "Steal", | 305 | "%10s => InodeSlot: %d StolenInodes: %d, " |
306 | "MetaSlot: %d StolenMeta: %d\n", "Steal", | ||
303 | osb->s_inode_steal_slot, | 307 | osb->s_inode_steal_slot, |
304 | atomic_read(&osb->s_num_inodes_stolen)); | 308 | atomic_read(&osb->s_num_inodes_stolen), |
309 | osb->s_meta_steal_slot, | ||
310 | atomic_read(&osb->s_num_meta_stolen)); | ||
305 | spin_unlock(&osb->osb_lock); | 311 | spin_unlock(&osb->osb_lock); |
306 | 312 | ||
307 | out += snprintf(buf + out, len - out, "OrphanScan => "); | 313 | out += snprintf(buf + out, len - out, "OrphanScan => "); |
@@ -600,7 +606,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
600 | 606 | ||
601 | lock_kernel(); | 607 | lock_kernel(); |
602 | 608 | ||
603 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { | 609 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || |
610 | !ocfs2_check_set_options(sb, &parsed_options)) { | ||
604 | ret = -EINVAL; | 611 | ret = -EINVAL; |
605 | goto out; | 612 | goto out; |
606 | } | 613 | } |
@@ -691,8 +698,6 @@ unlock_osb: | |||
691 | if (!ret) { | 698 | if (!ret) { |
692 | /* Only save off the new mount options in case of a successful | 699 | /* Only save off the new mount options in case of a successful |
693 | * remount. */ | 700 | * remount. */ |
694 | if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) | ||
695 | parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; | ||
696 | osb->s_mount_opt = parsed_options.mount_opt; | 701 | osb->s_mount_opt = parsed_options.mount_opt; |
697 | osb->s_atime_quantum = parsed_options.atime_quantum; | 702 | osb->s_atime_quantum = parsed_options.atime_quantum; |
698 | osb->preferred_slot = parsed_options.slot; | 703 | osb->preferred_slot = parsed_options.slot; |
@@ -701,6 +706,10 @@ unlock_osb: | |||
701 | 706 | ||
702 | if (!ocfs2_is_hard_readonly(osb)) | 707 | if (!ocfs2_is_hard_readonly(osb)) |
703 | ocfs2_set_journal_params(osb); | 708 | ocfs2_set_journal_params(osb); |
709 | |||
710 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | ||
711 | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? | ||
712 | MS_POSIXACL : 0); | ||
704 | } | 713 | } |
705 | out: | 714 | out: |
706 | unlock_kernel(); | 715 | unlock_kernel(); |
@@ -1011,31 +1020,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1011 | brelse(bh); | 1020 | brelse(bh); |
1012 | bh = NULL; | 1021 | bh = NULL; |
1013 | 1022 | ||
1014 | if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) | 1023 | if (!ocfs2_check_set_options(sb, &parsed_options)) { |
1015 | parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; | 1024 | status = -EINVAL; |
1016 | 1025 | goto read_super_error; | |
1026 | } | ||
1017 | osb->s_mount_opt = parsed_options.mount_opt; | 1027 | osb->s_mount_opt = parsed_options.mount_opt; |
1018 | osb->s_atime_quantum = parsed_options.atime_quantum; | 1028 | osb->s_atime_quantum = parsed_options.atime_quantum; |
1019 | osb->preferred_slot = parsed_options.slot; | 1029 | osb->preferred_slot = parsed_options.slot; |
1020 | osb->osb_commit_interval = parsed_options.commit_interval; | 1030 | osb->osb_commit_interval = parsed_options.commit_interval; |
1021 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); | 1031 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); |
1022 | osb->local_alloc_bits = osb->local_alloc_default_bits; | 1032 | osb->local_alloc_bits = osb->local_alloc_default_bits; |
1023 | if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA && | ||
1024 | !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1025 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | ||
1026 | status = -EINVAL; | ||
1027 | mlog(ML_ERROR, "User quotas were requested, but this " | ||
1028 | "filesystem does not have the feature enabled.\n"); | ||
1029 | goto read_super_error; | ||
1030 | } | ||
1031 | if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA && | ||
1032 | !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1033 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | ||
1034 | status = -EINVAL; | ||
1035 | mlog(ML_ERROR, "Group quotas were requested, but this " | ||
1036 | "filesystem does not have the feature enabled.\n"); | ||
1037 | goto read_super_error; | ||
1038 | } | ||
1039 | 1033 | ||
1040 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | 1034 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); |
1041 | if (status) | 1035 | if (status) |
@@ -1072,7 +1066,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1072 | "file system, but write access is " | 1066 | "file system, but write access is " |
1073 | "unavailable.\n"); | 1067 | "unavailable.\n"); |
1074 | else | 1068 | else |
1075 | mlog_errno(status); | 1069 | mlog_errno(status); |
1076 | goto read_super_error; | 1070 | goto read_super_error; |
1077 | } | 1071 | } |
1078 | 1072 | ||
@@ -1245,6 +1239,40 @@ static struct file_system_type ocfs2_fs_type = { | |||
1245 | .next = NULL | 1239 | .next = NULL |
1246 | }; | 1240 | }; |
1247 | 1241 | ||
1242 | static int ocfs2_check_set_options(struct super_block *sb, | ||
1243 | struct mount_options *options) | ||
1244 | { | ||
1245 | if (options->mount_opt & OCFS2_MOUNT_USRQUOTA && | ||
1246 | !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1247 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | ||
1248 | mlog(ML_ERROR, "User quotas were requested, but this " | ||
1249 | "filesystem does not have the feature enabled.\n"); | ||
1250 | return 0; | ||
1251 | } | ||
1252 | if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA && | ||
1253 | !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1254 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | ||
1255 | mlog(ML_ERROR, "Group quotas were requested, but this " | ||
1256 | "filesystem does not have the feature enabled.\n"); | ||
1257 | return 0; | ||
1258 | } | ||
1259 | if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL && | ||
1260 | !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) { | ||
1261 | mlog(ML_ERROR, "ACL support requested but extended attributes " | ||
1262 | "feature is not enabled\n"); | ||
1263 | return 0; | ||
1264 | } | ||
1265 | /* No ACL setting specified? Use XATTR feature... */ | ||
1266 | if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL | | ||
1267 | OCFS2_MOUNT_NO_POSIX_ACL))) { | ||
1268 | if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) | ||
1269 | options->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | ||
1270 | else | ||
1271 | options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; | ||
1272 | } | ||
1273 | return 1; | ||
1274 | } | ||
1275 | |||
1248 | static int ocfs2_parse_options(struct super_block *sb, | 1276 | static int ocfs2_parse_options(struct super_block *sb, |
1249 | char *options, | 1277 | char *options, |
1250 | struct mount_options *mopt, | 1278 | struct mount_options *mopt, |
@@ -1392,40 +1420,19 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1392 | mopt->mount_opt |= OCFS2_MOUNT_INODE64; | 1420 | mopt->mount_opt |= OCFS2_MOUNT_INODE64; |
1393 | break; | 1421 | break; |
1394 | case Opt_usrquota: | 1422 | case Opt_usrquota: |
1395 | /* We check only on remount, otherwise features | ||
1396 | * aren't yet initialized. */ | ||
1397 | if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1398 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | ||
1399 | mlog(ML_ERROR, "User quota requested but " | ||
1400 | "filesystem feature is not set\n"); | ||
1401 | status = 0; | ||
1402 | goto bail; | ||
1403 | } | ||
1404 | mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; | 1423 | mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; |
1405 | break; | 1424 | break; |
1406 | case Opt_grpquota: | 1425 | case Opt_grpquota: |
1407 | if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, | ||
1408 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | ||
1409 | mlog(ML_ERROR, "Group quota requested but " | ||
1410 | "filesystem feature is not set\n"); | ||
1411 | status = 0; | ||
1412 | goto bail; | ||
1413 | } | ||
1414 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1426 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1415 | break; | 1427 | break; |
1416 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
1417 | case Opt_acl: | 1428 | case Opt_acl: |
1418 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1429 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1430 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | ||
1419 | break; | 1431 | break; |
1420 | case Opt_noacl: | 1432 | case Opt_noacl: |
1433 | mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; | ||
1421 | mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; | 1434 | mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; |
1422 | break; | 1435 | break; |
1423 | #else | ||
1424 | case Opt_acl: | ||
1425 | case Opt_noacl: | ||
1426 | printk(KERN_INFO "ocfs2 (no)acl options not supported\n"); | ||
1427 | break; | ||
1428 | #endif | ||
1429 | default: | 1436 | default: |
1430 | mlog(ML_ERROR, | 1437 | mlog(ML_ERROR, |
1431 | "Unrecognized mount option \"%s\" " | 1438 | "Unrecognized mount option \"%s\" " |
@@ -1502,12 +1509,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1502 | if (opts & OCFS2_MOUNT_INODE64) | 1509 | if (opts & OCFS2_MOUNT_INODE64) |
1503 | seq_printf(s, ",inode64"); | 1510 | seq_printf(s, ",inode64"); |
1504 | 1511 | ||
1505 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
1506 | if (opts & OCFS2_MOUNT_POSIX_ACL) | 1512 | if (opts & OCFS2_MOUNT_POSIX_ACL) |
1507 | seq_printf(s, ",acl"); | 1513 | seq_printf(s, ",acl"); |
1508 | else | 1514 | else |
1509 | seq_printf(s, ",noacl"); | 1515 | seq_printf(s, ",noacl"); |
1510 | #endif | ||
1511 | 1516 | ||
1512 | return 0; | 1517 | return 0; |
1513 | } | 1518 | } |
@@ -1996,7 +2001,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1996 | osb->blocked_lock_count = 0; | 2001 | osb->blocked_lock_count = 0; |
1997 | spin_lock_init(&osb->osb_lock); | 2002 | spin_lock_init(&osb->osb_lock); |
1998 | spin_lock_init(&osb->osb_xattr_lock); | 2003 | spin_lock_init(&osb->osb_xattr_lock); |
1999 | ocfs2_init_inode_steal_slot(osb); | 2004 | ocfs2_init_steal_slots(osb); |
2000 | 2005 | ||
2001 | atomic_set(&osb->alloc_stats.moves, 0); | 2006 | atomic_set(&osb->alloc_stats.moves, 0); |
2002 | atomic_set(&osb->alloc_stats.local_data, 0); | 2007 | atomic_set(&osb->alloc_stats.local_data, 0); |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index e3421030a69f..32499d213fc4 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, | |||
137 | } | 137 | } |
138 | 138 | ||
139 | memcpy(link, target, len); | 139 | memcpy(link, target, len); |
140 | nd_set_link(nd, link); | ||
141 | 140 | ||
142 | bail: | 141 | bail: |
142 | nd_set_link(nd, status ? ERR_PTR(status) : link); | ||
143 | brelse(bh); | 143 | brelse(bh); |
144 | 144 | ||
145 | mlog_exit(status); | 145 | mlog_exit(status); |
146 | return status ? ERR_PTR(status) : link; | 146 | return NULL; |
147 | } | 147 | } |
148 | 148 | ||
149 | static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) | 149 | static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) |
150 | { | 150 | { |
151 | char *link = cookie; | 151 | char *link = nd_get_link(nd); |
152 | 152 | if (!IS_ERR(link)) | |
153 | kfree(link); | 153 | kfree(link); |
154 | } | 154 | } |
155 | 155 | ||
156 | const struct inode_operations ocfs2_symlink_inode_operations = { | 156 | const struct inode_operations ocfs2_symlink_inode_operations = { |
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = { | |||
163 | .getxattr = generic_getxattr, | 163 | .getxattr = generic_getxattr, |
164 | .listxattr = ocfs2_listxattr, | 164 | .listxattr = ocfs2_listxattr, |
165 | .removexattr = generic_removexattr, | 165 | .removexattr = generic_removexattr, |
166 | .fiemap = ocfs2_fiemap, | ||
166 | }; | 167 | }; |
167 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { | 168 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { |
168 | .readlink = ocfs2_readlink, | 169 | .readlink = ocfs2_readlink, |
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = { | |||
174 | .getxattr = generic_getxattr, | 175 | .getxattr = generic_getxattr, |
175 | .listxattr = ocfs2_listxattr, | 176 | .listxattr = ocfs2_listxattr, |
176 | .removexattr = generic_removexattr, | 177 | .removexattr = generic_removexattr, |
178 | .fiemap = ocfs2_fiemap, | ||
177 | }; | 179 | }; |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index 40e53702948c..bfe7190cdbf1 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -25,7 +25,6 @@ | |||
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
30 | 29 | ||
31 | #define MLOG_MASK_PREFIX ML_INODE | 30 | #define MLOG_MASK_PREFIX ML_INODE |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index c61369342a27..a0a120e82b97 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
@@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci, | |||
267 | } | 267 | } |
268 | 268 | ||
269 | /* Warning: even if it returns true, this does *not* guarantee that | 269 | /* Warning: even if it returns true, this does *not* guarantee that |
270 | * the block is stored in our inode metadata cache. | 270 | * the block is stored in our inode metadata cache. |
271 | * | 271 | * |
272 | * This can be called under lock_buffer() | 272 | * This can be called under lock_buffer() |
273 | */ | 273 | */ |
274 | int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, | 274 | int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index fe3419068df2..3e7773089b96 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = { | |||
98 | 98 | ||
99 | struct xattr_handler *ocfs2_xattr_handlers[] = { | 99 | struct xattr_handler *ocfs2_xattr_handlers[] = { |
100 | &ocfs2_xattr_user_handler, | 100 | &ocfs2_xattr_user_handler, |
101 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
102 | &ocfs2_xattr_acl_access_handler, | 101 | &ocfs2_xattr_acl_access_handler, |
103 | &ocfs2_xattr_acl_default_handler, | 102 | &ocfs2_xattr_acl_default_handler, |
104 | #endif | ||
105 | &ocfs2_xattr_trusted_handler, | 103 | &ocfs2_xattr_trusted_handler, |
106 | &ocfs2_xattr_security_handler, | 104 | &ocfs2_xattr_security_handler, |
107 | NULL | 105 | NULL |
@@ -109,21 +107,20 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { | |||
109 | 107 | ||
110 | static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { | 108 | static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { |
111 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | 109 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, |
112 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
113 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] | 110 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] |
114 | = &ocfs2_xattr_acl_access_handler, | 111 | = &ocfs2_xattr_acl_access_handler, |
115 | [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] | 112 | [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] |
116 | = &ocfs2_xattr_acl_default_handler, | 113 | = &ocfs2_xattr_acl_default_handler, |
117 | #endif | ||
118 | [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, | 114 | [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, |
119 | [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, | 115 | [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, |
120 | }; | 116 | }; |
121 | 117 | ||
122 | struct ocfs2_xattr_info { | 118 | struct ocfs2_xattr_info { |
123 | int name_index; | 119 | int xi_name_index; |
124 | const char *name; | 120 | const char *xi_name; |
125 | const void *value; | 121 | int xi_name_len; |
126 | size_t value_len; | 122 | const void *xi_value; |
123 | size_t xi_value_len; | ||
127 | }; | 124 | }; |
128 | 125 | ||
129 | struct ocfs2_xattr_search { | 126 | struct ocfs2_xattr_search { |
@@ -141,6 +138,115 @@ struct ocfs2_xattr_search { | |||
141 | int not_found; | 138 | int not_found; |
142 | }; | 139 | }; |
143 | 140 | ||
141 | /* Operations on struct ocfs2_xa_entry */ | ||
142 | struct ocfs2_xa_loc; | ||
143 | struct ocfs2_xa_loc_operations { | ||
144 | /* | ||
145 | * Journal functions | ||
146 | */ | ||
147 | int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc, | ||
148 | int type); | ||
149 | void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc); | ||
150 | |||
151 | /* | ||
152 | * Return a pointer to the appropriate buffer in loc->xl_storage | ||
153 | * at the given offset from loc->xl_header. | ||
154 | */ | ||
155 | void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset); | ||
156 | |||
157 | /* Can we reuse the existing entry for the new value? */ | ||
158 | int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc, | ||
159 | struct ocfs2_xattr_info *xi); | ||
160 | |||
161 | /* How much space is needed for the new value? */ | ||
162 | int (*xlo_check_space)(struct ocfs2_xa_loc *loc, | ||
163 | struct ocfs2_xattr_info *xi); | ||
164 | |||
165 | /* | ||
166 | * Return the offset of the first name+value pair. This is | ||
167 | * the start of our downward-filling free space. | ||
168 | */ | ||
169 | int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc); | ||
170 | |||
171 | /* | ||
172 | * Remove the name+value at this location. Do whatever is | ||
173 | * appropriate with the remaining name+value pairs. | ||
174 | */ | ||
175 | void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc); | ||
176 | |||
177 | /* Fill xl_entry with a new entry */ | ||
178 | void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash); | ||
179 | |||
180 | /* Add name+value storage to an entry */ | ||
181 | void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size); | ||
182 | |||
183 | /* | ||
184 | * Initialize the value buf's access and bh fields for this entry. | ||
185 | * ocfs2_xa_fill_value_buf() will handle the xv pointer. | ||
186 | */ | ||
187 | void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc, | ||
188 | struct ocfs2_xattr_value_buf *vb); | ||
189 | }; | ||
190 | |||
191 | /* | ||
192 | * Describes an xattr entry location. This is a memory structure | ||
193 | * tracking the on-disk structure. | ||
194 | */ | ||
195 | struct ocfs2_xa_loc { | ||
196 | /* This xattr belongs to this inode */ | ||
197 | struct inode *xl_inode; | ||
198 | |||
199 | /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */ | ||
200 | struct ocfs2_xattr_header *xl_header; | ||
201 | |||
202 | /* Bytes from xl_header to the end of the storage */ | ||
203 | int xl_size; | ||
204 | |||
205 | /* | ||
206 | * The ocfs2_xattr_entry this location describes. If this is | ||
207 | * NULL, this location describes the on-disk structure where it | ||
208 | * would have been. | ||
209 | */ | ||
210 | struct ocfs2_xattr_entry *xl_entry; | ||
211 | |||
212 | /* | ||
213 | * Internal housekeeping | ||
214 | */ | ||
215 | |||
216 | /* Buffer(s) containing this entry */ | ||
217 | void *xl_storage; | ||
218 | |||
219 | /* Operations on the storage backing this location */ | ||
220 | const struct ocfs2_xa_loc_operations *xl_ops; | ||
221 | }; | ||
222 | |||
223 | /* | ||
224 | * Convenience functions to calculate how much space is needed for a | ||
225 | * given name+value pair | ||
226 | */ | ||
227 | static int namevalue_size(int name_len, uint64_t value_len) | ||
228 | { | ||
229 | if (value_len > OCFS2_XATTR_INLINE_SIZE) | ||
230 | return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
231 | else | ||
232 | return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); | ||
233 | } | ||
234 | |||
235 | static int namevalue_size_xi(struct ocfs2_xattr_info *xi) | ||
236 | { | ||
237 | return namevalue_size(xi->xi_name_len, xi->xi_value_len); | ||
238 | } | ||
239 | |||
240 | static int namevalue_size_xe(struct ocfs2_xattr_entry *xe) | ||
241 | { | ||
242 | u64 value_len = le64_to_cpu(xe->xe_value_size); | ||
243 | |||
244 | BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) && | ||
245 | ocfs2_xattr_is_local(xe)); | ||
246 | return namevalue_size(xe->xe_name_len, value_len); | ||
247 | } | ||
248 | |||
249 | |||
144 | static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, | 250 | static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, |
145 | struct ocfs2_xattr_header *xh, | 251 | struct ocfs2_xattr_header *xh, |
146 | int index, | 252 | int index, |
@@ -205,8 +311,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, | |||
205 | int offset, | 311 | int offset, |
206 | struct ocfs2_xattr_value_root **xv, | 312 | struct ocfs2_xattr_value_root **xv, |
207 | struct buffer_head **bh); | 313 | struct buffer_head **bh); |
208 | static int ocfs2_xattr_security_set(struct inode *inode, const char *name, | ||
209 | const void *value, size_t size, int flags); | ||
210 | 314 | ||
211 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | 315 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) |
212 | { | 316 | { |
@@ -218,14 +322,6 @@ static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | |||
218 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | 322 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); |
219 | } | 323 | } |
220 | 324 | ||
221 | static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) | ||
222 | { | ||
223 | u16 len = sb->s_blocksize - | ||
224 | offsetof(struct ocfs2_xattr_header, xh_entries); | ||
225 | |||
226 | return len / sizeof(struct ocfs2_xattr_entry); | ||
227 | } | ||
228 | |||
229 | #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) | 325 | #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) |
230 | #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) | 326 | #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) |
231 | #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) | 327 | #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) |
@@ -469,35 +565,22 @@ static u32 ocfs2_xattr_name_hash(struct inode *inode, | |||
469 | return hash; | 565 | return hash; |
470 | } | 566 | } |
471 | 567 | ||
472 | /* | 568 | static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) |
473 | * ocfs2_xattr_hash_entry() | ||
474 | * | ||
475 | * Compute the hash of an extended attribute. | ||
476 | */ | ||
477 | static void ocfs2_xattr_hash_entry(struct inode *inode, | ||
478 | struct ocfs2_xattr_header *header, | ||
479 | struct ocfs2_xattr_entry *entry) | ||
480 | { | 569 | { |
481 | u32 hash = 0; | 570 | return namevalue_size(name_len, value_len) + |
482 | char *name = (char *)header + le16_to_cpu(entry->xe_name_offset); | 571 | sizeof(struct ocfs2_xattr_entry); |
483 | |||
484 | hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len); | ||
485 | entry->xe_name_hash = cpu_to_le32(hash); | ||
486 | |||
487 | return; | ||
488 | } | 572 | } |
489 | 573 | ||
490 | static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) | 574 | static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi) |
491 | { | 575 | { |
492 | int size = 0; | 576 | return namevalue_size_xi(xi) + |
493 | 577 | sizeof(struct ocfs2_xattr_entry); | |
494 | if (value_len <= OCFS2_XATTR_INLINE_SIZE) | 578 | } |
495 | size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); | ||
496 | else | ||
497 | size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
498 | size += sizeof(struct ocfs2_xattr_entry); | ||
499 | 579 | ||
500 | return size; | 580 | static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe) |
581 | { | ||
582 | return namevalue_size_xe(xe) + | ||
583 | sizeof(struct ocfs2_xattr_entry); | ||
501 | } | 584 | } |
502 | 585 | ||
503 | int ocfs2_calc_security_init(struct inode *dir, | 586 | int ocfs2_calc_security_init(struct inode *dir, |
@@ -1314,452 +1397,897 @@ out: | |||
1314 | return ret; | 1397 | return ret; |
1315 | } | 1398 | } |
1316 | 1399 | ||
1317 | static int ocfs2_xattr_cleanup(struct inode *inode, | 1400 | static int ocfs2_xa_check_space_helper(int needed_space, int free_start, |
1318 | handle_t *handle, | 1401 | int num_entries) |
1319 | struct ocfs2_xattr_info *xi, | ||
1320 | struct ocfs2_xattr_search *xs, | ||
1321 | struct ocfs2_xattr_value_buf *vb, | ||
1322 | size_t offs) | ||
1323 | { | 1402 | { |
1324 | int ret = 0; | 1403 | int free_space; |
1325 | size_t name_len = strlen(xi->name); | ||
1326 | void *val = xs->base + offs; | ||
1327 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
1328 | 1404 | ||
1329 | ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, | 1405 | if (!needed_space) |
1330 | OCFS2_JOURNAL_ACCESS_WRITE); | 1406 | return 0; |
1331 | if (ret) { | ||
1332 | mlog_errno(ret); | ||
1333 | goto out; | ||
1334 | } | ||
1335 | /* Decrease xattr count */ | ||
1336 | le16_add_cpu(&xs->header->xh_count, -1); | ||
1337 | /* Remove the xattr entry and tree root which has already be set*/ | ||
1338 | memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); | ||
1339 | memset(val, 0, size); | ||
1340 | 1407 | ||
1341 | ret = ocfs2_journal_dirty(handle, vb->vb_bh); | 1408 | free_space = free_start - |
1342 | if (ret < 0) | 1409 | sizeof(struct ocfs2_xattr_header) - |
1343 | mlog_errno(ret); | 1410 | (num_entries * sizeof(struct ocfs2_xattr_entry)) - |
1344 | out: | 1411 | OCFS2_XATTR_HEADER_GAP; |
1345 | return ret; | 1412 | if (free_space < 0) |
1413 | return -EIO; | ||
1414 | if (free_space < needed_space) | ||
1415 | return -ENOSPC; | ||
1416 | |||
1417 | return 0; | ||
1346 | } | 1418 | } |
1347 | 1419 | ||
1348 | static int ocfs2_xattr_update_entry(struct inode *inode, | 1420 | static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc, |
1349 | handle_t *handle, | 1421 | int type) |
1350 | struct ocfs2_xattr_info *xi, | ||
1351 | struct ocfs2_xattr_search *xs, | ||
1352 | struct ocfs2_xattr_value_buf *vb, | ||
1353 | size_t offs) | ||
1354 | { | 1422 | { |
1355 | int ret; | 1423 | return loc->xl_ops->xlo_journal_access(handle, loc, type); |
1424 | } | ||
1356 | 1425 | ||
1357 | ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, | 1426 | static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc) |
1358 | OCFS2_JOURNAL_ACCESS_WRITE); | 1427 | { |
1359 | if (ret) { | 1428 | loc->xl_ops->xlo_journal_dirty(handle, loc); |
1360 | mlog_errno(ret); | 1429 | } |
1361 | goto out; | ||
1362 | } | ||
1363 | 1430 | ||
1364 | xs->here->xe_name_offset = cpu_to_le16(offs); | 1431 | /* Give a pointer into the storage for the given offset */ |
1365 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | 1432 | static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset) |
1366 | if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE) | 1433 | { |
1367 | ocfs2_xattr_set_local(xs->here, 1); | 1434 | BUG_ON(offset >= loc->xl_size); |
1368 | else | 1435 | return loc->xl_ops->xlo_offset_pointer(loc, offset); |
1369 | ocfs2_xattr_set_local(xs->here, 0); | 1436 | } |
1370 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | ||
1371 | 1437 | ||
1372 | ret = ocfs2_journal_dirty(handle, vb->vb_bh); | 1438 | /* |
1373 | if (ret < 0) | 1439 | * Wipe the name+value pair and allow the storage to reclaim it. This |
1374 | mlog_errno(ret); | 1440 | * must be followed by either removal of the entry or a call to |
1375 | out: | 1441 | * ocfs2_xa_add_namevalue(). |
1376 | return ret; | 1442 | */ |
1443 | static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc) | ||
1444 | { | ||
1445 | loc->xl_ops->xlo_wipe_namevalue(loc); | ||
1377 | } | 1446 | } |
1378 | 1447 | ||
1379 | /* | 1448 | /* |
1380 | * ocfs2_xattr_set_value_outside() | 1449 | * Find lowest offset to a name+value pair. This is the start of our |
1381 | * | 1450 | * downward-growing free space. |
1382 | * Set large size value in B tree. | ||
1383 | */ | 1451 | */ |
1384 | static int ocfs2_xattr_set_value_outside(struct inode *inode, | 1452 | static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc) |
1385 | struct ocfs2_xattr_info *xi, | ||
1386 | struct ocfs2_xattr_search *xs, | ||
1387 | struct ocfs2_xattr_set_ctxt *ctxt, | ||
1388 | struct ocfs2_xattr_value_buf *vb, | ||
1389 | size_t offs) | ||
1390 | { | 1453 | { |
1391 | size_t name_len = strlen(xi->name); | 1454 | return loc->xl_ops->xlo_get_free_start(loc); |
1392 | void *val = xs->base + offs; | 1455 | } |
1393 | struct ocfs2_xattr_value_root *xv = NULL; | ||
1394 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
1395 | int ret = 0; | ||
1396 | 1456 | ||
1397 | memset(val, 0, size); | 1457 | /* Can we reuse loc->xl_entry for xi? */ |
1398 | memcpy(val, xi->name, name_len); | 1458 | static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc, |
1399 | xv = (struct ocfs2_xattr_value_root *) | 1459 | struct ocfs2_xattr_info *xi) |
1400 | (val + OCFS2_XATTR_SIZE(name_len)); | 1460 | { |
1401 | xv->xr_clusters = 0; | 1461 | return loc->xl_ops->xlo_can_reuse(loc, xi); |
1402 | xv->xr_last_eb_blk = 0; | 1462 | } |
1403 | xv->xr_list.l_tree_depth = 0; | 1463 | |
1404 | xv->xr_list.l_count = cpu_to_le16(1); | 1464 | /* How much free space is needed to set the new value */ |
1405 | xv->xr_list.l_next_free_rec = 0; | 1465 | static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc, |
1406 | vb->vb_xv = xv; | 1466 | struct ocfs2_xattr_info *xi) |
1407 | 1467 | { | |
1408 | ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt); | 1468 | return loc->xl_ops->xlo_check_space(loc, xi); |
1409 | if (ret < 0) { | 1469 | } |
1410 | mlog_errno(ret); | 1470 | |
1411 | return ret; | 1471 | static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) |
1472 | { | ||
1473 | loc->xl_ops->xlo_add_entry(loc, name_hash); | ||
1474 | loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash); | ||
1475 | /* | ||
1476 | * We can't leave the new entry's xe_name_offset at zero or | ||
1477 | * add_namevalue() will go nuts. We set it to the size of our | ||
1478 | * storage so that it can never be less than any other entry. | ||
1479 | */ | ||
1480 | loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size); | ||
1481 | } | ||
1482 | |||
1483 | static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc, | ||
1484 | struct ocfs2_xattr_info *xi) | ||
1485 | { | ||
1486 | int size = namevalue_size_xi(xi); | ||
1487 | int nameval_offset; | ||
1488 | char *nameval_buf; | ||
1489 | |||
1490 | loc->xl_ops->xlo_add_namevalue(loc, size); | ||
1491 | loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); | ||
1492 | loc->xl_entry->xe_name_len = xi->xi_name_len; | ||
1493 | ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index); | ||
1494 | ocfs2_xattr_set_local(loc->xl_entry, | ||
1495 | xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE); | ||
1496 | |||
1497 | nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); | ||
1498 | nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); | ||
1499 | memset(nameval_buf, 0, size); | ||
1500 | memcpy(nameval_buf, xi->xi_name, xi->xi_name_len); | ||
1501 | } | ||
1502 | |||
1503 | static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc, | ||
1504 | struct ocfs2_xattr_value_buf *vb) | ||
1505 | { | ||
1506 | int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); | ||
1507 | int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); | ||
1508 | |||
1509 | /* Value bufs are for value trees */ | ||
1510 | BUG_ON(ocfs2_xattr_is_local(loc->xl_entry)); | ||
1511 | BUG_ON(namevalue_size_xe(loc->xl_entry) != | ||
1512 | (name_size + OCFS2_XATTR_ROOT_SIZE)); | ||
1513 | |||
1514 | loc->xl_ops->xlo_fill_value_buf(loc, vb); | ||
1515 | vb->vb_xv = | ||
1516 | (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc, | ||
1517 | nameval_offset + | ||
1518 | name_size); | ||
1519 | } | ||
1520 | |||
1521 | static int ocfs2_xa_block_journal_access(handle_t *handle, | ||
1522 | struct ocfs2_xa_loc *loc, int type) | ||
1523 | { | ||
1524 | struct buffer_head *bh = loc->xl_storage; | ||
1525 | ocfs2_journal_access_func access; | ||
1526 | |||
1527 | if (loc->xl_size == (bh->b_size - | ||
1528 | offsetof(struct ocfs2_xattr_block, | ||
1529 | xb_attrs.xb_header))) | ||
1530 | access = ocfs2_journal_access_xb; | ||
1531 | else | ||
1532 | access = ocfs2_journal_access_di; | ||
1533 | return access(handle, INODE_CACHE(loc->xl_inode), bh, type); | ||
1534 | } | ||
1535 | |||
1536 | static void ocfs2_xa_block_journal_dirty(handle_t *handle, | ||
1537 | struct ocfs2_xa_loc *loc) | ||
1538 | { | ||
1539 | struct buffer_head *bh = loc->xl_storage; | ||
1540 | |||
1541 | ocfs2_journal_dirty(handle, bh); | ||
1542 | } | ||
1543 | |||
1544 | static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc, | ||
1545 | int offset) | ||
1546 | { | ||
1547 | return (char *)loc->xl_header + offset; | ||
1548 | } | ||
1549 | |||
1550 | static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc, | ||
1551 | struct ocfs2_xattr_info *xi) | ||
1552 | { | ||
1553 | /* | ||
1554 | * Block storage is strict. If the sizes aren't exact, we will | ||
1555 | * remove the old one and reinsert the new. | ||
1556 | */ | ||
1557 | return namevalue_size_xe(loc->xl_entry) == | ||
1558 | namevalue_size_xi(xi); | ||
1559 | } | ||
1560 | |||
1561 | static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc) | ||
1562 | { | ||
1563 | struct ocfs2_xattr_header *xh = loc->xl_header; | ||
1564 | int i, count = le16_to_cpu(xh->xh_count); | ||
1565 | int offset, free_start = loc->xl_size; | ||
1566 | |||
1567 | for (i = 0; i < count; i++) { | ||
1568 | offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); | ||
1569 | if (offset < free_start) | ||
1570 | free_start = offset; | ||
1412 | } | 1571 | } |
1413 | ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs); | 1572 | |
1414 | if (ret < 0) { | 1573 | return free_start; |
1415 | mlog_errno(ret); | 1574 | } |
1416 | return ret; | 1575 | |
1576 | static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc, | ||
1577 | struct ocfs2_xattr_info *xi) | ||
1578 | { | ||
1579 | int count = le16_to_cpu(loc->xl_header->xh_count); | ||
1580 | int free_start = ocfs2_xa_get_free_start(loc); | ||
1581 | int needed_space = ocfs2_xi_entry_usage(xi); | ||
1582 | |||
1583 | /* | ||
1584 | * Block storage will reclaim the original entry before inserting | ||
1585 | * the new value, so we only need the difference. If the new | ||
1586 | * entry is smaller than the old one, we don't need anything. | ||
1587 | */ | ||
1588 | if (loc->xl_entry) { | ||
1589 | /* Don't need space if we're reusing! */ | ||
1590 | if (ocfs2_xa_can_reuse_entry(loc, xi)) | ||
1591 | needed_space = 0; | ||
1592 | else | ||
1593 | needed_space -= ocfs2_xe_entry_usage(loc->xl_entry); | ||
1417 | } | 1594 | } |
1418 | ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb, | 1595 | if (needed_space < 0) |
1419 | xi->value, xi->value_len); | 1596 | needed_space = 0; |
1420 | if (ret < 0) | 1597 | return ocfs2_xa_check_space_helper(needed_space, free_start, count); |
1421 | mlog_errno(ret); | 1598 | } |
1422 | 1599 | ||
1423 | return ret; | 1600 | /* |
1601 | * Block storage for xattrs keeps the name+value pairs compacted. When | ||
1602 | * we remove one, we have to shift any that preceded it towards the end. | ||
1603 | */ | ||
1604 | static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) | ||
1605 | { | ||
1606 | int i, offset; | ||
1607 | int namevalue_offset, first_namevalue_offset, namevalue_size; | ||
1608 | struct ocfs2_xattr_entry *entry = loc->xl_entry; | ||
1609 | struct ocfs2_xattr_header *xh = loc->xl_header; | ||
1610 | int count = le16_to_cpu(xh->xh_count); | ||
1611 | |||
1612 | namevalue_offset = le16_to_cpu(entry->xe_name_offset); | ||
1613 | namevalue_size = namevalue_size_xe(entry); | ||
1614 | first_namevalue_offset = ocfs2_xa_get_free_start(loc); | ||
1615 | |||
1616 | /* Shift the name+value pairs */ | ||
1617 | memmove((char *)xh + first_namevalue_offset + namevalue_size, | ||
1618 | (char *)xh + first_namevalue_offset, | ||
1619 | namevalue_offset - first_namevalue_offset); | ||
1620 | memset((char *)xh + first_namevalue_offset, 0, namevalue_size); | ||
1621 | |||
1622 | /* Now tell xh->xh_entries about it */ | ||
1623 | for (i = 0; i < count; i++) { | ||
1624 | offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); | ||
1625 | if (offset <= namevalue_offset) | ||
1626 | le16_add_cpu(&xh->xh_entries[i].xe_name_offset, | ||
1627 | namevalue_size); | ||
1628 | } | ||
1629 | |||
1630 | /* | ||
1631 | * Note that we don't update xh_free_start or xh_name_value_len | ||
1632 | * because they're not used in block-stored xattrs. | ||
1633 | */ | ||
1634 | } | ||
1635 | |||
1636 | static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) | ||
1637 | { | ||
1638 | int count = le16_to_cpu(loc->xl_header->xh_count); | ||
1639 | loc->xl_entry = &(loc->xl_header->xh_entries[count]); | ||
1640 | le16_add_cpu(&loc->xl_header->xh_count, 1); | ||
1641 | memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); | ||
1642 | } | ||
1643 | |||
1644 | static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size) | ||
1645 | { | ||
1646 | int free_start = ocfs2_xa_get_free_start(loc); | ||
1647 | |||
1648 | loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size); | ||
1649 | } | ||
1650 | |||
1651 | static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc, | ||
1652 | struct ocfs2_xattr_value_buf *vb) | ||
1653 | { | ||
1654 | struct buffer_head *bh = loc->xl_storage; | ||
1655 | |||
1656 | if (loc->xl_size == (bh->b_size - | ||
1657 | offsetof(struct ocfs2_xattr_block, | ||
1658 | xb_attrs.xb_header))) | ||
1659 | vb->vb_access = ocfs2_journal_access_xb; | ||
1660 | else | ||
1661 | vb->vb_access = ocfs2_journal_access_di; | ||
1662 | vb->vb_bh = bh; | ||
1424 | } | 1663 | } |
1425 | 1664 | ||
1426 | /* | 1665 | /* |
1427 | * ocfs2_xattr_set_entry_local() | 1666 | * Operations for xattrs stored in blocks. This includes inline inode |
1428 | * | 1667 | * storage and unindexed ocfs2_xattr_blocks. |
1429 | * Set, replace or remove extended attribute in local. | ||
1430 | */ | 1668 | */ |
1431 | static void ocfs2_xattr_set_entry_local(struct inode *inode, | 1669 | static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = { |
1432 | struct ocfs2_xattr_info *xi, | 1670 | .xlo_journal_access = ocfs2_xa_block_journal_access, |
1433 | struct ocfs2_xattr_search *xs, | 1671 | .xlo_journal_dirty = ocfs2_xa_block_journal_dirty, |
1434 | struct ocfs2_xattr_entry *last, | 1672 | .xlo_offset_pointer = ocfs2_xa_block_offset_pointer, |
1435 | size_t min_offs) | 1673 | .xlo_check_space = ocfs2_xa_block_check_space, |
1674 | .xlo_can_reuse = ocfs2_xa_block_can_reuse, | ||
1675 | .xlo_get_free_start = ocfs2_xa_block_get_free_start, | ||
1676 | .xlo_wipe_namevalue = ocfs2_xa_block_wipe_namevalue, | ||
1677 | .xlo_add_entry = ocfs2_xa_block_add_entry, | ||
1678 | .xlo_add_namevalue = ocfs2_xa_block_add_namevalue, | ||
1679 | .xlo_fill_value_buf = ocfs2_xa_block_fill_value_buf, | ||
1680 | }; | ||
1681 | |||
1682 | static int ocfs2_xa_bucket_journal_access(handle_t *handle, | ||
1683 | struct ocfs2_xa_loc *loc, int type) | ||
1436 | { | 1684 | { |
1437 | size_t name_len = strlen(xi->name); | 1685 | struct ocfs2_xattr_bucket *bucket = loc->xl_storage; |
1438 | int i; | ||
1439 | 1686 | ||
1440 | if (xi->value && xs->not_found) { | 1687 | return ocfs2_xattr_bucket_journal_access(handle, bucket, type); |
1441 | /* Insert the new xattr entry. */ | 1688 | } |
1442 | le16_add_cpu(&xs->header->xh_count, 1); | 1689 | |
1443 | ocfs2_xattr_set_type(last, xi->name_index); | 1690 | static void ocfs2_xa_bucket_journal_dirty(handle_t *handle, |
1444 | ocfs2_xattr_set_local(last, 1); | 1691 | struct ocfs2_xa_loc *loc) |
1445 | last->xe_name_len = name_len; | 1692 | { |
1446 | } else { | 1693 | struct ocfs2_xattr_bucket *bucket = loc->xl_storage; |
1447 | void *first_val; | 1694 | |
1448 | void *val; | 1695 | ocfs2_xattr_bucket_journal_dirty(handle, bucket); |
1449 | size_t offs, size; | 1696 | } |
1450 | 1697 | ||
1451 | first_val = xs->base + min_offs; | 1698 | static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc, |
1452 | offs = le16_to_cpu(xs->here->xe_name_offset); | 1699 | int offset) |
1453 | val = xs->base + offs; | 1700 | { |
1454 | 1701 | struct ocfs2_xattr_bucket *bucket = loc->xl_storage; | |
1455 | if (le64_to_cpu(xs->here->xe_value_size) > | 1702 | int block, block_offset; |
1456 | OCFS2_XATTR_INLINE_SIZE) | 1703 | |
1457 | size = OCFS2_XATTR_SIZE(name_len) + | 1704 | /* The header is at the front of the bucket */ |
1458 | OCFS2_XATTR_ROOT_SIZE; | 1705 | block = offset >> loc->xl_inode->i_sb->s_blocksize_bits; |
1706 | block_offset = offset % loc->xl_inode->i_sb->s_blocksize; | ||
1707 | |||
1708 | return bucket_block(bucket, block) + block_offset; | ||
1709 | } | ||
1710 | |||
1711 | static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc, | ||
1712 | struct ocfs2_xattr_info *xi) | ||
1713 | { | ||
1714 | return namevalue_size_xe(loc->xl_entry) >= | ||
1715 | namevalue_size_xi(xi); | ||
1716 | } | ||
1717 | |||
1718 | static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc) | ||
1719 | { | ||
1720 | struct ocfs2_xattr_bucket *bucket = loc->xl_storage; | ||
1721 | return le16_to_cpu(bucket_xh(bucket)->xh_free_start); | ||
1722 | } | ||
1723 | |||
1724 | static int ocfs2_bucket_align_free_start(struct super_block *sb, | ||
1725 | int free_start, int size) | ||
1726 | { | ||
1727 | /* | ||
1728 | * We need to make sure that the name+value pair fits within | ||
1729 | * one block. | ||
1730 | */ | ||
1731 | if (((free_start - size) >> sb->s_blocksize_bits) != | ||
1732 | ((free_start - 1) >> sb->s_blocksize_bits)) | ||
1733 | free_start -= free_start % sb->s_blocksize; | ||
1734 | |||
1735 | return free_start; | ||
1736 | } | ||
1737 | |||
1738 | static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc, | ||
1739 | struct ocfs2_xattr_info *xi) | ||
1740 | { | ||
1741 | int rc; | ||
1742 | int count = le16_to_cpu(loc->xl_header->xh_count); | ||
1743 | int free_start = ocfs2_xa_get_free_start(loc); | ||
1744 | int needed_space = ocfs2_xi_entry_usage(xi); | ||
1745 | int size = namevalue_size_xi(xi); | ||
1746 | struct super_block *sb = loc->xl_inode->i_sb; | ||
1747 | |||
1748 | /* | ||
1749 | * Bucket storage does not reclaim name+value pairs it cannot | ||
1750 | * reuse. They live as holes until the bucket fills, and then | ||
1751 | * the bucket is defragmented. However, the bucket can reclaim | ||
1752 | * the ocfs2_xattr_entry. | ||
1753 | */ | ||
1754 | if (loc->xl_entry) { | ||
1755 | /* Don't need space if we're reusing! */ | ||
1756 | if (ocfs2_xa_can_reuse_entry(loc, xi)) | ||
1757 | needed_space = 0; | ||
1459 | else | 1758 | else |
1460 | size = OCFS2_XATTR_SIZE(name_len) + | 1759 | needed_space -= sizeof(struct ocfs2_xattr_entry); |
1461 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | 1760 | } |
1462 | 1761 | BUG_ON(needed_space < 0); | |
1463 | if (xi->value && size == OCFS2_XATTR_SIZE(name_len) + | ||
1464 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
1465 | /* The old and the new value have the | ||
1466 | same size. Just replace the value. */ | ||
1467 | ocfs2_xattr_set_local(xs->here, 1); | ||
1468 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
1469 | /* Clear value bytes. */ | ||
1470 | memset(val + OCFS2_XATTR_SIZE(name_len), | ||
1471 | 0, | ||
1472 | OCFS2_XATTR_SIZE(xi->value_len)); | ||
1473 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
1474 | xi->value, | ||
1475 | xi->value_len); | ||
1476 | return; | ||
1477 | } | ||
1478 | /* Remove the old name+value. */ | ||
1479 | memmove(first_val + size, first_val, val - first_val); | ||
1480 | memset(first_val, 0, size); | ||
1481 | xs->here->xe_name_hash = 0; | ||
1482 | xs->here->xe_name_offset = 0; | ||
1483 | ocfs2_xattr_set_local(xs->here, 1); | ||
1484 | xs->here->xe_value_size = 0; | ||
1485 | |||
1486 | min_offs += size; | ||
1487 | |||
1488 | /* Adjust all value offsets. */ | ||
1489 | last = xs->header->xh_entries; | ||
1490 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
1491 | size_t o = le16_to_cpu(last->xe_name_offset); | ||
1492 | |||
1493 | if (o < offs) | ||
1494 | last->xe_name_offset = cpu_to_le16(o + size); | ||
1495 | last += 1; | ||
1496 | } | ||
1497 | 1762 | ||
1498 | if (!xi->value) { | 1763 | if (free_start < size) { |
1499 | /* Remove the old entry. */ | 1764 | if (needed_space) |
1500 | last -= 1; | 1765 | return -ENOSPC; |
1501 | memmove(xs->here, xs->here + 1, | 1766 | } else { |
1502 | (void *)last - (void *)xs->here); | 1767 | /* |
1503 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | 1768 | * First we check if it would fit in the first place. |
1504 | le16_add_cpu(&xs->header->xh_count, -1); | 1769 | * Below, we align the free start to a block. This may |
1505 | } | 1770 | * slide us below the minimum gap. By checking unaligned |
1771 | * first, we avoid that error. | ||
1772 | */ | ||
1773 | rc = ocfs2_xa_check_space_helper(needed_space, free_start, | ||
1774 | count); | ||
1775 | if (rc) | ||
1776 | return rc; | ||
1777 | free_start = ocfs2_bucket_align_free_start(sb, free_start, | ||
1778 | size); | ||
1506 | } | 1779 | } |
1507 | if (xi->value) { | 1780 | return ocfs2_xa_check_space_helper(needed_space, free_start, count); |
1508 | /* Insert the new name+value. */ | 1781 | } |
1509 | size_t size = OCFS2_XATTR_SIZE(name_len) + | 1782 | |
1510 | OCFS2_XATTR_SIZE(xi->value_len); | 1783 | static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc) |
1511 | void *val = xs->base + min_offs - size; | 1784 | { |
1785 | le16_add_cpu(&loc->xl_header->xh_name_value_len, | ||
1786 | -namevalue_size_xe(loc->xl_entry)); | ||
1787 | } | ||
1788 | |||
1789 | static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash) | ||
1790 | { | ||
1791 | struct ocfs2_xattr_header *xh = loc->xl_header; | ||
1792 | int count = le16_to_cpu(xh->xh_count); | ||
1793 | int low = 0, high = count - 1, tmp; | ||
1794 | struct ocfs2_xattr_entry *tmp_xe; | ||
1512 | 1795 | ||
1513 | xs->here->xe_name_offset = cpu_to_le16(min_offs - size); | 1796 | /* |
1514 | memset(val, 0, size); | 1797 | * We keep buckets sorted by name_hash, so we need to find |
1515 | memcpy(val, xi->name, name_len); | 1798 | * our insert place. |
1516 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | 1799 | */ |
1517 | xi->value, | 1800 | while (low <= high && count) { |
1518 | xi->value_len); | 1801 | tmp = (low + high) / 2; |
1519 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | 1802 | tmp_xe = &xh->xh_entries[tmp]; |
1520 | ocfs2_xattr_set_local(xs->here, 1); | 1803 | |
1521 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | 1804 | if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) |
1805 | low = tmp + 1; | ||
1806 | else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash)) | ||
1807 | high = tmp - 1; | ||
1808 | else { | ||
1809 | low = tmp; | ||
1810 | break; | ||
1811 | } | ||
1522 | } | 1812 | } |
1523 | 1813 | ||
1524 | return; | 1814 | if (low != count) |
1815 | memmove(&xh->xh_entries[low + 1], | ||
1816 | &xh->xh_entries[low], | ||
1817 | ((count - low) * sizeof(struct ocfs2_xattr_entry))); | ||
1818 | |||
1819 | le16_add_cpu(&xh->xh_count, 1); | ||
1820 | loc->xl_entry = &xh->xh_entries[low]; | ||
1821 | memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry)); | ||
1822 | } | ||
1823 | |||
1824 | static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size) | ||
1825 | { | ||
1826 | int free_start = ocfs2_xa_get_free_start(loc); | ||
1827 | struct ocfs2_xattr_header *xh = loc->xl_header; | ||
1828 | struct super_block *sb = loc->xl_inode->i_sb; | ||
1829 | int nameval_offset; | ||
1830 | |||
1831 | free_start = ocfs2_bucket_align_free_start(sb, free_start, size); | ||
1832 | nameval_offset = free_start - size; | ||
1833 | loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset); | ||
1834 | xh->xh_free_start = cpu_to_le16(nameval_offset); | ||
1835 | le16_add_cpu(&xh->xh_name_value_len, size); | ||
1836 | |||
1837 | } | ||
1838 | |||
1839 | static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc, | ||
1840 | struct ocfs2_xattr_value_buf *vb) | ||
1841 | { | ||
1842 | struct ocfs2_xattr_bucket *bucket = loc->xl_storage; | ||
1843 | struct super_block *sb = loc->xl_inode->i_sb; | ||
1844 | int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); | ||
1845 | int size = namevalue_size_xe(loc->xl_entry); | ||
1846 | int block_offset = nameval_offset >> sb->s_blocksize_bits; | ||
1847 | |||
1848 | /* Values are not allowed to straddle block boundaries */ | ||
1849 | BUG_ON(block_offset != | ||
1850 | ((nameval_offset + size - 1) >> sb->s_blocksize_bits)); | ||
1851 | /* We expect the bucket to be filled in */ | ||
1852 | BUG_ON(!bucket->bu_bhs[block_offset]); | ||
1853 | |||
1854 | vb->vb_access = ocfs2_journal_access; | ||
1855 | vb->vb_bh = bucket->bu_bhs[block_offset]; | ||
1856 | } | ||
1857 | |||
1858 | /* Operations for xattrs stored in buckets. */ | ||
1859 | static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = { | ||
1860 | .xlo_journal_access = ocfs2_xa_bucket_journal_access, | ||
1861 | .xlo_journal_dirty = ocfs2_xa_bucket_journal_dirty, | ||
1862 | .xlo_offset_pointer = ocfs2_xa_bucket_offset_pointer, | ||
1863 | .xlo_check_space = ocfs2_xa_bucket_check_space, | ||
1864 | .xlo_can_reuse = ocfs2_xa_bucket_can_reuse, | ||
1865 | .xlo_get_free_start = ocfs2_xa_bucket_get_free_start, | ||
1866 | .xlo_wipe_namevalue = ocfs2_xa_bucket_wipe_namevalue, | ||
1867 | .xlo_add_entry = ocfs2_xa_bucket_add_entry, | ||
1868 | .xlo_add_namevalue = ocfs2_xa_bucket_add_namevalue, | ||
1869 | .xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf, | ||
1870 | }; | ||
1871 | |||
1872 | static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc) | ||
1873 | { | ||
1874 | struct ocfs2_xattr_value_buf vb; | ||
1875 | |||
1876 | if (ocfs2_xattr_is_local(loc->xl_entry)) | ||
1877 | return 0; | ||
1878 | |||
1879 | ocfs2_xa_fill_value_buf(loc, &vb); | ||
1880 | return le32_to_cpu(vb.vb_xv->xr_clusters); | ||
1881 | } | ||
1882 | |||
1883 | static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes, | ||
1884 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
1885 | { | ||
1886 | int trunc_rc, access_rc; | ||
1887 | struct ocfs2_xattr_value_buf vb; | ||
1888 | |||
1889 | ocfs2_xa_fill_value_buf(loc, &vb); | ||
1890 | trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes, | ||
1891 | ctxt); | ||
1892 | |||
1893 | /* | ||
1894 | * The caller of ocfs2_xa_value_truncate() has already called | ||
1895 | * ocfs2_xa_journal_access on the loc. However, The truncate code | ||
1896 | * calls ocfs2_extend_trans(). This may commit the previous | ||
1897 | * transaction and open a new one. If this is a bucket, truncate | ||
1898 | * could leave only vb->vb_bh set up for journaling. Meanwhile, | ||
1899 | * the caller is expecting to dirty the entire bucket. So we must | ||
1900 | * reset the journal work. We do this even if truncate has failed, | ||
1901 | * as it could have failed after committing the extend. | ||
1902 | */ | ||
1903 | access_rc = ocfs2_xa_journal_access(ctxt->handle, loc, | ||
1904 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1905 | |||
1906 | /* Errors in truncate take precedence */ | ||
1907 | return trunc_rc ? trunc_rc : access_rc; | ||
1908 | } | ||
1909 | |||
1910 | static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) | ||
1911 | { | ||
1912 | int index, count; | ||
1913 | struct ocfs2_xattr_header *xh = loc->xl_header; | ||
1914 | struct ocfs2_xattr_entry *entry = loc->xl_entry; | ||
1915 | |||
1916 | ocfs2_xa_wipe_namevalue(loc); | ||
1917 | loc->xl_entry = NULL; | ||
1918 | |||
1919 | le16_add_cpu(&xh->xh_count, -1); | ||
1920 | count = le16_to_cpu(xh->xh_count); | ||
1921 | |||
1922 | /* | ||
1923 | * Only zero out the entry if there are more remaining. This is | ||
1924 | * important for an empty bucket, as it keeps track of the | ||
1925 | * bucket's hash value. It doesn't hurt empty block storage. | ||
1926 | */ | ||
1927 | if (count) { | ||
1928 | index = ((char *)entry - (char *)&xh->xh_entries) / | ||
1929 | sizeof(struct ocfs2_xattr_entry); | ||
1930 | memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1], | ||
1931 | (count - index) * sizeof(struct ocfs2_xattr_entry)); | ||
1932 | memset(&xh->xh_entries[count], 0, | ||
1933 | sizeof(struct ocfs2_xattr_entry)); | ||
1934 | } | ||
1525 | } | 1935 | } |
1526 | 1936 | ||
1527 | /* | 1937 | /* |
1528 | * ocfs2_xattr_set_entry() | 1938 | * If we have a problem adjusting the size of an external value during |
1939 | * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr | ||
1940 | * in an intermediate state. For example, the value may be partially | ||
1941 | * truncated. | ||
1529 | * | 1942 | * |
1530 | * Set extended attribute entry into inode or block. | 1943 | * If the value tree hasn't changed, the extend/truncate went nowhere. |
1944 | * We have nothing to do. The caller can treat it as a straight error. | ||
1531 | * | 1945 | * |
1532 | * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE, | 1946 | * If the value tree got partially truncated, we now have a corrupted |
1533 | * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(), | 1947 | * extended attribute. We're going to wipe its entry and leak the |
1534 | * then set value in B tree with set_value_outside(). | 1948 | * clusters. Better to leak some storage than leave a corrupt entry. |
1949 | * | ||
1950 | * If the value tree grew, it obviously didn't grow enough for the | ||
1951 | * new entry. We're not going to try and reclaim those clusters either. | ||
1952 | * If there was already an external value there (orig_clusters != 0), | ||
1953 | * the new clusters are attached safely and we can just leave the old | ||
1954 | * value in place. If there was no external value there, we remove | ||
1955 | * the entry. | ||
1956 | * | ||
1957 | * This way, the xattr block we store in the journal will be consistent. | ||
1958 | * If the size change broke because of the journal, no changes will hit | ||
1959 | * disk anyway. | ||
1535 | */ | 1960 | */ |
1536 | static int ocfs2_xattr_set_entry(struct inode *inode, | 1961 | static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc, |
1537 | struct ocfs2_xattr_info *xi, | 1962 | const char *what, |
1538 | struct ocfs2_xattr_search *xs, | 1963 | unsigned int orig_clusters) |
1539 | struct ocfs2_xattr_set_ctxt *ctxt, | 1964 | { |
1540 | int flag) | 1965 | unsigned int new_clusters = ocfs2_xa_value_clusters(loc); |
1541 | { | 1966 | char *nameval_buf = ocfs2_xa_offset_pointer(loc, |
1542 | struct ocfs2_xattr_entry *last; | 1967 | le16_to_cpu(loc->xl_entry->xe_name_offset)); |
1543 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1968 | |
1544 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | 1969 | if (new_clusters < orig_clusters) { |
1545 | size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); | 1970 | mlog(ML_ERROR, |
1546 | size_t size_l = 0; | 1971 | "Partial truncate while %s xattr %.*s. Leaking " |
1547 | handle_t *handle = ctxt->handle; | 1972 | "%u clusters and removing the entry\n", |
1548 | int free, i, ret; | 1973 | what, loc->xl_entry->xe_name_len, nameval_buf, |
1549 | struct ocfs2_xattr_info xi_l = { | 1974 | orig_clusters - new_clusters); |
1550 | .name_index = xi->name_index, | 1975 | ocfs2_xa_remove_entry(loc); |
1551 | .name = xi->name, | 1976 | } else if (!orig_clusters) { |
1552 | .value = xi->value, | 1977 | mlog(ML_ERROR, |
1553 | .value_len = xi->value_len, | 1978 | "Unable to allocate an external value for xattr " |
1554 | }; | 1979 | "%.*s safely. Leaking %u clusters and removing the " |
1555 | struct ocfs2_xattr_value_buf vb = { | 1980 | "entry\n", |
1556 | .vb_bh = xs->xattr_bh, | 1981 | loc->xl_entry->xe_name_len, nameval_buf, |
1557 | .vb_access = ocfs2_journal_access_di, | 1982 | new_clusters - orig_clusters); |
1558 | }; | 1983 | ocfs2_xa_remove_entry(loc); |
1984 | } else if (new_clusters > orig_clusters) | ||
1985 | mlog(ML_ERROR, | ||
1986 | "Unable to grow xattr %.*s safely. %u new clusters " | ||
1987 | "have been added, but the value will not be " | ||
1988 | "modified\n", | ||
1989 | loc->xl_entry->xe_name_len, nameval_buf, | ||
1990 | new_clusters - orig_clusters); | ||
1991 | } | ||
1992 | |||
1993 | static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, | ||
1994 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
1995 | { | ||
1996 | int rc = 0; | ||
1997 | unsigned int orig_clusters; | ||
1998 | |||
1999 | if (!ocfs2_xattr_is_local(loc->xl_entry)) { | ||
2000 | orig_clusters = ocfs2_xa_value_clusters(loc); | ||
2001 | rc = ocfs2_xa_value_truncate(loc, 0, ctxt); | ||
2002 | if (rc) { | ||
2003 | mlog_errno(rc); | ||
2004 | /* | ||
2005 | * Since this is remove, we can return 0 if | ||
2006 | * ocfs2_xa_cleanup_value_truncate() is going to | ||
2007 | * wipe the entry anyway. So we check the | ||
2008 | * cluster count as well. | ||
2009 | */ | ||
2010 | if (orig_clusters != ocfs2_xa_value_clusters(loc)) | ||
2011 | rc = 0; | ||
2012 | ocfs2_xa_cleanup_value_truncate(loc, "removing", | ||
2013 | orig_clusters); | ||
2014 | if (rc) | ||
2015 | goto out; | ||
2016 | } | ||
2017 | } | ||
1559 | 2018 | ||
1560 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | 2019 | ocfs2_xa_remove_entry(loc); |
1561 | BUG_ON(xs->xattr_bh == xs->inode_bh); | ||
1562 | vb.vb_access = ocfs2_journal_access_xb; | ||
1563 | } else | ||
1564 | BUG_ON(xs->xattr_bh != xs->inode_bh); | ||
1565 | 2020 | ||
1566 | /* Compute min_offs, last and free space. */ | 2021 | out: |
1567 | last = xs->header->xh_entries; | 2022 | return rc; |
2023 | } | ||
1568 | 2024 | ||
1569 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | 2025 | static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc) |
1570 | size_t offs = le16_to_cpu(last->xe_name_offset); | 2026 | { |
1571 | if (offs < min_offs) | 2027 | int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len); |
1572 | min_offs = offs; | 2028 | char *nameval_buf; |
1573 | last += 1; | ||
1574 | } | ||
1575 | 2029 | ||
1576 | free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP; | 2030 | nameval_buf = ocfs2_xa_offset_pointer(loc, |
1577 | if (free < 0) | 2031 | le16_to_cpu(loc->xl_entry->xe_name_offset)); |
1578 | return -EIO; | 2032 | memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE); |
2033 | } | ||
1579 | 2034 | ||
1580 | if (!xs->not_found) { | 2035 | /* |
1581 | size_t size = 0; | 2036 | * Take an existing entry and make it ready for the new value. This |
1582 | if (ocfs2_xattr_is_local(xs->here)) | 2037 | * won't allocate space, but it may free space. It should be ready for |
1583 | size = OCFS2_XATTR_SIZE(name_len) + | 2038 | * ocfs2_xa_prepare_entry() to finish the work. |
1584 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | 2039 | */ |
1585 | else | 2040 | static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc, |
1586 | size = OCFS2_XATTR_SIZE(name_len) + | 2041 | struct ocfs2_xattr_info *xi, |
1587 | OCFS2_XATTR_ROOT_SIZE; | 2042 | struct ocfs2_xattr_set_ctxt *ctxt) |
1588 | free += (size + sizeof(struct ocfs2_xattr_entry)); | 2043 | { |
1589 | } | 2044 | int rc = 0; |
1590 | /* Check free space in inode or block */ | 2045 | int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); |
1591 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | 2046 | unsigned int orig_clusters; |
1592 | if (free < sizeof(struct ocfs2_xattr_entry) + | 2047 | char *nameval_buf; |
1593 | OCFS2_XATTR_SIZE(name_len) + | 2048 | int xe_local = ocfs2_xattr_is_local(loc->xl_entry); |
1594 | OCFS2_XATTR_ROOT_SIZE) { | 2049 | int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE; |
1595 | ret = -ENOSPC; | 2050 | |
1596 | goto out; | 2051 | BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) != |
2052 | name_size); | ||
2053 | |||
2054 | nameval_buf = ocfs2_xa_offset_pointer(loc, | ||
2055 | le16_to_cpu(loc->xl_entry->xe_name_offset)); | ||
2056 | if (xe_local) { | ||
2057 | memset(nameval_buf + name_size, 0, | ||
2058 | namevalue_size_xe(loc->xl_entry) - name_size); | ||
2059 | if (!xi_local) | ||
2060 | ocfs2_xa_install_value_root(loc); | ||
2061 | } else { | ||
2062 | orig_clusters = ocfs2_xa_value_clusters(loc); | ||
2063 | if (xi_local) { | ||
2064 | rc = ocfs2_xa_value_truncate(loc, 0, ctxt); | ||
2065 | if (rc < 0) | ||
2066 | mlog_errno(rc); | ||
2067 | else | ||
2068 | memset(nameval_buf + name_size, 0, | ||
2069 | namevalue_size_xe(loc->xl_entry) - | ||
2070 | name_size); | ||
2071 | } else if (le64_to_cpu(loc->xl_entry->xe_value_size) > | ||
2072 | xi->xi_value_len) { | ||
2073 | rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, | ||
2074 | ctxt); | ||
2075 | if (rc < 0) | ||
2076 | mlog_errno(rc); | ||
1597 | } | 2077 | } |
1598 | size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | 2078 | |
1599 | xi_l.value = (void *)&def_xv; | 2079 | if (rc) { |
1600 | xi_l.value_len = OCFS2_XATTR_ROOT_SIZE; | 2080 | ocfs2_xa_cleanup_value_truncate(loc, "reusing", |
1601 | } else if (xi->value) { | 2081 | orig_clusters); |
1602 | if (free < sizeof(struct ocfs2_xattr_entry) + | ||
1603 | OCFS2_XATTR_SIZE(name_len) + | ||
1604 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
1605 | ret = -ENOSPC; | ||
1606 | goto out; | 2082 | goto out; |
1607 | } | 2083 | } |
1608 | } | 2084 | } |
1609 | 2085 | ||
1610 | if (!xs->not_found) { | 2086 | loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len); |
1611 | /* For existing extended attribute */ | 2087 | ocfs2_xattr_set_local(loc->xl_entry, xi_local); |
1612 | size_t size = OCFS2_XATTR_SIZE(name_len) + | ||
1613 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
1614 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
1615 | void *val = xs->base + offs; | ||
1616 | 2088 | ||
1617 | if (ocfs2_xattr_is_local(xs->here) && size == size_l) { | 2089 | out: |
1618 | /* Replace existing local xattr with tree root */ | 2090 | return rc; |
1619 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, | 2091 | } |
1620 | ctxt, &vb, offs); | ||
1621 | if (ret < 0) | ||
1622 | mlog_errno(ret); | ||
1623 | goto out; | ||
1624 | } else if (!ocfs2_xattr_is_local(xs->here)) { | ||
1625 | /* For existing xattr which has value outside */ | ||
1626 | vb.vb_xv = (struct ocfs2_xattr_value_root *) | ||
1627 | (val + OCFS2_XATTR_SIZE(name_len)); | ||
1628 | 2092 | ||
1629 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | 2093 | /* |
1630 | /* | 2094 | * Prepares loc->xl_entry to receive the new xattr. This includes |
1631 | * If new value need set outside also, | 2095 | * properly setting up the name+value pair region. If loc->xl_entry |
1632 | * first truncate old value to new value, | 2096 | * already exists, it will take care of modifying it appropriately. |
1633 | * then set new value with set_value_outside(). | 2097 | * |
1634 | */ | 2098 | * Note that this modifies the data. You did journal_access already, |
1635 | ret = ocfs2_xattr_value_truncate(inode, | 2099 | * right? |
1636 | &vb, | 2100 | */ |
1637 | xi->value_len, | 2101 | static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc, |
1638 | ctxt); | 2102 | struct ocfs2_xattr_info *xi, |
1639 | if (ret < 0) { | 2103 | u32 name_hash, |
1640 | mlog_errno(ret); | 2104 | struct ocfs2_xattr_set_ctxt *ctxt) |
1641 | goto out; | 2105 | { |
1642 | } | 2106 | int rc = 0; |
2107 | unsigned int orig_clusters; | ||
2108 | __le64 orig_value_size = 0; | ||
1643 | 2109 | ||
1644 | ret = ocfs2_xattr_update_entry(inode, | 2110 | rc = ocfs2_xa_check_space(loc, xi); |
1645 | handle, | 2111 | if (rc) |
1646 | xi, | 2112 | goto out; |
1647 | xs, | ||
1648 | &vb, | ||
1649 | offs); | ||
1650 | if (ret < 0) { | ||
1651 | mlog_errno(ret); | ||
1652 | goto out; | ||
1653 | } | ||
1654 | 2113 | ||
1655 | ret = __ocfs2_xattr_set_value_outside(inode, | 2114 | if (loc->xl_entry) { |
1656 | handle, | 2115 | if (ocfs2_xa_can_reuse_entry(loc, xi)) { |
1657 | &vb, | 2116 | orig_value_size = loc->xl_entry->xe_value_size; |
1658 | xi->value, | 2117 | rc = ocfs2_xa_reuse_entry(loc, xi, ctxt); |
1659 | xi->value_len); | 2118 | if (rc) |
1660 | if (ret < 0) | 2119 | goto out; |
1661 | mlog_errno(ret); | 2120 | goto alloc_value; |
2121 | } | ||
2122 | |||
2123 | if (!ocfs2_xattr_is_local(loc->xl_entry)) { | ||
2124 | orig_clusters = ocfs2_xa_value_clusters(loc); | ||
2125 | rc = ocfs2_xa_value_truncate(loc, 0, ctxt); | ||
2126 | if (rc) { | ||
2127 | mlog_errno(rc); | ||
2128 | ocfs2_xa_cleanup_value_truncate(loc, | ||
2129 | "overwriting", | ||
2130 | orig_clusters); | ||
1662 | goto out; | 2131 | goto out; |
1663 | } else { | ||
1664 | /* | ||
1665 | * If new value need set in local, | ||
1666 | * just trucate old value to zero. | ||
1667 | */ | ||
1668 | ret = ocfs2_xattr_value_truncate(inode, | ||
1669 | &vb, | ||
1670 | 0, | ||
1671 | ctxt); | ||
1672 | if (ret < 0) | ||
1673 | mlog_errno(ret); | ||
1674 | } | 2132 | } |
1675 | } | 2133 | } |
2134 | ocfs2_xa_wipe_namevalue(loc); | ||
2135 | } else | ||
2136 | ocfs2_xa_add_entry(loc, name_hash); | ||
2137 | |||
2138 | /* | ||
2139 | * If we get here, we have a blank entry. Fill it. We grow our | ||
2140 | * name+value pair back from the end. | ||
2141 | */ | ||
2142 | ocfs2_xa_add_namevalue(loc, xi); | ||
2143 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) | ||
2144 | ocfs2_xa_install_value_root(loc); | ||
2145 | |||
2146 | alloc_value: | ||
2147 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
2148 | orig_clusters = ocfs2_xa_value_clusters(loc); | ||
2149 | rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); | ||
2150 | if (rc < 0) { | ||
2151 | /* | ||
2152 | * If we tried to grow an existing external value, | ||
2153 | * ocfs2_xa_cleanuP-value_truncate() is going to | ||
2154 | * let it stand. We have to restore its original | ||
2155 | * value size. | ||
2156 | */ | ||
2157 | loc->xl_entry->xe_value_size = orig_value_size; | ||
2158 | ocfs2_xa_cleanup_value_truncate(loc, "growing", | ||
2159 | orig_clusters); | ||
2160 | mlog_errno(rc); | ||
2161 | } | ||
1676 | } | 2162 | } |
1677 | 2163 | ||
1678 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh, | 2164 | out: |
2165 | return rc; | ||
2166 | } | ||
2167 | |||
2168 | /* | ||
2169 | * Store the value portion of the name+value pair. This will skip | ||
2170 | * values that are stored externally. Their tree roots were set up | ||
2171 | * by ocfs2_xa_prepare_entry(). | ||
2172 | */ | ||
2173 | static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc, | ||
2174 | struct ocfs2_xattr_info *xi, | ||
2175 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
2176 | { | ||
2177 | int rc = 0; | ||
2178 | int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset); | ||
2179 | int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len); | ||
2180 | char *nameval_buf; | ||
2181 | struct ocfs2_xattr_value_buf vb; | ||
2182 | |||
2183 | nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset); | ||
2184 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
2185 | ocfs2_xa_fill_value_buf(loc, &vb); | ||
2186 | rc = __ocfs2_xattr_set_value_outside(loc->xl_inode, | ||
2187 | ctxt->handle, &vb, | ||
2188 | xi->xi_value, | ||
2189 | xi->xi_value_len); | ||
2190 | } else | ||
2191 | memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len); | ||
2192 | |||
2193 | return rc; | ||
2194 | } | ||
2195 | |||
2196 | static int ocfs2_xa_set(struct ocfs2_xa_loc *loc, | ||
2197 | struct ocfs2_xattr_info *xi, | ||
2198 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
2199 | { | ||
2200 | int ret; | ||
2201 | u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name, | ||
2202 | xi->xi_name_len); | ||
2203 | |||
2204 | ret = ocfs2_xa_journal_access(ctxt->handle, loc, | ||
1679 | OCFS2_JOURNAL_ACCESS_WRITE); | 2205 | OCFS2_JOURNAL_ACCESS_WRITE); |
1680 | if (ret) { | 2206 | if (ret) { |
1681 | mlog_errno(ret); | 2207 | mlog_errno(ret); |
1682 | goto out; | 2208 | goto out; |
1683 | } | 2209 | } |
1684 | 2210 | ||
1685 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | ||
1686 | ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh, | ||
1687 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1688 | if (ret) { | ||
1689 | mlog_errno(ret); | ||
1690 | goto out; | ||
1691 | } | ||
1692 | } | ||
1693 | |||
1694 | /* | 2211 | /* |
1695 | * Set value in local, include set tree root in local. | 2212 | * From here on out, everything is going to modify the buffer a |
1696 | * This is the first step for value size >INLINE_SIZE. | 2213 | * little. Errors are going to leave the xattr header in a |
2214 | * sane state. Thus, even with errors we dirty the sucker. | ||
1697 | */ | 2215 | */ |
1698 | ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs); | ||
1699 | 2216 | ||
1700 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | 2217 | /* Don't worry, we are never called with !xi_value and !xl_entry */ |
1701 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | 2218 | if (!xi->xi_value) { |
1702 | if (ret < 0) { | 2219 | ret = ocfs2_xa_remove(loc, ctxt); |
1703 | mlog_errno(ret); | 2220 | goto out_dirty; |
1704 | goto out; | ||
1705 | } | ||
1706 | } | 2221 | } |
1707 | 2222 | ||
1708 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) && | 2223 | ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt); |
1709 | (flag & OCFS2_INLINE_XATTR_FL)) { | 2224 | if (ret) { |
1710 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2225 | if (ret != -ENOSPC) |
1711 | unsigned int xattrsize = osb->s_xattr_inline_size; | 2226 | mlog_errno(ret); |
1712 | 2227 | goto out_dirty; | |
1713 | /* | ||
1714 | * Adjust extent record count or inline data size | ||
1715 | * to reserve space for extended attribute. | ||
1716 | */ | ||
1717 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
1718 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
1719 | le16_add_cpu(&idata->id_count, -xattrsize); | ||
1720 | } else if (!(ocfs2_inode_is_fast_symlink(inode))) { | ||
1721 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
1722 | le16_add_cpu(&el->l_count, -(xattrsize / | ||
1723 | sizeof(struct ocfs2_extent_rec))); | ||
1724 | } | ||
1725 | di->i_xattr_inline_size = cpu_to_le16(xattrsize); | ||
1726 | } | 2228 | } |
1727 | /* Update xattr flag */ | ||
1728 | spin_lock(&oi->ip_lock); | ||
1729 | oi->ip_dyn_features |= flag; | ||
1730 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
1731 | spin_unlock(&oi->ip_lock); | ||
1732 | 2229 | ||
1733 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | 2230 | ret = ocfs2_xa_store_value(loc, xi, ctxt); |
1734 | if (ret < 0) | 2231 | if (ret) |
1735 | mlog_errno(ret); | 2232 | mlog_errno(ret); |
1736 | 2233 | ||
1737 | if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | 2234 | out_dirty: |
1738 | /* | 2235 | ocfs2_xa_journal_dirty(ctxt->handle, loc); |
1739 | * Set value outside in B tree. | ||
1740 | * This is the second step for value size > INLINE_SIZE. | ||
1741 | */ | ||
1742 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
1743 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, | ||
1744 | &vb, offs); | ||
1745 | if (ret < 0) { | ||
1746 | int ret2; | ||
1747 | 2236 | ||
1748 | mlog_errno(ret); | ||
1749 | /* | ||
1750 | * If set value outside failed, we have to clean | ||
1751 | * the junk tree root we have already set in local. | ||
1752 | */ | ||
1753 | ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle, | ||
1754 | xi, xs, &vb, offs); | ||
1755 | if (ret2 < 0) | ||
1756 | mlog_errno(ret2); | ||
1757 | } | ||
1758 | } | ||
1759 | out: | 2237 | out: |
1760 | return ret; | 2238 | return ret; |
1761 | } | 2239 | } |
1762 | 2240 | ||
2241 | static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc, | ||
2242 | struct inode *inode, | ||
2243 | struct buffer_head *bh, | ||
2244 | struct ocfs2_xattr_entry *entry) | ||
2245 | { | ||
2246 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | ||
2247 | |||
2248 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)); | ||
2249 | |||
2250 | loc->xl_inode = inode; | ||
2251 | loc->xl_ops = &ocfs2_xa_block_loc_ops; | ||
2252 | loc->xl_storage = bh; | ||
2253 | loc->xl_entry = entry; | ||
2254 | loc->xl_size = le16_to_cpu(di->i_xattr_inline_size); | ||
2255 | loc->xl_header = | ||
2256 | (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size - | ||
2257 | loc->xl_size); | ||
2258 | } | ||
2259 | |||
2260 | static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc, | ||
2261 | struct inode *inode, | ||
2262 | struct buffer_head *bh, | ||
2263 | struct ocfs2_xattr_entry *entry) | ||
2264 | { | ||
2265 | struct ocfs2_xattr_block *xb = | ||
2266 | (struct ocfs2_xattr_block *)bh->b_data; | ||
2267 | |||
2268 | BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED); | ||
2269 | |||
2270 | loc->xl_inode = inode; | ||
2271 | loc->xl_ops = &ocfs2_xa_block_loc_ops; | ||
2272 | loc->xl_storage = bh; | ||
2273 | loc->xl_header = &(xb->xb_attrs.xb_header); | ||
2274 | loc->xl_entry = entry; | ||
2275 | loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block, | ||
2276 | xb_attrs.xb_header); | ||
2277 | } | ||
2278 | |||
2279 | static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc, | ||
2280 | struct ocfs2_xattr_bucket *bucket, | ||
2281 | struct ocfs2_xattr_entry *entry) | ||
2282 | { | ||
2283 | loc->xl_inode = bucket->bu_inode; | ||
2284 | loc->xl_ops = &ocfs2_xa_bucket_loc_ops; | ||
2285 | loc->xl_storage = bucket; | ||
2286 | loc->xl_header = bucket_xh(bucket); | ||
2287 | loc->xl_entry = entry; | ||
2288 | loc->xl_size = OCFS2_XATTR_BUCKET_SIZE; | ||
2289 | } | ||
2290 | |||
1763 | /* | 2291 | /* |
1764 | * In xattr remove, if it is stored outside and refcounted, we may have | 2292 | * In xattr remove, if it is stored outside and refcounted, we may have |
1765 | * the chance to split the refcount tree. So need the allocators. | 2293 | * the chance to split the refcount tree. So need the allocators. |
@@ -2155,6 +2683,55 @@ static int ocfs2_xattr_ibody_find(struct inode *inode, | |||
2155 | return 0; | 2683 | return 0; |
2156 | } | 2684 | } |
2157 | 2685 | ||
2686 | static int ocfs2_xattr_ibody_init(struct inode *inode, | ||
2687 | struct buffer_head *di_bh, | ||
2688 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
2689 | { | ||
2690 | int ret; | ||
2691 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2692 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2693 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2694 | unsigned int xattrsize = osb->s_xattr_inline_size; | ||
2695 | |||
2696 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
2697 | ret = -ENOSPC; | ||
2698 | goto out; | ||
2699 | } | ||
2700 | |||
2701 | ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh, | ||
2702 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2703 | if (ret) { | ||
2704 | mlog_errno(ret); | ||
2705 | goto out; | ||
2706 | } | ||
2707 | |||
2708 | /* | ||
2709 | * Adjust extent record count or inline data size | ||
2710 | * to reserve space for extended attribute. | ||
2711 | */ | ||
2712 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
2713 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
2714 | le16_add_cpu(&idata->id_count, -xattrsize); | ||
2715 | } else if (!(ocfs2_inode_is_fast_symlink(inode))) { | ||
2716 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
2717 | le16_add_cpu(&el->l_count, -(xattrsize / | ||
2718 | sizeof(struct ocfs2_extent_rec))); | ||
2719 | } | ||
2720 | di->i_xattr_inline_size = cpu_to_le16(xattrsize); | ||
2721 | |||
2722 | spin_lock(&oi->ip_lock); | ||
2723 | oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL; | ||
2724 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
2725 | spin_unlock(&oi->ip_lock); | ||
2726 | |||
2727 | ret = ocfs2_journal_dirty(ctxt->handle, di_bh); | ||
2728 | if (ret < 0) | ||
2729 | mlog_errno(ret); | ||
2730 | |||
2731 | out: | ||
2732 | return ret; | ||
2733 | } | ||
2734 | |||
2158 | /* | 2735 | /* |
2159 | * ocfs2_xattr_ibody_set() | 2736 | * ocfs2_xattr_ibody_set() |
2160 | * | 2737 | * |
@@ -2166,9 +2743,10 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2166 | struct ocfs2_xattr_search *xs, | 2743 | struct ocfs2_xattr_search *xs, |
2167 | struct ocfs2_xattr_set_ctxt *ctxt) | 2744 | struct ocfs2_xattr_set_ctxt *ctxt) |
2168 | { | 2745 | { |
2746 | int ret; | ||
2169 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2747 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2170 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | 2748 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; |
2171 | int ret; | 2749 | struct ocfs2_xa_loc loc; |
2172 | 2750 | ||
2173 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | 2751 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) |
2174 | return -ENOSPC; | 2752 | return -ENOSPC; |
@@ -2181,8 +2759,25 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2181 | } | 2759 | } |
2182 | } | 2760 | } |
2183 | 2761 | ||
2184 | ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, | 2762 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { |
2185 | (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); | 2763 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); |
2764 | if (ret) { | ||
2765 | if (ret != -ENOSPC) | ||
2766 | mlog_errno(ret); | ||
2767 | goto out; | ||
2768 | } | ||
2769 | } | ||
2770 | |||
2771 | ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh, | ||
2772 | xs->not_found ? NULL : xs->here); | ||
2773 | ret = ocfs2_xa_set(&loc, xi, ctxt); | ||
2774 | if (ret) { | ||
2775 | if (ret != -ENOSPC) | ||
2776 | mlog_errno(ret); | ||
2777 | goto out; | ||
2778 | } | ||
2779 | xs->here = loc.xl_entry; | ||
2780 | |||
2186 | out: | 2781 | out: |
2187 | up_write(&oi->ip_alloc_sem); | 2782 | up_write(&oi->ip_alloc_sem); |
2188 | 2783 | ||
@@ -2242,12 +2837,11 @@ cleanup: | |||
2242 | return ret; | 2837 | return ret; |
2243 | } | 2838 | } |
2244 | 2839 | ||
2245 | static int ocfs2_create_xattr_block(handle_t *handle, | 2840 | static int ocfs2_create_xattr_block(struct inode *inode, |
2246 | struct inode *inode, | ||
2247 | struct buffer_head *inode_bh, | 2841 | struct buffer_head *inode_bh, |
2248 | struct ocfs2_alloc_context *meta_ac, | 2842 | struct ocfs2_xattr_set_ctxt *ctxt, |
2249 | struct buffer_head **ret_bh, | 2843 | int indexed, |
2250 | int indexed) | 2844 | struct buffer_head **ret_bh) |
2251 | { | 2845 | { |
2252 | int ret; | 2846 | int ret; |
2253 | u16 suballoc_bit_start; | 2847 | u16 suballoc_bit_start; |
@@ -2258,14 +2852,14 @@ static int ocfs2_create_xattr_block(handle_t *handle, | |||
2258 | struct buffer_head *new_bh = NULL; | 2852 | struct buffer_head *new_bh = NULL; |
2259 | struct ocfs2_xattr_block *xblk; | 2853 | struct ocfs2_xattr_block *xblk; |
2260 | 2854 | ||
2261 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh, | 2855 | ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), |
2262 | OCFS2_JOURNAL_ACCESS_CREATE); | 2856 | inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); |
2263 | if (ret < 0) { | 2857 | if (ret < 0) { |
2264 | mlog_errno(ret); | 2858 | mlog_errno(ret); |
2265 | goto end; | 2859 | goto end; |
2266 | } | 2860 | } |
2267 | 2861 | ||
2268 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | 2862 | ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1, |
2269 | &suballoc_bit_start, &num_got, | 2863 | &suballoc_bit_start, &num_got, |
2270 | &first_blkno); | 2864 | &first_blkno); |
2271 | if (ret < 0) { | 2865 | if (ret < 0) { |
@@ -2276,7 +2870,7 @@ static int ocfs2_create_xattr_block(handle_t *handle, | |||
2276 | new_bh = sb_getblk(inode->i_sb, first_blkno); | 2870 | new_bh = sb_getblk(inode->i_sb, first_blkno); |
2277 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); | 2871 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); |
2278 | 2872 | ||
2279 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), | 2873 | ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), |
2280 | new_bh, | 2874 | new_bh, |
2281 | OCFS2_JOURNAL_ACCESS_CREATE); | 2875 | OCFS2_JOURNAL_ACCESS_CREATE); |
2282 | if (ret < 0) { | 2876 | if (ret < 0) { |
@@ -2288,11 +2882,10 @@ static int ocfs2_create_xattr_block(handle_t *handle, | |||
2288 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | 2882 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; |
2289 | memset(xblk, 0, inode->i_sb->s_blocksize); | 2883 | memset(xblk, 0, inode->i_sb->s_blocksize); |
2290 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | 2884 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); |
2291 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | 2885 | xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); |
2292 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 2886 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
2293 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | 2887 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); |
2294 | xblk->xb_blkno = cpu_to_le64(first_blkno); | 2888 | xblk->xb_blkno = cpu_to_le64(first_blkno); |
2295 | |||
2296 | if (indexed) { | 2889 | if (indexed) { |
2297 | struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; | 2890 | struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; |
2298 | xr->xt_clusters = cpu_to_le32(1); | 2891 | xr->xt_clusters = cpu_to_le32(1); |
@@ -2303,14 +2896,17 @@ static int ocfs2_create_xattr_block(handle_t *handle, | |||
2303 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); | 2896 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); |
2304 | xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); | 2897 | xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); |
2305 | } | 2898 | } |
2899 | ocfs2_journal_dirty(ctxt->handle, new_bh); | ||
2306 | 2900 | ||
2307 | ret = ocfs2_journal_dirty(handle, new_bh); | 2901 | /* Add it to the inode */ |
2308 | if (ret < 0) { | ||
2309 | mlog_errno(ret); | ||
2310 | goto end; | ||
2311 | } | ||
2312 | di->i_xattr_loc = cpu_to_le64(first_blkno); | 2902 | di->i_xattr_loc = cpu_to_le64(first_blkno); |
2313 | ocfs2_journal_dirty(handle, inode_bh); | 2903 | |
2904 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
2905 | OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL; | ||
2906 | di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features); | ||
2907 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
2908 | |||
2909 | ocfs2_journal_dirty(ctxt->handle, inode_bh); | ||
2314 | 2910 | ||
2315 | *ret_bh = new_bh; | 2911 | *ret_bh = new_bh; |
2316 | new_bh = NULL; | 2912 | new_bh = NULL; |
@@ -2332,13 +2928,13 @@ static int ocfs2_xattr_block_set(struct inode *inode, | |||
2332 | struct ocfs2_xattr_set_ctxt *ctxt) | 2928 | struct ocfs2_xattr_set_ctxt *ctxt) |
2333 | { | 2929 | { |
2334 | struct buffer_head *new_bh = NULL; | 2930 | struct buffer_head *new_bh = NULL; |
2335 | handle_t *handle = ctxt->handle; | ||
2336 | struct ocfs2_xattr_block *xblk = NULL; | 2931 | struct ocfs2_xattr_block *xblk = NULL; |
2337 | int ret; | 2932 | int ret; |
2933 | struct ocfs2_xa_loc loc; | ||
2338 | 2934 | ||
2339 | if (!xs->xattr_bh) { | 2935 | if (!xs->xattr_bh) { |
2340 | ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh, | 2936 | ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt, |
2341 | ctxt->meta_ac, &new_bh, 0); | 2937 | 0, &new_bh); |
2342 | if (ret) { | 2938 | if (ret) { |
2343 | mlog_errno(ret); | 2939 | mlog_errno(ret); |
2344 | goto end; | 2940 | goto end; |
@@ -2354,21 +2950,25 @@ static int ocfs2_xattr_block_set(struct inode *inode, | |||
2354 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | 2950 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; |
2355 | 2951 | ||
2356 | if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { | 2952 | if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { |
2357 | /* Set extended attribute into external block */ | 2953 | ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh, |
2358 | ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, | 2954 | xs->not_found ? NULL : xs->here); |
2359 | OCFS2_HAS_XATTR_FL); | ||
2360 | if (!ret || ret != -ENOSPC) | ||
2361 | goto end; | ||
2362 | 2955 | ||
2363 | ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); | 2956 | ret = ocfs2_xa_set(&loc, xi, ctxt); |
2364 | if (ret) | 2957 | if (!ret) |
2958 | xs->here = loc.xl_entry; | ||
2959 | else if (ret != -ENOSPC) | ||
2365 | goto end; | 2960 | goto end; |
2961 | else { | ||
2962 | ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); | ||
2963 | if (ret) | ||
2964 | goto end; | ||
2965 | } | ||
2366 | } | 2966 | } |
2367 | 2967 | ||
2368 | ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); | 2968 | if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED) |
2969 | ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); | ||
2369 | 2970 | ||
2370 | end: | 2971 | end: |
2371 | |||
2372 | return ret; | 2972 | return ret; |
2373 | } | 2973 | } |
2374 | 2974 | ||
@@ -2377,7 +2977,6 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode, | |||
2377 | struct ocfs2_xattr_info *xi, | 2977 | struct ocfs2_xattr_info *xi, |
2378 | struct ocfs2_xattr_search *xs) | 2978 | struct ocfs2_xattr_search *xs) |
2379 | { | 2979 | { |
2380 | u64 value_size; | ||
2381 | struct ocfs2_xattr_entry *last; | 2980 | struct ocfs2_xattr_entry *last; |
2382 | int free, i; | 2981 | int free, i; |
2383 | size_t min_offs = xs->end - xs->base; | 2982 | size_t min_offs = xs->end - xs->base; |
@@ -2400,13 +2999,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode, | |||
2400 | 2999 | ||
2401 | BUG_ON(!xs->not_found); | 3000 | BUG_ON(!xs->not_found); |
2402 | 3001 | ||
2403 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | 3002 | if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi))) |
2404 | value_size = OCFS2_XATTR_ROOT_SIZE; | ||
2405 | else | ||
2406 | value_size = OCFS2_XATTR_SIZE(xi->value_len); | ||
2407 | |||
2408 | if (free >= sizeof(struct ocfs2_xattr_entry) + | ||
2409 | OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size) | ||
2410 | return 1; | 3003 | return 1; |
2411 | 3004 | ||
2412 | return 0; | 3005 | return 0; |
@@ -2430,7 +3023,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2430 | char *base = NULL; | 3023 | char *base = NULL; |
2431 | int name_offset, name_len = 0; | 3024 | int name_offset, name_len = 0; |
2432 | u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, | 3025 | u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, |
2433 | xi->value_len); | 3026 | xi->xi_value_len); |
2434 | u64 value_size; | 3027 | u64 value_size; |
2435 | 3028 | ||
2436 | /* | 3029 | /* |
@@ -2438,14 +3031,14 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2438 | * No matter whether we replace an old one or add a new one, | 3031 | * No matter whether we replace an old one or add a new one, |
2439 | * we need this for writing. | 3032 | * we need this for writing. |
2440 | */ | 3033 | */ |
2441 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | 3034 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) |
2442 | credits += new_clusters * | 3035 | credits += new_clusters * |
2443 | ocfs2_clusters_to_blocks(inode->i_sb, 1); | 3036 | ocfs2_clusters_to_blocks(inode->i_sb, 1); |
2444 | 3037 | ||
2445 | if (xis->not_found && xbs->not_found) { | 3038 | if (xis->not_found && xbs->not_found) { |
2446 | credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); | 3039 | credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); |
2447 | 3040 | ||
2448 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | 3041 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { |
2449 | clusters_add += new_clusters; | 3042 | clusters_add += new_clusters; |
2450 | credits += ocfs2_calc_extend_credits(inode->i_sb, | 3043 | credits += ocfs2_calc_extend_credits(inode->i_sb, |
2451 | &def_xv.xv.xr_list, | 3044 | &def_xv.xv.xr_list, |
@@ -2490,7 +3083,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2490 | * The credits for removing the value tree will be extended | 3083 | * The credits for removing the value tree will be extended |
2491 | * by ocfs2_remove_extent itself. | 3084 | * by ocfs2_remove_extent itself. |
2492 | */ | 3085 | */ |
2493 | if (!xi->value) { | 3086 | if (!xi->xi_value) { |
2494 | if (!ocfs2_xattr_is_local(xe)) | 3087 | if (!ocfs2_xattr_is_local(xe)) |
2495 | credits += ocfs2_remove_extent_credits(inode->i_sb); | 3088 | credits += ocfs2_remove_extent_credits(inode->i_sb); |
2496 | 3089 | ||
@@ -2520,7 +3113,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2520 | } | 3113 | } |
2521 | } | 3114 | } |
2522 | 3115 | ||
2523 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | 3116 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { |
2524 | /* the new values will be stored outside. */ | 3117 | /* the new values will be stored outside. */ |
2525 | u32 old_clusters = 0; | 3118 | u32 old_clusters = 0; |
2526 | 3119 | ||
@@ -2553,9 +3146,10 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2553 | * value, we don't need any allocation, otherwise we have | 3146 | * value, we don't need any allocation, otherwise we have |
2554 | * to guess metadata allocation. | 3147 | * to guess metadata allocation. |
2555 | */ | 3148 | */ |
2556 | if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) || | 3149 | if ((ocfs2_xattr_is_local(xe) && |
3150 | (value_size >= xi->xi_value_len)) || | ||
2557 | (!ocfs2_xattr_is_local(xe) && | 3151 | (!ocfs2_xattr_is_local(xe) && |
2558 | OCFS2_XATTR_ROOT_SIZE >= xi->value_len)) | 3152 | OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len)) |
2559 | goto out; | 3153 | goto out; |
2560 | } | 3154 | } |
2561 | 3155 | ||
@@ -2645,7 +3239,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode, | |||
2645 | 3239 | ||
2646 | meta_add += extra_meta; | 3240 | meta_add += extra_meta; |
2647 | mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " | 3241 | mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " |
2648 | "credits = %d\n", xi->name, meta_add, clusters_add, *credits); | 3242 | "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits); |
2649 | 3243 | ||
2650 | if (meta_add) { | 3244 | if (meta_add) { |
2651 | ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, | 3245 | ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, |
@@ -2685,7 +3279,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, | |||
2685 | { | 3279 | { |
2686 | int ret = 0, credits, old_found; | 3280 | int ret = 0, credits, old_found; |
2687 | 3281 | ||
2688 | if (!xi->value) { | 3282 | if (!xi->xi_value) { |
2689 | /* Remove existing extended attribute */ | 3283 | /* Remove existing extended attribute */ |
2690 | if (!xis->not_found) | 3284 | if (!xis->not_found) |
2691 | ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); | 3285 | ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); |
@@ -2699,8 +3293,8 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, | |||
2699 | * If succeed and that extended attribute existing in | 3293 | * If succeed and that extended attribute existing in |
2700 | * external block, then we will remove it. | 3294 | * external block, then we will remove it. |
2701 | */ | 3295 | */ |
2702 | xi->value = NULL; | 3296 | xi->xi_value = NULL; |
2703 | xi->value_len = 0; | 3297 | xi->xi_value_len = 0; |
2704 | 3298 | ||
2705 | old_found = xis->not_found; | 3299 | old_found = xis->not_found; |
2706 | xis->not_found = -ENODATA; | 3300 | xis->not_found = -ENODATA; |
@@ -2728,8 +3322,8 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, | |||
2728 | } else if (ret == -ENOSPC) { | 3322 | } else if (ret == -ENOSPC) { |
2729 | if (di->i_xattr_loc && !xbs->xattr_bh) { | 3323 | if (di->i_xattr_loc && !xbs->xattr_bh) { |
2730 | ret = ocfs2_xattr_block_find(inode, | 3324 | ret = ocfs2_xattr_block_find(inode, |
2731 | xi->name_index, | 3325 | xi->xi_name_index, |
2732 | xi->name, xbs); | 3326 | xi->xi_name, xbs); |
2733 | if (ret) | 3327 | if (ret) |
2734 | goto out; | 3328 | goto out; |
2735 | 3329 | ||
@@ -2768,8 +3362,8 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, | |||
2768 | * If succeed and that extended attribute | 3362 | * If succeed and that extended attribute |
2769 | * existing in inode, we will remove it. | 3363 | * existing in inode, we will remove it. |
2770 | */ | 3364 | */ |
2771 | xi->value = NULL; | 3365 | xi->xi_value = NULL; |
2772 | xi->value_len = 0; | 3366 | xi->xi_value_len = 0; |
2773 | xbs->not_found = -ENODATA; | 3367 | xbs->not_found = -ENODATA; |
2774 | ret = ocfs2_calc_xattr_set_need(inode, | 3368 | ret = ocfs2_calc_xattr_set_need(inode, |
2775 | di, | 3369 | di, |
@@ -2835,10 +3429,11 @@ int ocfs2_xattr_set_handle(handle_t *handle, | |||
2835 | int ret; | 3429 | int ret; |
2836 | 3430 | ||
2837 | struct ocfs2_xattr_info xi = { | 3431 | struct ocfs2_xattr_info xi = { |
2838 | .name_index = name_index, | 3432 | .xi_name_index = name_index, |
2839 | .name = name, | 3433 | .xi_name = name, |
2840 | .value = value, | 3434 | .xi_name_len = strlen(name), |
2841 | .value_len = value_len, | 3435 | .xi_value = value, |
3436 | .xi_value_len = value_len, | ||
2842 | }; | 3437 | }; |
2843 | 3438 | ||
2844 | struct ocfs2_xattr_search xis = { | 3439 | struct ocfs2_xattr_search xis = { |
@@ -2918,10 +3513,11 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2918 | struct ocfs2_refcount_tree *ref_tree = NULL; | 3513 | struct ocfs2_refcount_tree *ref_tree = NULL; |
2919 | 3514 | ||
2920 | struct ocfs2_xattr_info xi = { | 3515 | struct ocfs2_xattr_info xi = { |
2921 | .name_index = name_index, | 3516 | .xi_name_index = name_index, |
2922 | .name = name, | 3517 | .xi_name = name, |
2923 | .value = value, | 3518 | .xi_name_len = strlen(name), |
2924 | .value_len = value_len, | 3519 | .xi_value = value, |
3520 | .xi_value_len = value_len, | ||
2925 | }; | 3521 | }; |
2926 | 3522 | ||
2927 | struct ocfs2_xattr_search xis = { | 3523 | struct ocfs2_xattr_search xis = { |
@@ -3765,7 +4361,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, | |||
3765 | struct ocfs2_xattr_bucket *bucket) | 4361 | struct ocfs2_xattr_bucket *bucket) |
3766 | { | 4362 | { |
3767 | int ret, i; | 4363 | int ret, i; |
3768 | size_t end, offset, len, value_len; | 4364 | size_t end, offset, len; |
3769 | struct ocfs2_xattr_header *xh; | 4365 | struct ocfs2_xattr_header *xh; |
3770 | char *entries, *buf, *bucket_buf = NULL; | 4366 | char *entries, *buf, *bucket_buf = NULL; |
3771 | u64 blkno = bucket_blkno(bucket); | 4367 | u64 blkno = bucket_blkno(bucket); |
@@ -3819,12 +4415,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, | |||
3819 | end = OCFS2_XATTR_BUCKET_SIZE; | 4415 | end = OCFS2_XATTR_BUCKET_SIZE; |
3820 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { | 4416 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { |
3821 | offset = le16_to_cpu(xe->xe_name_offset); | 4417 | offset = le16_to_cpu(xe->xe_name_offset); |
3822 | if (ocfs2_xattr_is_local(xe)) | 4418 | len = namevalue_size_xe(xe); |
3823 | value_len = OCFS2_XATTR_SIZE( | ||
3824 | le64_to_cpu(xe->xe_value_size)); | ||
3825 | else | ||
3826 | value_len = OCFS2_XATTR_ROOT_SIZE; | ||
3827 | len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len; | ||
3828 | 4419 | ||
3829 | /* | 4420 | /* |
3830 | * We must make sure that the name/value pair | 4421 | * We must make sure that the name/value pair |
@@ -4013,7 +4604,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, | |||
4013 | int new_bucket_head) | 4604 | int new_bucket_head) |
4014 | { | 4605 | { |
4015 | int ret, i; | 4606 | int ret, i; |
4016 | int count, start, len, name_value_len = 0, xe_len, name_offset = 0; | 4607 | int count, start, len, name_value_len = 0, name_offset = 0; |
4017 | struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; | 4608 | struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; |
4018 | struct ocfs2_xattr_header *xh; | 4609 | struct ocfs2_xattr_header *xh; |
4019 | struct ocfs2_xattr_entry *xe; | 4610 | struct ocfs2_xattr_entry *xe; |
@@ -4104,13 +4695,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, | |||
4104 | name_value_len = 0; | 4695 | name_value_len = 0; |
4105 | for (i = 0; i < start; i++) { | 4696 | for (i = 0; i < start; i++) { |
4106 | xe = &xh->xh_entries[i]; | 4697 | xe = &xh->xh_entries[i]; |
4107 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | 4698 | name_value_len += namevalue_size_xe(xe); |
4108 | if (ocfs2_xattr_is_local(xe)) | ||
4109 | xe_len += | ||
4110 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
4111 | else | ||
4112 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
4113 | name_value_len += xe_len; | ||
4114 | if (le16_to_cpu(xe->xe_name_offset) < name_offset) | 4699 | if (le16_to_cpu(xe->xe_name_offset) < name_offset) |
4115 | name_offset = le16_to_cpu(xe->xe_name_offset); | 4700 | name_offset = le16_to_cpu(xe->xe_name_offset); |
4116 | } | 4701 | } |
@@ -4140,12 +4725,6 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, | |||
4140 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | 4725 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); |
4141 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | 4726 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { |
4142 | xe = &xh->xh_entries[i]; | 4727 | xe = &xh->xh_entries[i]; |
4143 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
4144 | if (ocfs2_xattr_is_local(xe)) | ||
4145 | xe_len += | ||
4146 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
4147 | else | ||
4148 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
4149 | if (le16_to_cpu(xe->xe_name_offset) < | 4728 | if (le16_to_cpu(xe->xe_name_offset) < |
4150 | le16_to_cpu(xh->xh_free_start)) | 4729 | le16_to_cpu(xh->xh_free_start)) |
4151 | xh->xh_free_start = xe->xe_name_offset; | 4730 | xh->xh_free_start = xe->xe_name_offset; |
@@ -4757,195 +5336,6 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, | |||
4757 | } | 5336 | } |
4758 | 5337 | ||
4759 | /* | 5338 | /* |
4760 | * Handle the normal xattr set, including replace, delete and new. | ||
4761 | * | ||
4762 | * Note: "local" indicates the real data's locality. So we can't | ||
4763 | * just its bucket locality by its length. | ||
4764 | */ | ||
4765 | static void ocfs2_xattr_set_entry_normal(struct inode *inode, | ||
4766 | struct ocfs2_xattr_info *xi, | ||
4767 | struct ocfs2_xattr_search *xs, | ||
4768 | u32 name_hash, | ||
4769 | int local) | ||
4770 | { | ||
4771 | struct ocfs2_xattr_entry *last, *xe; | ||
4772 | int name_len = strlen(xi->name); | ||
4773 | struct ocfs2_xattr_header *xh = xs->header; | ||
4774 | u16 count = le16_to_cpu(xh->xh_count), start; | ||
4775 | size_t blocksize = inode->i_sb->s_blocksize; | ||
4776 | char *val; | ||
4777 | size_t offs, size, new_size; | ||
4778 | |||
4779 | last = &xh->xh_entries[count]; | ||
4780 | if (!xs->not_found) { | ||
4781 | xe = xs->here; | ||
4782 | offs = le16_to_cpu(xe->xe_name_offset); | ||
4783 | if (ocfs2_xattr_is_local(xe)) | ||
4784 | size = OCFS2_XATTR_SIZE(name_len) + | ||
4785 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
4786 | else | ||
4787 | size = OCFS2_XATTR_SIZE(name_len) + | ||
4788 | OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
4789 | |||
4790 | /* | ||
4791 | * If the new value will be stored outside, xi->value has been | ||
4792 | * initalized as an empty ocfs2_xattr_value_root, and the same | ||
4793 | * goes with xi->value_len, so we can set new_size safely here. | ||
4794 | * See ocfs2_xattr_set_in_bucket. | ||
4795 | */ | ||
4796 | new_size = OCFS2_XATTR_SIZE(name_len) + | ||
4797 | OCFS2_XATTR_SIZE(xi->value_len); | ||
4798 | |||
4799 | le16_add_cpu(&xh->xh_name_value_len, -size); | ||
4800 | if (xi->value) { | ||
4801 | if (new_size > size) | ||
4802 | goto set_new_name_value; | ||
4803 | |||
4804 | /* Now replace the old value with new one. */ | ||
4805 | if (local) | ||
4806 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
4807 | else | ||
4808 | xe->xe_value_size = 0; | ||
4809 | |||
4810 | val = ocfs2_xattr_bucket_get_val(inode, | ||
4811 | xs->bucket, offs); | ||
4812 | memset(val + OCFS2_XATTR_SIZE(name_len), 0, | ||
4813 | size - OCFS2_XATTR_SIZE(name_len)); | ||
4814 | if (OCFS2_XATTR_SIZE(xi->value_len) > 0) | ||
4815 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
4816 | xi->value, xi->value_len); | ||
4817 | |||
4818 | le16_add_cpu(&xh->xh_name_value_len, new_size); | ||
4819 | ocfs2_xattr_set_local(xe, local); | ||
4820 | return; | ||
4821 | } else { | ||
4822 | /* | ||
4823 | * Remove the old entry if there is more than one. | ||
4824 | * We don't remove the last entry so that we can | ||
4825 | * use it to indicate the hash value of the empty | ||
4826 | * bucket. | ||
4827 | */ | ||
4828 | last -= 1; | ||
4829 | le16_add_cpu(&xh->xh_count, -1); | ||
4830 | if (xh->xh_count) { | ||
4831 | memmove(xe, xe + 1, | ||
4832 | (void *)last - (void *)xe); | ||
4833 | memset(last, 0, | ||
4834 | sizeof(struct ocfs2_xattr_entry)); | ||
4835 | } else | ||
4836 | xh->xh_free_start = | ||
4837 | cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
4838 | |||
4839 | return; | ||
4840 | } | ||
4841 | } else { | ||
4842 | /* find a new entry for insert. */ | ||
4843 | int low = 0, high = count - 1, tmp; | ||
4844 | struct ocfs2_xattr_entry *tmp_xe; | ||
4845 | |||
4846 | while (low <= high && count) { | ||
4847 | tmp = (low + high) / 2; | ||
4848 | tmp_xe = &xh->xh_entries[tmp]; | ||
4849 | |||
4850 | if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) | ||
4851 | low = tmp + 1; | ||
4852 | else if (name_hash < | ||
4853 | le32_to_cpu(tmp_xe->xe_name_hash)) | ||
4854 | high = tmp - 1; | ||
4855 | else { | ||
4856 | low = tmp; | ||
4857 | break; | ||
4858 | } | ||
4859 | } | ||
4860 | |||
4861 | xe = &xh->xh_entries[low]; | ||
4862 | if (low != count) | ||
4863 | memmove(xe + 1, xe, (void *)last - (void *)xe); | ||
4864 | |||
4865 | le16_add_cpu(&xh->xh_count, 1); | ||
4866 | memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); | ||
4867 | xe->xe_name_hash = cpu_to_le32(name_hash); | ||
4868 | xe->xe_name_len = name_len; | ||
4869 | ocfs2_xattr_set_type(xe, xi->name_index); | ||
4870 | } | ||
4871 | |||
4872 | set_new_name_value: | ||
4873 | /* Insert the new name+value. */ | ||
4874 | size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len); | ||
4875 | |||
4876 | /* | ||
4877 | * We must make sure that the name/value pair | ||
4878 | * exists in the same block. | ||
4879 | */ | ||
4880 | offs = le16_to_cpu(xh->xh_free_start); | ||
4881 | start = offs - size; | ||
4882 | |||
4883 | if (start >> inode->i_sb->s_blocksize_bits != | ||
4884 | (offs - 1) >> inode->i_sb->s_blocksize_bits) { | ||
4885 | offs = offs - offs % blocksize; | ||
4886 | xh->xh_free_start = cpu_to_le16(offs); | ||
4887 | } | ||
4888 | |||
4889 | val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size); | ||
4890 | xe->xe_name_offset = cpu_to_le16(offs - size); | ||
4891 | |||
4892 | memset(val, 0, size); | ||
4893 | memcpy(val, xi->name, name_len); | ||
4894 | memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len); | ||
4895 | |||
4896 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
4897 | ocfs2_xattr_set_local(xe, local); | ||
4898 | xs->here = xe; | ||
4899 | le16_add_cpu(&xh->xh_free_start, -size); | ||
4900 | le16_add_cpu(&xh->xh_name_value_len, size); | ||
4901 | |||
4902 | return; | ||
4903 | } | ||
4904 | |||
4905 | /* | ||
4906 | * Set the xattr entry in the specified bucket. | ||
4907 | * The bucket is indicated by xs->bucket and it should have the enough | ||
4908 | * space for the xattr insertion. | ||
4909 | */ | ||
4910 | static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, | ||
4911 | handle_t *handle, | ||
4912 | struct ocfs2_xattr_info *xi, | ||
4913 | struct ocfs2_xattr_search *xs, | ||
4914 | u32 name_hash, | ||
4915 | int local) | ||
4916 | { | ||
4917 | int ret; | ||
4918 | u64 blkno; | ||
4919 | |||
4920 | mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", | ||
4921 | (unsigned long)xi->value_len, xi->name_index, | ||
4922 | (unsigned long long)bucket_blkno(xs->bucket)); | ||
4923 | |||
4924 | if (!xs->bucket->bu_bhs[1]) { | ||
4925 | blkno = bucket_blkno(xs->bucket); | ||
4926 | ocfs2_xattr_bucket_relse(xs->bucket); | ||
4927 | ret = ocfs2_read_xattr_bucket(xs->bucket, blkno); | ||
4928 | if (ret) { | ||
4929 | mlog_errno(ret); | ||
4930 | goto out; | ||
4931 | } | ||
4932 | } | ||
4933 | |||
4934 | ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, | ||
4935 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4936 | if (ret < 0) { | ||
4937 | mlog_errno(ret); | ||
4938 | goto out; | ||
4939 | } | ||
4940 | |||
4941 | ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); | ||
4942 | ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); | ||
4943 | |||
4944 | out: | ||
4945 | return ret; | ||
4946 | } | ||
4947 | |||
4948 | /* | ||
4949 | * Truncate the specified xe_off entry in xattr bucket. | 5339 | * Truncate the specified xe_off entry in xattr bucket. |
4950 | * bucket is indicated by header_bh and len is the new length. | 5340 | * bucket is indicated by header_bh and len is the new length. |
4951 | * Both the ocfs2_xattr_value_root and the entry will be updated here. | 5341 | * Both the ocfs2_xattr_value_root and the entry will be updated here. |
@@ -5015,66 +5405,6 @@ out: | |||
5015 | return ret; | 5405 | return ret; |
5016 | } | 5406 | } |
5017 | 5407 | ||
5018 | static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, | ||
5019 | struct ocfs2_xattr_search *xs, | ||
5020 | int len, | ||
5021 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
5022 | { | ||
5023 | int ret, offset; | ||
5024 | struct ocfs2_xattr_entry *xe = xs->here; | ||
5025 | struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; | ||
5026 | |||
5027 | BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); | ||
5028 | |||
5029 | offset = xe - xh->xh_entries; | ||
5030 | ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket, | ||
5031 | offset, len, ctxt); | ||
5032 | if (ret) | ||
5033 | mlog_errno(ret); | ||
5034 | |||
5035 | return ret; | ||
5036 | } | ||
5037 | |||
5038 | static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | ||
5039 | handle_t *handle, | ||
5040 | struct ocfs2_xattr_search *xs, | ||
5041 | char *val, | ||
5042 | int value_len) | ||
5043 | { | ||
5044 | int ret, offset, block_off; | ||
5045 | struct ocfs2_xattr_value_root *xv; | ||
5046 | struct ocfs2_xattr_entry *xe = xs->here; | ||
5047 | struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); | ||
5048 | void *base; | ||
5049 | struct ocfs2_xattr_value_buf vb = { | ||
5050 | .vb_access = ocfs2_journal_access, | ||
5051 | }; | ||
5052 | |||
5053 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); | ||
5054 | |||
5055 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh, | ||
5056 | xe - xh->xh_entries, | ||
5057 | &block_off, | ||
5058 | &offset); | ||
5059 | if (ret) { | ||
5060 | mlog_errno(ret); | ||
5061 | goto out; | ||
5062 | } | ||
5063 | |||
5064 | base = bucket_block(xs->bucket, block_off); | ||
5065 | xv = (struct ocfs2_xattr_value_root *)(base + offset + | ||
5066 | OCFS2_XATTR_SIZE(xe->xe_name_len)); | ||
5067 | |||
5068 | vb.vb_xv = xv; | ||
5069 | vb.vb_bh = xs->bucket->bu_bhs[block_off]; | ||
5070 | ret = __ocfs2_xattr_set_value_outside(inode, handle, | ||
5071 | &vb, val, value_len); | ||
5072 | if (ret) | ||
5073 | mlog_errno(ret); | ||
5074 | out: | ||
5075 | return ret; | ||
5076 | } | ||
5077 | |||
5078 | static int ocfs2_rm_xattr_cluster(struct inode *inode, | 5408 | static int ocfs2_rm_xattr_cluster(struct inode *inode, |
5079 | struct buffer_head *root_bh, | 5409 | struct buffer_head *root_bh, |
5080 | u64 blkno, | 5410 | u64 blkno, |
@@ -5173,128 +5503,6 @@ out: | |||
5173 | return ret; | 5503 | return ret; |
5174 | } | 5504 | } |
5175 | 5505 | ||
5176 | static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, | ||
5177 | handle_t *handle, | ||
5178 | struct ocfs2_xattr_search *xs) | ||
5179 | { | ||
5180 | struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); | ||
5181 | struct ocfs2_xattr_entry *last = &xh->xh_entries[ | ||
5182 | le16_to_cpu(xh->xh_count) - 1]; | ||
5183 | int ret = 0; | ||
5184 | |||
5185 | ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, | ||
5186 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
5187 | if (ret) { | ||
5188 | mlog_errno(ret); | ||
5189 | return; | ||
5190 | } | ||
5191 | |||
5192 | /* Remove the old entry. */ | ||
5193 | memmove(xs->here, xs->here + 1, | ||
5194 | (void *)last - (void *)xs->here); | ||
5195 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
5196 | le16_add_cpu(&xh->xh_count, -1); | ||
5197 | |||
5198 | ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); | ||
5199 | } | ||
5200 | |||
5201 | /* | ||
5202 | * Set the xattr name/value in the bucket specified in xs. | ||
5203 | * | ||
5204 | * As the new value in xi may be stored in the bucket or in an outside cluster, | ||
5205 | * we divide the whole process into 3 steps: | ||
5206 | * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket) | ||
5207 | * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs) | ||
5208 | * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside) | ||
5209 | * 4. If the clusters for the new outside value can't be allocated, we need | ||
5210 | * to free the xattr we allocated in set. | ||
5211 | */ | ||
5212 | static int ocfs2_xattr_set_in_bucket(struct inode *inode, | ||
5213 | struct ocfs2_xattr_info *xi, | ||
5214 | struct ocfs2_xattr_search *xs, | ||
5215 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
5216 | { | ||
5217 | int ret, local = 1; | ||
5218 | size_t value_len; | ||
5219 | char *val = (char *)xi->value; | ||
5220 | struct ocfs2_xattr_entry *xe = xs->here; | ||
5221 | u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name, | ||
5222 | strlen(xi->name)); | ||
5223 | |||
5224 | if (!xs->not_found && !ocfs2_xattr_is_local(xe)) { | ||
5225 | /* | ||
5226 | * We need to truncate the xattr storage first. | ||
5227 | * | ||
5228 | * If both the old and new value are stored to | ||
5229 | * outside block, we only need to truncate | ||
5230 | * the storage and then set the value outside. | ||
5231 | * | ||
5232 | * If the new value should be stored within block, | ||
5233 | * we should free all the outside block first and | ||
5234 | * the modification to the xattr block will be done | ||
5235 | * by following steps. | ||
5236 | */ | ||
5237 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
5238 | value_len = xi->value_len; | ||
5239 | else | ||
5240 | value_len = 0; | ||
5241 | |||
5242 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
5243 | value_len, | ||
5244 | ctxt); | ||
5245 | if (ret) | ||
5246 | goto out; | ||
5247 | |||
5248 | if (value_len) | ||
5249 | goto set_value_outside; | ||
5250 | } | ||
5251 | |||
5252 | value_len = xi->value_len; | ||
5253 | /* So we have to handle the inside block change now. */ | ||
5254 | if (value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
5255 | /* | ||
5256 | * If the new value will be stored outside of block, | ||
5257 | * initalize a new empty value root and insert it first. | ||
5258 | */ | ||
5259 | local = 0; | ||
5260 | xi->value = &def_xv; | ||
5261 | xi->value_len = OCFS2_XATTR_ROOT_SIZE; | ||
5262 | } | ||
5263 | |||
5264 | ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs, | ||
5265 | name_hash, local); | ||
5266 | if (ret) { | ||
5267 | mlog_errno(ret); | ||
5268 | goto out; | ||
5269 | } | ||
5270 | |||
5271 | if (value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
5272 | goto out; | ||
5273 | |||
5274 | /* allocate the space now for the outside block storage. */ | ||
5275 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
5276 | value_len, ctxt); | ||
5277 | if (ret) { | ||
5278 | mlog_errno(ret); | ||
5279 | |||
5280 | if (xs->not_found) { | ||
5281 | /* | ||
5282 | * We can't allocate enough clusters for outside | ||
5283 | * storage and we have allocated xattr already, | ||
5284 | * so need to remove it. | ||
5285 | */ | ||
5286 | ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs); | ||
5287 | } | ||
5288 | goto out; | ||
5289 | } | ||
5290 | |||
5291 | set_value_outside: | ||
5292 | ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle, | ||
5293 | xs, val, value_len); | ||
5294 | out: | ||
5295 | return ret; | ||
5296 | } | ||
5297 | |||
5298 | /* | 5506 | /* |
5299 | * check whether the xattr bucket is filled up with the same hash value. | 5507 | * check whether the xattr bucket is filled up with the same hash value. |
5300 | * If we want to insert the xattr with the same hash, return -ENOSPC. | 5508 | * If we want to insert the xattr with the same hash, return -ENOSPC. |
@@ -5323,156 +5531,116 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, | |||
5323 | return 0; | 5531 | return 0; |
5324 | } | 5532 | } |
5325 | 5533 | ||
5326 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | 5534 | /* |
5327 | struct ocfs2_xattr_info *xi, | 5535 | * Try to set the entry in the current bucket. If we fail, the caller |
5328 | struct ocfs2_xattr_search *xs, | 5536 | * will handle getting us another bucket. |
5329 | struct ocfs2_xattr_set_ctxt *ctxt) | 5537 | */ |
5538 | static int ocfs2_xattr_set_entry_bucket(struct inode *inode, | ||
5539 | struct ocfs2_xattr_info *xi, | ||
5540 | struct ocfs2_xattr_search *xs, | ||
5541 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
5330 | { | 5542 | { |
5331 | struct ocfs2_xattr_header *xh; | 5543 | int ret; |
5332 | struct ocfs2_xattr_entry *xe; | 5544 | struct ocfs2_xa_loc loc; |
5333 | u16 count, header_size, xh_free_start; | ||
5334 | int free, max_free, need, old; | ||
5335 | size_t value_size = 0, name_len = strlen(xi->name); | ||
5336 | size_t blocksize = inode->i_sb->s_blocksize; | ||
5337 | int ret, allocation = 0; | ||
5338 | |||
5339 | mlog_entry("Set xattr %s in xattr index block\n", xi->name); | ||
5340 | |||
5341 | try_again: | ||
5342 | xh = xs->header; | ||
5343 | count = le16_to_cpu(xh->xh_count); | ||
5344 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
5345 | header_size = sizeof(struct ocfs2_xattr_header) + | ||
5346 | count * sizeof(struct ocfs2_xattr_entry); | ||
5347 | max_free = OCFS2_XATTR_BUCKET_SIZE - header_size - | ||
5348 | le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP; | ||
5349 | |||
5350 | mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " | ||
5351 | "of %u which exceed block size\n", | ||
5352 | (unsigned long long)bucket_blkno(xs->bucket), | ||
5353 | header_size); | ||
5354 | 5545 | ||
5355 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) | 5546 | mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name); |
5356 | value_size = OCFS2_XATTR_ROOT_SIZE; | ||
5357 | else if (xi->value) | ||
5358 | value_size = OCFS2_XATTR_SIZE(xi->value_len); | ||
5359 | 5547 | ||
5360 | if (xs->not_found) | 5548 | ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, |
5361 | need = sizeof(struct ocfs2_xattr_entry) + | 5549 | xs->not_found ? NULL : xs->here); |
5362 | OCFS2_XATTR_SIZE(name_len) + value_size; | 5550 | ret = ocfs2_xa_set(&loc, xi, ctxt); |
5363 | else { | 5551 | if (!ret) { |
5364 | need = value_size + OCFS2_XATTR_SIZE(name_len); | 5552 | xs->here = loc.xl_entry; |
5553 | goto out; | ||
5554 | } | ||
5555 | if (ret != -ENOSPC) { | ||
5556 | mlog_errno(ret); | ||
5557 | goto out; | ||
5558 | } | ||
5365 | 5559 | ||
5366 | /* | 5560 | /* Ok, we need space. Let's try defragmenting the bucket. */ |
5367 | * We only replace the old value if the new length is smaller | 5561 | ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, |
5368 | * than the old one. Otherwise we will allocate new space in the | 5562 | xs->bucket); |
5369 | * bucket to store it. | 5563 | if (ret) { |
5370 | */ | 5564 | mlog_errno(ret); |
5371 | xe = xs->here; | 5565 | goto out; |
5372 | if (ocfs2_xattr_is_local(xe)) | 5566 | } |
5373 | old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
5374 | else | ||
5375 | old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
5376 | 5567 | ||
5377 | if (old >= value_size) | 5568 | ret = ocfs2_xa_set(&loc, xi, ctxt); |
5378 | need = 0; | 5569 | if (!ret) { |
5570 | xs->here = loc.xl_entry; | ||
5571 | goto out; | ||
5379 | } | 5572 | } |
5573 | if (ret != -ENOSPC) | ||
5574 | mlog_errno(ret); | ||
5380 | 5575 | ||
5381 | free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP; | ||
5382 | /* | ||
5383 | * We need to make sure the new name/value pair | ||
5384 | * can exist in the same block. | ||
5385 | */ | ||
5386 | if (xh_free_start % blocksize < need) | ||
5387 | free -= xh_free_start % blocksize; | ||
5388 | |||
5389 | mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " | ||
5390 | "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" | ||
5391 | " %u\n", xs->not_found, | ||
5392 | (unsigned long long)bucket_blkno(xs->bucket), | ||
5393 | free, need, max_free, le16_to_cpu(xh->xh_free_start), | ||
5394 | le16_to_cpu(xh->xh_name_value_len)); | ||
5395 | |||
5396 | if (free < need || | ||
5397 | (xs->not_found && | ||
5398 | count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) { | ||
5399 | if (need <= max_free && | ||
5400 | count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
5401 | /* | ||
5402 | * We can create the space by defragment. Since only the | ||
5403 | * name/value will be moved, the xe shouldn't be changed | ||
5404 | * in xs. | ||
5405 | */ | ||
5406 | ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, | ||
5407 | xs->bucket); | ||
5408 | if (ret) { | ||
5409 | mlog_errno(ret); | ||
5410 | goto out; | ||
5411 | } | ||
5412 | 5576 | ||
5413 | xh_free_start = le16_to_cpu(xh->xh_free_start); | 5577 | out: |
5414 | free = xh_free_start - header_size | 5578 | mlog_exit(ret); |
5415 | - OCFS2_XATTR_HEADER_GAP; | 5579 | return ret; |
5416 | if (xh_free_start % blocksize < need) | 5580 | } |
5417 | free -= xh_free_start % blocksize; | ||
5418 | 5581 | ||
5419 | if (free >= need) | 5582 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, |
5420 | goto xattr_set; | 5583 | struct ocfs2_xattr_info *xi, |
5584 | struct ocfs2_xattr_search *xs, | ||
5585 | struct ocfs2_xattr_set_ctxt *ctxt) | ||
5586 | { | ||
5587 | int ret; | ||
5421 | 5588 | ||
5422 | mlog(0, "Can't get enough space for xattr insert by " | 5589 | mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name); |
5423 | "defragment. Need %u bytes, but we have %d, so " | ||
5424 | "allocate new bucket for it.\n", need, free); | ||
5425 | } | ||
5426 | 5590 | ||
5427 | /* | 5591 | ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); |
5428 | * We have to add new buckets or clusters and one | 5592 | if (!ret) |
5429 | * allocation should leave us enough space for insert. | 5593 | goto out; |
5430 | */ | 5594 | if (ret != -ENOSPC) { |
5431 | BUG_ON(allocation); | 5595 | mlog_errno(ret); |
5596 | goto out; | ||
5597 | } | ||
5432 | 5598 | ||
5433 | /* | 5599 | /* Ack, need more space. Let's try to get another bucket! */ |
5434 | * We do not allow for overlapping ranges between buckets. And | ||
5435 | * the maximum number of collisions we will allow for then is | ||
5436 | * one bucket's worth, so check it here whether we need to | ||
5437 | * add a new bucket for the insert. | ||
5438 | */ | ||
5439 | ret = ocfs2_check_xattr_bucket_collision(inode, | ||
5440 | xs->bucket, | ||
5441 | xi->name); | ||
5442 | if (ret) { | ||
5443 | mlog_errno(ret); | ||
5444 | goto out; | ||
5445 | } | ||
5446 | 5600 | ||
5447 | ret = ocfs2_add_new_xattr_bucket(inode, | 5601 | /* |
5448 | xs->xattr_bh, | 5602 | * We do not allow for overlapping ranges between buckets. And |
5603 | * the maximum number of collisions we will allow for then is | ||
5604 | * one bucket's worth, so check it here whether we need to | ||
5605 | * add a new bucket for the insert. | ||
5606 | */ | ||
5607 | ret = ocfs2_check_xattr_bucket_collision(inode, | ||
5449 | xs->bucket, | 5608 | xs->bucket, |
5450 | ctxt); | 5609 | xi->xi_name); |
5451 | if (ret) { | 5610 | if (ret) { |
5452 | mlog_errno(ret); | 5611 | mlog_errno(ret); |
5453 | goto out; | 5612 | goto out; |
5454 | } | 5613 | } |
5455 | 5614 | ||
5456 | /* | 5615 | ret = ocfs2_add_new_xattr_bucket(inode, |
5457 | * ocfs2_add_new_xattr_bucket() will have updated | 5616 | xs->xattr_bh, |
5458 | * xs->bucket if it moved, but it will not have updated | 5617 | xs->bucket, |
5459 | * any of the other search fields. Thus, we drop it and | 5618 | ctxt); |
5460 | * re-search. Everything should be cached, so it'll be | 5619 | if (ret) { |
5461 | * quick. | 5620 | mlog_errno(ret); |
5462 | */ | 5621 | goto out; |
5463 | ocfs2_xattr_bucket_relse(xs->bucket); | ||
5464 | ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, | ||
5465 | xi->name_index, | ||
5466 | xi->name, xs); | ||
5467 | if (ret && ret != -ENODATA) | ||
5468 | goto out; | ||
5469 | xs->not_found = ret; | ||
5470 | allocation = 1; | ||
5471 | goto try_again; | ||
5472 | } | 5622 | } |
5473 | 5623 | ||
5474 | xattr_set: | 5624 | /* |
5475 | ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt); | 5625 | * ocfs2_add_new_xattr_bucket() will have updated |
5626 | * xs->bucket if it moved, but it will not have updated | ||
5627 | * any of the other search fields. Thus, we drop it and | ||
5628 | * re-search. Everything should be cached, so it'll be | ||
5629 | * quick. | ||
5630 | */ | ||
5631 | ocfs2_xattr_bucket_relse(xs->bucket); | ||
5632 | ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, | ||
5633 | xi->xi_name_index, | ||
5634 | xi->xi_name, xs); | ||
5635 | if (ret && ret != -ENODATA) | ||
5636 | goto out; | ||
5637 | xs->not_found = ret; | ||
5638 | |||
5639 | /* Ok, we have a new bucket, let's try again */ | ||
5640 | ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); | ||
5641 | if (ret && (ret != -ENOSPC)) | ||
5642 | mlog_errno(ret); | ||
5643 | |||
5476 | out: | 5644 | out: |
5477 | mlog_exit(ret); | 5645 | mlog_exit(ret); |
5478 | return ret; | 5646 | return ret; |
@@ -5684,7 +5852,7 @@ static int ocfs2_prepare_refcount_xattr(struct inode *inode, | |||
5684 | * refcount tree, and make the original extent become 3. So we will need | 5852 | * refcount tree, and make the original extent become 3. So we will need |
5685 | * 2 * cluster more extent recs at most. | 5853 | * 2 * cluster more extent recs at most. |
5686 | */ | 5854 | */ |
5687 | if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) { | 5855 | if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) { |
5688 | 5856 | ||
5689 | ret = ocfs2_refcounted_xattr_delete_need(inode, | 5857 | ret = ocfs2_refcounted_xattr_delete_need(inode, |
5690 | &(*ref_tree)->rf_ci, | 5858 | &(*ref_tree)->rf_ci, |
@@ -6066,7 +6234,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, | |||
6066 | * to the extent block, so just calculate a maximum record num. | 6234 | * to the extent block, so just calculate a maximum record num. |
6067 | */ | 6235 | */ |
6068 | if (!xv->xr_list.l_tree_depth) | 6236 | if (!xv->xr_list.l_tree_depth) |
6069 | *num_recs += xv->xr_list.l_next_free_rec; | 6237 | *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec); |
6070 | else | 6238 | else |
6071 | *num_recs += ocfs2_clusters_for_bytes(sb, | 6239 | *num_recs += ocfs2_clusters_for_bytes(sb, |
6072 | XATTR_SIZE_MAX); | 6240 | XATTR_SIZE_MAX); |
@@ -6360,33 +6528,33 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, | |||
6360 | int indexed) | 6528 | int indexed) |
6361 | { | 6529 | { |
6362 | int ret; | 6530 | int ret; |
6363 | handle_t *handle; | ||
6364 | struct ocfs2_alloc_context *meta_ac; | ||
6365 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 6531 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
6532 | struct ocfs2_xattr_set_ctxt ctxt; | ||
6366 | 6533 | ||
6367 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | 6534 | memset(&ctxt, 0, sizeof(ctxt)); |
6535 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); | ||
6368 | if (ret < 0) { | 6536 | if (ret < 0) { |
6369 | mlog_errno(ret); | 6537 | mlog_errno(ret); |
6370 | return ret; | 6538 | return ret; |
6371 | } | 6539 | } |
6372 | 6540 | ||
6373 | handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); | 6541 | ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); |
6374 | if (IS_ERR(handle)) { | 6542 | if (IS_ERR(ctxt.handle)) { |
6375 | ret = PTR_ERR(handle); | 6543 | ret = PTR_ERR(ctxt.handle); |
6376 | mlog_errno(ret); | 6544 | mlog_errno(ret); |
6377 | goto out; | 6545 | goto out; |
6378 | } | 6546 | } |
6379 | 6547 | ||
6380 | mlog(0, "create new xattr block for inode %llu, index = %d\n", | 6548 | mlog(0, "create new xattr block for inode %llu, index = %d\n", |
6381 | (unsigned long long)fe_bh->b_blocknr, indexed); | 6549 | (unsigned long long)fe_bh->b_blocknr, indexed); |
6382 | ret = ocfs2_create_xattr_block(handle, inode, fe_bh, | 6550 | ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, |
6383 | meta_ac, ret_bh, indexed); | 6551 | ret_bh); |
6384 | if (ret) | 6552 | if (ret) |
6385 | mlog_errno(ret); | 6553 | mlog_errno(ret); |
6386 | 6554 | ||
6387 | ocfs2_commit_trans(osb, handle); | 6555 | ocfs2_commit_trans(osb, ctxt.handle); |
6388 | out: | 6556 | out: |
6389 | ocfs2_free_alloc_context(meta_ac); | 6557 | ocfs2_free_alloc_context(ctxt.meta_ac); |
6390 | return ret; | 6558 | return ret; |
6391 | } | 6559 | } |
6392 | 6560 | ||
@@ -6978,9 +7146,9 @@ int ocfs2_init_security_and_acl(struct inode *dir, | |||
6978 | 7146 | ||
6979 | ret = ocfs2_init_security_get(inode, dir, &si); | 7147 | ret = ocfs2_init_security_get(inode, dir, &si); |
6980 | if (!ret) { | 7148 | if (!ret) { |
6981 | ret = ocfs2_xattr_security_set(inode, si.name, | 7149 | ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, |
6982 | si.value, si.value_len, | 7150 | si.name, si.value, si.value_len, |
6983 | XATTR_CREATE); | 7151 | XATTR_CREATE); |
6984 | if (ret) { | 7152 | if (ret) { |
6985 | mlog_errno(ret); | 7153 | mlog_errno(ret); |
6986 | goto leave; | 7154 | goto leave; |
@@ -7008,9 +7176,9 @@ leave: | |||
7008 | /* | 7176 | /* |
7009 | * 'security' attributes support | 7177 | * 'security' attributes support |
7010 | */ | 7178 | */ |
7011 | static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, | 7179 | static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, |
7012 | size_t list_size, const char *name, | 7180 | size_t list_size, const char *name, |
7013 | size_t name_len) | 7181 | size_t name_len, int type) |
7014 | { | 7182 | { |
7015 | const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; | 7183 | const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; |
7016 | const size_t total_len = prefix_len + name_len + 1; | 7184 | const size_t total_len = prefix_len + name_len + 1; |
@@ -7023,23 +7191,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, | |||
7023 | return total_len; | 7191 | return total_len; |
7024 | } | 7192 | } |
7025 | 7193 | ||
7026 | static int ocfs2_xattr_security_get(struct inode *inode, const char *name, | 7194 | static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, |
7027 | void *buffer, size_t size) | 7195 | void *buffer, size_t size, int type) |
7028 | { | 7196 | { |
7029 | if (strcmp(name, "") == 0) | 7197 | if (strcmp(name, "") == 0) |
7030 | return -EINVAL; | 7198 | return -EINVAL; |
7031 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, | 7199 | return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, |
7032 | buffer, size); | 7200 | name, buffer, size); |
7033 | } | 7201 | } |
7034 | 7202 | ||
7035 | static int ocfs2_xattr_security_set(struct inode *inode, const char *name, | 7203 | static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, |
7036 | const void *value, size_t size, int flags) | 7204 | const void *value, size_t size, int flags, int type) |
7037 | { | 7205 | { |
7038 | if (strcmp(name, "") == 0) | 7206 | if (strcmp(name, "") == 0) |
7039 | return -EINVAL; | 7207 | return -EINVAL; |
7040 | 7208 | ||
7041 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, | 7209 | return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY, |
7042 | size, flags); | 7210 | name, value, size, flags); |
7043 | } | 7211 | } |
7044 | 7212 | ||
7045 | int ocfs2_init_security_get(struct inode *inode, | 7213 | int ocfs2_init_security_get(struct inode *inode, |
@@ -7076,9 +7244,9 @@ struct xattr_handler ocfs2_xattr_security_handler = { | |||
7076 | /* | 7244 | /* |
7077 | * 'trusted' attributes support | 7245 | * 'trusted' attributes support |
7078 | */ | 7246 | */ |
7079 | static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, | 7247 | static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, |
7080 | size_t list_size, const char *name, | 7248 | size_t list_size, const char *name, |
7081 | size_t name_len) | 7249 | size_t name_len, int type) |
7082 | { | 7250 | { |
7083 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; | 7251 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
7084 | const size_t total_len = prefix_len + name_len + 1; | 7252 | const size_t total_len = prefix_len + name_len + 1; |
@@ -7091,23 +7259,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, | |||
7091 | return total_len; | 7259 | return total_len; |
7092 | } | 7260 | } |
7093 | 7261 | ||
7094 | static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, | 7262 | static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, |
7095 | void *buffer, size_t size) | 7263 | void *buffer, size_t size, int type) |
7096 | { | 7264 | { |
7097 | if (strcmp(name, "") == 0) | 7265 | if (strcmp(name, "") == 0) |
7098 | return -EINVAL; | 7266 | return -EINVAL; |
7099 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, | 7267 | return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, |
7100 | buffer, size); | 7268 | name, buffer, size); |
7101 | } | 7269 | } |
7102 | 7270 | ||
7103 | static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, | 7271 | static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, |
7104 | const void *value, size_t size, int flags) | 7272 | const void *value, size_t size, int flags, int type) |
7105 | { | 7273 | { |
7106 | if (strcmp(name, "") == 0) | 7274 | if (strcmp(name, "") == 0) |
7107 | return -EINVAL; | 7275 | return -EINVAL; |
7108 | 7276 | ||
7109 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, | 7277 | return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED, |
7110 | size, flags); | 7278 | name, value, size, flags); |
7111 | } | 7279 | } |
7112 | 7280 | ||
7113 | struct xattr_handler ocfs2_xattr_trusted_handler = { | 7281 | struct xattr_handler ocfs2_xattr_trusted_handler = { |
@@ -7120,13 +7288,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = { | |||
7120 | /* | 7288 | /* |
7121 | * 'user' attributes support | 7289 | * 'user' attributes support |
7122 | */ | 7290 | */ |
7123 | static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, | 7291 | static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, |
7124 | size_t list_size, const char *name, | 7292 | size_t list_size, const char *name, |
7125 | size_t name_len) | 7293 | size_t name_len, int type) |
7126 | { | 7294 | { |
7127 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; | 7295 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; |
7128 | const size_t total_len = prefix_len + name_len + 1; | 7296 | const size_t total_len = prefix_len + name_len + 1; |
7129 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 7297 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
7130 | 7298 | ||
7131 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | 7299 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) |
7132 | return 0; | 7300 | return 0; |
@@ -7139,31 +7307,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, | |||
7139 | return total_len; | 7307 | return total_len; |
7140 | } | 7308 | } |
7141 | 7309 | ||
7142 | static int ocfs2_xattr_user_get(struct inode *inode, const char *name, | 7310 | static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, |
7143 | void *buffer, size_t size) | 7311 | void *buffer, size_t size, int type) |
7144 | { | 7312 | { |
7145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 7313 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
7146 | 7314 | ||
7147 | if (strcmp(name, "") == 0) | 7315 | if (strcmp(name, "") == 0) |
7148 | return -EINVAL; | 7316 | return -EINVAL; |
7149 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | 7317 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) |
7150 | return -EOPNOTSUPP; | 7318 | return -EOPNOTSUPP; |
7151 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, | 7319 | return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name, |
7152 | buffer, size); | 7320 | buffer, size); |
7153 | } | 7321 | } |
7154 | 7322 | ||
7155 | static int ocfs2_xattr_user_set(struct inode *inode, const char *name, | 7323 | static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, |
7156 | const void *value, size_t size, int flags) | 7324 | const void *value, size_t size, int flags, int type) |
7157 | { | 7325 | { |
7158 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 7326 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
7159 | 7327 | ||
7160 | if (strcmp(name, "") == 0) | 7328 | if (strcmp(name, "") == 0) |
7161 | return -EINVAL; | 7329 | return -EINVAL; |
7162 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | 7330 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) |
7163 | return -EOPNOTSUPP; | 7331 | return -EOPNOTSUPP; |
7164 | 7332 | ||
7165 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, | 7333 | return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER, |
7166 | size, flags); | 7334 | name, value, size, flags); |
7167 | } | 7335 | } |
7168 | 7336 | ||
7169 | struct xattr_handler ocfs2_xattr_user_handler = { | 7337 | struct xattr_handler ocfs2_xattr_user_handler = { |
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 08e36389f56d..abd72a47f520 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info { | |||
40 | extern struct xattr_handler ocfs2_xattr_user_handler; | 40 | extern struct xattr_handler ocfs2_xattr_user_handler; |
41 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | 41 | extern struct xattr_handler ocfs2_xattr_trusted_handler; |
42 | extern struct xattr_handler ocfs2_xattr_security_handler; | 42 | extern struct xattr_handler ocfs2_xattr_security_handler; |
43 | #ifdef CONFIG_OCFS2_FS_POSIX_ACL | ||
44 | extern struct xattr_handler ocfs2_xattr_acl_access_handler; | 43 | extern struct xattr_handler ocfs2_xattr_acl_access_handler; |
45 | extern struct xattr_handler ocfs2_xattr_acl_default_handler; | 44 | extern struct xattr_handler ocfs2_xattr_acl_default_handler; |
46 | #endif | ||
47 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | 45 | extern struct xattr_handler *ocfs2_xattr_handlers[]; |
48 | 46 | ||
49 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | 47 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); |