diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 13:15:22 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 13:15:22 -0400 |
| commit | e675349e2bdbfb661fa0d8ff2441b4cf48fb7e48 (patch) | |
| tree | 7443e324c951f375945905dc436b012c98a00e05 | |
| parent | ef38ff9d372d4fe69e415370939a0f1fb5783af1 (diff) | |
| parent | 2309e9e040fe29469fb85a384636c455b62fe525 (diff) | |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (64 commits)
ocfs2/net: Add debug interface to o2net
ocfs2: Only build ocfs2/dlm with the o2cb stack module
ocfs2/cluster: Get rid of arguments to the timeout routines
ocfs2: Put tree in MAINTAINERS
ocfs2: Use BUG_ON
ocfs2: Convert ocfs2 over to unlocked_ioctl
ocfs2: Improve rename locking
fs/ocfs2/aops.c: test for IS_ERR rather than 0
ocfs2: Add inode stealing for ocfs2_reserve_new_inode
ocfs2: Add ac_alloc_slot in ocfs2_alloc_context
ocfs2: Add a new parameter for ocfs2_reserve_suballoc_bits
ocfs2: Enable cross extent block merge.
ocfs2: Add support for cross extent block
ocfs2: Move /sys/o2cb to /sys/fs/o2cb
sysfs: Allow removal of symlinks in the sysfs root
ocfs2: Reconnect after idle time out.
ocfs2/dlm: Cleanup lockres print
ocfs2/dlm: Fix lockname in lockres print function
ocfs2/dlm: Move dlm_print_one_mle() from dlmmaster.c to dlmdebug.c
ocfs2/dlm: Dumps the purgelist into a debugfs file
...
47 files changed, 5800 insertions, 1042 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb new file mode 100644 index 000000000000..9c49d8e6c0cc --- /dev/null +++ b/Documentation/ABI/obsolete/o2cb | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | What: /sys/o2cb symlink | ||
| 2 | Date: Dec 2005 | ||
| 3 | KernelVersion: 2.6.16 | ||
| 4 | Contact: ocfs2-devel@oss.oracle.com | ||
| 5 | Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will | ||
| 6 | be removed when new versions of ocfs2-tools which know to look | ||
| 7 | in /sys/fs/o2cb are sufficiently prevalent. Don't code new | ||
| 8 | software to look here, it should try /sys/fs/o2cb instead. | ||
| 9 | See Documentation/ABI/stable/o2cb for more information on usage. | ||
| 10 | Users: ocfs2-tools. It's sufficient to mail proposed changes to | ||
| 11 | ocfs2-devel@oss.oracle.com. | ||
diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb new file mode 100644 index 000000000000..5eb1545e0b8d --- /dev/null +++ b/Documentation/ABI/stable/o2cb | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | What: /sys/fs/o2cb/ (was /sys/o2cb) | ||
| 2 | Date: Dec 2005 | ||
| 3 | KernelVersion: 2.6.16 | ||
| 4 | Contact: ocfs2-devel@oss.oracle.com | ||
| 5 | Description: Ocfs2-tools looks at 'interface-revision' for versioning | ||
| 6 | information. Each logmask/ file controls a set of debug prints | ||
| 7 | and can be written into with the strings "allow", "deny", or | ||
| 8 | "off". Reading the file returns the current state. | ||
| 9 | Users: ocfs2-tools. It's sufficient to mail proposed changes to | ||
| 10 | ocfs2-devel@oss.oracle.com. | ||
diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2 new file mode 100644 index 000000000000..b7cc516a8a8a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ocfs2 | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | What: /sys/fs/ocfs2/ | ||
| 2 | Date: April 2008 | ||
| 3 | Contact: ocfs2-devel@oss.oracle.com | ||
| 4 | Description: | ||
| 5 | The /sys/fs/ocfs2 directory contains knobs used by the | ||
| 6 | ocfs2-tools to interact with the filesystem. | ||
| 7 | |||
| 8 | What: /sys/fs/ocfs2/max_locking_protocol | ||
| 9 | Date: April 2008 | ||
| 10 | Contact: ocfs2-devel@oss.oracle.com | ||
| 11 | Description: | ||
| 12 | The /sys/fs/ocfs2/max_locking_protocol file displays the version | ||
| 13 | of ocfs2 locking supported by the filesystem. This version | ||
| 14 | covers how ocfs2 uses distributed locking between cluster | ||
| 15 | nodes. | ||
| 16 | |||
| 17 | The protocol version has a major and minor number. Two | ||
| 18 | cluster nodes can interoperate if they have an identical | ||
| 19 | major number and an overlapping minor number - thus, | ||
| 20 | a node with version 1.10 can interoperate with a node | ||
| 21 | sporting version 1.8, as long as both use the 1.8 protocol. | ||
| 22 | |||
| 23 | Reading from this file returns a single line, the major | ||
| 24 | number and minor number joined by a period, e.g. "1.10". | ||
| 25 | |||
| 26 | This file is read-only. The value is compiled into the | ||
| 27 | driver. | ||
| 28 | |||
| 29 | What: /sys/fs/ocfs2/loaded_cluster_plugins | ||
| 30 | Date: April 2008 | ||
| 31 | Contact: ocfs2-devel@oss.oracle.com | ||
| 32 | Description: | ||
| 33 | The /sys/fs/ocfs2/loaded_cluster_plugins file describes | ||
| 34 | the available plugins to support ocfs2 cluster operation. | ||
| 35 | A cluster plugin is required to use ocfs2 in a cluster. | ||
| 36 | There are currently two available plugins: | ||
| 37 | |||
| 38 | * 'o2cb' - The classic o2cb cluster stack that ocfs2 has | ||
| 39 | used since its inception. | ||
| 40 | * 'user' - A plugin supporting userspace cluster software | ||
| 41 | in conjunction with fs/dlm. | ||
| 42 | |||
| 43 | Reading from this file returns the names of all loaded | ||
| 44 | plugins, one per line. | ||
| 45 | |||
| 46 | This file is read-only. Its contents may change as | ||
| 47 | plugins are loaded or removed. | ||
| 48 | |||
| 49 | What: /sys/fs/ocfs2/active_cluster_plugin | ||
| 50 | Date: April 2008 | ||
| 51 | Contact: ocfs2-devel@oss.oracle.com | ||
| 52 | Description: | ||
| 53 | The /sys/fs/ocfs2/active_cluster_plugin file displays which | ||
| 54 | cluster plugin is currently in use by the filesystem. | ||
| 55 | The active plugin will appear in the loaded_cluster_plugins | ||
| 56 | file as well. Only one plugin can be used at a time. | ||
| 57 | |||
| 58 | Reading from this file returns the name of the active plugin | ||
| 59 | on a single line. | ||
| 60 | |||
| 61 | This file is read-only. Which plugin is active depends on | ||
| 62 | the cluster stack in use. The contents may change | ||
| 63 | when all filesystems are unmounted and the cluster stack | ||
| 64 | is changed. | ||
| 65 | |||
| 66 | What: /sys/fs/ocfs2/cluster_stack | ||
| 67 | Date: April 2008 | ||
| 68 | Contact: ocfs2-devel@oss.oracle.com | ||
| 69 | Description: | ||
| 70 | The /sys/fs/ocfs2/cluster_stack file contains the name | ||
| 71 | of the current ocfs2 cluster stack. This value is set by | ||
| 72 | userspace tools when bringing the cluster stack online. | ||
| 73 | |||
| 74 | Cluster stack names are 4 characters in length. | ||
| 75 | |||
| 76 | When the 'o2cb' cluster stack is used, the 'o2cb' cluster | ||
| 77 | plugin is active. All other cluster stacks use the 'user' | ||
| 78 | cluster plugin. | ||
| 79 | |||
| 80 | Reading from this file returns the name of the current | ||
| 81 | cluster stack on a single line. | ||
| 82 | |||
| 83 | Writing a new stack name to this file changes the current | ||
| 84 | cluster stack unless there are mounted ocfs2 filesystems. | ||
| 85 | If there are mounted filesystems, attempts to change the | ||
| 86 | stack return an error. | ||
| 87 | |||
| 88 | Users: | ||
| 89 | ocfs2-tools <ocfs2-tools-devel@oss.oracle.com> | ||
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 164c89394cff..4b70622a8a91 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -318,3 +318,13 @@ Why: Not used in-tree. The current out-of-tree users used it to | |||
| 318 | code / infrastructure should be in the kernel and not in some | 318 | code / infrastructure should be in the kernel and not in some |
| 319 | out-of-tree driver. | 319 | out-of-tree driver. |
| 320 | Who: Thomas Gleixner <tglx@linutronix.de> | 320 | Who: Thomas Gleixner <tglx@linutronix.de> |
| 321 | |||
| 322 | --------------------------- | ||
| 323 | |||
| 324 | What: /sys/o2cb symlink | ||
| 325 | When: January 2010 | ||
| 326 | Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb | ||
| 327 | exists as a symlink for backwards compatibility for old versions of | ||
| 328 | ocfs2-tools. 2 years should be sufficient time to phase in new versions | ||
| 329 | which know to look in /sys/fs/o2cb. | ||
| 330 | Who: ocfs2-devel@oss.oracle.com | ||
diff --git a/MAINTAINERS b/MAINTAINERS index 3eceebb48c92..974ee8ddb12c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -2952,6 +2952,7 @@ P: Joel Becker | |||
| 2952 | M: joel.becker@oracle.com | 2952 | M: joel.becker@oracle.com |
| 2953 | L: ocfs2-devel@oss.oracle.com | 2953 | L: ocfs2-devel@oss.oracle.com |
| 2954 | W: http://oss.oracle.com/projects/ocfs2/ | 2954 | W: http://oss.oracle.com/projects/ocfs2/ |
| 2955 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git | ||
| 2955 | S: Supported | 2956 | S: Supported |
| 2956 | 2957 | ||
| 2957 | OMNIKEY CARDMAN 4000 DRIVER | 2958 | OMNIKEY CARDMAN 4000 DRIVER |
diff --git a/fs/Kconfig b/fs/Kconfig index c509123bea49..028ae38ecc52 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -444,6 +444,32 @@ config OCFS2_FS | |||
| 444 | For more information on OCFS2, see the file | 444 | For more information on OCFS2, see the file |
| 445 | <file:Documentation/filesystems/ocfs2.txt>. | 445 | <file:Documentation/filesystems/ocfs2.txt>. |
| 446 | 446 | ||
| 447 | config OCFS2_FS_O2CB | ||
| 448 | tristate "O2CB Kernelspace Clustering" | ||
| 449 | depends on OCFS2_FS | ||
| 450 | default y | ||
| 451 | help | ||
| 452 | OCFS2 includes a simple kernelspace clustering package, the OCFS2 | ||
| 453 | Cluster Base. It only requires a very small userspace component | ||
| 454 | to configure it. This comes with the standard ocfs2-tools package. | ||
| 455 | O2CB is limited to maintaining a cluster for OCFS2 file systems. | ||
| 456 | It cannot manage any other cluster applications. | ||
| 457 | |||
| 458 | It is always safe to say Y here, as the clustering method is | ||
| 459 | run-time selectable. | ||
| 460 | |||
| 461 | config OCFS2_FS_USERSPACE_CLUSTER | ||
| 462 | tristate "OCFS2 Userspace Clustering" | ||
| 463 | depends on OCFS2_FS && DLM | ||
| 464 | default y | ||
| 465 | help | ||
| 466 | This option will allow OCFS2 to use userspace clustering services | ||
| 467 | in conjunction with the DLM in fs/dlm. If you are using a | ||
| 468 | userspace cluster manager, say Y here. | ||
| 469 | |||
| 470 | It is safe to say Y, as the clustering method is run-time | ||
| 471 | selectable. | ||
| 472 | |||
| 447 | config OCFS2_DEBUG_MASKLOG | 473 | config OCFS2_DEBUG_MASKLOG |
| 448 | bool "OCFS2 logging support" | 474 | bool "OCFS2 logging support" |
| 449 | depends on OCFS2_FS | 475 | depends on OCFS2_FS |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 4d4ce48bb42c..f6956de56fdb 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
| @@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2 | |||
| 2 | 2 | ||
| 3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES | 3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_OCFS2_FS) += ocfs2.o | 5 | obj-$(CONFIG_OCFS2_FS) += \ |
| 6 | ocfs2.o \ | ||
| 7 | ocfs2_stackglue.o | ||
| 8 | |||
| 9 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o | ||
| 10 | obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o | ||
| 6 | 11 | ||
| 7 | ocfs2-objs := \ | 12 | ocfs2-objs := \ |
| 8 | alloc.o \ | 13 | alloc.o \ |
| @@ -31,5 +36,10 @@ ocfs2-objs := \ | |||
| 31 | uptodate.o \ | 36 | uptodate.o \ |
| 32 | ver.o | 37 | ver.o |
| 33 | 38 | ||
| 39 | ocfs2_stackglue-objs := stackglue.o | ||
| 40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | ||
| 41 | ocfs2_stack_user-objs := stack_user.o | ||
| 42 | |||
| 43 | # cluster/ is always needed when OCFS2_FS for masklog support | ||
| 34 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 44 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
| 35 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 45 | obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 447206eb5c2e..41f84c92094f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, | |||
| 1029 | BUG_ON(!next_free); | 1029 | BUG_ON(!next_free); |
| 1030 | 1030 | ||
| 1031 | /* The tree code before us didn't allow enough room in the leaf. */ | 1031 | /* The tree code before us didn't allow enough room in the leaf. */ |
| 1032 | if (el->l_next_free_rec == el->l_count && !has_empty) | 1032 | BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); |
| 1033 | BUG(); | ||
| 1034 | 1033 | ||
| 1035 | /* | 1034 | /* |
| 1036 | * The easiest way to approach this is to just remove the | 1035 | * The easiest way to approach this is to just remove the |
| @@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, | |||
| 1450 | * - When our insert into the right path leaf is at the leftmost edge | 1449 | * - When our insert into the right path leaf is at the leftmost edge |
| 1451 | * and requires an update of the path immediately to its left. This | 1450 | * and requires an update of the path immediately to its left. This |
| 1452 | * can occur at the end of some types of rotation and appending inserts. | 1451 | * can occur at the end of some types of rotation and appending inserts. |
| 1452 | * - When we've adjusted the last extent record in the left path leaf and the | ||
| 1453 | * 1st extent record in the right path leaf during cross extent block merge. | ||
| 1453 | */ | 1454 | */ |
| 1454 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, | 1455 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, |
| 1455 | struct ocfs2_path *left_path, | 1456 | struct ocfs2_path *left_path, |
| @@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | |||
| 2712 | } | 2713 | } |
| 2713 | } | 2714 | } |
| 2714 | 2715 | ||
| 2716 | static int ocfs2_get_right_path(struct inode *inode, | ||
| 2717 | struct ocfs2_path *left_path, | ||
| 2718 | struct ocfs2_path **ret_right_path) | ||
| 2719 | { | ||
| 2720 | int ret; | ||
| 2721 | u32 right_cpos; | ||
| 2722 | struct ocfs2_path *right_path = NULL; | ||
| 2723 | struct ocfs2_extent_list *left_el; | ||
| 2724 | |||
| 2725 | *ret_right_path = NULL; | ||
| 2726 | |||
| 2727 | /* This function shouldn't be called for non-trees. */ | ||
| 2728 | BUG_ON(left_path->p_tree_depth == 0); | ||
| 2729 | |||
| 2730 | left_el = path_leaf_el(left_path); | ||
| 2731 | BUG_ON(left_el->l_next_free_rec != left_el->l_count); | ||
| 2732 | |||
| 2733 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | ||
| 2734 | &right_cpos); | ||
| 2735 | if (ret) { | ||
| 2736 | mlog_errno(ret); | ||
| 2737 | goto out; | ||
| 2738 | } | ||
| 2739 | |||
| 2740 | /* This function shouldn't be called for the rightmost leaf. */ | ||
| 2741 | BUG_ON(right_cpos == 0); | ||
| 2742 | |||
| 2743 | right_path = ocfs2_new_path(path_root_bh(left_path), | ||
| 2744 | path_root_el(left_path)); | ||
| 2745 | if (!right_path) { | ||
| 2746 | ret = -ENOMEM; | ||
| 2747 | mlog_errno(ret); | ||
| 2748 | goto out; | ||
| 2749 | } | ||
| 2750 | |||
| 2751 | ret = ocfs2_find_path(inode, right_path, right_cpos); | ||
| 2752 | if (ret) { | ||
| 2753 | mlog_errno(ret); | ||
| 2754 | goto out; | ||
| 2755 | } | ||
| 2756 | |||
| 2757 | *ret_right_path = right_path; | ||
| 2758 | out: | ||
| 2759 | if (ret) | ||
| 2760 | ocfs2_free_path(right_path); | ||
| 2761 | return ret; | ||
| 2762 | } | ||
| 2763 | |||
| 2715 | /* | 2764 | /* |
| 2716 | * Remove split_rec clusters from the record at index and merge them | 2765 | * Remove split_rec clusters from the record at index and merge them |
| 2717 | * onto the beginning of the record at index + 1. | 2766 | * onto the beginning of the record "next" to it. |
| 2767 | * For index < l_count - 1, the "next" means the extent rec at index + 1. | ||
| 2768 | * For index == l_count - 1, the "next" means the 1st extent rec of the | ||
| 2769 | * next extent block. | ||
| 2718 | */ | 2770 | */ |
| 2719 | static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | 2771 | static int ocfs2_merge_rec_right(struct inode *inode, |
| 2720 | handle_t *handle, | 2772 | struct ocfs2_path *left_path, |
| 2721 | struct ocfs2_extent_rec *split_rec, | 2773 | handle_t *handle, |
| 2722 | struct ocfs2_extent_list *el, int index) | 2774 | struct ocfs2_extent_rec *split_rec, |
| 2775 | int index) | ||
| 2723 | { | 2776 | { |
| 2724 | int ret; | 2777 | int ret, next_free, i; |
| 2725 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2778 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
| 2726 | struct ocfs2_extent_rec *left_rec; | 2779 | struct ocfs2_extent_rec *left_rec; |
| 2727 | struct ocfs2_extent_rec *right_rec; | 2780 | struct ocfs2_extent_rec *right_rec; |
| 2781 | struct ocfs2_extent_list *right_el; | ||
| 2782 | struct ocfs2_path *right_path = NULL; | ||
| 2783 | int subtree_index = 0; | ||
| 2784 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | ||
| 2785 | struct buffer_head *bh = path_leaf_bh(left_path); | ||
| 2786 | struct buffer_head *root_bh = NULL; | ||
| 2728 | 2787 | ||
| 2729 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); | 2788 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); |
| 2730 | |||
| 2731 | left_rec = &el->l_recs[index]; | 2789 | left_rec = &el->l_recs[index]; |
| 2732 | right_rec = &el->l_recs[index + 1]; | 2790 | |
| 2791 | if (index == le16_to_cpu(el->l_next_free_rec - 1) && | ||
| 2792 | le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { | ||
| 2793 | /* we meet with a cross extent block merge. */ | ||
| 2794 | ret = ocfs2_get_right_path(inode, left_path, &right_path); | ||
| 2795 | if (ret) { | ||
| 2796 | mlog_errno(ret); | ||
| 2797 | goto out; | ||
| 2798 | } | ||
| 2799 | |||
| 2800 | right_el = path_leaf_el(right_path); | ||
| 2801 | next_free = le16_to_cpu(right_el->l_next_free_rec); | ||
| 2802 | BUG_ON(next_free <= 0); | ||
| 2803 | right_rec = &right_el->l_recs[0]; | ||
| 2804 | if (ocfs2_is_empty_extent(right_rec)) { | ||
| 2805 | BUG_ON(le16_to_cpu(next_free) <= 1); | ||
| 2806 | right_rec = &right_el->l_recs[1]; | ||
| 2807 | } | ||
| 2808 | |||
| 2809 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
| 2810 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
| 2811 | le32_to_cpu(right_rec->e_cpos)); | ||
| 2812 | |||
| 2813 | subtree_index = ocfs2_find_subtree_root(inode, | ||
| 2814 | left_path, right_path); | ||
| 2815 | |||
| 2816 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
| 2817 | handle->h_buffer_credits, | ||
| 2818 | right_path); | ||
| 2819 | if (ret) { | ||
| 2820 | mlog_errno(ret); | ||
| 2821 | goto out; | ||
| 2822 | } | ||
| 2823 | |||
| 2824 | root_bh = left_path->p_node[subtree_index].bh; | ||
| 2825 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
| 2826 | |||
| 2827 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 2828 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2829 | if (ret) { | ||
| 2830 | mlog_errno(ret); | ||
| 2831 | goto out; | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | for (i = subtree_index + 1; | ||
| 2835 | i < path_num_items(right_path); i++) { | ||
| 2836 | ret = ocfs2_journal_access(handle, inode, | ||
| 2837 | right_path->p_node[i].bh, | ||
| 2838 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2839 | if (ret) { | ||
| 2840 | mlog_errno(ret); | ||
| 2841 | goto out; | ||
| 2842 | } | ||
| 2843 | |||
| 2844 | ret = ocfs2_journal_access(handle, inode, | ||
| 2845 | left_path->p_node[i].bh, | ||
| 2846 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2847 | if (ret) { | ||
| 2848 | mlog_errno(ret); | ||
| 2849 | goto out; | ||
| 2850 | } | ||
| 2851 | } | ||
| 2852 | |||
| 2853 | } else { | ||
| 2854 | BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); | ||
| 2855 | right_rec = &el->l_recs[index + 1]; | ||
| 2856 | } | ||
| 2733 | 2857 | ||
| 2734 | ret = ocfs2_journal_access(handle, inode, bh, | 2858 | ret = ocfs2_journal_access(handle, inode, bh, |
| 2735 | OCFS2_JOURNAL_ACCESS_WRITE); | 2859 | OCFS2_JOURNAL_ACCESS_WRITE); |
| @@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | |||
| 2751 | if (ret) | 2875 | if (ret) |
| 2752 | mlog_errno(ret); | 2876 | mlog_errno(ret); |
| 2753 | 2877 | ||
| 2878 | if (right_path) { | ||
| 2879 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); | ||
| 2880 | if (ret) | ||
| 2881 | mlog_errno(ret); | ||
| 2882 | |||
| 2883 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
| 2884 | right_path, subtree_index); | ||
| 2885 | } | ||
| 2886 | out: | ||
| 2887 | if (right_path) | ||
| 2888 | ocfs2_free_path(right_path); | ||
| 2889 | return ret; | ||
| 2890 | } | ||
| 2891 | |||
| 2892 | static int ocfs2_get_left_path(struct inode *inode, | ||
| 2893 | struct ocfs2_path *right_path, | ||
| 2894 | struct ocfs2_path **ret_left_path) | ||
| 2895 | { | ||
| 2896 | int ret; | ||
| 2897 | u32 left_cpos; | ||
| 2898 | struct ocfs2_path *left_path = NULL; | ||
| 2899 | |||
| 2900 | *ret_left_path = NULL; | ||
| 2901 | |||
| 2902 | /* This function shouldn't be called for non-trees. */ | ||
| 2903 | BUG_ON(right_path->p_tree_depth == 0); | ||
| 2904 | |||
| 2905 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
| 2906 | right_path, &left_cpos); | ||
| 2907 | if (ret) { | ||
| 2908 | mlog_errno(ret); | ||
| 2909 | goto out; | ||
| 2910 | } | ||
| 2911 | |||
| 2912 | /* This function shouldn't be called for the leftmost leaf. */ | ||
| 2913 | BUG_ON(left_cpos == 0); | ||
| 2914 | |||
| 2915 | left_path = ocfs2_new_path(path_root_bh(right_path), | ||
| 2916 | path_root_el(right_path)); | ||
| 2917 | if (!left_path) { | ||
| 2918 | ret = -ENOMEM; | ||
| 2919 | mlog_errno(ret); | ||
| 2920 | goto out; | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | ret = ocfs2_find_path(inode, left_path, left_cpos); | ||
| 2924 | if (ret) { | ||
| 2925 | mlog_errno(ret); | ||
| 2926 | goto out; | ||
| 2927 | } | ||
| 2928 | |||
| 2929 | *ret_left_path = left_path; | ||
| 2754 | out: | 2930 | out: |
| 2931 | if (ret) | ||
| 2932 | ocfs2_free_path(left_path); | ||
| 2755 | return ret; | 2933 | return ret; |
| 2756 | } | 2934 | } |
| 2757 | 2935 | ||
| 2758 | /* | 2936 | /* |
| 2759 | * Remove split_rec clusters from the record at index and merge them | 2937 | * Remove split_rec clusters from the record at index and merge them |
| 2760 | * onto the tail of the record at index - 1. | 2938 | * onto the tail of the record "before" it. |
| 2939 | * For index > 0, the "before" means the extent rec at index - 1. | ||
| 2940 | * | ||
| 2941 | * For index == 0, the "before" means the last record of the previous | ||
| 2942 | * extent block. And there is also a situation that we may need to | ||
| 2943 | * remove the rightmost leaf extent block in the right_path and change | ||
| 2944 | * the right path to indicate the new rightmost path. | ||
| 2761 | */ | 2945 | */ |
| 2762 | static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | 2946 | static int ocfs2_merge_rec_left(struct inode *inode, |
| 2947 | struct ocfs2_path *right_path, | ||
| 2763 | handle_t *handle, | 2948 | handle_t *handle, |
| 2764 | struct ocfs2_extent_rec *split_rec, | 2949 | struct ocfs2_extent_rec *split_rec, |
| 2765 | struct ocfs2_extent_list *el, int index) | 2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2951 | int index) | ||
| 2766 | { | 2952 | { |
| 2767 | int ret, has_empty_extent = 0; | 2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
| 2768 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2954 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
| 2769 | struct ocfs2_extent_rec *left_rec; | 2955 | struct ocfs2_extent_rec *left_rec; |
| 2770 | struct ocfs2_extent_rec *right_rec; | 2956 | struct ocfs2_extent_rec *right_rec; |
| 2957 | struct ocfs2_extent_list *el = path_leaf_el(right_path); | ||
| 2958 | struct buffer_head *bh = path_leaf_bh(right_path); | ||
| 2959 | struct buffer_head *root_bh = NULL; | ||
| 2960 | struct ocfs2_path *left_path = NULL; | ||
| 2961 | struct ocfs2_extent_list *left_el; | ||
| 2771 | 2962 | ||
| 2772 | BUG_ON(index <= 0); | 2963 | BUG_ON(index < 0); |
| 2773 | 2964 | ||
| 2774 | left_rec = &el->l_recs[index - 1]; | ||
| 2775 | right_rec = &el->l_recs[index]; | 2965 | right_rec = &el->l_recs[index]; |
| 2776 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | 2966 | if (index == 0) { |
| 2777 | has_empty_extent = 1; | 2967 | /* we meet with a cross extent block merge. */ |
| 2968 | ret = ocfs2_get_left_path(inode, right_path, &left_path); | ||
| 2969 | if (ret) { | ||
| 2970 | mlog_errno(ret); | ||
| 2971 | goto out; | ||
| 2972 | } | ||
| 2973 | |||
| 2974 | left_el = path_leaf_el(left_path); | ||
| 2975 | BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != | ||
| 2976 | le16_to_cpu(left_el->l_count)); | ||
| 2977 | |||
| 2978 | left_rec = &left_el->l_recs[ | ||
| 2979 | le16_to_cpu(left_el->l_next_free_rec) - 1]; | ||
| 2980 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
| 2981 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
| 2982 | le32_to_cpu(split_rec->e_cpos)); | ||
| 2983 | |||
| 2984 | subtree_index = ocfs2_find_subtree_root(inode, | ||
| 2985 | left_path, right_path); | ||
| 2986 | |||
| 2987 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
| 2988 | handle->h_buffer_credits, | ||
| 2989 | left_path); | ||
| 2990 | if (ret) { | ||
| 2991 | mlog_errno(ret); | ||
| 2992 | goto out; | ||
| 2993 | } | ||
| 2994 | |||
| 2995 | root_bh = left_path->p_node[subtree_index].bh; | ||
| 2996 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
| 2997 | |||
| 2998 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 2999 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3000 | if (ret) { | ||
| 3001 | mlog_errno(ret); | ||
| 3002 | goto out; | ||
| 3003 | } | ||
| 3004 | |||
| 3005 | for (i = subtree_index + 1; | ||
| 3006 | i < path_num_items(right_path); i++) { | ||
| 3007 | ret = ocfs2_journal_access(handle, inode, | ||
| 3008 | right_path->p_node[i].bh, | ||
| 3009 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3010 | if (ret) { | ||
| 3011 | mlog_errno(ret); | ||
| 3012 | goto out; | ||
| 3013 | } | ||
| 3014 | |||
| 3015 | ret = ocfs2_journal_access(handle, inode, | ||
| 3016 | left_path->p_node[i].bh, | ||
| 3017 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3018 | if (ret) { | ||
| 3019 | mlog_errno(ret); | ||
| 3020 | goto out; | ||
| 3021 | } | ||
| 3022 | } | ||
| 3023 | } else { | ||
| 3024 | left_rec = &el->l_recs[index - 1]; | ||
| 3025 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 3026 | has_empty_extent = 1; | ||
| 3027 | } | ||
| 2778 | 3028 | ||
| 2779 | ret = ocfs2_journal_access(handle, inode, bh, | 3029 | ret = ocfs2_journal_access(handle, inode, bh, |
| 2780 | OCFS2_JOURNAL_ACCESS_WRITE); | 3030 | OCFS2_JOURNAL_ACCESS_WRITE); |
| @@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
| 2790 | *left_rec = *split_rec; | 3040 | *left_rec = *split_rec; |
| 2791 | 3041 | ||
| 2792 | has_empty_extent = 0; | 3042 | has_empty_extent = 0; |
| 2793 | } else { | 3043 | } else |
| 2794 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | 3044 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); |
| 2795 | } | ||
| 2796 | 3045 | ||
| 2797 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | 3046 | le32_add_cpu(&right_rec->e_cpos, split_clusters); |
| 2798 | le64_add_cpu(&right_rec->e_blkno, | 3047 | le64_add_cpu(&right_rec->e_blkno, |
| @@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
| 2805 | if (ret) | 3054 | if (ret) |
| 2806 | mlog_errno(ret); | 3055 | mlog_errno(ret); |
| 2807 | 3056 | ||
| 3057 | if (left_path) { | ||
| 3058 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
| 3059 | if (ret) | ||
| 3060 | mlog_errno(ret); | ||
| 3061 | |||
| 3062 | /* | ||
| 3063 | * In the situation that the right_rec is empty and the extent | ||
| 3064 | * block is empty also, ocfs2_complete_edge_insert can't handle | ||
| 3065 | * it and we need to delete the right extent block. | ||
| 3066 | */ | ||
| 3067 | if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && | ||
| 3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | ||
| 3069 | |||
| 3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | ||
| 3071 | right_path, dealloc); | ||
| 3072 | if (ret) { | ||
| 3073 | mlog_errno(ret); | ||
| 3074 | goto out; | ||
| 3075 | } | ||
| 3076 | |||
| 3077 | /* Now the rightmost extent block has been deleted. | ||
| 3078 | * So we use the new rightmost path. | ||
| 3079 | */ | ||
| 3080 | ocfs2_mv_path(right_path, left_path); | ||
| 3081 | left_path = NULL; | ||
| 3082 | } else | ||
| 3083 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
| 3084 | right_path, subtree_index); | ||
| 3085 | } | ||
| 2808 | out: | 3086 | out: |
| 3087 | if (left_path) | ||
| 3088 | ocfs2_free_path(left_path); | ||
| 2809 | return ret; | 3089 | return ret; |
| 2810 | } | 3090 | } |
| 2811 | 3091 | ||
| 2812 | static int ocfs2_try_to_merge_extent(struct inode *inode, | 3092 | static int ocfs2_try_to_merge_extent(struct inode *inode, |
| 2813 | handle_t *handle, | 3093 | handle_t *handle, |
| 2814 | struct ocfs2_path *left_path, | 3094 | struct ocfs2_path *path, |
| 2815 | int split_index, | 3095 | int split_index, |
| 2816 | struct ocfs2_extent_rec *split_rec, | 3096 | struct ocfs2_extent_rec *split_rec, |
| 2817 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| @@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2819 | 3099 | ||
| 2820 | { | 3100 | { |
| 2821 | int ret = 0; | 3101 | int ret = 0; |
| 2822 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | 3102 | struct ocfs2_extent_list *el = path_leaf_el(path); |
| 2823 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | 3103 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; |
| 2824 | 3104 | ||
| 2825 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); | 3105 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); |
| @@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2832 | * extents - having more than one in a leaf is | 3112 | * extents - having more than one in a leaf is |
| 2833 | * illegal. | 3113 | * illegal. |
| 2834 | */ | 3114 | */ |
| 2835 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2836 | dealloc); | 3116 | dealloc); |
| 2837 | if (ret) { | 3117 | if (ret) { |
| 2838 | mlog_errno(ret); | 3118 | mlog_errno(ret); |
| @@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2847 | * Left-right contig implies this. | 3127 | * Left-right contig implies this. |
| 2848 | */ | 3128 | */ |
| 2849 | BUG_ON(!ctxt->c_split_covers_rec); | 3129 | BUG_ON(!ctxt->c_split_covers_rec); |
| 2850 | BUG_ON(split_index == 0); | ||
| 2851 | 3130 | ||
| 2852 | /* | 3131 | /* |
| 2853 | * Since the leftright insert always covers the entire | 3132 | * Since the leftright insert always covers the entire |
| @@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2858 | * Since the adding of an empty extent shifts | 3137 | * Since the adding of an empty extent shifts |
| 2859 | * everything back to the right, there's no need to | 3138 | * everything back to the right, there's no need to |
| 2860 | * update split_index here. | 3139 | * update split_index here. |
| 3140 | * | ||
| 3141 | * When the split_index is zero, we need to merge it to the | ||
| 3142 | * previous extent block. It is more efficient and easier | ||
| 3143 | * if we do merge_right first and merge_left later. | ||
| 2861 | */ | 3144 | */ |
| 2862 | ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), | 3145 | ret = ocfs2_merge_rec_right(inode, path, |
| 2863 | handle, split_rec, el, split_index); | 3146 | handle, split_rec, |
| 3147 | split_index); | ||
| 2864 | if (ret) { | 3148 | if (ret) { |
| 2865 | mlog_errno(ret); | 3149 | mlog_errno(ret); |
| 2866 | goto out; | 3150 | goto out; |
| @@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2871 | */ | 3155 | */ |
| 2872 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
| 2873 | 3157 | ||
| 2874 | /* | 3158 | /* The merge left us with an empty extent, remove it. */ |
| 2875 | * The left merge left us with an empty extent, remove | 3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); |
| 2876 | * it. | ||
| 2877 | */ | ||
| 2878 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); | ||
| 2879 | if (ret) { | 3160 | if (ret) { |
| 2880 | mlog_errno(ret); | 3161 | mlog_errno(ret); |
| 2881 | goto out; | 3162 | goto out; |
| 2882 | } | 3163 | } |
| 2883 | split_index--; | 3164 | |
| 2884 | rec = &el->l_recs[split_index]; | 3165 | rec = &el->l_recs[split_index]; |
| 2885 | 3166 | ||
| 2886 | /* | 3167 | /* |
| 2887 | * Note that we don't pass split_rec here on purpose - | 3168 | * Note that we don't pass split_rec here on purpose - |
| 2888 | * we've merged it into the left side. | 3169 | * we've merged it into the rec already. |
| 2889 | */ | 3170 | */ |
| 2890 | ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), | 3171 | ret = ocfs2_merge_rec_left(inode, path, |
| 2891 | handle, rec, el, split_index); | 3172 | handle, rec, |
| 3173 | dealloc, | ||
| 3174 | split_index); | ||
| 3175 | |||
| 2892 | if (ret) { | 3176 | if (ret) { |
| 2893 | mlog_errno(ret); | 3177 | mlog_errno(ret); |
| 2894 | goto out; | 3178 | goto out; |
| 2895 | } | 3179 | } |
| 2896 | 3180 | ||
| 2897 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2898 | |||
| 2899 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
| 2900 | dealloc); | 3182 | dealloc); |
| 2901 | /* | 3183 | /* |
| 2902 | * Error from this last rotate is not critical, so | 3184 | * Error from this last rotate is not critical, so |
| @@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2915 | */ | 3197 | */ |
| 2916 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | 3198 | if (ctxt->c_contig_type == CONTIG_RIGHT) { |
| 2917 | ret = ocfs2_merge_rec_left(inode, | 3199 | ret = ocfs2_merge_rec_left(inode, |
| 2918 | path_leaf_bh(left_path), | 3200 | path, |
| 2919 | handle, split_rec, el, | 3201 | handle, split_rec, |
| 3202 | dealloc, | ||
| 2920 | split_index); | 3203 | split_index); |
| 2921 | if (ret) { | 3204 | if (ret) { |
| 2922 | mlog_errno(ret); | 3205 | mlog_errno(ret); |
| @@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2924 | } | 3207 | } |
| 2925 | } else { | 3208 | } else { |
| 2926 | ret = ocfs2_merge_rec_right(inode, | 3209 | ret = ocfs2_merge_rec_right(inode, |
| 2927 | path_leaf_bh(left_path), | 3210 | path, |
| 2928 | handle, split_rec, el, | 3211 | handle, split_rec, |
| 2929 | split_index); | 3212 | split_index); |
| 2930 | if (ret) { | 3213 | if (ret) { |
| 2931 | mlog_errno(ret); | 3214 | mlog_errno(ret); |
| @@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2938 | * The merge may have left an empty extent in | 3221 | * The merge may have left an empty extent in |
| 2939 | * our leaf. Try to rotate it away. | 3222 | * our leaf. Try to rotate it away. |
| 2940 | */ | 3223 | */ |
| 2941 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2942 | dealloc); | 3225 | dealloc); |
| 2943 | if (ret) | 3226 | if (ret) |
| 2944 | mlog_errno(ret); | 3227 | mlog_errno(ret); |
| @@ -3498,20 +3781,57 @@ out: | |||
| 3498 | } | 3781 | } |
| 3499 | 3782 | ||
| 3500 | static enum ocfs2_contig_type | 3783 | static enum ocfs2_contig_type |
| 3501 | ocfs2_figure_merge_contig_type(struct inode *inode, | 3784 | ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, |
| 3502 | struct ocfs2_extent_list *el, int index, | 3785 | struct ocfs2_extent_list *el, int index, |
| 3503 | struct ocfs2_extent_rec *split_rec) | 3786 | struct ocfs2_extent_rec *split_rec) |
| 3504 | { | 3787 | { |
| 3505 | struct ocfs2_extent_rec *rec; | 3788 | int status; |
| 3506 | enum ocfs2_contig_type ret = CONTIG_NONE; | 3789 | enum ocfs2_contig_type ret = CONTIG_NONE; |
| 3790 | u32 left_cpos, right_cpos; | ||
| 3791 | struct ocfs2_extent_rec *rec = NULL; | ||
| 3792 | struct ocfs2_extent_list *new_el; | ||
| 3793 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | ||
| 3794 | struct buffer_head *bh; | ||
| 3795 | struct ocfs2_extent_block *eb; | ||
| 3796 | |||
| 3797 | if (index > 0) { | ||
| 3798 | rec = &el->l_recs[index - 1]; | ||
| 3799 | } else if (path->p_tree_depth > 0) { | ||
| 3800 | status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
| 3801 | path, &left_cpos); | ||
| 3802 | if (status) | ||
| 3803 | goto out; | ||
| 3804 | |||
| 3805 | if (left_cpos != 0) { | ||
| 3806 | left_path = ocfs2_new_path(path_root_bh(path), | ||
| 3807 | path_root_el(path)); | ||
| 3808 | if (!left_path) | ||
| 3809 | goto out; | ||
| 3810 | |||
| 3811 | status = ocfs2_find_path(inode, left_path, left_cpos); | ||
| 3812 | if (status) | ||
| 3813 | goto out; | ||
| 3814 | |||
| 3815 | new_el = path_leaf_el(left_path); | ||
| 3816 | |||
| 3817 | if (le16_to_cpu(new_el->l_next_free_rec) != | ||
| 3818 | le16_to_cpu(new_el->l_count)) { | ||
| 3819 | bh = path_leaf_bh(left_path); | ||
| 3820 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
| 3821 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
| 3822 | eb); | ||
| 3823 | goto out; | ||
| 3824 | } | ||
| 3825 | rec = &new_el->l_recs[ | ||
| 3826 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | ||
| 3827 | } | ||
| 3828 | } | ||
| 3507 | 3829 | ||
| 3508 | /* | 3830 | /* |
| 3509 | * We're careful to check for an empty extent record here - | 3831 | * We're careful to check for an empty extent record here - |
| 3510 | * the merge code will know what to do if it sees one. | 3832 | * the merge code will know what to do if it sees one. |
| 3511 | */ | 3833 | */ |
| 3512 | 3834 | if (rec) { | |
| 3513 | if (index > 0) { | ||
| 3514 | rec = &el->l_recs[index - 1]; | ||
| 3515 | if (index == 1 && ocfs2_is_empty_extent(rec)) { | 3835 | if (index == 1 && ocfs2_is_empty_extent(rec)) { |
| 3516 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | 3836 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) |
| 3517 | ret = CONTIG_RIGHT; | 3837 | ret = CONTIG_RIGHT; |
| @@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
| 3520 | } | 3840 | } |
| 3521 | } | 3841 | } |
| 3522 | 3842 | ||
| 3523 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { | 3843 | rec = NULL; |
| 3844 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) | ||
| 3845 | rec = &el->l_recs[index + 1]; | ||
| 3846 | else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && | ||
| 3847 | path->p_tree_depth > 0) { | ||
| 3848 | status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, | ||
| 3849 | path, &right_cpos); | ||
| 3850 | if (status) | ||
| 3851 | goto out; | ||
| 3852 | |||
| 3853 | if (right_cpos == 0) | ||
| 3854 | goto out; | ||
| 3855 | |||
| 3856 | right_path = ocfs2_new_path(path_root_bh(path), | ||
| 3857 | path_root_el(path)); | ||
| 3858 | if (!right_path) | ||
| 3859 | goto out; | ||
| 3860 | |||
| 3861 | status = ocfs2_find_path(inode, right_path, right_cpos); | ||
| 3862 | if (status) | ||
| 3863 | goto out; | ||
| 3864 | |||
| 3865 | new_el = path_leaf_el(right_path); | ||
| 3866 | rec = &new_el->l_recs[0]; | ||
| 3867 | if (ocfs2_is_empty_extent(rec)) { | ||
| 3868 | if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { | ||
| 3869 | bh = path_leaf_bh(right_path); | ||
| 3870 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
| 3871 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
| 3872 | eb); | ||
| 3873 | goto out; | ||
| 3874 | } | ||
| 3875 | rec = &new_el->l_recs[1]; | ||
| 3876 | } | ||
| 3877 | } | ||
| 3878 | |||
| 3879 | if (rec) { | ||
| 3524 | enum ocfs2_contig_type contig_type; | 3880 | enum ocfs2_contig_type contig_type; |
| 3525 | 3881 | ||
| 3526 | rec = &el->l_recs[index + 1]; | ||
| 3527 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | 3882 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); |
| 3528 | 3883 | ||
| 3529 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | 3884 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) |
| @@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
| 3532 | ret = contig_type; | 3887 | ret = contig_type; |
| 3533 | } | 3888 | } |
| 3534 | 3889 | ||
| 3890 | out: | ||
| 3891 | if (left_path) | ||
| 3892 | ocfs2_free_path(left_path); | ||
| 3893 | if (right_path) | ||
| 3894 | ocfs2_free_path(right_path); | ||
| 3895 | |||
| 3535 | return ret; | 3896 | return ret; |
| 3536 | } | 3897 | } |
| 3537 | 3898 | ||
| @@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
| 3994 | goto out; | 4355 | goto out; |
| 3995 | } | 4356 | } |
| 3996 | 4357 | ||
| 3997 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | 4358 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, |
| 3998 | split_index, | 4359 | split_index, |
| 3999 | split_rec); | 4360 | split_rec); |
| 4000 | 4361 | ||
| @@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) | |||
| 4788 | status = ocfs2_flush_truncate_log(osb); | 5149 | status = ocfs2_flush_truncate_log(osb); |
| 4789 | if (status < 0) | 5150 | if (status < 0) |
| 4790 | mlog_errno(status); | 5151 | mlog_errno(status); |
| 5152 | else | ||
| 5153 | ocfs2_init_inode_steal_slot(osb); | ||
| 4791 | 5154 | ||
| 4792 | mlog_exit(status); | 5155 | mlog_exit(status); |
| 4793 | } | 5156 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 90383ed61005..17964c0505a9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -467,11 +467,11 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
| 467 | unsigned to) | 467 | unsigned to) |
| 468 | { | 468 | { |
| 469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 470 | handle_t *handle = NULL; | 470 | handle_t *handle; |
| 471 | int ret = 0; | 471 | int ret = 0; |
| 472 | 472 | ||
| 473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
| 474 | if (!handle) { | 474 | if (IS_ERR(handle)) { |
| 475 | ret = -ENOMEM; | 475 | ret = -ENOMEM; |
| 476 | mlog_errno(ret); | 476 | mlog_errno(ret); |
| 477 | goto out; | 477 | goto out; |
| @@ -487,7 +487,7 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
| 487 | } | 487 | } |
| 488 | out: | 488 | out: |
| 489 | if (ret) { | 489 | if (ret) { |
| 490 | if (handle) | 490 | if (!IS_ERR(handle)) |
| 491 | ocfs2_commit_trans(osb, handle); | 491 | ocfs2_commit_trans(osb, handle); |
| 492 | handle = ERR_PTR(ret); | 492 | handle = ERR_PTR(ret); |
| 493 | } | 493 | } |
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile index cdd162f13650..bc8c5e7d8608 100644 --- a/fs/ocfs2/cluster/Makefile +++ b/fs/ocfs2/cluster/Makefile | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o | 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o |
| 2 | 2 | ||
| 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ | 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ |
| 4 | quorum.o tcp.o ver.o | 4 | quorum.o tcp.o netdebug.o ver.o |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c new file mode 100644 index 000000000000..7bf3c0ea7bd9 --- /dev/null +++ b/fs/ocfs2/cluster/netdebug.c | |||
| @@ -0,0 +1,441 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * netdebug.c | ||
| 5 | * | ||
| 6 | * debug functionality for o2net | ||
| 7 | * | ||
| 8 | * Copyright (C) 2005, 2008 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 02111-1307, USA. | ||
| 24 | * | ||
| 25 | */ | ||
| 26 | |||
| 27 | #ifdef CONFIG_DEBUG_FS | ||
| 28 | |||
| 29 | #include <linux/module.h> | ||
| 30 | #include <linux/types.h> | ||
| 31 | #include <linux/slab.h> | ||
| 32 | #include <linux/idr.h> | ||
| 33 | #include <linux/kref.h> | ||
| 34 | #include <linux/seq_file.h> | ||
| 35 | #include <linux/debugfs.h> | ||
| 36 | |||
| 37 | #include <linux/uaccess.h> | ||
| 38 | |||
| 39 | #include "tcp.h" | ||
| 40 | #include "nodemanager.h" | ||
| 41 | #define MLOG_MASK_PREFIX ML_TCP | ||
| 42 | #include "masklog.h" | ||
| 43 | |||
| 44 | #include "tcp_internal.h" | ||
| 45 | |||
| 46 | #define O2NET_DEBUG_DIR "o2net" | ||
| 47 | #define SC_DEBUG_NAME "sock_containers" | ||
| 48 | #define NST_DEBUG_NAME "send_tracking" | ||
| 49 | |||
| 50 | static struct dentry *o2net_dentry; | ||
| 51 | static struct dentry *sc_dentry; | ||
| 52 | static struct dentry *nst_dentry; | ||
| 53 | |||
| 54 | static DEFINE_SPINLOCK(o2net_debug_lock); | ||
| 55 | |||
| 56 | static LIST_HEAD(sock_containers); | ||
| 57 | static LIST_HEAD(send_tracking); | ||
| 58 | |||
| 59 | void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
| 60 | { | ||
| 61 | spin_lock(&o2net_debug_lock); | ||
| 62 | list_add(&nst->st_net_debug_item, &send_tracking); | ||
| 63 | spin_unlock(&o2net_debug_lock); | ||
| 64 | } | ||
| 65 | |||
| 66 | void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
| 67 | { | ||
| 68 | spin_lock(&o2net_debug_lock); | ||
| 69 | if (!list_empty(&nst->st_net_debug_item)) | ||
| 70 | list_del_init(&nst->st_net_debug_item); | ||
| 71 | spin_unlock(&o2net_debug_lock); | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct o2net_send_tracking | ||
| 75 | *next_nst(struct o2net_send_tracking *nst_start) | ||
| 76 | { | ||
| 77 | struct o2net_send_tracking *nst, *ret = NULL; | ||
| 78 | |||
| 79 | assert_spin_locked(&o2net_debug_lock); | ||
| 80 | |||
| 81 | list_for_each_entry(nst, &nst_start->st_net_debug_item, | ||
| 82 | st_net_debug_item) { | ||
| 83 | /* discover the head of the list */ | ||
| 84 | if (&nst->st_net_debug_item == &send_tracking) | ||
| 85 | break; | ||
| 86 | |||
| 87 | /* use st_task to detect real nsts in the list */ | ||
| 88 | if (nst->st_task != NULL) { | ||
| 89 | ret = nst; | ||
| 90 | break; | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | return ret; | ||
| 95 | } | ||
| 96 | |||
| 97 | static void *nst_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 98 | { | ||
| 99 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 100 | |||
| 101 | spin_lock(&o2net_debug_lock); | ||
| 102 | nst = next_nst(dummy_nst); | ||
| 103 | spin_unlock(&o2net_debug_lock); | ||
| 104 | |||
| 105 | return nst; | ||
| 106 | } | ||
| 107 | |||
| 108 | static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 109 | { | ||
| 110 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 111 | |||
| 112 | spin_lock(&o2net_debug_lock); | ||
| 113 | nst = next_nst(dummy_nst); | ||
| 114 | list_del_init(&dummy_nst->st_net_debug_item); | ||
| 115 | if (nst) | ||
| 116 | list_add(&dummy_nst->st_net_debug_item, | ||
| 117 | &nst->st_net_debug_item); | ||
| 118 | spin_unlock(&o2net_debug_lock); | ||
| 119 | |||
| 120 | return nst; /* unused, just needs to be null when done */ | ||
| 121 | } | ||
| 122 | |||
| 123 | static int nst_seq_show(struct seq_file *seq, void *v) | ||
| 124 | { | ||
| 125 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 126 | |||
| 127 | spin_lock(&o2net_debug_lock); | ||
| 128 | nst = next_nst(dummy_nst); | ||
| 129 | |||
| 130 | if (nst != NULL) { | ||
| 131 | /* get_task_comm isn't exported. oh well. */ | ||
| 132 | seq_printf(seq, "%p:\n" | ||
| 133 | " pid: %lu\n" | ||
| 134 | " tgid: %lu\n" | ||
| 135 | " process name: %s\n" | ||
| 136 | " node: %u\n" | ||
| 137 | " sc: %p\n" | ||
| 138 | " message id: %d\n" | ||
| 139 | " message type: %u\n" | ||
| 140 | " message key: 0x%08x\n" | ||
| 141 | " sock acquiry: %lu.%lu\n" | ||
| 142 | " send start: %lu.%lu\n" | ||
| 143 | " wait start: %lu.%lu\n", | ||
| 144 | nst, (unsigned long)nst->st_task->pid, | ||
| 145 | (unsigned long)nst->st_task->tgid, | ||
| 146 | nst->st_task->comm, nst->st_node, | ||
| 147 | nst->st_sc, nst->st_id, nst->st_msg_type, | ||
| 148 | nst->st_msg_key, | ||
| 149 | nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec, | ||
| 150 | nst->st_send_time.tv_sec, nst->st_send_time.tv_usec, | ||
| 151 | nst->st_status_time.tv_sec, | ||
| 152 | nst->st_status_time.tv_usec); | ||
| 153 | } | ||
| 154 | |||
| 155 | spin_unlock(&o2net_debug_lock); | ||
| 156 | |||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | |||
| 160 | static void nst_seq_stop(struct seq_file *seq, void *v) | ||
| 161 | { | ||
| 162 | } | ||
| 163 | |||
| 164 | static struct seq_operations nst_seq_ops = { | ||
| 165 | .start = nst_seq_start, | ||
| 166 | .next = nst_seq_next, | ||
| 167 | .stop = nst_seq_stop, | ||
| 168 | .show = nst_seq_show, | ||
| 169 | }; | ||
| 170 | |||
| 171 | static int nst_fop_open(struct inode *inode, struct file *file) | ||
| 172 | { | ||
| 173 | struct o2net_send_tracking *dummy_nst; | ||
| 174 | struct seq_file *seq; | ||
| 175 | int ret; | ||
| 176 | |||
| 177 | dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); | ||
| 178 | if (dummy_nst == NULL) { | ||
| 179 | ret = -ENOMEM; | ||
| 180 | goto out; | ||
| 181 | } | ||
| 182 | dummy_nst->st_task = NULL; | ||
| 183 | |||
| 184 | ret = seq_open(file, &nst_seq_ops); | ||
| 185 | if (ret) | ||
| 186 | goto out; | ||
| 187 | |||
| 188 | seq = file->private_data; | ||
| 189 | seq->private = dummy_nst; | ||
| 190 | o2net_debug_add_nst(dummy_nst); | ||
| 191 | |||
| 192 | dummy_nst = NULL; | ||
| 193 | |||
| 194 | out: | ||
| 195 | kfree(dummy_nst); | ||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | static int nst_fop_release(struct inode *inode, struct file *file) | ||
| 200 | { | ||
| 201 | struct seq_file *seq = file->private_data; | ||
| 202 | struct o2net_send_tracking *dummy_nst = seq->private; | ||
| 203 | |||
| 204 | o2net_debug_del_nst(dummy_nst); | ||
| 205 | return seq_release_private(inode, file); | ||
| 206 | } | ||
| 207 | |||
| 208 | static struct file_operations nst_seq_fops = { | ||
| 209 | .open = nst_fop_open, | ||
| 210 | .read = seq_read, | ||
| 211 | .llseek = seq_lseek, | ||
| 212 | .release = nst_fop_release, | ||
| 213 | }; | ||
| 214 | |||
| 215 | void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
| 216 | { | ||
| 217 | spin_lock(&o2net_debug_lock); | ||
| 218 | list_add(&sc->sc_net_debug_item, &sock_containers); | ||
| 219 | spin_unlock(&o2net_debug_lock); | ||
| 220 | } | ||
| 221 | |||
| 222 | void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
| 223 | { | ||
| 224 | spin_lock(&o2net_debug_lock); | ||
| 225 | list_del_init(&sc->sc_net_debug_item); | ||
| 226 | spin_unlock(&o2net_debug_lock); | ||
| 227 | } | ||
| 228 | |||
| 229 | static struct o2net_sock_container | ||
| 230 | *next_sc(struct o2net_sock_container *sc_start) | ||
| 231 | { | ||
| 232 | struct o2net_sock_container *sc, *ret = NULL; | ||
| 233 | |||
| 234 | assert_spin_locked(&o2net_debug_lock); | ||
| 235 | |||
| 236 | list_for_each_entry(sc, &sc_start->sc_net_debug_item, | ||
| 237 | sc_net_debug_item) { | ||
| 238 | /* discover the head of the list miscast as a sc */ | ||
| 239 | if (&sc->sc_net_debug_item == &sock_containers) | ||
| 240 | break; | ||
| 241 | |||
| 242 | /* use sc_page to detect real scs in the list */ | ||
| 243 | if (sc->sc_page != NULL) { | ||
| 244 | ret = sc; | ||
| 245 | break; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | return ret; | ||
| 250 | } | ||
| 251 | |||
| 252 | static void *sc_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 253 | { | ||
| 254 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 255 | |||
| 256 | spin_lock(&o2net_debug_lock); | ||
| 257 | sc = next_sc(dummy_sc); | ||
| 258 | spin_unlock(&o2net_debug_lock); | ||
| 259 | |||
| 260 | return sc; | ||
| 261 | } | ||
| 262 | |||
| 263 | static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 264 | { | ||
| 265 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 266 | |||
| 267 | spin_lock(&o2net_debug_lock); | ||
| 268 | sc = next_sc(dummy_sc); | ||
| 269 | list_del_init(&dummy_sc->sc_net_debug_item); | ||
| 270 | if (sc) | ||
| 271 | list_add(&dummy_sc->sc_net_debug_item, &sc->sc_net_debug_item); | ||
| 272 | spin_unlock(&o2net_debug_lock); | ||
| 273 | |||
| 274 | return sc; /* unused, just needs to be null when done */ | ||
| 275 | } | ||
| 276 | |||
| 277 | #define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec | ||
| 278 | |||
| 279 | static int sc_seq_show(struct seq_file *seq, void *v) | ||
| 280 | { | ||
| 281 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 282 | |||
| 283 | spin_lock(&o2net_debug_lock); | ||
| 284 | sc = next_sc(dummy_sc); | ||
| 285 | |||
| 286 | if (sc != NULL) { | ||
| 287 | struct inet_sock *inet = NULL; | ||
| 288 | |||
| 289 | __be32 saddr = 0, daddr = 0; | ||
| 290 | __be16 sport = 0, dport = 0; | ||
| 291 | |||
| 292 | if (sc->sc_sock) { | ||
| 293 | inet = inet_sk(sc->sc_sock->sk); | ||
| 294 | /* the stack's structs aren't sparse endian clean */ | ||
| 295 | saddr = (__force __be32)inet->saddr; | ||
| 296 | daddr = (__force __be32)inet->daddr; | ||
| 297 | sport = (__force __be16)inet->sport; | ||
| 298 | dport = (__force __be16)inet->dport; | ||
| 299 | } | ||
| 300 | |||
| 301 | /* XXX sigh, inet-> doesn't have sparse annotation so any | ||
| 302 | * use of it here generates a warning with -Wbitwise */ | ||
| 303 | seq_printf(seq, "%p:\n" | ||
| 304 | " krefs: %d\n" | ||
| 305 | " sock: %u.%u.%u.%u:%u -> " | ||
| 306 | "%u.%u.%u.%u:%u\n" | ||
| 307 | " remote node: %s\n" | ||
| 308 | " page off: %zu\n" | ||
| 309 | " handshake ok: %u\n" | ||
| 310 | " timer: %lu.%lu\n" | ||
| 311 | " data ready: %lu.%lu\n" | ||
| 312 | " advance start: %lu.%lu\n" | ||
| 313 | " advance stop: %lu.%lu\n" | ||
| 314 | " func start: %lu.%lu\n" | ||
| 315 | " func stop: %lu.%lu\n" | ||
| 316 | " func key: %u\n" | ||
| 317 | " func type: %u\n", | ||
| 318 | sc, | ||
| 319 | atomic_read(&sc->sc_kref.refcount), | ||
| 320 | NIPQUAD(saddr), inet ? ntohs(sport) : 0, | ||
| 321 | NIPQUAD(daddr), inet ? ntohs(dport) : 0, | ||
| 322 | sc->sc_node->nd_name, | ||
| 323 | sc->sc_page_off, | ||
| 324 | sc->sc_handshake_ok, | ||
| 325 | TV_SEC_USEC(sc->sc_tv_timer), | ||
| 326 | TV_SEC_USEC(sc->sc_tv_data_ready), | ||
| 327 | TV_SEC_USEC(sc->sc_tv_advance_start), | ||
| 328 | TV_SEC_USEC(sc->sc_tv_advance_stop), | ||
| 329 | TV_SEC_USEC(sc->sc_tv_func_start), | ||
| 330 | TV_SEC_USEC(sc->sc_tv_func_stop), | ||
| 331 | sc->sc_msg_key, | ||
| 332 | sc->sc_msg_type); | ||
| 333 | } | ||
| 334 | |||
| 335 | |||
| 336 | spin_unlock(&o2net_debug_lock); | ||
| 337 | |||
| 338 | return 0; | ||
| 339 | } | ||
| 340 | |||
| 341 | static void sc_seq_stop(struct seq_file *seq, void *v) | ||
| 342 | { | ||
| 343 | } | ||
| 344 | |||
| 345 | static struct seq_operations sc_seq_ops = { | ||
| 346 | .start = sc_seq_start, | ||
| 347 | .next = sc_seq_next, | ||
| 348 | .stop = sc_seq_stop, | ||
| 349 | .show = sc_seq_show, | ||
| 350 | }; | ||
| 351 | |||
| 352 | static int sc_fop_open(struct inode *inode, struct file *file) | ||
| 353 | { | ||
| 354 | struct o2net_sock_container *dummy_sc; | ||
| 355 | struct seq_file *seq; | ||
| 356 | int ret; | ||
| 357 | |||
| 358 | dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); | ||
| 359 | if (dummy_sc == NULL) { | ||
| 360 | ret = -ENOMEM; | ||
| 361 | goto out; | ||
| 362 | } | ||
| 363 | dummy_sc->sc_page = NULL; | ||
| 364 | |||
| 365 | ret = seq_open(file, &sc_seq_ops); | ||
| 366 | if (ret) | ||
| 367 | goto out; | ||
| 368 | |||
| 369 | seq = file->private_data; | ||
| 370 | seq->private = dummy_sc; | ||
| 371 | o2net_debug_add_sc(dummy_sc); | ||
| 372 | |||
| 373 | dummy_sc = NULL; | ||
| 374 | |||
| 375 | out: | ||
| 376 | kfree(dummy_sc); | ||
| 377 | return ret; | ||
| 378 | } | ||
| 379 | |||
| 380 | static int sc_fop_release(struct inode *inode, struct file *file) | ||
| 381 | { | ||
| 382 | struct seq_file *seq = file->private_data; | ||
| 383 | struct o2net_sock_container *dummy_sc = seq->private; | ||
| 384 | |||
| 385 | o2net_debug_del_sc(dummy_sc); | ||
| 386 | return seq_release_private(inode, file); | ||
| 387 | } | ||
| 388 | |||
| 389 | static struct file_operations sc_seq_fops = { | ||
| 390 | .open = sc_fop_open, | ||
| 391 | .read = seq_read, | ||
| 392 | .llseek = seq_lseek, | ||
| 393 | .release = sc_fop_release, | ||
| 394 | }; | ||
| 395 | |||
| 396 | int o2net_debugfs_init(void) | ||
| 397 | { | ||
| 398 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | ||
| 399 | if (!o2net_dentry) { | ||
| 400 | mlog_errno(-ENOMEM); | ||
| 401 | goto bail; | ||
| 402 | } | ||
| 403 | |||
| 404 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
| 405 | o2net_dentry, NULL, | ||
| 406 | &nst_seq_fops); | ||
| 407 | if (!nst_dentry) { | ||
| 408 | mlog_errno(-ENOMEM); | ||
| 409 | goto bail; | ||
| 410 | } | ||
| 411 | |||
| 412 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
| 413 | o2net_dentry, NULL, | ||
| 414 | &sc_seq_fops); | ||
| 415 | if (!sc_dentry) { | ||
| 416 | mlog_errno(-ENOMEM); | ||
| 417 | goto bail; | ||
| 418 | } | ||
| 419 | |||
| 420 | return 0; | ||
| 421 | bail: | ||
| 422 | if (sc_dentry) | ||
| 423 | debugfs_remove(sc_dentry); | ||
| 424 | if (nst_dentry) | ||
| 425 | debugfs_remove(nst_dentry); | ||
| 426 | if (o2net_dentry) | ||
| 427 | debugfs_remove(o2net_dentry); | ||
| 428 | return -ENOMEM; | ||
| 429 | } | ||
| 430 | |||
| 431 | void o2net_debugfs_exit(void) | ||
| 432 | { | ||
| 433 | if (sc_dentry) | ||
| 434 | debugfs_remove(sc_dentry); | ||
| 435 | if (nst_dentry) | ||
| 436 | debugfs_remove(nst_dentry); | ||
| 437 | if (o2net_dentry) | ||
| 438 | debugfs_remove(o2net_dentry); | ||
| 439 | } | ||
| 440 | |||
| 441 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 709fba25bf7e..cf9401e8cd0b 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -959,7 +959,10 @@ static int __init init_o2nm(void) | |||
| 959 | cluster_print_version(); | 959 | cluster_print_version(); |
| 960 | 960 | ||
| 961 | o2hb_init(); | 961 | o2hb_init(); |
| 962 | o2net_init(); | 962 | |
| 963 | ret = o2net_init(); | ||
| 964 | if (ret) | ||
| 965 | goto out; | ||
| 963 | 966 | ||
| 964 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); | 967 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); |
| 965 | if (!ocfs2_table_header) { | 968 | if (!ocfs2_table_header) { |
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 0c095ce7723d..98429fd68499 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
| @@ -57,6 +57,7 @@ static struct kset *o2cb_kset; | |||
| 57 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
| 58 | { | 58 | { |
| 59 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
| 60 | sysfs_remove_link(NULL, "o2cb"); | ||
| 60 | kset_unregister(o2cb_kset); | 61 | kset_unregister(o2cb_kset); |
| 61 | } | 62 | } |
| 62 | 63 | ||
| @@ -68,6 +69,14 @@ int o2cb_sys_init(void) | |||
| 68 | if (!o2cb_kset) | 69 | if (!o2cb_kset) |
| 69 | return -ENOMEM; | 70 | return -ENOMEM; |
| 70 | 71 | ||
| 72 | /* | ||
| 73 | * Create this symlink for backwards compatibility with old | ||
| 74 | * versions of ocfs2-tools which look for things in /sys/o2cb. | ||
| 75 | */ | ||
| 76 | ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); | ||
| 77 | if (ret) | ||
| 78 | goto error; | ||
| 79 | |||
| 71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | 80 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); |
| 72 | if (ret) | 81 | if (ret) |
| 73 | goto error; | 82 | goto error; |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..1e44ad14881a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data); | |||
| 142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
| 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
| 144 | 144 | ||
| 145 | /* | 145 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
| 146 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | 146 | u32 msgkey, struct task_struct *task, u8 node) |
| 147 | * losing our parent link to the cluster during shutdown. This can be | 147 | { |
| 148 | * solved by adding a pre-removal callback to configfs, or passing | 148 | #ifdef CONFIG_DEBUG_FS |
| 149 | * around the cluster with the node. -jeffm | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
| 150 | */ | 150 | nst->st_task = task; |
| 151 | static inline int o2net_reconnect_delay(struct o2nm_node *node) | 151 | nst->st_msg_type = msgtype; |
| 152 | nst->st_msg_key = msgkey; | ||
| 153 | nst->st_node = node; | ||
| 154 | #endif | ||
| 155 | } | ||
| 156 | |||
| 157 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
| 158 | { | ||
| 159 | #ifdef CONFIG_DEBUG_FS | ||
| 160 | do_gettimeofday(&nst->st_sock_time); | ||
| 161 | #endif | ||
| 162 | } | ||
| 163 | |||
| 164 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
| 165 | { | ||
| 166 | #ifdef CONFIG_DEBUG_FS | ||
| 167 | do_gettimeofday(&nst->st_send_time); | ||
| 168 | #endif | ||
| 169 | } | ||
| 170 | |||
| 171 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
| 172 | { | ||
| 173 | #ifdef CONFIG_DEBUG_FS | ||
| 174 | do_gettimeofday(&nst->st_status_time); | ||
| 175 | #endif | ||
| 176 | } | ||
| 177 | |||
| 178 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
| 179 | struct o2net_sock_container *sc) | ||
| 180 | { | ||
| 181 | #ifdef CONFIG_DEBUG_FS | ||
| 182 | nst->st_sc = sc; | ||
| 183 | #endif | ||
| 184 | } | ||
| 185 | |||
| 186 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | ||
| 187 | { | ||
| 188 | #ifdef CONFIG_DEBUG_FS | ||
| 189 | nst->st_id = msg_id; | ||
| 190 | #endif | ||
| 191 | } | ||
| 192 | |||
| 193 | static inline int o2net_reconnect_delay(void) | ||
| 152 | { | 194 | { |
| 153 | return o2nm_single_cluster->cl_reconnect_delay_ms; | 195 | return o2nm_single_cluster->cl_reconnect_delay_ms; |
| 154 | } | 196 | } |
| 155 | 197 | ||
| 156 | static inline int o2net_keepalive_delay(struct o2nm_node *node) | 198 | static inline int o2net_keepalive_delay(void) |
| 157 | { | 199 | { |
| 158 | return o2nm_single_cluster->cl_keepalive_delay_ms; | 200 | return o2nm_single_cluster->cl_keepalive_delay_ms; |
| 159 | } | 201 | } |
| 160 | 202 | ||
| 161 | static inline int o2net_idle_timeout(struct o2nm_node *node) | 203 | static inline int o2net_idle_timeout(void) |
| 162 | { | 204 | { |
| 163 | return o2nm_single_cluster->cl_idle_timeout_ms; | 205 | return o2nm_single_cluster->cl_idle_timeout_ms; |
| 164 | } | 206 | } |
| @@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref) | |||
| 296 | o2nm_node_put(sc->sc_node); | 338 | o2nm_node_put(sc->sc_node); |
| 297 | sc->sc_node = NULL; | 339 | sc->sc_node = NULL; |
| 298 | 340 | ||
| 341 | o2net_debug_del_sc(sc); | ||
| 299 | kfree(sc); | 342 | kfree(sc); |
| 300 | } | 343 | } |
| 301 | 344 | ||
| @@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) | |||
| 336 | 379 | ||
| 337 | ret = sc; | 380 | ret = sc; |
| 338 | sc->sc_page = page; | 381 | sc->sc_page = page; |
| 382 | o2net_debug_add_sc(sc); | ||
| 339 | sc = NULL; | 383 | sc = NULL; |
| 340 | page = NULL; | 384 | page = NULL; |
| 341 | 385 | ||
| @@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); | 443 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); |
| 400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); | 444 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); |
| 401 | 445 | ||
| 402 | /* we won't reconnect after our valid conn goes away for | ||
| 403 | * this hb iteration.. here so it shows up in the logs */ | ||
| 404 | if (was_valid && !valid && err == 0) | 446 | if (was_valid && !valid && err == 0) |
| 405 | err = -ENOTCONN; | 447 | err = -ENOTCONN; |
| 406 | 448 | ||
| @@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 430 | 472 | ||
| 431 | if (!was_valid && valid) { | 473 | if (!was_valid && valid) { |
| 432 | o2quo_conn_up(o2net_num_from_nn(nn)); | 474 | o2quo_conn_up(o2net_num_from_nn(nn)); |
| 433 | /* this is a bit of a hack. we only try reconnecting | ||
| 434 | * when heartbeating starts until we get a connection. | ||
| 435 | * if that connection then dies we don't try reconnecting. | ||
| 436 | * the only way to start connecting again is to down | ||
| 437 | * heartbeat and bring it back up. */ | ||
| 438 | cancel_delayed_work(&nn->nn_connect_expired); | 475 | cancel_delayed_work(&nn->nn_connect_expired); |
| 439 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 476 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", |
| 440 | o2nm_this_node() > sc->sc_node->nd_num ? | 477 | o2nm_this_node() > sc->sc_node->nd_num ? |
| @@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 451 | /* delay if we're withing a RECONNECT_DELAY of the | 488 | /* delay if we're withing a RECONNECT_DELAY of the |
| 452 | * last attempt */ | 489 | * last attempt */ |
| 453 | delay = (nn->nn_last_connect_attempt + | 490 | delay = (nn->nn_last_connect_attempt + |
| 454 | msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 491 | msecs_to_jiffies(o2net_reconnect_delay())) |
| 455 | - jiffies; | 492 | - jiffies; |
| 456 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 493 | if (delay > msecs_to_jiffies(o2net_reconnect_delay())) |
| 457 | delay = 0; | 494 | delay = 0; |
| 458 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 495 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
| 459 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 496 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
| 497 | |||
| 498 | /* | ||
| 499 | * Delay the expired work after idle timeout. | ||
| 500 | * | ||
| 501 | * We might have lots of failed connection attempts that run | ||
| 502 | * through here but we only cancel the connect_expired work when | ||
| 503 | * a connection attempt succeeds. So only the first enqueue of | ||
| 504 | * the connect_expired work will do anything. The rest will see | ||
| 505 | * that it's already queued and do nothing. | ||
| 506 | */ | ||
| 507 | delay += msecs_to_jiffies(o2net_idle_timeout()); | ||
| 508 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); | ||
| 460 | } | 509 | } |
| 461 | 510 | ||
| 462 | /* keep track of the nn's sc ref for the caller */ | 511 | /* keep track of the nn's sc ref for the caller */ |
| @@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 914 | struct o2net_status_wait nsw = { | 963 | struct o2net_status_wait nsw = { |
| 915 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), | 964 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), |
| 916 | }; | 965 | }; |
| 966 | struct o2net_send_tracking nst; | ||
| 967 | |||
| 968 | o2net_init_nst(&nst, msg_type, key, current, target_node); | ||
| 917 | 969 | ||
| 918 | if (o2net_wq == NULL) { | 970 | if (o2net_wq == NULL) { |
| 919 | mlog(0, "attempt to tx without o2netd running\n"); | 971 | mlog(0, "attempt to tx without o2netd running\n"); |
| @@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 939 | goto out; | 991 | goto out; |
| 940 | } | 992 | } |
| 941 | 993 | ||
| 994 | o2net_debug_add_nst(&nst); | ||
| 995 | |||
| 996 | o2net_set_nst_sock_time(&nst); | ||
| 997 | |||
| 942 | ret = wait_event_interruptible(nn->nn_sc_wq, | 998 | ret = wait_event_interruptible(nn->nn_sc_wq, |
| 943 | o2net_tx_can_proceed(nn, &sc, &error)); | 999 | o2net_tx_can_proceed(nn, &sc, &error)); |
| 944 | if (!ret && error) | 1000 | if (!ret && error) |
| @@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 946 | if (ret) | 1002 | if (ret) |
| 947 | goto out; | 1003 | goto out; |
| 948 | 1004 | ||
| 1005 | o2net_set_nst_sock_container(&nst, sc); | ||
| 1006 | |||
| 949 | veclen = caller_veclen + 1; | 1007 | veclen = caller_veclen + 1; |
| 950 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); | 1008 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); |
| 951 | if (vec == NULL) { | 1009 | if (vec == NULL) { |
| @@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 972 | goto out; | 1030 | goto out; |
| 973 | 1031 | ||
| 974 | msg->msg_num = cpu_to_be32(nsw.ns_id); | 1032 | msg->msg_num = cpu_to_be32(nsw.ns_id); |
| 1033 | o2net_set_nst_msg_id(&nst, nsw.ns_id); | ||
| 1034 | |||
| 1035 | o2net_set_nst_send_time(&nst); | ||
| 975 | 1036 | ||
| 976 | /* finally, convert the message header to network byte-order | 1037 | /* finally, convert the message header to network byte-order |
| 977 | * and send */ | 1038 | * and send */ |
| @@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 986 | } | 1047 | } |
| 987 | 1048 | ||
| 988 | /* wait on other node's handler */ | 1049 | /* wait on other node's handler */ |
| 1050 | o2net_set_nst_status_time(&nst); | ||
| 989 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); | 1051 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); |
| 990 | 1052 | ||
| 991 | /* Note that we avoid overwriting the callers status return | 1053 | /* Note that we avoid overwriting the callers status return |
| @@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 998 | mlog(0, "woken, returning system status %d, user status %d\n", | 1060 | mlog(0, "woken, returning system status %d, user status %d\n", |
| 999 | ret, nsw.ns_status); | 1061 | ret, nsw.ns_status); |
| 1000 | out: | 1062 | out: |
| 1063 | o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ | ||
| 1001 | if (sc) | 1064 | if (sc) |
| 1002 | sc_put(sc); | 1065 | sc_put(sc); |
| 1003 | if (vec) | 1066 | if (vec) |
| @@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1154 | * but isn't. This can ultimately cause corruption. | 1217 | * but isn't. This can ultimately cause corruption. |
| 1155 | */ | 1218 | */ |
| 1156 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | 1219 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != |
| 1157 | o2net_idle_timeout(sc->sc_node)) { | 1220 | o2net_idle_timeout()) { |
| 1158 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | 1221 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " |
| 1159 | "%u ms, but we use %u ms locally. disconnecting\n", | 1222 | "%u ms, but we use %u ms locally. disconnecting\n", |
| 1160 | SC_NODEF_ARGS(sc), | 1223 | SC_NODEF_ARGS(sc), |
| 1161 | be32_to_cpu(hand->o2net_idle_timeout_ms), | 1224 | be32_to_cpu(hand->o2net_idle_timeout_ms), |
| 1162 | o2net_idle_timeout(sc->sc_node)); | 1225 | o2net_idle_timeout()); |
| 1163 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1226 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1164 | return -1; | 1227 | return -1; |
| 1165 | } | 1228 | } |
| 1166 | 1229 | ||
| 1167 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | 1230 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != |
| 1168 | o2net_keepalive_delay(sc->sc_node)) { | 1231 | o2net_keepalive_delay()) { |
| 1169 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | 1232 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " |
| 1170 | "%u ms, but we use %u ms locally. disconnecting\n", | 1233 | "%u ms, but we use %u ms locally. disconnecting\n", |
| 1171 | SC_NODEF_ARGS(sc), | 1234 | SC_NODEF_ARGS(sc), |
| 1172 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | 1235 | be32_to_cpu(hand->o2net_keepalive_delay_ms), |
| 1173 | o2net_keepalive_delay(sc->sc_node)); | 1236 | o2net_keepalive_delay()); |
| 1174 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1237 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1175 | return -1; | 1238 | return -1; |
| 1176 | } | 1239 | } |
| @@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1193 | * shut down already */ | 1256 | * shut down already */ |
| 1194 | if (nn->nn_sc == sc) { | 1257 | if (nn->nn_sc == sc) { |
| 1195 | o2net_sc_reset_idle_timer(sc); | 1258 | o2net_sc_reset_idle_timer(sc); |
| 1259 | atomic_set(&nn->nn_timeout, 0); | ||
| 1196 | o2net_set_nn_state(nn, sc, 1, 0); | 1260 | o2net_set_nn_state(nn, sc, 1, 0); |
| 1197 | } | 1261 | } |
| 1198 | spin_unlock(&nn->nn_lock); | 1262 | spin_unlock(&nn->nn_lock); |
| @@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void) | |||
| 1347 | { | 1411 | { |
| 1348 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | 1412 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( |
| 1349 | O2HB_MAX_WRITE_TIMEOUT_MS); | 1413 | O2HB_MAX_WRITE_TIMEOUT_MS); |
| 1350 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( | 1414 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout()); |
| 1351 | o2net_idle_timeout(NULL)); | ||
| 1352 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( | 1415 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( |
| 1353 | o2net_keepalive_delay(NULL)); | 1416 | o2net_keepalive_delay()); |
| 1354 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( | 1417 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( |
| 1355 | o2net_reconnect_delay(NULL)); | 1418 | o2net_reconnect_delay()); |
| 1356 | } | 1419 | } |
| 1357 | 1420 | ||
| 1358 | /* ------------------------------------------------------------ */ | 1421 | /* ------------------------------------------------------------ */ |
| @@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work) | |||
| 1391 | static void o2net_idle_timer(unsigned long data) | 1454 | static void o2net_idle_timer(unsigned long data) |
| 1392 | { | 1455 | { |
| 1393 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1456 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
| 1457 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
| 1394 | struct timeval now; | 1458 | struct timeval now; |
| 1395 | 1459 | ||
| 1396 | do_gettimeofday(&now); | 1460 | do_gettimeofday(&now); |
| 1397 | 1461 | ||
| 1398 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1462 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
| 1399 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1463 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
| 1400 | o2net_idle_timeout(sc->sc_node) / 1000, | 1464 | o2net_idle_timeout() / 1000, |
| 1401 | o2net_idle_timeout(sc->sc_node) % 1000); | 1465 | o2net_idle_timeout() % 1000); |
| 1402 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1466 | mlog(ML_NOTICE, "here are some times that might help debug the " |
| 1403 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1467 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
| 1404 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1468 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
| @@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data) | |||
| 1413 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, | 1477 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, |
| 1414 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); | 1478 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); |
| 1415 | 1479 | ||
| 1480 | /* | ||
| 1481 | * Initialize the nn_timeout so that the next connection attempt | ||
| 1482 | * will continue in o2net_start_connect. | ||
| 1483 | */ | ||
| 1484 | atomic_set(&nn->nn_timeout, 1); | ||
| 1485 | |||
| 1416 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1486 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
| 1417 | } | 1487 | } |
| 1418 | 1488 | ||
| @@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) | |||
| 1420 | { | 1490 | { |
| 1421 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1491 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
| 1422 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1492 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
| 1423 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); | 1493 | msecs_to_jiffies(o2net_keepalive_delay())); |
| 1424 | do_gettimeofday(&sc->sc_tv_timer); | 1494 | do_gettimeofday(&sc->sc_tv_timer); |
| 1425 | mod_timer(&sc->sc_idle_timeout, | 1495 | mod_timer(&sc->sc_idle_timeout, |
| 1426 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); | 1496 | jiffies + msecs_to_jiffies(o2net_idle_timeout())); |
| 1427 | } | 1497 | } |
| 1428 | 1498 | ||
| 1429 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1499 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) |
| @@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work) | |||
| 1447 | struct socket *sock = NULL; | 1517 | struct socket *sock = NULL; |
| 1448 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1518 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
| 1449 | int ret = 0, stop; | 1519 | int ret = 0, stop; |
| 1520 | unsigned int timeout; | ||
| 1450 | 1521 | ||
| 1451 | /* if we're greater we initiate tx, otherwise we accept */ | 1522 | /* if we're greater we initiate tx, otherwise we accept */ |
| 1452 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1523 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
| @@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work) | |||
| 1466 | } | 1537 | } |
| 1467 | 1538 | ||
| 1468 | spin_lock(&nn->nn_lock); | 1539 | spin_lock(&nn->nn_lock); |
| 1469 | /* see if we already have one pending or have given up */ | 1540 | /* |
| 1470 | stop = (nn->nn_sc || nn->nn_persistent_error); | 1541 | * see if we already have one pending or have given up. |
| 1542 | * For nn_timeout, it is set when we close the connection | ||
| 1543 | * because of the idle time out. So it means that we have | ||
| 1544 | * at least connected to that node successfully once, | ||
| 1545 | * now try to connect to it again. | ||
| 1546 | */ | ||
| 1547 | timeout = atomic_read(&nn->nn_timeout); | ||
| 1548 | stop = (nn->nn_sc || | ||
| 1549 | (nn->nn_persistent_error && | ||
| 1550 | (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); | ||
| 1471 | spin_unlock(&nn->nn_lock); | 1551 | spin_unlock(&nn->nn_lock); |
| 1472 | if (stop) | 1552 | if (stop) |
| 1473 | goto out; | 1553 | goto out; |
| @@ -1555,8 +1635,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
| 1555 | mlog(ML_ERROR, "no connection established with node %u after " | 1635 | mlog(ML_ERROR, "no connection established with node %u after " |
| 1556 | "%u.%u seconds, giving up and returning errors.\n", | 1636 | "%u.%u seconds, giving up and returning errors.\n", |
| 1557 | o2net_num_from_nn(nn), | 1637 | o2net_num_from_nn(nn), |
| 1558 | o2net_idle_timeout(NULL) / 1000, | 1638 | o2net_idle_timeout() / 1000, |
| 1559 | o2net_idle_timeout(NULL) % 1000); | 1639 | o2net_idle_timeout() % 1000); |
| 1560 | 1640 | ||
| 1561 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1641 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
| 1562 | } | 1642 | } |
| @@ -1579,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node) | |||
| 1579 | 1659 | ||
| 1580 | /* don't reconnect until it's heartbeating again */ | 1660 | /* don't reconnect until it's heartbeating again */ |
| 1581 | spin_lock(&nn->nn_lock); | 1661 | spin_lock(&nn->nn_lock); |
| 1662 | atomic_set(&nn->nn_timeout, 0); | ||
| 1582 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1663 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
| 1583 | spin_unlock(&nn->nn_lock); | 1664 | spin_unlock(&nn->nn_lock); |
| 1584 | 1665 | ||
| @@ -1610,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
| 1610 | 1691 | ||
| 1611 | /* ensure an immediate connect attempt */ | 1692 | /* ensure an immediate connect attempt */ |
| 1612 | nn->nn_last_connect_attempt = jiffies - | 1693 | nn->nn_last_connect_attempt = jiffies - |
| 1613 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); | 1694 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
| 1614 | 1695 | ||
| 1615 | if (node_num != o2nm_this_node()) { | 1696 | if (node_num != o2nm_this_node()) { |
| 1616 | /* heartbeat doesn't work unless a local node number is | ||
| 1617 | * configured and doing so brings up the o2net_wq, so we can | ||
| 1618 | * use it.. */ | ||
| 1619 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | ||
| 1620 | msecs_to_jiffies(o2net_idle_timeout(node))); | ||
| 1621 | |||
| 1622 | /* believe it or not, accept and node hearbeating testing | 1697 | /* believe it or not, accept and node hearbeating testing |
| 1623 | * can succeed for this node before we got here.. so | 1698 | * can succeed for this node before we got here.. so |
| 1624 | * only use set_nn_state to clear the persistent error | 1699 | * only use set_nn_state to clear the persistent error |
| 1625 | * if that hasn't already happened */ | 1700 | * if that hasn't already happened */ |
| 1626 | spin_lock(&nn->nn_lock); | 1701 | spin_lock(&nn->nn_lock); |
| 1702 | atomic_set(&nn->nn_timeout, 0); | ||
| 1627 | if (nn->nn_persistent_error) | 1703 | if (nn->nn_persistent_error) |
| 1628 | o2net_set_nn_state(nn, NULL, 0, 0); | 1704 | o2net_set_nn_state(nn, NULL, 0, 0); |
| 1629 | spin_unlock(&nn->nn_lock); | 1705 | spin_unlock(&nn->nn_lock); |
| @@ -1747,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1747 | new_sock = NULL; | 1823 | new_sock = NULL; |
| 1748 | 1824 | ||
| 1749 | spin_lock(&nn->nn_lock); | 1825 | spin_lock(&nn->nn_lock); |
| 1826 | atomic_set(&nn->nn_timeout, 0); | ||
| 1750 | o2net_set_nn_state(nn, sc, 0, 0); | 1827 | o2net_set_nn_state(nn, sc, 0, 0); |
| 1751 | spin_unlock(&nn->nn_lock); | 1828 | spin_unlock(&nn->nn_lock); |
| 1752 | 1829 | ||
| @@ -1922,6 +1999,9 @@ int o2net_init(void) | |||
| 1922 | 1999 | ||
| 1923 | o2quo_init(); | 2000 | o2quo_init(); |
| 1924 | 2001 | ||
| 2002 | if (o2net_debugfs_init()) | ||
| 2003 | return -ENOMEM; | ||
| 2004 | |||
| 1925 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | 2005 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); |
| 1926 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2006 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
| 1927 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2007 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
| @@ -1941,6 +2021,7 @@ int o2net_init(void) | |||
| 1941 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { | 2021 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { |
| 1942 | struct o2net_node *nn = o2net_nn_from_num(i); | 2022 | struct o2net_node *nn = o2net_nn_from_num(i); |
| 1943 | 2023 | ||
| 2024 | atomic_set(&nn->nn_timeout, 0); | ||
| 1944 | spin_lock_init(&nn->nn_lock); | 2025 | spin_lock_init(&nn->nn_lock); |
| 1945 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); | 2026 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); |
| 1946 | INIT_DELAYED_WORK(&nn->nn_connect_expired, | 2027 | INIT_DELAYED_WORK(&nn->nn_connect_expired, |
| @@ -1962,4 +2043,5 @@ void o2net_exit(void) | |||
| 1962 | kfree(o2net_hand); | 2043 | kfree(o2net_hand); |
| 1963 | kfree(o2net_keep_req); | 2044 | kfree(o2net_keep_req); |
| 1964 | kfree(o2net_keep_resp); | 2045 | kfree(o2net_keep_resp); |
| 2046 | o2net_debugfs_exit(); | ||
| 1965 | } | 2047 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index f36f66aab3dd..a705d5d19036 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
| @@ -117,4 +117,36 @@ int o2net_num_connected_peers(void); | |||
| 117 | int o2net_init(void); | 117 | int o2net_init(void); |
| 118 | void o2net_exit(void); | 118 | void o2net_exit(void); |
| 119 | 119 | ||
| 120 | struct o2net_send_tracking; | ||
| 121 | struct o2net_sock_container; | ||
| 122 | |||
| 123 | #ifdef CONFIG_DEBUG_FS | ||
| 124 | int o2net_debugfs_init(void); | ||
| 125 | void o2net_debugfs_exit(void); | ||
| 126 | void o2net_debug_add_nst(struct o2net_send_tracking *nst); | ||
| 127 | void o2net_debug_del_nst(struct o2net_send_tracking *nst); | ||
| 128 | void o2net_debug_add_sc(struct o2net_sock_container *sc); | ||
| 129 | void o2net_debug_del_sc(struct o2net_sock_container *sc); | ||
| 130 | #else | ||
| 131 | static int o2net_debugfs_init(void) | ||
| 132 | { | ||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | static void o2net_debugfs_exit(void) | ||
| 136 | { | ||
| 137 | } | ||
| 138 | static void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
| 139 | { | ||
| 140 | } | ||
| 141 | static void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
| 142 | { | ||
| 143 | } | ||
| 144 | static void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
| 145 | { | ||
| 146 | } | ||
| 147 | static void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
| 148 | { | ||
| 149 | } | ||
| 150 | #endif /* CONFIG_DEBUG_FS */ | ||
| 151 | |||
| 120 | #endif /* O2CLUSTER_TCP_H */ | 152 | #endif /* O2CLUSTER_TCP_H */ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
| @@ -95,6 +95,8 @@ struct o2net_node { | |||
| 95 | unsigned nn_sc_valid:1; | 95 | unsigned nn_sc_valid:1; |
| 96 | /* if this is set tx just returns it */ | 96 | /* if this is set tx just returns it */ |
| 97 | int nn_persistent_error; | 97 | int nn_persistent_error; |
| 98 | /* It is only set to 1 after the idle time out. */ | ||
| 99 | atomic_t nn_timeout; | ||
| 98 | 100 | ||
| 99 | /* threads waiting for an sc to arrive wait on the wq for generation | 101 | /* threads waiting for an sc to arrive wait on the wq for generation |
| 100 | * to increase. it is increased when a connecting socket succeeds | 102 | * to increase. it is increased when a connecting socket succeeds |
| @@ -164,7 +166,9 @@ struct o2net_sock_container { | |||
| 164 | /* original handlers for the sockets */ | 166 | /* original handlers for the sockets */ |
| 165 | void (*sc_state_change)(struct sock *sk); | 167 | void (*sc_state_change)(struct sock *sk); |
| 166 | void (*sc_data_ready)(struct sock *sk, int bytes); | 168 | void (*sc_data_ready)(struct sock *sk, int bytes); |
| 167 | 169 | #ifdef CONFIG_DEBUG_FS | |
| 170 | struct list_head sc_net_debug_item; | ||
| 171 | #endif | ||
| 168 | struct timeval sc_tv_timer; | 172 | struct timeval sc_tv_timer; |
| 169 | struct timeval sc_tv_data_ready; | 173 | struct timeval sc_tv_data_ready; |
| 170 | struct timeval sc_tv_advance_start; | 174 | struct timeval sc_tv_advance_start; |
| @@ -206,4 +210,24 @@ struct o2net_status_wait { | |||
| 206 | struct list_head ns_node_item; | 210 | struct list_head ns_node_item; |
| 207 | }; | 211 | }; |
| 208 | 212 | ||
| 213 | #ifdef CONFIG_DEBUG_FS | ||
| 214 | /* just for state dumps */ | ||
| 215 | struct o2net_send_tracking { | ||
| 216 | struct list_head st_net_debug_item; | ||
| 217 | struct task_struct *st_task; | ||
| 218 | struct o2net_sock_container *st_sc; | ||
| 219 | u32 st_id; | ||
| 220 | u32 st_msg_type; | ||
| 221 | u32 st_msg_key; | ||
| 222 | u8 st_node; | ||
| 223 | struct timeval st_sock_time; | ||
| 224 | struct timeval st_send_time; | ||
| 225 | struct timeval st_status_time; | ||
| 226 | }; | ||
| 227 | #else | ||
| 228 | struct o2net_send_tracking { | ||
| 229 | u32 dummy; | ||
| 230 | }; | ||
| 231 | #endif /* CONFIG_DEBUG_FS */ | ||
| 232 | |||
| 209 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index ce3f7c29d270..190361375700 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | EXTRA_CFLAGS += -Ifs/ocfs2 | 1 | EXTRA_CFLAGS += -Ifs/ocfs2 |
| 2 | 2 | ||
| 3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o | 3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o |
| 4 | 4 | ||
| 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ | 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ |
| 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o | 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index dc8ea666efdb..d5a86fb81a49 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -49,6 +49,41 @@ | |||
| 49 | /* Intended to make it easier for us to switch out hash functions */ | 49 | /* Intended to make it easier for us to switch out hash functions */ |
| 50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) | 50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) |
| 51 | 51 | ||
| 52 | enum dlm_mle_type { | ||
| 53 | DLM_MLE_BLOCK, | ||
| 54 | DLM_MLE_MASTER, | ||
| 55 | DLM_MLE_MIGRATION | ||
| 56 | }; | ||
| 57 | |||
| 58 | struct dlm_lock_name { | ||
| 59 | u8 len; | ||
| 60 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct dlm_master_list_entry { | ||
| 64 | struct list_head list; | ||
| 65 | struct list_head hb_events; | ||
| 66 | struct dlm_ctxt *dlm; | ||
| 67 | spinlock_t spinlock; | ||
| 68 | wait_queue_head_t wq; | ||
| 69 | atomic_t woken; | ||
| 70 | struct kref mle_refs; | ||
| 71 | int inuse; | ||
| 72 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 73 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 74 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 75 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 76 | u8 master; | ||
| 77 | u8 new_master; | ||
| 78 | enum dlm_mle_type type; | ||
| 79 | struct o2hb_callback_func mle_hb_up; | ||
| 80 | struct o2hb_callback_func mle_hb_down; | ||
| 81 | union { | ||
| 82 | struct dlm_lock_resource *res; | ||
| 83 | struct dlm_lock_name name; | ||
| 84 | } u; | ||
| 85 | }; | ||
| 86 | |||
| 52 | enum dlm_ast_type { | 87 | enum dlm_ast_type { |
| 53 | DLM_AST = 0, | 88 | DLM_AST = 0, |
| 54 | DLM_BAST, | 89 | DLM_BAST, |
| @@ -101,6 +136,7 @@ struct dlm_ctxt | |||
| 101 | struct list_head purge_list; | 136 | struct list_head purge_list; |
| 102 | struct list_head pending_asts; | 137 | struct list_head pending_asts; |
| 103 | struct list_head pending_basts; | 138 | struct list_head pending_basts; |
| 139 | struct list_head tracking_list; | ||
| 104 | unsigned int purge_count; | 140 | unsigned int purge_count; |
| 105 | spinlock_t spinlock; | 141 | spinlock_t spinlock; |
| 106 | spinlock_t ast_lock; | 142 | spinlock_t ast_lock; |
| @@ -122,6 +158,9 @@ struct dlm_ctxt | |||
| 122 | atomic_t remote_resources; | 158 | atomic_t remote_resources; |
| 123 | atomic_t unknown_resources; | 159 | atomic_t unknown_resources; |
| 124 | 160 | ||
| 161 | struct dlm_debug_ctxt *dlm_debug_ctxt; | ||
| 162 | struct dentry *dlm_debugfs_subroot; | ||
| 163 | |||
| 125 | /* NOTE: Next three are protected by dlm_domain_lock */ | 164 | /* NOTE: Next three are protected by dlm_domain_lock */ |
| 126 | struct kref dlm_refs; | 165 | struct kref dlm_refs; |
| 127 | enum dlm_ctxt_state dlm_state; | 166 | enum dlm_ctxt_state dlm_state; |
| @@ -270,6 +309,9 @@ struct dlm_lock_resource | |||
| 270 | struct list_head dirty; | 309 | struct list_head dirty; |
| 271 | struct list_head recovering; // dlm_recovery_ctxt.resources list | 310 | struct list_head recovering; // dlm_recovery_ctxt.resources list |
| 272 | 311 | ||
| 312 | /* Added during init and removed during release */ | ||
| 313 | struct list_head tracking; /* dlm->tracking_list */ | ||
| 314 | |||
| 273 | /* unused lock resources have their last_used stamped and are | 315 | /* unused lock resources have their last_used stamped and are |
| 274 | * put on a list for the dlm thread to run. */ | 316 | * put on a list for the dlm thread to run. */ |
| 275 | unsigned long last_used; | 317 | unsigned long last_used; |
| @@ -963,9 +1005,16 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
| 963 | DLM_LOCK_RES_MIGRATING)); | 1005 | DLM_LOCK_RES_MIGRATING)); |
| 964 | } | 1006 | } |
| 965 | 1007 | ||
| 1008 | /* create/destroy slab caches */ | ||
| 1009 | int dlm_init_master_caches(void); | ||
| 1010 | void dlm_destroy_master_caches(void); | ||
| 1011 | |||
| 1012 | int dlm_init_lock_cache(void); | ||
| 1013 | void dlm_destroy_lock_cache(void); | ||
| 966 | 1014 | ||
| 967 | int dlm_init_mle_cache(void); | 1015 | int dlm_init_mle_cache(void); |
| 968 | void dlm_destroy_mle_cache(void); | 1016 | void dlm_destroy_mle_cache(void); |
| 1017 | |||
| 969 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); | 1018 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
| 970 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | 1019 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, |
| 971 | struct dlm_lock_resource *res); | 1020 | struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 64239b37e5d4..5f6d858770a2 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * | 5 | * |
| 6 | * debug functionality for the dlm | 6 | * debug functionality for the dlm |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2004, 2008 Oracle. All rights reserved. |
| 9 | * | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
| 11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
| 31 | #include <linux/sysctl.h> | 31 | #include <linux/sysctl.h> |
| 32 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
| 33 | #include <linux/debugfs.h> | ||
| 33 | 34 | ||
| 34 | #include "cluster/heartbeat.h" | 35 | #include "cluster/heartbeat.h" |
| 35 | #include "cluster/nodemanager.h" | 36 | #include "cluster/nodemanager.h" |
| @@ -37,17 +38,16 @@ | |||
| 37 | 38 | ||
| 38 | #include "dlmapi.h" | 39 | #include "dlmapi.h" |
| 39 | #include "dlmcommon.h" | 40 | #include "dlmcommon.h" |
| 40 | |||
| 41 | #include "dlmdomain.h" | 41 | #include "dlmdomain.h" |
| 42 | #include "dlmdebug.h" | ||
| 42 | 43 | ||
| 43 | #define MLOG_MASK_PREFIX ML_DLM | 44 | #define MLOG_MASK_PREFIX ML_DLM |
| 44 | #include "cluster/masklog.h" | 45 | #include "cluster/masklog.h" |
| 45 | 46 | ||
| 47 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len); | ||
| 48 | |||
| 46 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 49 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
| 47 | { | 50 | { |
| 48 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | ||
| 49 | res->lockname.len, res->lockname.name, | ||
| 50 | res->owner, res->state); | ||
| 51 | spin_lock(&res->spinlock); | 51 | spin_lock(&res->spinlock); |
| 52 | __dlm_print_one_lock_resource(res); | 52 | __dlm_print_one_lock_resource(res); |
| 53 | spin_unlock(&res->spinlock); | 53 | spin_unlock(&res->spinlock); |
| @@ -58,7 +58,7 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
| 58 | int bit; | 58 | int bit; |
| 59 | assert_spin_locked(&res->spinlock); | 59 | assert_spin_locked(&res->spinlock); |
| 60 | 60 | ||
| 61 | mlog(ML_NOTICE, " refmap nodes: [ "); | 61 | printk(" refmap nodes: [ "); |
| 62 | bit = 0; | 62 | bit = 0; |
| 63 | while (1) { | 63 | while (1) { |
| 64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | 64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); |
| @@ -70,63 +70,66 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
| 70 | printk("], inflight=%u\n", res->inflight_locks); | 70 | printk("], inflight=%u\n", res->inflight_locks); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | static void __dlm_print_lock(struct dlm_lock *lock) | ||
| 74 | { | ||
| 75 | spin_lock(&lock->spinlock); | ||
| 76 | |||
| 77 | printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, " | ||
| 78 | "ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), " | ||
| 79 | "pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n", | ||
| 80 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 81 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 82 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 83 | atomic_read(&lock->lock_refs.refcount), | ||
| 84 | (list_empty(&lock->ast_list) ? 'y' : 'n'), | ||
| 85 | (lock->ast_pending ? 'y' : 'n'), | ||
| 86 | (list_empty(&lock->bast_list) ? 'y' : 'n'), | ||
| 87 | (lock->bast_pending ? 'y' : 'n'), | ||
| 88 | (lock->convert_pending ? 'y' : 'n'), | ||
| 89 | (lock->lock_pending ? 'y' : 'n'), | ||
| 90 | (lock->cancel_pending ? 'y' : 'n'), | ||
| 91 | (lock->unlock_pending ? 'y' : 'n')); | ||
| 92 | |||
| 93 | spin_unlock(&lock->spinlock); | ||
| 94 | } | ||
| 95 | |||
| 73 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 96 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
| 74 | { | 97 | { |
| 75 | struct list_head *iter2; | 98 | struct list_head *iter2; |
| 76 | struct dlm_lock *lock; | 99 | struct dlm_lock *lock; |
| 100 | char buf[DLM_LOCKID_NAME_MAX]; | ||
| 77 | 101 | ||
| 78 | assert_spin_locked(&res->spinlock); | 102 | assert_spin_locked(&res->spinlock); |
| 79 | 103 | ||
| 80 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | 104 | stringify_lockname(res->lockname.name, res->lockname.len, |
| 81 | res->lockname.len, res->lockname.name, | 105 | buf, sizeof(buf) - 1); |
| 82 | res->owner, res->state); | 106 | printk("lockres: %s, owner=%u, state=%u\n", |
| 83 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", | 107 | buf, res->owner, res->state); |
| 84 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); | 108 | printk(" last used: %lu, refcnt: %u, on purge list: %s\n", |
| 109 | res->last_used, atomic_read(&res->refs.refcount), | ||
| 110 | list_empty(&res->purge) ? "no" : "yes"); | ||
| 111 | printk(" on dirty list: %s, on reco list: %s, " | ||
| 112 | "migrating pending: %s\n", | ||
| 113 | list_empty(&res->dirty) ? "no" : "yes", | ||
| 114 | list_empty(&res->recovering) ? "no" : "yes", | ||
| 115 | res->migration_pending ? "yes" : "no"); | ||
| 116 | printk(" inflight locks: %d, asts reserved: %d\n", | ||
| 117 | res->inflight_locks, atomic_read(&res->asts_reserved)); | ||
| 85 | dlm_print_lockres_refmap(res); | 118 | dlm_print_lockres_refmap(res); |
| 86 | mlog(ML_NOTICE, " granted queue: \n"); | 119 | printk(" granted queue:\n"); |
| 87 | list_for_each(iter2, &res->granted) { | 120 | list_for_each(iter2, &res->granted) { |
| 88 | lock = list_entry(iter2, struct dlm_lock, list); | 121 | lock = list_entry(iter2, struct dlm_lock, list); |
| 89 | spin_lock(&lock->spinlock); | 122 | __dlm_print_lock(lock); |
| 90 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 91 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 92 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 93 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 94 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 95 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 96 | lock->ast_pending ? 'y' : 'n', | ||
| 97 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 98 | lock->bast_pending ? 'y' : 'n'); | ||
| 99 | spin_unlock(&lock->spinlock); | ||
| 100 | } | 123 | } |
| 101 | mlog(ML_NOTICE, " converting queue: \n"); | 124 | printk(" converting queue:\n"); |
| 102 | list_for_each(iter2, &res->converting) { | 125 | list_for_each(iter2, &res->converting) { |
| 103 | lock = list_entry(iter2, struct dlm_lock, list); | 126 | lock = list_entry(iter2, struct dlm_lock, list); |
| 104 | spin_lock(&lock->spinlock); | 127 | __dlm_print_lock(lock); |
| 105 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 106 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 107 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 108 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 109 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 110 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 111 | lock->ast_pending ? 'y' : 'n', | ||
| 112 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 113 | lock->bast_pending ? 'y' : 'n'); | ||
| 114 | spin_unlock(&lock->spinlock); | ||
| 115 | } | 128 | } |
| 116 | mlog(ML_NOTICE, " blocked queue: \n"); | 129 | printk(" blocked queue:\n"); |
| 117 | list_for_each(iter2, &res->blocked) { | 130 | list_for_each(iter2, &res->blocked) { |
| 118 | lock = list_entry(iter2, struct dlm_lock, list); | 131 | lock = list_entry(iter2, struct dlm_lock, list); |
| 119 | spin_lock(&lock->spinlock); | 132 | __dlm_print_lock(lock); |
| 120 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 121 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 122 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 123 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 124 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 125 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 126 | lock->ast_pending ? 'y' : 'n', | ||
| 127 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 128 | lock->bast_pending ? 'y' : 'n'); | ||
| 129 | spin_unlock(&lock->spinlock); | ||
| 130 | } | 133 | } |
| 131 | } | 134 | } |
| 132 | 135 | ||
| @@ -136,31 +139,6 @@ void dlm_print_one_lock(struct dlm_lock *lockid) | |||
| 136 | } | 139 | } |
| 137 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); | 140 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); |
| 138 | 141 | ||
| 139 | #if 0 | ||
| 140 | void dlm_dump_lock_resources(struct dlm_ctxt *dlm) | ||
| 141 | { | ||
| 142 | struct dlm_lock_resource *res; | ||
| 143 | struct hlist_node *iter; | ||
| 144 | struct hlist_head *bucket; | ||
| 145 | int i; | ||
| 146 | |||
| 147 | mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", | ||
| 148 | dlm->name, dlm->node_num, dlm->key); | ||
| 149 | if (!dlm || !dlm->name) { | ||
| 150 | mlog(ML_ERROR, "dlm=%p\n", dlm); | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | |||
| 154 | spin_lock(&dlm->spinlock); | ||
| 155 | for (i=0; i<DLM_HASH_BUCKETS; i++) { | ||
| 156 | bucket = dlm_lockres_hash(dlm, i); | ||
| 157 | hlist_for_each_entry(res, iter, bucket, hash_node) | ||
| 158 | dlm_print_one_lock_resource(res); | ||
| 159 | } | ||
| 160 | spin_unlock(&dlm->spinlock); | ||
| 161 | } | ||
| 162 | #endif /* 0 */ | ||
| 163 | |||
| 164 | static const char *dlm_errnames[] = { | 142 | static const char *dlm_errnames[] = { |
| 165 | [DLM_NORMAL] = "DLM_NORMAL", | 143 | [DLM_NORMAL] = "DLM_NORMAL", |
| 166 | [DLM_GRANTED] = "DLM_GRANTED", | 144 | [DLM_GRANTED] = "DLM_GRANTED", |
| @@ -266,3 +244,792 @@ const char *dlm_errname(enum dlm_status err) | |||
| 266 | return dlm_errnames[err]; | 244 | return dlm_errnames[err]; |
| 267 | } | 245 | } |
| 268 | EXPORT_SYMBOL_GPL(dlm_errname); | 246 | EXPORT_SYMBOL_GPL(dlm_errname); |
| 247 | |||
| 248 | /* NOTE: This function converts a lockname into a string. It uses knowledge | ||
| 249 | * of the format of the lockname that should be outside the purview of the dlm. | ||
| 250 | * We are adding only to make dlm debugging slightly easier. | ||
| 251 | * | ||
| 252 | * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. | ||
| 253 | */ | ||
| 254 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len) | ||
| 255 | { | ||
| 256 | int out = 0; | ||
| 257 | __be64 inode_blkno_be; | ||
| 258 | |||
| 259 | #define OCFS2_DENTRY_LOCK_INO_START 18 | ||
| 260 | if (*lockname == 'N') { | ||
| 261 | memcpy((__be64 *)&inode_blkno_be, | ||
| 262 | (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START], | ||
| 263 | sizeof(__be64)); | ||
| 264 | out += snprintf(buf + out, len - out, "%.*s%08x", | ||
| 265 | OCFS2_DENTRY_LOCK_INO_START - 1, lockname, | ||
| 266 | (unsigned int)be64_to_cpu(inode_blkno_be)); | ||
| 267 | } else | ||
| 268 | out += snprintf(buf + out, len - out, "%.*s", | ||
| 269 | locklen, lockname); | ||
| 270 | return out; | ||
| 271 | } | ||
| 272 | |||
/*
 * Write the index of every set bit in @nodemap (bits 0 .. @maxnodes - 1)
 * into @buf as decimal numbers, each followed by a single space.
 *
 * @len is the space available in @buf including the terminating NUL.
 * Returns the number of characters actually stored; scnprintf() is used
 * so the running offset can never move past the end of the buffer when
 * the output is truncated.
 */
static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
			     char *buf, int len)
{
	int out = 0;
	int i = -1;

	if (len <= 0)
		return 0;

	while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes)
		out += scnprintf(buf + out, len - out, "%d ", i);

	return out;
}
| 284 | |||
| 285 | static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) | ||
| 286 | { | ||
| 287 | int out = 0; | ||
| 288 | unsigned int namelen; | ||
| 289 | const char *name; | ||
| 290 | char *mle_type; | ||
| 291 | |||
| 292 | if (mle->type != DLM_MLE_MASTER) { | ||
| 293 | namelen = mle->u.name.len; | ||
| 294 | name = mle->u.name.name; | ||
| 295 | } else { | ||
| 296 | namelen = mle->u.res->lockname.len; | ||
| 297 | name = mle->u.res->lockname.name; | ||
| 298 | } | ||
| 299 | |||
| 300 | if (mle->type == DLM_MLE_BLOCK) | ||
| 301 | mle_type = "BLK"; | ||
| 302 | else if (mle->type == DLM_MLE_MASTER) | ||
| 303 | mle_type = "MAS"; | ||
| 304 | else | ||
| 305 | mle_type = "MIG"; | ||
| 306 | |||
| 307 | out += stringify_lockname(name, namelen, buf + out, len - out); | ||
| 308 | out += snprintf(buf + out, len - out, | ||
| 309 | "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", | ||
| 310 | mle_type, mle->master, mle->new_master, | ||
| 311 | !list_empty(&mle->hb_events), | ||
| 312 | !!mle->inuse, | ||
| 313 | atomic_read(&mle->mle_refs.refcount)); | ||
| 314 | |||
| 315 | out += snprintf(buf + out, len - out, "Maybe="); | ||
| 316 | out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, | ||
| 317 | buf + out, len - out); | ||
| 318 | out += snprintf(buf + out, len - out, "\n"); | ||
| 319 | |||
| 320 | out += snprintf(buf + out, len - out, "Vote="); | ||
| 321 | out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES, | ||
| 322 | buf + out, len - out); | ||
| 323 | out += snprintf(buf + out, len - out, "\n"); | ||
| 324 | |||
| 325 | out += snprintf(buf + out, len - out, "Response="); | ||
| 326 | out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES, | ||
| 327 | buf + out, len - out); | ||
| 328 | out += snprintf(buf + out, len - out, "\n"); | ||
| 329 | |||
| 330 | out += snprintf(buf + out, len - out, "Node="); | ||
| 331 | out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES, | ||
| 332 | buf + out, len - out); | ||
| 333 | out += snprintf(buf + out, len - out, "\n"); | ||
| 334 | |||
| 335 | out += snprintf(buf + out, len - out, "\n"); | ||
| 336 | |||
| 337 | return out; | ||
| 338 | } | ||
| 339 | |||
| 340 | void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
| 341 | { | ||
| 342 | char *buf; | ||
| 343 | |||
| 344 | buf = (char *) get_zeroed_page(GFP_NOFS); | ||
| 345 | if (buf) { | ||
| 346 | dump_mle(mle, buf, PAGE_SIZE - 1); | ||
| 347 | free_page((unsigned long)buf); | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | #ifdef CONFIG_DEBUG_FS | ||
| 352 | |||
| 353 | static struct dentry *dlm_debugfs_root = NULL; | ||
| 354 | |||
| 355 | #define DLM_DEBUGFS_DIR "o2dlm" | ||
| 356 | #define DLM_DEBUGFS_DLM_STATE "dlm_state" | ||
| 357 | #define DLM_DEBUGFS_LOCKING_STATE "locking_state" | ||
| 358 | #define DLM_DEBUGFS_MLE_STATE "mle_state" | ||
| 359 | #define DLM_DEBUGFS_PURGE_LIST "purge_list" | ||
| 360 | |||
| 361 | /* begin - util funcs */ | ||
| 362 | static void dlm_debug_free(struct kref *kref) | ||
| 363 | { | ||
| 364 | struct dlm_debug_ctxt *dc; | ||
| 365 | |||
| 366 | dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt); | ||
| 367 | |||
| 368 | kfree(dc); | ||
| 369 | } | ||
| 370 | |||
| 371 | void dlm_debug_put(struct dlm_debug_ctxt *dc) | ||
| 372 | { | ||
| 373 | if (dc) | ||
| 374 | kref_put(&dc->debug_refcnt, dlm_debug_free); | ||
| 375 | } | ||
| 376 | |||
| 377 | static void dlm_debug_get(struct dlm_debug_ctxt *dc) | ||
| 378 | { | ||
| 379 | kref_get(&dc->debug_refcnt); | ||
| 380 | } | ||
| 381 | |||
| 382 | static struct debug_buffer *debug_buffer_allocate(void) | ||
| 383 | { | ||
| 384 | struct debug_buffer *db = NULL; | ||
| 385 | |||
| 386 | db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL); | ||
| 387 | if (!db) | ||
| 388 | goto bail; | ||
| 389 | |||
| 390 | db->len = PAGE_SIZE; | ||
| 391 | db->buf = kmalloc(db->len, GFP_KERNEL); | ||
| 392 | if (!db->buf) | ||
| 393 | goto bail; | ||
| 394 | |||
| 395 | return db; | ||
| 396 | bail: | ||
| 397 | kfree(db); | ||
| 398 | return NULL; | ||
| 399 | } | ||
| 400 | |||
| 401 | static ssize_t debug_buffer_read(struct file *file, char __user *buf, | ||
| 402 | size_t nbytes, loff_t *ppos) | ||
| 403 | { | ||
| 404 | struct debug_buffer *db = file->private_data; | ||
| 405 | |||
| 406 | return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len); | ||
| 407 | } | ||
| 408 | |||
| 409 | static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) | ||
| 410 | { | ||
| 411 | struct debug_buffer *db = file->private_data; | ||
| 412 | loff_t new = -1; | ||
| 413 | |||
| 414 | switch (whence) { | ||
| 415 | case 0: | ||
| 416 | new = off; | ||
| 417 | break; | ||
| 418 | case 1: | ||
| 419 | new = file->f_pos + off; | ||
| 420 | break; | ||
| 421 | } | ||
| 422 | |||
| 423 | if (new < 0 || new > db->len) | ||
| 424 | return -EINVAL; | ||
| 425 | |||
| 426 | return (file->f_pos = new); | ||
| 427 | } | ||
| 428 | |||
| 429 | static int debug_buffer_release(struct inode *inode, struct file *file) | ||
| 430 | { | ||
| 431 | struct debug_buffer *db = (struct debug_buffer *)file->private_data; | ||
| 432 | |||
| 433 | if (db) | ||
| 434 | kfree(db->buf); | ||
| 435 | kfree(db); | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | /* end - util funcs */ | ||
| 440 | |||
| 441 | /* begin - purge list funcs */ | ||
| 442 | static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 443 | { | ||
| 444 | struct dlm_lock_resource *res; | ||
| 445 | int out = 0; | ||
| 446 | unsigned long total = 0; | ||
| 447 | |||
| 448 | out += snprintf(db->buf + out, db->len - out, | ||
| 449 | "Dumping Purgelist for Domain: %s\n", dlm->name); | ||
| 450 | |||
| 451 | spin_lock(&dlm->spinlock); | ||
| 452 | list_for_each_entry(res, &dlm->purge_list, purge) { | ||
| 453 | ++total; | ||
| 454 | if (db->len - out < 100) | ||
| 455 | continue; | ||
| 456 | spin_lock(&res->spinlock); | ||
| 457 | out += stringify_lockname(res->lockname.name, | ||
| 458 | res->lockname.len, | ||
| 459 | db->buf + out, db->len - out); | ||
| 460 | out += snprintf(db->buf + out, db->len - out, "\t%ld\n", | ||
| 461 | (jiffies - res->last_used)/HZ); | ||
| 462 | spin_unlock(&res->spinlock); | ||
| 463 | } | ||
| 464 | spin_unlock(&dlm->spinlock); | ||
| 465 | |||
| 466 | out += snprintf(db->buf + out, db->len - out, | ||
| 467 | "Total on list: %ld\n", total); | ||
| 468 | |||
| 469 | return out; | ||
| 470 | } | ||
| 471 | |||
| 472 | static int debug_purgelist_open(struct inode *inode, struct file *file) | ||
| 473 | { | ||
| 474 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 475 | struct debug_buffer *db; | ||
| 476 | |||
| 477 | db = debug_buffer_allocate(); | ||
| 478 | if (!db) | ||
| 479 | goto bail; | ||
| 480 | |||
| 481 | db->len = debug_purgelist_print(dlm, db); | ||
| 482 | |||
| 483 | file->private_data = db; | ||
| 484 | |||
| 485 | return 0; | ||
| 486 | bail: | ||
| 487 | return -ENOMEM; | ||
| 488 | } | ||
| 489 | |||
| 490 | static struct file_operations debug_purgelist_fops = { | ||
| 491 | .open = debug_purgelist_open, | ||
| 492 | .release = debug_buffer_release, | ||
| 493 | .read = debug_buffer_read, | ||
| 494 | .llseek = debug_buffer_llseek, | ||
| 495 | }; | ||
| 496 | /* end - purge list funcs */ | ||
| 497 | |||
| 498 | /* begin - debug mle funcs */ | ||
| 499 | static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 500 | { | ||
| 501 | struct dlm_master_list_entry *mle; | ||
| 502 | int out = 0; | ||
| 503 | unsigned long total = 0; | ||
| 504 | |||
| 505 | out += snprintf(db->buf + out, db->len - out, | ||
| 506 | "Dumping MLEs for Domain: %s\n", dlm->name); | ||
| 507 | |||
| 508 | spin_lock(&dlm->master_lock); | ||
| 509 | list_for_each_entry(mle, &dlm->master_list, list) { | ||
| 510 | ++total; | ||
| 511 | if (db->len - out < 200) | ||
| 512 | continue; | ||
| 513 | out += dump_mle(mle, db->buf + out, db->len - out); | ||
| 514 | } | ||
| 515 | spin_unlock(&dlm->master_lock); | ||
| 516 | |||
| 517 | out += snprintf(db->buf + out, db->len - out, | ||
| 518 | "Total on list: %ld\n", total); | ||
| 519 | return out; | ||
| 520 | } | ||
| 521 | |||
| 522 | static int debug_mle_open(struct inode *inode, struct file *file) | ||
| 523 | { | ||
| 524 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 525 | struct debug_buffer *db; | ||
| 526 | |||
| 527 | db = debug_buffer_allocate(); | ||
| 528 | if (!db) | ||
| 529 | goto bail; | ||
| 530 | |||
| 531 | db->len = debug_mle_print(dlm, db); | ||
| 532 | |||
| 533 | file->private_data = db; | ||
| 534 | |||
| 535 | return 0; | ||
| 536 | bail: | ||
| 537 | return -ENOMEM; | ||
| 538 | } | ||
| 539 | |||
| 540 | static struct file_operations debug_mle_fops = { | ||
| 541 | .open = debug_mle_open, | ||
| 542 | .release = debug_buffer_release, | ||
| 543 | .read = debug_buffer_read, | ||
| 544 | .llseek = debug_buffer_llseek, | ||
| 545 | }; | ||
| 546 | |||
| 547 | /* end - debug mle funcs */ | ||
| 548 | |||
| 549 | /* begin - debug lockres funcs */ | ||
| 550 | static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) | ||
| 551 | { | ||
| 552 | int out; | ||
| 553 | |||
| 554 | #define DEBUG_LOCK_VERSION 1 | ||
| 555 | spin_lock(&lock->spinlock); | ||
| 556 | out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," | ||
| 557 | "%d,%d,%d,%d\n", | ||
| 558 | DEBUG_LOCK_VERSION, | ||
| 559 | list_type, lock->ml.type, lock->ml.convert_type, | ||
| 560 | lock->ml.node, | ||
| 561 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 562 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 563 | !list_empty(&lock->ast_list), | ||
| 564 | !list_empty(&lock->bast_list), | ||
| 565 | lock->ast_pending, lock->bast_pending, | ||
| 566 | lock->convert_pending, lock->lock_pending, | ||
| 567 | lock->cancel_pending, lock->unlock_pending, | ||
| 568 | atomic_read(&lock->lock_refs.refcount)); | ||
| 569 | spin_unlock(&lock->spinlock); | ||
| 570 | |||
| 571 | return out; | ||
| 572 | } | ||
| 573 | |||
| 574 | static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) | ||
| 575 | { | ||
| 576 | struct dlm_lock *lock; | ||
| 577 | int i; | ||
| 578 | int out = 0; | ||
| 579 | |||
| 580 | out += snprintf(buf + out, len - out, "NAME:"); | ||
| 581 | out += stringify_lockname(res->lockname.name, res->lockname.len, | ||
| 582 | buf + out, len - out); | ||
| 583 | out += snprintf(buf + out, len - out, "\n"); | ||
| 584 | |||
| 585 | #define DEBUG_LRES_VERSION 1 | ||
| 586 | out += snprintf(buf + out, len - out, | ||
| 587 | "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n", | ||
| 588 | DEBUG_LRES_VERSION, | ||
| 589 | res->owner, res->state, res->last_used, | ||
| 590 | !list_empty(&res->purge), | ||
| 591 | !list_empty(&res->dirty), | ||
| 592 | !list_empty(&res->recovering), | ||
| 593 | res->inflight_locks, res->migration_pending, | ||
| 594 | atomic_read(&res->asts_reserved), | ||
| 595 | atomic_read(&res->refs.refcount)); | ||
| 596 | |||
| 597 | /* refmap */ | ||
| 598 | out += snprintf(buf + out, len - out, "RMAP:"); | ||
| 599 | out += stringify_nodemap(res->refmap, O2NM_MAX_NODES, | ||
| 600 | buf + out, len - out); | ||
| 601 | out += snprintf(buf + out, len - out, "\n"); | ||
| 602 | |||
| 603 | /* lvb */ | ||
| 604 | out += snprintf(buf + out, len - out, "LVBX:"); | ||
| 605 | for (i = 0; i < DLM_LVB_LEN; i++) | ||
| 606 | out += snprintf(buf + out, len - out, | ||
| 607 | "%02x", (unsigned char)res->lvb[i]); | ||
| 608 | out += snprintf(buf + out, len - out, "\n"); | ||
| 609 | |||
| 610 | /* granted */ | ||
| 611 | list_for_each_entry(lock, &res->granted, list) | ||
| 612 | out += dump_lock(lock, 0, buf + out, len - out); | ||
| 613 | |||
| 614 | /* converting */ | ||
| 615 | list_for_each_entry(lock, &res->converting, list) | ||
| 616 | out += dump_lock(lock, 1, buf + out, len - out); | ||
| 617 | |||
| 618 | /* blocked */ | ||
| 619 | list_for_each_entry(lock, &res->blocked, list) | ||
| 620 | out += dump_lock(lock, 2, buf + out, len - out); | ||
| 621 | |||
| 622 | out += snprintf(buf + out, len - out, "\n"); | ||
| 623 | |||
| 624 | return out; | ||
| 625 | } | ||
| 626 | |||
| 627 | static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | ||
| 628 | { | ||
| 629 | struct debug_lockres *dl = m->private; | ||
| 630 | struct dlm_ctxt *dlm = dl->dl_ctxt; | ||
| 631 | struct dlm_lock_resource *res = NULL; | ||
| 632 | |||
| 633 | spin_lock(&dlm->spinlock); | ||
| 634 | |||
| 635 | if (dl->dl_res) { | ||
| 636 | list_for_each_entry(res, &dl->dl_res->tracking, tracking) { | ||
| 637 | if (dl->dl_res) { | ||
| 638 | dlm_lockres_put(dl->dl_res); | ||
| 639 | dl->dl_res = NULL; | ||
| 640 | } | ||
| 641 | if (&res->tracking == &dlm->tracking_list) { | ||
| 642 | mlog(0, "End of list found, %p\n", res); | ||
| 643 | dl = NULL; | ||
| 644 | break; | ||
| 645 | } | ||
| 646 | dlm_lockres_get(res); | ||
| 647 | dl->dl_res = res; | ||
| 648 | break; | ||
| 649 | } | ||
| 650 | } else { | ||
| 651 | if (!list_empty(&dlm->tracking_list)) { | ||
| 652 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
| 653 | break; | ||
| 654 | dlm_lockres_get(res); | ||
| 655 | dl->dl_res = res; | ||
| 656 | } else | ||
| 657 | dl = NULL; | ||
| 658 | } | ||
| 659 | |||
| 660 | if (dl) { | ||
| 661 | spin_lock(&dl->dl_res->spinlock); | ||
| 662 | dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1); | ||
| 663 | spin_unlock(&dl->dl_res->spinlock); | ||
| 664 | } | ||
| 665 | |||
| 666 | spin_unlock(&dlm->spinlock); | ||
| 667 | |||
| 668 | return dl; | ||
| 669 | } | ||
| 670 | |||
/*
 * seq_file stop callback. Intentionally empty: lockres_seq_start()
 * releases every lock it takes before returning, so there is nothing
 * to undo here.
 */
static void lockres_seq_stop(struct seq_file *m, void *v)
{
}
| 674 | |||
| 675 | static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
| 676 | { | ||
| 677 | return NULL; | ||
| 678 | } | ||
| 679 | |||
| 680 | static int lockres_seq_show(struct seq_file *s, void *v) | ||
| 681 | { | ||
| 682 | struct debug_lockres *dl = (struct debug_lockres *)v; | ||
| 683 | |||
| 684 | seq_printf(s, "%s", dl->dl_buf); | ||
| 685 | |||
| 686 | return 0; | ||
| 687 | } | ||
| 688 | |||
| 689 | static struct seq_operations debug_lockres_ops = { | ||
| 690 | .start = lockres_seq_start, | ||
| 691 | .stop = lockres_seq_stop, | ||
| 692 | .next = lockres_seq_next, | ||
| 693 | .show = lockres_seq_show, | ||
| 694 | }; | ||
| 695 | |||
| 696 | static int debug_lockres_open(struct inode *inode, struct file *file) | ||
| 697 | { | ||
| 698 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 699 | int ret = -ENOMEM; | ||
| 700 | struct seq_file *seq; | ||
| 701 | struct debug_lockres *dl = NULL; | ||
| 702 | |||
| 703 | dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); | ||
| 704 | if (!dl) { | ||
| 705 | mlog_errno(ret); | ||
| 706 | goto bail; | ||
| 707 | } | ||
| 708 | |||
| 709 | dl->dl_len = PAGE_SIZE; | ||
| 710 | dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); | ||
| 711 | if (!dl->dl_buf) { | ||
| 712 | mlog_errno(ret); | ||
| 713 | goto bail; | ||
| 714 | } | ||
| 715 | |||
| 716 | ret = seq_open(file, &debug_lockres_ops); | ||
| 717 | if (ret) { | ||
| 718 | mlog_errno(ret); | ||
| 719 | goto bail; | ||
| 720 | } | ||
| 721 | |||
| 722 | seq = (struct seq_file *) file->private_data; | ||
| 723 | seq->private = dl; | ||
| 724 | |||
| 725 | dlm_grab(dlm); | ||
| 726 | dl->dl_ctxt = dlm; | ||
| 727 | |||
| 728 | return 0; | ||
| 729 | bail: | ||
| 730 | if (dl) | ||
| 731 | kfree(dl->dl_buf); | ||
| 732 | kfree(dl); | ||
| 733 | return ret; | ||
| 734 | } | ||
| 735 | |||
| 736 | static int debug_lockres_release(struct inode *inode, struct file *file) | ||
| 737 | { | ||
| 738 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
| 739 | struct debug_lockres *dl = (struct debug_lockres *)seq->private; | ||
| 740 | |||
| 741 | if (dl->dl_res) | ||
| 742 | dlm_lockres_put(dl->dl_res); | ||
| 743 | dlm_put(dl->dl_ctxt); | ||
| 744 | kfree(dl->dl_buf); | ||
| 745 | return seq_release_private(inode, file); | ||
| 746 | } | ||
| 747 | |||
| 748 | static struct file_operations debug_lockres_fops = { | ||
| 749 | .open = debug_lockres_open, | ||
| 750 | .release = debug_lockres_release, | ||
| 751 | .read = seq_read, | ||
| 752 | .llseek = seq_lseek, | ||
| 753 | }; | ||
| 754 | /* end - debug lockres funcs */ | ||
| 755 | |||
| 756 | /* begin - debug state funcs */ | ||
| 757 | static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 758 | { | ||
| 759 | int out = 0; | ||
| 760 | struct dlm_reco_node_data *node; | ||
| 761 | char *state; | ||
| 762 | int lres, rres, ures, tres; | ||
| 763 | |||
| 764 | lres = atomic_read(&dlm->local_resources); | ||
| 765 | rres = atomic_read(&dlm->remote_resources); | ||
| 766 | ures = atomic_read(&dlm->unknown_resources); | ||
| 767 | tres = lres + rres + ures; | ||
| 768 | |||
| 769 | spin_lock(&dlm->spinlock); | ||
| 770 | |||
| 771 | switch (dlm->dlm_state) { | ||
| 772 | case DLM_CTXT_NEW: | ||
| 773 | state = "NEW"; break; | ||
| 774 | case DLM_CTXT_JOINED: | ||
| 775 | state = "JOINED"; break; | ||
| 776 | case DLM_CTXT_IN_SHUTDOWN: | ||
| 777 | state = "SHUTDOWN"; break; | ||
| 778 | case DLM_CTXT_LEAVING: | ||
| 779 | state = "LEAVING"; break; | ||
| 780 | default: | ||
| 781 | state = "UNKNOWN"; break; | ||
| 782 | } | ||
| 783 | |||
| 784 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | ||
| 785 | out += snprintf(db->buf + out, db->len - out, | ||
| 786 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | ||
| 787 | |||
| 788 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | ||
| 789 | out += snprintf(db->buf + out, db->len - out, | ||
| 790 | "Thread Pid: %d Node: %d State: %s\n", | ||
| 791 | dlm->dlm_thread_task->pid, dlm->node_num, state); | ||
| 792 | |||
| 793 | /* Number of Joins: xxx Joining Node: xxx */ | ||
| 794 | out += snprintf(db->buf + out, db->len - out, | ||
| 795 | "Number of Joins: %d Joining Node: %d\n", | ||
| 796 | dlm->num_joins, dlm->joining_node); | ||
| 797 | |||
| 798 | /* Domain Map: xx xx xx */ | ||
| 799 | out += snprintf(db->buf + out, db->len - out, "Domain Map: "); | ||
| 800 | out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, | ||
| 801 | db->buf + out, db->len - out); | ||
| 802 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 803 | |||
| 804 | /* Live Map: xx xx xx */ | ||
| 805 | out += snprintf(db->buf + out, db->len - out, "Live Map: "); | ||
| 806 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, | ||
| 807 | db->buf + out, db->len - out); | ||
| 808 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 809 | |||
| 810 | /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ | ||
| 811 | out += snprintf(db->buf + out, db->len - out, | ||
| 812 | "Mastered Resources Total: %d Locally: %d " | ||
| 813 | "Remotely: %d Unknown: %d\n", | ||
| 814 | tres, lres, rres, ures); | ||
| 815 | |||
| 816 | /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ | ||
| 817 | out += snprintf(db->buf + out, db->len - out, | ||
| 818 | "Lists: Dirty=%s Purge=%s PendingASTs=%s " | ||
| 819 | "PendingBASTs=%s Master=%s\n", | ||
| 820 | (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), | ||
| 821 | (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), | ||
| 822 | (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), | ||
| 823 | (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), | ||
| 824 | (list_empty(&dlm->master_list) ? "Empty" : "InUse")); | ||
| 825 | |||
| 826 | /* Purge Count: xxx Refs: xxx */ | ||
| 827 | out += snprintf(db->buf + out, db->len - out, | ||
| 828 | "Purge Count: %d Refs: %d\n", dlm->purge_count, | ||
| 829 | atomic_read(&dlm->dlm_refs.refcount)); | ||
| 830 | |||
| 831 | /* Dead Node: xxx */ | ||
| 832 | out += snprintf(db->buf + out, db->len - out, | ||
| 833 | "Dead Node: %d\n", dlm->reco.dead_node); | ||
| 834 | |||
| 835 | /* What about DLM_RECO_STATE_FINALIZE? */ | ||
| 836 | if (dlm->reco.state == DLM_RECO_STATE_ACTIVE) | ||
| 837 | state = "ACTIVE"; | ||
| 838 | else | ||
| 839 | state = "INACTIVE"; | ||
| 840 | |||
| 841 | /* Recovery Pid: xxxx Master: xxx State: xxxx */ | ||
| 842 | out += snprintf(db->buf + out, db->len - out, | ||
| 843 | "Recovery Pid: %d Master: %d State: %s\n", | ||
| 844 | dlm->dlm_reco_thread_task->pid, | ||
| 845 | dlm->reco.new_master, state); | ||
| 846 | |||
| 847 | /* Recovery Map: xx xx */ | ||
| 848 | out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); | ||
| 849 | out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, | ||
| 850 | db->buf + out, db->len - out); | ||
| 851 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 852 | |||
| 853 | /* Recovery Node State: */ | ||
| 854 | out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); | ||
| 855 | list_for_each_entry(node, &dlm->reco.node_data, list) { | ||
| 856 | switch (node->state) { | ||
| 857 | case DLM_RECO_NODE_DATA_INIT: | ||
| 858 | state = "INIT"; | ||
| 859 | break; | ||
| 860 | case DLM_RECO_NODE_DATA_REQUESTING: | ||
| 861 | state = "REQUESTING"; | ||
| 862 | break; | ||
| 863 | case DLM_RECO_NODE_DATA_DEAD: | ||
| 864 | state = "DEAD"; | ||
| 865 | break; | ||
| 866 | case DLM_RECO_NODE_DATA_RECEIVING: | ||
| 867 | state = "RECEIVING"; | ||
| 868 | break; | ||
| 869 | case DLM_RECO_NODE_DATA_REQUESTED: | ||
| 870 | state = "REQUESTED"; | ||
| 871 | break; | ||
| 872 | case DLM_RECO_NODE_DATA_DONE: | ||
| 873 | state = "DONE"; | ||
| 874 | break; | ||
| 875 | case DLM_RECO_NODE_DATA_FINALIZE_SENT: | ||
| 876 | state = "FINALIZE-SENT"; | ||
| 877 | break; | ||
| 878 | default: | ||
| 879 | state = "BAD"; | ||
| 880 | break; | ||
| 881 | } | ||
| 882 | out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", | ||
| 883 | node->node_num, state); | ||
| 884 | } | ||
| 885 | |||
| 886 | spin_unlock(&dlm->spinlock); | ||
| 887 | |||
| 888 | return out; | ||
| 889 | } | ||
| 890 | |||
| 891 | static int debug_state_open(struct inode *inode, struct file *file) | ||
| 892 | { | ||
| 893 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 894 | struct debug_buffer *db = NULL; | ||
| 895 | |||
| 896 | db = debug_buffer_allocate(); | ||
| 897 | if (!db) | ||
| 898 | goto bail; | ||
| 899 | |||
| 900 | db->len = debug_state_print(dlm, db); | ||
| 901 | |||
| 902 | file->private_data = db; | ||
| 903 | |||
| 904 | return 0; | ||
| 905 | bail: | ||
| 906 | return -ENOMEM; | ||
| 907 | } | ||
| 908 | |||
| 909 | static struct file_operations debug_state_fops = { | ||
| 910 | .open = debug_state_open, | ||
| 911 | .release = debug_buffer_release, | ||
| 912 | .read = debug_buffer_read, | ||
| 913 | .llseek = debug_buffer_llseek, | ||
| 914 | }; | ||
| 915 | /* end - debug state funcs */ | ||
| 916 | |||
| 917 | /* files in subroot */ | ||
| 918 | int dlm_debug_init(struct dlm_ctxt *dlm) | ||
| 919 | { | ||
| 920 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
| 921 | |||
| 922 | /* for dumping dlm_ctxt */ | ||
| 923 | dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, | ||
| 924 | S_IFREG|S_IRUSR, | ||
| 925 | dlm->dlm_debugfs_subroot, | ||
| 926 | dlm, &debug_state_fops); | ||
| 927 | if (!dc->debug_state_dentry) { | ||
| 928 | mlog_errno(-ENOMEM); | ||
| 929 | goto bail; | ||
| 930 | } | ||
| 931 | |||
| 932 | /* for dumping lockres */ | ||
| 933 | dc->debug_lockres_dentry = | ||
| 934 | debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, | ||
| 935 | S_IFREG|S_IRUSR, | ||
| 936 | dlm->dlm_debugfs_subroot, | ||
| 937 | dlm, &debug_lockres_fops); | ||
| 938 | if (!dc->debug_lockres_dentry) { | ||
| 939 | mlog_errno(-ENOMEM); | ||
| 940 | goto bail; | ||
| 941 | } | ||
| 942 | |||
| 943 | /* for dumping mles */ | ||
| 944 | dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, | ||
| 945 | S_IFREG|S_IRUSR, | ||
| 946 | dlm->dlm_debugfs_subroot, | ||
| 947 | dlm, &debug_mle_fops); | ||
| 948 | if (!dc->debug_mle_dentry) { | ||
| 949 | mlog_errno(-ENOMEM); | ||
| 950 | goto bail; | ||
| 951 | } | ||
| 952 | |||
| 953 | /* for dumping lockres on the purge list */ | ||
| 954 | dc->debug_purgelist_dentry = | ||
| 955 | debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, | ||
| 956 | S_IFREG|S_IRUSR, | ||
| 957 | dlm->dlm_debugfs_subroot, | ||
| 958 | dlm, &debug_purgelist_fops); | ||
| 959 | if (!dc->debug_purgelist_dentry) { | ||
| 960 | mlog_errno(-ENOMEM); | ||
| 961 | goto bail; | ||
| 962 | } | ||
| 963 | |||
| 964 | dlm_debug_get(dc); | ||
| 965 | return 0; | ||
| 966 | |||
| 967 | bail: | ||
| 968 | dlm_debug_shutdown(dlm); | ||
| 969 | return -ENOMEM; | ||
| 970 | } | ||
| 971 | |||
| 972 | void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
| 973 | { | ||
| 974 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
| 975 | |||
| 976 | if (dc) { | ||
| 977 | if (dc->debug_purgelist_dentry) | ||
| 978 | debugfs_remove(dc->debug_purgelist_dentry); | ||
| 979 | if (dc->debug_mle_dentry) | ||
| 980 | debugfs_remove(dc->debug_mle_dentry); | ||
| 981 | if (dc->debug_lockres_dentry) | ||
| 982 | debugfs_remove(dc->debug_lockres_dentry); | ||
| 983 | if (dc->debug_state_dentry) | ||
| 984 | debugfs_remove(dc->debug_state_dentry); | ||
| 985 | dlm_debug_put(dc); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | /* subroot - domain dir */ | ||
| 990 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 991 | { | ||
| 992 | dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, | ||
| 993 | dlm_debugfs_root); | ||
| 994 | if (!dlm->dlm_debugfs_subroot) { | ||
| 995 | mlog_errno(-ENOMEM); | ||
| 996 | goto bail; | ||
| 997 | } | ||
| 998 | |||
| 999 | dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), | ||
| 1000 | GFP_KERNEL); | ||
| 1001 | if (!dlm->dlm_debug_ctxt) { | ||
| 1002 | mlog_errno(-ENOMEM); | ||
| 1003 | goto bail; | ||
| 1004 | } | ||
| 1005 | kref_init(&dlm->dlm_debug_ctxt->debug_refcnt); | ||
| 1006 | |||
| 1007 | return 0; | ||
| 1008 | bail: | ||
| 1009 | dlm_destroy_debugfs_subroot(dlm); | ||
| 1010 | return -ENOMEM; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 1014 | { | ||
| 1015 | if (dlm->dlm_debugfs_subroot) | ||
| 1016 | debugfs_remove(dlm->dlm_debugfs_subroot); | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | /* debugfs root */ | ||
| 1020 | int dlm_create_debugfs_root(void) | ||
| 1021 | { | ||
| 1022 | dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); | ||
| 1023 | if (!dlm_debugfs_root) { | ||
| 1024 | mlog_errno(-ENOMEM); | ||
| 1025 | return -ENOMEM; | ||
| 1026 | } | ||
| 1027 | return 0; | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | void dlm_destroy_debugfs_root(void) | ||
| 1031 | { | ||
| 1032 | if (dlm_debugfs_root) | ||
| 1033 | debugfs_remove(dlm_debugfs_root); | ||
| 1034 | } | ||
| 1035 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h new file mode 100644 index 000000000000..d34a62a3a625 --- /dev/null +++ b/fs/ocfs2/dlm/dlmdebug.h | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * dlmdebug.h | ||
| 5 | * | ||
| 6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public | ||
| 19 | * License along with this program; if not, write to the | ||
| 20 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | * Boston, MA 02111-1307, USA. | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef DLMDEBUG_H | ||
| 26 | #define DLMDEBUG_H | ||
| 27 | |||
| 28 | void dlm_print_one_mle(struct dlm_master_list_entry *mle); | ||
| 29 | |||
| 30 | #ifdef CONFIG_DEBUG_FS | ||
| 31 | |||
| 32 | struct dlm_debug_ctxt { | ||
| 33 | struct kref debug_refcnt; | ||
| 34 | struct dentry *debug_state_dentry; | ||
| 35 | struct dentry *debug_lockres_dentry; | ||
| 36 | struct dentry *debug_mle_dentry; | ||
| 37 | struct dentry *debug_purgelist_dentry; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct debug_buffer { | ||
| 41 | int len; | ||
| 42 | char *buf; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct debug_lockres { | ||
| 46 | int dl_len; | ||
| 47 | char *dl_buf; | ||
| 48 | struct dlm_ctxt *dl_ctxt; | ||
| 49 | struct dlm_lock_resource *dl_res; | ||
| 50 | }; | ||
| 51 | |||
| 52 | int dlm_debug_init(struct dlm_ctxt *dlm); | ||
| 53 | void dlm_debug_shutdown(struct dlm_ctxt *dlm); | ||
| 54 | |||
| 55 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); | ||
| 56 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); | ||
| 57 | |||
| 58 | int dlm_create_debugfs_root(void); | ||
| 59 | void dlm_destroy_debugfs_root(void); | ||
| 60 | |||
| 61 | #else | ||
| 62 | |||
| 63 | static int dlm_debug_init(struct dlm_ctxt *dlm) | ||
| 64 | { | ||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | static void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
| 68 | { | ||
| 69 | } | ||
| 70 | static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 71 | { | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 75 | { | ||
| 76 | } | ||
| 77 | static int dlm_create_debugfs_root(void) | ||
| 78 | { | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | static void dlm_destroy_debugfs_root(void) | ||
| 82 | { | ||
| 83 | } | ||
| 84 | |||
| 85 | #endif /* CONFIG_DEBUG_FS */ | ||
| 86 | #endif /* DLMDEBUG_H */ | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0879d86113e3..63f8125824e8 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
| 34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
| 35 | #include <linux/err.h> | 35 | #include <linux/err.h> |
| 36 | #include <linux/debugfs.h> | ||
| 36 | 37 | ||
| 37 | #include "cluster/heartbeat.h" | 38 | #include "cluster/heartbeat.h" |
| 38 | #include "cluster/nodemanager.h" | 39 | #include "cluster/nodemanager.h" |
| @@ -40,8 +41,8 @@ | |||
| 40 | 41 | ||
| 41 | #include "dlmapi.h" | 42 | #include "dlmapi.h" |
| 42 | #include "dlmcommon.h" | 43 | #include "dlmcommon.h" |
| 43 | |||
| 44 | #include "dlmdomain.h" | 44 | #include "dlmdomain.h" |
| 45 | #include "dlmdebug.h" | ||
| 45 | 46 | ||
| 46 | #include "dlmver.h" | 47 | #include "dlmver.h" |
| 47 | 48 | ||
| @@ -298,6 +299,8 @@ static int dlm_wait_on_domain_helper(const char *domain) | |||
| 298 | 299 | ||
| 299 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) | 300 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) |
| 300 | { | 301 | { |
| 302 | dlm_destroy_debugfs_subroot(dlm); | ||
| 303 | |||
| 301 | if (dlm->lockres_hash) | 304 | if (dlm->lockres_hash) |
| 302 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 305 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |
| 303 | 306 | ||
| @@ -395,6 +398,7 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) | |||
| 395 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) | 398 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) |
| 396 | { | 399 | { |
| 397 | dlm_unregister_domain_handlers(dlm); | 400 | dlm_unregister_domain_handlers(dlm); |
| 401 | dlm_debug_shutdown(dlm); | ||
| 398 | dlm_complete_thread(dlm); | 402 | dlm_complete_thread(dlm); |
| 399 | dlm_complete_recovery_thread(dlm); | 403 | dlm_complete_recovery_thread(dlm); |
| 400 | dlm_destroy_dlm_worker(dlm); | 404 | dlm_destroy_dlm_worker(dlm); |
| @@ -644,6 +648,7 @@ int dlm_shutting_down(struct dlm_ctxt *dlm) | |||
| 644 | void dlm_unregister_domain(struct dlm_ctxt *dlm) | 648 | void dlm_unregister_domain(struct dlm_ctxt *dlm) |
| 645 | { | 649 | { |
| 646 | int leave = 0; | 650 | int leave = 0; |
| 651 | struct dlm_lock_resource *res; | ||
| 647 | 652 | ||
| 648 | spin_lock(&dlm_domain_lock); | 653 | spin_lock(&dlm_domain_lock); |
| 649 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); | 654 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); |
| @@ -673,6 +678,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
| 673 | msleep(500); | 678 | msleep(500); |
| 674 | mlog(0, "%s: more migration to do\n", dlm->name); | 679 | mlog(0, "%s: more migration to do\n", dlm->name); |
| 675 | } | 680 | } |
| 681 | |||
| 682 | /* This list should be empty. If not, print remaining lockres */ | ||
| 683 | if (!list_empty(&dlm->tracking_list)) { | ||
| 684 | mlog(ML_ERROR, "Following lockres' are still on the " | ||
| 685 | "tracking list:\n"); | ||
| 686 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
| 687 | dlm_print_one_lock_resource(res); | ||
| 688 | } | ||
| 689 | |||
| 676 | dlm_mark_domain_leaving(dlm); | 690 | dlm_mark_domain_leaving(dlm); |
| 677 | dlm_leave_domain(dlm); | 691 | dlm_leave_domain(dlm); |
| 678 | dlm_complete_dlm_shutdown(dlm); | 692 | dlm_complete_dlm_shutdown(dlm); |
| @@ -1405,6 +1419,12 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
| 1405 | goto bail; | 1419 | goto bail; |
| 1406 | } | 1420 | } |
| 1407 | 1421 | ||
| 1422 | status = dlm_debug_init(dlm); | ||
| 1423 | if (status < 0) { | ||
| 1424 | mlog_errno(status); | ||
| 1425 | goto bail; | ||
| 1426 | } | ||
| 1427 | |||
| 1408 | status = dlm_launch_thread(dlm); | 1428 | status = dlm_launch_thread(dlm); |
| 1409 | if (status < 0) { | 1429 | if (status < 0) { |
| 1410 | mlog_errno(status); | 1430 | mlog_errno(status); |
| @@ -1472,6 +1492,7 @@ bail: | |||
| 1472 | 1492 | ||
| 1473 | if (status) { | 1493 | if (status) { |
| 1474 | dlm_unregister_domain_handlers(dlm); | 1494 | dlm_unregister_domain_handlers(dlm); |
| 1495 | dlm_debug_shutdown(dlm); | ||
| 1475 | dlm_complete_thread(dlm); | 1496 | dlm_complete_thread(dlm); |
| 1476 | dlm_complete_recovery_thread(dlm); | 1497 | dlm_complete_recovery_thread(dlm); |
| 1477 | dlm_destroy_dlm_worker(dlm); | 1498 | dlm_destroy_dlm_worker(dlm); |
| @@ -1484,6 +1505,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1484 | u32 key) | 1505 | u32 key) |
| 1485 | { | 1506 | { |
| 1486 | int i; | 1507 | int i; |
| 1508 | int ret; | ||
| 1487 | struct dlm_ctxt *dlm = NULL; | 1509 | struct dlm_ctxt *dlm = NULL; |
| 1488 | 1510 | ||
| 1489 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | 1511 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); |
| @@ -1516,6 +1538,15 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1516 | dlm->key = key; | 1538 | dlm->key = key; |
| 1517 | dlm->node_num = o2nm_this_node(); | 1539 | dlm->node_num = o2nm_this_node(); |
| 1518 | 1540 | ||
| 1541 | ret = dlm_create_debugfs_subroot(dlm); | ||
| 1542 | if (ret < 0) { | ||
| 1543 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | ||
| 1544 | kfree(dlm->name); | ||
| 1545 | kfree(dlm); | ||
| 1546 | dlm = NULL; | ||
| 1547 | goto leave; | ||
| 1548 | } | ||
| 1549 | |||
| 1519 | spin_lock_init(&dlm->spinlock); | 1550 | spin_lock_init(&dlm->spinlock); |
| 1520 | spin_lock_init(&dlm->master_lock); | 1551 | spin_lock_init(&dlm->master_lock); |
| 1521 | spin_lock_init(&dlm->ast_lock); | 1552 | spin_lock_init(&dlm->ast_lock); |
| @@ -1526,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1526 | INIT_LIST_HEAD(&dlm->reco.node_data); | 1557 | INIT_LIST_HEAD(&dlm->reco.node_data); |
| 1527 | INIT_LIST_HEAD(&dlm->purge_list); | 1558 | INIT_LIST_HEAD(&dlm->purge_list); |
| 1528 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); | 1559 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); |
| 1560 | INIT_LIST_HEAD(&dlm->tracking_list); | ||
| 1529 | dlm->reco.state = 0; | 1561 | dlm->reco.state = 0; |
| 1530 | 1562 | ||
| 1531 | INIT_LIST_HEAD(&dlm->pending_asts); | 1563 | INIT_LIST_HEAD(&dlm->pending_asts); |
| @@ -1816,21 +1848,49 @@ static int __init dlm_init(void) | |||
| 1816 | dlm_print_version(); | 1848 | dlm_print_version(); |
| 1817 | 1849 | ||
| 1818 | status = dlm_init_mle_cache(); | 1850 | status = dlm_init_mle_cache(); |
| 1819 | if (status) | 1851 | if (status) { |
| 1820 | return -1; | 1852 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); |
| 1853 | goto error; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | status = dlm_init_master_caches(); | ||
| 1857 | if (status) { | ||
| 1858 | mlog(ML_ERROR, "Could not create o2dlm_lockres and " | ||
| 1859 | "o2dlm_lockname slabcaches\n"); | ||
| 1860 | goto error; | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | status = dlm_init_lock_cache(); | ||
| 1864 | if (status) { | ||
| 1865 | mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n"); | ||
| 1866 | goto error; | ||
| 1867 | } | ||
| 1821 | 1868 | ||
| 1822 | status = dlm_register_net_handlers(); | 1869 | status = dlm_register_net_handlers(); |
| 1823 | if (status) { | 1870 | if (status) { |
| 1824 | dlm_destroy_mle_cache(); | 1871 | mlog(ML_ERROR, "Unable to register network handlers\n"); |
| 1825 | return -1; | 1872 | goto error; |
| 1826 | } | 1873 | } |
| 1827 | 1874 | ||
| 1875 | status = dlm_create_debugfs_root(); | ||
| 1876 | if (status) | ||
| 1877 | goto error; | ||
| 1878 | |||
| 1828 | return 0; | 1879 | return 0; |
| 1880 | error: | ||
| 1881 | dlm_unregister_net_handlers(); | ||
| 1882 | dlm_destroy_lock_cache(); | ||
| 1883 | dlm_destroy_master_caches(); | ||
| 1884 | dlm_destroy_mle_cache(); | ||
| 1885 | return -1; | ||
| 1829 | } | 1886 | } |
| 1830 | 1887 | ||
| 1831 | static void __exit dlm_exit (void) | 1888 | static void __exit dlm_exit (void) |
| 1832 | { | 1889 | { |
| 1890 | dlm_destroy_debugfs_root(); | ||
| 1833 | dlm_unregister_net_handlers(); | 1891 | dlm_unregister_net_handlers(); |
| 1892 | dlm_destroy_lock_cache(); | ||
| 1893 | dlm_destroy_master_caches(); | ||
| 1834 | dlm_destroy_mle_cache(); | 1894 | dlm_destroy_mle_cache(); |
| 1835 | } | 1895 | } |
| 1836 | 1896 | ||
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 52578d907d9a..83a9f2972ac8 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
| @@ -53,6 +53,8 @@ | |||
| 53 | #define MLOG_MASK_PREFIX ML_DLM | 53 | #define MLOG_MASK_PREFIX ML_DLM |
| 54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
| 55 | 55 | ||
| 56 | static struct kmem_cache *dlm_lock_cache = NULL; | ||
| 57 | |||
| 56 | static DEFINE_SPINLOCK(dlm_cookie_lock); | 58 | static DEFINE_SPINLOCK(dlm_cookie_lock); |
| 57 | static u64 dlm_next_cookie = 1; | 59 | static u64 dlm_next_cookie = 1; |
| 58 | 60 | ||
| @@ -64,6 +66,22 @@ static void dlm_init_lock(struct dlm_lock *newlock, int type, | |||
| 64 | static void dlm_lock_release(struct kref *kref); | 66 | static void dlm_lock_release(struct kref *kref); |
| 65 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); | 67 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); |
| 66 | 68 | ||
| 69 | int dlm_init_lock_cache(void) | ||
| 70 | { | ||
| 71 | dlm_lock_cache = kmem_cache_create("o2dlm_lock", | ||
| 72 | sizeof(struct dlm_lock), | ||
| 73 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
| 74 | if (dlm_lock_cache == NULL) | ||
| 75 | return -ENOMEM; | ||
| 76 | return 0; | ||
| 77 | } | ||
| 78 | |||
| 79 | void dlm_destroy_lock_cache(void) | ||
| 80 | { | ||
| 81 | if (dlm_lock_cache) | ||
| 82 | kmem_cache_destroy(dlm_lock_cache); | ||
| 83 | } | ||
| 84 | |||
| 67 | /* Tell us whether we can grant a new lock request. | 85 | /* Tell us whether we can grant a new lock request. |
| 68 | * locking: | 86 | * locking: |
| 69 | * caller needs: res->spinlock | 87 | * caller needs: res->spinlock |
| @@ -353,7 +371,7 @@ static void dlm_lock_release(struct kref *kref) | |||
| 353 | mlog(0, "freeing kernel-allocated lksb\n"); | 371 | mlog(0, "freeing kernel-allocated lksb\n"); |
| 354 | kfree(lock->lksb); | 372 | kfree(lock->lksb); |
| 355 | } | 373 | } |
| 356 | kfree(lock); | 374 | kmem_cache_free(dlm_lock_cache, lock); |
| 357 | } | 375 | } |
| 358 | 376 | ||
| 359 | /* associate a lock with its lockres, getting a ref on the lockres */ | 377 | /* associate a lock with its lockres, getting a ref on the lockres */ |
| @@ -412,7 +430,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
| 412 | struct dlm_lock *lock; | 430 | struct dlm_lock *lock; |
| 413 | int kernel_allocated = 0; | 431 | int kernel_allocated = 0; |
| 414 | 432 | ||
| 415 | lock = kzalloc(sizeof(*lock), GFP_NOFS); | 433 | lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); |
| 416 | if (!lock) | 434 | if (!lock) |
| 417 | return NULL; | 435 | return NULL; |
| 418 | 436 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ea6b89577860..efc015c6128a 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -48,47 +48,11 @@ | |||
| 48 | #include "dlmapi.h" | 48 | #include "dlmapi.h" |
| 49 | #include "dlmcommon.h" | 49 | #include "dlmcommon.h" |
| 50 | #include "dlmdomain.h" | 50 | #include "dlmdomain.h" |
| 51 | #include "dlmdebug.h" | ||
| 51 | 52 | ||
| 52 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) | 53 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) |
| 53 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
| 54 | 55 | ||
| 55 | enum dlm_mle_type { | ||
| 56 | DLM_MLE_BLOCK, | ||
| 57 | DLM_MLE_MASTER, | ||
| 58 | DLM_MLE_MIGRATION | ||
| 59 | }; | ||
| 60 | |||
| 61 | struct dlm_lock_name | ||
| 62 | { | ||
| 63 | u8 len; | ||
| 64 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct dlm_master_list_entry | ||
| 68 | { | ||
| 69 | struct list_head list; | ||
| 70 | struct list_head hb_events; | ||
| 71 | struct dlm_ctxt *dlm; | ||
| 72 | spinlock_t spinlock; | ||
| 73 | wait_queue_head_t wq; | ||
| 74 | atomic_t woken; | ||
| 75 | struct kref mle_refs; | ||
| 76 | int inuse; | ||
| 77 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 78 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 79 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 80 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 81 | u8 master; | ||
| 82 | u8 new_master; | ||
| 83 | enum dlm_mle_type type; | ||
| 84 | struct o2hb_callback_func mle_hb_up; | ||
| 85 | struct o2hb_callback_func mle_hb_down; | ||
| 86 | union { | ||
| 87 | struct dlm_lock_resource *res; | ||
| 88 | struct dlm_lock_name name; | ||
| 89 | } u; | ||
| 90 | }; | ||
| 91 | |||
| 92 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, | 56 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, |
| 93 | struct dlm_master_list_entry *mle, | 57 | struct dlm_master_list_entry *mle, |
| 94 | struct o2nm_node *node, | 58 | struct o2nm_node *node, |
| @@ -128,98 +92,10 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | |||
| 128 | return 1; | 92 | return 1; |
| 129 | } | 93 | } |
| 130 | 94 | ||
| 131 | #define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m) | 95 | static struct kmem_cache *dlm_lockres_cache = NULL; |
| 132 | static void _dlm_print_nodemap(unsigned long *map, const char *mapname) | 96 | static struct kmem_cache *dlm_lockname_cache = NULL; |
| 133 | { | ||
| 134 | int i; | ||
| 135 | printk("%s=[ ", mapname); | ||
| 136 | for (i=0; i<O2NM_MAX_NODES; i++) | ||
| 137 | if (test_bit(i, map)) | ||
| 138 | printk("%d ", i); | ||
| 139 | printk("]"); | ||
| 140 | } | ||
| 141 | |||
| 142 | static void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
| 143 | { | ||
| 144 | int refs; | ||
| 145 | char *type; | ||
| 146 | char attached; | ||
| 147 | u8 master; | ||
| 148 | unsigned int namelen; | ||
| 149 | const char *name; | ||
| 150 | struct kref *k; | ||
| 151 | unsigned long *maybe = mle->maybe_map, | ||
| 152 | *vote = mle->vote_map, | ||
| 153 | *resp = mle->response_map, | ||
| 154 | *node = mle->node_map; | ||
| 155 | |||
| 156 | k = &mle->mle_refs; | ||
| 157 | if (mle->type == DLM_MLE_BLOCK) | ||
| 158 | type = "BLK"; | ||
| 159 | else if (mle->type == DLM_MLE_MASTER) | ||
| 160 | type = "MAS"; | ||
| 161 | else | ||
| 162 | type = "MIG"; | ||
| 163 | refs = atomic_read(&k->refcount); | ||
| 164 | master = mle->master; | ||
| 165 | attached = (list_empty(&mle->hb_events) ? 'N' : 'Y'); | ||
| 166 | |||
| 167 | if (mle->type != DLM_MLE_MASTER) { | ||
| 168 | namelen = mle->u.name.len; | ||
| 169 | name = mle->u.name.name; | ||
| 170 | } else { | ||
| 171 | namelen = mle->u.res->lockname.len; | ||
| 172 | name = mle->u.res->lockname.name; | ||
| 173 | } | ||
| 174 | |||
| 175 | mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ", | ||
| 176 | namelen, name, type, refs, master, mle->new_master, attached, | ||
| 177 | mle->inuse); | ||
| 178 | dlm_print_nodemap(maybe); | ||
| 179 | printk(", "); | ||
| 180 | dlm_print_nodemap(vote); | ||
| 181 | printk(", "); | ||
| 182 | dlm_print_nodemap(resp); | ||
| 183 | printk(", "); | ||
| 184 | dlm_print_nodemap(node); | ||
| 185 | printk(", "); | ||
| 186 | printk("\n"); | ||
| 187 | } | ||
| 188 | |||
| 189 | #if 0 | ||
| 190 | /* Code here is included but defined out as it aids debugging */ | ||
| 191 | |||
| 192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) | ||
| 193 | { | ||
| 194 | struct dlm_master_list_entry *mle; | ||
| 195 | |||
| 196 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); | ||
| 197 | spin_lock(&dlm->master_lock); | ||
| 198 | list_for_each_entry(mle, &dlm->master_list, list) | ||
| 199 | dlm_print_one_mle(mle); | ||
| 200 | spin_unlock(&dlm->master_lock); | ||
| 201 | } | ||
| 202 | |||
| 203 | int dlm_dump_all_mles(const char __user *data, unsigned int len) | ||
| 204 | { | ||
| 205 | struct dlm_ctxt *dlm; | ||
| 206 | |||
| 207 | spin_lock(&dlm_domain_lock); | ||
| 208 | list_for_each_entry(dlm, &dlm_domains, list) { | ||
| 209 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); | ||
| 210 | dlm_dump_mles(dlm); | ||
| 211 | } | ||
| 212 | spin_unlock(&dlm_domain_lock); | ||
| 213 | return len; | ||
| 214 | } | ||
| 215 | EXPORT_SYMBOL_GPL(dlm_dump_all_mles); | ||
| 216 | |||
| 217 | #endif /* 0 */ | ||
| 218 | |||
| 219 | |||
| 220 | static struct kmem_cache *dlm_mle_cache = NULL; | 97 | static struct kmem_cache *dlm_mle_cache = NULL; |
| 221 | 98 | ||
| 222 | |||
| 223 | static void dlm_mle_release(struct kref *kref); | 99 | static void dlm_mle_release(struct kref *kref); |
| 224 | static void dlm_init_mle(struct dlm_master_list_entry *mle, | 100 | static void dlm_init_mle(struct dlm_master_list_entry *mle, |
| 225 | enum dlm_mle_type type, | 101 | enum dlm_mle_type type, |
| @@ -507,7 +383,7 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm, | |||
| 507 | 383 | ||
| 508 | int dlm_init_mle_cache(void) | 384 | int dlm_init_mle_cache(void) |
| 509 | { | 385 | { |
| 510 | dlm_mle_cache = kmem_cache_create("dlm_mle_cache", | 386 | dlm_mle_cache = kmem_cache_create("o2dlm_mle", |
| 511 | sizeof(struct dlm_master_list_entry), | 387 | sizeof(struct dlm_master_list_entry), |
| 512 | 0, SLAB_HWCACHE_ALIGN, | 388 | 0, SLAB_HWCACHE_ALIGN, |
| 513 | NULL); | 389 | NULL); |
| @@ -560,6 +436,35 @@ static void dlm_mle_release(struct kref *kref) | |||
| 560 | * LOCK RESOURCE FUNCTIONS | 436 | * LOCK RESOURCE FUNCTIONS |
| 561 | */ | 437 | */ |
| 562 | 438 | ||
| 439 | int dlm_init_master_caches(void) | ||
| 440 | { | ||
| 441 | dlm_lockres_cache = kmem_cache_create("o2dlm_lockres", | ||
| 442 | sizeof(struct dlm_lock_resource), | ||
| 443 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
| 444 | if (!dlm_lockres_cache) | ||
| 445 | goto bail; | ||
| 446 | |||
| 447 | dlm_lockname_cache = kmem_cache_create("o2dlm_lockname", | ||
| 448 | DLM_LOCKID_NAME_MAX, 0, | ||
| 449 | SLAB_HWCACHE_ALIGN, NULL); | ||
| 450 | if (!dlm_lockname_cache) | ||
| 451 | goto bail; | ||
| 452 | |||
| 453 | return 0; | ||
| 454 | bail: | ||
| 455 | dlm_destroy_master_caches(); | ||
| 456 | return -ENOMEM; | ||
| 457 | } | ||
| 458 | |||
| 459 | void dlm_destroy_master_caches(void) | ||
| 460 | { | ||
| 461 | if (dlm_lockname_cache) | ||
| 462 | kmem_cache_destroy(dlm_lockname_cache); | ||
| 463 | |||
| 464 | if (dlm_lockres_cache) | ||
| 465 | kmem_cache_destroy(dlm_lockres_cache); | ||
| 466 | } | ||
| 467 | |||
| 563 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, | 468 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, |
| 564 | struct dlm_lock_resource *res, | 469 | struct dlm_lock_resource *res, |
| 565 | u8 owner) | 470 | u8 owner) |
| @@ -610,6 +515,14 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 610 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, | 515 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, |
| 611 | res->lockname.name); | 516 | res->lockname.name); |
| 612 | 517 | ||
| 518 | if (!list_empty(&res->tracking)) | ||
| 519 | list_del_init(&res->tracking); | ||
| 520 | else { | ||
| 521 | mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", | ||
| 522 | res->lockname.len, res->lockname.name); | ||
| 523 | dlm_print_one_lock_resource(res); | ||
| 524 | } | ||
| 525 | |||
| 613 | if (!hlist_unhashed(&res->hash_node) || | 526 | if (!hlist_unhashed(&res->hash_node) || |
| 614 | !list_empty(&res->granted) || | 527 | !list_empty(&res->granted) || |
| 615 | !list_empty(&res->converting) || | 528 | !list_empty(&res->converting) || |
| @@ -642,9 +555,9 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 642 | BUG_ON(!list_empty(&res->recovering)); | 555 | BUG_ON(!list_empty(&res->recovering)); |
| 643 | BUG_ON(!list_empty(&res->purge)); | 556 | BUG_ON(!list_empty(&res->purge)); |
| 644 | 557 | ||
| 645 | kfree(res->lockname.name); | 558 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); |
| 646 | 559 | ||
| 647 | kfree(res); | 560 | kmem_cache_free(dlm_lockres_cache, res); |
| 648 | } | 561 | } |
| 649 | 562 | ||
| 650 | void dlm_lockres_put(struct dlm_lock_resource *res) | 563 | void dlm_lockres_put(struct dlm_lock_resource *res) |
| @@ -677,6 +590,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 677 | INIT_LIST_HEAD(&res->dirty); | 590 | INIT_LIST_HEAD(&res->dirty); |
| 678 | INIT_LIST_HEAD(&res->recovering); | 591 | INIT_LIST_HEAD(&res->recovering); |
| 679 | INIT_LIST_HEAD(&res->purge); | 592 | INIT_LIST_HEAD(&res->purge); |
| 593 | INIT_LIST_HEAD(&res->tracking); | ||
| 680 | atomic_set(&res->asts_reserved, 0); | 594 | atomic_set(&res->asts_reserved, 0); |
| 681 | res->migration_pending = 0; | 595 | res->migration_pending = 0; |
| 682 | res->inflight_locks = 0; | 596 | res->inflight_locks = 0; |
| @@ -692,6 +606,8 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 692 | 606 | ||
| 693 | res->last_used = 0; | 607 | res->last_used = 0; |
| 694 | 608 | ||
| 609 | list_add_tail(&res->tracking, &dlm->tracking_list); | ||
| 610 | |||
| 695 | memset(res->lvb, 0, DLM_LVB_LEN); | 611 | memset(res->lvb, 0, DLM_LVB_LEN); |
| 696 | memset(res->refmap, 0, sizeof(res->refmap)); | 612 | memset(res->refmap, 0, sizeof(res->refmap)); |
| 697 | } | 613 | } |
| @@ -700,20 +616,28 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
| 700 | const char *name, | 616 | const char *name, |
| 701 | unsigned int namelen) | 617 | unsigned int namelen) |
| 702 | { | 618 | { |
| 703 | struct dlm_lock_resource *res; | 619 | struct dlm_lock_resource *res = NULL; |
| 704 | 620 | ||
| 705 | res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS); | 621 | res = (struct dlm_lock_resource *) |
| 622 | kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); | ||
| 706 | if (!res) | 623 | if (!res) |
| 707 | return NULL; | 624 | goto error; |
| 708 | 625 | ||
| 709 | res->lockname.name = kmalloc(namelen, GFP_NOFS); | 626 | res->lockname.name = (char *) |
| 710 | if (!res->lockname.name) { | 627 | kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); |
| 711 | kfree(res); | 628 | if (!res->lockname.name) |
| 712 | return NULL; | 629 | goto error; |
| 713 | } | ||
| 714 | 630 | ||
| 715 | dlm_init_lockres(dlm, res, name, namelen); | 631 | dlm_init_lockres(dlm, res, name, namelen); |
| 716 | return res; | 632 | return res; |
| 633 | |||
| 634 | error: | ||
| 635 | if (res && res->lockname.name) | ||
| 636 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); | ||
| 637 | |||
| 638 | if (res) | ||
| 639 | kmem_cache_free(dlm_lockres_cache, res); | ||
| 640 | return NULL; | ||
| 717 | } | 641 | } |
| 718 | 642 | ||
| 719 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 643 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1f1873bf41fb..394d25a131a5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -27,18 +27,11 @@ | |||
| 27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
| 29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/crc32.h> | ||
| 31 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
| 32 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
| 33 | #include <linux/debugfs.h> | 32 | #include <linux/debugfs.h> |
| 34 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
| 35 | 34 | ||
| 36 | #include <cluster/heartbeat.h> | ||
| 37 | #include <cluster/nodemanager.h> | ||
| 38 | #include <cluster/tcp.h> | ||
| 39 | |||
| 40 | #include <dlm/dlmapi.h> | ||
| 41 | |||
| 42 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 35 | #define MLOG_MASK_PREFIX ML_DLM_GLUE |
| 43 | #include <cluster/masklog.h> | 36 | #include <cluster/masklog.h> |
| 44 | 37 | ||
| @@ -53,6 +46,7 @@ | |||
| 53 | #include "heartbeat.h" | 46 | #include "heartbeat.h" |
| 54 | #include "inode.h" | 47 | #include "inode.h" |
| 55 | #include "journal.h" | 48 | #include "journal.h" |
| 49 | #include "stackglue.h" | ||
| 56 | #include "slot_map.h" | 50 | #include "slot_map.h" |
| 57 | #include "super.h" | 51 | #include "super.h" |
| 58 | #include "uptodate.h" | 52 | #include "uptodate.h" |
| @@ -113,7 +107,8 @@ static void ocfs2_dump_meta_lvb_info(u64 level, | |||
| 113 | unsigned int line, | 107 | unsigned int line, |
| 114 | struct ocfs2_lock_res *lockres) | 108 | struct ocfs2_lock_res *lockres) |
| 115 | { | 109 | { |
| 116 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 110 | struct ocfs2_meta_lvb *lvb = |
| 111 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
| 117 | 112 | ||
| 118 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 113 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
| 119 | lockres->l_name, function, line); | 114 | lockres->l_name, function, line); |
| @@ -259,31 +254,6 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | |||
| 259 | .flags = 0, | 254 | .flags = 0, |
| 260 | }; | 255 | }; |
| 261 | 256 | ||
| 262 | /* | ||
| 263 | * This is the filesystem locking protocol version. | ||
| 264 | * | ||
| 265 | * Whenever the filesystem does new things with locks (adds or removes a | ||
| 266 | * lock, orders them differently, does different things underneath a lock), | ||
| 267 | * the version must be changed. The protocol is negotiated when joining | ||
| 268 | * the dlm domain. A node may join the domain if its major version is | ||
| 269 | * identical to all other nodes and its minor version is greater than | ||
| 270 | * or equal to all other nodes. When its minor version is greater than | ||
| 271 | * the other nodes, it will run at the minor version specified by the | ||
| 272 | * other nodes. | ||
| 273 | * | ||
| 274 | * If a locking change is made that will not be compatible with older | ||
| 275 | * versions, the major number must be increased and the minor version set | ||
| 276 | * to zero. If a change merely adds a behavior that can be disabled when | ||
| 277 | * speaking to older versions, the minor version must be increased. If a | ||
| 278 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
| 279 | * are just ignored by older versions), the version does not need to be | ||
| 280 | * updated. | ||
| 281 | */ | ||
| 282 | const struct dlm_protocol_version ocfs2_locking_protocol = { | ||
| 283 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
| 284 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
| 285 | }; | ||
| 286 | |||
| 287 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 257 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
| 288 | { | 258 | { |
| 289 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 259 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
| @@ -316,7 +286,7 @@ static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *l | |||
| 316 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 286 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
| 317 | struct ocfs2_lock_res *lockres, | 287 | struct ocfs2_lock_res *lockres, |
| 318 | int level, | 288 | int level, |
| 319 | int dlm_flags); | 289 | u32 dlm_flags); |
| 320 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 290 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, |
| 321 | int wanted); | 291 | int wanted); |
| 322 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 292 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, |
| @@ -330,10 +300,9 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
| 330 | struct ocfs2_lock_res *lockres); | 300 | struct ocfs2_lock_res *lockres); |
| 331 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 301 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
| 332 | int convert); | 302 | int convert); |
| 333 | #define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ | 303 | #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ |
| 334 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 304 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ |
| 335 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 305 | _err, _func, _lockres->l_name); \ |
| 336 | _lockres->l_name, dlm_errmsg(_stat)); \ | ||
| 337 | } while (0) | 306 | } while (0) |
| 338 | static int ocfs2_downconvert_thread(void *arg); | 307 | static int ocfs2_downconvert_thread(void *arg); |
| 339 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 308 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
| @@ -342,12 +311,13 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
| 342 | struct buffer_head **bh); | 311 | struct buffer_head **bh); |
| 343 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 312 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
| 344 | static inline int ocfs2_highest_compat_lock_level(int level); | 313 | static inline int ocfs2_highest_compat_lock_level(int level); |
| 345 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 314 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
| 346 | int new_level); | 315 | int new_level); |
| 347 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 316 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
| 348 | struct ocfs2_lock_res *lockres, | 317 | struct ocfs2_lock_res *lockres, |
| 349 | int new_level, | 318 | int new_level, |
| 350 | int lvb); | 319 | int lvb, |
| 320 | unsigned int generation); | ||
| 351 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 321 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
| 352 | struct ocfs2_lock_res *lockres); | 322 | struct ocfs2_lock_res *lockres); |
| 353 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 323 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, |
| @@ -406,9 +376,9 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | |||
| 406 | res->l_ops = ops; | 376 | res->l_ops = ops; |
| 407 | res->l_priv = priv; | 377 | res->l_priv = priv; |
| 408 | 378 | ||
| 409 | res->l_level = LKM_IVMODE; | 379 | res->l_level = DLM_LOCK_IV; |
| 410 | res->l_requested = LKM_IVMODE; | 380 | res->l_requested = DLM_LOCK_IV; |
| 411 | res->l_blocking = LKM_IVMODE; | 381 | res->l_blocking = DLM_LOCK_IV; |
| 412 | res->l_action = OCFS2_AST_INVALID; | 382 | res->l_action = OCFS2_AST_INVALID; |
| 413 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 383 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; |
| 414 | 384 | ||
| @@ -604,10 +574,10 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | |||
| 604 | BUG_ON(!lockres); | 574 | BUG_ON(!lockres); |
| 605 | 575 | ||
| 606 | switch(level) { | 576 | switch(level) { |
| 607 | case LKM_EXMODE: | 577 | case DLM_LOCK_EX: |
| 608 | lockres->l_ex_holders++; | 578 | lockres->l_ex_holders++; |
| 609 | break; | 579 | break; |
| 610 | case LKM_PRMODE: | 580 | case DLM_LOCK_PR: |
| 611 | lockres->l_ro_holders++; | 581 | lockres->l_ro_holders++; |
| 612 | break; | 582 | break; |
| 613 | default: | 583 | default: |
| @@ -625,11 +595,11 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
| 625 | BUG_ON(!lockres); | 595 | BUG_ON(!lockres); |
| 626 | 596 | ||
| 627 | switch(level) { | 597 | switch(level) { |
| 628 | case LKM_EXMODE: | 598 | case DLM_LOCK_EX: |
| 629 | BUG_ON(!lockres->l_ex_holders); | 599 | BUG_ON(!lockres->l_ex_holders); |
| 630 | lockres->l_ex_holders--; | 600 | lockres->l_ex_holders--; |
| 631 | break; | 601 | break; |
| 632 | case LKM_PRMODE: | 602 | case DLM_LOCK_PR: |
| 633 | BUG_ON(!lockres->l_ro_holders); | 603 | BUG_ON(!lockres->l_ro_holders); |
| 634 | lockres->l_ro_holders--; | 604 | lockres->l_ro_holders--; |
| 635 | break; | 605 | break; |
| @@ -644,12 +614,12 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
| 644 | * lock types are added. */ | 614 | * lock types are added. */ |
| 645 | static inline int ocfs2_highest_compat_lock_level(int level) | 615 | static inline int ocfs2_highest_compat_lock_level(int level) |
| 646 | { | 616 | { |
| 647 | int new_level = LKM_EXMODE; | 617 | int new_level = DLM_LOCK_EX; |
| 648 | 618 | ||
| 649 | if (level == LKM_EXMODE) | 619 | if (level == DLM_LOCK_EX) |
| 650 | new_level = LKM_NLMODE; | 620 | new_level = DLM_LOCK_NL; |
| 651 | else if (level == LKM_PRMODE) | 621 | else if (level == DLM_LOCK_PR) |
| 652 | new_level = LKM_PRMODE; | 622 | new_level = DLM_LOCK_PR; |
| 653 | return new_level; | 623 | return new_level; |
| 654 | } | 624 | } |
| 655 | 625 | ||
| @@ -688,12 +658,12 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res | |||
| 688 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 658 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); |
| 689 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 659 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); |
| 690 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 660 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); |
| 691 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 661 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
| 692 | 662 | ||
| 693 | lockres->l_level = lockres->l_requested; | 663 | lockres->l_level = lockres->l_requested; |
| 694 | if (lockres->l_level <= | 664 | if (lockres->l_level <= |
| 695 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 665 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { |
| 696 | lockres->l_blocking = LKM_NLMODE; | 666 | lockres->l_blocking = DLM_LOCK_NL; |
| 697 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 667 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 698 | } | 668 | } |
| 699 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 669 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| @@ -712,7 +682,7 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
| 712 | * information is already up to data. Convert from NL to | 682 | * information is already up to data. Convert from NL to |
| 713 | * *anything* however should mark ourselves as needing an | 683 | * *anything* however should mark ourselves as needing an |
| 714 | * update */ | 684 | * update */ |
| 715 | if (lockres->l_level == LKM_NLMODE && | 685 | if (lockres->l_level == DLM_LOCK_NL && |
| 716 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 686 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
| 717 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 687 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| 718 | 688 | ||
| @@ -729,7 +699,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
| 729 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); | 699 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); |
| 730 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 700 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| 731 | 701 | ||
| 732 | if (lockres->l_requested > LKM_NLMODE && | 702 | if (lockres->l_requested > DLM_LOCK_NL && |
| 733 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 703 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && |
| 734 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 704 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
| 735 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 705 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| @@ -767,6 +737,113 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
| 767 | return needs_downconvert; | 737 | return needs_downconvert; |
| 768 | } | 738 | } |
| 769 | 739 | ||
| 740 | /* | ||
| 741 | * OCFS2_LOCK_PENDING and l_pending_gen. | ||
| 742 | * | ||
| 743 | * Why does OCFS2_LOCK_PENDING exist? To close a race between setting | ||
| 744 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() | ||
| 745 | * for more details on the race. | ||
| 746 | * | ||
| 747 | * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces | ||
| 748 | * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() | ||
| 749 | * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear | ||
| 750 | * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, | ||
| 751 | * the caller is going to try to clear PENDING again. If nothing else is | ||
| 752 | * happening, __lockres_clear_pending() sees PENDING is unset and does | ||
| 753 | * nothing. | ||
| 754 | * | ||
| 755 | * But what if another path (eg downconvert thread) has just started a | ||
| 756 | * new locking action? The other path has re-set PENDING. Our path | ||
| 757 | * cannot clear PENDING, because that will re-open the original race | ||
| 758 | * window. | ||
| 759 | * | ||
| 760 | * [Example] | ||
| 761 | * | ||
| 762 | * ocfs2_meta_lock() | ||
| 763 | * ocfs2_cluster_lock() | ||
| 764 | * set BUSY | ||
| 765 | * set PENDING | ||
| 766 | * drop l_lock | ||
| 767 | * ocfs2_dlm_lock() | ||
| 768 | * ocfs2_locking_ast() ocfs2_downconvert_thread() | ||
| 769 | * clear PENDING ocfs2_unblock_lock() | ||
| 770 | * take_l_lock | ||
| 771 | * !BUSY | ||
| 772 | * ocfs2_prepare_downconvert() | ||
| 773 | * set BUSY | ||
| 774 | * set PENDING | ||
| 775 | * drop l_lock | ||
| 776 | * take l_lock | ||
| 777 | * clear PENDING | ||
| 778 | * drop l_lock | ||
| 779 | * <window> | ||
| 780 | * ocfs2_dlm_lock() | ||
| 781 | * | ||
| 782 | * So as you can see, we now have a window where l_lock is not held, | ||
| 783 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. | ||
| 784 | * | ||
| 785 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | ||
| 786 | * set by ocfs2_prepare_downconvert(). That wasn't nice. | ||
| 787 | * | ||
| 788 | * To solve this we introduce l_pending_gen. A call to | ||
| 789 | * lockres_clear_pending() will only do so when it is passed a generation | ||
| 790 | * number that matches the lockres. lockres_set_pending() will return the | ||
| 791 | * current generation number. When ocfs2_cluster_lock() goes to clear | ||
| 792 | * PENDING, it passes the generation it got from set_pending(). In our | ||
| 793 | * example above, the generation numbers will *not* match. Thus, | ||
| 794 | * ocfs2_cluster_lock() will not clear the PENDING set by | ||
| 795 | * ocfs2_prepare_downconvert(). | ||
| 796 | */ | ||
| 797 | |||
| 798 | /* Unlocked version for ocfs2_locking_ast() */ | ||
| 799 | static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
| 800 | unsigned int generation, | ||
| 801 | struct ocfs2_super *osb) | ||
| 802 | { | ||
| 803 | assert_spin_locked(&lockres->l_lock); | ||
| 804 | |||
| 805 | /* | ||
| 806 | * The ast and locking functions can race us here. The winner | ||
| 807 | * will clear pending, the loser will not. | ||
| 808 | */ | ||
| 809 | if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || | ||
| 810 | (lockres->l_pending_gen != generation)) | ||
| 811 | return; | ||
| 812 | |||
| 813 | lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); | ||
| 814 | lockres->l_pending_gen++; | ||
| 815 | |||
| 816 | /* | ||
| 817 | * The downconvert thread may have skipped us because we | ||
| 818 | * were PENDING. Wake it up. | ||
| 819 | */ | ||
| 820 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | ||
| 821 | ocfs2_wake_downconvert_thread(osb); | ||
| 822 | } | ||
| 823 | |||
| 824 | /* Locked version for callers of ocfs2_dlm_lock() */ | ||
| 825 | static void lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
| 826 | unsigned int generation, | ||
| 827 | struct ocfs2_super *osb) | ||
| 828 | { | ||
| 829 | unsigned long flags; | ||
| 830 | |||
| 831 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 832 | __lockres_clear_pending(lockres, generation, osb); | ||
| 833 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 834 | } | ||
| 835 | |||
| 836 | static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | ||
| 837 | { | ||
| 838 | assert_spin_locked(&lockres->l_lock); | ||
| 839 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | ||
| 840 | |||
| 841 | lockres_or_flags(lockres, OCFS2_LOCK_PENDING); | ||
| 842 | |||
| 843 | return lockres->l_pending_gen; | ||
| 844 | } | ||
| 845 | |||
| 846 | |||
| 770 | static void ocfs2_blocking_ast(void *opaque, int level) | 847 | static void ocfs2_blocking_ast(void *opaque, int level) |
| 771 | { | 848 | { |
| 772 | struct ocfs2_lock_res *lockres = opaque; | 849 | struct ocfs2_lock_res *lockres = opaque; |
| @@ -774,7 +851,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 774 | int needs_downconvert; | 851 | int needs_downconvert; |
| 775 | unsigned long flags; | 852 | unsigned long flags; |
| 776 | 853 | ||
| 777 | BUG_ON(level <= LKM_NLMODE); | 854 | BUG_ON(level <= DLM_LOCK_NL); |
| 778 | 855 | ||
| 779 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 856 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", |
| 780 | lockres->l_name, level, lockres->l_level, | 857 | lockres->l_name, level, lockres->l_level, |
| @@ -801,14 +878,22 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 801 | static void ocfs2_locking_ast(void *opaque) | 878 | static void ocfs2_locking_ast(void *opaque) |
| 802 | { | 879 | { |
| 803 | struct ocfs2_lock_res *lockres = opaque; | 880 | struct ocfs2_lock_res *lockres = opaque; |
| 804 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 881 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
| 805 | unsigned long flags; | 882 | unsigned long flags; |
| 883 | int status; | ||
| 806 | 884 | ||
| 807 | spin_lock_irqsave(&lockres->l_lock, flags); | 885 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 808 | 886 | ||
| 809 | if (lksb->status != DLM_NORMAL) { | 887 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); |
| 810 | mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", | 888 | |
| 811 | lockres->l_name, lksb->status); | 889 | if (status == -EAGAIN) { |
| 890 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | ||
| 891 | goto out; | ||
| 892 | } | ||
| 893 | |||
| 894 | if (status) { | ||
| 895 | mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", | ||
| 896 | lockres->l_name, status); | ||
| 812 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 897 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 813 | return; | 898 | return; |
| 814 | } | 899 | } |
| @@ -831,11 +916,23 @@ static void ocfs2_locking_ast(void *opaque) | |||
| 831 | lockres->l_unlock_action); | 916 | lockres->l_unlock_action); |
| 832 | BUG(); | 917 | BUG(); |
| 833 | } | 918 | } |
| 834 | 919 | out: | |
| 835 | /* set it to something invalid so if we get called again we | 920 | /* set it to something invalid so if we get called again we |
| 836 | * can catch it. */ | 921 | * can catch it. */ |
| 837 | lockres->l_action = OCFS2_AST_INVALID; | 922 | lockres->l_action = OCFS2_AST_INVALID; |
| 838 | 923 | ||
| 924 | /* Did we try to cancel this lock? Clear that state */ | ||
| 925 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) | ||
| 926 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | ||
| 927 | |||
| 928 | /* | ||
| 929 | * We may have beaten the locking functions here. We certainly | ||
| 930 | * know that dlm_lock() has been called :-) | ||
| 931 | * Because we can't have two lock calls in flight at once, we | ||
| 932 | * can use lockres->l_pending_gen. | ||
| 933 | */ | ||
| 934 | __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); | ||
| 935 | |||
| 839 | wake_up(&lockres->l_event); | 936 | wake_up(&lockres->l_event); |
| 840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 937 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 841 | } | 938 | } |
| @@ -865,15 +962,15 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
| 865 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 962 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
| 866 | struct ocfs2_lock_res *lockres, | 963 | struct ocfs2_lock_res *lockres, |
| 867 | int level, | 964 | int level, |
| 868 | int dlm_flags) | 965 | u32 dlm_flags) |
| 869 | { | 966 | { |
| 870 | int ret = 0; | 967 | int ret = 0; |
| 871 | enum dlm_status status = DLM_NORMAL; | ||
| 872 | unsigned long flags; | 968 | unsigned long flags; |
| 969 | unsigned int gen; | ||
| 873 | 970 | ||
| 874 | mlog_entry_void(); | 971 | mlog_entry_void(); |
| 875 | 972 | ||
| 876 | mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, | 973 | mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, |
| 877 | dlm_flags); | 974 | dlm_flags); |
| 878 | 975 | ||
| 879 | spin_lock_irqsave(&lockres->l_lock, flags); | 976 | spin_lock_irqsave(&lockres->l_lock, flags); |
| @@ -886,24 +983,23 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
| 886 | lockres->l_action = OCFS2_AST_ATTACH; | 983 | lockres->l_action = OCFS2_AST_ATTACH; |
| 887 | lockres->l_requested = level; | 984 | lockres->l_requested = level; |
| 888 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 985 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 986 | gen = lockres_set_pending(lockres); | ||
| 889 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 987 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 890 | 988 | ||
| 891 | status = dlmlock(osb->dlm, | 989 | ret = ocfs2_dlm_lock(osb->cconn, |
| 892 | level, | 990 | level, |
| 893 | &lockres->l_lksb, | 991 | &lockres->l_lksb, |
| 894 | dlm_flags, | 992 | dlm_flags, |
| 895 | lockres->l_name, | 993 | lockres->l_name, |
| 896 | OCFS2_LOCK_ID_MAX_LEN - 1, | 994 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 897 | ocfs2_locking_ast, | 995 | lockres); |
| 898 | lockres, | 996 | lockres_clear_pending(lockres, gen, osb); |
| 899 | ocfs2_blocking_ast); | 997 | if (ret) { |
| 900 | if (status != DLM_NORMAL) { | 998 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 901 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
| 902 | ret = -EINVAL; | ||
| 903 | ocfs2_recover_from_dlm_error(lockres, 1); | 999 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 904 | } | 1000 | } |
| 905 | 1001 | ||
| 906 | mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name); | 1002 | mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); |
| 907 | 1003 | ||
| 908 | bail: | 1004 | bail: |
| 909 | mlog_exit(ret); | 1005 | mlog_exit(ret); |
| @@ -1016,21 +1112,22 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | |||
| 1016 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 1112 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
| 1017 | struct ocfs2_lock_res *lockres, | 1113 | struct ocfs2_lock_res *lockres, |
| 1018 | int level, | 1114 | int level, |
| 1019 | int lkm_flags, | 1115 | u32 lkm_flags, |
| 1020 | int arg_flags) | 1116 | int arg_flags) |
| 1021 | { | 1117 | { |
| 1022 | struct ocfs2_mask_waiter mw; | 1118 | struct ocfs2_mask_waiter mw; |
| 1023 | enum dlm_status status; | ||
| 1024 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); | 1119 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); |
| 1025 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ | 1120 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ |
| 1026 | unsigned long flags; | 1121 | unsigned long flags; |
| 1122 | unsigned int gen; | ||
| 1123 | int noqueue_attempted = 0; | ||
| 1027 | 1124 | ||
| 1028 | mlog_entry_void(); | 1125 | mlog_entry_void(); |
| 1029 | 1126 | ||
| 1030 | ocfs2_init_mask_waiter(&mw); | 1127 | ocfs2_init_mask_waiter(&mw); |
| 1031 | 1128 | ||
| 1032 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1129 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| 1033 | lkm_flags |= LKM_VALBLK; | 1130 | lkm_flags |= DLM_LKF_VALBLK; |
| 1034 | 1131 | ||
| 1035 | again: | 1132 | again: |
| 1036 | wait = 0; | 1133 | wait = 0; |
| @@ -1068,52 +1165,56 @@ again: | |||
| 1068 | } | 1165 | } |
| 1069 | 1166 | ||
| 1070 | if (level > lockres->l_level) { | 1167 | if (level > lockres->l_level) { |
| 1168 | if (noqueue_attempted > 0) { | ||
| 1169 | ret = -EAGAIN; | ||
| 1170 | goto unlock; | ||
| 1171 | } | ||
| 1172 | if (lkm_flags & DLM_LKF_NOQUEUE) | ||
| 1173 | noqueue_attempted = 1; | ||
| 1174 | |||
| 1071 | if (lockres->l_action != OCFS2_AST_INVALID) | 1175 | if (lockres->l_action != OCFS2_AST_INVALID) |
| 1072 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 1176 | mlog(ML_ERROR, "lockres %s has action %u pending\n", |
| 1073 | lockres->l_name, lockres->l_action); | 1177 | lockres->l_name, lockres->l_action); |
| 1074 | 1178 | ||
| 1075 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 1179 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
| 1076 | lockres->l_action = OCFS2_AST_ATTACH; | 1180 | lockres->l_action = OCFS2_AST_ATTACH; |
| 1077 | lkm_flags &= ~LKM_CONVERT; | 1181 | lkm_flags &= ~DLM_LKF_CONVERT; |
| 1078 | } else { | 1182 | } else { |
| 1079 | lockres->l_action = OCFS2_AST_CONVERT; | 1183 | lockres->l_action = OCFS2_AST_CONVERT; |
| 1080 | lkm_flags |= LKM_CONVERT; | 1184 | lkm_flags |= DLM_LKF_CONVERT; |
| 1081 | } | 1185 | } |
| 1082 | 1186 | ||
| 1083 | lockres->l_requested = level; | 1187 | lockres->l_requested = level; |
| 1084 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1188 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 1189 | gen = lockres_set_pending(lockres); | ||
| 1085 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1190 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1086 | 1191 | ||
| 1087 | BUG_ON(level == LKM_IVMODE); | 1192 | BUG_ON(level == DLM_LOCK_IV); |
| 1088 | BUG_ON(level == LKM_NLMODE); | 1193 | BUG_ON(level == DLM_LOCK_NL); |
| 1089 | 1194 | ||
| 1090 | mlog(0, "lock %s, convert from %d to level = %d\n", | 1195 | mlog(0, "lock %s, convert from %d to level = %d\n", |
| 1091 | lockres->l_name, lockres->l_level, level); | 1196 | lockres->l_name, lockres->l_level, level); |
| 1092 | 1197 | ||
| 1093 | /* call dlm_lock to upgrade lock now */ | 1198 | /* call dlm_lock to upgrade lock now */ |
| 1094 | status = dlmlock(osb->dlm, | 1199 | ret = ocfs2_dlm_lock(osb->cconn, |
| 1095 | level, | 1200 | level, |
| 1096 | &lockres->l_lksb, | 1201 | &lockres->l_lksb, |
| 1097 | lkm_flags, | 1202 | lkm_flags, |
| 1098 | lockres->l_name, | 1203 | lockres->l_name, |
| 1099 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1204 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 1100 | ocfs2_locking_ast, | 1205 | lockres); |
| 1101 | lockres, | 1206 | lockres_clear_pending(lockres, gen, osb); |
| 1102 | ocfs2_blocking_ast); | 1207 | if (ret) { |
| 1103 | if (status != DLM_NORMAL) { | 1208 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || |
| 1104 | if ((lkm_flags & LKM_NOQUEUE) && | 1209 | (ret != -EAGAIN)) { |
| 1105 | (status == DLM_NOTQUEUED)) | 1210 | ocfs2_log_dlm_error("ocfs2_dlm_lock", |
| 1106 | ret = -EAGAIN; | 1211 | ret, lockres); |
| 1107 | else { | ||
| 1108 | ocfs2_log_dlm_error("dlmlock", status, | ||
| 1109 | lockres); | ||
| 1110 | ret = -EINVAL; | ||
| 1111 | } | 1212 | } |
| 1112 | ocfs2_recover_from_dlm_error(lockres, 1); | 1213 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 1113 | goto out; | 1214 | goto out; |
| 1114 | } | 1215 | } |
| 1115 | 1216 | ||
| 1116 | mlog(0, "lock %s, successfull return from dlmlock\n", | 1217 | mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", |
| 1117 | lockres->l_name); | 1218 | lockres->l_name); |
| 1118 | 1219 | ||
| 1119 | /* At this point we've gone inside the dlm and need to | 1220 | /* At this point we've gone inside the dlm and need to |
| @@ -1177,9 +1278,9 @@ static int ocfs2_create_new_lock(struct ocfs2_super *osb, | |||
| 1177 | int ex, | 1278 | int ex, |
| 1178 | int local) | 1279 | int local) |
| 1179 | { | 1280 | { |
| 1180 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 1281 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1181 | unsigned long flags; | 1282 | unsigned long flags; |
| 1182 | int lkm_flags = local ? LKM_LOCAL : 0; | 1283 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; |
| 1183 | 1284 | ||
| 1184 | spin_lock_irqsave(&lockres->l_lock, flags); | 1285 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 1185 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1286 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| @@ -1222,7 +1323,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
| 1222 | } | 1323 | } |
| 1223 | 1324 | ||
| 1224 | /* | 1325 | /* |
| 1225 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1326 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they |
| 1226 | * don't use a generation in their lock names. | 1327 | * don't use a generation in their lock names. |
| 1227 | */ | 1328 | */ |
| 1228 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 1329 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
| @@ -1261,7 +1362,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
| 1261 | 1362 | ||
| 1262 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1363 | lockres = &OCFS2_I(inode)->ip_rw_lockres; |
| 1263 | 1364 | ||
| 1264 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1365 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1265 | 1366 | ||
| 1266 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 1367 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, |
| 1267 | 0); | 1368 | 0); |
| @@ -1274,7 +1375,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
| 1274 | 1375 | ||
| 1275 | void ocfs2_rw_unlock(struct inode *inode, int write) | 1376 | void ocfs2_rw_unlock(struct inode *inode, int write) |
| 1276 | { | 1377 | { |
| 1277 | int level = write ? LKM_EXMODE : LKM_PRMODE; | 1378 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1278 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1379 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; |
| 1279 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1380 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1280 | 1381 | ||
| @@ -1312,7 +1413,7 @@ int ocfs2_open_lock(struct inode *inode) | |||
| 1312 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1413 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
| 1313 | 1414 | ||
| 1314 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1415 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
| 1315 | LKM_PRMODE, 0, 0); | 1416 | DLM_LOCK_PR, 0, 0); |
| 1316 | if (status < 0) | 1417 | if (status < 0) |
| 1317 | mlog_errno(status); | 1418 | mlog_errno(status); |
| 1318 | 1419 | ||
| @@ -1340,16 +1441,16 @@ int ocfs2_try_open_lock(struct inode *inode, int write) | |||
| 1340 | 1441 | ||
| 1341 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1442 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
| 1342 | 1443 | ||
| 1343 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1444 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1344 | 1445 | ||
| 1345 | /* | 1446 | /* |
| 1346 | * The file system may already be holding a PRMODE/EXMODE open lock. | 1447 | * The file system may already be holding a PRMODE/EXMODE open lock. |
| 1347 | * Since we pass LKM_NOQUEUE, the request won't block waiting on | 1448 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on |
| 1348 | * other nodes and the -EAGAIN will indicate to the caller that | 1449 | * other nodes and the -EAGAIN will indicate to the caller that |
| 1349 | * this inode is still in use. | 1450 | * this inode is still in use. |
| 1350 | */ | 1451 | */ |
| 1351 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1452 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
| 1352 | level, LKM_NOQUEUE, 0); | 1453 | level, DLM_LKF_NOQUEUE, 0); |
| 1353 | 1454 | ||
| 1354 | out: | 1455 | out: |
| 1355 | mlog_exit(status); | 1456 | mlog_exit(status); |
| @@ -1374,10 +1475,10 @@ void ocfs2_open_unlock(struct inode *inode) | |||
| 1374 | 1475 | ||
| 1375 | if(lockres->l_ro_holders) | 1476 | if(lockres->l_ro_holders) |
| 1376 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1477 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
| 1377 | LKM_PRMODE); | 1478 | DLM_LOCK_PR); |
| 1378 | if(lockres->l_ex_holders) | 1479 | if(lockres->l_ex_holders) |
| 1379 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1480 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
| 1380 | LKM_EXMODE); | 1481 | DLM_LOCK_EX); |
| 1381 | 1482 | ||
| 1382 | out: | 1483 | out: |
| 1383 | mlog_exit_void(); | 1484 | mlog_exit_void(); |
| @@ -1464,7 +1565,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1464 | ocfs2_init_mask_waiter(&mw); | 1565 | ocfs2_init_mask_waiter(&mw); |
| 1465 | 1566 | ||
| 1466 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 1567 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || |
| 1467 | (lockres->l_level > LKM_NLMODE)) { | 1568 | (lockres->l_level > DLM_LOCK_NL)) { |
| 1468 | mlog(ML_ERROR, | 1569 | mlog(ML_ERROR, |
| 1469 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 1570 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " |
| 1470 | "level: %u\n", lockres->l_name, lockres->l_flags, | 1571 | "level: %u\n", lockres->l_name, lockres->l_flags, |
| @@ -1503,14 +1604,12 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1503 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1604 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1504 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1605 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1505 | 1606 | ||
| 1506 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, | 1607 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, |
| 1507 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | 1608 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, |
| 1508 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | 1609 | lockres); |
| 1509 | if (ret != DLM_NORMAL) { | 1610 | if (ret) { |
| 1510 | if (trylock && ret == DLM_NOTQUEUED) | 1611 | if (!trylock || (ret != -EAGAIN)) { |
| 1511 | ret = -EAGAIN; | 1612 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 1512 | else { | ||
| 1513 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
| 1514 | ret = -EINVAL; | 1613 | ret = -EINVAL; |
| 1515 | } | 1614 | } |
| 1516 | 1615 | ||
| @@ -1537,6 +1636,10 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1537 | * to just bubble success back up to the user. | 1636 | * to just bubble success back up to the user. |
| 1538 | */ | 1637 | */ |
| 1539 | ret = ocfs2_flock_handle_signal(lockres, level); | 1638 | ret = ocfs2_flock_handle_signal(lockres, level); |
| 1639 | } else if (!ret && (level > lockres->l_level)) { | ||
| 1640 | /* Trylock failed asynchronously */ | ||
| 1641 | BUG_ON(!trylock); | ||
| 1642 | ret = -EAGAIN; | ||
| 1540 | } | 1643 | } |
| 1541 | 1644 | ||
| 1542 | out: | 1645 | out: |
| @@ -1549,6 +1652,7 @@ out: | |||
| 1549 | void ocfs2_file_unlock(struct file *file) | 1652 | void ocfs2_file_unlock(struct file *file) |
| 1550 | { | 1653 | { |
| 1551 | int ret; | 1654 | int ret; |
| 1655 | unsigned int gen; | ||
| 1552 | unsigned long flags; | 1656 | unsigned long flags; |
| 1553 | struct ocfs2_file_private *fp = file->private_data; | 1657 | struct ocfs2_file_private *fp = file->private_data; |
| 1554 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1658 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
| @@ -1572,13 +1676,13 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1572 | * Fake a blocking ast for the downconvert code. | 1676 | * Fake a blocking ast for the downconvert code. |
| 1573 | */ | 1677 | */ |
| 1574 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 1575 | lockres->l_blocking = LKM_EXMODE; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
| 1576 | 1680 | ||
| 1577 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); |
| 1578 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1579 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1580 | 1684 | ||
| 1581 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); |
| 1582 | if (ret) { | 1686 | if (ret) { |
| 1583 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
| 1584 | return; | 1688 | return; |
| @@ -1601,11 +1705,11 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | |||
| 1601 | * condition. */ | 1705 | * condition. */ |
| 1602 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1706 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
| 1603 | switch(lockres->l_blocking) { | 1707 | switch(lockres->l_blocking) { |
| 1604 | case LKM_EXMODE: | 1708 | case DLM_LOCK_EX: |
| 1605 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 1709 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) |
| 1606 | kick = 1; | 1710 | kick = 1; |
| 1607 | break; | 1711 | break; |
| 1608 | case LKM_PRMODE: | 1712 | case DLM_LOCK_PR: |
| 1609 | if (!lockres->l_ex_holders) | 1713 | if (!lockres->l_ex_holders) |
| 1610 | kick = 1; | 1714 | kick = 1; |
| 1611 | break; | 1715 | break; |
| @@ -1648,7 +1752,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
| 1648 | 1752 | ||
| 1649 | mlog_entry_void(); | 1753 | mlog_entry_void(); |
| 1650 | 1754 | ||
| 1651 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1755 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
| 1652 | 1756 | ||
| 1653 | /* | 1757 | /* |
| 1654 | * Invalidate the LVB of a deleted inode - this way other | 1758 | * Invalidate the LVB of a deleted inode - this way other |
| @@ -1700,7 +1804,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
| 1700 | 1804 | ||
| 1701 | mlog_meta_lvb(0, lockres); | 1805 | mlog_meta_lvb(0, lockres); |
| 1702 | 1806 | ||
| 1703 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1807 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
| 1704 | 1808 | ||
| 1705 | /* We're safe here without the lockres lock... */ | 1809 | /* We're safe here without the lockres lock... */ |
| 1706 | spin_lock(&oi->ip_lock); | 1810 | spin_lock(&oi->ip_lock); |
| @@ -1735,7 +1839,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
| 1735 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 1839 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
| 1736 | struct ocfs2_lock_res *lockres) | 1840 | struct ocfs2_lock_res *lockres) |
| 1737 | { | 1841 | { |
| 1738 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1842 | struct ocfs2_meta_lvb *lvb = |
| 1843 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
| 1739 | 1844 | ||
| 1740 | if (lvb->lvb_version == OCFS2_LVB_VERSION | 1845 | if (lvb->lvb_version == OCFS2_LVB_VERSION |
| 1741 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 1846 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) |
| @@ -1923,7 +2028,8 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1923 | int ex, | 2028 | int ex, |
| 1924 | int arg_flags) | 2029 | int arg_flags) |
| 1925 | { | 2030 | { |
| 1926 | int status, level, dlm_flags, acquired; | 2031 | int status, level, acquired; |
| 2032 | u32 dlm_flags; | ||
| 1927 | struct ocfs2_lock_res *lockres = NULL; | 2033 | struct ocfs2_lock_res *lockres = NULL; |
| 1928 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1929 | struct buffer_head *local_bh = NULL; | 2035 | struct buffer_head *local_bh = NULL; |
| @@ -1950,14 +2056,13 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1950 | goto local; | 2056 | goto local; |
| 1951 | 2057 | ||
| 1952 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2058 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1953 | wait_event(osb->recovery_event, | 2059 | ocfs2_wait_for_recovery(osb); |
| 1954 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1955 | 2060 | ||
| 1956 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2061 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
| 1957 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 2062 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1958 | dlm_flags = 0; | 2063 | dlm_flags = 0; |
| 1959 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 2064 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
| 1960 | dlm_flags |= LKM_NOQUEUE; | 2065 | dlm_flags |= DLM_LKF_NOQUEUE; |
| 1961 | 2066 | ||
| 1962 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); | 2067 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); |
| 1963 | if (status < 0) { | 2068 | if (status < 0) { |
| @@ -1974,8 +2079,7 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1974 | * committed to owning this lock so we don't allow signals to | 2079 | * committed to owning this lock so we don't allow signals to |
| 1975 | * abort the operation. */ | 2080 | * abort the operation. */ |
| 1976 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2081 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1977 | wait_event(osb->recovery_event, | 2082 | ocfs2_wait_for_recovery(osb); |
| 1978 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1979 | 2083 | ||
| 1980 | local: | 2084 | local: |
| 1981 | /* | 2085 | /* |
| @@ -2109,7 +2213,7 @@ int ocfs2_inode_lock_atime(struct inode *inode, | |||
| 2109 | void ocfs2_inode_unlock(struct inode *inode, | 2213 | void ocfs2_inode_unlock(struct inode *inode, |
| 2110 | int ex) | 2214 | int ex) |
| 2111 | { | 2215 | { |
| 2112 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2216 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2113 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2217 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
| 2114 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 2115 | 2219 | ||
| @@ -2130,10 +2234,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
| 2130 | int ex) | 2234 | int ex) |
| 2131 | { | 2235 | { |
| 2132 | int status = 0; | 2236 | int status = 0; |
| 2133 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2237 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2134 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2238 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
| 2135 | struct buffer_head *bh; | ||
| 2136 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 2137 | 2239 | ||
| 2138 | mlog_entry_void(); | 2240 | mlog_entry_void(); |
| 2139 | 2241 | ||
| @@ -2159,11 +2261,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
| 2159 | goto bail; | 2261 | goto bail; |
| 2160 | } | 2262 | } |
| 2161 | if (status) { | 2263 | if (status) { |
| 2162 | bh = si->si_bh; | 2264 | status = ocfs2_refresh_slot_info(osb); |
| 2163 | status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, | ||
| 2164 | si->si_inode); | ||
| 2165 | if (status == 0) | ||
| 2166 | ocfs2_update_slot_info(si); | ||
| 2167 | 2265 | ||
| 2168 | ocfs2_complete_lock_res_refresh(lockres, status); | 2266 | ocfs2_complete_lock_res_refresh(lockres, status); |
| 2169 | 2267 | ||
| @@ -2178,7 +2276,7 @@ bail: | |||
| 2178 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 2276 | void ocfs2_super_unlock(struct ocfs2_super *osb, |
| 2179 | int ex) | 2277 | int ex) |
| 2180 | { | 2278 | { |
| 2181 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2279 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2182 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2280 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
| 2183 | 2281 | ||
| 2184 | if (!ocfs2_mount_local(osb)) | 2282 | if (!ocfs2_mount_local(osb)) |
| @@ -2196,7 +2294,7 @@ int ocfs2_rename_lock(struct ocfs2_super *osb) | |||
| 2196 | if (ocfs2_mount_local(osb)) | 2294 | if (ocfs2_mount_local(osb)) |
| 2197 | return 0; | 2295 | return 0; |
| 2198 | 2296 | ||
| 2199 | status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); | 2297 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); |
| 2200 | if (status < 0) | 2298 | if (status < 0) |
| 2201 | mlog_errno(status); | 2299 | mlog_errno(status); |
| 2202 | 2300 | ||
| @@ -2208,13 +2306,13 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
| 2208 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 2306 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; |
| 2209 | 2307 | ||
| 2210 | if (!ocfs2_mount_local(osb)) | 2308 | if (!ocfs2_mount_local(osb)) |
| 2211 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 2309 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
| 2212 | } | 2310 | } |
| 2213 | 2311 | ||
| 2214 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 2312 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) |
| 2215 | { | 2313 | { |
| 2216 | int ret; | 2314 | int ret; |
| 2217 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2315 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2218 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2316 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
| 2219 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2317 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 2220 | 2318 | ||
| @@ -2235,7 +2333,7 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) | |||
| 2235 | 2333 | ||
| 2236 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 2334 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) |
| 2237 | { | 2335 | { |
| 2238 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2336 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2239 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2337 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
| 2240 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2338 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 2241 | 2339 | ||
| @@ -2400,7 +2498,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
| 2400 | lockres->l_blocking); | 2498 | lockres->l_blocking); |
| 2401 | 2499 | ||
| 2402 | /* Dump the raw LVB */ | 2500 | /* Dump the raw LVB */ |
| 2403 | lvb = lockres->l_lksb.lvb; | 2501 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
| 2404 | for(i = 0; i < DLM_LVB_LEN; i++) | 2502 | for(i = 0; i < DLM_LVB_LEN; i++) |
| 2405 | seq_printf(m, "0x%x\t", lvb[i]); | 2503 | seq_printf(m, "0x%x\t", lvb[i]); |
| 2406 | 2504 | ||
| @@ -2504,13 +2602,14 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | |||
| 2504 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 2602 | int ocfs2_dlm_init(struct ocfs2_super *osb) |
| 2505 | { | 2603 | { |
| 2506 | int status = 0; | 2604 | int status = 0; |
| 2507 | u32 dlm_key; | 2605 | struct ocfs2_cluster_connection *conn = NULL; |
| 2508 | struct dlm_ctxt *dlm = NULL; | ||
| 2509 | 2606 | ||
| 2510 | mlog_entry_void(); | 2607 | mlog_entry_void(); |
| 2511 | 2608 | ||
| 2512 | if (ocfs2_mount_local(osb)) | 2609 | if (ocfs2_mount_local(osb)) { |
| 2610 | osb->node_num = 0; | ||
| 2513 | goto local; | 2611 | goto local; |
| 2612 | } | ||
| 2514 | 2613 | ||
| 2515 | status = ocfs2_dlm_init_debug(osb); | 2614 | status = ocfs2_dlm_init_debug(osb); |
| 2516 | if (status < 0) { | 2615 | if (status < 0) { |
| @@ -2527,26 +2626,31 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
| 2527 | goto bail; | 2626 | goto bail; |
| 2528 | } | 2627 | } |
| 2529 | 2628 | ||
| 2530 | /* used by the dlm code to make message headers unique, each | ||
| 2531 | * node in this domain must agree on this. */ | ||
| 2532 | dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); | ||
| 2533 | |||
| 2534 | /* for now, uuid == domain */ | 2629 | /* for now, uuid == domain */ |
| 2535 | dlm = dlm_register_domain(osb->uuid_str, dlm_key, | 2630 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
| 2536 | &osb->osb_locking_proto); | 2631 | osb->uuid_str, |
| 2537 | if (IS_ERR(dlm)) { | 2632 | strlen(osb->uuid_str), |
| 2538 | status = PTR_ERR(dlm); | 2633 | ocfs2_do_node_down, osb, |
| 2634 | &conn); | ||
| 2635 | if (status) { | ||
| 2539 | mlog_errno(status); | 2636 | mlog_errno(status); |
| 2540 | goto bail; | 2637 | goto bail; |
| 2541 | } | 2638 | } |
| 2542 | 2639 | ||
| 2543 | dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); | 2640 | status = ocfs2_cluster_this_node(&osb->node_num); |
| 2641 | if (status < 0) { | ||
| 2642 | mlog_errno(status); | ||
| 2643 | mlog(ML_ERROR, | ||
| 2644 | "could not find this host's node number\n"); | ||
| 2645 | ocfs2_cluster_disconnect(conn, 0); | ||
| 2646 | goto bail; | ||
| 2647 | } | ||
| 2544 | 2648 | ||
| 2545 | local: | 2649 | local: |
| 2546 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 2650 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); |
| 2547 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 2651 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); |
| 2548 | 2652 | ||
| 2549 | osb->dlm = dlm; | 2653 | osb->cconn = conn; |
| 2550 | 2654 | ||
| 2551 | status = 0; | 2655 | status = 0; |
| 2552 | bail: | 2656 | bail: |
| @@ -2560,14 +2664,19 @@ bail: | |||
| 2560 | return status; | 2664 | return status; |
| 2561 | } | 2665 | } |
| 2562 | 2666 | ||
| 2563 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | 2667 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, |
| 2668 | int hangup_pending) | ||
| 2564 | { | 2669 | { |
| 2565 | mlog_entry_void(); | 2670 | mlog_entry_void(); |
| 2566 | 2671 | ||
| 2567 | dlm_unregister_eviction_cb(&osb->osb_eviction_cb); | ||
| 2568 | |||
| 2569 | ocfs2_drop_osb_locks(osb); | 2672 | ocfs2_drop_osb_locks(osb); |
| 2570 | 2673 | ||
| 2674 | /* | ||
| 2675 | * Now that we have dropped all locks and ocfs2_dismount_volume() | ||
| 2676 | * has disabled recovery, the DLM won't be talking to us. It's | ||
| 2677 | * safe to tear things down before disconnecting the cluster. | ||
| 2678 | */ | ||
| 2679 | |||
| 2571 | if (osb->dc_task) { | 2680 | if (osb->dc_task) { |
| 2572 | kthread_stop(osb->dc_task); | 2681 | kthread_stop(osb->dc_task); |
| 2573 | osb->dc_task = NULL; | 2682 | osb->dc_task = NULL; |
| @@ -2576,15 +2685,15 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
| 2576 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2685 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
| 2577 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 2686 | ocfs2_lock_res_free(&osb->osb_rename_lockres); |
| 2578 | 2687 | ||
| 2579 | dlm_unregister_domain(osb->dlm); | 2688 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); |
| 2580 | osb->dlm = NULL; | 2689 | osb->cconn = NULL; |
| 2581 | 2690 | ||
| 2582 | ocfs2_dlm_shutdown_debug(osb); | 2691 | ocfs2_dlm_shutdown_debug(osb); |
| 2583 | 2692 | ||
| 2584 | mlog_exit_void(); | 2693 | mlog_exit_void(); |
| 2585 | } | 2694 | } |
| 2586 | 2695 | ||
| 2587 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | 2696 | static void ocfs2_unlock_ast(void *opaque, int error) |
| 2588 | { | 2697 | { |
| 2589 | struct ocfs2_lock_res *lockres = opaque; | 2698 | struct ocfs2_lock_res *lockres = opaque; |
| 2590 | unsigned long flags; | 2699 | unsigned long flags; |
| @@ -2595,24 +2704,9 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
| 2595 | lockres->l_unlock_action); | 2704 | lockres->l_unlock_action); |
| 2596 | 2705 | ||
| 2597 | spin_lock_irqsave(&lockres->l_lock, flags); | 2706 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2598 | /* We tried to cancel a convert request, but it was already | 2707 | if (error) { |
| 2599 | * granted. All we want to do here is clear our unlock | 2708 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " |
| 2600 | * state. The wake_up call done at the bottom is redundant | 2709 | "unlock_action %d\n", error, lockres->l_name, |
| 2601 | * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't | ||
| 2602 | * hurt anything anyway */ | ||
| 2603 | if (status == DLM_CANCELGRANT && | ||
| 2604 | lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | ||
| 2605 | mlog(0, "Got cancelgrant for %s\n", lockres->l_name); | ||
| 2606 | |||
| 2607 | /* We don't clear the busy flag in this case as it | ||
| 2608 | * should have been cleared by the ast which the dlm | ||
| 2609 | * has called. */ | ||
| 2610 | goto complete_unlock; | ||
| 2611 | } | ||
| 2612 | |||
| 2613 | if (status != DLM_NORMAL) { | ||
| 2614 | mlog(ML_ERROR, "Dlm passes status %d for lock %s, " | ||
| 2615 | "unlock_action %d\n", status, lockres->l_name, | ||
| 2616 | lockres->l_unlock_action); | 2710 | lockres->l_unlock_action); |
| 2617 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2711 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2618 | return; | 2712 | return; |
| @@ -2624,14 +2718,13 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
| 2624 | lockres->l_action = OCFS2_AST_INVALID; | 2718 | lockres->l_action = OCFS2_AST_INVALID; |
| 2625 | break; | 2719 | break; |
| 2626 | case OCFS2_UNLOCK_DROP_LOCK: | 2720 | case OCFS2_UNLOCK_DROP_LOCK: |
| 2627 | lockres->l_level = LKM_IVMODE; | 2721 | lockres->l_level = DLM_LOCK_IV; |
| 2628 | break; | 2722 | break; |
| 2629 | default: | 2723 | default: |
| 2630 | BUG(); | 2724 | BUG(); |
| 2631 | } | 2725 | } |
| 2632 | 2726 | ||
| 2633 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 2727 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| 2634 | complete_unlock: | ||
| 2635 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 2728 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; |
| 2636 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2729 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2637 | 2730 | ||
| @@ -2643,16 +2736,16 @@ complete_unlock: | |||
| 2643 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 2736 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
| 2644 | struct ocfs2_lock_res *lockres) | 2737 | struct ocfs2_lock_res *lockres) |
| 2645 | { | 2738 | { |
| 2646 | enum dlm_status status; | 2739 | int ret; |
| 2647 | unsigned long flags; | 2740 | unsigned long flags; |
| 2648 | int lkm_flags = 0; | 2741 | u32 lkm_flags = 0; |
| 2649 | 2742 | ||
| 2650 | /* We didn't get anywhere near actually using this lockres. */ | 2743 | /* We didn't get anywhere near actually using this lockres. */ |
| 2651 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 2744 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
| 2652 | goto out; | 2745 | goto out; |
| 2653 | 2746 | ||
| 2654 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 2747 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| 2655 | lkm_flags |= LKM_VALBLK; | 2748 | lkm_flags |= DLM_LKF_VALBLK; |
| 2656 | 2749 | ||
| 2657 | spin_lock_irqsave(&lockres->l_lock, flags); | 2750 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2658 | 2751 | ||
| @@ -2678,7 +2771,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2678 | 2771 | ||
| 2679 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 2772 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
| 2680 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 2773 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
| 2681 | lockres->l_level == LKM_EXMODE && | 2774 | lockres->l_level == DLM_LOCK_EX && |
| 2682 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 2775 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) |
| 2683 | lockres->l_ops->set_lvb(lockres); | 2776 | lockres->l_ops->set_lvb(lockres); |
| 2684 | } | 2777 | } |
| @@ -2707,15 +2800,15 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2707 | 2800 | ||
| 2708 | mlog(0, "lock %s\n", lockres->l_name); | 2801 | mlog(0, "lock %s\n", lockres->l_name); |
| 2709 | 2802 | ||
| 2710 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, | 2803 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, |
| 2711 | ocfs2_unlock_ast, lockres); | 2804 | lockres); |
| 2712 | if (status != DLM_NORMAL) { | 2805 | if (ret) { |
| 2713 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2806 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
| 2714 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 2807 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
| 2715 | dlm_print_one_lock(lockres->l_lksb.lockid); | 2808 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); |
| 2716 | BUG(); | 2809 | BUG(); |
| 2717 | } | 2810 | } |
| 2718 | mlog(0, "lock %s, successfull return from dlmunlock\n", | 2811 | mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", |
| 2719 | lockres->l_name); | 2812 | lockres->l_name); |
| 2720 | 2813 | ||
| 2721 | ocfs2_wait_on_busy_lock(lockres); | 2814 | ocfs2_wait_on_busy_lock(lockres); |
| @@ -2806,15 +2899,15 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
| 2806 | return status; | 2899 | return status; |
| 2807 | } | 2900 | } |
| 2808 | 2901 | ||
| 2809 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 2902 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
| 2810 | int new_level) | 2903 | int new_level) |
| 2811 | { | 2904 | { |
| 2812 | assert_spin_locked(&lockres->l_lock); | 2905 | assert_spin_locked(&lockres->l_lock); |
| 2813 | 2906 | ||
| 2814 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 2907 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
| 2815 | 2908 | ||
| 2816 | if (lockres->l_level <= new_level) { | 2909 | if (lockres->l_level <= new_level) { |
| 2817 | mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n", | 2910 | mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", |
| 2818 | lockres->l_level, new_level); | 2911 | lockres->l_level, new_level); |
| 2819 | BUG(); | 2912 | BUG(); |
| 2820 | } | 2913 | } |
| @@ -2825,33 +2918,33 @@ static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | |||
| 2825 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 2918 | lockres->l_action = OCFS2_AST_DOWNCONVERT; |
| 2826 | lockres->l_requested = new_level; | 2919 | lockres->l_requested = new_level; |
| 2827 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 2920 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 2921 | return lockres_set_pending(lockres); | ||
| 2828 | } | 2922 | } |
| 2829 | 2923 | ||
| 2830 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 2924 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
| 2831 | struct ocfs2_lock_res *lockres, | 2925 | struct ocfs2_lock_res *lockres, |
| 2832 | int new_level, | 2926 | int new_level, |
| 2833 | int lvb) | 2927 | int lvb, |
| 2928 | unsigned int generation) | ||
| 2834 | { | 2929 | { |
| 2835 | int ret, dlm_flags = LKM_CONVERT; | 2930 | int ret; |
| 2836 | enum dlm_status status; | 2931 | u32 dlm_flags = DLM_LKF_CONVERT; |
| 2837 | 2932 | ||
| 2838 | mlog_entry_void(); | 2933 | mlog_entry_void(); |
| 2839 | 2934 | ||
| 2840 | if (lvb) | 2935 | if (lvb) |
| 2841 | dlm_flags |= LKM_VALBLK; | 2936 | dlm_flags |= DLM_LKF_VALBLK; |
| 2842 | 2937 | ||
| 2843 | status = dlmlock(osb->dlm, | 2938 | ret = ocfs2_dlm_lock(osb->cconn, |
| 2844 | new_level, | 2939 | new_level, |
| 2845 | &lockres->l_lksb, | 2940 | &lockres->l_lksb, |
| 2846 | dlm_flags, | 2941 | dlm_flags, |
| 2847 | lockres->l_name, | 2942 | lockres->l_name, |
| 2848 | OCFS2_LOCK_ID_MAX_LEN - 1, | 2943 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 2849 | ocfs2_locking_ast, | 2944 | lockres); |
| 2850 | lockres, | 2945 | lockres_clear_pending(lockres, generation, osb); |
| 2851 | ocfs2_blocking_ast); | 2946 | if (ret) { |
| 2852 | if (status != DLM_NORMAL) { | 2947 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 2853 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
| 2854 | ret = -EINVAL; | ||
| 2855 | ocfs2_recover_from_dlm_error(lockres, 1); | 2948 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 2856 | goto bail; | 2949 | goto bail; |
| 2857 | } | 2950 | } |
| @@ -2862,7 +2955,7 @@ bail: | |||
| 2862 | return ret; | 2955 | return ret; |
| 2863 | } | 2956 | } |
| 2864 | 2957 | ||
| 2865 | /* returns 1 when the caller should unlock and call dlmunlock */ | 2958 | /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ |
| 2866 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 2959 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
| 2867 | struct ocfs2_lock_res *lockres) | 2960 | struct ocfs2_lock_res *lockres) |
| 2868 | { | 2961 | { |
| @@ -2898,24 +2991,18 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
| 2898 | struct ocfs2_lock_res *lockres) | 2991 | struct ocfs2_lock_res *lockres) |
| 2899 | { | 2992 | { |
| 2900 | int ret; | 2993 | int ret; |
| 2901 | enum dlm_status status; | ||
| 2902 | 2994 | ||
| 2903 | mlog_entry_void(); | 2995 | mlog_entry_void(); |
| 2904 | mlog(0, "lock %s\n", lockres->l_name); | 2996 | mlog(0, "lock %s\n", lockres->l_name); |
| 2905 | 2997 | ||
| 2906 | ret = 0; | 2998 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
| 2907 | status = dlmunlock(osb->dlm, | 2999 | DLM_LKF_CANCEL, lockres); |
| 2908 | &lockres->l_lksb, | 3000 | if (ret) { |
| 2909 | LKM_CANCEL, | 3001 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
| 2910 | ocfs2_unlock_ast, | ||
| 2911 | lockres); | ||
| 2912 | if (status != DLM_NORMAL) { | ||
| 2913 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | ||
| 2914 | ret = -EINVAL; | ||
| 2915 | ocfs2_recover_from_dlm_error(lockres, 0); | 3002 | ocfs2_recover_from_dlm_error(lockres, 0); |
| 2916 | } | 3003 | } |
| 2917 | 3004 | ||
| 2918 | mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); | 3005 | mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); |
| 2919 | 3006 | ||
| 2920 | mlog_exit(ret); | 3007 | mlog_exit(ret); |
| 2921 | return ret; | 3008 | return ret; |
| @@ -2930,6 +3017,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
| 2930 | int new_level; | 3017 | int new_level; |
| 2931 | int ret = 0; | 3018 | int ret = 0; |
| 2932 | int set_lvb = 0; | 3019 | int set_lvb = 0; |
| 3020 | unsigned int gen; | ||
| 2933 | 3021 | ||
| 2934 | mlog_entry_void(); | 3022 | mlog_entry_void(); |
| 2935 | 3023 | ||
| @@ -2939,6 +3027,32 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
| 2939 | 3027 | ||
| 2940 | recheck: | 3028 | recheck: |
| 2941 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 3029 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
| 3030 | /* XXX | ||
| 3031 | * This is a *big* race. The OCFS2_LOCK_PENDING flag | ||
| 3032 | * exists entirely for one reason - another thread has set | ||
| 3033 | * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). | ||
| 3034 | * | ||
| 3035 | * If we do ocfs2_cancel_convert() before the other thread | ||
| 3036 | * calls dlm_lock(), our cancel will do nothing. We will | ||
| 3037 | * get no ast, and we will have no way of knowing the | ||
| 3038 | * cancel failed. Meanwhile, the other thread will call | ||
| 3039 | * into dlm_lock() and wait...forever. | ||
| 3040 | * | ||
| 3041 | * Why forever? Because another node has asked for the | ||
| 3042 | * lock first; that's why we're here in unblock_lock(). | ||
| 3043 | * | ||
| 3044 | * The solution is OCFS2_LOCK_PENDING. When PENDING is | ||
| 3045 | * set, we just requeue the unblock. Only when the other | ||
| 3046 | * thread has called dlm_lock() and cleared PENDING will | ||
| 3047 | * we then cancel their request. | ||
| 3048 | * | ||
| 3049 | * All callers of dlm_lock() must set OCFS2_LOCK_PENDING | ||
| 3050 | * at the same time they set OCFS2_LOCK_BUSY. They must | ||
| 3051 | * clear OCFS2_LOCK_PENDING after dlm_lock() returns. | ||
| 3052 | */ | ||
| 3053 | if (lockres->l_flags & OCFS2_LOCK_PENDING) | ||
| 3054 | goto leave_requeue; | ||
| 3055 | |||
| 2942 | ctl->requeue = 1; | 3056 | ctl->requeue = 1; |
| 2943 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 3057 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
| 2944 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3058 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| @@ -2952,13 +3066,13 @@ recheck: | |||
| 2952 | 3066 | ||
| 2953 | /* if we're blocking an exclusive and we have *any* holders, | 3067 | /* if we're blocking an exclusive and we have *any* holders, |
| 2954 | * then requeue. */ | 3068 | * then requeue. */ |
| 2955 | if ((lockres->l_blocking == LKM_EXMODE) | 3069 | if ((lockres->l_blocking == DLM_LOCK_EX) |
| 2956 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 3070 | && (lockres->l_ex_holders || lockres->l_ro_holders)) |
| 2957 | goto leave_requeue; | 3071 | goto leave_requeue; |
| 2958 | 3072 | ||
| 2959 | /* If it's a PR we're blocking, then only | 3073 | /* If it's a PR we're blocking, then only |
| 2960 | * requeue if we've got any EX holders */ | 3074 | * requeue if we've got any EX holders */ |
| 2961 | if (lockres->l_blocking == LKM_PRMODE && | 3075 | if (lockres->l_blocking == DLM_LOCK_PR && |
| 2962 | lockres->l_ex_holders) | 3076 | lockres->l_ex_holders) |
| 2963 | goto leave_requeue; | 3077 | goto leave_requeue; |
| 2964 | 3078 | ||
| @@ -3005,7 +3119,7 @@ downconvert: | |||
| 3005 | ctl->requeue = 0; | 3119 | ctl->requeue = 0; |
| 3006 | 3120 | ||
| 3007 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 3121 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
| 3008 | if (lockres->l_level == LKM_EXMODE) | 3122 | if (lockres->l_level == DLM_LOCK_EX) |
| 3009 | set_lvb = 1; | 3123 | set_lvb = 1; |
| 3010 | 3124 | ||
| 3011 | /* | 3125 | /* |
| @@ -3018,9 +3132,11 @@ downconvert: | |||
| 3018 | lockres->l_ops->set_lvb(lockres); | 3132 | lockres->l_ops->set_lvb(lockres); |
| 3019 | } | 3133 | } |
| 3020 | 3134 | ||
| 3021 | ocfs2_prepare_downconvert(lockres, new_level); | 3135 | gen = ocfs2_prepare_downconvert(lockres, new_level); |
| 3022 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3136 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 3023 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | 3137 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, |
| 3138 | gen); | ||
| 3139 | |||
| 3024 | leave: | 3140 | leave: |
| 3025 | mlog_exit(ret); | 3141 | mlog_exit(ret); |
| 3026 | return ret; | 3142 | return ret; |
| @@ -3059,7 +3175,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3059 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 3175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 3060 | } | 3176 | } |
| 3061 | sync_mapping_buffers(mapping); | 3177 | sync_mapping_buffers(mapping); |
| 3062 | if (blocking == LKM_EXMODE) { | 3178 | if (blocking == DLM_LOCK_EX) { |
| 3063 | truncate_inode_pages(mapping, 0); | 3179 | truncate_inode_pages(mapping, 0); |
| 3064 | } else { | 3180 | } else { |
| 3065 | /* We only need to wait on the I/O if we're not also | 3181 | /* We only need to wait on the I/O if we're not also |
| @@ -3080,8 +3196,8 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | |||
| 3080 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 3196 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
| 3081 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); | 3197 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); |
| 3082 | 3198 | ||
| 3083 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | 3199 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); |
| 3084 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); | 3200 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); |
| 3085 | 3201 | ||
| 3086 | if (checkpointed) | 3202 | if (checkpointed) |
| 3087 | return 1; | 3203 | return 1; |
| @@ -3145,7 +3261,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3145 | * valid. The downconvert code will retain a PR for this node, | 3261 | * valid. The downconvert code will retain a PR for this node, |
| 3146 | * so there's no further work to do. | 3262 | * so there's no further work to do. |
| 3147 | */ | 3263 | */ |
| 3148 | if (blocking == LKM_PRMODE) | 3264 | if (blocking == DLM_LOCK_PR) |
| 3149 | return UNBLOCK_CONTINUE; | 3265 | return UNBLOCK_CONTINUE; |
| 3150 | 3266 | ||
| 3151 | /* | 3267 | /* |
| @@ -3219,6 +3335,45 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3219 | return UNBLOCK_CONTINUE_POST; | 3335 | return UNBLOCK_CONTINUE_POST; |
| 3220 | } | 3336 | } |
| 3221 | 3337 | ||
| 3338 | /* | ||
| 3339 | * This is the filesystem locking protocol. It provides the lock handling | ||
| 3340 | * hooks for the underlying DLM. It has a maximum version number. | ||
| 3341 | * The version number allows interoperability with systems running at | ||
| 3342 | * the same major number and an equal or smaller minor number. | ||
| 3343 | * | ||
| 3344 | * Whenever the filesystem does new things with locks (adds or removes a | ||
| 3345 | * lock, orders them differently, does different things underneath a lock), | ||
| 3346 | * the version must be changed. The protocol is negotiated when joining | ||
| 3347 | * the dlm domain. A node may join the domain if its major version is | ||
| 3348 | * identical to all other nodes and its minor version is greater than | ||
| 3349 | * or equal to all other nodes. When its minor version is greater than | ||
| 3350 | * the other nodes, it will run at the minor version specified by the | ||
| 3351 | * other nodes. | ||
| 3352 | * | ||
| 3353 | * If a locking change is made that will not be compatible with older | ||
| 3354 | * versions, the major number must be increased and the minor version set | ||
| 3355 | * to zero. If a change merely adds a behavior that can be disabled when | ||
| 3356 | * speaking to older versions, the minor version must be increased. If a | ||
| 3357 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
| 3358 | * are just ignored by older versions), the version does not need to be | ||
| 3359 | * updated. | ||
| 3360 | */ | ||
| 3361 | static struct ocfs2_locking_protocol lproto = { | ||
| 3362 | .lp_max_version = { | ||
| 3363 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
| 3364 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
| 3365 | }, | ||
| 3366 | .lp_lock_ast = ocfs2_locking_ast, | ||
| 3367 | .lp_blocking_ast = ocfs2_blocking_ast, | ||
| 3368 | .lp_unlock_ast = ocfs2_unlock_ast, | ||
| 3369 | }; | ||
| 3370 | |||
| 3371 | void ocfs2_set_locking_protocol(void) | ||
| 3372 | { | ||
| 3373 | ocfs2_stack_glue_set_locking_protocol(&lproto); | ||
| 3374 | } | ||
| 3375 | |||
| 3376 | |||
| 3222 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 3377 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
| 3223 | struct ocfs2_lock_res *lockres) | 3378 | struct ocfs2_lock_res *lockres) |
| 3224 | { | 3379 | { |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e3cf902404b4..2bb01f09c1b1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
| @@ -58,7 +58,7 @@ struct ocfs2_meta_lvb { | |||
| 58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
| 59 | 59 | ||
| 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
| 61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb); | 61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); |
| 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); | 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); |
| 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
| 64 | enum ocfs2_lock_type type, | 64 | enum ocfs2_lock_type type, |
| @@ -114,5 +114,6 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | |||
| 114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
| 115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
| 116 | 116 | ||
| 117 | extern const struct dlm_protocol_version ocfs2_locking_protocol; | 117 | /* To set the locking protocol on module initialization */ |
| 118 | void ocfs2_set_locking_protocol(void); | ||
| 118 | #endif /* DLMGLUE_H */ | 119 | #endif /* DLMGLUE_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ed5d5232e85d..9154c82d3258 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = { | |||
| 2242 | .open = ocfs2_file_open, | 2242 | .open = ocfs2_file_open, |
| 2243 | .aio_read = ocfs2_file_aio_read, | 2243 | .aio_read = ocfs2_file_aio_read, |
| 2244 | .aio_write = ocfs2_file_aio_write, | 2244 | .aio_write = ocfs2_file_aio_write, |
| 2245 | .ioctl = ocfs2_ioctl, | 2245 | .unlocked_ioctl = ocfs2_ioctl, |
| 2246 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT |
| 2247 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2248 | #endif | 2248 | #endif |
| @@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = { | |||
| 2258 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, |
| 2259 | .release = ocfs2_dir_release, | 2259 | .release = ocfs2_dir_release, |
| 2260 | .open = ocfs2_dir_open, | 2260 | .open = ocfs2_dir_open, |
| 2261 | .ioctl = ocfs2_ioctl, | 2261 | .unlocked_ioctl = ocfs2_ioctl, |
| 2262 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT |
| 2263 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2264 | #endif | 2264 | #endif |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0758daf64da0..c6e7213db868 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -28,9 +28,6 @@ | |||
| 28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
| 31 | #include <linux/kmod.h> | ||
| 32 | |||
| 33 | #include <dlm/dlmapi.h> | ||
| 34 | 31 | ||
| 35 | #define MLOG_MASK_PREFIX ML_SUPER | 32 | #define MLOG_MASK_PREFIX ML_SUPER |
| 36 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
| @@ -48,7 +45,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | |||
| 48 | int bit); | 45 | int bit); |
| 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 46 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
| 50 | int bit); | 47 | int bit); |
| 51 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); | ||
| 52 | 48 | ||
| 53 | /* special case -1 for now | 49 | /* special case -1 for now |
| 54 | * TODO: should *really* make sure the calling func never passes -1!! */ | 50 | * TODO: should *really* make sure the calling func never passes -1!! */ |
| @@ -62,23 +58,23 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map) | |||
| 62 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 58 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
| 63 | { | 59 | { |
| 64 | spin_lock_init(&osb->node_map_lock); | 60 | spin_lock_init(&osb->node_map_lock); |
| 65 | ocfs2_node_map_init(&osb->recovery_map); | ||
| 66 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
| 67 | } | 62 | } |
| 68 | 63 | ||
| 69 | static void ocfs2_do_node_down(int node_num, | 64 | void ocfs2_do_node_down(int node_num, void *data) |
| 70 | struct ocfs2_super *osb) | ||
| 71 | { | 65 | { |
| 66 | struct ocfs2_super *osb = data; | ||
| 67 | |||
| 72 | BUG_ON(osb->node_num == node_num); | 68 | BUG_ON(osb->node_num == node_num); |
| 73 | 69 | ||
| 74 | mlog(0, "ocfs2: node down event for %d\n", node_num); | 70 | mlog(0, "ocfs2: node down event for %d\n", node_num); |
| 75 | 71 | ||
| 76 | if (!osb->dlm) { | 72 | if (!osb->cconn) { |
| 77 | /* | 73 | /* |
| 78 | * No DLM means we're not even ready to participate yet. | 74 | * No cluster connection means we're not even ready to |
| 79 | * We check the slots after the DLM comes up, so we will | 75 | * participate yet. We check the slots after the cluster |
| 80 | * notice the node death then. We can safely ignore it | 76 | * comes up, so we will notice the node death then. We |
| 81 | * here. | 77 | * can safely ignore it here. |
| 82 | */ | 78 | */ |
| 83 | return; | 79 | return; |
| 84 | } | 80 | } |
| @@ -86,61 +82,6 @@ static void ocfs2_do_node_down(int node_num, | |||
| 86 | ocfs2_recovery_thread(osb, node_num); | 82 | ocfs2_recovery_thread(osb, node_num); |
| 87 | } | 83 | } |
| 88 | 84 | ||
| 89 | /* Called from the dlm when it's about to evict a node. We may also | ||
| 90 | * get a heartbeat callback later. */ | ||
| 91 | static void ocfs2_dlm_eviction_cb(int node_num, | ||
| 92 | void *data) | ||
| 93 | { | ||
| 94 | struct ocfs2_super *osb = (struct ocfs2_super *) data; | ||
| 95 | struct super_block *sb = osb->sb; | ||
| 96 | |||
| 97 | mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n", | ||
| 98 | MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num); | ||
| 99 | |||
| 100 | ocfs2_do_node_down(node_num, osb); | ||
| 101 | } | ||
| 102 | |||
| 103 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | ||
| 104 | { | ||
| 105 | /* Not exactly a heartbeat callback, but leads to essentially | ||
| 106 | * the same path so we set it up here. */ | ||
| 107 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | ||
| 108 | ocfs2_dlm_eviction_cb, | ||
| 109 | osb); | ||
| 110 | } | ||
| 111 | |||
| 112 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | ||
| 113 | { | ||
| 114 | int ret; | ||
| 115 | char *argv[5], *envp[3]; | ||
| 116 | |||
| 117 | if (ocfs2_mount_local(osb)) | ||
| 118 | return; | ||
| 119 | |||
| 120 | if (!osb->uuid_str) { | ||
| 121 | /* This can happen if we don't get far enough in mount... */ | ||
| 122 | mlog(0, "No UUID with which to stop heartbeat!\n\n"); | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | |||
| 126 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
| 127 | argv[1] = "-K"; | ||
| 128 | argv[2] = "-u"; | ||
| 129 | argv[3] = osb->uuid_str; | ||
| 130 | argv[4] = NULL; | ||
| 131 | |||
| 132 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
| 133 | |||
| 134 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 135 | envp[0] = "HOME=/"; | ||
| 136 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 137 | envp[2] = NULL; | ||
| 138 | |||
| 139 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 140 | if (ret < 0) | ||
| 141 | mlog_errno(ret); | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 85 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, |
| 145 | int bit) | 86 | int bit) |
| 146 | { | 87 | { |
| @@ -192,112 +133,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | |||
| 192 | return ret; | 133 | return ret; |
| 193 | } | 134 | } |
| 194 | 135 | ||
| 195 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map) | ||
| 196 | { | ||
| 197 | int bit; | ||
| 198 | bit = find_next_bit(map->map, map->num_nodes, 0); | ||
| 199 | if (bit < map->num_nodes) | ||
| 200 | return 0; | ||
| 201 | return 1; | ||
| 202 | } | ||
| 203 | |||
| 204 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 205 | struct ocfs2_node_map *map) | ||
| 206 | { | ||
| 207 | int ret; | ||
| 208 | BUG_ON(map->num_nodes == 0); | ||
| 209 | spin_lock(&osb->node_map_lock); | ||
| 210 | ret = __ocfs2_node_map_is_empty(map); | ||
| 211 | spin_unlock(&osb->node_map_lock); | ||
| 212 | return ret; | ||
| 213 | } | ||
| 214 | |||
| 215 | #if 0 | ||
| 216 | |||
| 217 | static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, | ||
| 218 | struct ocfs2_node_map *from) | ||
| 219 | { | ||
| 220 | BUG_ON(from->num_nodes == 0); | ||
| 221 | ocfs2_node_map_init(target); | ||
| 222 | __ocfs2_node_map_set(target, from); | ||
| 223 | } | ||
| 224 | |||
| 225 | /* returns 1 if bit is the only bit set in target, 0 otherwise */ | ||
| 226 | int ocfs2_node_map_is_only(struct ocfs2_super *osb, | ||
| 227 | struct ocfs2_node_map *target, | ||
| 228 | int bit) | ||
| 229 | { | ||
| 230 | struct ocfs2_node_map temp; | ||
| 231 | int ret; | ||
| 232 | |||
| 233 | spin_lock(&osb->node_map_lock); | ||
| 234 | __ocfs2_node_map_dup(&temp, target); | ||
| 235 | __ocfs2_node_map_clear_bit(&temp, bit); | ||
| 236 | ret = __ocfs2_node_map_is_empty(&temp); | ||
| 237 | spin_unlock(&osb->node_map_lock); | ||
| 238 | |||
| 239 | return ret; | ||
| 240 | } | ||
| 241 | |||
| 242 | static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | ||
| 243 | struct ocfs2_node_map *from) | ||
| 244 | { | ||
| 245 | int num_longs, i; | ||
| 246 | |||
| 247 | BUG_ON(target->num_nodes != from->num_nodes); | ||
| 248 | BUG_ON(target->num_nodes == 0); | ||
| 249 | |||
| 250 | num_longs = BITS_TO_LONGS(target->num_nodes); | ||
| 251 | for (i = 0; i < num_longs; i++) | ||
| 252 | target->map[i] = from->map[i]; | ||
| 253 | } | ||
| 254 | |||
| 255 | #endif /* 0 */ | ||
| 256 | |||
| 257 | /* Returns whether the recovery bit was actually set - it may not be | ||
| 258 | * if a node is still marked as needing recovery */ | ||
| 259 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 260 | int num) | ||
| 261 | { | ||
| 262 | int set = 0; | ||
| 263 | |||
| 264 | spin_lock(&osb->node_map_lock); | ||
| 265 | |||
| 266 | if (!test_bit(num, osb->recovery_map.map)) { | ||
| 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | ||
| 268 | set = 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | spin_unlock(&osb->node_map_lock); | ||
| 272 | |||
| 273 | return set; | ||
| 274 | } | ||
| 275 | |||
| 276 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 277 | int num) | ||
| 278 | { | ||
| 279 | ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num); | ||
| 280 | } | ||
| 281 | |||
| 282 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 283 | struct ocfs2_node_map *map, | ||
| 284 | int idx) | ||
| 285 | { | ||
| 286 | int i = idx; | ||
| 287 | |||
| 288 | idx = O2NM_INVALID_NODE_NUM; | ||
| 289 | spin_lock(&osb->node_map_lock); | ||
| 290 | if ((i != O2NM_INVALID_NODE_NUM) && | ||
| 291 | (i >= 0) && | ||
| 292 | (i < map->num_nodes)) { | ||
| 293 | while(i < map->num_nodes) { | ||
| 294 | if (test_bit(i, map->map)) { | ||
| 295 | idx = i; | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | i++; | ||
| 299 | } | ||
| 300 | } | ||
| 301 | spin_unlock(&osb->node_map_lock); | ||
| 302 | return idx; | ||
| 303 | } | ||
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index eac63aed7611..74b9c5dda28d 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
| @@ -28,13 +28,10 @@ | |||
| 28 | 28 | ||
| 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); |
| 30 | 30 | ||
| 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_do_node_down(int node_num, void *data); |
| 32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | ||
| 33 | 32 | ||
| 34 | /* node map functions - used to keep track of mounted and in-recovery | 33 | /* node map functions - used to keep track of mounted and in-recovery |
| 35 | * nodes. */ | 34 | * nodes. */ |
| 36 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 37 | struct ocfs2_node_map *map); | ||
| 38 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, | 35 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, |
| 39 | struct ocfs2_node_map *map, | 36 | struct ocfs2_node_map *map, |
| 40 | int bit); | 37 | int bit); |
| @@ -44,17 +41,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, | |||
| 44 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | 41 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, |
| 45 | struct ocfs2_node_map *map, | 42 | struct ocfs2_node_map *map, |
| 46 | int bit); | 43 | int bit); |
| 47 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_node_map *map, | ||
| 49 | int idx); | ||
| 50 | static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb, | ||
| 51 | struct ocfs2_node_map *map) | ||
| 52 | { | ||
| 53 | return ocfs2_node_map_iterate(osb, map, 0); | ||
| 54 | } | ||
| 55 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 56 | int num); | ||
| 57 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 58 | int num); | ||
| 59 | 44 | ||
| 60 | #endif /* OCFS2_HEARTBEAT_H */ | 45 | #endif /* OCFS2_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 5177fba5162b..b413166dd163 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | 7 | ||
| 8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
| 9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
| 10 | #include <linux/smp_lock.h> | ||
| 10 | 11 | ||
| 11 | #define MLOG_MASK_PREFIX ML_INODE | 12 | #define MLOG_MASK_PREFIX ML_INODE |
| 12 | #include <cluster/masklog.h> | 13 | #include <cluster/masklog.h> |
| @@ -112,9 +113,9 @@ bail: | |||
| 112 | return status; | 113 | return status; |
| 113 | } | 114 | } |
| 114 | 115 | ||
| 115 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 116 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 116 | unsigned int cmd, unsigned long arg) | ||
| 117 | { | 117 | { |
| 118 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 118 | unsigned int flags; | 119 | unsigned int flags; |
| 119 | int new_clusters; | 120 | int new_clusters; |
| 120 | int status; | 121 | int status; |
| @@ -168,9 +169,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 168 | #ifdef CONFIG_COMPAT | 169 | #ifdef CONFIG_COMPAT |
| 169 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 170 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| 170 | { | 171 | { |
| 171 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 172 | int ret; | ||
| 173 | |||
| 174 | switch (cmd) { | 172 | switch (cmd) { |
| 175 | case OCFS2_IOC32_GETFLAGS: | 173 | case OCFS2_IOC32_GETFLAGS: |
| 176 | cmd = OCFS2_IOC_GETFLAGS; | 174 | cmd = OCFS2_IOC_GETFLAGS; |
| @@ -190,9 +188,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 190 | return -ENOIOCTLCMD; | 188 | return -ENOIOCTLCMD; |
| 191 | } | 189 | } |
| 192 | 190 | ||
| 193 | lock_kernel(); | 191 | return ocfs2_ioctl(file, cmd, arg); |
| 194 | ret = ocfs2_ioctl(inode, file, cmd, arg); | ||
| 195 | unlock_kernel(); | ||
| 196 | return ret; | ||
| 197 | } | 192 | } |
| 198 | #endif | 193 | #endif |
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index 4d6c4f430d0d..cf9a5ee30fef 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h | |||
| @@ -10,8 +10,7 @@ | |||
| 10 | #ifndef OCFS2_IOCTL_H | 10 | #ifndef OCFS2_IOCTL_H |
| 11 | #define OCFS2_IOCTL_H | 11 | #define OCFS2_IOCTL_H |
| 12 | 12 | ||
| 13 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 13 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
| 14 | unsigned int cmd, unsigned long arg); | ||
| 15 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); | 14 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); |
| 16 | 15 | ||
| 17 | #endif /* OCFS2_IOCTL_H */ | 16 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f31c7e8c19c3..9698338adc39 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 64 | int slot); | 64 | int slot); |
| 65 | static int ocfs2_commit_thread(void *arg); | 65 | static int ocfs2_commit_thread(void *arg); |
| 66 | 66 | ||
| 67 | |||
| 68 | /* | ||
| 69 | * The recovery map is a simple array of node numbers to recover. | ||
| 70 | * It is protected by the osb_lock. | ||
| 71 | */ | ||
| 72 | |||
| 73 | struct ocfs2_recovery_map { | ||
| 74 | unsigned int rm_used; | ||
| 75 | unsigned int *rm_entries; | ||
| 76 | }; | ||
| 77 | |||
| 78 | int ocfs2_recovery_init(struct ocfs2_super *osb) | ||
| 79 | { | ||
| 80 | struct ocfs2_recovery_map *rm; | ||
| 81 | |||
| 82 | mutex_init(&osb->recovery_lock); | ||
| 83 | osb->disable_recovery = 0; | ||
| 84 | osb->recovery_thread_task = NULL; | ||
| 85 | init_waitqueue_head(&osb->recovery_event); | ||
| 86 | |||
| 87 | rm = kzalloc(sizeof(struct ocfs2_recovery_map) + | ||
| 88 | osb->max_slots * sizeof(unsigned int), | ||
| 89 | GFP_KERNEL); | ||
| 90 | if (!rm) { | ||
| 91 | mlog_errno(-ENOMEM); | ||
| 92 | return -ENOMEM; | ||
| 93 | } | ||
| 94 | |||
| 95 | rm->rm_entries = (unsigned int *)((char *)rm + | ||
| 96 | sizeof(struct ocfs2_recovery_map)); | ||
| 97 | osb->recovery_map = rm; | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 103 | * memory barriers to make sure that we'll see the null task before | ||
| 104 | * being woken up */ | ||
| 105 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 106 | { | ||
| 107 | mb(); | ||
| 108 | return osb->recovery_thread_task != NULL; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ocfs2_recovery_exit(struct ocfs2_super *osb) | ||
| 112 | { | ||
| 113 | struct ocfs2_recovery_map *rm; | ||
| 114 | |||
| 115 | /* disable any new recovery threads and wait for any currently | ||
| 116 | * running ones to exit. Do this before setting the vol_state. */ | ||
| 117 | mutex_lock(&osb->recovery_lock); | ||
| 118 | osb->disable_recovery = 1; | ||
| 119 | mutex_unlock(&osb->recovery_lock); | ||
| 120 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 121 | |||
| 122 | /* At this point, we know that no more recovery threads can be | ||
| 123 | * launched, so wait for any recovery completion work to | ||
| 124 | * complete. */ | ||
| 125 | flush_workqueue(ocfs2_wq); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Now that recovery is shut down, and the osb is about to be | ||
| 129 | * freed, the osb_lock is not taken here. | ||
| 130 | */ | ||
| 131 | rm = osb->recovery_map; | ||
| 132 | /* XXX: Should we bug if there are dirty entries? */ | ||
| 133 | |||
| 134 | kfree(rm); | ||
| 135 | } | ||
| 136 | |||
| 137 | static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, | ||
| 138 | unsigned int node_num) | ||
| 139 | { | ||
| 140 | int i; | ||
| 141 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 142 | |||
| 143 | assert_spin_locked(&osb->osb_lock); | ||
| 144 | |||
| 145 | for (i = 0; i < rm->rm_used; i++) { | ||
| 146 | if (rm->rm_entries[i] == node_num) | ||
| 147 | return 1; | ||
| 148 | } | ||
| 149 | |||
| 150 | return 0; | ||
| 151 | } | ||
| 152 | |||
| 153 | /* Behaves like test-and-set. Returns the previous value */ | ||
| 154 | static int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 155 | unsigned int node_num) | ||
| 156 | { | ||
| 157 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 158 | |||
| 159 | spin_lock(&osb->osb_lock); | ||
| 160 | if (__ocfs2_recovery_map_test(osb, node_num)) { | ||
| 161 | spin_unlock(&osb->osb_lock); | ||
| 162 | return 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* XXX: Can this be exploited? Not from o2dlm... */ | ||
| 166 | BUG_ON(rm->rm_used >= osb->max_slots); | ||
| 167 | |||
| 168 | rm->rm_entries[rm->rm_used] = node_num; | ||
| 169 | rm->rm_used++; | ||
| 170 | spin_unlock(&osb->osb_lock); | ||
| 171 | |||
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 176 | unsigned int node_num) | ||
| 177 | { | ||
| 178 | int i; | ||
| 179 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 180 | |||
| 181 | spin_lock(&osb->osb_lock); | ||
| 182 | |||
| 183 | for (i = 0; i < rm->rm_used; i++) { | ||
| 184 | if (rm->rm_entries[i] == node_num) | ||
| 185 | break; | ||
| 186 | } | ||
| 187 | |||
| 188 | if (i < rm->rm_used) { | ||
| 189 | /* XXX: be careful with the pointer math */ | ||
| 190 | memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), | ||
| 191 | (rm->rm_used - i - 1) * sizeof(unsigned int)); | ||
| 192 | rm->rm_used--; | ||
| 193 | } | ||
| 194 | |||
| 195 | spin_unlock(&osb->osb_lock); | ||
| 196 | } | ||
| 197 | |||
| 67 | static int ocfs2_commit_cache(struct ocfs2_super *osb) | 198 | static int ocfs2_commit_cache(struct ocfs2_super *osb) |
| 68 | { | 199 | { |
| 69 | int status = 0; | 200 | int status = 0; |
| @@ -586,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | |||
| 586 | 717 | ||
| 587 | mlog_entry_void(); | 718 | mlog_entry_void(); |
| 588 | 719 | ||
| 589 | if (!journal) | 720 | BUG_ON(!journal); |
| 590 | BUG(); | ||
| 591 | 721 | ||
| 592 | osb = journal->j_osb; | 722 | osb = journal->j_osb; |
| 593 | 723 | ||
| @@ -650,6 +780,23 @@ bail: | |||
| 650 | return status; | 780 | return status; |
| 651 | } | 781 | } |
| 652 | 782 | ||
| 783 | static int ocfs2_recovery_completed(struct ocfs2_super *osb) | ||
| 784 | { | ||
| 785 | int empty; | ||
| 786 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 787 | |||
| 788 | spin_lock(&osb->osb_lock); | ||
| 789 | empty = (rm->rm_used == 0); | ||
| 790 | spin_unlock(&osb->osb_lock); | ||
| 791 | |||
| 792 | return empty; | ||
| 793 | } | ||
| 794 | |||
| 795 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb) | ||
| 796 | { | ||
| 797 | wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); | ||
| 798 | } | ||
| 799 | |||
| 653 | /* | 800 | /* |
| 654 | * JBD Might read a cached version of another nodes journal file. We | 801 | * JBD Might read a cached version of another nodes journal file. We |
| 655 | * don't want this as this file changes often and we get no | 802 | * don't want this as this file changes often and we get no |
| @@ -848,6 +995,7 @@ static int __ocfs2_recovery_thread(void *arg) | |||
| 848 | { | 995 | { |
| 849 | int status, node_num; | 996 | int status, node_num; |
| 850 | struct ocfs2_super *osb = arg; | 997 | struct ocfs2_super *osb = arg; |
| 998 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 851 | 999 | ||
| 852 | mlog_entry_void(); | 1000 | mlog_entry_void(); |
| 853 | 1001 | ||
| @@ -863,26 +1011,29 @@ restart: | |||
| 863 | goto bail; | 1011 | goto bail; |
| 864 | } | 1012 | } |
| 865 | 1013 | ||
| 866 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 1014 | spin_lock(&osb->osb_lock); |
| 867 | node_num = ocfs2_node_map_first_set_bit(osb, | 1015 | while (rm->rm_used) { |
| 868 | &osb->recovery_map); | 1016 | /* It's always safe to remove entry zero, as we won't |
| 869 | if (node_num == O2NM_INVALID_NODE_NUM) { | 1017 | * clear it until ocfs2_recover_node() has succeeded. */ |
| 870 | mlog(0, "Out of nodes to recover.\n"); | 1018 | node_num = rm->rm_entries[0]; |
| 871 | break; | 1019 | spin_unlock(&osb->osb_lock); |
| 872 | } | ||
| 873 | 1020 | ||
| 874 | status = ocfs2_recover_node(osb, node_num); | 1021 | status = ocfs2_recover_node(osb, node_num); |
| 875 | if (status < 0) { | 1022 | if (!status) { |
| 1023 | ocfs2_recovery_map_clear(osb, node_num); | ||
| 1024 | } else { | ||
| 876 | mlog(ML_ERROR, | 1025 | mlog(ML_ERROR, |
| 877 | "Error %d recovering node %d on device (%u,%u)!\n", | 1026 | "Error %d recovering node %d on device (%u,%u)!\n", |
| 878 | status, node_num, | 1027 | status, node_num, |
| 879 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1028 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 880 | mlog(ML_ERROR, "Volume requires unmount.\n"); | 1029 | mlog(ML_ERROR, "Volume requires unmount.\n"); |
| 881 | continue; | ||
| 882 | } | 1030 | } |
| 883 | 1031 | ||
| 884 | ocfs2_recovery_map_clear(osb, node_num); | 1032 | spin_lock(&osb->osb_lock); |
| 885 | } | 1033 | } |
| 1034 | spin_unlock(&osb->osb_lock); | ||
| 1035 | mlog(0, "All nodes recovered\n"); | ||
| 1036 | |||
| 886 | ocfs2_super_unlock(osb, 1); | 1037 | ocfs2_super_unlock(osb, 1); |
| 887 | 1038 | ||
| 888 | /* We always run recovery on our own orphan dir - the dead | 1039 | /* We always run recovery on our own orphan dir - the dead |
| @@ -893,8 +1044,7 @@ restart: | |||
| 893 | 1044 | ||
| 894 | bail: | 1045 | bail: |
| 895 | mutex_lock(&osb->recovery_lock); | 1046 | mutex_lock(&osb->recovery_lock); |
| 896 | if (!status && | 1047 | if (!status && !ocfs2_recovery_completed(osb)) { |
| 897 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | ||
| 898 | mutex_unlock(&osb->recovery_lock); | 1048 | mutex_unlock(&osb->recovery_lock); |
| 899 | goto restart; | 1049 | goto restart; |
| 900 | } | 1050 | } |
| @@ -924,8 +1074,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
| 924 | 1074 | ||
| 925 | /* People waiting on recovery will wait on | 1075 | /* People waiting on recovery will wait on |
| 926 | * the recovery map to empty. */ | 1076 | * the recovery map to empty. */ |
| 927 | if (!ocfs2_recovery_map_set(osb, node_num)) | 1077 | if (ocfs2_recovery_map_set(osb, node_num)) |
| 928 | mlog(0, "node %d already be in recovery.\n", node_num); | 1078 | mlog(0, "node %d already in recovery map.\n", node_num); |
| 929 | 1079 | ||
| 930 | mlog(0, "starting recovery thread...\n"); | 1080 | mlog(0, "starting recovery thread...\n"); |
| 931 | 1081 | ||
| @@ -1079,7 +1229,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1079 | { | 1229 | { |
| 1080 | int status = 0; | 1230 | int status = 0; |
| 1081 | int slot_num; | 1231 | int slot_num; |
| 1082 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 1083 | struct ocfs2_dinode *la_copy = NULL; | 1232 | struct ocfs2_dinode *la_copy = NULL; |
| 1084 | struct ocfs2_dinode *tl_copy = NULL; | 1233 | struct ocfs2_dinode *tl_copy = NULL; |
| 1085 | 1234 | ||
| @@ -1092,8 +1241,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1092 | * case we should've called ocfs2_journal_load instead. */ | 1241 | * case we should've called ocfs2_journal_load instead. */ |
| 1093 | BUG_ON(osb->node_num == node_num); | 1242 | BUG_ON(osb->node_num == node_num); |
| 1094 | 1243 | ||
| 1095 | slot_num = ocfs2_node_num_to_slot(si, node_num); | 1244 | slot_num = ocfs2_node_num_to_slot(osb, node_num); |
| 1096 | if (slot_num == OCFS2_INVALID_SLOT) { | 1245 | if (slot_num == -ENOENT) { |
| 1097 | status = 0; | 1246 | status = 0; |
| 1098 | mlog(0, "no slot for this node, so no recovery required.\n"); | 1247 | mlog(0, "no slot for this node, so no recovery required.\n"); |
| 1099 | goto done; | 1248 | goto done; |
| @@ -1123,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1123 | 1272 | ||
| 1124 | /* Likewise, this would be a strange but ultimately not so | 1273 | /* Likewise, this would be a strange but ultimately not so |
| 1125 | * harmful place to get an error... */ | 1274 | * harmful place to get an error... */ |
| 1126 | ocfs2_clear_slot(si, slot_num); | 1275 | status = ocfs2_clear_slot(osb, slot_num); |
| 1127 | status = ocfs2_update_disk_slots(osb, si); | ||
| 1128 | if (status < 0) | 1276 | if (status < 0) |
| 1129 | mlog_errno(status); | 1277 | mlog_errno(status); |
| 1130 | 1278 | ||
| @@ -1184,23 +1332,24 @@ bail: | |||
| 1184 | * slot info struct has been updated from disk. */ | 1332 | * slot info struct has been updated from disk. */ |
| 1185 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | 1333 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) |
| 1186 | { | 1334 | { |
| 1187 | int status, i, node_num; | 1335 | unsigned int node_num; |
| 1188 | struct ocfs2_slot_info *si = osb->slot_info; | 1336 | int status, i; |
| 1189 | 1337 | ||
| 1190 | /* This is called with the super block cluster lock, so we | 1338 | /* This is called with the super block cluster lock, so we |
| 1191 | * know that the slot map can't change underneath us. */ | 1339 | * know that the slot map can't change underneath us. */ |
| 1192 | 1340 | ||
| 1193 | spin_lock(&si->si_lock); | 1341 | spin_lock(&osb->osb_lock); |
| 1194 | for(i = 0; i < si->si_num_slots; i++) { | 1342 | for (i = 0; i < osb->max_slots; i++) { |
| 1195 | if (i == osb->slot_num) | 1343 | if (i == osb->slot_num) |
| 1196 | continue; | 1344 | continue; |
| 1197 | if (ocfs2_is_empty_slot(si, i)) | 1345 | |
| 1346 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | ||
| 1347 | if (status == -ENOENT) | ||
| 1198 | continue; | 1348 | continue; |
| 1199 | 1349 | ||
| 1200 | node_num = si->si_global_node_nums[i]; | 1350 | if (__ocfs2_recovery_map_test(osb, node_num)) |
| 1201 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) | ||
| 1202 | continue; | 1351 | continue; |
| 1203 | spin_unlock(&si->si_lock); | 1352 | spin_unlock(&osb->osb_lock); |
| 1204 | 1353 | ||
| 1205 | /* Ok, we have a slot occupied by another node which | 1354 | /* Ok, we have a slot occupied by another node which |
| 1206 | * is not in the recovery map. We trylock his journal | 1355 | * is not in the recovery map. We trylock his journal |
| @@ -1216,9 +1365,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1216 | goto bail; | 1365 | goto bail; |
| 1217 | } | 1366 | } |
| 1218 | 1367 | ||
| 1219 | spin_lock(&si->si_lock); | 1368 | spin_lock(&osb->osb_lock); |
| 1220 | } | 1369 | } |
| 1221 | spin_unlock(&si->si_lock); | 1370 | spin_unlock(&osb->osb_lock); |
| 1222 | 1371 | ||
| 1223 | status = 0; | 1372 | status = 0; |
| 1224 | bail: | 1373 | bail: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 220f3e818e78..db82be2532ed 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | |||
| 134 | 134 | ||
| 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ | 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ |
| 136 | void ocfs2_complete_recovery(struct work_struct *work); | 136 | void ocfs2_complete_recovery(struct work_struct *work); |
| 137 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb); | ||
| 138 | |||
| 139 | int ocfs2_recovery_init(struct ocfs2_super *osb); | ||
| 140 | void ocfs2_recovery_exit(struct ocfs2_super *osb); | ||
| 137 | 141 | ||
| 138 | /* | 142 | /* |
| 139 | * Journal Control: | 143 | * Journal Control: |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ab83fd562429..ce0dc147602a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -447,6 +447,8 @@ out_mutex: | |||
| 447 | iput(main_bm_inode); | 447 | iput(main_bm_inode); |
| 448 | 448 | ||
| 449 | out: | 449 | out: |
| 450 | if (!status) | ||
| 451 | ocfs2_init_inode_steal_slot(osb); | ||
| 450 | mlog_exit(status); | 452 | mlog_exit(status); |
| 451 | return status; | 453 | return status; |
| 452 | } | 454 | } |
| @@ -523,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 523 | } | 525 | } |
| 524 | 526 | ||
| 525 | ac->ac_inode = local_alloc_inode; | 527 | ac->ac_inode = local_alloc_inode; |
| 528 | /* We should never use localalloc from another slot */ | ||
| 529 | ac->ac_alloc_slot = osb->slot_num; | ||
| 526 | ac->ac_which = OCFS2_AC_USE_LOCAL; | 530 | ac->ac_which = OCFS2_AC_USE_LOCAL; |
| 527 | get_bh(osb->local_alloc_bh); | 531 | get_bh(osb->local_alloc_bh); |
| 528 | ac->ac_bh = osb->local_alloc_bh; | 532 | ac->ac_bh = osb->local_alloc_bh; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ae9ad9587516..d5d808fe0140 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
| 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); | 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); |
| 425 | fe->i_blkno = cpu_to_le64(fe_blkno); | 425 | fe->i_blkno = cpu_to_le64(fe_blkno); |
| 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); | 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); |
| 427 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); | 427 | fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); |
| 428 | fe->i_uid = cpu_to_le32(current->fsuid); | 428 | fe->i_uid = cpu_to_le32(current->fsuid); |
| 429 | if (dir->i_mode & S_ISGID) { | 429 | if (dir->i_mode & S_ISGID) { |
| 430 | fe->i_gid = cpu_to_le32(dir->i_gid); | 430 | fe->i_gid = cpu_to_le32(dir->i_gid); |
| @@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 997 | * | 997 | * |
| 998 | * And that's why, just like the VFS, we need a file system | 998 | * And that's why, just like the VFS, we need a file system |
| 999 | * rename lock. */ | 999 | * rename lock. */ |
| 1000 | if (old_dentry != new_dentry) { | 1000 | if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) { |
| 1001 | status = ocfs2_rename_lock(osb); | 1001 | status = ocfs2_rename_lock(osb); |
| 1002 | if (status < 0) { | 1002 | if (status < 0) { |
| 1003 | mlog_errno(status); | 1003 | mlog_errno(status); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6546cef212e3..31692379c170 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -36,11 +36,8 @@ | |||
| 36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
| 37 | #include <linux/jbd.h> | 37 | #include <linux/jbd.h> |
| 38 | 38 | ||
| 39 | #include "cluster/nodemanager.h" | 39 | /* For union ocfs2_dlm_lksb */ |
| 40 | #include "cluster/heartbeat.h" | 40 | #include "stackglue.h" |
| 41 | #include "cluster/tcp.h" | ||
| 42 | |||
| 43 | #include "dlm/dlmapi.h" | ||
| 44 | 41 | ||
| 45 | #include "ocfs2_fs.h" | 42 | #include "ocfs2_fs.h" |
| 46 | #include "ocfs2_lockid.h" | 43 | #include "ocfs2_lockid.h" |
| @@ -101,6 +98,9 @@ enum ocfs2_unlock_action { | |||
| 101 | * dropped. */ | 98 | * dropped. */ |
| 102 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 99 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
| 103 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | 100 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ |
| 101 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | ||
| 102 | call to dlm_lock. Only | ||
| 103 | exists with BUSY set. */ | ||
| 104 | 104 | ||
| 105 | struct ocfs2_lock_res_ops; | 105 | struct ocfs2_lock_res_ops; |
| 106 | 106 | ||
| @@ -120,13 +120,14 @@ struct ocfs2_lock_res { | |||
| 120 | int l_level; | 120 | int l_level; |
| 121 | unsigned int l_ro_holders; | 121 | unsigned int l_ro_holders; |
| 122 | unsigned int l_ex_holders; | 122 | unsigned int l_ex_holders; |
| 123 | struct dlm_lockstatus l_lksb; | 123 | union ocfs2_dlm_lksb l_lksb; |
| 124 | 124 | ||
| 125 | /* used from AST/BAST funcs. */ | 125 | /* used from AST/BAST funcs. */ |
| 126 | enum ocfs2_ast_action l_action; | 126 | enum ocfs2_ast_action l_action; |
| 127 | enum ocfs2_unlock_action l_unlock_action; | 127 | enum ocfs2_unlock_action l_unlock_action; |
| 128 | int l_requested; | 128 | int l_requested; |
| 129 | int l_blocking; | 129 | int l_blocking; |
| 130 | unsigned int l_pending_gen; | ||
| 130 | 131 | ||
| 131 | wait_queue_head_t l_event; | 132 | wait_queue_head_t l_event; |
| 132 | 133 | ||
| @@ -179,6 +180,8 @@ enum ocfs2_mount_options | |||
| 179 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | 180 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
| 180 | 181 | ||
| 181 | struct ocfs2_journal; | 182 | struct ocfs2_journal; |
| 183 | struct ocfs2_slot_info; | ||
| 184 | struct ocfs2_recovery_map; | ||
| 182 | struct ocfs2_super | 185 | struct ocfs2_super |
| 183 | { | 186 | { |
| 184 | struct task_struct *commit_task; | 187 | struct task_struct *commit_task; |
| @@ -190,7 +193,6 @@ struct ocfs2_super | |||
| 190 | struct ocfs2_slot_info *slot_info; | 193 | struct ocfs2_slot_info *slot_info; |
| 191 | 194 | ||
| 192 | spinlock_t node_map_lock; | 195 | spinlock_t node_map_lock; |
| 193 | struct ocfs2_node_map recovery_map; | ||
| 194 | 196 | ||
| 195 | u64 root_blkno; | 197 | u64 root_blkno; |
| 196 | u64 system_dir_blkno; | 198 | u64 system_dir_blkno; |
| @@ -206,25 +208,29 @@ struct ocfs2_super | |||
| 206 | u32 s_feature_incompat; | 208 | u32 s_feature_incompat; |
| 207 | u32 s_feature_ro_compat; | 209 | u32 s_feature_ro_compat; |
| 208 | 210 | ||
| 209 | /* Protects s_next_generaion, osb_flags. Could protect more on | 211 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. |
| 210 | * osb as it's very short lived. */ | 212 | * Could protect more on osb as it's very short lived. |
| 213 | */ | ||
| 211 | spinlock_t osb_lock; | 214 | spinlock_t osb_lock; |
| 212 | u32 s_next_generation; | 215 | u32 s_next_generation; |
| 213 | unsigned long osb_flags; | 216 | unsigned long osb_flags; |
| 217 | s16 s_inode_steal_slot; | ||
| 218 | atomic_t s_num_inodes_stolen; | ||
| 214 | 219 | ||
| 215 | unsigned long s_mount_opt; | 220 | unsigned long s_mount_opt; |
| 216 | unsigned int s_atime_quantum; | 221 | unsigned int s_atime_quantum; |
| 217 | 222 | ||
| 218 | u16 max_slots; | 223 | unsigned int max_slots; |
| 219 | s16 node_num; | 224 | unsigned int node_num; |
| 220 | s16 slot_num; | 225 | int slot_num; |
| 221 | s16 preferred_slot; | 226 | int preferred_slot; |
| 222 | int s_sectsize_bits; | 227 | int s_sectsize_bits; |
| 223 | int s_clustersize; | 228 | int s_clustersize; |
| 224 | int s_clustersize_bits; | 229 | int s_clustersize_bits; |
| 225 | 230 | ||
| 226 | atomic_t vol_state; | 231 | atomic_t vol_state; |
| 227 | struct mutex recovery_lock; | 232 | struct mutex recovery_lock; |
| 233 | struct ocfs2_recovery_map *recovery_map; | ||
| 228 | struct task_struct *recovery_thread_task; | 234 | struct task_struct *recovery_thread_task; |
| 229 | int disable_recovery; | 235 | int disable_recovery; |
| 230 | wait_queue_head_t checkpoint_event; | 236 | wait_queue_head_t checkpoint_event; |
| @@ -245,12 +251,11 @@ struct ocfs2_super | |||
| 245 | struct ocfs2_alloc_stats alloc_stats; | 251 | struct ocfs2_alloc_stats alloc_stats; |
| 246 | char dev_str[20]; /* "major,minor" of the device */ | 252 | char dev_str[20]; /* "major,minor" of the device */ |
| 247 | 253 | ||
| 248 | struct dlm_ctxt *dlm; | 254 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
| 255 | struct ocfs2_cluster_connection *cconn; | ||
| 249 | struct ocfs2_lock_res osb_super_lockres; | 256 | struct ocfs2_lock_res osb_super_lockres; |
| 250 | struct ocfs2_lock_res osb_rename_lockres; | 257 | struct ocfs2_lock_res osb_rename_lockres; |
| 251 | struct dlm_eviction_cb osb_eviction_cb; | ||
| 252 | struct ocfs2_dlm_debug *osb_dlm_debug; | 258 | struct ocfs2_dlm_debug *osb_dlm_debug; |
| 253 | struct dlm_protocol_version osb_locking_proto; | ||
| 254 | 259 | ||
| 255 | struct dentry *osb_debug_root; | 260 | struct dentry *osb_debug_root; |
| 256 | 261 | ||
| @@ -367,11 +372,24 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
| 367 | return ret; | 372 | return ret; |
| 368 | } | 373 | } |
| 369 | 374 | ||
| 375 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
| 376 | { | ||
| 377 | return (osb->s_feature_incompat & | ||
| 378 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | ||
| 379 | } | ||
| 380 | |||
| 370 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 381 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
| 371 | { | 382 | { |
| 372 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); | 383 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); |
| 373 | } | 384 | } |
| 374 | 385 | ||
| 386 | static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | ||
| 387 | { | ||
| 388 | return (osb->s_feature_incompat & | ||
| 389 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); | ||
| 390 | } | ||
| 391 | |||
| 392 | |||
| 375 | #define OCFS2_IS_VALID_DINODE(ptr) \ | 393 | #define OCFS2_IS_VALID_DINODE(ptr) \ |
| 376 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) | 394 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) |
| 377 | 395 | ||
| @@ -522,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
| 522 | return pages_per_cluster; | 540 | return pages_per_cluster; |
| 523 | } | 541 | } |
| 524 | 542 | ||
| 543 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | ||
| 544 | { | ||
| 545 | spin_lock(&osb->osb_lock); | ||
| 546 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
| 547 | spin_unlock(&osb->osb_lock); | ||
| 548 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
| 549 | } | ||
| 550 | |||
| 551 | static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, | ||
| 552 | s16 slot) | ||
| 553 | { | ||
| 554 | spin_lock(&osb->osb_lock); | ||
| 555 | osb->s_inode_steal_slot = slot; | ||
| 556 | spin_unlock(&osb->osb_lock); | ||
| 557 | } | ||
| 558 | |||
| 559 | static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
| 560 | { | ||
| 561 | s16 slot; | ||
| 562 | |||
| 563 | spin_lock(&osb->osb_lock); | ||
| 564 | slot = osb->s_inode_steal_slot; | ||
| 565 | spin_unlock(&osb->osb_lock); | ||
| 566 | |||
| 567 | return slot; | ||
| 568 | } | ||
| 569 | |||
| 525 | #define ocfs2_set_bit ext2_set_bit | 570 | #define ocfs2_set_bit ext2_set_bit |
| 526 | #define ocfs2_clear_bit ext2_clear_bit | 571 | #define ocfs2_clear_bit ext2_clear_bit |
| 527 | #define ocfs2_test_bit ext2_test_bit | 572 | #define ocfs2_test_bit ext2_test_bit |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3633edd3982f..52c426665154 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -88,7 +88,9 @@ | |||
| 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB | 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB |
| 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ |
| 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
| 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA) | 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
| 92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | ||
| 93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | ||
| 92 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
| 93 | 95 | ||
| 94 | /* | 96 | /* |
| @@ -125,6 +127,21 @@ | |||
| 125 | /* Support for data packed into inode blocks */ | 127 | /* Support for data packed into inode blocks */ |
| 126 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
| 127 | 129 | ||
| 130 | /* Support for the extended slot map */ | ||
| 131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
| 132 | |||
| 133 | |||
| 134 | /* | ||
| 135 | * Support for alternate, userspace cluster stacks. If set, the superblock | ||
| 136 | * field s_cluster_info contains a tag for the alternate stack in use as | ||
| 137 | * well as the name of the cluster being joined. | ||
| 138 | * mount.ocfs2 must pass in a matching stack name. | ||
| 139 | * | ||
| 140 | * If not set, the classic stack will be used. This is compatible with | ||
| 141 | * all older versions. | ||
| 142 | */ | ||
| 143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | ||
| 144 | |||
| 128 | /* | 145 | /* |
| 129 | * backup superblock flag is used to indicate that this volume | 146 | * backup superblock flag is used to indicate that this volume |
| 130 | * has backup superblocks. | 147 | * has backup superblocks. |
| @@ -267,6 +284,10 @@ struct ocfs2_new_group_input { | |||
| 267 | #define OCFS2_VOL_UUID_LEN 16 | 284 | #define OCFS2_VOL_UUID_LEN 16 |
| 268 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 285 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
| 269 | 286 | ||
| 287 | /* The alternate, userspace stack fields */ | ||
| 288 | #define OCFS2_STACK_LABEL_LEN 4 | ||
| 289 | #define OCFS2_CLUSTER_NAME_LEN 16 | ||
| 290 | |||
| 270 | /* Journal limits (in bytes) */ | 291 | /* Journal limits (in bytes) */ |
| 271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 292 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
| 272 | 293 | ||
| @@ -475,6 +496,47 @@ struct ocfs2_extent_block | |||
| 475 | }; | 496 | }; |
| 476 | 497 | ||
| 477 | /* | 498 | /* |
| 499 | * On disk slot map for OCFS2. This defines the contents of the "slot_map" | ||
| 500 | * system file. A slot is valid if it contains a node number >= 0. The | ||
| 501 | * value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty. | ||
| 502 | */ | ||
| 503 | struct ocfs2_slot_map { | ||
| 504 | /*00*/ __le16 sm_slots[0]; | ||
| 505 | /* | ||
| 506 | * Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255, | ||
| 507 | * 255 * sizeof(__le16) == 510B, which fits within the 512B minimum blocksize. | ||
| 508 | */ | ||
| 509 | }; | ||
| 510 | |||
| 511 | struct ocfs2_extended_slot { | ||
| 512 | /*00*/ __u8 es_valid; | ||
| 513 | __u8 es_reserved1[3]; | ||
| 514 | __le32 es_node_num; | ||
| 515 | /*10*/ | ||
| 516 | }; | ||
| 517 | |||
| 518 | /* | ||
| 519 | * The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP | ||
| 520 | * is set. It separates out the valid marker from the node number, and | ||
| 521 | * has room to grow. Unlike the old slot map, this format is defined by | ||
| 522 | * i_size. | ||
| 523 | */ | ||
| 524 | struct ocfs2_slot_map_extended { | ||
| 525 | /*00*/ struct ocfs2_extended_slot se_slots[0]; | ||
| 526 | /* | ||
| 527 | * Actual size is i_size of the slot_map system file. It should | ||
| 528 | * match s_max_slots * sizeof(struct ocfs2_extended_slot) | ||
| 529 | */ | ||
| 530 | }; | ||
| 531 | |||
| 532 | struct ocfs2_cluster_info { | ||
| 533 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | ||
| 534 | __le32 ci_reserved; | ||
| 535 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | ||
| 536 | /*18*/ | ||
| 537 | }; | ||
| 538 | |||
| 539 | /* | ||
| 478 | * On disk superblock for OCFS2 | 540 | * On disk superblock for OCFS2 |
| 479 | * Note that it is contained inside an ocfs2_dinode, so all offsets | 541 | * Note that it is contained inside an ocfs2_dinode, so all offsets |
| 480 | * are relative to the start of ocfs2_dinode.id2. | 542 | * are relative to the start of ocfs2_dinode.id2. |
| @@ -506,7 +568,20 @@ struct ocfs2_super_block { | |||
| 506 | * group header */ | 568 | * group header */ |
| 507 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
| 508 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 570 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
| 509 | /*A0*/ | 571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
| 572 | stack. Only valid | ||
| 573 | with INCOMPAT flag. */ | ||
| 574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | ||
| 575 | /*140*/ | ||
| 576 | |||
| 577 | /* | ||
| 578 | * NOTE: As stated above, all offsets are relative to | ||
| 579 | * ocfs2_dinode.id2, which is at 0xC0 in the inode. | ||
| 580 | * 0xC0 + 0x140 = 0x200 or 512 bytes. A superblock must fit within | ||
| 581 | * our smallest blocksize, which is 512 bytes. To ensure this, | ||
| 582 | * we reserve the space in s_reserved2. Anything past s_reserved2 | ||
| 583 | * will not be available on the smallest blocksize. | ||
| 584 | */ | ||
| 510 | }; | 585 | }; |
| 511 | 586 | ||
| 512 | /* | 587 | /* |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 86f3e3799c2b..82c200f7a8f1 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
| @@ -100,7 +100,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
| 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
| 101 | { | 101 | { |
| 102 | #ifdef __KERNEL__ | 102 | #ifdef __KERNEL__ |
| 103 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | 103 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); |
| 104 | #endif | 104 | #endif |
| 105 | return ocfs2_lock_type_strings[type]; | 105 | return ocfs2_lock_type_strings[type]; |
| 106 | } | 106 | } |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 3a50ce555e64..bb5ff8939bf1 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -42,81 +42,244 @@ | |||
| 42 | 42 | ||
| 43 | #include "buffer_head_io.h" | 43 | #include "buffer_head_io.h" |
| 44 | 44 | ||
| 45 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 45 | |
| 46 | s16 global); | 46 | struct ocfs2_slot { |
| 47 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 47 | int sl_valid; |
| 48 | s16 slot_num, | 48 | unsigned int sl_node_num; |
| 49 | s16 node_num); | 49 | }; |
| 50 | 50 | ||
| 51 | /* post the slot information on disk into our slot_info struct. */ | 51 | struct ocfs2_slot_info { |
| 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | int si_extended; |
| 53 | int si_slots_per_block; | ||
| 54 | struct inode *si_inode; | ||
| 55 | unsigned int si_blocks; | ||
| 56 | struct buffer_head **si_bh; | ||
| 57 | unsigned int si_num_slots; | ||
| 58 | struct ocfs2_slot *si_slots; | ||
| 59 | }; | ||
| 60 | |||
| 61 | |||
| 62 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 63 | unsigned int node_num); | ||
| 64 | |||
| 65 | static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, | ||
| 66 | int slot_num) | ||
| 67 | { | ||
| 68 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
| 69 | si->si_slots[slot_num].sl_valid = 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | static void ocfs2_set_slot(struct ocfs2_slot_info *si, | ||
| 73 | int slot_num, unsigned int node_num) | ||
| 74 | { | ||
| 75 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
| 76 | |||
| 77 | si->si_slots[slot_num].sl_valid = 1; | ||
| 78 | si->si_slots[slot_num].sl_node_num = node_num; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* This version is for the extended slot map */ | ||
| 82 | static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si) | ||
| 83 | { | ||
| 84 | int b, i, slotno; | ||
| 85 | struct ocfs2_slot_map_extended *se; | ||
| 86 | |||
| 87 | slotno = 0; | ||
| 88 | for (b = 0; b < si->si_blocks; b++) { | ||
| 89 | se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data; | ||
| 90 | for (i = 0; | ||
| 91 | (i < si->si_slots_per_block) && | ||
| 92 | (slotno < si->si_num_slots); | ||
| 93 | i++, slotno++) { | ||
| 94 | if (se->se_slots[i].es_valid) | ||
| 95 | ocfs2_set_slot(si, slotno, | ||
| 96 | le32_to_cpu(se->se_slots[i].es_node_num)); | ||
| 97 | else | ||
| 98 | ocfs2_invalidate_slot(si, slotno); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Post the slot information on disk into our slot_info struct. | ||
| 105 | * Must be protected by osb_lock. | ||
| 106 | */ | ||
| 107 | static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si) | ||
| 53 | { | 108 | { |
| 54 | int i; | 109 | int i; |
| 55 | __le16 *disk_info; | 110 | struct ocfs2_slot_map *sm; |
| 56 | 111 | ||
| 57 | /* we don't read the slot block here as ocfs2_super_lock | 112 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; |
| 58 | * should've made sure we have the most recent copy. */ | ||
| 59 | spin_lock(&si->si_lock); | ||
| 60 | disk_info = (__le16 *) si->si_bh->b_data; | ||
| 61 | 113 | ||
| 62 | for (i = 0; i < si->si_size; i++) | 114 | for (i = 0; i < si->si_num_slots; i++) { |
| 63 | si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); | 115 | if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT) |
| 116 | ocfs2_invalidate_slot(si, i); | ||
| 117 | else | ||
| 118 | ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i])); | ||
| 119 | } | ||
| 120 | } | ||
| 64 | 121 | ||
| 65 | spin_unlock(&si->si_lock); | 122 | static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
| 123 | { | ||
| 124 | /* | ||
| 125 | * The slot data will have been refreshed when ocfs2_super_lock | ||
| 126 | * was taken. | ||
| 127 | */ | ||
| 128 | if (si->si_extended) | ||
| 129 | ocfs2_update_slot_info_extended(si); | ||
| 130 | else | ||
| 131 | ocfs2_update_slot_info_old(si); | ||
| 132 | } | ||
| 133 | |||
| 134 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | ||
| 135 | { | ||
| 136 | int ret; | ||
| 137 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 138 | |||
| 139 | if (si == NULL) | ||
| 140 | return 0; | ||
| 141 | |||
| 142 | BUG_ON(si->si_blocks == 0); | ||
| 143 | BUG_ON(si->si_bh == NULL); | ||
| 144 | |||
| 145 | mlog(0, "Refreshing slot map, reading %u block(s)\n", | ||
| 146 | si->si_blocks); | ||
| 147 | |||
| 148 | /* | ||
| 149 | * We pass -1 as blocknr because we expect all of si->si_bh to | ||
| 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | ||
| 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | ||
| 152 | */ | ||
| 153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | ||
| 154 | si->si_inode); | ||
| 155 | if (ret == 0) { | ||
| 156 | spin_lock(&osb->osb_lock); | ||
| 157 | ocfs2_update_slot_info(si); | ||
| 158 | spin_unlock(&osb->osb_lock); | ||
| 159 | } | ||
| 160 | |||
| 161 | return ret; | ||
| 66 | } | 162 | } |
| 67 | 163 | ||
| 68 | /* post our slot info stuff into its destination bh and write it | 164 | /* post our slot info stuff into its destination bh and write it |
| 69 | * out. */ | 165 | * out. */ |
| 70 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | 166 | static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si, |
| 71 | struct ocfs2_slot_info *si) | 167 | int slot_num, |
| 168 | struct buffer_head **bh) | ||
| 72 | { | 169 | { |
| 73 | int status, i; | 170 | int blkind = slot_num / si->si_slots_per_block; |
| 74 | __le16 *disk_info = (__le16 *) si->si_bh->b_data; | 171 | int slotno = slot_num % si->si_slots_per_block; |
| 172 | struct ocfs2_slot_map_extended *se; | ||
| 173 | |||
| 174 | BUG_ON(blkind >= si->si_blocks); | ||
| 175 | |||
| 176 | se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data; | ||
| 177 | se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid; | ||
| 178 | if (si->si_slots[slot_num].sl_valid) | ||
| 179 | se->se_slots[slotno].es_node_num = | ||
| 180 | cpu_to_le32(si->si_slots[slot_num].sl_node_num); | ||
| 181 | *bh = si->si_bh[blkind]; | ||
| 182 | } | ||
| 75 | 183 | ||
| 76 | spin_lock(&si->si_lock); | 184 | static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si, |
| 77 | for (i = 0; i < si->si_size; i++) | 185 | int slot_num, |
| 78 | disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); | 186 | struct buffer_head **bh) |
| 79 | spin_unlock(&si->si_lock); | 187 | { |
| 188 | int i; | ||
| 189 | struct ocfs2_slot_map *sm; | ||
| 190 | |||
| 191 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; | ||
| 192 | for (i = 0; i < si->si_num_slots; i++) { | ||
| 193 | if (si->si_slots[i].sl_valid) | ||
| 194 | sm->sm_slots[i] = | ||
| 195 | cpu_to_le16(si->si_slots[i].sl_node_num); | ||
| 196 | else | ||
| 197 | sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT); | ||
| 198 | } | ||
| 199 | *bh = si->si_bh[0]; | ||
| 200 | } | ||
| 201 | |||
| 202 | static int ocfs2_update_disk_slot(struct ocfs2_super *osb, | ||
| 203 | struct ocfs2_slot_info *si, | ||
| 204 | int slot_num) | ||
| 205 | { | ||
| 206 | int status; | ||
| 207 | struct buffer_head *bh; | ||
| 208 | |||
| 209 | spin_lock(&osb->osb_lock); | ||
| 210 | if (si->si_extended) | ||
| 211 | ocfs2_update_disk_slot_extended(si, slot_num, &bh); | ||
| 212 | else | ||
| 213 | ocfs2_update_disk_slot_old(si, slot_num, &bh); | ||
| 214 | spin_unlock(&osb->osb_lock); | ||
| 80 | 215 | ||
| 81 | status = ocfs2_write_block(osb, si->si_bh, si->si_inode); | 216 | status = ocfs2_write_block(osb, bh, si->si_inode); |
| 82 | if (status < 0) | 217 | if (status < 0) |
| 83 | mlog_errno(status); | 218 | mlog_errno(status); |
| 84 | 219 | ||
| 85 | return status; | 220 | return status; |
| 86 | } | 221 | } |
| 87 | 222 | ||
| 88 | /* try to find global node in the slot info. Returns | 223 | /* |
| 89 | * OCFS2_INVALID_SLOT if nothing is found. */ | 224 | * Calculate how many bytes are needed by the slot map. Returns |
| 90 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 225 | * an error if the slot map file is too small. |
| 91 | s16 global) | 226 | */ |
| 227 | static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb, | ||
| 228 | struct inode *inode, | ||
| 229 | unsigned long long *bytes) | ||
| 92 | { | 230 | { |
| 93 | int i; | 231 | unsigned long long bytes_needed; |
| 94 | s16 ret = OCFS2_INVALID_SLOT; | 232 | |
| 233 | if (ocfs2_uses_extended_slot_map(osb)) { | ||
| 234 | bytes_needed = osb->max_slots * | ||
| 235 | sizeof(struct ocfs2_extended_slot); | ||
| 236 | } else { | ||
| 237 | bytes_needed = osb->max_slots * sizeof(__le16); | ||
| 238 | } | ||
| 239 | if (bytes_needed > i_size_read(inode)) { | ||
| 240 | mlog(ML_ERROR, | ||
| 241 | "Slot map file is too small! (size %llu, needed %llu)\n", | ||
| 242 | i_size_read(inode), bytes_needed); | ||
| 243 | return -ENOSPC; | ||
| 244 | } | ||
| 245 | |||
| 246 | *bytes = bytes_needed; | ||
| 247 | return 0; | ||
| 248 | } | ||
| 249 | |||
| 250 | /* try to find global node in the slot info. Returns -ENOENT | ||
| 251 | * if nothing is found. */ | ||
| 252 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 253 | unsigned int node_num) | ||
| 254 | { | ||
| 255 | int i, ret = -ENOENT; | ||
| 95 | 256 | ||
| 96 | for(i = 0; i < si->si_num_slots; i++) { | 257 | for(i = 0; i < si->si_num_slots; i++) { |
| 97 | if (global == si->si_global_node_nums[i]) { | 258 | if (si->si_slots[i].sl_valid && |
| 98 | ret = (s16) i; | 259 | (node_num == si->si_slots[i].sl_node_num)) { |
| 260 | ret = i; | ||
| 99 | break; | 261 | break; |
| 100 | } | 262 | } |
| 101 | } | 263 | } |
| 264 | |||
| 102 | return ret; | 265 | return ret; |
| 103 | } | 266 | } |
| 104 | 267 | ||
| 105 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) | 268 | static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, |
| 269 | int preferred) | ||
| 106 | { | 270 | { |
| 107 | int i; | 271 | int i, ret = -ENOSPC; |
| 108 | s16 ret = OCFS2_INVALID_SLOT; | ||
| 109 | 272 | ||
| 110 | if (preferred >= 0 && preferred < si->si_num_slots) { | 273 | if ((preferred >= 0) && (preferred < si->si_num_slots)) { |
| 111 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { | 274 | if (!si->si_slots[preferred].sl_valid) { |
| 112 | ret = preferred; | 275 | ret = preferred; |
| 113 | goto out; | 276 | goto out; |
| 114 | } | 277 | } |
| 115 | } | 278 | } |
| 116 | 279 | ||
| 117 | for(i = 0; i < si->si_num_slots; i++) { | 280 | for(i = 0; i < si->si_num_slots; i++) { |
| 118 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { | 281 | if (!si->si_slots[i].sl_valid) { |
| 119 | ret = (s16) i; | 282 | ret = i; |
| 120 | break; | 283 | break; |
| 121 | } | 284 | } |
| 122 | } | 285 | } |
| @@ -124,58 +287,155 @@ out: | |||
| 124 | return ret; | 287 | return ret; |
| 125 | } | 288 | } |
| 126 | 289 | ||
| 127 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 290 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num) |
| 128 | s16 global) | ||
| 129 | { | 291 | { |
| 130 | s16 ret; | 292 | int slot; |
| 293 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 131 | 294 | ||
| 132 | spin_lock(&si->si_lock); | 295 | spin_lock(&osb->osb_lock); |
| 133 | ret = __ocfs2_node_num_to_slot(si, global); | 296 | slot = __ocfs2_node_num_to_slot(si, node_num); |
| 134 | spin_unlock(&si->si_lock); | 297 | spin_unlock(&osb->osb_lock); |
| 135 | return ret; | 298 | |
| 299 | return slot; | ||
| 300 | } | ||
| 301 | |||
| 302 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, | ||
| 303 | unsigned int *node_num) | ||
| 304 | { | ||
| 305 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 306 | |||
| 307 | assert_spin_locked(&osb->osb_lock); | ||
| 308 | |||
| 309 | BUG_ON(slot_num < 0); | ||
| 310 | BUG_ON(slot_num > osb->max_slots); | ||
| 311 | |||
| 312 | if (!si->si_slots[slot_num].sl_valid) | ||
| 313 | return -ENOENT; | ||
| 314 | |||
| 315 | *node_num = si->si_slots[slot_num].sl_node_num; | ||
| 316 | return 0; | ||
| 136 | } | 317 | } |
| 137 | 318 | ||
| 138 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 319 | static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) |
| 139 | s16 slot_num, | ||
| 140 | s16 node_num) | ||
| 141 | { | 320 | { |
| 142 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | 321 | unsigned int i; |
| 143 | BUG_ON(slot_num >= si->si_num_slots); | 322 | |
| 144 | BUG_ON((node_num != O2NM_INVALID_NODE_NUM) && | 323 | if (si == NULL) |
| 145 | (node_num >= O2NM_MAX_NODES)); | 324 | return; |
| 325 | |||
| 326 | if (si->si_inode) | ||
| 327 | iput(si->si_inode); | ||
| 328 | if (si->si_bh) { | ||
| 329 | for (i = 0; i < si->si_blocks; i++) { | ||
| 330 | if (si->si_bh[i]) { | ||
| 331 | brelse(si->si_bh[i]); | ||
| 332 | si->si_bh[i] = NULL; | ||
| 333 | } | ||
| 334 | } | ||
| 335 | kfree(si->si_bh); | ||
| 336 | } | ||
| 146 | 337 | ||
| 147 | si->si_global_node_nums[slot_num] = node_num; | 338 | kfree(si); |
| 148 | } | 339 | } |
| 149 | 340 | ||
| 150 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 341 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) |
| 151 | s16 slot_num) | ||
| 152 | { | 342 | { |
| 153 | spin_lock(&si->si_lock); | 343 | struct ocfs2_slot_info *si = osb->slot_info; |
| 154 | __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); | 344 | |
| 155 | spin_unlock(&si->si_lock); | 345 | if (si == NULL) |
| 346 | return 0; | ||
| 347 | |||
| 348 | spin_lock(&osb->osb_lock); | ||
| 349 | ocfs2_invalidate_slot(si, slot_num); | ||
| 350 | spin_unlock(&osb->osb_lock); | ||
| 351 | |||
| 352 | return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num); | ||
| 156 | } | 353 | } |
| 157 | 354 | ||
| 158 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | 355 | static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, |
| 356 | struct ocfs2_slot_info *si) | ||
| 159 | { | 357 | { |
| 160 | int status, i; | 358 | int status = 0; |
| 161 | u64 blkno; | 359 | u64 blkno; |
| 360 | unsigned long long blocks, bytes; | ||
| 361 | unsigned int i; | ||
| 362 | struct buffer_head *bh; | ||
| 363 | |||
| 364 | status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes); | ||
| 365 | if (status) | ||
| 366 | goto bail; | ||
| 367 | |||
| 368 | blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes); | ||
| 369 | BUG_ON(blocks > UINT_MAX); | ||
| 370 | si->si_blocks = blocks; | ||
| 371 | if (!si->si_blocks) | ||
| 372 | goto bail; | ||
| 373 | |||
| 374 | if (si->si_extended) | ||
| 375 | si->si_slots_per_block = | ||
| 376 | (osb->sb->s_blocksize / | ||
| 377 | sizeof(struct ocfs2_extended_slot)); | ||
| 378 | else | ||
| 379 | si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16); | ||
| 380 | |||
| 381 | /* The size checks above should ensure this */ | ||
| 382 | BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); | ||
| 383 | |||
| 384 | mlog(0, "Slot map needs %u buffers for %llu bytes\n", | ||
| 385 | si->si_blocks, bytes); | ||
| 386 | |||
| 387 | si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, | ||
| 388 | GFP_KERNEL); | ||
| 389 | if (!si->si_bh) { | ||
| 390 | status = -ENOMEM; | ||
| 391 | mlog_errno(status); | ||
| 392 | goto bail; | ||
| 393 | } | ||
| 394 | |||
| 395 | for (i = 0; i < si->si_blocks; i++) { | ||
| 396 | status = ocfs2_extent_map_get_blocks(si->si_inode, i, | ||
| 397 | &blkno, NULL, NULL); | ||
| 398 | if (status < 0) { | ||
| 399 | mlog_errno(status); | ||
| 400 | goto bail; | ||
| 401 | } | ||
| 402 | |||
| 403 | mlog(0, "Reading slot map block %u at %llu\n", i, | ||
| 404 | (unsigned long long)blkno); | ||
| 405 | |||
| 406 | bh = NULL; /* Acquire a fresh bh */ | ||
| 407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | ||
| 408 | if (status < 0) { | ||
| 409 | mlog_errno(status); | ||
| 410 | goto bail; | ||
| 411 | } | ||
| 412 | |||
| 413 | si->si_bh[i] = bh; | ||
| 414 | } | ||
| 415 | |||
| 416 | bail: | ||
| 417 | return status; | ||
| 418 | } | ||
| 419 | |||
| 420 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | ||
| 421 | { | ||
| 422 | int status; | ||
| 162 | struct inode *inode = NULL; | 423 | struct inode *inode = NULL; |
| 163 | struct buffer_head *bh = NULL; | ||
| 164 | struct ocfs2_slot_info *si; | 424 | struct ocfs2_slot_info *si; |
| 165 | 425 | ||
| 166 | si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); | 426 | si = kzalloc(sizeof(struct ocfs2_slot_info) + |
| 427 | (sizeof(struct ocfs2_slot) * osb->max_slots), | ||
| 428 | GFP_KERNEL); | ||
| 167 | if (!si) { | 429 | if (!si) { |
| 168 | status = -ENOMEM; | 430 | status = -ENOMEM; |
| 169 | mlog_errno(status); | 431 | mlog_errno(status); |
| 170 | goto bail; | 432 | goto bail; |
| 171 | } | 433 | } |
| 172 | 434 | ||
| 173 | spin_lock_init(&si->si_lock); | 435 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
| 174 | si->si_num_slots = osb->max_slots; | 436 | si->si_num_slots = osb->max_slots; |
| 175 | si->si_size = OCFS2_MAX_SLOTS; | 437 | si->si_slots = (struct ocfs2_slot *)((char *)si + |
| 176 | 438 | sizeof(struct ocfs2_slot_info)); | |
| 177 | for(i = 0; i < si->si_num_slots; i++) | ||
| 178 | si->si_global_node_nums[i] = OCFS2_INVALID_SLOT; | ||
| 179 | 439 | ||
| 180 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, | 440 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, |
| 181 | OCFS2_INVALID_SLOT); | 441 | OCFS2_INVALID_SLOT); |
| @@ -185,61 +445,53 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 185 | goto bail; | 445 | goto bail; |
| 186 | } | 446 | } |
| 187 | 447 | ||
| 188 | status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL); | 448 | si->si_inode = inode; |
| 189 | if (status < 0) { | 449 | status = ocfs2_map_slot_buffers(osb, si); |
| 190 | mlog_errno(status); | ||
| 191 | goto bail; | ||
| 192 | } | ||
| 193 | |||
| 194 | status = ocfs2_read_block(osb, blkno, &bh, 0, inode); | ||
| 195 | if (status < 0) { | 450 | if (status < 0) { |
| 196 | mlog_errno(status); | 451 | mlog_errno(status); |
| 197 | goto bail; | 452 | goto bail; |
| 198 | } | 453 | } |
| 199 | 454 | ||
| 200 | si->si_inode = inode; | 455 | osb->slot_info = (struct ocfs2_slot_info *)si; |
| 201 | si->si_bh = bh; | ||
| 202 | osb->slot_info = si; | ||
| 203 | bail: | 456 | bail: |
| 204 | if (status < 0 && si) | 457 | if (status < 0 && si) |
| 205 | ocfs2_free_slot_info(si); | 458 | __ocfs2_free_slot_info(si); |
| 206 | 459 | ||
| 207 | return status; | 460 | return status; |
| 208 | } | 461 | } |
| 209 | 462 | ||
| 210 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si) | 463 | void ocfs2_free_slot_info(struct ocfs2_super *osb) |
| 211 | { | 464 | { |
| 212 | if (si->si_inode) | 465 | struct ocfs2_slot_info *si = osb->slot_info; |
| 213 | iput(si->si_inode); | 466 | |
| 214 | if (si->si_bh) | 467 | osb->slot_info = NULL; |
| 215 | brelse(si->si_bh); | 468 | __ocfs2_free_slot_info(si); |
| 216 | kfree(si); | ||
| 217 | } | 469 | } |
| 218 | 470 | ||
| 219 | int ocfs2_find_slot(struct ocfs2_super *osb) | 471 | int ocfs2_find_slot(struct ocfs2_super *osb) |
| 220 | { | 472 | { |
| 221 | int status; | 473 | int status; |
| 222 | s16 slot; | 474 | int slot; |
| 223 | struct ocfs2_slot_info *si; | 475 | struct ocfs2_slot_info *si; |
| 224 | 476 | ||
| 225 | mlog_entry_void(); | 477 | mlog_entry_void(); |
| 226 | 478 | ||
| 227 | si = osb->slot_info; | 479 | si = osb->slot_info; |
| 228 | 480 | ||
| 481 | spin_lock(&osb->osb_lock); | ||
| 229 | ocfs2_update_slot_info(si); | 482 | ocfs2_update_slot_info(si); |
| 230 | 483 | ||
| 231 | spin_lock(&si->si_lock); | ||
| 232 | /* search for ourselves first and take the slot if it already | 484 | /* search for ourselves first and take the slot if it already |
| 233 | * exists. Perhaps we need to mark this in a variable for our | 485 | * exists. Perhaps we need to mark this in a variable for our |
| 234 | * own journal recovery? Possibly not, though we certainly | 486 | * own journal recovery? Possibly not, though we certainly |
| 235 | * need to warn the user */ | 487 | * need to warn the user */ |
| 236 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); | 488 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); |
| 237 | if (slot == OCFS2_INVALID_SLOT) { | 489 | if (slot < 0) { |
| 238 | /* if no slot yet, then just take 1st available | 490 | /* if no slot yet, then just take 1st available |
| 239 | * one. */ | 491 | * one. */ |
| 240 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); | 492 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); |
| 241 | if (slot == OCFS2_INVALID_SLOT) { | 493 | if (slot < 0) { |
| 242 | spin_unlock(&si->si_lock); | 494 | spin_unlock(&osb->osb_lock); |
| 243 | mlog(ML_ERROR, "no free slots available!\n"); | 495 | mlog(ML_ERROR, "no free slots available!\n"); |
| 244 | status = -EINVAL; | 496 | status = -EINVAL; |
| 245 | goto bail; | 497 | goto bail; |
| @@ -248,13 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
| 248 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", | 500 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", |
| 249 | slot); | 501 | slot); |
| 250 | 502 | ||
| 251 | __ocfs2_fill_slot(si, slot, osb->node_num); | 503 | ocfs2_set_slot(si, slot, osb->node_num); |
| 252 | osb->slot_num = slot; | 504 | osb->slot_num = slot; |
| 253 | spin_unlock(&si->si_lock); | 505 | spin_unlock(&osb->osb_lock); |
| 254 | 506 | ||
| 255 | mlog(0, "taking node slot %d\n", osb->slot_num); | 507 | mlog(0, "taking node slot %d\n", osb->slot_num); |
| 256 | 508 | ||
| 257 | status = ocfs2_update_disk_slots(osb, si); | 509 | status = ocfs2_update_disk_slot(osb, si, osb->slot_num); |
| 258 | if (status < 0) | 510 | if (status < 0) |
| 259 | mlog_errno(status); | 511 | mlog_errno(status); |
| 260 | 512 | ||
| @@ -265,27 +517,27 @@ bail: | |||
| 265 | 517 | ||
| 266 | void ocfs2_put_slot(struct ocfs2_super *osb) | 518 | void ocfs2_put_slot(struct ocfs2_super *osb) |
| 267 | { | 519 | { |
| 268 | int status; | 520 | int status, slot_num; |
| 269 | struct ocfs2_slot_info *si = osb->slot_info; | 521 | struct ocfs2_slot_info *si = osb->slot_info; |
| 270 | 522 | ||
| 271 | if (!si) | 523 | if (!si) |
| 272 | return; | 524 | return; |
| 273 | 525 | ||
| 526 | spin_lock(&osb->osb_lock); | ||
| 274 | ocfs2_update_slot_info(si); | 527 | ocfs2_update_slot_info(si); |
| 275 | 528 | ||
| 276 | spin_lock(&si->si_lock); | 529 | slot_num = osb->slot_num; |
| 277 | __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); | 530 | ocfs2_invalidate_slot(si, osb->slot_num); |
| 278 | osb->slot_num = OCFS2_INVALID_SLOT; | 531 | osb->slot_num = OCFS2_INVALID_SLOT; |
| 279 | spin_unlock(&si->si_lock); | 532 | spin_unlock(&osb->osb_lock); |
| 280 | 533 | ||
| 281 | status = ocfs2_update_disk_slots(osb, si); | 534 | status = ocfs2_update_disk_slot(osb, si, slot_num); |
| 282 | if (status < 0) { | 535 | if (status < 0) { |
| 283 | mlog_errno(status); | 536 | mlog_errno(status); |
| 284 | goto bail; | 537 | goto bail; |
| 285 | } | 538 | } |
| 286 | 539 | ||
| 287 | bail: | 540 | bail: |
| 288 | osb->slot_info = NULL; | 541 | ocfs2_free_slot_info(osb); |
| 289 | ocfs2_free_slot_info(si); | ||
| 290 | } | 542 | } |
| 291 | 543 | ||
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index 1025872aaade..601c95fd7003 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
| @@ -27,38 +27,18 @@ | |||
| 27 | #ifndef SLOTMAP_H | 27 | #ifndef SLOTMAP_H |
| 28 | #define SLOTMAP_H | 28 | #define SLOTMAP_H |
| 29 | 29 | ||
| 30 | struct ocfs2_slot_info { | ||
| 31 | spinlock_t si_lock; | ||
| 32 | |||
| 33 | struct inode *si_inode; | ||
| 34 | struct buffer_head *si_bh; | ||
| 35 | unsigned int si_num_slots; | ||
| 36 | unsigned int si_size; | ||
| 37 | s16 si_global_node_nums[OCFS2_MAX_SLOTS]; | ||
| 38 | }; | ||
| 39 | |||
| 40 | int ocfs2_init_slot_info(struct ocfs2_super *osb); | 30 | int ocfs2_init_slot_info(struct ocfs2_super *osb); |
| 41 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si); | 31 | void ocfs2_free_slot_info(struct ocfs2_super *osb); |
| 42 | 32 | ||
| 43 | int ocfs2_find_slot(struct ocfs2_super *osb); | 33 | int ocfs2_find_slot(struct ocfs2_super *osb); |
| 44 | void ocfs2_put_slot(struct ocfs2_super *osb); | 34 | void ocfs2_put_slot(struct ocfs2_super *osb); |
| 45 | 35 | ||
| 46 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si); | 36 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb); |
| 47 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_slot_info *si); | ||
| 49 | |||
| 50 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 51 | s16 global); | ||
| 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | ||
| 53 | s16 slot_num); | ||
| 54 | 37 | ||
| 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 38 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num); |
| 56 | int slot_num) | 39 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, |
| 57 | { | 40 | unsigned int *node_num); |
| 58 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | ||
| 59 | assert_spin_locked(&si->si_lock); | ||
| 60 | 41 | ||
| 61 | return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT; | 42 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num); |
| 62 | } | ||
| 63 | 43 | ||
| 64 | #endif | 44 | #endif |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c new file mode 100644 index 000000000000..ac1d74c63bf5 --- /dev/null +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -0,0 +1,420 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stack_o2cb.c | ||
| 5 | * | ||
| 6 | * Code which interfaces ocfs2 with the o2cb stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/crc32.h> | ||
| 21 | #include <linux/module.h> | ||
| 22 | |||
| 23 | /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ | ||
| 24 | #include <linux/fs.h> | ||
| 25 | |||
| 26 | #include "cluster/masklog.h" | ||
| 27 | #include "cluster/nodemanager.h" | ||
| 28 | #include "cluster/heartbeat.h" | ||
| 29 | |||
| 30 | #include "stackglue.h" | ||
| 31 | |||
| 32 | struct o2dlm_private { | ||
| 33 | struct dlm_eviction_cb op_eviction_cb; | ||
| 34 | }; | ||
| 35 | |||
| 36 | static struct ocfs2_stack_plugin o2cb_stack; | ||
| 37 | |||
| 38 | /* These should be identical */ | ||
| 39 | #if (DLM_LOCK_IV != LKM_IVMODE) | ||
| 40 | # error Lock modes do not match | ||
| 41 | #endif | ||
| 42 | #if (DLM_LOCK_NL != LKM_NLMODE) | ||
| 43 | # error Lock modes do not match | ||
| 44 | #endif | ||
| 45 | #if (DLM_LOCK_CR != LKM_CRMODE) | ||
| 46 | # error Lock modes do not match | ||
| 47 | #endif | ||
| 48 | #if (DLM_LOCK_CW != LKM_CWMODE) | ||
| 49 | # error Lock modes do not match | ||
| 50 | #endif | ||
| 51 | #if (DLM_LOCK_PR != LKM_PRMODE) | ||
| 52 | # error Lock modes do not match | ||
| 53 | #endif | ||
| 54 | #if (DLM_LOCK_PW != LKM_PWMODE) | ||
| 55 | # error Lock modes do not match | ||
| 56 | #endif | ||
| 57 | #if (DLM_LOCK_EX != LKM_EXMODE) | ||
| 58 | # error Lock modes do not match | ||
| 59 | #endif | ||
| 60 | static inline int mode_to_o2dlm(int mode) | ||
| 61 | { | ||
| 62 | BUG_ON(mode > LKM_MAXMODE); | ||
| 63 | |||
| 64 | return mode; | ||
| 65 | } | ||
| 66 | |||
| 67 | #define map_flag(_generic, _o2dlm) \ | ||
| 68 | if (flags & (_generic)) { \ | ||
| 69 | flags &= ~(_generic); \ | ||
| 70 | o2dlm_flags |= (_o2dlm); \ | ||
| 71 | } | ||
| 72 | static int flags_to_o2dlm(u32 flags) | ||
| 73 | { | ||
| 74 | int o2dlm_flags = 0; | ||
| 75 | |||
| 76 | map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE); | ||
| 77 | map_flag(DLM_LKF_CANCEL, LKM_CANCEL); | ||
| 78 | map_flag(DLM_LKF_CONVERT, LKM_CONVERT); | ||
| 79 | map_flag(DLM_LKF_VALBLK, LKM_VALBLK); | ||
| 80 | map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK); | ||
| 81 | map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN); | ||
| 82 | map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE); | ||
| 83 | map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT); | ||
| 84 | map_flag(DLM_LKF_LOCAL, LKM_LOCAL); | ||
| 85 | |||
| 86 | /* map_flag() should have cleared every flag passed in */ | ||
| 87 | BUG_ON(flags != 0); | ||
| 88 | |||
| 89 | return o2dlm_flags; | ||
| 90 | } | ||
| 91 | #undef map_flag | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Map an o2dlm status to standard errno values. | ||
| 95 | * | ||
| 96 | * o2dlm only uses a handful of these, and returns even fewer to the | ||
| 97 | * caller. Still, we try to assign sane values to each error. | ||
| 98 | * | ||
| 99 | * The following value pairs have special meanings to dlmglue, thus | ||
| 100 | * the right hand side needs to stay unique - never duplicate the | ||
| 101 | * mapping elsewhere in the table! | ||
| 102 | * | ||
| 103 | * DLM_NORMAL: 0 | ||
| 104 | * DLM_NOTQUEUED: -EAGAIN | ||
| 105 | * DLM_CANCELGRANT: -EBUSY | ||
| 106 | * DLM_CANCEL: -DLM_ECANCEL | ||
| 107 | */ | ||
| 108 | /* Keep in sync with dlmapi.h */ | ||
| 109 | static int status_map[] = { | ||
| 110 | [DLM_NORMAL] = 0, /* Success */ | ||
| 111 | [DLM_GRANTED] = -EINVAL, | ||
| 112 | [DLM_DENIED] = -EACCES, | ||
| 113 | [DLM_DENIED_NOLOCKS] = -EACCES, | ||
| 114 | [DLM_WORKING] = -EACCES, | ||
| 115 | [DLM_BLOCKED] = -EINVAL, | ||
| 116 | [DLM_BLOCKED_ORPHAN] = -EINVAL, | ||
| 117 | [DLM_DENIED_GRACE_PERIOD] = -EACCES, | ||
| 118 | [DLM_SYSERR] = -ENOMEM, /* It is what it is */ | ||
| 119 | [DLM_NOSUPPORT] = -EPROTO, | ||
| 120 | [DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */ | ||
| 121 | [DLM_IVLOCKID] = -EINVAL, | ||
| 122 | [DLM_SYNC] = -EINVAL, | ||
| 123 | [DLM_BADTYPE] = -EINVAL, | ||
| 124 | [DLM_BADRESOURCE] = -EINVAL, | ||
| 125 | [DLM_MAXHANDLES] = -ENOMEM, | ||
| 126 | [DLM_NOCLINFO] = -EINVAL, | ||
| 127 | [DLM_NOLOCKMGR] = -EINVAL, | ||
| 128 | [DLM_NOPURGED] = -EINVAL, | ||
| 129 | [DLM_BADARGS] = -EINVAL, | ||
| 130 | [DLM_VOID] = -EINVAL, | ||
| 131 | [DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */ | ||
| 132 | [DLM_IVBUFLEN] = -EINVAL, | ||
| 133 | [DLM_CVTUNGRANT] = -EPERM, | ||
| 134 | [DLM_BADPARAM] = -EINVAL, | ||
| 135 | [DLM_VALNOTVALID] = -EINVAL, | ||
| 136 | [DLM_REJECTED] = -EPERM, | ||
| 137 | [DLM_ABORT] = -EINVAL, | ||
| 138 | [DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */ | ||
| 139 | [DLM_IVRESHANDLE] = -EINVAL, | ||
| 140 | [DLM_DEADLOCK] = -EDEADLK, | ||
| 141 | [DLM_DENIED_NOASTS] = -EINVAL, | ||
| 142 | [DLM_FORWARD] = -EINVAL, | ||
| 143 | [DLM_TIMEOUT] = -ETIMEDOUT, | ||
| 144 | [DLM_IVGROUPID] = -EINVAL, | ||
| 145 | [DLM_VERS_CONFLICT] = -EOPNOTSUPP, | ||
| 146 | [DLM_BAD_DEVICE_PATH] = -ENOENT, | ||
| 147 | [DLM_NO_DEVICE_PERMISSION] = -EPERM, | ||
| 148 | [DLM_NO_CONTROL_DEVICE] = -ENOENT, | ||
| 149 | [DLM_RECOVERING] = -ENOTCONN, | ||
| 150 | [DLM_MIGRATING] = -ERESTART, | ||
| 151 | [DLM_MAXSTATS] = -EINVAL, | ||
| 152 | }; | ||
| 153 | |||
| 154 | static int dlm_status_to_errno(enum dlm_status status) | ||
| 155 | { | ||
| 156 | BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); | ||
| 157 | |||
| 158 | return status_map[status]; | ||
| 159 | } | ||
| 160 | |||
| 161 | static void o2dlm_lock_ast_wrapper(void *astarg) | ||
| 162 | { | ||
| 163 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 164 | |||
| 165 | o2cb_stack.sp_proto->lp_lock_ast(astarg); | ||
| 166 | } | ||
| 167 | |||
| 168 | static void o2dlm_blocking_ast_wrapper(void *astarg, int level) | ||
| 169 | { | ||
| 170 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 171 | |||
| 172 | o2cb_stack.sp_proto->lp_blocking_ast(astarg, level); | ||
| 173 | } | ||
| 174 | |||
| 175 | static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) | ||
| 176 | { | ||
| 177 | int error = dlm_status_to_errno(status); | ||
| 178 | |||
| 179 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 180 | |||
| 181 | /* | ||
| 182 | * In o2dlm, you can get both the lock_ast() for the lock being | ||
| 183 | * granted and the unlock_ast() for the CANCEL failing. A | ||
| 184 | * successful cancel sends DLM_NORMAL here. If the | ||
| 185 | * lock grant happened before the cancel arrived, you get | ||
| 186 | * DLM_CANCELGRANT. | ||
| 187 | * | ||
| 188 | * There's no need for the double-ast. If we see DLM_CANCELGRANT, | ||
| 189 | * we just ignore it. We expect the lock_ast() to handle the | ||
| 190 | * granted lock. | ||
| 191 | */ | ||
| 192 | if (status == DLM_CANCELGRANT) | ||
| 193 | return; | ||
| 194 | |||
| 195 | o2cb_stack.sp_proto->lp_unlock_ast(astarg, error); | ||
| 196 | } | ||
| 197 | |||
| 198 | static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 199 | int mode, | ||
| 200 | union ocfs2_dlm_lksb *lksb, | ||
| 201 | u32 flags, | ||
| 202 | void *name, | ||
| 203 | unsigned int namelen, | ||
| 204 | void *astarg) | ||
| 205 | { | ||
| 206 | enum dlm_status status; | ||
| 207 | int o2dlm_mode = mode_to_o2dlm(mode); | ||
| 208 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
| 209 | int ret; | ||
| 210 | |||
| 211 | status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, | ||
| 212 | o2dlm_flags, name, namelen, | ||
| 213 | o2dlm_lock_ast_wrapper, astarg, | ||
| 214 | o2dlm_blocking_ast_wrapper); | ||
| 215 | ret = dlm_status_to_errno(status); | ||
| 216 | return ret; | ||
| 217 | } | ||
| 218 | |||
| 219 | static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 220 | union ocfs2_dlm_lksb *lksb, | ||
| 221 | u32 flags, | ||
| 222 | void *astarg) | ||
| 223 | { | ||
| 224 | enum dlm_status status; | ||
| 225 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
| 226 | int ret; | ||
| 227 | |||
| 228 | status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, | ||
| 229 | o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); | ||
| 230 | ret = dlm_status_to_errno(status); | ||
| 231 | return ret; | ||
| 232 | } | ||
| 233 | |||
| 234 | static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | ||
| 235 | { | ||
| 236 | return dlm_status_to_errno(lksb->lksb_o2dlm.status); | ||
| 237 | } | ||
| 238 | |||
| 239 | static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
| 240 | { | ||
| 241 | return (void *)(lksb->lksb_o2dlm.lvb); | ||
| 242 | } | ||
| 243 | |||
| 244 | static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb) | ||
| 245 | { | ||
| 246 | dlm_print_one_lock(lksb->lksb_o2dlm.lockid); | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Called from the dlm when it's about to evict a node. This is how the | ||
| 251 | * classic stack signals node death. | ||
| 252 | */ | ||
| 253 | static void o2dlm_eviction_cb(int node_num, void *data) | ||
| 254 | { | ||
| 255 | struct ocfs2_cluster_connection *conn = data; | ||
| 256 | |||
| 257 | mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", | ||
| 258 | node_num, conn->cc_namelen, conn->cc_name); | ||
| 259 | |||
| 260 | conn->cc_recovery_handler(node_num, conn->cc_recovery_data); | ||
| 261 | } | ||
| 262 | |||
| 263 | static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | ||
| 264 | { | ||
| 265 | int rc = 0; | ||
| 266 | u32 dlm_key; | ||
| 267 | struct dlm_ctxt *dlm; | ||
| 268 | struct o2dlm_private *priv; | ||
| 269 | struct dlm_protocol_version dlm_version; | ||
| 270 | |||
| 271 | BUG_ON(conn == NULL); | ||
| 272 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 273 | |||
| 274 | /* for now we only have one cluster/node, make sure we see it | ||
| 275 | * in the heartbeat universe */ | ||
| 276 | if (!o2hb_check_local_node_heartbeating()) { | ||
| 277 | rc = -EINVAL; | ||
| 278 | goto out; | ||
| 279 | } | ||
| 280 | |||
| 281 | priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); | ||
| 282 | if (!priv) { | ||
| 283 | rc = -ENOMEM; | ||
| 284 | goto out_free; | ||
| 285 | } | ||
| 286 | |||
| 287 | /* This just fills the structure in. It is safe to pass conn. */ | ||
| 288 | dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, | ||
| 289 | conn); | ||
| 290 | |||
| 291 | conn->cc_private = priv; | ||
| 292 | |||
| 293 | /* used by the dlm code to make message headers unique, each | ||
| 294 | * node in this domain must agree on this. */ | ||
| 295 | dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); | ||
| 296 | dlm_version.pv_major = conn->cc_version.pv_major; | ||
| 297 | dlm_version.pv_minor = conn->cc_version.pv_minor; | ||
| 298 | |||
| 299 | dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); | ||
| 300 | if (IS_ERR(dlm)) { | ||
| 301 | rc = PTR_ERR(dlm); | ||
| 302 | mlog_errno(rc); | ||
| 303 | goto out_free; | ||
| 304 | } | ||
| 305 | |||
| 306 | conn->cc_version.pv_major = dlm_version.pv_major; | ||
| 307 | conn->cc_version.pv_minor = dlm_version.pv_minor; | ||
| 308 | conn->cc_lockspace = dlm; | ||
| 309 | |||
| 310 | dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); | ||
| 311 | |||
| 312 | out_free: | ||
| 313 | if (rc && conn->cc_private) | ||
| 314 | kfree(conn->cc_private); | ||
| 315 | |||
| 316 | out: | ||
| 317 | return rc; | ||
| 318 | } | ||
| 319 | |||
| 320 | static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 321 | int hangup_pending) | ||
| 322 | { | ||
| 323 | struct dlm_ctxt *dlm = conn->cc_lockspace; | ||
| 324 | struct o2dlm_private *priv = conn->cc_private; | ||
| 325 | |||
| 326 | dlm_unregister_eviction_cb(&priv->op_eviction_cb); | ||
| 327 | conn->cc_private = NULL; | ||
| 328 | kfree(priv); | ||
| 329 | |||
| 330 | dlm_unregister_domain(dlm); | ||
| 331 | conn->cc_lockspace = NULL; | ||
| 332 | |||
| 333 | return 0; | ||
| 334 | } | ||
| 335 | |||
| 336 | static void o2hb_stop(const char *group) | ||
| 337 | { | ||
| 338 | int ret; | ||
| 339 | char *argv[5], *envp[3]; | ||
| 340 | |||
| 341 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
| 342 | argv[1] = "-K"; | ||
| 343 | argv[2] = "-u"; | ||
| 344 | argv[3] = (char *)group; | ||
| 345 | argv[4] = NULL; | ||
| 346 | |||
| 347 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
| 348 | |||
| 349 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 350 | envp[0] = "HOME=/"; | ||
| 351 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 352 | envp[2] = NULL; | ||
| 353 | |||
| 354 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 355 | if (ret < 0) | ||
| 356 | mlog_errno(ret); | ||
| 357 | } | ||
| 358 | |||
/*
 * "Hangup" exists for compatibility with older ocfs2-tools, which expect
 * the filesystem to run "ocfs2_hb_ctl" at unmount time whether or not
 * the DLM was ever started.  That rules out doing it from
 * ocfs2_cluster_disconnect(), so the glue exposes this separate hook
 * for super.c and it simply forwards to o2hb_stop().
 *
 * Other stacks will eventually provide a NULL ->hangup() pointer.
 * grouplen is not used.
 */
static void o2cb_cluster_hangup(const char *group, int grouplen)
{
	o2hb_stop(group);
}
| 372 | |||
| 373 | static int o2cb_cluster_this_node(unsigned int *node) | ||
| 374 | { | ||
| 375 | int node_num; | ||
| 376 | |||
| 377 | node_num = o2nm_this_node(); | ||
| 378 | if (node_num == O2NM_INVALID_NODE_NUM) | ||
| 379 | return -ENOENT; | ||
| 380 | |||
| 381 | if (node_num >= O2NM_MAX_NODES) | ||
| 382 | return -EOVERFLOW; | ||
| 383 | |||
| 384 | *node = node_num; | ||
| 385 | return 0; | ||
| 386 | } | ||
| 387 | |||
| 388 | struct ocfs2_stack_operations o2cb_stack_ops = { | ||
| 389 | .connect = o2cb_cluster_connect, | ||
| 390 | .disconnect = o2cb_cluster_disconnect, | ||
| 391 | .hangup = o2cb_cluster_hangup, | ||
| 392 | .this_node = o2cb_cluster_this_node, | ||
| 393 | .dlm_lock = o2cb_dlm_lock, | ||
| 394 | .dlm_unlock = o2cb_dlm_unlock, | ||
| 395 | .lock_status = o2cb_dlm_lock_status, | ||
| 396 | .lock_lvb = o2cb_dlm_lvb, | ||
| 397 | .dump_lksb = o2cb_dump_lksb, | ||
| 398 | }; | ||
| 399 | |||
| 400 | static struct ocfs2_stack_plugin o2cb_stack = { | ||
| 401 | .sp_name = "o2cb", | ||
| 402 | .sp_ops = &o2cb_stack_ops, | ||
| 403 | .sp_owner = THIS_MODULE, | ||
| 404 | }; | ||
| 405 | |||
| 406 | static int __init o2cb_stack_init(void) | ||
| 407 | { | ||
| 408 | return ocfs2_stack_glue_register(&o2cb_stack); | ||
| 409 | } | ||
| 410 | |||
| 411 | static void __exit o2cb_stack_exit(void) | ||
| 412 | { | ||
| 413 | ocfs2_stack_glue_unregister(&o2cb_stack); | ||
| 414 | } | ||
| 415 | |||
| 416 | MODULE_AUTHOR("Oracle"); | ||
| 417 | MODULE_DESCRIPTION("ocfs2 driver for the classic o2cb stack"); | ||
| 418 | MODULE_LICENSE("GPL"); | ||
| 419 | module_init(o2cb_stack_init); | ||
| 420 | module_exit(o2cb_stack_exit); | ||
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c new file mode 100644 index 000000000000..7428663f9cbb --- /dev/null +++ b/fs/ocfs2/stack_user.c | |||
| @@ -0,0 +1,883 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stack_user.c | ||
| 5 | * | ||
| 6 | * Code which interfaces ocfs2 with fs/dlm and a userspace stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/module.h> | ||
| 21 | #include <linux/fs.h> | ||
| 22 | #include <linux/miscdevice.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | #include <linux/reboot.h> | ||
| 25 | #include <asm/uaccess.h> | ||
| 26 | |||
| 27 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | ||
| 28 | #include "stackglue.h" | ||
| 29 | |||
| 30 | |||
| 31 | /* | ||
| 32 | * The control protocol starts with a handshake. Until the handshake | ||
| 33 | * is complete, the control device will fail all write(2)s. | ||
| 34 | * | ||
| 35 | * The handshake is simple. First, the client reads until EOF. Each line | ||
| 36 | * of output is a supported protocol tag. All protocol tags are a single | ||
| 37 | * character followed by a two hex digit version number. Currently the | ||
| 38 | * only thing supported is T01, for "Text-based version 0x01". Next, the | ||
| 39 | * client writes the version they would like to use, including the newline. | ||
| 40 | * Thus, the protocol tag is 'T01\n'. If the version tag written is | ||
| 41 | * unknown, -EINVAL is returned. Once the negotiation is complete, the | ||
| 42 | * client can start sending messages. | ||
| 43 | * | ||
| 44 | * The T01 protocol has three messages. First is the "SETN" message. | ||
| 45 | * It has the following syntax: | ||
| 46 | * | ||
| 47 | * SETN<space><8-char-hex-nodenum><newline> | ||
| 48 | * | ||
| 49 | * This is 14 characters. | ||
| 50 | * | ||
| 51 | * The "SETN" message must be the first message following the protocol. | ||
| 52 | * It tells ocfs2_control the local node number. | ||
| 53 | * | ||
| 54 | * Next comes the "SETV" message. It has the following syntax: | ||
| 55 | * | ||
| 56 | * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> | ||
| 57 | * | ||
| 58 | * This is 11 characters. | ||
| 59 | * | ||
| 60 | * The "SETV" message sets the filesystem locking protocol version as | ||
| 61 | * negotiated by the client. The client negotiates based on the maximum | ||
| 62 | * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major | ||
| 63 | * number from the "SETV" message must match | ||
| 64 | * user_stack.sp_proto->lp_max_version.pv_major, and the minor number | ||
| 65 | * must be less than or equal to ...->lp_max_version.pv_minor. | ||
| 66 | * | ||
| 67 | * Once this information has been set, mounts will be allowed. From this | ||
| 68 | * point on, the "DOWN" message can be sent for node down notification. | ||
| 69 | * It has the following syntax: | ||
| 70 | * | ||
| 71 | * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> | ||
| 72 | * | ||
| 73 | * eg: | ||
| 74 | * | ||
| 75 | * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n | ||
| 76 | * | ||
| 77 | * This is 47 characters. | ||
| 78 | */ | ||
| 79 | |||
/*
 * The single protocol tag currently understood by the control device,
 * and its length in bytes (newline included, no terminating NUL).
 */
#define OCFS2_CONTROL_PROTO				"T01\n"
#define OCFS2_CONTROL_PROTO_LEN				4

/* Per-open handshake progress, stored in ocfs2_control_private.op_state */
#define OCFS2_CONTROL_HANDSHAKE_INVALID			(0)
#define OCFS2_CONTROL_HANDSHAKE_READ			(1)
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL		(2)
#define OCFS2_CONTROL_HANDSHAKE_VALID			(3)

/* Message opcodes (all OCFS2_CONTROL_MESSAGE_OP_LEN bytes long) */
#define OCFS2_CONTROL_MESSAGE_OP_LEN			4
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP		"SETN"
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP		"SETV"
#define OCFS2_CONTROL_MESSAGE_DOWN_OP			"DOWN"

/* Exact on-the-wire length of each complete message, newline included */
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN		14
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN	11
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN		47

/* Widths of the individual hex text fields */
#define OCFS2_TEXT_UUID_LEN				32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN		2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN		8
| 104 | |||
| 105 | /* | ||
| 106 | * ocfs2_live_connection is refcounted because the filesystem and | ||
| 107 | * miscdevice sides can detach in different order. Let's just be safe. | ||
| 108 | */ | ||
| 109 | struct ocfs2_live_connection { | ||
| 110 | struct list_head oc_list; | ||
| 111 | struct ocfs2_cluster_connection *oc_conn; | ||
| 112 | }; | ||
| 113 | |||
| 114 | struct ocfs2_control_private { | ||
| 115 | struct list_head op_list; | ||
| 116 | int op_state; | ||
| 117 | int op_this_node; | ||
| 118 | struct ocfs2_protocol_version op_proto; | ||
| 119 | }; | ||
| 120 | |||
| 121 | /* SETN<space><8-char-hex-nodenum><newline> */ | ||
| 122 | struct ocfs2_control_message_setn { | ||
| 123 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
| 124 | char space; | ||
| 125 | char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; | ||
| 126 | char newline; | ||
| 127 | }; | ||
| 128 | |||
| 129 | /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ | ||
| 130 | struct ocfs2_control_message_setv { | ||
| 131 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
| 132 | char space1; | ||
| 133 | char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; | ||
| 134 | char space2; | ||
| 135 | char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; | ||
| 136 | char newline; | ||
| 137 | }; | ||
| 138 | |||
| 139 | /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ | ||
| 140 | struct ocfs2_control_message_down { | ||
| 141 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
| 142 | char space1; | ||
| 143 | char uuid[OCFS2_TEXT_UUID_LEN]; | ||
| 144 | char space2; | ||
| 145 | char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; | ||
| 146 | char newline; | ||
| 147 | }; | ||
| 148 | |||
| 149 | union ocfs2_control_message { | ||
| 150 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
| 151 | struct ocfs2_control_message_setn u_setn; | ||
| 152 | struct ocfs2_control_message_setv u_setv; | ||
| 153 | struct ocfs2_control_message_down u_down; | ||
| 154 | }; | ||
| 155 | |||
| 156 | static struct ocfs2_stack_plugin user_stack; | ||
| 157 | |||
| 158 | static atomic_t ocfs2_control_opened; | ||
| 159 | static int ocfs2_control_this_node = -1; | ||
| 160 | static struct ocfs2_protocol_version running_proto; | ||
| 161 | |||
| 162 | static LIST_HEAD(ocfs2_live_connection_list); | ||
| 163 | static LIST_HEAD(ocfs2_control_private_list); | ||
| 164 | static DEFINE_MUTEX(ocfs2_control_lock); | ||
| 165 | |||
| 166 | static inline void ocfs2_control_set_handshake_state(struct file *file, | ||
| 167 | int state) | ||
| 168 | { | ||
| 169 | struct ocfs2_control_private *p = file->private_data; | ||
| 170 | p->op_state = state; | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline int ocfs2_control_get_handshake_state(struct file *file) | ||
| 174 | { | ||
| 175 | struct ocfs2_control_private *p = file->private_data; | ||
| 176 | return p->op_state; | ||
| 177 | } | ||
| 178 | |||
| 179 | static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) | ||
| 180 | { | ||
| 181 | size_t len = strlen(name); | ||
| 182 | struct ocfs2_live_connection *c; | ||
| 183 | |||
| 184 | BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); | ||
| 185 | |||
| 186 | list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { | ||
| 187 | if ((c->oc_conn->cc_namelen == len) && | ||
| 188 | !strncmp(c->oc_conn->cc_name, name, len)) | ||
| 189 | return c; | ||
| 190 | } | ||
| 191 | |||
| 192 | return c; | ||
| 193 | } | ||
| 194 | |||
| 195 | /* | ||
| 196 | * ocfs2_live_connection structures are created underneath the ocfs2 | ||
| 197 | * mount path. Since the VFS prevents multiple calls to | ||
| 198 | * fill_super(), we can't get dupes here. | ||
| 199 | */ | ||
| 200 | static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, | ||
| 201 | struct ocfs2_live_connection **c_ret) | ||
| 202 | { | ||
| 203 | int rc = 0; | ||
| 204 | struct ocfs2_live_connection *c; | ||
| 205 | |||
| 206 | c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | ||
| 207 | if (!c) | ||
| 208 | return -ENOMEM; | ||
| 209 | |||
| 210 | mutex_lock(&ocfs2_control_lock); | ||
| 211 | c->oc_conn = conn; | ||
| 212 | |||
| 213 | if (atomic_read(&ocfs2_control_opened)) | ||
| 214 | list_add(&c->oc_list, &ocfs2_live_connection_list); | ||
| 215 | else { | ||
| 216 | printk(KERN_ERR | ||
| 217 | "ocfs2: Userspace control daemon is not present\n"); | ||
| 218 | rc = -ESRCH; | ||
| 219 | } | ||
| 220 | |||
| 221 | mutex_unlock(&ocfs2_control_lock); | ||
| 222 | |||
| 223 | if (!rc) | ||
| 224 | *c_ret = c; | ||
| 225 | else | ||
| 226 | kfree(c); | ||
| 227 | |||
| 228 | return rc; | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * This function disconnects the cluster connection from ocfs2_control. | ||
| 233 | * Afterwards, userspace can't affect the cluster connection. | ||
| 234 | */ | ||
| 235 | static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) | ||
| 236 | { | ||
| 237 | mutex_lock(&ocfs2_control_lock); | ||
| 238 | list_del_init(&c->oc_list); | ||
| 239 | c->oc_conn = NULL; | ||
| 240 | mutex_unlock(&ocfs2_control_lock); | ||
| 241 | |||
| 242 | kfree(c); | ||
| 243 | } | ||
| 244 | |||
| 245 | static int ocfs2_control_cfu(void *target, size_t target_len, | ||
| 246 | const char __user *buf, size_t count) | ||
| 247 | { | ||
| 248 | /* The T01 expects write(2) calls to have exactly one command */ | ||
| 249 | if ((count != target_len) || | ||
| 250 | (count > sizeof(union ocfs2_control_message))) | ||
| 251 | return -EINVAL; | ||
| 252 | |||
| 253 | if (copy_from_user(target, buf, target_len)) | ||
| 254 | return -EFAULT; | ||
| 255 | |||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | static ssize_t ocfs2_control_validate_protocol(struct file *file, | ||
| 260 | const char __user *buf, | ||
| 261 | size_t count) | ||
| 262 | { | ||
| 263 | ssize_t ret; | ||
| 264 | char kbuf[OCFS2_CONTROL_PROTO_LEN]; | ||
| 265 | |||
| 266 | ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, | ||
| 267 | buf, count); | ||
| 268 | if (ret) | ||
| 269 | return ret; | ||
| 270 | |||
| 271 | if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) | ||
| 272 | return -EINVAL; | ||
| 273 | |||
| 274 | ocfs2_control_set_handshake_state(file, | ||
| 275 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL); | ||
| 276 | |||
| 277 | return count; | ||
| 278 | } | ||
| 279 | |||
| 280 | static void ocfs2_control_send_down(const char *uuid, | ||
| 281 | int nodenum) | ||
| 282 | { | ||
| 283 | struct ocfs2_live_connection *c; | ||
| 284 | |||
| 285 | mutex_lock(&ocfs2_control_lock); | ||
| 286 | |||
| 287 | c = ocfs2_connection_find(uuid); | ||
| 288 | if (c) { | ||
| 289 | BUG_ON(c->oc_conn == NULL); | ||
| 290 | c->oc_conn->cc_recovery_handler(nodenum, | ||
| 291 | c->oc_conn->cc_recovery_data); | ||
| 292 | } | ||
| 293 | |||
| 294 | mutex_unlock(&ocfs2_control_lock); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* | ||
| 298 | * Called whenever configuration elements are sent to /dev/ocfs2_control. | ||
| 299 | * If all configuration elements are present, try to set the global | ||
| 300 | * values. If there is a problem, return an error. Skip any missing | ||
| 301 | * elements, and only bump ocfs2_control_opened when we have all elements | ||
| 302 | * and are successful. | ||
| 303 | */ | ||
| 304 | static int ocfs2_control_install_private(struct file *file) | ||
| 305 | { | ||
| 306 | int rc = 0; | ||
| 307 | int set_p = 1; | ||
| 308 | struct ocfs2_control_private *p = file->private_data; | ||
| 309 | |||
| 310 | BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); | ||
| 311 | |||
| 312 | mutex_lock(&ocfs2_control_lock); | ||
| 313 | |||
| 314 | if (p->op_this_node < 0) { | ||
| 315 | set_p = 0; | ||
| 316 | } else if ((ocfs2_control_this_node >= 0) && | ||
| 317 | (ocfs2_control_this_node != p->op_this_node)) { | ||
| 318 | rc = -EINVAL; | ||
| 319 | goto out_unlock; | ||
| 320 | } | ||
| 321 | |||
| 322 | if (!p->op_proto.pv_major) { | ||
| 323 | set_p = 0; | ||
| 324 | } else if (!list_empty(&ocfs2_live_connection_list) && | ||
| 325 | ((running_proto.pv_major != p->op_proto.pv_major) || | ||
| 326 | (running_proto.pv_minor != p->op_proto.pv_minor))) { | ||
| 327 | rc = -EINVAL; | ||
| 328 | goto out_unlock; | ||
| 329 | } | ||
| 330 | |||
| 331 | if (set_p) { | ||
| 332 | ocfs2_control_this_node = p->op_this_node; | ||
| 333 | running_proto.pv_major = p->op_proto.pv_major; | ||
| 334 | running_proto.pv_minor = p->op_proto.pv_minor; | ||
| 335 | } | ||
| 336 | |||
| 337 | out_unlock: | ||
| 338 | mutex_unlock(&ocfs2_control_lock); | ||
| 339 | |||
| 340 | if (!rc && set_p) { | ||
| 341 | /* We set the global values successfully */ | ||
| 342 | atomic_inc(&ocfs2_control_opened); | ||
| 343 | ocfs2_control_set_handshake_state(file, | ||
| 344 | OCFS2_CONTROL_HANDSHAKE_VALID); | ||
| 345 | } | ||
| 346 | |||
| 347 | return rc; | ||
| 348 | } | ||
| 349 | |||
| 350 | static int ocfs2_control_get_this_node(void) | ||
| 351 | { | ||
| 352 | int rc; | ||
| 353 | |||
| 354 | mutex_lock(&ocfs2_control_lock); | ||
| 355 | if (ocfs2_control_this_node < 0) | ||
| 356 | rc = -EINVAL; | ||
| 357 | else | ||
| 358 | rc = ocfs2_control_this_node; | ||
| 359 | mutex_unlock(&ocfs2_control_lock); | ||
| 360 | |||
| 361 | return rc; | ||
| 362 | } | ||
| 363 | |||
| 364 | static int ocfs2_control_do_setnode_msg(struct file *file, | ||
| 365 | struct ocfs2_control_message_setn *msg) | ||
| 366 | { | ||
| 367 | long nodenum; | ||
| 368 | char *ptr = NULL; | ||
| 369 | struct ocfs2_control_private *p = file->private_data; | ||
| 370 | |||
| 371 | if (ocfs2_control_get_handshake_state(file) != | ||
| 372 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL) | ||
| 373 | return -EINVAL; | ||
| 374 | |||
| 375 | if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, | ||
| 376 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 377 | return -EINVAL; | ||
| 378 | |||
| 379 | if ((msg->space != ' ') || (msg->newline != '\n')) | ||
| 380 | return -EINVAL; | ||
| 381 | msg->space = msg->newline = '\0'; | ||
| 382 | |||
| 383 | nodenum = simple_strtol(msg->nodestr, &ptr, 16); | ||
| 384 | if (!ptr || *ptr) | ||
| 385 | return -EINVAL; | ||
| 386 | |||
| 387 | if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || | ||
| 388 | (nodenum > INT_MAX) || (nodenum < 0)) | ||
| 389 | return -ERANGE; | ||
| 390 | p->op_this_node = nodenum; | ||
| 391 | |||
| 392 | return ocfs2_control_install_private(file); | ||
| 393 | } | ||
| 394 | |||
| 395 | static int ocfs2_control_do_setversion_msg(struct file *file, | ||
| 396 | struct ocfs2_control_message_setv *msg) | ||
| 397 | { | ||
| 398 | long major, minor; | ||
| 399 | char *ptr = NULL; | ||
| 400 | struct ocfs2_control_private *p = file->private_data; | ||
| 401 | struct ocfs2_protocol_version *max = | ||
| 402 | &user_stack.sp_proto->lp_max_version; | ||
| 403 | |||
| 404 | if (ocfs2_control_get_handshake_state(file) != | ||
| 405 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL) | ||
| 406 | return -EINVAL; | ||
| 407 | |||
| 408 | if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, | ||
| 409 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 410 | return -EINVAL; | ||
| 411 | |||
| 412 | if ((msg->space1 != ' ') || (msg->space2 != ' ') || | ||
| 413 | (msg->newline != '\n')) | ||
| 414 | return -EINVAL; | ||
| 415 | msg->space1 = msg->space2 = msg->newline = '\0'; | ||
| 416 | |||
| 417 | major = simple_strtol(msg->major, &ptr, 16); | ||
| 418 | if (!ptr || *ptr) | ||
| 419 | return -EINVAL; | ||
| 420 | minor = simple_strtol(msg->minor, &ptr, 16); | ||
| 421 | if (!ptr || *ptr) | ||
| 422 | return -EINVAL; | ||
| 423 | |||
| 424 | /* | ||
| 425 | * The major must be between 1 and 255, inclusive. The minor | ||
| 426 | * must be between 0 and 255, inclusive. The version passed in | ||
| 427 | * must be within the maximum version supported by the filesystem. | ||
| 428 | */ | ||
| 429 | if ((major == LONG_MIN) || (major == LONG_MAX) || | ||
| 430 | (major > (u8)-1) || (major < 1)) | ||
| 431 | return -ERANGE; | ||
| 432 | if ((minor == LONG_MIN) || (minor == LONG_MAX) || | ||
| 433 | (minor > (u8)-1) || (minor < 0)) | ||
| 434 | return -ERANGE; | ||
| 435 | if ((major != max->pv_major) || | ||
| 436 | (minor > max->pv_minor)) | ||
| 437 | return -EINVAL; | ||
| 438 | |||
| 439 | p->op_proto.pv_major = major; | ||
| 440 | p->op_proto.pv_minor = minor; | ||
| 441 | |||
| 442 | return ocfs2_control_install_private(file); | ||
| 443 | } | ||
| 444 | |||
| 445 | static int ocfs2_control_do_down_msg(struct file *file, | ||
| 446 | struct ocfs2_control_message_down *msg) | ||
| 447 | { | ||
| 448 | long nodenum; | ||
| 449 | char *p = NULL; | ||
| 450 | |||
| 451 | if (ocfs2_control_get_handshake_state(file) != | ||
| 452 | OCFS2_CONTROL_HANDSHAKE_VALID) | ||
| 453 | return -EINVAL; | ||
| 454 | |||
| 455 | if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, | ||
| 456 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 457 | return -EINVAL; | ||
| 458 | |||
| 459 | if ((msg->space1 != ' ') || (msg->space2 != ' ') || | ||
| 460 | (msg->newline != '\n')) | ||
| 461 | return -EINVAL; | ||
| 462 | msg->space1 = msg->space2 = msg->newline = '\0'; | ||
| 463 | |||
| 464 | nodenum = simple_strtol(msg->nodestr, &p, 16); | ||
| 465 | if (!p || *p) | ||
| 466 | return -EINVAL; | ||
| 467 | |||
| 468 | if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || | ||
| 469 | (nodenum > INT_MAX) || (nodenum < 0)) | ||
| 470 | return -ERANGE; | ||
| 471 | |||
| 472 | ocfs2_control_send_down(msg->uuid, nodenum); | ||
| 473 | |||
| 474 | return 0; | ||
| 475 | } | ||
| 476 | |||
| 477 | static ssize_t ocfs2_control_message(struct file *file, | ||
| 478 | const char __user *buf, | ||
| 479 | size_t count) | ||
| 480 | { | ||
| 481 | ssize_t ret; | ||
| 482 | union ocfs2_control_message msg; | ||
| 483 | |||
| 484 | /* Try to catch padding issues */ | ||
| 485 | WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != | ||
| 486 | (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); | ||
| 487 | |||
| 488 | memset(&msg, 0, sizeof(union ocfs2_control_message)); | ||
| 489 | ret = ocfs2_control_cfu(&msg, count, buf, count); | ||
| 490 | if (ret) | ||
| 491 | goto out; | ||
| 492 | |||
| 493 | if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && | ||
| 494 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, | ||
| 495 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 496 | ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); | ||
| 497 | else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && | ||
| 498 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, | ||
| 499 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 500 | ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); | ||
| 501 | else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && | ||
| 502 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, | ||
| 503 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 504 | ret = ocfs2_control_do_down_msg(file, &msg.u_down); | ||
| 505 | else | ||
| 506 | ret = -EINVAL; | ||
| 507 | |||
| 508 | out: | ||
| 509 | return ret ? ret : count; | ||
| 510 | } | ||
| 511 | |||
| 512 | static ssize_t ocfs2_control_write(struct file *file, | ||
| 513 | const char __user *buf, | ||
| 514 | size_t count, | ||
| 515 | loff_t *ppos) | ||
| 516 | { | ||
| 517 | ssize_t ret; | ||
| 518 | |||
| 519 | switch (ocfs2_control_get_handshake_state(file)) { | ||
| 520 | case OCFS2_CONTROL_HANDSHAKE_INVALID: | ||
| 521 | ret = -EINVAL; | ||
| 522 | break; | ||
| 523 | |||
| 524 | case OCFS2_CONTROL_HANDSHAKE_READ: | ||
| 525 | ret = ocfs2_control_validate_protocol(file, buf, | ||
| 526 | count); | ||
| 527 | break; | ||
| 528 | |||
| 529 | case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: | ||
| 530 | case OCFS2_CONTROL_HANDSHAKE_VALID: | ||
| 531 | ret = ocfs2_control_message(file, buf, count); | ||
| 532 | break; | ||
| 533 | |||
| 534 | default: | ||
| 535 | BUG(); | ||
| 536 | ret = -EIO; | ||
| 537 | break; | ||
| 538 | } | ||
| 539 | |||
| 540 | return ret; | ||
| 541 | } | ||
| 542 | |||
| 543 | /* | ||
| 544 | * This is a naive version. If we ever have a new protocol, we'll expand | ||
| 545 | * it. Probably using seq_file. | ||
| 546 | */ | ||
| 547 | static ssize_t ocfs2_control_read(struct file *file, | ||
| 548 | char __user *buf, | ||
| 549 | size_t count, | ||
| 550 | loff_t *ppos) | ||
| 551 | { | ||
| 552 | char *proto_string = OCFS2_CONTROL_PROTO; | ||
| 553 | size_t to_write = 0; | ||
| 554 | |||
| 555 | if (*ppos >= OCFS2_CONTROL_PROTO_LEN) | ||
| 556 | return 0; | ||
| 557 | |||
| 558 | to_write = OCFS2_CONTROL_PROTO_LEN - *ppos; | ||
| 559 | if (to_write > count) | ||
| 560 | to_write = count; | ||
| 561 | if (copy_to_user(buf, proto_string + *ppos, to_write)) | ||
| 562 | return -EFAULT; | ||
| 563 | |||
| 564 | *ppos += to_write; | ||
| 565 | |||
| 566 | /* Have we read the whole protocol list? */ | ||
| 567 | if (*ppos >= OCFS2_CONTROL_PROTO_LEN) | ||
| 568 | ocfs2_control_set_handshake_state(file, | ||
| 569 | OCFS2_CONTROL_HANDSHAKE_READ); | ||
| 570 | |||
| 571 | return to_write; | ||
| 572 | } | ||
| 573 | |||
| 574 | static int ocfs2_control_release(struct inode *inode, struct file *file) | ||
| 575 | { | ||
| 576 | struct ocfs2_control_private *p = file->private_data; | ||
| 577 | |||
| 578 | mutex_lock(&ocfs2_control_lock); | ||
| 579 | |||
| 580 | if (ocfs2_control_get_handshake_state(file) != | ||
| 581 | OCFS2_CONTROL_HANDSHAKE_VALID) | ||
| 582 | goto out; | ||
| 583 | |||
| 584 | if (atomic_dec_and_test(&ocfs2_control_opened)) { | ||
| 585 | if (!list_empty(&ocfs2_live_connection_list)) { | ||
| 586 | /* XXX: Do bad things! */ | ||
| 587 | printk(KERN_ERR | ||
| 588 | "ocfs2: Unexpected release of ocfs2_control!\n" | ||
| 589 | " Loss of cluster connection requires " | ||
| 590 | "an emergency restart!\n"); | ||
| 591 | emergency_restart(); | ||
| 592 | } | ||
| 593 | /* | ||
| 594 | * Last valid close clears the node number and resets | ||
| 595 | * the locking protocol version | ||
| 596 | */ | ||
| 597 | ocfs2_control_this_node = -1; | ||
| 598 | running_proto.pv_major = 0; | ||
| 599 | running_proto.pv_major = 0; | ||
| 600 | } | ||
| 601 | |||
| 602 | out: | ||
| 603 | list_del_init(&p->op_list); | ||
| 604 | file->private_data = NULL; | ||
| 605 | |||
| 606 | mutex_unlock(&ocfs2_control_lock); | ||
| 607 | |||
| 608 | kfree(p); | ||
| 609 | |||
| 610 | return 0; | ||
| 611 | } | ||
| 612 | |||
| 613 | static int ocfs2_control_open(struct inode *inode, struct file *file) | ||
| 614 | { | ||
| 615 | struct ocfs2_control_private *p; | ||
| 616 | |||
| 617 | p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); | ||
| 618 | if (!p) | ||
| 619 | return -ENOMEM; | ||
| 620 | p->op_this_node = -1; | ||
| 621 | |||
| 622 | mutex_lock(&ocfs2_control_lock); | ||
| 623 | file->private_data = p; | ||
| 624 | list_add(&p->op_list, &ocfs2_control_private_list); | ||
| 625 | mutex_unlock(&ocfs2_control_lock); | ||
| 626 | |||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
| 630 | static const struct file_operations ocfs2_control_fops = { | ||
| 631 | .open = ocfs2_control_open, | ||
| 632 | .release = ocfs2_control_release, | ||
| 633 | .read = ocfs2_control_read, | ||
| 634 | .write = ocfs2_control_write, | ||
| 635 | .owner = THIS_MODULE, | ||
| 636 | }; | ||
| 637 | |||
| 638 | struct miscdevice ocfs2_control_device = { | ||
| 639 | .minor = MISC_DYNAMIC_MINOR, | ||
| 640 | .name = "ocfs2_control", | ||
| 641 | .fops = &ocfs2_control_fops, | ||
| 642 | }; | ||
| 643 | |||
| 644 | static int ocfs2_control_init(void) | ||
| 645 | { | ||
| 646 | int rc; | ||
| 647 | |||
| 648 | atomic_set(&ocfs2_control_opened, 0); | ||
| 649 | |||
| 650 | rc = misc_register(&ocfs2_control_device); | ||
| 651 | if (rc) | ||
| 652 | printk(KERN_ERR | ||
| 653 | "ocfs2: Unable to register ocfs2_control device " | ||
| 654 | "(errno %d)\n", | ||
| 655 | -rc); | ||
| 656 | |||
| 657 | return rc; | ||
| 658 | } | ||
| 659 | |||
| 660 | static void ocfs2_control_exit(void) | ||
| 661 | { | ||
| 662 | int rc; | ||
| 663 | |||
| 664 | rc = misc_deregister(&ocfs2_control_device); | ||
| 665 | if (rc) | ||
| 666 | printk(KERN_ERR | ||
| 667 | "ocfs2: Unable to deregister ocfs2_control device " | ||
| 668 | "(errno %d)\n", | ||
| 669 | -rc); | ||
| 670 | } | ||
| 671 | |||
| 672 | static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg) | ||
| 673 | { | ||
| 674 | struct ocfs2_lock_res *res = astarg; | ||
| 675 | return &res->l_lksb.lksb_fsdlm; | ||
| 676 | } | ||
| 677 | |||
| 678 | static void fsdlm_lock_ast_wrapper(void *astarg) | ||
| 679 | { | ||
| 680 | struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); | ||
| 681 | int status = lksb->sb_status; | ||
| 682 | |||
| 683 | BUG_ON(user_stack.sp_proto == NULL); | ||
| 684 | |||
| 685 | /* | ||
| 686 | * For now we're punting on the issue of other non-standard errors | ||
| 687 | * where we can't tell if the unlock_ast or lock_ast should be called. | ||
| 688 | * The main "other error" that's possible is EINVAL which means the | ||
| 689 | * function was called with invalid args, which shouldn't be possible | ||
| 690 | * since the caller here is under our control. Other non-standard | ||
| 691 | * errors probably fall into the same category, or otherwise are fatal | ||
| 692 | * which means we can't carry on anyway. | ||
| 693 | */ | ||
| 694 | |||
| 695 | if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) | ||
| 696 | user_stack.sp_proto->lp_unlock_ast(astarg, 0); | ||
| 697 | else | ||
| 698 | user_stack.sp_proto->lp_lock_ast(astarg); | ||
| 699 | } | ||
| 700 | |||
| 701 | static void fsdlm_blocking_ast_wrapper(void *astarg, int level) | ||
| 702 | { | ||
| 703 | BUG_ON(user_stack.sp_proto == NULL); | ||
| 704 | |||
| 705 | user_stack.sp_proto->lp_blocking_ast(astarg, level); | ||
| 706 | } | ||
| 707 | |||
| 708 | static int user_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 709 | int mode, | ||
| 710 | union ocfs2_dlm_lksb *lksb, | ||
| 711 | u32 flags, | ||
| 712 | void *name, | ||
| 713 | unsigned int namelen, | ||
| 714 | void *astarg) | ||
| 715 | { | ||
| 716 | int ret; | ||
| 717 | |||
| 718 | if (!lksb->lksb_fsdlm.sb_lvbptr) | ||
| 719 | lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + | ||
| 720 | sizeof(struct dlm_lksb); | ||
| 721 | |||
| 722 | ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, | ||
| 723 | flags|DLM_LKF_NODLCKWT, name, namelen, 0, | ||
| 724 | fsdlm_lock_ast_wrapper, astarg, | ||
| 725 | fsdlm_blocking_ast_wrapper); | ||
| 726 | return ret; | ||
| 727 | } | ||
| 728 | |||
| 729 | static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 730 | union ocfs2_dlm_lksb *lksb, | ||
| 731 | u32 flags, | ||
| 732 | void *astarg) | ||
| 733 | { | ||
| 734 | int ret; | ||
| 735 | |||
| 736 | ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, | ||
| 737 | flags, &lksb->lksb_fsdlm, astarg); | ||
| 738 | return ret; | ||
| 739 | } | ||
| 740 | |||
| 741 | static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | ||
| 742 | { | ||
| 743 | return lksb->lksb_fsdlm.sb_status; | ||
| 744 | } | ||
| 745 | |||
| 746 | static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
| 747 | { | ||
| 748 | return (void *)(lksb->lksb_fsdlm.sb_lvbptr); | ||
| 749 | } | ||
| 750 | |||
/* Intentionally empty: this stack prints nothing for an lksb dump. */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
| 754 | |||
| 755 | /* | ||
| 756 | * Compare a requested locking protocol version against the current one. | ||
| 757 | * | ||
| 758 | * If the major numbers are different, they are incompatible. | ||
| 759 | * If the current minor is greater than the request, they are incompatible. | ||
| 760 | * If the current minor is less than or equal to the request, they are | ||
| 761 | * compatible, and the requester should run at the current minor version. | ||
| 762 | */ | ||
| 763 | static int fs_protocol_compare(struct ocfs2_protocol_version *existing, | ||
| 764 | struct ocfs2_protocol_version *request) | ||
| 765 | { | ||
| 766 | if (existing->pv_major != request->pv_major) | ||
| 767 | return 1; | ||
| 768 | |||
| 769 | if (existing->pv_minor > request->pv_minor) | ||
| 770 | return 1; | ||
| 771 | |||
| 772 | if (existing->pv_minor < request->pv_minor) | ||
| 773 | request->pv_minor = existing->pv_minor; | ||
| 774 | |||
| 775 | return 0; | ||
| 776 | } | ||
| 777 | |||
| 778 | static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | ||
| 779 | { | ||
| 780 | dlm_lockspace_t *fsdlm; | ||
| 781 | struct ocfs2_live_connection *control; | ||
| 782 | int rc = 0; | ||
| 783 | |||
| 784 | BUG_ON(conn == NULL); | ||
| 785 | |||
| 786 | rc = ocfs2_live_connection_new(conn, &control); | ||
| 787 | if (rc) | ||
| 788 | goto out; | ||
| 789 | |||
| 790 | /* | ||
| 791 | * running_proto must have been set before we allowed any mounts | ||
| 792 | * to proceed. | ||
| 793 | */ | ||
| 794 | if (fs_protocol_compare(&running_proto, &conn->cc_version)) { | ||
| 795 | printk(KERN_ERR | ||
| 796 | "Unable to mount with fs locking protocol version " | ||
| 797 | "%u.%u because the userspace control daemon has " | ||
| 798 | "negotiated %u.%u\n", | ||
| 799 | conn->cc_version.pv_major, conn->cc_version.pv_minor, | ||
| 800 | running_proto.pv_major, running_proto.pv_minor); | ||
| 801 | rc = -EPROTO; | ||
| 802 | ocfs2_live_connection_drop(control); | ||
| 803 | goto out; | ||
| 804 | } | ||
| 805 | |||
| 806 | rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), | ||
| 807 | &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); | ||
| 808 | if (rc) { | ||
| 809 | ocfs2_live_connection_drop(control); | ||
| 810 | goto out; | ||
| 811 | } | ||
| 812 | |||
| 813 | conn->cc_private = control; | ||
| 814 | conn->cc_lockspace = fsdlm; | ||
| 815 | out: | ||
| 816 | return rc; | ||
| 817 | } | ||
| 818 | |||
| 819 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 820 | int hangup_pending) | ||
| 821 | { | ||
| 822 | dlm_release_lockspace(conn->cc_lockspace, 2); | ||
| 823 | conn->cc_lockspace = NULL; | ||
| 824 | ocfs2_live_connection_drop(conn->cc_private); | ||
| 825 | conn->cc_private = NULL; | ||
| 826 | return 0; | ||
| 827 | } | ||
| 828 | |||
/*
 * ->this_node() for the user stack.  Stores the value reported by
 * ocfs2_control_get_this_node() in *this_node and returns 0, or passes a
 * negative result straight back without touching *this_node.
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node >= 0) {
		*this_node = node;
		return 0;
	}

	return node;
}
| 840 | |||
| 841 | static struct ocfs2_stack_operations user_stack_ops = { | ||
| 842 | .connect = user_cluster_connect, | ||
| 843 | .disconnect = user_cluster_disconnect, | ||
| 844 | .this_node = user_cluster_this_node, | ||
| 845 | .dlm_lock = user_dlm_lock, | ||
| 846 | .dlm_unlock = user_dlm_unlock, | ||
| 847 | .lock_status = user_dlm_lock_status, | ||
| 848 | .lock_lvb = user_dlm_lvb, | ||
| 849 | .dump_lksb = user_dlm_dump_lksb, | ||
| 850 | }; | ||
| 851 | |||
| 852 | static struct ocfs2_stack_plugin user_stack = { | ||
| 853 | .sp_name = "user", | ||
| 854 | .sp_ops = &user_stack_ops, | ||
| 855 | .sp_owner = THIS_MODULE, | ||
| 856 | }; | ||
| 857 | |||
| 858 | |||
| 859 | static int __init user_stack_init(void) | ||
| 860 | { | ||
| 861 | int rc; | ||
| 862 | |||
| 863 | rc = ocfs2_control_init(); | ||
| 864 | if (!rc) { | ||
| 865 | rc = ocfs2_stack_glue_register(&user_stack); | ||
| 866 | if (rc) | ||
| 867 | ocfs2_control_exit(); | ||
| 868 | } | ||
| 869 | |||
| 870 | return rc; | ||
| 871 | } | ||
| 872 | |||
| 873 | static void __exit user_stack_exit(void) | ||
| 874 | { | ||
| 875 | ocfs2_stack_glue_unregister(&user_stack); | ||
| 876 | ocfs2_control_exit(); | ||
| 877 | } | ||
| 878 | |||
| 879 | MODULE_AUTHOR("Oracle"); | ||
| 880 | MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); | ||
| 881 | MODULE_LICENSE("GPL"); | ||
| 882 | module_init(user_stack_init); | ||
| 883 | module_exit(user_stack_exit); | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c new file mode 100644 index 000000000000..119f60cea9cc --- /dev/null +++ b/fs/ocfs2/stackglue.c | |||
| @@ -0,0 +1,568 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stackglue.c | ||
| 5 | * | ||
| 6 | * Code which implements an OCFS2 specific interface to underlying | ||
| 7 | * cluster stacks. | ||
| 8 | * | ||
| 9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public | ||
| 13 | * License as published by the Free Software Foundation, version 2. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/list.h> | ||
| 22 | #include <linux/spinlock.h> | ||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/kmod.h> | ||
| 26 | #include <linux/fs.h> | ||
| 27 | #include <linux/kobject.h> | ||
| 28 | #include <linux/sysfs.h> | ||
| 29 | |||
| 30 | #include "ocfs2_fs.h" | ||
| 31 | |||
| 32 | #include "stackglue.h" | ||
| 33 | |||
| 34 | #define OCFS2_STACK_PLUGIN_O2CB "o2cb" | ||
| 35 | #define OCFS2_STACK_PLUGIN_USER "user" | ||
| 36 | |||
| 37 | static struct ocfs2_locking_protocol *lproto; | ||
| 38 | static DEFINE_SPINLOCK(ocfs2_stack_lock); | ||
| 39 | static LIST_HEAD(ocfs2_stack_list); | ||
| 40 | static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; | ||
| 41 | |||
| 42 | /* | ||
| 43 | * The stack currently in use. If not null, active_stack->sp_count > 0, | ||
| 44 | * the module is pinned, and the locking protocol cannot be changed. | ||
| 45 | */ | ||
| 46 | static struct ocfs2_stack_plugin *active_stack; | ||
| 47 | |||
| 48 | static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) | ||
| 49 | { | ||
| 50 | struct ocfs2_stack_plugin *p; | ||
| 51 | |||
| 52 | assert_spin_locked(&ocfs2_stack_lock); | ||
| 53 | |||
| 54 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 55 | if (!strcmp(p->sp_name, name)) | ||
| 56 | return p; | ||
| 57 | } | ||
| 58 | |||
| 59 | return NULL; | ||
| 60 | } | ||
| 61 | |||
| 62 | static int ocfs2_stack_driver_request(const char *stack_name, | ||
| 63 | const char *plugin_name) | ||
| 64 | { | ||
| 65 | int rc; | ||
| 66 | struct ocfs2_stack_plugin *p; | ||
| 67 | |||
| 68 | spin_lock(&ocfs2_stack_lock); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * If the stack passed by the filesystem isn't the selected one, | ||
| 72 | * we can't continue. | ||
| 73 | */ | ||
| 74 | if (strcmp(stack_name, cluster_stack_name)) { | ||
| 75 | rc = -EBUSY; | ||
| 76 | goto out; | ||
| 77 | } | ||
| 78 | |||
| 79 | if (active_stack) { | ||
| 80 | /* | ||
| 81 | * If the active stack isn't the one we want, it cannot | ||
| 82 | * be selected right now. | ||
| 83 | */ | ||
| 84 | if (!strcmp(active_stack->sp_name, plugin_name)) | ||
| 85 | rc = 0; | ||
| 86 | else | ||
| 87 | rc = -EBUSY; | ||
| 88 | goto out; | ||
| 89 | } | ||
| 90 | |||
| 91 | p = ocfs2_stack_lookup(plugin_name); | ||
| 92 | if (!p || !try_module_get(p->sp_owner)) { | ||
| 93 | rc = -ENOENT; | ||
| 94 | goto out; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* Ok, the stack is pinned */ | ||
| 98 | p->sp_count++; | ||
| 99 | active_stack = p; | ||
| 100 | |||
| 101 | rc = 0; | ||
| 102 | |||
| 103 | out: | ||
| 104 | spin_unlock(&ocfs2_stack_lock); | ||
| 105 | return rc; | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * This function looks up the appropriate stack and makes it active. If | ||
| 110 | * there is no stack, it tries to load it. It will fail if the stack still | ||
| 111 | * cannot be found. It will also fail if a different stack is in use. | ||
| 112 | */ | ||
| 113 | static int ocfs2_stack_driver_get(const char *stack_name) | ||
| 114 | { | ||
| 115 | int rc; | ||
| 116 | char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Classic stack does not pass in a stack name. This is | ||
| 120 | * compatible with older tools as well. | ||
| 121 | */ | ||
| 122 | if (!stack_name || !*stack_name) | ||
| 123 | stack_name = OCFS2_STACK_PLUGIN_O2CB; | ||
| 124 | |||
| 125 | if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { | ||
| 126 | printk(KERN_ERR | ||
| 127 | "ocfs2 passed an invalid cluster stack label: \"%s\"\n", | ||
| 128 | stack_name); | ||
| 129 | return -EINVAL; | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Anything that isn't the classic stack is a user stack */ | ||
| 133 | if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) | ||
| 134 | plugin_name = OCFS2_STACK_PLUGIN_USER; | ||
| 135 | |||
| 136 | rc = ocfs2_stack_driver_request(stack_name, plugin_name); | ||
| 137 | if (rc == -ENOENT) { | ||
| 138 | request_module("ocfs2_stack_%s", plugin_name); | ||
| 139 | rc = ocfs2_stack_driver_request(stack_name, plugin_name); | ||
| 140 | } | ||
| 141 | |||
| 142 | if (rc == -ENOENT) { | ||
| 143 | printk(KERN_ERR | ||
| 144 | "ocfs2: Cluster stack driver \"%s\" cannot be found\n", | ||
| 145 | plugin_name); | ||
| 146 | } else if (rc == -EBUSY) { | ||
| 147 | printk(KERN_ERR | ||
| 148 | "ocfs2: A different cluster stack is in use\n"); | ||
| 149 | } | ||
| 150 | |||
| 151 | return rc; | ||
| 152 | } | ||
| 153 | |||
| 154 | static void ocfs2_stack_driver_put(void) | ||
| 155 | { | ||
| 156 | spin_lock(&ocfs2_stack_lock); | ||
| 157 | BUG_ON(active_stack == NULL); | ||
| 158 | BUG_ON(active_stack->sp_count == 0); | ||
| 159 | |||
| 160 | active_stack->sp_count--; | ||
| 161 | if (!active_stack->sp_count) { | ||
| 162 | module_put(active_stack->sp_owner); | ||
| 163 | active_stack = NULL; | ||
| 164 | } | ||
| 165 | spin_unlock(&ocfs2_stack_lock); | ||
| 166 | } | ||
| 167 | |||
| 168 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) | ||
| 169 | { | ||
| 170 | int rc; | ||
| 171 | |||
| 172 | spin_lock(&ocfs2_stack_lock); | ||
| 173 | if (!ocfs2_stack_lookup(plugin->sp_name)) { | ||
| 174 | plugin->sp_count = 0; | ||
| 175 | plugin->sp_proto = lproto; | ||
| 176 | list_add(&plugin->sp_list, &ocfs2_stack_list); | ||
| 177 | printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", | ||
| 178 | plugin->sp_name); | ||
| 179 | rc = 0; | ||
| 180 | } else { | ||
| 181 | printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", | ||
| 182 | plugin->sp_name); | ||
| 183 | rc = -EEXIST; | ||
| 184 | } | ||
| 185 | spin_unlock(&ocfs2_stack_lock); | ||
| 186 | |||
| 187 | return rc; | ||
| 188 | } | ||
| 189 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); | ||
| 190 | |||
| 191 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) | ||
| 192 | { | ||
| 193 | struct ocfs2_stack_plugin *p; | ||
| 194 | |||
| 195 | spin_lock(&ocfs2_stack_lock); | ||
| 196 | p = ocfs2_stack_lookup(plugin->sp_name); | ||
| 197 | if (p) { | ||
| 198 | BUG_ON(p != plugin); | ||
| 199 | BUG_ON(plugin == active_stack); | ||
| 200 | BUG_ON(plugin->sp_count != 0); | ||
| 201 | list_del_init(&plugin->sp_list); | ||
| 202 | printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", | ||
| 203 | plugin->sp_name); | ||
| 204 | } else { | ||
| 205 | printk(KERN_ERR "Stack \"%s\" is not registered\n", | ||
| 206 | plugin->sp_name); | ||
| 207 | } | ||
| 208 | spin_unlock(&ocfs2_stack_lock); | ||
| 209 | } | ||
| 210 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); | ||
| 211 | |||
| 212 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) | ||
| 213 | { | ||
| 214 | struct ocfs2_stack_plugin *p; | ||
| 215 | |||
| 216 | BUG_ON(proto == NULL); | ||
| 217 | |||
| 218 | spin_lock(&ocfs2_stack_lock); | ||
| 219 | BUG_ON(active_stack != NULL); | ||
| 220 | |||
| 221 | lproto = proto; | ||
| 222 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 223 | p->sp_proto = lproto; | ||
| 224 | } | ||
| 225 | |||
| 226 | spin_unlock(&ocfs2_stack_lock); | ||
| 227 | } | ||
| 228 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); | ||
| 229 | |||
| 230 | |||
| 231 | /* | ||
| 232 | * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take | ||
| 233 | * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the | ||
| 234 | * underlying stack plugins need to pilfer the lksb off of the lock_res. | ||
| 235 | * If some other structure needs to be passed as an astarg, the plugins | ||
| 236 | * will need to be given a different avenue to the lksb. | ||
| 237 | */ | ||
| 238 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 239 | int mode, | ||
| 240 | union ocfs2_dlm_lksb *lksb, | ||
| 241 | u32 flags, | ||
| 242 | void *name, | ||
| 243 | unsigned int namelen, | ||
| 244 | struct ocfs2_lock_res *astarg) | ||
| 245 | { | ||
| 246 | BUG_ON(lproto == NULL); | ||
| 247 | |||
| 248 | return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, | ||
| 249 | name, namelen, astarg); | ||
| 250 | } | ||
| 251 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); | ||
| 252 | |||
| 253 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 254 | union ocfs2_dlm_lksb *lksb, | ||
| 255 | u32 flags, | ||
| 256 | struct ocfs2_lock_res *astarg) | ||
| 257 | { | ||
| 258 | BUG_ON(lproto == NULL); | ||
| 259 | |||
| 260 | return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); | ||
| 261 | } | ||
| 262 | EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); | ||
| 263 | |||
| 264 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | ||
| 265 | { | ||
| 266 | return active_stack->sp_ops->lock_status(lksb); | ||
| 267 | } | ||
| 268 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); | ||
| 269 | |||
| 270 | /* | ||
| 271 | * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we | ||
| 272 | * don't cast at the glue level. The real answer is that the header | ||
| 273 | * ordering is nigh impossible. | ||
| 274 | */ | ||
| 275 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
| 276 | { | ||
| 277 | return active_stack->sp_ops->lock_lvb(lksb); | ||
| 278 | } | ||
| 279 | EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); | ||
| 280 | |||
| 281 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | ||
| 282 | { | ||
| 283 | active_stack->sp_ops->dump_lksb(lksb); | ||
| 284 | } | ||
| 285 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); | ||
| 286 | |||
| 287 | int ocfs2_cluster_connect(const char *stack_name, | ||
| 288 | const char *group, | ||
| 289 | int grouplen, | ||
| 290 | void (*recovery_handler)(int node_num, | ||
| 291 | void *recovery_data), | ||
| 292 | void *recovery_data, | ||
| 293 | struct ocfs2_cluster_connection **conn) | ||
| 294 | { | ||
| 295 | int rc = 0; | ||
| 296 | struct ocfs2_cluster_connection *new_conn; | ||
| 297 | |||
| 298 | BUG_ON(group == NULL); | ||
| 299 | BUG_ON(conn == NULL); | ||
| 300 | BUG_ON(recovery_handler == NULL); | ||
| 301 | |||
| 302 | if (grouplen > GROUP_NAME_MAX) { | ||
| 303 | rc = -EINVAL; | ||
| 304 | goto out; | ||
| 305 | } | ||
| 306 | |||
| 307 | new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), | ||
| 308 | GFP_KERNEL); | ||
| 309 | if (!new_conn) { | ||
| 310 | rc = -ENOMEM; | ||
| 311 | goto out; | ||
| 312 | } | ||
| 313 | |||
| 314 | memcpy(new_conn->cc_name, group, grouplen); | ||
| 315 | new_conn->cc_namelen = grouplen; | ||
| 316 | new_conn->cc_recovery_handler = recovery_handler; | ||
| 317 | new_conn->cc_recovery_data = recovery_data; | ||
| 318 | |||
| 319 | /* Start the new connection at our maximum compatibility level */ | ||
| 320 | new_conn->cc_version = lproto->lp_max_version; | ||
| 321 | |||
| 322 | /* This will pin the stack driver if successful */ | ||
| 323 | rc = ocfs2_stack_driver_get(stack_name); | ||
| 324 | if (rc) | ||
| 325 | goto out_free; | ||
| 326 | |||
| 327 | rc = active_stack->sp_ops->connect(new_conn); | ||
| 328 | if (rc) { | ||
| 329 | ocfs2_stack_driver_put(); | ||
| 330 | goto out_free; | ||
| 331 | } | ||
| 332 | |||
| 333 | *conn = new_conn; | ||
| 334 | |||
| 335 | out_free: | ||
| 336 | if (rc) | ||
| 337 | kfree(new_conn); | ||
| 338 | |||
| 339 | out: | ||
| 340 | return rc; | ||
| 341 | } | ||
| 342 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); | ||
| 343 | |||
| 344 | /* If hangup_pending is 0, the stack driver will be dropped */ | ||
| 345 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 346 | int hangup_pending) | ||
| 347 | { | ||
| 348 | int ret; | ||
| 349 | |||
| 350 | BUG_ON(conn == NULL); | ||
| 351 | |||
| 352 | ret = active_stack->sp_ops->disconnect(conn, hangup_pending); | ||
| 353 | |||
| 354 | /* XXX Should we free it anyway? */ | ||
| 355 | if (!ret) { | ||
| 356 | kfree(conn); | ||
| 357 | if (!hangup_pending) | ||
| 358 | ocfs2_stack_driver_put(); | ||
| 359 | } | ||
| 360 | |||
| 361 | return ret; | ||
| 362 | } | ||
| 363 | EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); | ||
| 364 | |||
| 365 | void ocfs2_cluster_hangup(const char *group, int grouplen) | ||
| 366 | { | ||
| 367 | BUG_ON(group == NULL); | ||
| 368 | BUG_ON(group[grouplen] != '\0'); | ||
| 369 | |||
| 370 | if (active_stack->sp_ops->hangup) | ||
| 371 | active_stack->sp_ops->hangup(group, grouplen); | ||
| 372 | |||
| 373 | /* cluster_disconnect() was called with hangup_pending==1 */ | ||
| 374 | ocfs2_stack_driver_put(); | ||
| 375 | } | ||
| 376 | EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); | ||
| 377 | |||
| 378 | int ocfs2_cluster_this_node(unsigned int *node) | ||
| 379 | { | ||
| 380 | return active_stack->sp_ops->this_node(node); | ||
| 381 | } | ||
| 382 | EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); | ||
| 383 | |||
| 384 | |||
| 385 | /* | ||
| 386 | * Sysfs bits | ||
| 387 | */ | ||
| 388 | |||
| 389 | static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, | ||
| 390 | struct kobj_attribute *attr, | ||
| 391 | char *buf) | ||
| 392 | { | ||
| 393 | ssize_t ret = 0; | ||
| 394 | |||
| 395 | spin_lock(&ocfs2_stack_lock); | ||
| 396 | if (lproto) | ||
| 397 | ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", | ||
| 398 | lproto->lp_max_version.pv_major, | ||
| 399 | lproto->lp_max_version.pv_minor); | ||
| 400 | spin_unlock(&ocfs2_stack_lock); | ||
| 401 | |||
| 402 | return ret; | ||
| 403 | } | ||
| 404 | |||
| 405 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = | ||
| 406 | __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, | ||
| 407 | ocfs2_max_locking_protocol_show, NULL); | ||
| 408 | |||
| 409 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | ||
| 410 | struct kobj_attribute *attr, | ||
| 411 | char *buf) | ||
| 412 | { | ||
| 413 | ssize_t ret = 0, total = 0, remain = PAGE_SIZE; | ||
| 414 | struct ocfs2_stack_plugin *p; | ||
| 415 | |||
| 416 | spin_lock(&ocfs2_stack_lock); | ||
| 417 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 418 | ret = snprintf(buf, remain, "%s\n", | ||
| 419 | p->sp_name); | ||
| 420 | if (ret < 0) { | ||
| 421 | total = ret; | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | if (ret == remain) { | ||
| 425 | /* snprintf() didn't fit */ | ||
| 426 | total = -E2BIG; | ||
| 427 | break; | ||
| 428 | } | ||
| 429 | total += ret; | ||
| 430 | remain -= ret; | ||
| 431 | } | ||
| 432 | spin_unlock(&ocfs2_stack_lock); | ||
| 433 | |||
| 434 | return total; | ||
| 435 | } | ||
| 436 | |||
| 437 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = | ||
| 438 | __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, | ||
| 439 | ocfs2_loaded_cluster_plugins_show, NULL); | ||
| 440 | |||
| 441 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | ||
| 442 | struct kobj_attribute *attr, | ||
| 443 | char *buf) | ||
| 444 | { | ||
| 445 | ssize_t ret = 0; | ||
| 446 | |||
| 447 | spin_lock(&ocfs2_stack_lock); | ||
| 448 | if (active_stack) { | ||
| 449 | ret = snprintf(buf, PAGE_SIZE, "%s\n", | ||
| 450 | active_stack->sp_name); | ||
| 451 | if (ret == PAGE_SIZE) | ||
| 452 | ret = -E2BIG; | ||
| 453 | } | ||
| 454 | spin_unlock(&ocfs2_stack_lock); | ||
| 455 | |||
| 456 | return ret; | ||
| 457 | } | ||
| 458 | |||
| 459 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = | ||
| 460 | __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, | ||
| 461 | ocfs2_active_cluster_plugin_show, NULL); | ||
| 462 | |||
| 463 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, | ||
| 464 | struct kobj_attribute *attr, | ||
| 465 | char *buf) | ||
| 466 | { | ||
| 467 | ssize_t ret; | ||
| 468 | spin_lock(&ocfs2_stack_lock); | ||
| 469 | ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); | ||
| 470 | spin_unlock(&ocfs2_stack_lock); | ||
| 471 | |||
| 472 | return ret; | ||
| 473 | } | ||
| 474 | |||
| 475 | static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, | ||
| 476 | struct kobj_attribute *attr, | ||
| 477 | const char *buf, size_t count) | ||
| 478 | { | ||
| 479 | size_t len = count; | ||
| 480 | ssize_t ret; | ||
| 481 | |||
| 482 | if (len == 0) | ||
| 483 | return len; | ||
| 484 | |||
| 485 | if (buf[len - 1] == '\n') | ||
| 486 | len--; | ||
| 487 | |||
| 488 | if ((len != OCFS2_STACK_LABEL_LEN) || | ||
| 489 | (strnlen(buf, len) != len)) | ||
| 490 | return -EINVAL; | ||
| 491 | |||
| 492 | spin_lock(&ocfs2_stack_lock); | ||
| 493 | if (active_stack) { | ||
| 494 | if (!strncmp(buf, cluster_stack_name, len)) | ||
| 495 | ret = count; | ||
| 496 | else | ||
| 497 | ret = -EBUSY; | ||
| 498 | } else { | ||
| 499 | memcpy(cluster_stack_name, buf, len); | ||
| 500 | ret = count; | ||
| 501 | } | ||
| 502 | spin_unlock(&ocfs2_stack_lock); | ||
| 503 | |||
| 504 | return ret; | ||
| 505 | } | ||
| 506 | |||
| 507 | |||
| 508 | static struct kobj_attribute ocfs2_attr_cluster_stack = | ||
| 509 | __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, | ||
| 510 | ocfs2_cluster_stack_show, | ||
| 511 | ocfs2_cluster_stack_store); | ||
| 512 | |||
| 513 | static struct attribute *ocfs2_attrs[] = { | ||
| 514 | &ocfs2_attr_max_locking_protocol.attr, | ||
| 515 | &ocfs2_attr_loaded_cluster_plugins.attr, | ||
| 516 | &ocfs2_attr_active_cluster_plugin.attr, | ||
| 517 | &ocfs2_attr_cluster_stack.attr, | ||
| 518 | NULL, | ||
| 519 | }; | ||
| 520 | |||
| 521 | static struct attribute_group ocfs2_attr_group = { | ||
| 522 | .attrs = ocfs2_attrs, | ||
| 523 | }; | ||
| 524 | |||
| 525 | static struct kset *ocfs2_kset; | ||
| 526 | |||
| 527 | static void ocfs2_sysfs_exit(void) | ||
| 528 | { | ||
| 529 | kset_unregister(ocfs2_kset); | ||
| 530 | } | ||
| 531 | |||
| 532 | static int ocfs2_sysfs_init(void) | ||
| 533 | { | ||
| 534 | int ret; | ||
| 535 | |||
| 536 | ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); | ||
| 537 | if (!ocfs2_kset) | ||
| 538 | return -ENOMEM; | ||
| 539 | |||
| 540 | ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); | ||
| 541 | if (ret) | ||
| 542 | goto error; | ||
| 543 | |||
| 544 | return 0; | ||
| 545 | |||
| 546 | error: | ||
| 547 | kset_unregister(ocfs2_kset); | ||
| 548 | return ret; | ||
| 549 | } | ||
| 550 | |||
| 551 | static int __init ocfs2_stack_glue_init(void) | ||
| 552 | { | ||
| 553 | strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); | ||
| 554 | |||
| 555 | return ocfs2_sysfs_init(); | ||
| 556 | } | ||
| 557 | |||
| 558 | static void __exit ocfs2_stack_glue_exit(void) | ||
| 559 | { | ||
| 560 | lproto = NULL; | ||
| 561 | ocfs2_sysfs_exit(); | ||
| 562 | } | ||
| 563 | |||
| 564 | MODULE_AUTHOR("Oracle"); | ||
| 565 | MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); | ||
| 566 | MODULE_LICENSE("GPL"); | ||
| 567 | module_init(ocfs2_stack_glue_init); | ||
| 568 | module_exit(ocfs2_stack_glue_exit); | ||
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h new file mode 100644 index 000000000000..005e4f170e0f --- /dev/null +++ b/fs/ocfs2/stackglue.h | |||
| @@ -0,0 +1,261 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stackglue.h | ||
| 5 | * | ||
| 6 | * Glue to the underlying cluster stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | |||
| 21 | #ifndef STACKGLUE_H | ||
| 22 | #define STACKGLUE_H | ||
| 23 | |||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/list.h> | ||
| 26 | #include <linux/dlmconstants.h> | ||
| 27 | |||
| 28 | #include "dlm/dlmapi.h" | ||
| 29 | #include <linux/dlm.h> | ||
| 30 | |||
| 31 | /* | ||
| 32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | ||
| 33 | * some day, but right now we need it. Let's fake it. This value is larger | ||
| 34 | * than any flag in dlmconstants.h. | ||
| 35 | */ | ||
| 36 | #define DLM_LKF_LOCAL 0x00100000 | ||
| 37 | |||
| 38 | /* | ||
| 39 | * This shadows DLM_LOCKSPACE_LEN in fs/dlm/dlm_internal.h. That probably | ||
| 40 | * wants to be in a public header. | ||
| 41 | */ | ||
| 42 | #define GROUP_NAME_MAX 64 | ||
| 43 | |||
| 44 | |||
| 45 | /* | ||
| 46 | * ocfs2_protocol_version changes when ocfs2 does something different in | ||
| 47 | * its inter-node behavior. See dlmglue.c for more information. | ||
| 48 | */ | ||
| 49 | struct ocfs2_protocol_version { | ||
| 50 | u8 pv_major; | ||
| 51 | u8 pv_minor; | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. | ||
| 56 | */ | ||
| 57 | struct ocfs2_locking_protocol { | ||
| 58 | struct ocfs2_protocol_version lp_max_version; | ||
| 59 | void (*lp_lock_ast)(void *astarg); | ||
| 60 | void (*lp_blocking_ast)(void *astarg, int level); | ||
| 61 | void (*lp_unlock_ast)(void *astarg, int error); | ||
| 62 | }; | ||
| 63 | |||
| 64 | |||
| 65 | /* | ||
| 66 | * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only | ||
| 67 | * has a pointer to separately allocated lvb space. This struct exists only to | ||
| 68 | * include in the lksb union to make space for a combined dlm_lksb and lvb. | ||
| 69 | */ | ||
| 70 | struct fsdlm_lksb_plus_lvb { | ||
| 71 | struct dlm_lksb lksb; | ||
| 72 | char lvb[DLM_LVB_LEN]; | ||
| 73 | }; | ||
| 74 | |||
| 75 | /* | ||
| 76 | * A union of all lock status structures. We define it here so that the | ||
| 77 | * size of the union is known. Lock status structures are embedded in | ||
| 78 | * ocfs2 inodes. | ||
| 79 | */ | ||
| 80 | union ocfs2_dlm_lksb { | ||
| 81 | struct dlm_lockstatus lksb_o2dlm; | ||
| 82 | struct dlm_lksb lksb_fsdlm; | ||
| 83 | struct fsdlm_lksb_plus_lvb padding; | ||
| 84 | }; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * A cluster connection. Mostly opaque to ocfs2, the connection holds | ||
| 88 | * state for the underlying stack. ocfs2 does use cc_version to determine | ||
| 89 | * locking compatibility. | ||
| 90 | */ | ||
| 91 | struct ocfs2_cluster_connection { | ||
| 92 | char cc_name[GROUP_NAME_MAX]; | ||
| 93 | int cc_namelen; | ||
| 94 | struct ocfs2_protocol_version cc_version; | ||
| 95 | void (*cc_recovery_handler)(int node_num, void *recovery_data); | ||
| 96 | void *cc_recovery_data; | ||
| 97 | void *cc_lockspace; | ||
| 98 | void *cc_private; | ||
| 99 | }; | ||
| 100 | |||
| 101 | /* | ||
| 102 | * Each cluster stack implements the stack operations structure. Not used | ||
| 103 | * in the ocfs2 code, the stackglue code translates generic cluster calls | ||
| 104 | * into stack operations. | ||
| 105 | */ | ||
| 106 | struct ocfs2_stack_operations { | ||
| 107 | /* | ||
| 108 | * The fs code calls ocfs2_cluster_connect() to attach a new | ||
| 109 | * filesystem to the cluster stack. The ->connect() op is passed | ||
| 110 | * an ocfs2_cluster_connection with the name and recovery field | ||
| 111 | * filled in. | ||
| 112 | * | ||
| 113 | * The stack must set up any notification mechanisms and create | ||
| 114 | * the filesystem lockspace in the DLM. The lockspace should be | ||
| 115 | * stored on cc_lockspace. Any other information can be stored on | ||
| 116 | * cc_private. | ||
| 117 | * | ||
| 118 | * ->connect() must not return until it is guaranteed that | ||
| 119 | * | ||
| 120 | * - Node down notifications for the filesystem will be received | ||
| 121 | * and passed to conn->cc_recovery_handler(). | ||
| 122 | * - Locking requests for the filesystem will be processed. | ||
| 123 | */ | ||
| 124 | int (*connect)(struct ocfs2_cluster_connection *conn); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * The fs code calls ocfs2_cluster_disconnect() when a filesystem | ||
| 128 | * no longer needs cluster services. All DLM locks have been | ||
| 129 | * dropped, and recovery notification is being ignored by the | ||
| 130 | * fs code. The stack must disengage from the DLM and discontinue | ||
| 131 | * recovery notification. | ||
| 132 | * | ||
| 133 | * Once ->disconnect() has returned, the connection structure will | ||
| 134 | * be freed. Thus, a stack must not return from ->disconnect() | ||
| 135 | * until it will no longer reference the conn pointer. | ||
| 136 | * | ||
| 137 | * If hangup_pending is zero, ocfs2_cluster_disconnect() will also | ||
| 138 | * be dropping the reference on the module. | ||
| 139 | */ | ||
| 140 | int (*disconnect)(struct ocfs2_cluster_connection *conn, | ||
| 141 | int hangup_pending); | ||
| 142 | |||
| 143 | /* | ||
| 144 | * ocfs2_cluster_hangup() exists for compatibility with older | ||
| 145 | * ocfs2 tools. Only the classic stack really needs it. As such | ||
| 146 | * ->hangup() is not required of all stacks. See the comment by | ||
| 147 | * ocfs2_cluster_hangup() for more details. | ||
| 148 | * | ||
| 149 | * Note that ocfs2_cluster_hangup() can only be called if | ||
| 150 | * hangup_pending was passed to ocfs2_cluster_disconnect(). | ||
| 151 | */ | ||
| 152 | void (*hangup)(const char *group, int grouplen); | ||
| 153 | |||
| 154 | /* | ||
| 155 | * ->this_node() returns the cluster's unique identifier for the | ||
| 156 | * local node. | ||
| 157 | */ | ||
| 158 | int (*this_node)(unsigned int *node); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Call the underlying dlm lock function. The ->dlm_lock() | ||
| 162 | * callback should convert the flags and mode as appropriate. | ||
| 163 | * | ||
| 164 | * ast and bast functions are not part of the call because the | ||
| 165 | * stack will likely want to wrap ast and bast calls before passing | ||
| 166 | * them to stack->sp_proto. | ||
| 167 | */ | ||
| 168 | int (*dlm_lock)(struct ocfs2_cluster_connection *conn, | ||
| 169 | int mode, | ||
| 170 | union ocfs2_dlm_lksb *lksb, | ||
| 171 | u32 flags, | ||
| 172 | void *name, | ||
| 173 | unsigned int namelen, | ||
| 174 | void *astarg); | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Call the underlying dlm unlock function. The ->dlm_unlock() | ||
| 178 | * function should convert the flags as appropriate. | ||
| 179 | * | ||
| 180 | * The unlock ast is not passed, as the stack will want to wrap | ||
| 181 | * it before calling stack->sp_proto->lp_unlock_ast(). | ||
| 182 | */ | ||
| 183 | int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, | ||
| 184 | union ocfs2_dlm_lksb *lksb, | ||
| 185 | u32 flags, | ||
| 186 | void *astarg); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Return the status of the current lock status block. The fs | ||
| 190 | * code should never dereference the union. The ->lock_status() | ||
| 191 | * callback pulls out the stack-specific lksb, converts the status | ||
| 192 | * to a proper errno, and returns it. | ||
| 193 | */ | ||
| 194 | int (*lock_status)(union ocfs2_dlm_lksb *lksb); | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Pull the lvb pointer off of the stack-specific lksb. | ||
| 198 | */ | ||
| 199 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | ||
| 200 | |||
| 201 | /* | ||
| 202 | * This is an optional debugging hook. If provided, the | ||
| 203 | * stack can dump debugging information about this lock. | ||
| 204 | */ | ||
| 205 | void (*dump_lksb)(union ocfs2_dlm_lksb *lksb); | ||
| 206 | }; | ||
| 207 | |||
| 208 | /* | ||
| 209 | * Each stack plugin must describe itself by registering a | ||
| 210 | * ocfs2_stack_plugin structure. This is only seen by stackglue and the | ||
| 211 | * stack driver. | ||
| 212 | */ | ||
| 213 | struct ocfs2_stack_plugin { | ||
| 214 | char *sp_name; | ||
| 215 | struct ocfs2_stack_operations *sp_ops; | ||
| 216 | struct module *sp_owner; | ||
| 217 | |||
| 218 | /* These are managed by the stackglue code. */ | ||
| 219 | struct list_head sp_list; | ||
| 220 | unsigned int sp_count; | ||
| 221 | struct ocfs2_locking_protocol *sp_proto; | ||
| 222 | }; | ||
| 223 | |||
| 224 | |||
| 225 | /* Used by the filesystem */ | ||
| 226 | int ocfs2_cluster_connect(const char *stack_name, | ||
| 227 | const char *group, | ||
| 228 | int grouplen, | ||
| 229 | void (*recovery_handler)(int node_num, | ||
| 230 | void *recovery_data), | ||
| 231 | void *recovery_data, | ||
| 232 | struct ocfs2_cluster_connection **conn); | ||
| 233 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 234 | int hangup_pending); | ||
| 235 | void ocfs2_cluster_hangup(const char *group, int grouplen); | ||
| 236 | int ocfs2_cluster_this_node(unsigned int *node); | ||
| 237 | |||
| 238 | struct ocfs2_lock_res; | ||
| 239 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 240 | int mode, | ||
| 241 | union ocfs2_dlm_lksb *lksb, | ||
| 242 | u32 flags, | ||
| 243 | void *name, | ||
| 244 | unsigned int namelen, | ||
| 245 | struct ocfs2_lock_res *astarg); | ||
| 246 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 247 | union ocfs2_dlm_lksb *lksb, | ||
| 248 | u32 flags, | ||
| 249 | struct ocfs2_lock_res *astarg); | ||
| 250 | |||
| 251 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | ||
| 252 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | ||
| 253 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | ||
| 254 | |||
| 255 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | ||
| 256 | |||
| 257 | |||
| 258 | /* Used by stack plugins */ | ||
| 259 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); | ||
| 260 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); | ||
| 261 | #endif /* STACKGLUE_H */ | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 72c198a004df..d2d278fb9819 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -46,6 +46,11 @@ | |||
| 46 | 46 | ||
| 47 | #include "buffer_head_io.h" | 47 | #include "buffer_head_io.h" |
| 48 | 48 | ||
| 49 | #define NOT_ALLOC_NEW_GROUP 0 | ||
| 50 | #define ALLOC_NEW_GROUP 1 | ||
| 51 | |||
| 52 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | ||
| 53 | |||
| 49 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 54 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
| 50 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 55 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
| 51 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 56 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
| @@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
| 106 | u64 *bg_blkno, | 111 | u64 *bg_blkno, |
| 107 | u16 *bg_bit_off); | 112 | u16 *bg_bit_off); |
| 108 | 113 | ||
| 109 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
| 110 | { | 115 | { |
| 111 | struct inode *inode = ac->ac_inode; | 116 | struct inode *inode = ac->ac_inode; |
| 112 | 117 | ||
| @@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
| 117 | mutex_unlock(&inode->i_mutex); | 122 | mutex_unlock(&inode->i_mutex); |
| 118 | 123 | ||
| 119 | iput(inode); | 124 | iput(inode); |
| 125 | ac->ac_inode = NULL; | ||
| 120 | } | 126 | } |
| 121 | if (ac->ac_bh) | 127 | if (ac->ac_bh) { |
| 122 | brelse(ac->ac_bh); | 128 | brelse(ac->ac_bh); |
| 129 | ac->ac_bh = NULL; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | ||
| 134 | { | ||
| 135 | ocfs2_free_ac_resource(ac); | ||
| 123 | kfree(ac); | 136 | kfree(ac); |
| 124 | } | 137 | } |
| 125 | 138 | ||
| @@ -391,7 +404,8 @@ bail: | |||
| 391 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | 404 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, |
| 392 | struct ocfs2_alloc_context *ac, | 405 | struct ocfs2_alloc_context *ac, |
| 393 | int type, | 406 | int type, |
| 394 | u32 slot) | 407 | u32 slot, |
| 408 | int alloc_new_group) | ||
| 395 | { | 409 | { |
| 396 | int status; | 410 | int status; |
| 397 | u32 bits_wanted = ac->ac_bits_wanted; | 411 | u32 bits_wanted = ac->ac_bits_wanted; |
| @@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 420 | } | 434 | } |
| 421 | 435 | ||
| 422 | ac->ac_inode = alloc_inode; | 436 | ac->ac_inode = alloc_inode; |
| 437 | ac->ac_alloc_slot = slot; | ||
| 423 | 438 | ||
| 424 | fe = (struct ocfs2_dinode *) bh->b_data; | 439 | fe = (struct ocfs2_dinode *) bh->b_data; |
| 425 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 440 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
| @@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 446 | goto bail; | 461 | goto bail; |
| 447 | } | 462 | } |
| 448 | 463 | ||
| 464 | if (alloc_new_group != ALLOC_NEW_GROUP) { | ||
| 465 | mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " | ||
| 466 | "and we don't alloc a new group for it.\n", | ||
| 467 | slot, bits_wanted, free_bits); | ||
| 468 | status = -ENOSPC; | ||
| 469 | goto bail; | ||
| 470 | } | ||
| 471 | |||
| 449 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); |
| 450 | if (status < 0) { | 473 | if (status < 0) { |
| 451 | if (status != -ENOSPC) | 474 | if (status != -ENOSPC) |
| @@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
| 490 | (*ac)->ac_group_search = ocfs2_block_group_search; | 513 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 491 | 514 | ||
| 492 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 515 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
| 493 | EXTENT_ALLOC_SYSTEM_INODE, slot); | 516 | EXTENT_ALLOC_SYSTEM_INODE, |
| 517 | slot, ALLOC_NEW_GROUP); | ||
| 494 | if (status < 0) { | 518 | if (status < 0) { |
| 495 | if (status != -ENOSPC) | 519 | if (status != -ENOSPC) |
| 496 | mlog_errno(status); | 520 | mlog_errno(status); |
| @@ -508,10 +532,42 @@ bail: | |||
| 508 | return status; | 532 | return status; |
| 509 | } | 533 | } |
| 510 | 534 | ||
| 535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | ||
| 536 | struct ocfs2_alloc_context *ac) | ||
| 537 | { | ||
| 538 | int i, status = -ENOSPC; | ||
| 539 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
| 540 | |||
| 541 | /* Start to steal inodes from the first slot after ours. */ | ||
| 542 | if (slot == OCFS2_INVALID_SLOT) | ||
| 543 | slot = osb->slot_num + 1; | ||
| 544 | |||
| 545 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
| 546 | if (slot == osb->max_slots) | ||
| 547 | slot = 0; | ||
| 548 | |||
| 549 | if (slot == osb->slot_num) | ||
| 550 | continue; | ||
| 551 | |||
| 552 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
| 553 | INODE_ALLOC_SYSTEM_INODE, | ||
| 554 | slot, NOT_ALLOC_NEW_GROUP); | ||
| 555 | if (status >= 0) { | ||
| 556 | ocfs2_set_inode_steal_slot(osb, slot); | ||
| 557 | break; | ||
| 558 | } | ||
| 559 | |||
| 560 | ocfs2_free_ac_resource(ac); | ||
| 561 | } | ||
| 562 | |||
| 563 | return status; | ||
| 564 | } | ||
| 565 | |||
| 511 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 566 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
| 512 | struct ocfs2_alloc_context **ac) | 567 | struct ocfs2_alloc_context **ac) |
| 513 | { | 568 | { |
| 514 | int status; | 569 | int status; |
| 570 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
| 515 | 571 | ||
| 516 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 572 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
| 517 | if (!(*ac)) { | 573 | if (!(*ac)) { |
| @@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
| 525 | 581 | ||
| 526 | (*ac)->ac_group_search = ocfs2_block_group_search; | 582 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 527 | 583 | ||
| 584 | /* | ||
| 585 | * slot is set when we successfully steal an inode from other nodes. | ||
| 586 | * It is reset in 3 places: | ||
| 587 | * 1. when we flush the truncate log | ||
| 588 | * 2. when we complete local alloc recovery. | ||
| 589 | * 3. when we successfully allocate from our own slot. | ||
| 590 | * After it is set, we will go on stealing inodes until we find the | ||
| 591 | * need to check our slots to see whether there is some space for us. | ||
| 592 | */ | ||
| 593 | if (slot != OCFS2_INVALID_SLOT && | ||
| 594 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) | ||
| 595 | goto inode_steal; | ||
| 596 | |||
| 597 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
| 528 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 598 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
| 529 | INODE_ALLOC_SYSTEM_INODE, | 599 | INODE_ALLOC_SYSTEM_INODE, |
| 530 | osb->slot_num); | 600 | osb->slot_num, ALLOC_NEW_GROUP); |
| 601 | if (status >= 0) { | ||
| 602 | status = 0; | ||
| 603 | |||
| 604 | /* | ||
| 605 | * Some inodes must have been freed by us, so try to allocate | ||
| 606 | * from our own slot next time. | ||
| 607 | */ | ||
| 608 | if (slot != OCFS2_INVALID_SLOT) | ||
| 609 | ocfs2_init_inode_steal_slot(osb); | ||
| 610 | goto bail; | ||
| 611 | } else if (status < 0 && status != -ENOSPC) { | ||
| 612 | mlog_errno(status); | ||
| 613 | goto bail; | ||
| 614 | } | ||
| 615 | |||
| 616 | ocfs2_free_ac_resource(*ac); | ||
| 617 | |||
| 618 | inode_steal: | ||
| 619 | status = ocfs2_steal_inode_from_other_nodes(osb, *ac); | ||
| 620 | atomic_inc(&osb->s_num_inodes_stolen); | ||
| 531 | if (status < 0) { | 621 | if (status < 0) { |
| 532 | if (status != -ENOSPC) | 622 | if (status != -ENOSPC) |
| 533 | mlog_errno(status); | 623 | mlog_errno(status); |
| @@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
| 557 | 647 | ||
| 558 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 648 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
| 559 | GLOBAL_BITMAP_SYSTEM_INODE, | 649 | GLOBAL_BITMAP_SYSTEM_INODE, |
| 560 | OCFS2_INVALID_SLOT); | 650 | OCFS2_INVALID_SLOT, |
| 651 | ALLOC_NEW_GROUP); | ||
| 561 | if (status < 0 && status != -ENOSPC) { | 652 | if (status < 0 && status != -ENOSPC) { |
| 562 | mlog_errno(status); | 653 | mlog_errno(status); |
| 563 | goto bail; | 654 | goto bail; |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 8799033bb459..544c600662bd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *, | |||
| 36 | struct ocfs2_alloc_context { | 36 | struct ocfs2_alloc_context { |
| 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
| 38 | struct buffer_head *ac_bh; /* file entry bh */ | 38 | struct buffer_head *ac_bh; /* file entry bh */ |
| 39 | u32 ac_alloc_slot; /* which slot are we allocating from? */ | ||
| 39 | u32 ac_bits_wanted; | 40 | u32 ac_bits_wanted; |
| 40 | u32 ac_bits_given; | 41 | u32 ac_bits_given; |
| 41 | #define OCFS2_AC_USE_LOCAL 1 | 42 | #define OCFS2_AC_USE_LOCAL 1 |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index bec75aff3d9f..df63ba20ae90 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -40,8 +40,7 @@ | |||
| 40 | #include <linux/crc32.h> | 40 | #include <linux/crc32.h> |
| 41 | #include <linux/debugfs.h> | 41 | #include <linux/debugfs.h> |
| 42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
| 43 | 43 | #include <linux/seq_file.h> | |
| 44 | #include <cluster/nodemanager.h> | ||
| 45 | 44 | ||
| 46 | #define MLOG_MASK_PREFIX ML_SUPER | 45 | #define MLOG_MASK_PREFIX ML_SUPER |
| 47 | #include <cluster/masklog.h> | 46 | #include <cluster/masklog.h> |
| @@ -88,6 +87,7 @@ struct mount_options | |||
| 88 | unsigned int atime_quantum; | 87 | unsigned int atime_quantum; |
| 89 | signed short slot; | 88 | signed short slot; |
| 90 | unsigned int localalloc_opt; | 89 | unsigned int localalloc_opt; |
| 90 | char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | ||
| 91 | }; | 91 | }; |
| 92 | 92 | ||
| 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
| @@ -109,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait); | |||
| 109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); | 109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); |
| 110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); | 110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); |
| 111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); | 111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); |
| 112 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); | ||
| 113 | static int ocfs2_check_volume(struct ocfs2_super *osb); | 112 | static int ocfs2_check_volume(struct ocfs2_super *osb); |
| 114 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, | 113 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, |
| 115 | struct buffer_head *bh, | 114 | struct buffer_head *bh, |
| @@ -154,6 +153,7 @@ enum { | |||
| 154 | Opt_commit, | 153 | Opt_commit, |
| 155 | Opt_localalloc, | 154 | Opt_localalloc, |
| 156 | Opt_localflocks, | 155 | Opt_localflocks, |
| 156 | Opt_stack, | ||
| 157 | Opt_err, | 157 | Opt_err, |
| 158 | }; | 158 | }; |
| 159 | 159 | ||
| @@ -172,6 +172,7 @@ static match_table_t tokens = { | |||
| 172 | {Opt_commit, "commit=%u"}, | 172 | {Opt_commit, "commit=%u"}, |
| 173 | {Opt_localalloc, "localalloc=%d"}, | 173 | {Opt_localalloc, "localalloc=%d"}, |
| 174 | {Opt_localflocks, "localflocks"}, | 174 | {Opt_localflocks, "localflocks"}, |
| 175 | {Opt_stack, "cluster_stack=%s"}, | ||
| 175 | {Opt_err, NULL} | 176 | {Opt_err, NULL} |
| 176 | }; | 177 | }; |
| 177 | 178 | ||
| @@ -551,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
| 551 | } | 552 | } |
| 552 | } | 553 | } |
| 553 | 554 | ||
| 555 | if (ocfs2_userspace_stack(osb)) { | ||
| 556 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
| 557 | mlog(ML_ERROR, "Userspace stack expected, but " | ||
| 558 | "o2cb heartbeat arguments passed to mount\n"); | ||
| 559 | return -EINVAL; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 554 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 563 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { |
| 555 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { | 564 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
| 565 | !ocfs2_userspace_stack(osb)) { | ||
| 556 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 566 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
| 557 | "a read-write clustered device.\n"); | 567 | "a read-write clustered device.\n"); |
| 558 | return -EINVAL; | 568 | return -EINVAL; |
| @@ -562,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
| 562 | return 0; | 572 | return 0; |
| 563 | } | 573 | } |
| 564 | 574 | ||
| 575 | /* | ||
| 576 | * If we're using a userspace stack, mount should have passed | ||
| 577 | * a name that matches the disk. If not, mount should not | ||
| 578 | * have passed a stack. | ||
| 579 | */ | ||
| 580 | static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, | ||
| 581 | struct mount_options *mopt) | ||
| 582 | { | ||
| 583 | if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { | ||
| 584 | mlog(ML_ERROR, | ||
| 585 | "cluster stack passed to mount, but this filesystem " | ||
| 586 | "does not support it\n"); | ||
| 587 | return -EINVAL; | ||
| 588 | } | ||
| 589 | |||
| 590 | if (ocfs2_userspace_stack(osb) && | ||
| 591 | strncmp(osb->osb_cluster_stack, mopt->cluster_stack, | ||
| 592 | OCFS2_STACK_LABEL_LEN)) { | ||
| 593 | mlog(ML_ERROR, | ||
| 594 | "cluster stack passed to mount (\"%s\") does not " | ||
| 595 | "match the filesystem (\"%s\")\n", | ||
| 596 | mopt->cluster_stack, | ||
| 597 | osb->osb_cluster_stack); | ||
| 598 | return -EINVAL; | ||
| 599 | } | ||
| 600 | |||
| 601 | return 0; | ||
| 602 | } | ||
| 603 | |||
| 565 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | 604 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) |
| 566 | { | 605 | { |
| 567 | struct dentry *root; | 606 | struct dentry *root; |
| @@ -579,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 579 | goto read_super_error; | 618 | goto read_super_error; |
| 580 | } | 619 | } |
| 581 | 620 | ||
| 582 | /* for now we only have one cluster/node, make sure we see it | ||
| 583 | * in the heartbeat universe */ | ||
| 584 | if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
| 585 | if (!o2hb_check_local_node_heartbeating()) { | ||
| 586 | status = -EINVAL; | ||
| 587 | goto read_super_error; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | /* probe for superblock */ | 621 | /* probe for superblock */ |
| 592 | status = ocfs2_sb_probe(sb, &bh, §or_size); | 622 | status = ocfs2_sb_probe(sb, &bh, §or_size); |
| 593 | if (status < 0) { | 623 | if (status < 0) { |
| @@ -609,6 +639,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 609 | osb->osb_commit_interval = parsed_options.commit_interval; | 639 | osb->osb_commit_interval = parsed_options.commit_interval; |
| 610 | osb->local_alloc_size = parsed_options.localalloc_opt; | 640 | osb->local_alloc_size = parsed_options.localalloc_opt; |
| 611 | 641 | ||
| 642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | ||
| 643 | if (status) | ||
| 644 | goto read_super_error; | ||
| 645 | |||
| 612 | sb->s_magic = OCFS2_SUPER_MAGIC; | 646 | sb->s_magic = OCFS2_SUPER_MAGIC; |
| 613 | 647 | ||
| 614 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, | 648 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, |
| @@ -694,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 694 | if (ocfs2_mount_local(osb)) | 728 | if (ocfs2_mount_local(osb)) |
| 695 | snprintf(nodestr, sizeof(nodestr), "local"); | 729 | snprintf(nodestr, sizeof(nodestr), "local"); |
| 696 | else | 730 | else |
| 697 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 731 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
| 698 | 732 | ||
| 699 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " | 733 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " |
| 700 | "with %s data mode.\n", | 734 | "with %s data mode.\n", |
| @@ -763,6 +797,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 797 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 764 | mopt->slot = OCFS2_INVALID_SLOT; | 798 | mopt->slot = OCFS2_INVALID_SLOT; |
| 765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 799 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; |
| 800 | mopt->cluster_stack[0] = '\0'; | ||
| 766 | 801 | ||
| 767 | if (!options) { | 802 | if (!options) { |
| 768 | status = 1; | 803 | status = 1; |
| @@ -864,6 +899,25 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 864 | if (!is_remount) | 899 | if (!is_remount) |
| 865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | 900 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; |
| 866 | break; | 901 | break; |
| 902 | case Opt_stack: | ||
| 903 | /* Check both that the option we were passed | ||
| 904 | * is of the right length and that it is a proper | ||
| 905 | * string of the right length. | ||
| 906 | */ | ||
| 907 | if (((args[0].to - args[0].from) != | ||
| 908 | OCFS2_STACK_LABEL_LEN) || | ||
| 909 | (strnlen(args[0].from, | ||
| 910 | OCFS2_STACK_LABEL_LEN) != | ||
| 911 | OCFS2_STACK_LABEL_LEN)) { | ||
| 912 | mlog(ML_ERROR, | ||
| 913 | "Invalid cluster_stack option\n"); | ||
| 914 | status = 0; | ||
| 915 | goto bail; | ||
| 916 | } | ||
| 917 | memcpy(mopt->cluster_stack, args[0].from, | ||
| 918 | OCFS2_STACK_LABEL_LEN); | ||
| 919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
| 920 | break; | ||
| 867 | default: | 921 | default: |
| 868 | mlog(ML_ERROR, | 922 | mlog(ML_ERROR, |
| 869 | "Unrecognized mount option \"%s\" " | 923 | "Unrecognized mount option \"%s\" " |
| @@ -922,6 +976,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
| 923 | seq_printf(s, ",localflocks,"); | 977 | seq_printf(s, ",localflocks,"); |
| 924 | 978 | ||
| 979 | if (osb->osb_cluster_stack[0]) | ||
| 980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | ||
| 981 | osb->osb_cluster_stack); | ||
| 982 | |||
| 925 | return 0; | 983 | return 0; |
| 926 | } | 984 | } |
| 927 | 985 | ||
| @@ -957,6 +1015,8 @@ static int __init ocfs2_init(void) | |||
| 957 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1015 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
| 958 | } | 1016 | } |
| 959 | 1017 | ||
| 1018 | ocfs2_set_locking_protocol(); | ||
| 1019 | |||
| 960 | leave: | 1020 | leave: |
| 961 | if (status < 0) { | 1021 | if (status < 0) { |
| 962 | ocfs2_free_mem_caches(); | 1022 | ocfs2_free_mem_caches(); |
| @@ -1132,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb, | |||
| 1132 | return 0; | 1192 | return 0; |
| 1133 | } | 1193 | } |
| 1134 | 1194 | ||
| 1135 | /* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ | ||
| 1136 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) | ||
| 1137 | { | ||
| 1138 | int status; | ||
| 1139 | |||
| 1140 | /* XXX hold a ref on the node while mounte? easy enough, if | ||
| 1141 | * desirable. */ | ||
| 1142 | if (ocfs2_mount_local(osb)) | ||
| 1143 | osb->node_num = 0; | ||
| 1144 | else | ||
| 1145 | osb->node_num = o2nm_this_node(); | ||
| 1146 | |||
| 1147 | if (osb->node_num == O2NM_MAX_NODES) { | ||
| 1148 | mlog(ML_ERROR, "could not find this host's node number\n"); | ||
| 1149 | status = -ENOENT; | ||
| 1150 | goto bail; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | mlog(0, "I am node %d\n", osb->node_num); | ||
| 1154 | |||
| 1155 | status = 0; | ||
| 1156 | bail: | ||
| 1157 | return status; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | static int ocfs2_mount_volume(struct super_block *sb) | 1195 | static int ocfs2_mount_volume(struct super_block *sb) |
| 1161 | { | 1196 | { |
| 1162 | int status = 0; | 1197 | int status = 0; |
| @@ -1168,12 +1203,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
| 1168 | if (ocfs2_is_hard_readonly(osb)) | 1203 | if (ocfs2_is_hard_readonly(osb)) |
| 1169 | goto leave; | 1204 | goto leave; |
| 1170 | 1205 | ||
| 1171 | status = ocfs2_fill_local_node_info(osb); | ||
| 1172 | if (status < 0) { | ||
| 1173 | mlog_errno(status); | ||
| 1174 | goto leave; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | status = ocfs2_dlm_init(osb); | 1206 | status = ocfs2_dlm_init(osb); |
| 1178 | if (status < 0) { | 1207 | if (status < 0) { |
| 1179 | mlog_errno(status); | 1208 | mlog_errno(status); |
| @@ -1224,18 +1253,9 @@ leave: | |||
| 1224 | return status; | 1253 | return status; |
| 1225 | } | 1254 | } |
| 1226 | 1255 | ||
| 1227 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 1228 | * memory barriers to make sure that we'll see the null task before | ||
| 1229 | * being woken up */ | ||
| 1230 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 1231 | { | ||
| 1232 | mb(); | ||
| 1233 | return osb->recovery_thread_task != NULL; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | 1256 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) |
| 1237 | { | 1257 | { |
| 1238 | int tmp; | 1258 | int tmp, hangup_needed = 0; |
| 1239 | struct ocfs2_super *osb = NULL; | 1259 | struct ocfs2_super *osb = NULL; |
| 1240 | char nodestr[8]; | 1260 | char nodestr[8]; |
| 1241 | 1261 | ||
| @@ -1249,25 +1269,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1249 | 1269 | ||
| 1250 | ocfs2_truncate_log_shutdown(osb); | 1270 | ocfs2_truncate_log_shutdown(osb); |
| 1251 | 1271 | ||
| 1252 | /* disable any new recovery threads and wait for any currently | 1272 | /* This will disable recovery and flush any recovery work. */ |
| 1253 | * running ones to exit. Do this before setting the vol_state. */ | 1273 | ocfs2_recovery_exit(osb); |
| 1254 | mutex_lock(&osb->recovery_lock); | ||
| 1255 | osb->disable_recovery = 1; | ||
| 1256 | mutex_unlock(&osb->recovery_lock); | ||
| 1257 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 1258 | |||
| 1259 | /* At this point, we know that no more recovery threads can be | ||
| 1260 | * launched, so wait for any recovery completion work to | ||
| 1261 | * complete. */ | ||
| 1262 | flush_workqueue(ocfs2_wq); | ||
| 1263 | 1274 | ||
| 1264 | ocfs2_journal_shutdown(osb); | 1275 | ocfs2_journal_shutdown(osb); |
| 1265 | 1276 | ||
| 1266 | ocfs2_sync_blockdev(sb); | 1277 | ocfs2_sync_blockdev(sb); |
| 1267 | 1278 | ||
| 1268 | /* No dlm means we've failed during mount, so skip all the | 1279 | /* No cluster connection means we've failed during mount, so skip |
| 1269 | * steps which depended on that to complete. */ | 1280 | * all the steps which depended on that to complete. */ |
| 1270 | if (osb->dlm) { | 1281 | if (osb->cconn) { |
| 1271 | tmp = ocfs2_super_lock(osb, 1); | 1282 | tmp = ocfs2_super_lock(osb, 1); |
| 1272 | if (tmp < 0) { | 1283 | if (tmp < 0) { |
| 1273 | mlog_errno(tmp); | 1284 | mlog_errno(tmp); |
| @@ -1278,25 +1289,34 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1289 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
| 1279 | ocfs2_put_slot(osb); | 1290 | ocfs2_put_slot(osb); |
| 1280 | 1291 | ||
| 1281 | if (osb->dlm) | 1292 | if (osb->cconn) |
| 1282 | ocfs2_super_unlock(osb, 1); | 1293 | ocfs2_super_unlock(osb, 1); |
| 1283 | 1294 | ||
| 1284 | ocfs2_release_system_inodes(osb); | 1295 | ocfs2_release_system_inodes(osb); |
| 1285 | 1296 | ||
| 1286 | if (osb->dlm) | 1297 | /* |
| 1287 | ocfs2_dlm_shutdown(osb); | 1298 | * If we're dismounting due to mount error, mount.ocfs2 will clean |
| 1299 | * up heartbeat. If we're a local mount, there is no heartbeat. | ||
| 1300 | * If we failed before we got a uuid_str yet, we can't stop | ||
| 1301 | * heartbeat. Otherwise, do it. | ||
| 1302 | */ | ||
| 1303 | if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) | ||
| 1304 | hangup_needed = 1; | ||
| 1305 | |||
| 1306 | if (osb->cconn) | ||
| 1307 | ocfs2_dlm_shutdown(osb, hangup_needed); | ||
| 1288 | 1308 | ||
| 1289 | debugfs_remove(osb->osb_debug_root); | 1309 | debugfs_remove(osb->osb_debug_root); |
| 1290 | 1310 | ||
| 1291 | if (!mnt_err) | 1311 | if (hangup_needed) |
| 1292 | ocfs2_stop_heartbeat(osb); | 1312 | ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); |
| 1293 | 1313 | ||
| 1294 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); | 1314 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); |
| 1295 | 1315 | ||
| 1296 | if (ocfs2_mount_local(osb)) | 1316 | if (ocfs2_mount_local(osb)) |
| 1297 | snprintf(nodestr, sizeof(nodestr), "local"); | 1317 | snprintf(nodestr, sizeof(nodestr), "local"); |
| 1298 | else | 1318 | else |
| 1299 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 1319 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
| 1300 | 1320 | ||
| 1301 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", | 1321 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", |
| 1302 | osb->dev_str, nodestr); | 1322 | osb->dev_str, nodestr); |
| @@ -1355,7 +1375,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1355 | sb->s_fs_info = osb; | 1375 | sb->s_fs_info = osb; |
| 1356 | sb->s_op = &ocfs2_sops; | 1376 | sb->s_op = &ocfs2_sops; |
| 1357 | sb->s_export_op = &ocfs2_export_ops; | 1377 | sb->s_export_op = &ocfs2_export_ops; |
| 1358 | osb->osb_locking_proto = ocfs2_locking_protocol; | ||
| 1359 | sb->s_time_gran = 1; | 1378 | sb->s_time_gran = 1; |
| 1360 | sb->s_flags |= MS_NOATIME; | 1379 | sb->s_flags |= MS_NOATIME; |
| 1361 | /* this is needed to support O_LARGEFILE */ | 1380 | /* this is needed to support O_LARGEFILE */ |
| @@ -1368,7 +1387,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1368 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1387 | osb->s_sectsize_bits = blksize_bits(sector_size); |
| 1369 | BUG_ON(!osb->s_sectsize_bits); | 1388 | BUG_ON(!osb->s_sectsize_bits); |
| 1370 | 1389 | ||
| 1371 | init_waitqueue_head(&osb->recovery_event); | ||
| 1372 | spin_lock_init(&osb->dc_task_lock); | 1390 | spin_lock_init(&osb->dc_task_lock); |
| 1373 | init_waitqueue_head(&osb->dc_event); | 1391 | init_waitqueue_head(&osb->dc_event); |
| 1374 | osb->dc_work_sequence = 0; | 1392 | osb->dc_work_sequence = 0; |
| @@ -1376,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1376 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1394 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
| 1377 | osb->blocked_lock_count = 0; | 1395 | osb->blocked_lock_count = 0; |
| 1378 | spin_lock_init(&osb->osb_lock); | 1396 | spin_lock_init(&osb->osb_lock); |
| 1397 | ocfs2_init_inode_steal_slot(osb); | ||
| 1379 | 1398 | ||
| 1380 | atomic_set(&osb->alloc_stats.moves, 0); | 1399 | atomic_set(&osb->alloc_stats.moves, 0); |
| 1381 | atomic_set(&osb->alloc_stats.local_data, 0); | 1400 | atomic_set(&osb->alloc_stats.local_data, 0); |
| @@ -1388,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1388 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 1407 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
| 1389 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1408 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 1390 | 1409 | ||
| 1391 | mutex_init(&osb->recovery_lock); | 1410 | status = ocfs2_recovery_init(osb); |
| 1392 | 1411 | if (status) { | |
| 1393 | osb->disable_recovery = 0; | 1412 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); |
| 1394 | osb->recovery_thread_task = NULL; | 1413 | mlog_errno(status); |
| 1414 | goto bail; | ||
| 1415 | } | ||
| 1395 | 1416 | ||
| 1396 | init_waitqueue_head(&osb->checkpoint_event); | 1417 | init_waitqueue_head(&osb->checkpoint_event); |
| 1397 | atomic_set(&osb->needs_checkpoint, 0); | 1418 | atomic_set(&osb->needs_checkpoint, 0); |
| 1398 | 1419 | ||
| 1399 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 1420 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 1400 | 1421 | ||
| 1401 | osb->node_num = O2NM_INVALID_NODE_NUM; | ||
| 1402 | osb->slot_num = OCFS2_INVALID_SLOT; | 1422 | osb->slot_num = OCFS2_INVALID_SLOT; |
| 1403 | 1423 | ||
| 1404 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
| 1405 | osb->local_alloc_bh = NULL; | 1425 | osb->local_alloc_bh = NULL; |
| 1406 | 1426 | ||
| 1407 | ocfs2_setup_hb_callbacks(osb); | ||
| 1408 | |||
| 1409 | init_waitqueue_head(&osb->osb_mount_event); | 1427 | init_waitqueue_head(&osb->osb_mount_event); |
| 1410 | 1428 | ||
| 1411 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); | 1429 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); |
| @@ -1455,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1455 | goto bail; | 1473 | goto bail; |
| 1456 | } | 1474 | } |
| 1457 | 1475 | ||
| 1476 | if (ocfs2_userspace_stack(osb)) { | ||
| 1477 | memcpy(osb->osb_cluster_stack, | ||
| 1478 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | ||
| 1479 | OCFS2_STACK_LABEL_LEN); | ||
| 1480 | osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
| 1481 | if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { | ||
| 1482 | mlog(ML_ERROR, | ||
| 1483 | "couldn't mount because of an invalid " | ||
| 1484 | "cluster stack label (%s) \n", | ||
| 1485 | osb->osb_cluster_stack); | ||
| 1486 | status = -EINVAL; | ||
| 1487 | goto bail; | ||
| 1488 | } | ||
| 1489 | } else { | ||
| 1490 | /* The empty string is identical with classic tools that | ||
| 1491 | * don't know about s_cluster_info. */ | ||
| 1492 | osb->osb_cluster_stack[0] = '\0'; | ||
| 1493 | } | ||
| 1494 | |||
| 1458 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); | 1495 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); |
| 1459 | 1496 | ||
| 1460 | /* FIXME | 1497 | /* FIXME |
| @@ -1724,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
| 1724 | 1761 | ||
| 1725 | /* This function assumes that the caller has the main osb resource */ | 1762 | /* This function assumes that the caller has the main osb resource */ |
| 1726 | 1763 | ||
| 1727 | if (osb->slot_info) | 1764 | ocfs2_free_slot_info(osb); |
| 1728 | ocfs2_free_slot_info(osb->slot_info); | ||
| 1729 | 1765 | ||
| 1730 | kfree(osb->osb_orphan_wipes); | 1766 | kfree(osb->osb_orphan_wipes); |
| 1731 | /* FIXME | 1767 | /* FIXME |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5f66c4466151..817f5966edca 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
| @@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char | |||
| 87 | 87 | ||
| 88 | void sysfs_remove_link(struct kobject * kobj, const char * name) | 88 | void sysfs_remove_link(struct kobject * kobj, const char * name) |
| 89 | { | 89 | { |
| 90 | sysfs_hash_and_remove(kobj->sd, name); | 90 | struct sysfs_dirent *parent_sd = NULL; |
| 91 | |||
| 92 | if (!kobj) | ||
| 93 | parent_sd = &sysfs_root; | ||
| 94 | else | ||
| 95 | parent_sd = kobj->sd; | ||
| 96 | |||
| 97 | sysfs_hash_and_remove(parent_sd, name); | ||
| 91 | } | 98 | } |
| 92 | 99 | ||
| 93 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, | 100 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |
