diff options
Diffstat (limited to 'fs/ocfs2/cluster/heartbeat.c')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 787 |
1 files changed, 716 insertions, 71 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..b108e863d8f6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -62,10 +62,53 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
62 | static LIST_HEAD(o2hb_node_events); | 62 | static LIST_HEAD(o2hb_node_events); |
63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
64 | 64 | ||
65 | /* | ||
66 | * In global heartbeat, we maintain a series of region bitmaps. | ||
67 | * - o2hb_region_bitmap allows us to limit the region number to max region. | ||
68 | * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). | ||
69 | * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes | ||
70 | * heartbeat on it. | ||
71 | * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. | ||
72 | */ | ||
73 | static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
74 | static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
75 | static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
76 | static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
77 | |||
/*
 * Selector values stored in o2hb_debug_buf.db_type. o2hb_debug_open()
 * switches on them: types 0-3 copy db_data directly as a bitmap, type 4
 * casts db_data to a struct o2hb_region and copies its hr_live_node_bitmap,
 * and types 5-7 cast db_data to a region and print a single scalar.
 */
78 | #define O2HB_DB_TYPE_LIVENODES 0 | ||
79 | #define O2HB_DB_TYPE_LIVEREGIONS 1 | ||
80 | #define O2HB_DB_TYPE_QUORUMREGIONS 2 | ||
81 | #define O2HB_DB_TYPE_FAILEDREGIONS 3 | ||
82 | #define O2HB_DB_TYPE_REGION_LIVENODES 4 | ||
83 | #define O2HB_DB_TYPE_REGION_NUMBER 5 | ||
84 | #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 | ||
85 | #define O2HB_DB_TYPE_REGION_PINNED 7 | ||
/*
 * Descriptor attached to one debugfs file (o2hb_debug_open() reads it from
 * inode->i_private).
 *   db_type - one of the O2HB_DB_TYPE_* selectors above
 *   db_size - number of bytes memcpy'd into the on-stack map in
 *             o2hb_debug_open(), which BUG()s if it exceeds that map
 *   db_len  - number of bits scanned with find_next_bit() when printing
 *   db_data - the bitmap itself, or the owning struct o2hb_region,
 *             depending on db_type
 */
86 | struct o2hb_debug_buf { | ||
87 | int db_type; | ||
88 | int db_size; | ||
89 | int db_len; | ||
90 | void *db_data; | ||
91 | }; | ||
92 | |||
93 | static struct o2hb_debug_buf *o2hb_db_livenodes; | ||
94 | static struct o2hb_debug_buf *o2hb_db_liveregions; | ||
95 | static struct o2hb_debug_buf *o2hb_db_quorumregions; | ||
96 | static struct o2hb_debug_buf *o2hb_db_failedregions; | ||
97 | |||
65 | #define O2HB_DEBUG_DIR "o2hb" | 98 | #define O2HB_DEBUG_DIR "o2hb" |
66 | #define O2HB_DEBUG_LIVENODES "livenodes" | 99 | #define O2HB_DEBUG_LIVENODES "livenodes" |
100 | #define O2HB_DEBUG_LIVEREGIONS "live_regions" | ||
101 | #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" | ||
102 | #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" | ||
103 | #define O2HB_DEBUG_REGION_NUMBER "num" | ||
104 | #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" | ||
105 | #define O2HB_DEBUG_REGION_PINNED "pinned" | ||
106 | |||
67 | static struct dentry *o2hb_debug_dir; | 107 | static struct dentry *o2hb_debug_dir; |
68 | static struct dentry *o2hb_debug_livenodes; | 108 | static struct dentry *o2hb_debug_livenodes; |
109 | static struct dentry *o2hb_debug_liveregions; | ||
110 | static struct dentry *o2hb_debug_quorumregions; | ||
111 | static struct dentry *o2hb_debug_failedregions; | ||
69 | 112 | ||
70 | static LIST_HEAD(o2hb_all_regions); | 113 | static LIST_HEAD(o2hb_all_regions); |
71 | 114 | ||
@@ -77,7 +120,46 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
77 | 120 | ||
78 | #define O2HB_DEFAULT_BLOCK_BITS 9 | 121 | #define O2HB_DEFAULT_BLOCK_BITS 9 |
79 | 122 | ||
/*
 * Heartbeat modes. O2HB_HEARTBEAT_NUM_MODES is not a mode; it is the count
 * used to size o2hb_heartbeat_mode_desc[] and to range-check a requested
 * mode in o2hb_global_hearbeat_mode_set().
 */
123 | enum o2hb_heartbeat_modes { | ||
124 | O2HB_HEARTBEAT_LOCAL = 0, | ||
125 | O2HB_HEARTBEAT_GLOBAL, | ||
126 | O2HB_HEARTBEAT_NUM_MODES, | ||
127 | }; | ||
128 | |||
/*
 * Printable name for each mode, indexed by enum o2hb_heartbeat_modes.
 * NOTE(review): declared without static or const; whether another
 * translation unit needs the symbol is not visible here - confirm.
 */
129 | char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { | ||
130 | "local", /* O2HB_HEARTBEAT_LOCAL */ | ||
131 | "global", /* O2HB_HEARTBEAT_GLOBAL */ | ||
132 | }; | ||
133 | |||
80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 134 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
135 | unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; | ||
136 | |||
137 | /* | ||
138 | * o2hb_dependent_users tracks the number of registered callbacks that depend | ||
139 | * on heartbeat. o2net and o2dlm are two entities that register this callback. | ||
140 | * However only o2dlm depends on the heartbeat. It does not want the heartbeat | ||
141 | * to stop while a dlm domain is still active. | ||
142 | */ | ||
143 | unsigned int o2hb_dependent_users; | ||
144 | |||
145 | /* | ||
146 | * In global heartbeat mode, all regions are pinned if there are one or more | ||
147 | * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All | ||
148 | * regions are unpinned if the region count exceeds the cut off or the number | ||
149 | * of dependent users falls to zero. | ||
150 | */ | ||
151 | #define O2HB_PIN_CUT_OFF 3 | ||
152 | |||
153 | /* | ||
154 | * In local heartbeat mode, we assume the dlm domain name to be the same as | ||
155 | * region uuid. This is true for domains created for the file system but not | ||
156 | * necessarily true for userdlm domains. This is a known limitation. | ||
157 | * | ||
158 | * In global heartbeat mode, we pin/unpin all o2hb regions. This solution | ||
159 | * works for both file system and userdlm domains. | ||
160 | */ | ||
161 | static int o2hb_region_pin(const char *region_uuid); | ||
162 | static void o2hb_region_unpin(const char *region_uuid); | ||
81 | 163 | ||
82 | /* Only sets a new threshold if there are no active regions. | 164 | /* Only sets a new threshold if there are no active regions. |
83 | * | 165 | * |
@@ -94,6 +176,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
94 | } | 176 | } |
95 | } | 177 | } |
96 | 178 | ||
/*
 * Switch the heartbeat mode to hb_mode.
 *
 * The change is applied only when hb_mode is below O2HB_HEARTBEAT_NUM_MODES
 * and the o2hb_all_regions list is empty; the list check and the store to
 * o2hb_heartbeat_mode both happen under o2hb_live_lock.
 *
 * Returns 0 when the mode was stored, -1 when hb_mode is out of range or
 * at least one region exists.
 *
 * NOTE(review): the function name is spelled "hearbeat" (sic), and the
 * failure value is a bare -1 rather than an errno-style code - confirm
 * what callers expect before changing either.
 */
179 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | ||
180 | { | ||
181 | int ret = -1; | ||
182 | |||
183 | if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { | ||
184 | spin_lock(&o2hb_live_lock); | ||
185 | if (list_empty(&o2hb_all_regions)) { | ||
186 | o2hb_heartbeat_mode = hb_mode; | ||
187 | ret = 0; | ||
188 | } | ||
189 | spin_unlock(&o2hb_live_lock); | ||
190 | } | ||
191 | |||
192 | return ret; | ||
193 | } | ||
194 | |||
97 | struct o2hb_node_event { | 195 | struct o2hb_node_event { |
98 | struct list_head hn_item; | 196 | struct list_head hn_item; |
99 | enum o2hb_callback_type hn_event_type; | 197 | enum o2hb_callback_type hn_event_type; |
@@ -117,7 +215,9 @@ struct o2hb_region { | |||
117 | struct config_item hr_item; | 215 | struct config_item hr_item; |
118 | 216 | ||
119 | struct list_head hr_all_item; | 217 | struct list_head hr_all_item; |
120 | unsigned hr_unclean_stop:1; | 218 | unsigned hr_unclean_stop:1, |
219 | hr_item_pinned:1, | ||
220 | hr_item_dropped:1; | ||
121 | 221 | ||
122 | /* protected by the hr_callback_sem */ | 222 | /* protected by the hr_callback_sem */ |
123 | struct task_struct *hr_task; | 223 | struct task_struct *hr_task; |
@@ -135,6 +235,20 @@ struct o2hb_region { | |||
135 | struct block_device *hr_bdev; | 235 | struct block_device *hr_bdev; |
136 | struct o2hb_disk_slot *hr_slots; | 236 | struct o2hb_disk_slot *hr_slots; |
137 | 237 | ||
238 | /* live node map of this region */ | ||
239 | unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
240 | unsigned int hr_region_num; | ||
241 | |||
242 | struct dentry *hr_debug_dir; | ||
243 | struct dentry *hr_debug_livenodes; | ||
244 | struct dentry *hr_debug_regnum; | ||
245 | struct dentry *hr_debug_elapsed_time; | ||
246 | struct dentry *hr_debug_pinned; | ||
247 | struct o2hb_debug_buf *hr_db_livenodes; | ||
248 | struct o2hb_debug_buf *hr_db_regnum; | ||
249 | struct o2hb_debug_buf *hr_db_elapsed_time; | ||
250 | struct o2hb_debug_buf *hr_db_pinned; | ||
251 | |||
138 | /* let the person setting up hb wait for it to return until it | 252 | /* let the person setting up hb wait for it to return until it |
139 | * has reached a 'steady' state. This will be fixed when we have | 253 | * has reached a 'steady' state. This will be fixed when we have |
140 | * a more complete api that doesn't lead to this sort of fragility. */ | 254 | * a more complete api that doesn't lead to this sort of fragility. */ |
@@ -163,8 +277,19 @@ struct o2hb_bio_wait_ctxt { | |||
163 | int wc_error; | 277 | int wc_error; |
164 | }; | 278 | }; |
165 | 279 | ||
/*
 * Count the set bits among the first `count` bits of `map`.
 *
 * Walks the bitmap with find_next_bit(), starting the search at bit 0
 * (i starts at -1 so that i + 1 is 0) and stopping once find_next_bit()
 * returns a value that is not below `count`.
 *
 * NOTE(review): this looks equivalent to the kernel's bitmap_weight(map,
 * count) - confirm before replacing.
 */
280 | static int o2hb_pop_count(void *map, int count) | ||
281 | { | ||
282 | int i = -1, pop = 0; | ||
283 | |||
284 | while ((i = find_next_bit(map, count, i + 1)) < count) | ||
285 | pop++; | ||
286 | return pop; | ||
287 | } | ||
288 | |||
166 | static void o2hb_write_timeout(struct work_struct *work) | 289 | static void o2hb_write_timeout(struct work_struct *work) |
167 | { | 290 | { |
291 | int failed, quorum; | ||
292 | unsigned long flags; | ||
168 | struct o2hb_region *reg = | 293 | struct o2hb_region *reg = |
169 | container_of(work, struct o2hb_region, | 294 | container_of(work, struct o2hb_region, |
170 | hr_write_timeout_work.work); | 295 | hr_write_timeout_work.work); |
@@ -172,6 +297,28 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 297 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
173 | "milliseconds\n", reg->hr_dev_name, | 298 | "milliseconds\n", reg->hr_dev_name, |
174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 299 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
300 | |||
301 | if (o2hb_global_heartbeat_active()) { | ||
302 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
303 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
304 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
305 | failed = o2hb_pop_count(&o2hb_failed_region_bitmap, | ||
306 | O2NM_MAX_REGIONS); | ||
307 | quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
308 | O2NM_MAX_REGIONS); | ||
309 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
310 | |||
311 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | ||
312 | quorum, failed); | ||
313 | |||
314 | /* | ||
315 | * Fence if the number of failed regions >= half the number | ||
316 | * of quorum regions | ||
317 | */ | ||
318 | if ((failed << 1) < quorum) | ||
319 | return; | ||
320 | } | ||
321 | |||
175 | o2quo_disk_timeout(); | 322 | o2quo_disk_timeout(); |
176 | } | 323 | } |
177 | 324 | ||
@@ -180,6 +327,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 327 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
181 | O2HB_MAX_WRITE_TIMEOUT_MS); | 328 | O2HB_MAX_WRITE_TIMEOUT_MS); |
182 | 329 | ||
330 | if (o2hb_global_heartbeat_active()) { | ||
331 | spin_lock(&o2hb_live_lock); | ||
332 | clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
333 | spin_unlock(&o2hb_live_lock); | ||
334 | } | ||
183 | cancel_delayed_work(&reg->hr_write_timeout_work); | 335 | cancel_delayed_work(&reg->hr_write_timeout_work); |
184 | reg->hr_last_timeout_start = jiffies; | 336 | reg->hr_last_timeout_start = jiffies; |
185 | schedule_delayed_work(&reg->hr_write_timeout_work, | 337 | schedule_delayed_work(&reg->hr_write_timeout_work, |
@@ -188,8 +340,7 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
188 | 340 | ||
189 | static void o2hb_disarm_write_timeout(struct o2hb_region *reg) | 341 | static void o2hb_disarm_write_timeout(struct o2hb_region *reg) |
190 | { | 342 | { |
191 | cancel_delayed_work(&reg->hr_write_timeout_work); | 343 | cancel_delayed_work_sync(&reg->hr_write_timeout_work); |
192 | flush_scheduled_work(); | ||
193 | } | 344 | } |
194 | 345 | ||
195 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) | 346 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) |
@@ -513,6 +664,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, | |||
513 | { | 664 | { |
514 | assert_spin_locked(&o2hb_live_lock); | 665 | assert_spin_locked(&o2hb_live_lock); |
515 | 666 | ||
667 | BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); | ||
668 | |||
516 | event->hn_event_type = type; | 669 | event->hn_event_type = type; |
517 | event->hn_node = node; | 670 | event->hn_node = node; |
518 | event->hn_node_num = node_num; | 671 | event->hn_node_num = node_num; |
@@ -554,6 +707,43 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
554 | o2nm_node_put(node); | 707 | o2nm_node_put(node); |
555 | } | 708 | } |
556 | 709 | ||
/*
 * Promote `reg` to a quorum region once it qualifies.
 *
 * Must be called with o2hb_live_lock held (asserted below). Does nothing
 * unless global heartbeat is active. Returns early when:
 *   - the region's bit is already set in o2hb_quorum_region_bitmap,
 *   - the region's hr_live_node_bitmap differs from o2hb_live_node_bitmap,
 *   - slot->ds_changed_samples is still below O2HB_LIVE_THRESHOLD.
 * Otherwise it logs a notice, sets the region's bit in the quorum bitmap
 * and, if the number of quorum regions now exceeds O2HB_PIN_CUT_OFF, calls
 * o2hb_region_unpin(NULL).
 *
 * NOTE(review): o2hb_region_unpin() is invoked with o2hb_live_lock held;
 * its body is not visible in this view - confirm it neither sleeps nor
 * retakes that lock.
 */
710 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | ||
711 | struct o2hb_disk_slot *slot) | ||
712 | { | ||
713 | assert_spin_locked(&o2hb_live_lock); | ||
714 | |||
715 | if (!o2hb_global_heartbeat_active()) | ||
716 | return; | ||
717 | |||
718 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
719 | return; | ||
720 | |||
721 | /* | ||
722 | * A region can be added to the quorum only when it sees all | ||
723 | * live nodes heartbeat on it. In other words, the region has been | ||
724 | * added to all nodes. | ||
725 | */ | ||
726 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | ||
727 | sizeof(o2hb_live_node_bitmap))) | ||
728 | return; | ||
729 | |||
730 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
731 | return; | ||
732 | |||
733 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | ||
734 | config_item_name(&reg->hr_item)); | ||
735 | |||
736 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
737 | |||
738 | /* | ||
739 | * If global heartbeat active, unpin all regions if the | ||
740 | * region count > CUT_OFF | ||
741 | */ | ||
742 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
743 | O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) | ||
744 | o2hb_region_unpin(NULL); | ||
745 | } | ||
746 | |||
557 | static int o2hb_check_slot(struct o2hb_region *reg, | 747 | static int o2hb_check_slot(struct o2hb_region *reg, |
558 | struct o2hb_disk_slot *slot) | 748 | struct o2hb_disk_slot *slot) |
559 | { | 749 | { |
@@ -565,14 +755,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
565 | u64 cputime; | 755 | u64 cputime; |
566 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 756 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
567 | unsigned int slot_dead_ms; | 757 | unsigned int slot_dead_ms; |
758 | int tmp; | ||
568 | 759 | ||
569 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 760 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
570 | 761 | ||
571 | /* Is this correct? Do we assume that the node doesn't exist | 762 | /* |
572 | * if we're not configured for him? */ | 763 | * If a node is no longer configured but is still in the livemap, we |
764 | * may need to clear that bit from the livemap. | ||
765 | */ | ||
573 | node = o2nm_get_node_by_num(slot->ds_node_num); | 766 | node = o2nm_get_node_by_num(slot->ds_node_num); |
574 | if (!node) | 767 | if (!node) { |
575 | return 0; | 768 | spin_lock(&o2hb_live_lock); |
769 | tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); | ||
770 | spin_unlock(&o2hb_live_lock); | ||
771 | if (!tmp) | ||
772 | return 0; | ||
773 | } | ||
576 | 774 | ||
577 | if (!o2hb_verify_crc(reg, hb_block)) { | 775 | if (!o2hb_verify_crc(reg, hb_block)) { |
578 | /* all paths from here will drop o2hb_live_lock for | 776 | /* all paths from here will drop o2hb_live_lock for |
@@ -639,8 +837,12 @@ fire_callbacks: | |||
639 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", | 837 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", |
640 | slot->ds_node_num, (long long)slot->ds_last_generation); | 838 | slot->ds_node_num, (long long)slot->ds_last_generation); |
641 | 839 | ||
840 | set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
841 | |||
642 | /* first on the list generates a callback */ | 842 | /* first on the list generates a callback */ |
643 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 843 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
844 | mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " | ||
845 | "bitmap\n", slot->ds_node_num); | ||
644 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 846 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
645 | 847 | ||
646 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, | 848 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, |
@@ -684,13 +886,18 @@ fire_callbacks: | |||
684 | mlog(ML_HEARTBEAT, "Node %d left my region\n", | 886 | mlog(ML_HEARTBEAT, "Node %d left my region\n", |
685 | slot->ds_node_num); | 887 | slot->ds_node_num); |
686 | 888 | ||
889 | clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
890 | |||
687 | /* last off the live_slot generates a callback */ | 891 | /* last off the live_slot generates a callback */ |
688 | list_del_init(&slot->ds_live_item); | 892 | list_del_init(&slot->ds_live_item); |
689 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 893 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
894 | mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " | ||
895 | "nodes bitmap\n", slot->ds_node_num); | ||
690 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 896 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
691 | 897 | ||
692 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 898 | /* node can be null */ |
693 | slot->ds_node_num); | 899 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, |
900 | node, slot->ds_node_num); | ||
694 | 901 | ||
695 | changed = 1; | 902 | changed = 1; |
696 | } | 903 | } |
@@ -706,11 +913,14 @@ fire_callbacks: | |||
706 | slot->ds_equal_samples = 0; | 913 | slot->ds_equal_samples = 0; |
707 | } | 914 | } |
708 | out: | 915 | out: |
916 | o2hb_set_quorum_device(reg, slot); | ||
917 | |||
709 | spin_unlock(&o2hb_live_lock); | 918 | spin_unlock(&o2hb_live_lock); |
710 | 919 | ||
711 | o2hb_run_event_list(&event); | 920 | o2hb_run_event_list(&event); |
712 | 921 | ||
713 | o2nm_node_put(node); | 922 | if (node) |
923 | o2nm_node_put(node); | ||
714 | return changed; | 924 | return changed; |
715 | } | 925 | } |
716 | 926 | ||
@@ -737,6 +947,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
737 | { | 947 | { |
738 | int i, ret, highest_node, change = 0; | 948 | int i, ret, highest_node, change = 0; |
739 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 949 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
950 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
740 | struct o2hb_bio_wait_ctxt write_wc; | 951 | struct o2hb_bio_wait_ctxt write_wc; |
741 | 952 | ||
742 | ret = o2nm_configured_node_map(configured_nodes, | 953 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -746,6 +957,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
746 | return ret; | 957 | return ret; |
747 | } | 958 | } |
748 | 959 | ||
960 | /* | ||
961 | * If a node is not configured but is in the livemap, we still need | ||
962 | * to read the slot so as to be able to remove it from the livemap. | ||
963 | */ | ||
964 | o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); | ||
965 | i = -1; | ||
966 | while ((i = find_next_bit(live_node_bitmap, | ||
967 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | ||
968 | set_bit(i, configured_nodes); | ||
969 | } | ||
970 | |||
749 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 971 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
750 | if (highest_node >= O2NM_MAX_NODES) { | 972 | if (highest_node >= O2NM_MAX_NODES) { |
751 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 973 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); |
@@ -860,6 +1082,9 @@ static int o2hb_thread(void *data) | |||
860 | 1082 | ||
861 | set_user_nice(current, -20); | 1083 | set_user_nice(current, -20); |
862 | 1084 | ||
1085 | /* Pin node */ | ||
1086 | o2nm_depend_this_node(); | ||
1087 | |||
863 | while (!kthread_should_stop() && !reg->hr_unclean_stop) { | 1088 | while (!kthread_should_stop() && !reg->hr_unclean_stop) { |
864 | /* We track the time spent inside | 1089 | /* We track the time spent inside |
865 | * o2hb_do_disk_heartbeat so that we avoid more than | 1090 | * o2hb_do_disk_heartbeat so that we avoid more than |
@@ -909,6 +1134,9 @@ static int o2hb_thread(void *data) | |||
909 | mlog_errno(ret); | 1134 | mlog_errno(ret); |
910 | } | 1135 | } |
911 | 1136 | ||
1137 | /* Unpin node */ | ||
1138 | o2nm_undepend_this_node(); | ||
1139 | |||
912 | mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); | 1140 | mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); |
913 | 1141 | ||
914 | return 0; | 1142 | return 0; |
@@ -917,21 +1145,65 @@ static int o2hb_thread(void *data) | |||
917 | #ifdef CONFIG_DEBUG_FS | 1145 | #ifdef CONFIG_DEBUG_FS |
918 | static int o2hb_debug_open(struct inode *inode, struct file *file) | 1146 | static int o2hb_debug_open(struct inode *inode, struct file *file) |
919 | { | 1147 | { |
1148 | struct o2hb_debug_buf *db = inode->i_private; | ||
1149 | struct o2hb_region *reg; | ||
920 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1150 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
921 | char *buf = NULL; | 1151 | char *buf = NULL; |
922 | int i = -1; | 1152 | int i = -1; |
923 | int out = 0; | 1153 | int out = 0; |
924 | 1154 | ||
1155 | /* max_nodes should be the largest bitmap we pass here */ | ||
1156 | BUG_ON(sizeof(map) < db->db_size); | ||
1157 | |||
925 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 1158 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
926 | if (!buf) | 1159 | if (!buf) |
927 | goto bail; | 1160 | goto bail; |
928 | 1161 | ||
929 | o2hb_fill_node_map(map, sizeof(map)); | 1162 | switch (db->db_type) { |
1163 | case O2HB_DB_TYPE_LIVENODES: | ||
1164 | case O2HB_DB_TYPE_LIVEREGIONS: | ||
1165 | case O2HB_DB_TYPE_QUORUMREGIONS: | ||
1166 | case O2HB_DB_TYPE_FAILEDREGIONS: | ||
1167 | spin_lock(&o2hb_live_lock); | ||
1168 | memcpy(map, db->db_data, db->db_size); | ||
1169 | spin_unlock(&o2hb_live_lock); | ||
1170 | break; | ||
930 | 1171 | ||
931 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | 1172 | case O2HB_DB_TYPE_REGION_LIVENODES: |
1173 | spin_lock(&o2hb_live_lock); | ||
1174 | reg = (struct o2hb_region *)db->db_data; | ||
1175 | memcpy(map, reg->hr_live_node_bitmap, db->db_size); | ||
1176 | spin_unlock(&o2hb_live_lock); | ||
1177 | break; | ||
1178 | |||
1179 | case O2HB_DB_TYPE_REGION_NUMBER: | ||
1180 | reg = (struct o2hb_region *)db->db_data; | ||
1181 | out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", | ||
1182 | reg->hr_region_num); | ||
1183 | goto done; | ||
1184 | |||
1185 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | ||
1186 | reg = (struct o2hb_region *)db->db_data; | ||
1187 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
1188 | jiffies_to_msecs(jiffies - | ||
1189 | reg->hr_last_timeout_start)); | ||
1190 | goto done; | ||
1191 | |||
1192 | case O2HB_DB_TYPE_REGION_PINNED: | ||
1193 | reg = (struct o2hb_region *)db->db_data; | ||
1194 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
1195 | !!reg->hr_item_pinned); | ||
1196 | goto done; | ||
1197 | |||
1198 | default: | ||
1199 | goto done; | ||
1200 | } | ||
1201 | |||
1202 | while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) | ||
932 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | 1203 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
933 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | 1204 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
934 | 1205 | ||
1206 | done: | ||
935 | i_size_write(inode, out); | 1207 | i_size_write(inode, out); |
936 | 1208 | ||
937 | file->private_data = buf; | 1209 | file->private_data = buf; |
@@ -978,10 +1250,104 @@ static const struct file_operations o2hb_debug_fops = { | |||
978 | 1250 | ||
979 | void o2hb_exit(void) | 1251 | void o2hb_exit(void) |
980 | { | 1252 | { |
981 | if (o2hb_debug_livenodes) | 1253 | kfree(o2hb_db_livenodes); |
982 | debugfs_remove(o2hb_debug_livenodes); | 1254 | kfree(o2hb_db_liveregions); |
983 | if (o2hb_debug_dir) | 1255 | kfree(o2hb_db_quorumregions); |
984 | debugfs_remove(o2hb_debug_dir); | 1256 | kfree(o2hb_db_failedregions); |
1257 | debugfs_remove(o2hb_debug_failedregions); | ||
1258 | debugfs_remove(o2hb_debug_quorumregions); | ||
1259 | debugfs_remove(o2hb_debug_liveregions); | ||
1260 | debugfs_remove(o2hb_debug_livenodes); | ||
1261 | debugfs_remove(o2hb_debug_dir); | ||
1262 | } | ||
1263 | |||
/*
 * Allocate an o2hb_debug_buf descriptor and create the debugfs file that
 * exposes it.
 *
 *   name   - debugfs file name
 *   dir    - parent debugfs directory
 *   db     - out: receives the newly allocated descriptor
 *   db_len - allocation size in bytes passed to kmalloc() (not the same
 *            thing as the db_len field, which is filled from `len`)
 *   type   - stored in db_type
 *   size   - stored in db_size
 *   len    - stored in db_len
 *   data   - stored in db_data
 *
 * Returns NULL if the allocation fails; otherwise returns whatever
 * debugfs_create_file() returns. The descriptor is handed to
 * debugfs_create_file() as the file's private data, and the file is
 * created read-only for the owner (S_IFREG|S_IRUSR) with o2hb_debug_fops.
 *
 * If debugfs_create_file() fails, *db is left allocated; freeing it is up
 * to the caller.
 */
1264 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | ||
1265 | struct o2hb_debug_buf **db, int db_len, | ||
1266 | int type, int size, int len, void *data) | ||
1267 | { | ||
1268 | *db = kmalloc(db_len, GFP_KERNEL); | ||
1269 | if (!*db) | ||
1270 | return NULL; | ||
1271 | |||
1272 | (*db)->db_type = type; | ||
1273 | (*db)->db_size = size; | ||
1274 | (*db)->db_len = len; | ||
1275 | (*db)->db_data = data; | ||
1276 | |||
1277 | return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, | ||
1278 | &o2hb_debug_fops); | ||
1279 | } | ||
1280 | |||
/*
 * Create the top-level o2hb debugfs directory and the four global bitmap
 * files (live nodes, live regions, quorum regions, failed regions).
 *
 * Each file is created through o2hb_debug_create() with the matching
 * O2HB_DB_TYPE_* selector, the byte size of the bitmap, the number of bits
 * to print, and a pointer to the bitmap itself.
 *
 * Returns 0 on success. On the first failure it logs -ENOMEM, calls
 * o2hb_exit() to tear down whatever was created so far, and returns
 * -ENOMEM.
 *
 * NOTE(review): failures are detected by a NULL return only; whether the
 * debugfs calls can also return an ERR_PTR depends on the kernel version
 * and configuration - confirm.
 * NOTE(review): o2hb_exit(), as shown in this patch, kfree()s the
 * descriptors before removing the debugfs files that point at them -
 * confirm no open can race with that.
 */
1281 | static int o2hb_debug_init(void) | ||
1282 | { | ||
1283 | int ret = -ENOMEM; | ||
1284 | |||
1285 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
1286 | if (!o2hb_debug_dir) { | ||
1287 | mlog_errno(ret); | ||
1288 | goto bail; | ||
1289 | } | ||
1290 | |||
1291 | o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1292 | o2hb_debug_dir, | ||
1293 | &o2hb_db_livenodes, | ||
1294 | sizeof(*o2hb_db_livenodes), | ||
1295 | O2HB_DB_TYPE_LIVENODES, | ||
1296 | sizeof(o2hb_live_node_bitmap), | ||
1297 | O2NM_MAX_NODES, | ||
1298 | o2hb_live_node_bitmap); | ||
1299 | if (!o2hb_debug_livenodes) { | ||
1300 | mlog_errno(ret); | ||
1301 | goto bail; | ||
1302 | } | ||
1303 | |||
1304 | o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, | ||
1305 | o2hb_debug_dir, | ||
1306 | &o2hb_db_liveregions, | ||
1307 | sizeof(*o2hb_db_liveregions), | ||
1308 | O2HB_DB_TYPE_LIVEREGIONS, | ||
1309 | sizeof(o2hb_live_region_bitmap), | ||
1310 | O2NM_MAX_REGIONS, | ||
1311 | o2hb_live_region_bitmap); | ||
1312 | if (!o2hb_debug_liveregions) { | ||
1313 | mlog_errno(ret); | ||
1314 | goto bail; | ||
1315 | } | ||
1316 | |||
1317 | o2hb_debug_quorumregions = | ||
1318 | o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, | ||
1319 | o2hb_debug_dir, | ||
1320 | &o2hb_db_quorumregions, | ||
1321 | sizeof(*o2hb_db_quorumregions), | ||
1322 | O2HB_DB_TYPE_QUORUMREGIONS, | ||
1323 | sizeof(o2hb_quorum_region_bitmap), | ||
1324 | O2NM_MAX_REGIONS, | ||
1325 | o2hb_quorum_region_bitmap); | ||
1326 | if (!o2hb_debug_quorumregions) { | ||
1327 | mlog_errno(ret); | ||
1328 | goto bail; | ||
1329 | } | ||
1330 | |||
1331 | o2hb_debug_failedregions = | ||
1332 | o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, | ||
1333 | o2hb_debug_dir, | ||
1334 | &o2hb_db_failedregions, | ||
1335 | sizeof(*o2hb_db_failedregions), | ||
1336 | O2HB_DB_TYPE_FAILEDREGIONS, | ||
1337 | sizeof(o2hb_failed_region_bitmap), | ||
1338 | O2NM_MAX_REGIONS, | ||
1339 | o2hb_failed_region_bitmap); | ||
1340 | if (!o2hb_debug_failedregions) { | ||
1341 | mlog_errno(ret); | ||
1342 | goto bail; | ||
1343 | } | ||
1344 | |||
1345 | ret = 0; | ||
1346 | bail: | ||
1347 | if (ret) | ||
1348 | o2hb_exit(); | ||
1349 | |||
1350 | return ret; | ||
985 | } | 1351 | } |
986 | 1352 | ||
987 | int o2hb_init(void) | 1353 | int o2hb_init(void) |
@@ -997,24 +1363,14 @@ int o2hb_init(void) | |||
997 | INIT_LIST_HEAD(&o2hb_node_events); | 1363 | INIT_LIST_HEAD(&o2hb_node_events); |
998 | 1364 | ||
999 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 1365 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
1366 | memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); | ||
1367 | memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); | ||
1368 | memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); | ||
1369 | memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); | ||
1000 | 1370 | ||
1001 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1371 | o2hb_dependent_users = 0; |
1002 | if (!o2hb_debug_dir) { | ||
1003 | mlog_errno(-ENOMEM); | ||
1004 | return -ENOMEM; | ||
1005 | } | ||
1006 | 1372 | ||
1007 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | 1373 | return o2hb_debug_init(); |
1008 | S_IFREG|S_IRUSR, | ||
1009 | o2hb_debug_dir, NULL, | ||
1010 | &o2hb_debug_fops); | ||
1011 | if (!o2hb_debug_livenodes) { | ||
1012 | mlog_errno(-ENOMEM); | ||
1013 | debugfs_remove(o2hb_debug_dir); | ||
1014 | return -ENOMEM; | ||
1015 | } | ||
1016 | |||
1017 | return 0; | ||
1018 | } | 1374 | } |
1019 | 1375 | ||
1020 | /* if we're already in a callback then we're already serialized by the sem */ | 1376 | /* if we're already in a callback then we're already serialized by the sem */ |
@@ -1078,6 +1434,14 @@ static void o2hb_region_release(struct config_item *item) | |||
1078 | if (reg->hr_slots) | 1434 | if (reg->hr_slots) |
1079 | kfree(reg->hr_slots); | 1435 | kfree(reg->hr_slots); |
1080 | 1436 | ||
1437 | kfree(reg->hr_db_regnum); | ||
1438 | kfree(reg->hr_db_livenodes); | ||
1439 | debugfs_remove(reg->hr_debug_livenodes); | ||
1440 | debugfs_remove(reg->hr_debug_regnum); | ||
1441 | debugfs_remove(reg->hr_debug_elapsed_time); | ||
1442 | debugfs_remove(reg->hr_debug_pinned); | ||
1443 | debugfs_remove(reg->hr_debug_dir); | ||
1444 | |||
1081 | spin_lock(&o2hb_live_lock); | 1445 | spin_lock(&o2hb_live_lock); |
1082 | list_del(&reg->hr_all_item); | 1446 | list_del(&reg->hr_all_item); |
1083 | spin_unlock(&o2hb_live_lock); | 1447 | spin_unlock(&o2hb_live_lock); |
@@ -1365,7 +1729,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1365 | goto out; | 1729 | goto out; |
1366 | 1730 | ||
1367 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); | 1731 | reg->hr_bdev = I_BDEV(filp->f_mapping->host); |
1368 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); | 1732 | ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); |
1369 | if (ret) { | 1733 | if (ret) { |
1370 | reg->hr_bdev = NULL; | 1734 | reg->hr_bdev = NULL; |
1371 | goto out; | 1735 | goto out; |
@@ -1441,6 +1805,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1441 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1805 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1442 | spin_lock(&o2hb_live_lock); | 1806 | spin_lock(&o2hb_live_lock); |
1443 | hb_task = reg->hr_task; | 1807 | hb_task = reg->hr_task; |
1808 | if (o2hb_global_heartbeat_active()) | ||
1809 | set_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
1444 | spin_unlock(&o2hb_live_lock); | 1810 | spin_unlock(&o2hb_live_lock); |
1445 | 1811 | ||
1446 | if (hb_task) | 1812 | if (hb_task) |
@@ -1448,6 +1814,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1448 | else | 1814 | else |
1449 | ret = -EIO; | 1815 | ret = -EIO; |
1450 | 1816 | ||
1817 | if (hb_task && o2hb_global_heartbeat_active()) | ||
1818 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | ||
1819 | config_item_name(&reg->hr_item)); | ||
1820 | |||
1451 | out: | 1821 | out: |
1452 | if (filp) | 1822 | if (filp) |
1453 | fput(filp); | 1823 | fput(filp); |
@@ -1586,22 +1956,113 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group | |||
1586 | : NULL; | 1956 | : NULL; |
1587 | } | 1957 | } |
1588 | 1958 | ||
1959 | static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | ||
1960 | { | ||
1961 | int ret = -ENOMEM; | ||
1962 | |||
1963 | reg->hr_debug_dir = | ||
1964 | debugfs_create_dir(config_item_name(&reg->hr_item), dir); | |
1965 | if (!reg->hr_debug_dir) { | ||
1966 | mlog_errno(ret); | ||
1967 | goto bail; | ||
1968 | } | ||
1969 | |||
1970 | reg->hr_debug_livenodes = | ||
1971 | o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1972 | reg->hr_debug_dir, | ||
1973 | &(reg->hr_db_livenodes), | ||
1974 | sizeof(*(reg->hr_db_livenodes)), | ||
1975 | O2HB_DB_TYPE_REGION_LIVENODES, | ||
1976 | sizeof(reg->hr_live_node_bitmap), | ||
1977 | O2NM_MAX_NODES, reg); | ||
1978 | if (!reg->hr_debug_livenodes) { | ||
1979 | mlog_errno(ret); | ||
1980 | goto bail; | ||
1981 | } | ||
1982 | |||
1983 | reg->hr_debug_regnum = | ||
1984 | o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, | ||
1985 | reg->hr_debug_dir, | ||
1986 | &(reg->hr_db_regnum), | ||
1987 | sizeof(*(reg->hr_db_regnum)), | ||
1988 | O2HB_DB_TYPE_REGION_NUMBER, | ||
1989 | 0, O2NM_MAX_NODES, reg); | ||
1990 | if (!reg->hr_debug_regnum) { | ||
1991 | mlog_errno(ret); | ||
1992 | goto bail; | ||
1993 | } | ||
1994 | |||
1995 | reg->hr_debug_elapsed_time = | ||
1996 | o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, | ||
1997 | reg->hr_debug_dir, | ||
1998 | &(reg->hr_db_elapsed_time), | ||
1999 | sizeof(*(reg->hr_db_elapsed_time)), | ||
2000 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | ||
2001 | 0, 0, reg); | ||
2002 | if (!reg->hr_debug_elapsed_time) { | ||
2003 | mlog_errno(ret); | ||
2004 | goto bail; | ||
2005 | } | ||
2006 | |||
2007 | reg->hr_debug_pinned = | ||
2008 | o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, | ||
2009 | reg->hr_debug_dir, | ||
2010 | &(reg->hr_db_pinned), | ||
2011 | sizeof(*(reg->hr_db_pinned)), | ||
2012 | O2HB_DB_TYPE_REGION_PINNED, | ||
2013 | 0, 0, reg); | ||
2014 | if (!reg->hr_debug_pinned) { | ||
2015 | mlog_errno(ret); | ||
2016 | goto bail; | ||
2017 | } | ||
2018 | |||
2019 | ret = 0; | ||
2020 | bail: | ||
2021 | return ret; | ||
2022 | } | ||
2023 | |||
1589 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, | 2024 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, |
1590 | const char *name) | 2025 | const char *name) |
1591 | { | 2026 | { |
1592 | struct o2hb_region *reg = NULL; | 2027 | struct o2hb_region *reg = NULL; |
2028 | int ret; | ||
1593 | 2029 | ||
1594 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); | 2030 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); |
1595 | if (reg == NULL) | 2031 | if (reg == NULL) |
1596 | return ERR_PTR(-ENOMEM); | 2032 | return ERR_PTR(-ENOMEM); |
1597 | 2033 | ||
1598 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | 2034 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) { |
2035 | ret = -ENAMETOOLONG; | ||
2036 | goto free; | ||
2037 | } | ||
1599 | 2038 | ||
1600 | spin_lock(&o2hb_live_lock); | 2039 | spin_lock(&o2hb_live_lock); |
2040 | reg->hr_region_num = 0; | ||
2041 | if (o2hb_global_heartbeat_active()) { | ||
2042 | reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, | ||
2043 | O2NM_MAX_REGIONS); | ||
2044 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | ||
2045 | spin_unlock(&o2hb_live_lock); | ||
2046 | ret = -EFBIG; | ||
2047 | goto free; | ||
2048 | } | ||
2049 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2050 | } | ||
1601 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); | 2051 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); |
1602 | spin_unlock(&o2hb_live_lock); | 2052 | spin_unlock(&o2hb_live_lock); |
1603 | 2053 | ||
2054 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | |
2055 | |||
2056 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | ||
2057 | if (ret) { | ||
2058 | config_item_put(&reg->hr_item); | |
2059 | goto free; | ||
2060 | } | ||
2061 | |||
1604 | return &reg->hr_item; | 2062 | return &reg->hr_item; |
2063 | free: | ||
2064 | kfree(reg); | ||
2065 | return ERR_PTR(ret); | ||
1605 | } | 2066 | } |
1606 | 2067 | ||
1607 | static void o2hb_heartbeat_group_drop_item(struct config_group *group, | 2068 | static void o2hb_heartbeat_group_drop_item(struct config_group *group, |
@@ -1609,11 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1609 | { | 2070 | { |
1610 | struct task_struct *hb_task; | 2071 | struct task_struct *hb_task; |
1611 | struct o2hb_region *reg = to_o2hb_region(item); | 2072 | struct o2hb_region *reg = to_o2hb_region(item); |
2073 | int quorum_region = 0; | ||
1612 | 2074 | ||
1613 | /* stop the thread when the user removes the region dir */ | 2075 | /* stop the thread when the user removes the region dir */ |
1614 | spin_lock(&o2hb_live_lock); | 2076 | spin_lock(&o2hb_live_lock); |
2077 | if (o2hb_global_heartbeat_active()) { | ||
2078 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2079 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2080 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
2081 | quorum_region = 1; | ||
2082 | clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
2083 | } | ||
1615 | hb_task = reg->hr_task; | 2084 | hb_task = reg->hr_task; |
1616 | reg->hr_task = NULL; | 2085 | reg->hr_task = NULL; |
2086 | reg->hr_item_dropped = 1; | ||
1617 | spin_unlock(&o2hb_live_lock); | 2087 | spin_unlock(&o2hb_live_lock); |
1618 | 2088 | ||
1619 | if (hb_task) | 2089 | if (hb_task) |
@@ -1628,7 +2098,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1628 | wake_up(&o2hb_steady_queue); | 2098 | wake_up(&o2hb_steady_queue); |
1629 | } | 2099 | } |
1630 | 2100 | ||
2101 | if (o2hb_global_heartbeat_active()) | ||
2102 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
2103 | config_item_name(&reg->hr_item)); | |
2104 | |||
1631 | config_item_put(item); | 2105 | config_item_put(item); |
2106 | |||
2107 | if (!o2hb_global_heartbeat_active() || !quorum_region) | ||
2108 | return; | ||
2109 | |||
2110 | /* | ||
2111 | * If global heartbeat active and there are dependent users, | ||
2112 | * pin all regions if quorum region count <= CUT_OFF | ||
2113 | */ | ||
2114 | spin_lock(&o2hb_live_lock); | ||
2115 | |||
2116 | if (!o2hb_dependent_users) | ||
2117 | goto unlock; | ||
2118 | |||
2119 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
2120 | O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) | ||
2121 | o2hb_region_pin(NULL); | ||
2122 | |||
2123 | unlock: | ||
2124 | spin_unlock(&o2hb_live_lock); | ||
1632 | } | 2125 | } |
1633 | 2126 | ||
1634 | struct o2hb_heartbeat_group_attribute { | 2127 | struct o2hb_heartbeat_group_attribute { |
@@ -1688,6 +2181,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group | |||
1688 | return count; | 2181 | return count; |
1689 | } | 2182 | } |
1690 | 2183 | ||
2184 | static | ||
2185 | ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, | ||
2186 | char *page) | ||
2187 | { | ||
2188 | return sprintf(page, "%s\n", | ||
2189 | o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); | ||
2190 | } | ||
2191 | |||
2192 | static | ||
2193 | ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | ||
2194 | const char *page, size_t count) | ||
2195 | { | ||
2196 | unsigned int i; | ||
2197 | int ret; | ||
2198 | size_t len; | ||
2199 | |||
2200 | len = (page[count - 1] == '\n') ? count - 1 : count; | ||
2201 | if (!len) | ||
2202 | return -EINVAL; | ||
2203 | |||
2204 | for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { | ||
2205 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | ||
2206 | continue; | ||
2207 | |||
2208 | ret = o2hb_global_hearbeat_mode_set(i); | ||
2209 | if (!ret) | ||
2210 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | ||
2211 | o2hb_heartbeat_mode_desc[i]); | ||
2212 | return count; | ||
2213 | } | ||
2214 | |||
2215 | return -EINVAL; | ||
2216 | |||
2217 | } | ||
2218 | |||
1691 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { | 2219 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { |
1692 | .attr = { .ca_owner = THIS_MODULE, | 2220 | .attr = { .ca_owner = THIS_MODULE, |
1693 | .ca_name = "dead_threshold", | 2221 | .ca_name = "dead_threshold", |
@@ -1696,8 +2224,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold | |||
1696 | .store = o2hb_heartbeat_group_threshold_store, | 2224 | .store = o2hb_heartbeat_group_threshold_store, |
1697 | }; | 2225 | }; |
1698 | 2226 | ||
2227 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { | ||
2228 | .attr = { .ca_owner = THIS_MODULE, | ||
2229 | .ca_name = "mode", | ||
2230 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
2231 | .show = o2hb_heartbeat_group_mode_show, | ||
2232 | .store = o2hb_heartbeat_group_mode_store, | ||
2233 | }; | ||
2234 | |||
1699 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | 2235 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { |
1700 | &o2hb_heartbeat_group_attr_threshold.attr, | 2236 | &o2hb_heartbeat_group_attr_threshold.attr, |
2237 | &o2hb_heartbeat_group_attr_mode.attr, | ||
1701 | NULL, | 2238 | NULL, |
1702 | }; | 2239 | }; |
1703 | 2240 | ||
@@ -1770,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, | |||
1770 | } | 2307 | } |
1771 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); | 2308 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); |
1772 | 2309 | ||
1773 | static struct o2hb_region *o2hb_find_region(const char *region_uuid) | 2310 | /* |
2311 | * In local heartbeat mode, region_uuid passed matches the dlm domain name. | ||
2312 | * In global heartbeat mode, region_uuid passed is NULL. | ||
2313 | * | ||
2314 | * In local, we only pin the matching region. In global we pin all the active | ||
2315 | * regions. | ||
2316 | */ | ||
2317 | static int o2hb_region_pin(const char *region_uuid) | ||
1774 | { | 2318 | { |
1775 | struct o2hb_region *p, *reg = NULL; | 2319 | int ret = 0, found = 0; |
2320 | struct o2hb_region *reg; | ||
2321 | char *uuid; | ||
1776 | 2322 | ||
1777 | assert_spin_locked(&o2hb_live_lock); | 2323 | assert_spin_locked(&o2hb_live_lock); |
1778 | 2324 | ||
1779 | list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { | 2325 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
1780 | if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { | 2326 | uuid = config_item_name(&reg->hr_item); |
1781 | reg = p; | 2327 | |
1782 | break; | 2328 | /* local heartbeat */ |
2329 | if (region_uuid) { | ||
2330 | if (strcmp(region_uuid, uuid)) | ||
2331 | continue; | ||
2332 | found = 1; | ||
2333 | } | ||
2334 | |||
2335 | if (reg->hr_item_pinned || reg->hr_item_dropped) | ||
2336 | goto skip_pin; | ||
2337 | |||
2338 | /* Ignore ENOENT only for local hb (userdlm domain) */ | ||
2339 | ret = o2nm_depend_item(&reg->hr_item); | |
2340 | if (!ret) { | ||
2341 | mlog(ML_CLUSTER, "Pin region %s\n", uuid); | ||
2342 | reg->hr_item_pinned = 1; | ||
2343 | } else { | ||
2344 | if (ret == -ENOENT && found) | ||
2345 | ret = 0; | ||
2346 | else { | ||
2347 | mlog(ML_ERROR, "Pin region %s fails with %d\n", | ||
2348 | uuid, ret); | ||
2349 | break; | ||
2350 | } | ||
1783 | } | 2351 | } |
2352 | skip_pin: | ||
2353 | if (found) | ||
2354 | break; | ||
1784 | } | 2355 | } |
1785 | 2356 | ||
1786 | return reg; | 2357 | return ret; |
1787 | } | 2358 | } |
1788 | 2359 | ||
1789 | static int o2hb_region_get(const char *region_uuid) | 2360 | /* |
2361 | * In local heartbeat mode, region_uuid passed matches the dlm domain name. | ||
2362 | * In global heartbeat mode, region_uuid passed is NULL. | ||
2363 | * | ||
2364 | * In local, we only unpin the matching region. In global we unpin all the | ||
2365 | * active regions. | ||
2366 | */ | ||
2367 | static void o2hb_region_unpin(const char *region_uuid) | ||
1790 | { | 2368 | { |
1791 | int ret = 0; | ||
1792 | struct o2hb_region *reg; | 2369 | struct o2hb_region *reg; |
2370 | char *uuid; | ||
2371 | int found = 0; | ||
1793 | 2372 | ||
1794 | spin_lock(&o2hb_live_lock); | 2373 | assert_spin_locked(&o2hb_live_lock); |
1795 | 2374 | ||
1796 | reg = o2hb_find_region(region_uuid); | 2375 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
1797 | if (!reg) | 2376 | uuid = config_item_name(&reg->hr_item); |
1798 | ret = -ENOENT; | 2377 | if (region_uuid) { |
1799 | spin_unlock(&o2hb_live_lock); | 2378 | if (strcmp(region_uuid, uuid)) |
2379 | continue; | ||
2380 | found = 1; | ||
2381 | } | ||
1800 | 2382 | ||
1801 | if (ret) | 2383 | if (reg->hr_item_pinned) { |
1802 | goto out; | 2384 | mlog(ML_CLUSTER, "Unpin region %s\n", uuid); |
2385 | o2nm_undepend_item(&reg->hr_item); | |
2386 | reg->hr_item_pinned = 0; | ||
2387 | } | ||
2388 | if (found) | ||
2389 | break; | ||
2390 | } | ||
2391 | } | ||
1803 | 2392 | ||
1804 | ret = o2nm_depend_this_node(); | 2393 | static int o2hb_region_inc_user(const char *region_uuid) |
1805 | if (ret) | 2394 | { |
1806 | goto out; | 2395 | int ret = 0; |
1807 | 2396 | ||
1808 | ret = o2nm_depend_item(&reg->hr_item); | 2397 | spin_lock(&o2hb_live_lock); |
1809 | if (ret) | ||
1810 | o2nm_undepend_this_node(); | ||
1811 | 2398 | ||
1812 | out: | 2399 | /* local heartbeat */ |
2400 | if (!o2hb_global_heartbeat_active()) { | ||
2401 | ret = o2hb_region_pin(region_uuid); | ||
2402 | goto unlock; | ||
2403 | } | ||
2404 | |||
2405 | /* | ||
2406 | * if global heartbeat active and this is the first dependent user, | ||
2407 | * pin all regions if quorum region count <= CUT_OFF | ||
2408 | */ | ||
2409 | o2hb_dependent_users++; | ||
2410 | if (o2hb_dependent_users > 1) | ||
2411 | goto unlock; | ||
2412 | |||
2413 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
2414 | O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) | ||
2415 | ret = o2hb_region_pin(NULL); | ||
2416 | |||
2417 | unlock: | ||
2418 | spin_unlock(&o2hb_live_lock); | ||
1813 | return ret; | 2419 | return ret; |
1814 | } | 2420 | } |
1815 | 2421 | ||
1816 | static void o2hb_region_put(const char *region_uuid) | 2422 | void o2hb_region_dec_user(const char *region_uuid) |
1817 | { | 2423 | { |
1818 | struct o2hb_region *reg; | ||
1819 | |||
1820 | spin_lock(&o2hb_live_lock); | 2424 | spin_lock(&o2hb_live_lock); |
1821 | 2425 | ||
1822 | reg = o2hb_find_region(region_uuid); | 2426 | /* local heartbeat */ |
2427 | if (!o2hb_global_heartbeat_active()) { | ||
2428 | o2hb_region_unpin(region_uuid); | ||
2429 | goto unlock; | ||
2430 | } | ||
1823 | 2431 | ||
1824 | spin_unlock(&o2hb_live_lock); | 2432 | /* |
2433 | * if global heartbeat active and there are no dependent users, | ||
2434 | * unpin all quorum regions | ||
2435 | */ | ||
2436 | o2hb_dependent_users--; | ||
2437 | if (!o2hb_dependent_users) | ||
2438 | o2hb_region_unpin(NULL); | ||
1825 | 2439 | ||
1826 | if (reg) { | 2440 | unlock: |
1827 | o2nm_undepend_item(&reg->hr_item); | 2441 | spin_unlock(&o2hb_live_lock); |
1828 | o2nm_undepend_this_node(); | ||
1829 | } | ||
1830 | } | 2442 | } |
1831 | 2443 | ||
1832 | int o2hb_register_callback(const char *region_uuid, | 2444 | int o2hb_register_callback(const char *region_uuid, |
@@ -1847,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid, | |||
1847 | } | 2459 | } |
1848 | 2460 | ||
1849 | if (region_uuid) { | 2461 | if (region_uuid) { |
1850 | ret = o2hb_region_get(region_uuid); | 2462 | ret = o2hb_region_inc_user(region_uuid); |
1851 | if (ret) | 2463 | if (ret) { |
2464 | mlog_errno(ret); | ||
1852 | goto out; | 2465 | goto out; |
2466 | } | ||
1853 | } | 2467 | } |
1854 | 2468 | ||
1855 | down_write(&o2hb_callback_sem); | 2469 | down_write(&o2hb_callback_sem); |
@@ -1867,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid, | |||
1867 | up_write(&o2hb_callback_sem); | 2481 | up_write(&o2hb_callback_sem); |
1868 | ret = 0; | 2482 | ret = 0; |
1869 | out: | 2483 | out: |
1870 | mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", | 2484 | mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n", |
1871 | ret, __builtin_return_address(0), hc); | 2485 | ret, __builtin_return_address(0), hc); |
1872 | return ret; | 2486 | return ret; |
1873 | } | 2487 | } |
@@ -1878,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid, | |||
1878 | { | 2492 | { |
1879 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); | 2493 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); |
1880 | 2494 | ||
1881 | mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", | 2495 | mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n", |
1882 | __builtin_return_address(0), hc); | 2496 | __builtin_return_address(0), hc); |
1883 | 2497 | ||
1884 | /* XXX Can this happen _with_ a region reference? */ | 2498 | /* XXX Can this happen _with_ a region reference? */ |
@@ -1886,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid, | |||
1886 | return; | 2500 | return; |
1887 | 2501 | ||
1888 | if (region_uuid) | 2502 | if (region_uuid) |
1889 | o2hb_region_put(region_uuid); | 2503 | o2hb_region_dec_user(region_uuid); |
1890 | 2504 | ||
1891 | down_write(&o2hb_callback_sem); | 2505 | down_write(&o2hb_callback_sem); |
1892 | 2506 | ||
@@ -1963,3 +2577,34 @@ void o2hb_stop_all_regions(void) | |||
1963 | spin_unlock(&o2hb_live_lock); | 2577 | spin_unlock(&o2hb_live_lock); |
1964 | } | 2578 | } |
1965 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); | 2579 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); |
2580 | |||
2581 | int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | ||
2582 | { | ||
2583 | struct o2hb_region *reg; | ||
2584 | int numregs = 0; | ||
2585 | char *p; | ||
2586 | |||
2587 | spin_lock(&o2hb_live_lock); | ||
2588 | |||
2589 | p = region_uuids; | ||
2590 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | ||
2591 | mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); | |
2592 | if (numregs < max_regions) { | ||
2593 | memcpy(p, config_item_name(&reg->hr_item), | |
2594 | O2HB_MAX_REGION_NAME_LEN); | ||
2595 | p += O2HB_MAX_REGION_NAME_LEN; | ||
2596 | } | ||
2597 | numregs++; | ||
2598 | } | ||
2599 | |||
2600 | spin_unlock(&o2hb_live_lock); | ||
2601 | |||
2602 | return numregs; | ||
2603 | } | ||
2604 | EXPORT_SYMBOL_GPL(o2hb_get_all_regions); | ||
2605 | |||
2606 | int o2hb_global_heartbeat_active(void) | ||
2607 | { | ||
2608 | return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); | ||
2609 | } | ||
2610 | EXPORT_SYMBOL(o2hb_global_heartbeat_active); | ||