diff options
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 194 | ||||
-rw-r--r-- | fs/ocfs2/cluster/netdebug.c | 102 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 138 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.h | 2 |
4 files changed, 265 insertions, 171 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 9a3e6bbff27..a4e855e3690 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -216,6 +216,7 @@ struct o2hb_region { | |||
216 | 216 | ||
217 | struct list_head hr_all_item; | 217 | struct list_head hr_all_item; |
218 | unsigned hr_unclean_stop:1, | 218 | unsigned hr_unclean_stop:1, |
219 | hr_aborted_start:1, | ||
219 | hr_item_pinned:1, | 220 | hr_item_pinned:1, |
220 | hr_item_dropped:1; | 221 | hr_item_dropped:1; |
221 | 222 | ||
@@ -254,6 +255,10 @@ struct o2hb_region { | |||
254 | * a more complete api that doesn't lead to this sort of fragility. */ | 255 | * a more complete api that doesn't lead to this sort of fragility. */ |
255 | atomic_t hr_steady_iterations; | 256 | atomic_t hr_steady_iterations; |
256 | 257 | ||
258 | /* terminate o2hb thread if it does not reach steady state | ||
259 | * (hr_steady_iterations == 0) within hr_unsteady_iterations */ | ||
260 | atomic_t hr_unsteady_iterations; | ||
261 | |||
257 | char hr_dev_name[BDEVNAME_SIZE]; | 262 | char hr_dev_name[BDEVNAME_SIZE]; |
258 | 263 | ||
259 | unsigned int hr_timeout_ms; | 264 | unsigned int hr_timeout_ms; |
@@ -324,6 +329,10 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
324 | 329 | ||
325 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) | 330 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) |
326 | { | 331 | { |
332 | /* Arm writeout only after thread reaches steady state */ | ||
333 | if (atomic_read(&reg->hr_steady_iterations) != 0) | ||
334 | return; | ||
335 | |||
327 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 336 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
328 | O2HB_MAX_WRITE_TIMEOUT_MS); | 337 | O2HB_MAX_WRITE_TIMEOUT_MS); |
329 | 338 | ||
@@ -537,9 +546,14 @@ static int o2hb_verify_crc(struct o2hb_region *reg, | |||
537 | return read == computed; | 546 | return read == computed; |
538 | } | 547 | } |
539 | 548 | ||
540 | /* We want to make sure that nobody is heartbeating on top of us -- | 549 | /* |
541 | * this will help detect an invalid configuration. */ | 550 | * Compare the slot data with what we wrote in the last iteration. |
542 | static void o2hb_check_last_timestamp(struct o2hb_region *reg) | 551 | * If the match fails, print an appropriate error message. This is to |
552 | * detect errors like... another node hearting on the same slot, | ||
553 | * flaky device that is losing writes, etc. | ||
554 | * Returns 1 if check succeeds, 0 otherwise. | ||
555 | */ | ||
556 | static int o2hb_check_own_slot(struct o2hb_region *reg) | ||
543 | { | 557 | { |
544 | struct o2hb_disk_slot *slot; | 558 | struct o2hb_disk_slot *slot; |
545 | struct o2hb_disk_heartbeat_block *hb_block; | 559 | struct o2hb_disk_heartbeat_block *hb_block; |
@@ -548,13 +562,13 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) | |||
548 | slot = &reg->hr_slots[o2nm_this_node()]; | 562 | slot = &reg->hr_slots[o2nm_this_node()]; |
549 | /* Don't check on our 1st timestamp */ | 563 | /* Don't check on our 1st timestamp */ |
550 | if (!slot->ds_last_time) | 564 | if (!slot->ds_last_time) |
551 | return; | 565 | return 0; |
552 | 566 | ||
553 | hb_block = slot->ds_raw_block; | 567 | hb_block = slot->ds_raw_block; |
554 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && | 568 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && |
555 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && | 569 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && |
556 | hb_block->hb_node == slot->ds_node_num) | 570 | hb_block->hb_node == slot->ds_node_num) |
557 | return; | 571 | return 1; |
558 | 572 | ||
559 | #define ERRSTR1 "Another node is heartbeating on device" | 573 | #define ERRSTR1 "Another node is heartbeating on device" |
560 | #define ERRSTR2 "Heartbeat generation mismatch on device" | 574 | #define ERRSTR2 "Heartbeat generation mismatch on device" |
@@ -574,6 +588,8 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) | |||
574 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, | 588 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, |
575 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), | 589 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), |
576 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); | 590 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); |
591 | |||
592 | return 0; | ||
577 | } | 593 | } |
578 | 594 | ||
579 | static inline void o2hb_prepare_block(struct o2hb_region *reg, | 595 | static inline void o2hb_prepare_block(struct o2hb_region *reg, |
@@ -719,17 +735,24 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
719 | o2nm_node_put(node); | 735 | o2nm_node_put(node); |
720 | } | 736 | } |
721 | 737 | ||
722 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | 738 | static void o2hb_set_quorum_device(struct o2hb_region *reg) |
723 | struct o2hb_disk_slot *slot) | ||
724 | { | 739 | { |
725 | assert_spin_locked(&o2hb_live_lock); | ||
726 | |||
727 | if (!o2hb_global_heartbeat_active()) | 740 | if (!o2hb_global_heartbeat_active()) |
728 | return; | 741 | return; |
729 | 742 | ||
730 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | 743 | /* Prevent race with o2hb_heartbeat_group_drop_item() */ |
744 | if (kthread_should_stop()) | ||
745 | return; | ||
746 | |||
747 | /* Tag region as quorum only after thread reaches steady state */ | ||
748 | if (atomic_read(&reg->hr_steady_iterations) != 0) | ||
731 | return; | 749 | return; |
732 | 750 | ||
751 | spin_lock(&o2hb_live_lock); | ||
752 | |||
753 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
754 | goto unlock; | ||
755 | |||
733 | /* | 756 | /* |
734 | * A region can be added to the quorum only when it sees all | 757 | * A region can be added to the quorum only when it sees all |
735 | * live nodes heartbeat on it. In other words, the region has been | 758 | * live nodes heartbeat on it. In other words, the region has been |
@@ -737,13 +760,10 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, | |||
737 | */ | 760 | */ |
738 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | 761 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, |
739 | sizeof(o2hb_live_node_bitmap))) | 762 | sizeof(o2hb_live_node_bitmap))) |
740 | return; | 763 | goto unlock; |
741 | |||
742 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
743 | return; | ||
744 | 764 | ||
745 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | 765 | printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n", |
746 | config_item_name(&reg->hr_item)); | 766 | config_item_name(&reg->hr_item), reg->hr_dev_name); |
747 | 767 | ||
748 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | 768 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); |
749 | 769 | ||
@@ -754,6 +774,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, | |||
754 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, | 774 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, |
755 | O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) | 775 | O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) |
756 | o2hb_region_unpin(NULL); | 776 | o2hb_region_unpin(NULL); |
777 | unlock: | ||
778 | spin_unlock(&o2hb_live_lock); | ||
757 | } | 779 | } |
758 | 780 | ||
759 | static int o2hb_check_slot(struct o2hb_region *reg, | 781 | static int o2hb_check_slot(struct o2hb_region *reg, |
@@ -925,8 +947,6 @@ fire_callbacks: | |||
925 | slot->ds_equal_samples = 0; | 947 | slot->ds_equal_samples = 0; |
926 | } | 948 | } |
927 | out: | 949 | out: |
928 | o2hb_set_quorum_device(reg, slot); | ||
929 | |||
930 | spin_unlock(&o2hb_live_lock); | 950 | spin_unlock(&o2hb_live_lock); |
931 | 951 | ||
932 | o2hb_run_event_list(&event); | 952 | o2hb_run_event_list(&event); |
@@ -957,7 +977,8 @@ static int o2hb_highest_node(unsigned long *nodes, | |||
957 | 977 | ||
958 | static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | 978 | static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) |
959 | { | 979 | { |
960 | int i, ret, highest_node, change = 0; | 980 | int i, ret, highest_node; |
981 | int membership_change = 0, own_slot_ok = 0; | ||
961 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 982 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
962 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 983 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
963 | struct o2hb_bio_wait_ctxt write_wc; | 984 | struct o2hb_bio_wait_ctxt write_wc; |
@@ -966,7 +987,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
966 | sizeof(configured_nodes)); | 987 | sizeof(configured_nodes)); |
967 | if (ret) { | 988 | if (ret) { |
968 | mlog_errno(ret); | 989 | mlog_errno(ret); |
969 | return ret; | 990 | goto bail; |
970 | } | 991 | } |
971 | 992 | ||
972 | /* | 993 | /* |
@@ -982,8 +1003,9 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
982 | 1003 | ||
983 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 1004 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
984 | if (highest_node >= O2NM_MAX_NODES) { | 1005 | if (highest_node >= O2NM_MAX_NODES) { |
985 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 1006 | mlog(ML_NOTICE, "o2hb: No configured nodes found!\n"); |
986 | return -EINVAL; | 1007 | ret = -EINVAL; |
1008 | goto bail; | ||
987 | } | 1009 | } |
988 | 1010 | ||
989 | /* No sense in reading the slots of nodes that don't exist | 1011 | /* No sense in reading the slots of nodes that don't exist |
@@ -993,29 +1015,27 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
993 | ret = o2hb_read_slots(reg, highest_node + 1); | 1015 | ret = o2hb_read_slots(reg, highest_node + 1); |
994 | if (ret < 0) { | 1016 | if (ret < 0) { |
995 | mlog_errno(ret); | 1017 | mlog_errno(ret); |
996 | return ret; | 1018 | goto bail; |
997 | } | 1019 | } |
998 | 1020 | ||
999 | /* With an up to date view of the slots, we can check that no | 1021 | /* With an up to date view of the slots, we can check that no |
1000 | * other node has been improperly configured to heartbeat in | 1022 | * other node has been improperly configured to heartbeat in |
1001 | * our slot. */ | 1023 | * our slot. */ |
1002 | o2hb_check_last_timestamp(reg); | 1024 | own_slot_ok = o2hb_check_own_slot(reg); |
1003 | 1025 | ||
1004 | /* fill in the proper info for our next heartbeat */ | 1026 | /* fill in the proper info for our next heartbeat */ |
1005 | o2hb_prepare_block(reg, reg->hr_generation); | 1027 | o2hb_prepare_block(reg, reg->hr_generation); |
1006 | 1028 | ||
1007 | /* And fire off the write. Note that we don't wait on this I/O | ||
1008 | * until later. */ | ||
1009 | ret = o2hb_issue_node_write(reg, &write_wc); | 1029 | ret = o2hb_issue_node_write(reg, &write_wc); |
1010 | if (ret < 0) { | 1030 | if (ret < 0) { |
1011 | mlog_errno(ret); | 1031 | mlog_errno(ret); |
1012 | return ret; | 1032 | goto bail; |
1013 | } | 1033 | } |
1014 | 1034 | ||
1015 | i = -1; | 1035 | i = -1; |
1016 | while((i = find_next_bit(configured_nodes, | 1036 | while((i = find_next_bit(configured_nodes, |
1017 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | 1037 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { |
1018 | change |= o2hb_check_slot(reg, &reg->hr_slots[i]); | 1038 | membership_change |= o2hb_check_slot(reg, &reg->hr_slots[i]); |
1019 | } | 1039 | } |
1020 | 1040 | ||
1021 | /* | 1041 | /* |
@@ -1030,18 +1050,39 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
1030 | * disk */ | 1050 | * disk */ |
1031 | mlog(ML_ERROR, "Write error %d on device \"%s\"\n", | 1051 | mlog(ML_ERROR, "Write error %d on device \"%s\"\n", |
1032 | write_wc.wc_error, reg->hr_dev_name); | 1052 | write_wc.wc_error, reg->hr_dev_name); |
1033 | return write_wc.wc_error; | 1053 | ret = write_wc.wc_error; |
1054 | goto bail; | ||
1034 | } | 1055 | } |
1035 | 1056 | ||
1036 | o2hb_arm_write_timeout(reg); | 1057 | /* Skip disarming the timeout if own slot has stale/bad data */ |
1058 | if (own_slot_ok) { | ||
1059 | o2hb_set_quorum_device(reg); | ||
1060 | o2hb_arm_write_timeout(reg); | ||
1061 | } | ||
1037 | 1062 | ||
1063 | bail: | ||
1038 | /* let the person who launched us know when things are steady */ | 1064 | /* let the person who launched us know when things are steady */ |
1039 | if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) { | 1065 | if (atomic_read(&reg->hr_steady_iterations) != 0) { |
1040 | if (atomic_dec_and_test(&reg->hr_steady_iterations)) | 1066 | if (!ret && own_slot_ok && !membership_change) { |
1067 | if (atomic_dec_and_test(&reg->hr_steady_iterations)) | ||
1068 | wake_up(&o2hb_steady_queue); | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | if (atomic_read(&reg->hr_steady_iterations) != 0) { | ||
1073 | if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) { | ||
1074 | printk(KERN_NOTICE "o2hb: Unable to stabilize " | ||
1075 | "heartbeart on region %s (%s)\n", | ||
1076 | config_item_name(&reg->hr_item), | ||
1077 | reg->hr_dev_name); | ||
1078 | atomic_set(&reg->hr_steady_iterations, 0); | ||
1079 | reg->hr_aborted_start = 1; | ||
1041 | wake_up(&o2hb_steady_queue); | 1080 | wake_up(&o2hb_steady_queue); |
1081 | ret = -EIO; | ||
1082 | } | ||
1042 | } | 1083 | } |
1043 | 1084 | ||
1044 | return 0; | 1085 | return ret; |
1045 | } | 1086 | } |
1046 | 1087 | ||
1047 | /* Subtract b from a, storing the result in a. a *must* have a larger | 1088 | /* Subtract b from a, storing the result in a. a *must* have a larger |
@@ -1095,7 +1136,8 @@ static int o2hb_thread(void *data) | |||
1095 | /* Pin node */ | 1136 | /* Pin node */ |
1096 | o2nm_depend_this_node(); | 1137 | o2nm_depend_this_node(); |
1097 | 1138 | ||
1098 | while (!kthread_should_stop() && !reg->hr_unclean_stop) { | 1139 | while (!kthread_should_stop() && |
1140 | !reg->hr_unclean_stop && !reg->hr_aborted_start) { | ||
1099 | /* We track the time spent inside | 1141 | /* We track the time spent inside |
1100 | * o2hb_do_disk_heartbeat so that we avoid more than | 1142 | * o2hb_do_disk_heartbeat so that we avoid more than |
1101 | * hr_timeout_ms between disk writes. On busy systems | 1143 | * hr_timeout_ms between disk writes. On busy systems |
@@ -1103,10 +1145,7 @@ static int o2hb_thread(void *data) | |||
1103 | * likely to time itself out. */ | 1145 | * likely to time itself out. */ |
1104 | do_gettimeofday(&before_hb); | 1146 | do_gettimeofday(&before_hb); |
1105 | 1147 | ||
1106 | i = 0; | 1148 | ret = o2hb_do_disk_heartbeat(reg); |
1107 | do { | ||
1108 | ret = o2hb_do_disk_heartbeat(reg); | ||
1109 | } while (ret && ++i < 2); | ||
1110 | 1149 | ||
1111 | do_gettimeofday(&after_hb); | 1150 | do_gettimeofday(&after_hb); |
1112 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); | 1151 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); |
@@ -1117,7 +1156,8 @@ static int o2hb_thread(void *data) | |||
1117 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, | 1156 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, |
1118 | elapsed_msec); | 1157 | elapsed_msec); |
1119 | 1158 | ||
1120 | if (elapsed_msec < reg->hr_timeout_ms) { | 1159 | if (!kthread_should_stop() && |
1160 | elapsed_msec < reg->hr_timeout_ms) { | ||
1121 | /* the kthread api has blocked signals for us so no | 1161 | /* the kthread api has blocked signals for us so no |
1122 | * need to record the return value. */ | 1162 | * need to record the return value. */ |
1123 | msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); | 1163 | msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); |
@@ -1134,20 +1174,20 @@ static int o2hb_thread(void *data) | |||
1134 | * to timeout on this region when we could just as easily | 1174 | * to timeout on this region when we could just as easily |
1135 | * write a clear generation - thus indicating to them that | 1175 | * write a clear generation - thus indicating to them that |
1136 | * this node has left this region. | 1176 | * this node has left this region. |
1137 | * | 1177 | */ |
1138 | * XXX: Should we skip this on unclean_stop? */ | 1178 | if (!reg->hr_unclean_stop && !reg->hr_aborted_start) { |
1139 | o2hb_prepare_block(reg, 0); | 1179 | o2hb_prepare_block(reg, 0); |
1140 | ret = o2hb_issue_node_write(reg, &write_wc); | 1180 | ret = o2hb_issue_node_write(reg, &write_wc); |
1141 | if (ret == 0) { | 1181 | if (ret == 0) |
1142 | o2hb_wait_on_io(reg, &write_wc); | 1182 | o2hb_wait_on_io(reg, &write_wc); |
1143 | } else { | 1183 | else |
1144 | mlog_errno(ret); | 1184 | mlog_errno(ret); |
1145 | } | 1185 | } |
1146 | 1186 | ||
1147 | /* Unpin node */ | 1187 | /* Unpin node */ |
1148 | o2nm_undepend_this_node(); | 1188 | o2nm_undepend_this_node(); |
1149 | 1189 | ||
1150 | mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); | 1190 | mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n"); |
1151 | 1191 | ||
1152 | return 0; | 1192 | return 0; |
1153 | } | 1193 | } |
@@ -1158,6 +1198,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) | |||
1158 | struct o2hb_debug_buf *db = inode->i_private; | 1198 | struct o2hb_debug_buf *db = inode->i_private; |
1159 | struct o2hb_region *reg; | 1199 | struct o2hb_region *reg; |
1160 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1200 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
1201 | unsigned long lts; | ||
1161 | char *buf = NULL; | 1202 | char *buf = NULL; |
1162 | int i = -1; | 1203 | int i = -1; |
1163 | int out = 0; | 1204 | int out = 0; |
@@ -1194,9 +1235,11 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) | |||
1194 | 1235 | ||
1195 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | 1236 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: |
1196 | reg = (struct o2hb_region *)db->db_data; | 1237 | reg = (struct o2hb_region *)db->db_data; |
1197 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | 1238 | lts = reg->hr_last_timeout_start; |
1198 | jiffies_to_msecs(jiffies - | 1239 | /* If 0, it has never been set before */ |
1199 | reg->hr_last_timeout_start)); | 1240 | if (lts) |
1241 | lts = jiffies_to_msecs(jiffies - lts); | ||
1242 | out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts); | ||
1200 | goto done; | 1243 | goto done; |
1201 | 1244 | ||
1202 | case O2HB_DB_TYPE_REGION_PINNED: | 1245 | case O2HB_DB_TYPE_REGION_PINNED: |
@@ -1426,6 +1469,8 @@ static void o2hb_region_release(struct config_item *item) | |||
1426 | struct page *page; | 1469 | struct page *page; |
1427 | struct o2hb_region *reg = to_o2hb_region(item); | 1470 | struct o2hb_region *reg = to_o2hb_region(item); |
1428 | 1471 | ||
1472 | mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); | ||
1473 | |||
1429 | if (reg->hr_tmp_block) | 1474 | if (reg->hr_tmp_block) |
1430 | kfree(reg->hr_tmp_block); | 1475 | kfree(reg->hr_tmp_block); |
1431 | 1476 | ||
@@ -1792,7 +1837,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1792 | live_threshold <<= 1; | 1837 | live_threshold <<= 1; |
1793 | spin_unlock(&o2hb_live_lock); | 1838 | spin_unlock(&o2hb_live_lock); |
1794 | } | 1839 | } |
1795 | atomic_set(&reg->hr_steady_iterations, live_threshold + 1); | 1840 | ++live_threshold; |
1841 | atomic_set(&reg->hr_steady_iterations, live_threshold); | ||
1842 | /* unsteady_iterations is double the steady_iterations */ | ||
1843 | atomic_set(&reg->hr_unsteady_iterations, (live_threshold << 1)); | ||
1796 | 1844 | ||
1797 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", | 1845 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", |
1798 | reg->hr_item.ci_name); | 1846 | reg->hr_item.ci_name); |
@@ -1809,14 +1857,12 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1809 | ret = wait_event_interruptible(o2hb_steady_queue, | 1857 | ret = wait_event_interruptible(o2hb_steady_queue, |
1810 | atomic_read(&reg->hr_steady_iterations) == 0); | 1858 | atomic_read(&reg->hr_steady_iterations) == 0); |
1811 | if (ret) { | 1859 | if (ret) { |
1812 | /* We got interrupted (hello ptrace!). Clean up */ | 1860 | atomic_set(&reg->hr_steady_iterations, 0); |
1813 | spin_lock(&o2hb_live_lock); | 1861 | reg->hr_aborted_start = 1; |
1814 | hb_task = reg->hr_task; | 1862 | } |
1815 | reg->hr_task = NULL; | ||
1816 | spin_unlock(&o2hb_live_lock); | ||
1817 | 1863 | ||
1818 | if (hb_task) | 1864 | if (reg->hr_aborted_start) { |
1819 | kthread_stop(hb_task); | 1865 | ret = -EIO; |
1820 | goto out; | 1866 | goto out; |
1821 | } | 1867 | } |
1822 | 1868 | ||
@@ -1833,8 +1879,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1833 | ret = -EIO; | 1879 | ret = -EIO; |
1834 | 1880 | ||
1835 | if (hb_task && o2hb_global_heartbeat_active()) | 1881 | if (hb_task && o2hb_global_heartbeat_active()) |
1836 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | 1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", |
1837 | config_item_name(&reg->hr_item)); | 1883 | config_item_name(&reg->hr_item), reg->hr_dev_name); |
1838 | 1884 | ||
1839 | out: | 1885 | out: |
1840 | if (filp) | 1886 | if (filp) |
@@ -2092,13 +2138,6 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
2092 | 2138 | ||
2093 | /* stop the thread when the user removes the region dir */ | 2139 | /* stop the thread when the user removes the region dir */ |
2094 | spin_lock(&o2hb_live_lock); | 2140 | spin_lock(&o2hb_live_lock); |
2095 | if (o2hb_global_heartbeat_active()) { | ||
2096 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2097 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2098 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
2099 | quorum_region = 1; | ||
2100 | clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
2101 | } | ||
2102 | hb_task = reg->hr_task; | 2141 | hb_task = reg->hr_task; |
2103 | reg->hr_task = NULL; | 2142 | reg->hr_task = NULL; |
2104 | reg->hr_item_dropped = 1; | 2143 | reg->hr_item_dropped = 1; |
@@ -2107,19 +2146,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
2107 | if (hb_task) | 2146 | if (hb_task) |
2108 | kthread_stop(hb_task); | 2147 | kthread_stop(hb_task); |
2109 | 2148 | ||
2149 | if (o2hb_global_heartbeat_active()) { | ||
2150 | spin_lock(&o2hb_live_lock); | ||
2151 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2152 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2153 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
2154 | quorum_region = 1; | ||
2155 | clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
2156 | spin_unlock(&o2hb_live_lock); | ||
2157 | printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n", | ||
2158 | ((atomic_read(&reg->hr_steady_iterations) == 0) ? | ||
2159 | "stopped" : "start aborted"), config_item_name(item), | ||
2160 | reg->hr_dev_name); | ||
2161 | } | ||
2162 | |||
2110 | /* | 2163 | /* |
2111 | * If we're racing a dev_write(), we need to wake them. They will | 2164 | * If we're racing a dev_write(), we need to wake them. They will |
2112 | * check reg->hr_task | 2165 | * check reg->hr_task |
2113 | */ | 2166 | */ |
2114 | if (atomic_read(&reg->hr_steady_iterations) != 0) { | 2167 | if (atomic_read(&reg->hr_steady_iterations) != 0) { |
2168 | reg->hr_aborted_start = 1; | ||
2115 | atomic_set(&reg->hr_steady_iterations, 0); | 2169 | atomic_set(&reg->hr_steady_iterations, 0); |
2116 | wake_up(&o2hb_steady_queue); | 2170 | wake_up(&o2hb_steady_queue); |
2117 | } | 2171 | } |
2118 | 2172 | ||
2119 | if (o2hb_global_heartbeat_active()) | ||
2120 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
2121 | config_item_name(&reg->hr_item)); | ||
2122 | |||
2123 | config_item_put(item); | 2173 | config_item_put(item); |
2124 | 2174 | ||
2125 | if (!o2hb_global_heartbeat_active() || !quorum_region) | 2175 | if (!o2hb_global_heartbeat_active() || !quorum_region) |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 3a5835904b3..dc45deb19e6 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #define SC_DEBUG_NAME "sock_containers" | 47 | #define SC_DEBUG_NAME "sock_containers" |
48 | #define NST_DEBUG_NAME "send_tracking" | 48 | #define NST_DEBUG_NAME "send_tracking" |
49 | #define STATS_DEBUG_NAME "stats" | 49 | #define STATS_DEBUG_NAME "stats" |
50 | #define NODES_DEBUG_NAME "connected_nodes" | ||
50 | 51 | ||
51 | #define SHOW_SOCK_CONTAINERS 0 | 52 | #define SHOW_SOCK_CONTAINERS 0 |
52 | #define SHOW_SOCK_STATS 1 | 53 | #define SHOW_SOCK_STATS 1 |
@@ -55,6 +56,7 @@ static struct dentry *o2net_dentry; | |||
55 | static struct dentry *sc_dentry; | 56 | static struct dentry *sc_dentry; |
56 | static struct dentry *nst_dentry; | 57 | static struct dentry *nst_dentry; |
57 | static struct dentry *stats_dentry; | 58 | static struct dentry *stats_dentry; |
59 | static struct dentry *nodes_dentry; | ||
58 | 60 | ||
59 | static DEFINE_SPINLOCK(o2net_debug_lock); | 61 | static DEFINE_SPINLOCK(o2net_debug_lock); |
60 | 62 | ||
@@ -491,53 +493,87 @@ static const struct file_operations sc_seq_fops = { | |||
491 | .release = sc_fop_release, | 493 | .release = sc_fop_release, |
492 | }; | 494 | }; |
493 | 495 | ||
494 | int o2net_debugfs_init(void) | 496 | static int o2net_fill_bitmap(char *buf, int len) |
495 | { | 497 | { |
496 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | 498 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
497 | if (!o2net_dentry) { | 499 | int i = -1, out = 0; |
498 | mlog_errno(-ENOMEM); | ||
499 | goto bail; | ||
500 | } | ||
501 | 500 | ||
502 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, | 501 | o2net_fill_node_map(map, sizeof(map)); |
503 | o2net_dentry, NULL, | ||
504 | &nst_seq_fops); | ||
505 | if (!nst_dentry) { | ||
506 | mlog_errno(-ENOMEM); | ||
507 | goto bail; | ||
508 | } | ||
509 | 502 | ||
510 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, | 503 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) |
511 | o2net_dentry, NULL, | 504 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
512 | &sc_seq_fops); | 505 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
513 | if (!sc_dentry) { | ||
514 | mlog_errno(-ENOMEM); | ||
515 | goto bail; | ||
516 | } | ||
517 | 506 | ||
518 | stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR, | 507 | return out; |
519 | o2net_dentry, NULL, | 508 | } |
520 | &stats_seq_fops); | 509 | |
521 | if (!stats_dentry) { | 510 | static int nodes_fop_open(struct inode *inode, struct file *file) |
522 | mlog_errno(-ENOMEM); | 511 | { |
523 | goto bail; | 512 | char *buf; |
524 | } | 513 | |
514 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
515 | if (!buf) | ||
516 | return -ENOMEM; | ||
517 | |||
518 | i_size_write(inode, o2net_fill_bitmap(buf, PAGE_SIZE)); | ||
519 | |||
520 | file->private_data = buf; | ||
525 | 521 | ||
526 | return 0; | 522 | return 0; |
527 | bail: | ||
528 | debugfs_remove(stats_dentry); | ||
529 | debugfs_remove(sc_dentry); | ||
530 | debugfs_remove(nst_dentry); | ||
531 | debugfs_remove(o2net_dentry); | ||
532 | return -ENOMEM; | ||
533 | } | 523 | } |
534 | 524 | ||
525 | static int o2net_debug_release(struct inode *inode, struct file *file) | ||
526 | { | ||
527 | kfree(file->private_data); | ||
528 | return 0; | ||
529 | } | ||
530 | |||
531 | static ssize_t o2net_debug_read(struct file *file, char __user *buf, | ||
532 | size_t nbytes, loff_t *ppos) | ||
533 | { | ||
534 | return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, | ||
535 | i_size_read(file->f_mapping->host)); | ||
536 | } | ||
537 | |||
538 | static const struct file_operations nodes_fops = { | ||
539 | .open = nodes_fop_open, | ||
540 | .release = o2net_debug_release, | ||
541 | .read = o2net_debug_read, | ||
542 | .llseek = generic_file_llseek, | ||
543 | }; | ||
544 | |||
535 | void o2net_debugfs_exit(void) | 545 | void o2net_debugfs_exit(void) |
536 | { | 546 | { |
547 | debugfs_remove(nodes_dentry); | ||
537 | debugfs_remove(stats_dentry); | 548 | debugfs_remove(stats_dentry); |
538 | debugfs_remove(sc_dentry); | 549 | debugfs_remove(sc_dentry); |
539 | debugfs_remove(nst_dentry); | 550 | debugfs_remove(nst_dentry); |
540 | debugfs_remove(o2net_dentry); | 551 | debugfs_remove(o2net_dentry); |
541 | } | 552 | } |
542 | 553 | ||
554 | int o2net_debugfs_init(void) | ||
555 | { | ||
556 | mode_t mode = S_IFREG|S_IRUSR; | ||
557 | |||
558 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | ||
559 | if (o2net_dentry) | ||
560 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode, | ||
561 | o2net_dentry, NULL, &nst_seq_fops); | ||
562 | if (nst_dentry) | ||
563 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode, | ||
564 | o2net_dentry, NULL, &sc_seq_fops); | ||
565 | if (sc_dentry) | ||
566 | stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode, | ||
567 | o2net_dentry, NULL, &stats_seq_fops); | ||
568 | if (stats_dentry) | ||
569 | nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode, | ||
570 | o2net_dentry, NULL, &nodes_fops); | ||
571 | if (nodes_dentry) | ||
572 | return 0; | ||
573 | |||
574 | o2net_debugfs_exit(); | ||
575 | mlog_errno(-ENOMEM); | ||
576 | return -ENOMEM; | ||
577 | } | ||
578 | |||
543 | #endif /* CONFIG_DEBUG_FS */ | 579 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ad7d0c155de..044e7b58d31 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -546,7 +546,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
546 | } | 546 | } |
547 | 547 | ||
548 | if (was_valid && !valid) { | 548 | if (was_valid && !valid) { |
549 | printk(KERN_NOTICE "o2net: no longer connected to " | 549 | printk(KERN_NOTICE "o2net: No longer connected to " |
550 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); | 550 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); |
551 | o2net_complete_nodes_nsw(nn); | 551 | o2net_complete_nodes_nsw(nn); |
552 | } | 552 | } |
@@ -556,7 +556,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
556 | cancel_delayed_work(&nn->nn_connect_expired); | 556 | cancel_delayed_work(&nn->nn_connect_expired); |
557 | printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", | 557 | printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", |
558 | o2nm_this_node() > sc->sc_node->nd_num ? | 558 | o2nm_this_node() > sc->sc_node->nd_num ? |
559 | "connected to" : "accepted connection from", | 559 | "Connected to" : "Accepted connection from", |
560 | SC_NODEF_ARGS(sc)); | 560 | SC_NODEF_ARGS(sc)); |
561 | } | 561 | } |
562 | 562 | ||
@@ -644,7 +644,7 @@ static void o2net_state_change(struct sock *sk) | |||
644 | o2net_sc_queue_work(sc, &sc->sc_connect_work); | 644 | o2net_sc_queue_work(sc, &sc->sc_connect_work); |
645 | break; | 645 | break; |
646 | default: | 646 | default: |
647 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT | 647 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT |
648 | " shutdown, state %d\n", | 648 | " shutdown, state %d\n", |
649 | SC_NODEF_ARGS(sc), sk->sk_state); | 649 | SC_NODEF_ARGS(sc), sk->sk_state); |
650 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 650 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
@@ -1035,6 +1035,25 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, | |||
1035 | return ret; | 1035 | return ret; |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | /* Get a map of all nodes to which this node is currently connected to */ | ||
1039 | void o2net_fill_node_map(unsigned long *map, unsigned bytes) | ||
1040 | { | ||
1041 | struct o2net_sock_container *sc; | ||
1042 | int node, ret; | ||
1043 | |||
1044 | BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); | ||
1045 | |||
1046 | memset(map, 0, bytes); | ||
1047 | for (node = 0; node < O2NM_MAX_NODES; ++node) { | ||
1048 | o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); | ||
1049 | if (!ret) { | ||
1050 | set_bit(node, map); | ||
1051 | sc_put(sc); | ||
1052 | } | ||
1053 | } | ||
1054 | } | ||
1055 | EXPORT_SYMBOL_GPL(o2net_fill_node_map); | ||
1056 | |||
1038 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | 1057 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, |
1039 | size_t caller_veclen, u8 target_node, int *status) | 1058 | size_t caller_veclen, u8 target_node, int *status) |
1040 | { | 1059 | { |
@@ -1285,11 +1304,11 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1285 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | 1304 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); |
1286 | 1305 | ||
1287 | if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { | 1306 | if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { |
1288 | mlog(ML_NOTICE, SC_NODEF_FMT " advertised net protocol " | 1307 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " Advertised net " |
1289 | "version %llu but %llu is required, disconnecting\n", | 1308 | "protocol version %llu but %llu is required. " |
1290 | SC_NODEF_ARGS(sc), | 1309 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
1291 | (unsigned long long)be64_to_cpu(hand->protocol_version), | 1310 | (unsigned long long)be64_to_cpu(hand->protocol_version), |
1292 | O2NET_PROTOCOL_VERSION); | 1311 | O2NET_PROTOCOL_VERSION); |
1293 | 1312 | ||
1294 | /* don't bother reconnecting if its the wrong version. */ | 1313 | /* don't bother reconnecting if its the wrong version. */ |
1295 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1314 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
@@ -1303,33 +1322,33 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1303 | */ | 1322 | */ |
1304 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | 1323 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != |
1305 | o2net_idle_timeout()) { | 1324 | o2net_idle_timeout()) { |
1306 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | 1325 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network " |
1307 | "%u ms, but we use %u ms locally. disconnecting\n", | 1326 | "idle timeout of %u ms, but we use %u ms locally. " |
1308 | SC_NODEF_ARGS(sc), | 1327 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
1309 | be32_to_cpu(hand->o2net_idle_timeout_ms), | 1328 | be32_to_cpu(hand->o2net_idle_timeout_ms), |
1310 | o2net_idle_timeout()); | 1329 | o2net_idle_timeout()); |
1311 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1330 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
1312 | return -1; | 1331 | return -1; |
1313 | } | 1332 | } |
1314 | 1333 | ||
1315 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | 1334 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != |
1316 | o2net_keepalive_delay()) { | 1335 | o2net_keepalive_delay()) { |
1317 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | 1336 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive " |
1318 | "%u ms, but we use %u ms locally. disconnecting\n", | 1337 | "delay of %u ms, but we use %u ms locally. " |
1319 | SC_NODEF_ARGS(sc), | 1338 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
1320 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | 1339 | be32_to_cpu(hand->o2net_keepalive_delay_ms), |
1321 | o2net_keepalive_delay()); | 1340 | o2net_keepalive_delay()); |
1322 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1341 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
1323 | return -1; | 1342 | return -1; |
1324 | } | 1343 | } |
1325 | 1344 | ||
1326 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != | 1345 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != |
1327 | O2HB_MAX_WRITE_TIMEOUT_MS) { | 1346 | O2HB_MAX_WRITE_TIMEOUT_MS) { |
1328 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " | 1347 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat " |
1329 | "%u ms, but we use %u ms locally. disconnecting\n", | 1348 | "timeout of %u ms, but we use %u ms locally. " |
1330 | SC_NODEF_ARGS(sc), | 1349 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
1331 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), | 1350 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), |
1332 | O2HB_MAX_WRITE_TIMEOUT_MS); | 1351 | O2HB_MAX_WRITE_TIMEOUT_MS); |
1333 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1352 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
1334 | return -1; | 1353 | return -1; |
1335 | } | 1354 | } |
@@ -1540,28 +1559,16 @@ static void o2net_idle_timer(unsigned long data) | |||
1540 | { | 1559 | { |
1541 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1560 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
1542 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | 1561 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); |
1543 | |||
1544 | #ifdef CONFIG_DEBUG_FS | 1562 | #ifdef CONFIG_DEBUG_FS |
1545 | ktime_t now = ktime_get(); | 1563 | unsigned long msecs = ktime_to_ms(ktime_get()) - |
1564 | ktime_to_ms(sc->sc_tv_timer); | ||
1565 | #else | ||
1566 | unsigned long msecs = o2net_idle_timeout(); | ||
1546 | #endif | 1567 | #endif |
1547 | 1568 | ||
1548 | printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1569 | printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " |
1549 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1570 | "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), |
1550 | o2net_idle_timeout() / 1000, | 1571 | msecs / 1000, msecs % 1000); |
1551 | o2net_idle_timeout() % 1000); | ||
1552 | |||
1553 | #ifdef CONFIG_DEBUG_FS | ||
1554 | mlog(ML_NOTICE, "Here are some times that might help debug the " | ||
1555 | "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, " | ||
1556 | "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n", | ||
1557 | (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now), | ||
1558 | (long long)ktime_to_us(sc->sc_tv_data_ready), | ||
1559 | (long long)ktime_to_us(sc->sc_tv_advance_start), | ||
1560 | (long long)ktime_to_us(sc->sc_tv_advance_stop), | ||
1561 | sc->sc_msg_key, sc->sc_msg_type, | ||
1562 | (long long)ktime_to_us(sc->sc_tv_func_start), | ||
1563 | (long long)ktime_to_us(sc->sc_tv_func_stop)); | ||
1564 | #endif | ||
1565 | 1572 | ||
1566 | /* | 1573 | /* |
1567 | * Initialize the nn_timeout so that the next connection attempt | 1574 | * Initialize the nn_timeout so that the next connection attempt |
@@ -1694,8 +1701,8 @@ static void o2net_start_connect(struct work_struct *work) | |||
1694 | 1701 | ||
1695 | out: | 1702 | out: |
1696 | if (ret) { | 1703 | if (ret) { |
1697 | mlog(ML_NOTICE, "connect attempt to " SC_NODEF_FMT " failed " | 1704 | printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT |
1698 | "with errno %d\n", SC_NODEF_ARGS(sc), ret); | 1705 | " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); |
1699 | /* 0 err so that another will be queued and attempted | 1706 | /* 0 err so that another will be queued and attempted |
1700 | * from set_nn_state */ | 1707 | * from set_nn_state */ |
1701 | if (sc) | 1708 | if (sc) |
@@ -1718,8 +1725,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1718 | 1725 | ||
1719 | spin_lock(&nn->nn_lock); | 1726 | spin_lock(&nn->nn_lock); |
1720 | if (!nn->nn_sc_valid) { | 1727 | if (!nn->nn_sc_valid) { |
1721 | mlog(ML_ERROR, "no connection established with node %u after " | 1728 | printk(KERN_NOTICE "o2net: No connection established with " |
1722 | "%u.%u seconds, giving up and returning errors.\n", | 1729 | "node %u after %u.%u seconds, giving up.\n", |
1723 | o2net_num_from_nn(nn), | 1730 | o2net_num_from_nn(nn), |
1724 | o2net_idle_timeout() / 1000, | 1731 | o2net_idle_timeout() / 1000, |
1725 | o2net_idle_timeout() % 1000); | 1732 | o2net_idle_timeout() % 1000); |
@@ -1862,21 +1869,21 @@ static int o2net_accept_one(struct socket *sock) | |||
1862 | 1869 | ||
1863 | node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); | 1870 | node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); |
1864 | if (node == NULL) { | 1871 | if (node == NULL) { |
1865 | mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n", | 1872 | printk(KERN_NOTICE "o2net: Attempt to connect from unknown " |
1866 | &sin.sin_addr.s_addr, ntohs(sin.sin_port)); | 1873 | "node at %pI4:%d\n", &sin.sin_addr.s_addr, |
1874 | ntohs(sin.sin_port)); | ||
1867 | ret = -EINVAL; | 1875 | ret = -EINVAL; |
1868 | goto out; | 1876 | goto out; |
1869 | } | 1877 | } |
1870 | 1878 | ||
1871 | if (o2nm_this_node() >= node->nd_num) { | 1879 | if (o2nm_this_node() >= node->nd_num) { |
1872 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | 1880 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
1873 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" | 1881 | printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " |
1874 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", | 1882 | "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " |
1875 | local_node->nd_name, local_node->nd_num, | 1883 | "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, |
1876 | &(local_node->nd_ipv4_address), | 1884 | &(local_node->nd_ipv4_address), |
1877 | ntohs(local_node->nd_ipv4_port), | 1885 | ntohs(local_node->nd_ipv4_port), node->nd_name, |
1878 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | 1886 | node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); |
1879 | ntohs(sin.sin_port)); | ||
1880 | ret = -EINVAL; | 1887 | ret = -EINVAL; |
1881 | goto out; | 1888 | goto out; |
1882 | } | 1889 | } |
@@ -1901,10 +1908,10 @@ static int o2net_accept_one(struct socket *sock) | |||
1901 | ret = 0; | 1908 | ret = 0; |
1902 | spin_unlock(&nn->nn_lock); | 1909 | spin_unlock(&nn->nn_lock); |
1903 | if (ret) { | 1910 | if (ret) { |
1904 | mlog(ML_NOTICE, "attempt to connect from node '%s' at " | 1911 | printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' " |
1905 | "%pI4:%d but it already has an open connection\n", | 1912 | "at %pI4:%d but it already has an open connection\n", |
1906 | node->nd_name, &sin.sin_addr.s_addr, | 1913 | node->nd_name, &sin.sin_addr.s_addr, |
1907 | ntohs(sin.sin_port)); | 1914 | ntohs(sin.sin_port)); |
1908 | goto out; | 1915 | goto out; |
1909 | } | 1916 | } |
1910 | 1917 | ||
@@ -1984,7 +1991,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) | |||
1984 | 1991 | ||
1985 | ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); | 1992 | ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); |
1986 | if (ret < 0) { | 1993 | if (ret < 0) { |
1987 | mlog(ML_ERROR, "unable to create socket, ret=%d\n", ret); | 1994 | printk(KERN_ERR "o2net: Error %d while creating socket\n", ret); |
1988 | goto out; | 1995 | goto out; |
1989 | } | 1996 | } |
1990 | 1997 | ||
@@ -2001,16 +2008,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) | |||
2001 | sock->sk->sk_reuse = 1; | 2008 | sock->sk->sk_reuse = 1; |
2002 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); | 2009 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); |
2003 | if (ret < 0) { | 2010 | if (ret < 0) { |
2004 | mlog(ML_ERROR, "unable to bind socket at %pI4:%u, " | 2011 | printk(KERN_ERR "o2net: Error %d while binding socket at " |
2005 | "ret=%d\n", &addr, ntohs(port), ret); | 2012 | "%pI4:%u\n", ret, &addr, ntohs(port)); |
2006 | goto out; | 2013 | goto out; |
2007 | } | 2014 | } |
2008 | 2015 | ||
2009 | ret = sock->ops->listen(sock, 64); | 2016 | ret = sock->ops->listen(sock, 64); |
2010 | if (ret < 0) { | 2017 | if (ret < 0) |
2011 | mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n", | 2018 | printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n", |
2012 | &addr, ntohs(port), ret); | 2019 | ret, &addr, ntohs(port)); |
2013 | } | ||
2014 | 2020 | ||
2015 | out: | 2021 | out: |
2016 | if (ret) { | 2022 | if (ret) { |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index fd6179eb26d..5bada2a69b5 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -106,6 +106,8 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | |||
106 | struct list_head *unreg_list); | 106 | struct list_head *unreg_list); |
107 | void o2net_unregister_handler_list(struct list_head *list); | 107 | void o2net_unregister_handler_list(struct list_head *list); |
108 | 108 | ||
109 | void o2net_fill_node_map(unsigned long *map, unsigned bytes); | ||
110 | |||
109 | struct o2nm_node; | 111 | struct o2nm_node; |
110 | int o2net_register_hb_callbacks(void); | 112 | int o2net_register_hb_callbacks(void); |
111 | void o2net_unregister_hb_callbacks(void); | 113 | void o2net_unregister_hb_callbacks(void); |