aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r--fs/ocfs2/cluster/heartbeat.c194
-rw-r--r--fs/ocfs2/cluster/netdebug.c102
-rw-r--r--fs/ocfs2/cluster/tcp.c138
-rw-r--r--fs/ocfs2/cluster/tcp.h2
4 files changed, 265 insertions, 171 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9a3e6bbff27..a4e855e3690 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -216,6 +216,7 @@ struct o2hb_region {
216 216
217 struct list_head hr_all_item; 217 struct list_head hr_all_item;
218 unsigned hr_unclean_stop:1, 218 unsigned hr_unclean_stop:1,
219 hr_aborted_start:1,
219 hr_item_pinned:1, 220 hr_item_pinned:1,
220 hr_item_dropped:1; 221 hr_item_dropped:1;
221 222
@@ -254,6 +255,10 @@ struct o2hb_region {
254 * a more complete api that doesn't lead to this sort of fragility. */ 255 * a more complete api that doesn't lead to this sort of fragility. */
255 atomic_t hr_steady_iterations; 256 atomic_t hr_steady_iterations;
256 257
258 /* terminate o2hb thread if it does not reach steady state
259 * (hr_steady_iterations == 0) within hr_unsteady_iterations */
260 atomic_t hr_unsteady_iterations;
261
257 char hr_dev_name[BDEVNAME_SIZE]; 262 char hr_dev_name[BDEVNAME_SIZE];
258 263
259 unsigned int hr_timeout_ms; 264 unsigned int hr_timeout_ms;
@@ -324,6 +329,10 @@ static void o2hb_write_timeout(struct work_struct *work)
324 329
325static void o2hb_arm_write_timeout(struct o2hb_region *reg) 330static void o2hb_arm_write_timeout(struct o2hb_region *reg)
326{ 331{
332 /* Arm writeout only after thread reaches steady state */
333 if (atomic_read(&reg->hr_steady_iterations) != 0)
334 return;
335
327 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", 336 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
328 O2HB_MAX_WRITE_TIMEOUT_MS); 337 O2HB_MAX_WRITE_TIMEOUT_MS);
329 338
@@ -537,9 +546,14 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
537 return read == computed; 546 return read == computed;
538} 547}
539 548
540/* We want to make sure that nobody is heartbeating on top of us -- 549/*
541 * this will help detect an invalid configuration. */ 550 * Compare the slot data with what we wrote in the last iteration.
542static void o2hb_check_last_timestamp(struct o2hb_region *reg) 551 * If the match fails, print an appropriate error message. This is to
552 * detect errors like... another node hearting on the same slot,
553 * flaky device that is losing writes, etc.
554 * Returns 1 if check succeeds, 0 otherwise.
555 */
556static int o2hb_check_own_slot(struct o2hb_region *reg)
543{ 557{
544 struct o2hb_disk_slot *slot; 558 struct o2hb_disk_slot *slot;
545 struct o2hb_disk_heartbeat_block *hb_block; 559 struct o2hb_disk_heartbeat_block *hb_block;
@@ -548,13 +562,13 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg)
548 slot = &reg->hr_slots[o2nm_this_node()]; 562 slot = &reg->hr_slots[o2nm_this_node()];
549 /* Don't check on our 1st timestamp */ 563 /* Don't check on our 1st timestamp */
550 if (!slot->ds_last_time) 564 if (!slot->ds_last_time)
551 return; 565 return 0;
552 566
553 hb_block = slot->ds_raw_block; 567 hb_block = slot->ds_raw_block;
554 if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && 568 if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
555 le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && 569 le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
556 hb_block->hb_node == slot->ds_node_num) 570 hb_block->hb_node == slot->ds_node_num)
557 return; 571 return 1;
558 572
559#define ERRSTR1 "Another node is heartbeating on device" 573#define ERRSTR1 "Another node is heartbeating on device"
560#define ERRSTR2 "Heartbeat generation mismatch on device" 574#define ERRSTR2 "Heartbeat generation mismatch on device"
@@ -574,6 +588,8 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg)
574 (unsigned long long)slot->ds_last_time, hb_block->hb_node, 588 (unsigned long long)slot->ds_last_time, hb_block->hb_node,
575 (unsigned long long)le64_to_cpu(hb_block->hb_generation), 589 (unsigned long long)le64_to_cpu(hb_block->hb_generation),
576 (unsigned long long)le64_to_cpu(hb_block->hb_seq)); 590 (unsigned long long)le64_to_cpu(hb_block->hb_seq));
591
592 return 0;
577} 593}
578 594
579static inline void o2hb_prepare_block(struct o2hb_region *reg, 595static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -719,17 +735,24 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
719 o2nm_node_put(node); 735 o2nm_node_put(node);
720} 736}
721 737
722static void o2hb_set_quorum_device(struct o2hb_region *reg, 738static void o2hb_set_quorum_device(struct o2hb_region *reg)
723 struct o2hb_disk_slot *slot)
724{ 739{
725 assert_spin_locked(&o2hb_live_lock);
726
727 if (!o2hb_global_heartbeat_active()) 740 if (!o2hb_global_heartbeat_active())
728 return; 741 return;
729 742
730 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) 743 /* Prevent race with o2hb_heartbeat_group_drop_item() */
744 if (kthread_should_stop())
745 return;
746
747 /* Tag region as quorum only after thread reaches steady state */
748 if (atomic_read(&reg->hr_steady_iterations) != 0)
731 return; 749 return;
732 750
751 spin_lock(&o2hb_live_lock);
752
753 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
754 goto unlock;
755
733 /* 756 /*
734 * A region can be added to the quorum only when it sees all 757 * A region can be added to the quorum only when it sees all
735 * live nodes heartbeat on it. In other words, the region has been 758 * live nodes heartbeat on it. In other words, the region has been
@@ -737,13 +760,10 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
737 */ 760 */
738 if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, 761 if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
739 sizeof(o2hb_live_node_bitmap))) 762 sizeof(o2hb_live_node_bitmap)))
740 return; 763 goto unlock;
741
742 if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD)
743 return;
744 764
745 printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", 765 printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n",
746 config_item_name(&reg->hr_item)); 766 config_item_name(&reg->hr_item), reg->hr_dev_name);
747 767
748 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); 768 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
749 769
@@ -754,6 +774,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
754 if (o2hb_pop_count(&o2hb_quorum_region_bitmap, 774 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
755 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) 775 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
756 o2hb_region_unpin(NULL); 776 o2hb_region_unpin(NULL);
777unlock:
778 spin_unlock(&o2hb_live_lock);
757} 779}
758 780
759static int o2hb_check_slot(struct o2hb_region *reg, 781static int o2hb_check_slot(struct o2hb_region *reg,
@@ -925,8 +947,6 @@ fire_callbacks:
925 slot->ds_equal_samples = 0; 947 slot->ds_equal_samples = 0;
926 } 948 }
927out: 949out:
928 o2hb_set_quorum_device(reg, slot);
929
930 spin_unlock(&o2hb_live_lock); 950 spin_unlock(&o2hb_live_lock);
931 951
932 o2hb_run_event_list(&event); 952 o2hb_run_event_list(&event);
@@ -957,7 +977,8 @@ static int o2hb_highest_node(unsigned long *nodes,
957 977
958static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) 978static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
959{ 979{
960 int i, ret, highest_node, change = 0; 980 int i, ret, highest_node;
981 int membership_change = 0, own_slot_ok = 0;
961 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 982 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
962 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 983 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
963 struct o2hb_bio_wait_ctxt write_wc; 984 struct o2hb_bio_wait_ctxt write_wc;
@@ -966,7 +987,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
966 sizeof(configured_nodes)); 987 sizeof(configured_nodes));
967 if (ret) { 988 if (ret) {
968 mlog_errno(ret); 989 mlog_errno(ret);
969 return ret; 990 goto bail;
970 } 991 }
971 992
972 /* 993 /*
@@ -982,8 +1003,9 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
982 1003
983 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 1004 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
984 if (highest_node >= O2NM_MAX_NODES) { 1005 if (highest_node >= O2NM_MAX_NODES) {
985 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 1006 mlog(ML_NOTICE, "o2hb: No configured nodes found!\n");
986 return -EINVAL; 1007 ret = -EINVAL;
1008 goto bail;
987 } 1009 }
988 1010
989 /* No sense in reading the slots of nodes that don't exist 1011 /* No sense in reading the slots of nodes that don't exist
@@ -993,29 +1015,27 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
993 ret = o2hb_read_slots(reg, highest_node + 1); 1015 ret = o2hb_read_slots(reg, highest_node + 1);
994 if (ret < 0) { 1016 if (ret < 0) {
995 mlog_errno(ret); 1017 mlog_errno(ret);
996 return ret; 1018 goto bail;
997 } 1019 }
998 1020
999 /* With an up to date view of the slots, we can check that no 1021 /* With an up to date view of the slots, we can check that no
1000 * other node has been improperly configured to heartbeat in 1022 * other node has been improperly configured to heartbeat in
1001 * our slot. */ 1023 * our slot. */
1002 o2hb_check_last_timestamp(reg); 1024 own_slot_ok = o2hb_check_own_slot(reg);
1003 1025
1004 /* fill in the proper info for our next heartbeat */ 1026 /* fill in the proper info for our next heartbeat */
1005 o2hb_prepare_block(reg, reg->hr_generation); 1027 o2hb_prepare_block(reg, reg->hr_generation);
1006 1028
1007 /* And fire off the write. Note that we don't wait on this I/O
1008 * until later. */
1009 ret = o2hb_issue_node_write(reg, &write_wc); 1029 ret = o2hb_issue_node_write(reg, &write_wc);
1010 if (ret < 0) { 1030 if (ret < 0) {
1011 mlog_errno(ret); 1031 mlog_errno(ret);
1012 return ret; 1032 goto bail;
1013 } 1033 }
1014 1034
1015 i = -1; 1035 i = -1;
1016 while((i = find_next_bit(configured_nodes, 1036 while((i = find_next_bit(configured_nodes,
1017 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { 1037 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
1018 change |= o2hb_check_slot(reg, &reg->hr_slots[i]); 1038 membership_change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
1019 } 1039 }
1020 1040
1021 /* 1041 /*
@@ -1030,18 +1050,39 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
1030 * disk */ 1050 * disk */
1031 mlog(ML_ERROR, "Write error %d on device \"%s\"\n", 1051 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
1032 write_wc.wc_error, reg->hr_dev_name); 1052 write_wc.wc_error, reg->hr_dev_name);
1033 return write_wc.wc_error; 1053 ret = write_wc.wc_error;
1054 goto bail;
1034 } 1055 }
1035 1056
1036 o2hb_arm_write_timeout(reg); 1057 /* Skip disarming the timeout if own slot has stale/bad data */
1058 if (own_slot_ok) {
1059 o2hb_set_quorum_device(reg);
1060 o2hb_arm_write_timeout(reg);
1061 }
1037 1062
1063bail:
1038 /* let the person who launched us know when things are steady */ 1064 /* let the person who launched us know when things are steady */
1039 if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) { 1065 if (atomic_read(&reg->hr_steady_iterations) != 0) {
1040 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 1066 if (!ret && own_slot_ok && !membership_change) {
1067 if (atomic_dec_and_test(&reg->hr_steady_iterations))
1068 wake_up(&o2hb_steady_queue);
1069 }
1070 }
1071
1072 if (atomic_read(&reg->hr_steady_iterations) != 0) {
1073 if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) {
1074 printk(KERN_NOTICE "o2hb: Unable to stabilize "
1075 "heartbeart on region %s (%s)\n",
1076 config_item_name(&reg->hr_item),
1077 reg->hr_dev_name);
1078 atomic_set(&reg->hr_steady_iterations, 0);
1079 reg->hr_aborted_start = 1;
1041 wake_up(&o2hb_steady_queue); 1080 wake_up(&o2hb_steady_queue);
1081 ret = -EIO;
1082 }
1042 } 1083 }
1043 1084
1044 return 0; 1085 return ret;
1045} 1086}
1046 1087
1047/* Subtract b from a, storing the result in a. a *must* have a larger 1088/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -1095,7 +1136,8 @@ static int o2hb_thread(void *data)
1095 /* Pin node */ 1136 /* Pin node */
1096 o2nm_depend_this_node(); 1137 o2nm_depend_this_node();
1097 1138
1098 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 1139 while (!kthread_should_stop() &&
1140 !reg->hr_unclean_stop && !reg->hr_aborted_start) {
1099 /* We track the time spent inside 1141 /* We track the time spent inside
1100 * o2hb_do_disk_heartbeat so that we avoid more than 1142 * o2hb_do_disk_heartbeat so that we avoid more than
1101 * hr_timeout_ms between disk writes. On busy systems 1143 * hr_timeout_ms between disk writes. On busy systems
@@ -1103,10 +1145,7 @@ static int o2hb_thread(void *data)
1103 * likely to time itself out. */ 1145 * likely to time itself out. */
1104 do_gettimeofday(&before_hb); 1146 do_gettimeofday(&before_hb);
1105 1147
1106 i = 0; 1148 ret = o2hb_do_disk_heartbeat(reg);
1107 do {
1108 ret = o2hb_do_disk_heartbeat(reg);
1109 } while (ret && ++i < 2);
1110 1149
1111 do_gettimeofday(&after_hb); 1150 do_gettimeofday(&after_hb);
1112 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 1151 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
@@ -1117,7 +1156,8 @@ static int o2hb_thread(void *data)
1117 after_hb.tv_sec, (unsigned long) after_hb.tv_usec, 1156 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
1118 elapsed_msec); 1157 elapsed_msec);
1119 1158
1120 if (elapsed_msec < reg->hr_timeout_ms) { 1159 if (!kthread_should_stop() &&
1160 elapsed_msec < reg->hr_timeout_ms) {
1121 /* the kthread api has blocked signals for us so no 1161 /* the kthread api has blocked signals for us so no
1122 * need to record the return value. */ 1162 * need to record the return value. */
1123 msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); 1163 msleep_interruptible(reg->hr_timeout_ms - elapsed_msec);
@@ -1134,20 +1174,20 @@ static int o2hb_thread(void *data)
1134 * to timeout on this region when we could just as easily 1174 * to timeout on this region when we could just as easily
1135 * write a clear generation - thus indicating to them that 1175 * write a clear generation - thus indicating to them that
1136 * this node has left this region. 1176 * this node has left this region.
1137 * 1177 */
1138 * XXX: Should we skip this on unclean_stop? */ 1178 if (!reg->hr_unclean_stop && !reg->hr_aborted_start) {
1139 o2hb_prepare_block(reg, 0); 1179 o2hb_prepare_block(reg, 0);
1140 ret = o2hb_issue_node_write(reg, &write_wc); 1180 ret = o2hb_issue_node_write(reg, &write_wc);
1141 if (ret == 0) { 1181 if (ret == 0)
1142 o2hb_wait_on_io(reg, &write_wc); 1182 o2hb_wait_on_io(reg, &write_wc);
1143 } else { 1183 else
1144 mlog_errno(ret); 1184 mlog_errno(ret);
1145 } 1185 }
1146 1186
1147 /* Unpin node */ 1187 /* Unpin node */
1148 o2nm_undepend_this_node(); 1188 o2nm_undepend_this_node();
1149 1189
1150 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); 1190 mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n");
1151 1191
1152 return 0; 1192 return 0;
1153} 1193}
@@ -1158,6 +1198,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1158 struct o2hb_debug_buf *db = inode->i_private; 1198 struct o2hb_debug_buf *db = inode->i_private;
1159 struct o2hb_region *reg; 1199 struct o2hb_region *reg;
1160 unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 1200 unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
1201 unsigned long lts;
1161 char *buf = NULL; 1202 char *buf = NULL;
1162 int i = -1; 1203 int i = -1;
1163 int out = 0; 1204 int out = 0;
@@ -1194,9 +1235,11 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1194 1235
1195 case O2HB_DB_TYPE_REGION_ELAPSED_TIME: 1236 case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
1196 reg = (struct o2hb_region *)db->db_data; 1237 reg = (struct o2hb_region *)db->db_data;
1197 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", 1238 lts = reg->hr_last_timeout_start;
1198 jiffies_to_msecs(jiffies - 1239 /* If 0, it has never been set before */
1199 reg->hr_last_timeout_start)); 1240 if (lts)
1241 lts = jiffies_to_msecs(jiffies - lts);
1242 out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts);
1200 goto done; 1243 goto done;
1201 1244
1202 case O2HB_DB_TYPE_REGION_PINNED: 1245 case O2HB_DB_TYPE_REGION_PINNED:
@@ -1426,6 +1469,8 @@ static void o2hb_region_release(struct config_item *item)
1426 struct page *page; 1469 struct page *page;
1427 struct o2hb_region *reg = to_o2hb_region(item); 1470 struct o2hb_region *reg = to_o2hb_region(item);
1428 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473
1429 if (reg->hr_tmp_block) 1474 if (reg->hr_tmp_block)
1430 kfree(reg->hr_tmp_block); 1475 kfree(reg->hr_tmp_block);
1431 1476
@@ -1792,7 +1837,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1792 live_threshold <<= 1; 1837 live_threshold <<= 1;
1793 spin_unlock(&o2hb_live_lock); 1838 spin_unlock(&o2hb_live_lock);
1794 } 1839 }
1795 atomic_set(&reg->hr_steady_iterations, live_threshold + 1); 1840 ++live_threshold;
1841 atomic_set(&reg->hr_steady_iterations, live_threshold);
1842 /* unsteady_iterations is double the steady_iterations */
1843 atomic_set(&reg->hr_unsteady_iterations, (live_threshold << 1));
1796 1844
1797 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1845 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
1798 reg->hr_item.ci_name); 1846 reg->hr_item.ci_name);
@@ -1809,14 +1857,12 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1809 ret = wait_event_interruptible(o2hb_steady_queue, 1857 ret = wait_event_interruptible(o2hb_steady_queue,
1810 atomic_read(&reg->hr_steady_iterations) == 0); 1858 atomic_read(&reg->hr_steady_iterations) == 0);
1811 if (ret) { 1859 if (ret) {
1812 /* We got interrupted (hello ptrace!). Clean up */ 1860 atomic_set(&reg->hr_steady_iterations, 0);
1813 spin_lock(&o2hb_live_lock); 1861 reg->hr_aborted_start = 1;
1814 hb_task = reg->hr_task; 1862 }
1815 reg->hr_task = NULL;
1816 spin_unlock(&o2hb_live_lock);
1817 1863
1818 if (hb_task) 1864 if (reg->hr_aborted_start) {
1819 kthread_stop(hb_task); 1865 ret = -EIO;
1820 goto out; 1866 goto out;
1821 } 1867 }
1822 1868
@@ -1833,8 +1879,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1833 ret = -EIO; 1879 ret = -EIO;
1834 1880
1835 if (hb_task && o2hb_global_heartbeat_active()) 1881 if (hb_task && o2hb_global_heartbeat_active())
1836 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", 1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
1837 config_item_name(&reg->hr_item)); 1883 config_item_name(&reg->hr_item), reg->hr_dev_name);
1838 1884
1839out: 1885out:
1840 if (filp) 1886 if (filp)
@@ -2092,13 +2138,6 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2092 2138
2093 /* stop the thread when the user removes the region dir */ 2139 /* stop the thread when the user removes the region dir */
2094 spin_lock(&o2hb_live_lock); 2140 spin_lock(&o2hb_live_lock);
2095 if (o2hb_global_heartbeat_active()) {
2096 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2097 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2098 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2099 quorum_region = 1;
2100 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2101 }
2102 hb_task = reg->hr_task; 2141 hb_task = reg->hr_task;
2103 reg->hr_task = NULL; 2142 reg->hr_task = NULL;
2104 reg->hr_item_dropped = 1; 2143 reg->hr_item_dropped = 1;
@@ -2107,19 +2146,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2107 if (hb_task) 2146 if (hb_task)
2108 kthread_stop(hb_task); 2147 kthread_stop(hb_task);
2109 2148
2149 if (o2hb_global_heartbeat_active()) {
2150 spin_lock(&o2hb_live_lock);
2151 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2152 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2153 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2154 quorum_region = 1;
2155 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2156 spin_unlock(&o2hb_live_lock);
2157 printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n",
2158 ((atomic_read(&reg->hr_steady_iterations) == 0) ?
2159 "stopped" : "start aborted"), config_item_name(item),
2160 reg->hr_dev_name);
2161 }
2162
2110 /* 2163 /*
2111 * If we're racing a dev_write(), we need to wake them. They will 2164 * If we're racing a dev_write(), we need to wake them. They will
2112 * check reg->hr_task 2165 * check reg->hr_task
2113 */ 2166 */
2114 if (atomic_read(&reg->hr_steady_iterations) != 0) { 2167 if (atomic_read(&reg->hr_steady_iterations) != 0) {
2168 reg->hr_aborted_start = 1;
2115 atomic_set(&reg->hr_steady_iterations, 0); 2169 atomic_set(&reg->hr_steady_iterations, 0);
2116 wake_up(&o2hb_steady_queue); 2170 wake_up(&o2hb_steady_queue);
2117 } 2171 }
2118 2172
2119 if (o2hb_global_heartbeat_active())
2120 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2121 config_item_name(&reg->hr_item));
2122
2123 config_item_put(item); 2173 config_item_put(item);
2124 2174
2125 if (!o2hb_global_heartbeat_active() || !quorum_region) 2175 if (!o2hb_global_heartbeat_active() || !quorum_region)
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 3a5835904b3..dc45deb19e6 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -47,6 +47,7 @@
47#define SC_DEBUG_NAME "sock_containers" 47#define SC_DEBUG_NAME "sock_containers"
48#define NST_DEBUG_NAME "send_tracking" 48#define NST_DEBUG_NAME "send_tracking"
49#define STATS_DEBUG_NAME "stats" 49#define STATS_DEBUG_NAME "stats"
50#define NODES_DEBUG_NAME "connected_nodes"
50 51
51#define SHOW_SOCK_CONTAINERS 0 52#define SHOW_SOCK_CONTAINERS 0
52#define SHOW_SOCK_STATS 1 53#define SHOW_SOCK_STATS 1
@@ -55,6 +56,7 @@ static struct dentry *o2net_dentry;
55static struct dentry *sc_dentry; 56static struct dentry *sc_dentry;
56static struct dentry *nst_dentry; 57static struct dentry *nst_dentry;
57static struct dentry *stats_dentry; 58static struct dentry *stats_dentry;
59static struct dentry *nodes_dentry;
58 60
59static DEFINE_SPINLOCK(o2net_debug_lock); 61static DEFINE_SPINLOCK(o2net_debug_lock);
60 62
@@ -491,53 +493,87 @@ static const struct file_operations sc_seq_fops = {
491 .release = sc_fop_release, 493 .release = sc_fop_release,
492}; 494};
493 495
494int o2net_debugfs_init(void) 496static int o2net_fill_bitmap(char *buf, int len)
495{ 497{
496 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); 498 unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
497 if (!o2net_dentry) { 499 int i = -1, out = 0;
498 mlog_errno(-ENOMEM);
499 goto bail;
500 }
501 500
502 nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, 501 o2net_fill_node_map(map, sizeof(map));
503 o2net_dentry, NULL,
504 &nst_seq_fops);
505 if (!nst_dentry) {
506 mlog_errno(-ENOMEM);
507 goto bail;
508 }
509 502
510 sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, 503 while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
511 o2net_dentry, NULL, 504 out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
512 &sc_seq_fops); 505 out += snprintf(buf + out, PAGE_SIZE - out, "\n");
513 if (!sc_dentry) {
514 mlog_errno(-ENOMEM);
515 goto bail;
516 }
517 506
518 stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR, 507 return out;
519 o2net_dentry, NULL, 508}
520 &stats_seq_fops); 509
521 if (!stats_dentry) { 510static int nodes_fop_open(struct inode *inode, struct file *file)
522 mlog_errno(-ENOMEM); 511{
523 goto bail; 512 char *buf;
524 } 513
514 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
515 if (!buf)
516 return -ENOMEM;
517
518 i_size_write(inode, o2net_fill_bitmap(buf, PAGE_SIZE));
519
520 file->private_data = buf;
525 521
526 return 0; 522 return 0;
527bail:
528 debugfs_remove(stats_dentry);
529 debugfs_remove(sc_dentry);
530 debugfs_remove(nst_dentry);
531 debugfs_remove(o2net_dentry);
532 return -ENOMEM;
533} 523}
534 524
525static int o2net_debug_release(struct inode *inode, struct file *file)
526{
527 kfree(file->private_data);
528 return 0;
529}
530
531static ssize_t o2net_debug_read(struct file *file, char __user *buf,
532 size_t nbytes, loff_t *ppos)
533{
534 return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
535 i_size_read(file->f_mapping->host));
536}
537
538static const struct file_operations nodes_fops = {
539 .open = nodes_fop_open,
540 .release = o2net_debug_release,
541 .read = o2net_debug_read,
542 .llseek = generic_file_llseek,
543};
544
535void o2net_debugfs_exit(void) 545void o2net_debugfs_exit(void)
536{ 546{
547 debugfs_remove(nodes_dentry);
537 debugfs_remove(stats_dentry); 548 debugfs_remove(stats_dentry);
538 debugfs_remove(sc_dentry); 549 debugfs_remove(sc_dentry);
539 debugfs_remove(nst_dentry); 550 debugfs_remove(nst_dentry);
540 debugfs_remove(o2net_dentry); 551 debugfs_remove(o2net_dentry);
541} 552}
542 553
554int o2net_debugfs_init(void)
555{
556 mode_t mode = S_IFREG|S_IRUSR;
557
558 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
559 if (o2net_dentry)
560 nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode,
561 o2net_dentry, NULL, &nst_seq_fops);
562 if (nst_dentry)
563 sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode,
564 o2net_dentry, NULL, &sc_seq_fops);
565 if (sc_dentry)
566 stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode,
567 o2net_dentry, NULL, &stats_seq_fops);
568 if (stats_dentry)
569 nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode,
570 o2net_dentry, NULL, &nodes_fops);
571 if (nodes_dentry)
572 return 0;
573
574 o2net_debugfs_exit();
575 mlog_errno(-ENOMEM);
576 return -ENOMEM;
577}
578
543#endif /* CONFIG_DEBUG_FS */ 579#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ad7d0c155de..044e7b58d31 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -546,7 +546,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
546 } 546 }
547 547
548 if (was_valid && !valid) { 548 if (was_valid && !valid) {
549 printk(KERN_NOTICE "o2net: no longer connected to " 549 printk(KERN_NOTICE "o2net: No longer connected to "
550 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); 550 SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
551 o2net_complete_nodes_nsw(nn); 551 o2net_complete_nodes_nsw(nn);
552 } 552 }
@@ -556,7 +556,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
556 cancel_delayed_work(&nn->nn_connect_expired); 556 cancel_delayed_work(&nn->nn_connect_expired);
557 printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", 557 printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
558 o2nm_this_node() > sc->sc_node->nd_num ? 558 o2nm_this_node() > sc->sc_node->nd_num ?
559 "connected to" : "accepted connection from", 559 "Connected to" : "Accepted connection from",
560 SC_NODEF_ARGS(sc)); 560 SC_NODEF_ARGS(sc));
561 } 561 }
562 562
@@ -644,7 +644,7 @@ static void o2net_state_change(struct sock *sk)
644 o2net_sc_queue_work(sc, &sc->sc_connect_work); 644 o2net_sc_queue_work(sc, &sc->sc_connect_work);
645 break; 645 break;
646 default: 646 default:
647 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT 647 printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT
648 " shutdown, state %d\n", 648 " shutdown, state %d\n",
649 SC_NODEF_ARGS(sc), sk->sk_state); 649 SC_NODEF_ARGS(sc), sk->sk_state);
650 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 650 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
@@ -1035,6 +1035,25 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
1035 return ret; 1035 return ret;
1036} 1036}
1037 1037
1038/* Get a map of all nodes to which this node is currently connected to */
1039void o2net_fill_node_map(unsigned long *map, unsigned bytes)
1040{
1041 struct o2net_sock_container *sc;
1042 int node, ret;
1043
1044 BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
1045
1046 memset(map, 0, bytes);
1047 for (node = 0; node < O2NM_MAX_NODES; ++node) {
1048 o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret);
1049 if (!ret) {
1050 set_bit(node, map);
1051 sc_put(sc);
1052 }
1053 }
1054}
1055EXPORT_SYMBOL_GPL(o2net_fill_node_map);
1056
1038int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, 1057int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
1039 size_t caller_veclen, u8 target_node, int *status) 1058 size_t caller_veclen, u8 target_node, int *status)
1040{ 1059{
@@ -1285,11 +1304,11 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1285 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 1304 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1286 1305
1287 if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { 1306 if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) {
1288 mlog(ML_NOTICE, SC_NODEF_FMT " advertised net protocol " 1307 printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " Advertised net "
1289 "version %llu but %llu is required, disconnecting\n", 1308 "protocol version %llu but %llu is required. "
1290 SC_NODEF_ARGS(sc), 1309 "Disconnecting.\n", SC_NODEF_ARGS(sc),
1291 (unsigned long long)be64_to_cpu(hand->protocol_version), 1310 (unsigned long long)be64_to_cpu(hand->protocol_version),
1292 O2NET_PROTOCOL_VERSION); 1311 O2NET_PROTOCOL_VERSION);
1293 1312
1294 /* don't bother reconnecting if its the wrong version. */ 1313 /* don't bother reconnecting if its the wrong version. */
1295 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1314 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
@@ -1303,33 +1322,33 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1303 */ 1322 */
1304 if (be32_to_cpu(hand->o2net_idle_timeout_ms) != 1323 if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
1305 o2net_idle_timeout()) { 1324 o2net_idle_timeout()) {
1306 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " 1325 printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network "
1307 "%u ms, but we use %u ms locally. disconnecting\n", 1326 "idle timeout of %u ms, but we use %u ms locally. "
1308 SC_NODEF_ARGS(sc), 1327 "Disconnecting.\n", SC_NODEF_ARGS(sc),
1309 be32_to_cpu(hand->o2net_idle_timeout_ms), 1328 be32_to_cpu(hand->o2net_idle_timeout_ms),
1310 o2net_idle_timeout()); 1329 o2net_idle_timeout());
1311 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1330 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1312 return -1; 1331 return -1;
1313 } 1332 }
1314 1333
1315 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != 1334 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
1316 o2net_keepalive_delay()) { 1335 o2net_keepalive_delay()) {
1317 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " 1336 printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive "
1318 "%u ms, but we use %u ms locally. disconnecting\n", 1337 "delay of %u ms, but we use %u ms locally. "
1319 SC_NODEF_ARGS(sc), 1338 "Disconnecting.\n", SC_NODEF_ARGS(sc),
1320 be32_to_cpu(hand->o2net_keepalive_delay_ms), 1339 be32_to_cpu(hand->o2net_keepalive_delay_ms),
1321 o2net_keepalive_delay()); 1340 o2net_keepalive_delay());
1322 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1341 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1323 return -1; 1342 return -1;
1324 } 1343 }
1325 1344
1326 if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != 1345 if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) !=
1327 O2HB_MAX_WRITE_TIMEOUT_MS) { 1346 O2HB_MAX_WRITE_TIMEOUT_MS) {
1328 mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " 1347 printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat "
1329 "%u ms, but we use %u ms locally. disconnecting\n", 1348 "timeout of %u ms, but we use %u ms locally. "
1330 SC_NODEF_ARGS(sc), 1349 "Disconnecting.\n", SC_NODEF_ARGS(sc),
1331 be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), 1350 be32_to_cpu(hand->o2hb_heartbeat_timeout_ms),
1332 O2HB_MAX_WRITE_TIMEOUT_MS); 1351 O2HB_MAX_WRITE_TIMEOUT_MS);
1333 o2net_ensure_shutdown(nn, sc, -ENOTCONN); 1352 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1334 return -1; 1353 return -1;
1335 } 1354 }
@@ -1540,28 +1559,16 @@ static void o2net_idle_timer(unsigned long data)
1540{ 1559{
1541 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1560 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1542 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 1561 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1543
1544#ifdef CONFIG_DEBUG_FS 1562#ifdef CONFIG_DEBUG_FS
1545 ktime_t now = ktime_get(); 1563 unsigned long msecs = ktime_to_ms(ktime_get()) -
1564 ktime_to_ms(sc->sc_tv_timer);
1565#else
1566 unsigned long msecs = o2net_idle_timeout();
1546#endif 1567#endif
1547 1568
1548 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1569 printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
1549 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1570 "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc),
1550 o2net_idle_timeout() / 1000, 1571 msecs / 1000, msecs % 1000);
1551 o2net_idle_timeout() % 1000);
1552
1553#ifdef CONFIG_DEBUG_FS
1554 mlog(ML_NOTICE, "Here are some times that might help debug the "
1555 "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
1556 "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
1557 (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
1558 (long long)ktime_to_us(sc->sc_tv_data_ready),
1559 (long long)ktime_to_us(sc->sc_tv_advance_start),
1560 (long long)ktime_to_us(sc->sc_tv_advance_stop),
1561 sc->sc_msg_key, sc->sc_msg_type,
1562 (long long)ktime_to_us(sc->sc_tv_func_start),
1563 (long long)ktime_to_us(sc->sc_tv_func_stop));
1564#endif
1565 1572
1566 /* 1573 /*
1567 * Initialize the nn_timeout so that the next connection attempt 1574 * Initialize the nn_timeout so that the next connection attempt
@@ -1694,8 +1701,8 @@ static void o2net_start_connect(struct work_struct *work)
1694 1701
1695out: 1702out:
1696 if (ret) { 1703 if (ret) {
1697 mlog(ML_NOTICE, "connect attempt to " SC_NODEF_FMT " failed " 1704 printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT
1698 "with errno %d\n", SC_NODEF_ARGS(sc), ret); 1705 " failed with errno %d\n", SC_NODEF_ARGS(sc), ret);
1699 /* 0 err so that another will be queued and attempted 1706 /* 0 err so that another will be queued and attempted
1700 * from set_nn_state */ 1707 * from set_nn_state */
1701 if (sc) 1708 if (sc)
@@ -1718,8 +1725,8 @@ static void o2net_connect_expired(struct work_struct *work)
1718 1725
1719 spin_lock(&nn->nn_lock); 1726 spin_lock(&nn->nn_lock);
1720 if (!nn->nn_sc_valid) { 1727 if (!nn->nn_sc_valid) {
1721 mlog(ML_ERROR, "no connection established with node %u after " 1728 printk(KERN_NOTICE "o2net: No connection established with "
1722 "%u.%u seconds, giving up and returning errors.\n", 1729 "node %u after %u.%u seconds, giving up.\n",
1723 o2net_num_from_nn(nn), 1730 o2net_num_from_nn(nn),
1724 o2net_idle_timeout() / 1000, 1731 o2net_idle_timeout() / 1000,
1725 o2net_idle_timeout() % 1000); 1732 o2net_idle_timeout() % 1000);
@@ -1862,21 +1869,21 @@ static int o2net_accept_one(struct socket *sock)
1862 1869
1863 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); 1870 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr);
1864 if (node == NULL) { 1871 if (node == NULL) {
1865 mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n", 1872 printk(KERN_NOTICE "o2net: Attempt to connect from unknown "
1866 &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 1873 "node at %pI4:%d\n", &sin.sin_addr.s_addr,
1874 ntohs(sin.sin_port));
1867 ret = -EINVAL; 1875 ret = -EINVAL;
1868 goto out; 1876 goto out;
1869 } 1877 }
1870 1878
1871 if (o2nm_this_node() >= node->nd_num) { 1879 if (o2nm_this_node() >= node->nd_num) {
1872 local_node = o2nm_get_node_by_num(o2nm_this_node()); 1880 local_node = o2nm_get_node_by_num(o2nm_this_node());
1873 mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" 1881 printk(KERN_NOTICE "o2net: Unexpected connect attempt seen "
1874 "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", 1882 "at node '%s' (%u, %pI4:%d) from node '%s' (%u, "
1875 local_node->nd_name, local_node->nd_num, 1883 "%pI4:%d)\n", local_node->nd_name, local_node->nd_num,
1876 &(local_node->nd_ipv4_address), 1884 &(local_node->nd_ipv4_address),
1877 ntohs(local_node->nd_ipv4_port), 1885 ntohs(local_node->nd_ipv4_port), node->nd_name,
1878 node->nd_name, node->nd_num, &sin.sin_addr.s_addr, 1886 node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port));
1879 ntohs(sin.sin_port));
1880 ret = -EINVAL; 1887 ret = -EINVAL;
1881 goto out; 1888 goto out;
1882 } 1889 }
@@ -1901,10 +1908,10 @@ static int o2net_accept_one(struct socket *sock)
1901 ret = 0; 1908 ret = 0;
1902 spin_unlock(&nn->nn_lock); 1909 spin_unlock(&nn->nn_lock);
1903 if (ret) { 1910 if (ret) {
1904 mlog(ML_NOTICE, "attempt to connect from node '%s' at " 1911 printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' "
1905 "%pI4:%d but it already has an open connection\n", 1912 "at %pI4:%d but it already has an open connection\n",
1906 node->nd_name, &sin.sin_addr.s_addr, 1913 node->nd_name, &sin.sin_addr.s_addr,
1907 ntohs(sin.sin_port)); 1914 ntohs(sin.sin_port));
1908 goto out; 1915 goto out;
1909 } 1916 }
1910 1917
@@ -1984,7 +1991,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
1984 1991
1985 ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); 1992 ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1986 if (ret < 0) { 1993 if (ret < 0) {
1987 mlog(ML_ERROR, "unable to create socket, ret=%d\n", ret); 1994 printk(KERN_ERR "o2net: Error %d while creating socket\n", ret);
1988 goto out; 1995 goto out;
1989 } 1996 }
1990 1997
@@ -2001,16 +2008,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
2001 sock->sk->sk_reuse = 1; 2008 sock->sk->sk_reuse = 1;
2002 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); 2009 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
2003 if (ret < 0) { 2010 if (ret < 0) {
2004 mlog(ML_ERROR, "unable to bind socket at %pI4:%u, " 2011 printk(KERN_ERR "o2net: Error %d while binding socket at "
2005 "ret=%d\n", &addr, ntohs(port), ret); 2012 "%pI4:%u\n", ret, &addr, ntohs(port));
2006 goto out; 2013 goto out;
2007 } 2014 }
2008 2015
2009 ret = sock->ops->listen(sock, 64); 2016 ret = sock->ops->listen(sock, 64);
2010 if (ret < 0) { 2017 if (ret < 0)
2011 mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n", 2018 printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n",
2012 &addr, ntohs(port), ret); 2019 ret, &addr, ntohs(port));
2013 }
2014 2020
2015out: 2021out:
2016 if (ret) { 2022 if (ret) {
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index fd6179eb26d..5bada2a69b5 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -106,6 +106,8 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
106 struct list_head *unreg_list); 106 struct list_head *unreg_list);
107void o2net_unregister_handler_list(struct list_head *list); 107void o2net_unregister_handler_list(struct list_head *list);
108 108
109void o2net_fill_node_map(unsigned long *map, unsigned bytes);
110
109struct o2nm_node; 111struct o2nm_node;
110int o2net_register_hb_callbacks(void); 112int o2net_register_hb_callbacks(void);
111void o2net_unregister_hb_callbacks(void); 113void o2net_unregister_hb_callbacks(void);