about summary refs log tree commit diff stats
path: root/fs/ocfs2/cluster
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-01-11 14:28:34 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-11 14:28:34 -0500
commit 498f7f505dc79934c878c7667840c50c64f232fc (patch)
tree 67eca6dcb6fe76ec3d2bdef5e3102591fe957776 /fs/ocfs2/cluster
parent 0969d11e201b82d30a158ccdb3aca67a7b845613 (diff)
parent d6351db2073315ddebac72cc1935e912f60f86e0 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (22 commits)
  MAINTAINERS: Update Joel Becker's email address
  ocfs2: Remove unused truncate function from alloc.c
  ocfs2/cluster: dereferencing before checking in nst_seq_show()
  ocfs2: fix build for OCFS2_FS_STATS not enabled
  ocfs2/cluster: Show o2net timing statistics
  ocfs2/cluster: Track process message timing stats for each socket
  ocfs2/cluster: Track send message timing stats for each socket
  ocfs2/cluster: Use ktime instead of timeval in struct o2net_sock_container
  ocfs2/cluster: Replace timeval with ktime in struct o2net_send_tracking
  ocfs2: Add DEBUG_FS dependency
  ocfs2/dlm: Hard code the values for enums
  ocfs2/dlm: Minor cleanup
  ocfs2/dlm: Cleanup dlmdebug.c
  ocfs2: Release buffer_head in case of error in ocfs2_double_lock.
  ocfs2/cluster: Pin the local node when o2hb thread starts
  ocfs2/cluster: Show pin state for each o2hb region
  ocfs2/cluster: Pin/unpin o2hb regions
  ocfs2/cluster: Remove dropped region from o2hb quorum region bitmap
  ocfs2/cluster: Pin the remote node item in configfs
  ocfs2/dlm: make existing convertion precedent over new lock
  ...
Diffstat (limited to 'fs/ocfs2/cluster')
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 246
-rw-r--r-- fs/ocfs2/cluster/netdebug.c 286
-rw-r--r-- fs/ocfs2/cluster/tcp.c 145
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 33
4 files changed, 527 insertions, 183 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9e3d45bcb5fd..a6cc05302e9f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
82#define O2HB_DB_TYPE_REGION_LIVENODES 4 82#define O2HB_DB_TYPE_REGION_LIVENODES 4
83#define O2HB_DB_TYPE_REGION_NUMBER 5 83#define O2HB_DB_TYPE_REGION_NUMBER 5
84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
85#define O2HB_DB_TYPE_REGION_PINNED 7
85struct o2hb_debug_buf { 86struct o2hb_debug_buf {
86 int db_type; 87 int db_type;
87 int db_size; 88 int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
101#define O2HB_DEBUG_FAILEDREGIONS "failed_regions" 102#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
102#define O2HB_DEBUG_REGION_NUMBER "num" 103#define O2HB_DEBUG_REGION_NUMBER "num"
103#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" 104#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
105#define O2HB_DEBUG_REGION_PINNED "pinned"
104 106
105static struct dentry *o2hb_debug_dir; 107static struct dentry *o2hb_debug_dir;
106static struct dentry *o2hb_debug_livenodes; 108static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
132unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 134unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
133unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; 135unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
134 136
137/*
138 * o2hb_dependent_users tracks the number of registered callbacks that depend
139 * on heartbeat. o2net and o2dlm are two entities that register this callback.
140 * However only o2dlm depends on the heartbeat. It does not want the heartbeat
141 * to stop while a dlm domain is still active.
142 */
143unsigned int o2hb_dependent_users;
144
145/*
146 * In global heartbeat mode, all regions are pinned if there are one or more
147 * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
148 * regions are unpinned if the region count exceeds the cut off or the number
149 * of dependent users falls to zero.
150 */
151#define O2HB_PIN_CUT_OFF 3
152
153/*
154 * In local heartbeat mode, we assume the dlm domain name to be the same as
155 * region uuid. This is true for domains created for the file system but not
156 * necessarily true for userdlm domains. This is a known limitation.
157 *
158 * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
159 * works for both file system and userdlm domains.
160 */
161static int o2hb_region_pin(const char *region_uuid);
162static void o2hb_region_unpin(const char *region_uuid);
163
135/* Only sets a new threshold if there are no active regions. 164/* Only sets a new threshold if there are no active regions.
136 * 165 *
137 * No locking or otherwise interesting code is required for reading 166 * No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
186 struct config_item hr_item; 215 struct config_item hr_item;
187 216
188 struct list_head hr_all_item; 217 struct list_head hr_all_item;
189 unsigned hr_unclean_stop:1; 218 unsigned hr_unclean_stop:1,
219 hr_item_pinned:1,
220 hr_item_dropped:1;
190 221
191 /* protected by the hr_callback_sem */ 222 /* protected by the hr_callback_sem */
192 struct task_struct *hr_task; 223 struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
212 struct dentry *hr_debug_livenodes; 243 struct dentry *hr_debug_livenodes;
213 struct dentry *hr_debug_regnum; 244 struct dentry *hr_debug_regnum;
214 struct dentry *hr_debug_elapsed_time; 245 struct dentry *hr_debug_elapsed_time;
246 struct dentry *hr_debug_pinned;
215 struct o2hb_debug_buf *hr_db_livenodes; 247 struct o2hb_debug_buf *hr_db_livenodes;
216 struct o2hb_debug_buf *hr_db_regnum; 248 struct o2hb_debug_buf *hr_db_regnum;
217 struct o2hb_debug_buf *hr_db_elapsed_time; 249 struct o2hb_debug_buf *hr_db_elapsed_time;
250 struct o2hb_debug_buf *hr_db_pinned;
218 251
219 /* let the person setting up hb wait for it to return until it 252 /* let the person setting up hb wait for it to return until it
220 * has reached a 'steady' state. This will be fixed when we have 253 * has reached a 'steady' state. This will be fixed when we have
@@ -701,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
701 config_item_name(&reg->hr_item)); 734 config_item_name(&reg->hr_item));
702 735
703 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); 736 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
737
738 /*
739 * If global heartbeat active, unpin all regions if the
740 * region count > CUT_OFF
741 */
742 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
743 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
744 o2hb_region_unpin(NULL);
704} 745}
705 746
706static int o2hb_check_slot(struct o2hb_region *reg, 747static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1041,6 +1082,9 @@ static int o2hb_thread(void *data)
1041 1082
1042 set_user_nice(current, -20); 1083 set_user_nice(current, -20);
1043 1084
1085 /* Pin node */
1086 o2nm_depend_this_node();
1087
1044 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 1088 while (!kthread_should_stop() && !reg->hr_unclean_stop) {
1045 /* We track the time spent inside 1089 /* We track the time spent inside
1046 * o2hb_do_disk_heartbeat so that we avoid more than 1090 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1090,6 +1134,9 @@ static int o2hb_thread(void *data)
1090 mlog_errno(ret); 1134 mlog_errno(ret);
1091 } 1135 }
1092 1136
1137 /* Unpin node */
1138 o2nm_undepend_this_node();
1139
1093 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); 1140 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
1094 1141
1095 return 0; 1142 return 0;
@@ -1142,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1142 reg->hr_last_timeout_start)); 1189 reg->hr_last_timeout_start));
1143 goto done; 1190 goto done;
1144 1191
1192 case O2HB_DB_TYPE_REGION_PINNED:
1193 reg = (struct o2hb_region *)db->db_data;
1194 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
1195 !!reg->hr_item_pinned);
1196 goto done;
1197
1145 default: 1198 default:
1146 goto done; 1199 goto done;
1147 } 1200 }
@@ -1315,6 +1368,8 @@ int o2hb_init(void)
1315 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); 1368 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
1316 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); 1369 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
1317 1370
1371 o2hb_dependent_users = 0;
1372
1318 return o2hb_debug_init(); 1373 return o2hb_debug_init();
1319} 1374}
1320 1375
@@ -1384,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
1384 debugfs_remove(reg->hr_debug_livenodes); 1439 debugfs_remove(reg->hr_debug_livenodes);
1385 debugfs_remove(reg->hr_debug_regnum); 1440 debugfs_remove(reg->hr_debug_regnum);
1386 debugfs_remove(reg->hr_debug_elapsed_time); 1441 debugfs_remove(reg->hr_debug_elapsed_time);
1442 debugfs_remove(reg->hr_debug_pinned);
1387 debugfs_remove(reg->hr_debug_dir); 1443 debugfs_remove(reg->hr_debug_dir);
1388 1444
1389 spin_lock(&o2hb_live_lock); 1445 spin_lock(&o2hb_live_lock);
@@ -1948,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
1948 goto bail; 2004 goto bail;
1949 } 2005 }
1950 2006
2007 reg->hr_debug_pinned =
2008 o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
2009 reg->hr_debug_dir,
2010 &(reg->hr_db_pinned),
2011 sizeof(*(reg->hr_db_pinned)),
2012 O2HB_DB_TYPE_REGION_PINNED,
2013 0, 0, reg);
2014 if (!reg->hr_debug_pinned) {
2015 mlog_errno(ret);
2016 goto bail;
2017 }
2018
1951 ret = 0; 2019 ret = 0;
1952bail: 2020bail:
1953 return ret; 2021 return ret;
@@ -2002,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2002{ 2070{
2003 struct task_struct *hb_task; 2071 struct task_struct *hb_task;
2004 struct o2hb_region *reg = to_o2hb_region(item); 2072 struct o2hb_region *reg = to_o2hb_region(item);
2073 int quorum_region = 0;
2005 2074
2006 /* stop the thread when the user removes the region dir */ 2075 /* stop the thread when the user removes the region dir */
2007 spin_lock(&o2hb_live_lock); 2076 spin_lock(&o2hb_live_lock);
2008 if (o2hb_global_heartbeat_active()) { 2077 if (o2hb_global_heartbeat_active()) {
2009 clear_bit(reg->hr_region_num, o2hb_region_bitmap); 2078 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2010 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); 2079 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2080 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2081 quorum_region = 1;
2082 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2011 } 2083 }
2012 hb_task = reg->hr_task; 2084 hb_task = reg->hr_task;
2013 reg->hr_task = NULL; 2085 reg->hr_task = NULL;
2086 reg->hr_item_dropped = 1;
2014 spin_unlock(&o2hb_live_lock); 2087 spin_unlock(&o2hb_live_lock);
2015 2088
2016 if (hb_task) 2089 if (hb_task)
@@ -2028,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2028 if (o2hb_global_heartbeat_active()) 2101 if (o2hb_global_heartbeat_active())
2029 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", 2102 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2030 config_item_name(&reg->hr_item)); 2103 config_item_name(&reg->hr_item));
2104
2031 config_item_put(item); 2105 config_item_put(item);
2106
2107 if (!o2hb_global_heartbeat_active() || !quorum_region)
2108 return;
2109
2110 /*
2111 * If global heartbeat active and there are dependent users,
2112 * pin all regions if quorum region count <= CUT_OFF
2113 */
2114 spin_lock(&o2hb_live_lock);
2115
2116 if (!o2hb_dependent_users)
2117 goto unlock;
2118
2119 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2120 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2121 o2hb_region_pin(NULL);
2122
2123unlock:
2124 spin_unlock(&o2hb_live_lock);
2032} 2125}
2033 2126
2034struct o2hb_heartbeat_group_attribute { 2127struct o2hb_heartbeat_group_attribute {
@@ -2214,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
2214} 2307}
2215EXPORT_SYMBOL_GPL(o2hb_setup_callback); 2308EXPORT_SYMBOL_GPL(o2hb_setup_callback);
2216 2309
2217static struct o2hb_region *o2hb_find_region(const char *region_uuid) 2310/*
2311 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2312 * In global heartbeat mode, region_uuid passed is NULL.
2313 *
2314 * In local, we only pin the matching region. In global we pin all the active
2315 * regions.
2316 */
2317static int o2hb_region_pin(const char *region_uuid)
2218{ 2318{
2219 struct o2hb_region *p, *reg = NULL; 2319 int ret = 0, found = 0;
2320 struct o2hb_region *reg;
2321 char *uuid;
2220 2322
2221 assert_spin_locked(&o2hb_live_lock); 2323 assert_spin_locked(&o2hb_live_lock);
2222 2324
2223 list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { 2325 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2224 if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { 2326 uuid = config_item_name(&reg->hr_item);
2225 reg = p; 2327
2226 break; 2328 /* local heartbeat */
2329 if (region_uuid) {
2330 if (strcmp(region_uuid, uuid))
2331 continue;
2332 found = 1;
2333 }
2334
2335 if (reg->hr_item_pinned || reg->hr_item_dropped)
2336 goto skip_pin;
2337
2338 /* Ignore ENOENT only for local hb (userdlm domain) */
2339 ret = o2nm_depend_item(&reg->hr_item);
2340 if (!ret) {
2341 mlog(ML_CLUSTER, "Pin region %s\n", uuid);
2342 reg->hr_item_pinned = 1;
2343 } else {
2344 if (ret == -ENOENT && found)
2345 ret = 0;
2346 else {
2347 mlog(ML_ERROR, "Pin region %s fails with %d\n",
2348 uuid, ret);
2349 break;
2350 }
2227 } 2351 }
2352skip_pin:
2353 if (found)
2354 break;
2228 } 2355 }
2229 2356
2230 return reg; 2357 return ret;
2231} 2358}
2232 2359
2233static int o2hb_region_get(const char *region_uuid) 2360/*
2361 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2362 * In global heartbeat mode, region_uuid passed is NULL.
2363 *
2364 * In local, we only unpin the matching region. In global we unpin all the
2365 * active regions.
2366 */
2367static void o2hb_region_unpin(const char *region_uuid)
2234{ 2368{
2235 int ret = 0;
2236 struct o2hb_region *reg; 2369 struct o2hb_region *reg;
2370 char *uuid;
2371 int found = 0;
2237 2372
2238 spin_lock(&o2hb_live_lock); 2373 assert_spin_locked(&o2hb_live_lock);
2239 2374
2240 reg = o2hb_find_region(region_uuid); 2375 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2241 if (!reg) 2376 uuid = config_item_name(&reg->hr_item);
2242 ret = -ENOENT; 2377 if (region_uuid) {
2243 spin_unlock(&o2hb_live_lock); 2378 if (strcmp(region_uuid, uuid))
2379 continue;
2380 found = 1;
2381 }
2244 2382
2245 if (ret) 2383 if (reg->hr_item_pinned) {
2246 goto out; 2384 mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
2385 o2nm_undepend_item(&reg->hr_item);
2386 reg->hr_item_pinned = 0;
2387 }
2388 if (found)
2389 break;
2390 }
2391}
2247 2392
2248 ret = o2nm_depend_this_node(); 2393static int o2hb_region_inc_user(const char *region_uuid)
2249 if (ret) 2394{
2250 goto out; 2395 int ret = 0;
2251 2396
2252 ret = o2nm_depend_item(&reg->hr_item); 2397 spin_lock(&o2hb_live_lock);
2253 if (ret)
2254 o2nm_undepend_this_node();
2255 2398
2256out: 2399 /* local heartbeat */
2400 if (!o2hb_global_heartbeat_active()) {
2401 ret = o2hb_region_pin(region_uuid);
2402 goto unlock;
2403 }
2404
2405 /*
2406 * if global heartbeat active and this is the first dependent user,
2407 * pin all regions if quorum region count <= CUT_OFF
2408 */
2409 o2hb_dependent_users++;
2410 if (o2hb_dependent_users > 1)
2411 goto unlock;
2412
2413 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2414 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2415 ret = o2hb_region_pin(NULL);
2416
2417unlock:
2418 spin_unlock(&o2hb_live_lock);
2257 return ret; 2419 return ret;
2258} 2420}
2259 2421
2260static void o2hb_region_put(const char *region_uuid) 2422void o2hb_region_dec_user(const char *region_uuid)
2261{ 2423{
2262 struct o2hb_region *reg;
2263
2264 spin_lock(&o2hb_live_lock); 2424 spin_lock(&o2hb_live_lock);
2265 2425
2266 reg = o2hb_find_region(region_uuid); 2426 /* local heartbeat */
2427 if (!o2hb_global_heartbeat_active()) {
2428 o2hb_region_unpin(region_uuid);
2429 goto unlock;
2430 }
2267 2431
2268 spin_unlock(&o2hb_live_lock); 2432 /*
2433 * if global heartbeat active and there are no dependent users,
2434 * unpin all quorum regions
2435 */
2436 o2hb_dependent_users--;
2437 if (!o2hb_dependent_users)
2438 o2hb_region_unpin(NULL);
2269 2439
2270 if (reg) { 2440unlock:
2271 o2nm_undepend_item(&reg->hr_item); 2441 spin_unlock(&o2hb_live_lock);
2272 o2nm_undepend_this_node();
2273 }
2274} 2442}
2275 2443
2276int o2hb_register_callback(const char *region_uuid, 2444int o2hb_register_callback(const char *region_uuid,
@@ -2291,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
2291 } 2459 }
2292 2460
2293 if (region_uuid) { 2461 if (region_uuid) {
2294 ret = o2hb_region_get(region_uuid); 2462 ret = o2hb_region_inc_user(region_uuid);
2295 if (ret) 2463 if (ret) {
2464 mlog_errno(ret);
2296 goto out; 2465 goto out;
2466 }
2297 } 2467 }
2298 2468
2299 down_write(&o2hb_callback_sem); 2469 down_write(&o2hb_callback_sem);
@@ -2311,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
2311 up_write(&o2hb_callback_sem); 2481 up_write(&o2hb_callback_sem);
2312 ret = 0; 2482 ret = 0;
2313out: 2483out:
2314 mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", 2484 mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
2315 ret, __builtin_return_address(0), hc); 2485 ret, __builtin_return_address(0), hc);
2316 return ret; 2486 return ret;
2317} 2487}
@@ -2322,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2322{ 2492{
2323 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); 2493 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
2324 2494
2325 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 2495 mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
2326 __builtin_return_address(0), hc); 2496 __builtin_return_address(0), hc);
2327 2497
2328 /* XXX Can this happen _with_ a region reference? */ 2498 /* XXX Can this happen _with_ a region reference? */
@@ -2330,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2330 return; 2500 return;
2331 2501
2332 if (region_uuid) 2502 if (region_uuid)
2333 o2hb_region_put(region_uuid); 2503 o2hb_region_dec_user(region_uuid);
2334 2504
2335 down_write(&o2hb_callback_sem); 2505 down_write(&o2hb_callback_sem);
2336 2506
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e52b02..3a5835904b3d 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
46#define O2NET_DEBUG_DIR "o2net" 46#define O2NET_DEBUG_DIR "o2net"
47#define SC_DEBUG_NAME "sock_containers" 47#define SC_DEBUG_NAME "sock_containers"
48#define NST_DEBUG_NAME "send_tracking" 48#define NST_DEBUG_NAME "send_tracking"
49#define STATS_DEBUG_NAME "stats"
50
51#define SHOW_SOCK_CONTAINERS 0
52#define SHOW_SOCK_STATS 1
49 53
50static struct dentry *o2net_dentry; 54static struct dentry *o2net_dentry;
51static struct dentry *sc_dentry; 55static struct dentry *sc_dentry;
52static struct dentry *nst_dentry; 56static struct dentry *nst_dentry;
57static struct dentry *stats_dentry;
53 58
54static DEFINE_SPINLOCK(o2net_debug_lock); 59static DEFINE_SPINLOCK(o2net_debug_lock);
55 60
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
123static int nst_seq_show(struct seq_file *seq, void *v) 128static int nst_seq_show(struct seq_file *seq, void *v)
124{ 129{
125 struct o2net_send_tracking *nst, *dummy_nst = seq->private; 130 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
131 ktime_t now;
132 s64 sock, send, status;
126 133
127 spin_lock(&o2net_debug_lock); 134 spin_lock(&o2net_debug_lock);
128 nst = next_nst(dummy_nst); 135 nst = next_nst(dummy_nst);
136 if (!nst)
137 goto out;
129 138
130 if (nst != NULL) { 139 now = ktime_get();
131 /* get_task_comm isn't exported. oh well. */ 140 sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
132 seq_printf(seq, "%p:\n" 141 send = ktime_to_us(ktime_sub(now, nst->st_send_time));
133 " pid: %lu\n" 142 status = ktime_to_us(ktime_sub(now, nst->st_status_time));
134 " tgid: %lu\n" 143
135 " process name: %s\n" 144 /* get_task_comm isn't exported. oh well. */
136 " node: %u\n" 145 seq_printf(seq, "%p:\n"
137 " sc: %p\n" 146 " pid: %lu\n"
138 " message id: %d\n" 147 " tgid: %lu\n"
139 " message type: %u\n" 148 " process name: %s\n"
140 " message key: 0x%08x\n" 149 " node: %u\n"
141 " sock acquiry: %lu.%ld\n" 150 " sc: %p\n"
142 " send start: %lu.%ld\n" 151 " message id: %d\n"
143 " wait start: %lu.%ld\n", 152 " message type: %u\n"
144 nst, (unsigned long)nst->st_task->pid, 153 " message key: 0x%08x\n"
145 (unsigned long)nst->st_task->tgid, 154 " sock acquiry: %lld usecs ago\n"
146 nst->st_task->comm, nst->st_node, 155 " send start: %lld usecs ago\n"
147 nst->st_sc, nst->st_id, nst->st_msg_type, 156 " wait start: %lld usecs ago\n",
148 nst->st_msg_key, 157 nst, (unsigned long)task_pid_nr(nst->st_task),
149 nst->st_sock_time.tv_sec, 158 (unsigned long)nst->st_task->tgid,
150 (long)nst->st_sock_time.tv_usec, 159 nst->st_task->comm, nst->st_node,
151 nst->st_send_time.tv_sec, 160 nst->st_sc, nst->st_id, nst->st_msg_type,
152 (long)nst->st_send_time.tv_usec, 161 nst->st_msg_key,
153 nst->st_status_time.tv_sec, 162 (long long)sock,
154 (long)nst->st_status_time.tv_usec); 163 (long long)send,
155 } 164 (long long)status);
156 165
166out:
157 spin_unlock(&o2net_debug_lock); 167 spin_unlock(&o2net_debug_lock);
158 168
159 return 0; 169 return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
228 spin_unlock(&o2net_debug_lock); 238 spin_unlock(&o2net_debug_lock);
229} 239}
230 240
241struct o2net_sock_debug {
242 int dbg_ctxt;
243 struct o2net_sock_container *dbg_sock;
244};
245
231static struct o2net_sock_container 246static struct o2net_sock_container
232 *next_sc(struct o2net_sock_container *sc_start) 247 *next_sc(struct o2net_sock_container *sc_start)
233{ 248{
@@ -253,7 +268,8 @@ static struct o2net_sock_container
253 268
254static void *sc_seq_start(struct seq_file *seq, loff_t *pos) 269static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
255{ 270{
256 struct o2net_sock_container *sc, *dummy_sc = seq->private; 271 struct o2net_sock_debug *sd = seq->private;
272 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
257 273
258 spin_lock(&o2net_debug_lock); 274 spin_lock(&o2net_debug_lock);
259 sc = next_sc(dummy_sc); 275 sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
264 280
265static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 281static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
266{ 282{
267 struct o2net_sock_container *sc, *dummy_sc = seq->private; 283 struct o2net_sock_debug *sd = seq->private;
284 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
268 285
269 spin_lock(&o2net_debug_lock); 286 spin_lock(&o2net_debug_lock);
270 sc = next_sc(dummy_sc); 287 sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 293 return sc; /* unused, just needs to be null when done */
277} 294}
278 295
279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec 296#ifdef CONFIG_OCFS2_FS_STATS
297# define sc_send_count(_s) ((_s)->sc_send_count)
298# define sc_recv_count(_s) ((_s)->sc_recv_count)
299# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total))
300# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total))
301# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total))
302# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total))
303#else
304# define sc_send_count(_s) (0U)
305# define sc_recv_count(_s) (0U)
306# define sc_tv_acquiry_total_ns(_s) (0LL)
307# define sc_tv_send_total_ns(_s) (0LL)
308# define sc_tv_status_total_ns(_s) (0LL)
309# define sc_tv_process_total_ns(_s) (0LL)
310#endif
311
312/* So that debugfs.ocfs2 can determine which format is being used */
313#define O2NET_STATS_STR_VERSION 1
314static void sc_show_sock_stats(struct seq_file *seq,
315 struct o2net_sock_container *sc)
316{
317 if (!sc)
318 return;
319
320 seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
321 sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
322 (long long)sc_tv_acquiry_total_ns(sc),
323 (long long)sc_tv_send_total_ns(sc),
324 (long long)sc_tv_status_total_ns(sc),
325 (unsigned long)sc_recv_count(sc),
326 (long long)sc_tv_process_total_ns(sc));
327}
328
329static void sc_show_sock_container(struct seq_file *seq,
330 struct o2net_sock_container *sc)
331{
332 struct inet_sock *inet = NULL;
333 __be32 saddr = 0, daddr = 0;
334 __be16 sport = 0, dport = 0;
335
336 if (!sc)
337 return;
338
339 if (sc->sc_sock) {
340 inet = inet_sk(sc->sc_sock->sk);
341 /* the stack's structs aren't sparse endian clean */
342 saddr = (__force __be32)inet->inet_saddr;
343 daddr = (__force __be32)inet->inet_daddr;
344 sport = (__force __be16)inet->inet_sport;
345 dport = (__force __be16)inet->inet_dport;
346 }
347
348 /* XXX sigh, inet-> doesn't have sparse annotation so any
349 * use of it here generates a warning with -Wbitwise */
350 seq_printf(seq, "%p:\n"
351 " krefs: %d\n"
352 " sock: %pI4:%u -> "
353 "%pI4:%u\n"
354 " remote node: %s\n"
355 " page off: %zu\n"
356 " handshake ok: %u\n"
357 " timer: %lld usecs\n"
358 " data ready: %lld usecs\n"
359 " advance start: %lld usecs\n"
360 " advance stop: %lld usecs\n"
361 " func start: %lld usecs\n"
362 " func stop: %lld usecs\n"
363 " func key: 0x%08x\n"
364 " func type: %u\n",
365 sc,
366 atomic_read(&sc->sc_kref.refcount),
367 &saddr, inet ? ntohs(sport) : 0,
368 &daddr, inet ? ntohs(dport) : 0,
369 sc->sc_node->nd_name,
370 sc->sc_page_off,
371 sc->sc_handshake_ok,
372 (long long)ktime_to_us(sc->sc_tv_timer),
373 (long long)ktime_to_us(sc->sc_tv_data_ready),
374 (long long)ktime_to_us(sc->sc_tv_advance_start),
375 (long long)ktime_to_us(sc->sc_tv_advance_stop),
376 (long long)ktime_to_us(sc->sc_tv_func_start),
377 (long long)ktime_to_us(sc->sc_tv_func_stop),
378 sc->sc_msg_key,
379 sc->sc_msg_type);
380}
280 381
281static int sc_seq_show(struct seq_file *seq, void *v) 382static int sc_seq_show(struct seq_file *seq, void *v)
282{ 383{
283 struct o2net_sock_container *sc, *dummy_sc = seq->private; 384 struct o2net_sock_debug *sd = seq->private;
385 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
284 386
285 spin_lock(&o2net_debug_lock); 387 spin_lock(&o2net_debug_lock);
286 sc = next_sc(dummy_sc); 388 sc = next_sc(dummy_sc);
287 389
288 if (sc != NULL) { 390 if (sc) {
289 struct inet_sock *inet = NULL; 391 if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
290 392 sc_show_sock_container(seq, sc);
291 __be32 saddr = 0, daddr = 0; 393 else
292 __be16 sport = 0, dport = 0; 394 sc_show_sock_stats(seq, sc);
293
294 if (sc->sc_sock) {
295 inet = inet_sk(sc->sc_sock->sk);
296 /* the stack's structs aren't sparse endian clean */
297 saddr = (__force __be32)inet->inet_saddr;
298 daddr = (__force __be32)inet->inet_daddr;
299 sport = (__force __be16)inet->inet_sport;
300 dport = (__force __be16)inet->inet_dport;
301 }
302
303 /* XXX sigh, inet-> doesn't have sparse annotation so any
304 * use of it here generates a warning with -Wbitwise */
305 seq_printf(seq, "%p:\n"
306 " krefs: %d\n"
307 " sock: %pI4:%u -> "
308 "%pI4:%u\n"
309 " remote node: %s\n"
310 " page off: %zu\n"
311 " handshake ok: %u\n"
312 " timer: %lu.%ld\n"
313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%ld\n"
317 " func stop: %lu.%ld\n"
318 " func key: %u\n"
319 " func type: %u\n",
320 sc,
321 atomic_read(&sc->sc_kref.refcount),
322 &saddr, inet ? ntohs(sport) : 0,
323 &daddr, inet ? ntohs(dport) : 0,
324 sc->sc_node->nd_name,
325 sc->sc_page_off,
326 sc->sc_handshake_ok,
327 TV_SEC_USEC(sc->sc_tv_timer),
328 TV_SEC_USEC(sc->sc_tv_data_ready),
329 TV_SEC_USEC(sc->sc_tv_advance_start),
330 TV_SEC_USEC(sc->sc_tv_advance_stop),
331 TV_SEC_USEC(sc->sc_tv_func_start),
332 TV_SEC_USEC(sc->sc_tv_func_stop),
333 sc->sc_msg_key,
334 sc->sc_msg_type);
335 } 395 }
336 396
337
338 spin_unlock(&o2net_debug_lock); 397 spin_unlock(&o2net_debug_lock);
339 398
340 return 0; 399 return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
351 .show = sc_seq_show, 410 .show = sc_seq_show,
352}; 411};
353 412
354static int sc_fop_open(struct inode *inode, struct file *file) 413static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
355{ 414{
356 struct o2net_sock_container *dummy_sc; 415 struct o2net_sock_container *dummy_sc;
357 struct seq_file *seq; 416 struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
369 goto out; 428 goto out;
370 429
371 seq = file->private_data; 430 seq = file->private_data;
372 seq->private = dummy_sc; 431 seq->private = sd;
432 sd->dbg_sock = dummy_sc;
373 o2net_debug_add_sc(dummy_sc); 433 o2net_debug_add_sc(dummy_sc);
374 434
375 dummy_sc = NULL; 435 dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
382static int sc_fop_release(struct inode *inode, struct file *file) 442static int sc_fop_release(struct inode *inode, struct file *file)
383{ 443{
384 struct seq_file *seq = file->private_data; 444 struct seq_file *seq = file->private_data;
385 struct o2net_sock_container *dummy_sc = seq->private; 445 struct o2net_sock_debug *sd = seq->private;
446 struct o2net_sock_container *dummy_sc = sd->dbg_sock;
386 447
387 o2net_debug_del_sc(dummy_sc); 448 o2net_debug_del_sc(dummy_sc);
388 return seq_release_private(inode, file); 449 return seq_release_private(inode, file);
389} 450}
390 451
452static int stats_fop_open(struct inode *inode, struct file *file)
453{
454 struct o2net_sock_debug *sd;
455
456 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
457 if (sd == NULL)
458 return -ENOMEM;
459
460 sd->dbg_ctxt = SHOW_SOCK_STATS;
461 sd->dbg_sock = NULL;
462
463 return sc_common_open(file, sd);
464}
465
466static const struct file_operations stats_seq_fops = {
467 .open = stats_fop_open,
468 .read = seq_read,
469 .llseek = seq_lseek,
470 .release = sc_fop_release,
471};
472
473static int sc_fop_open(struct inode *inode, struct file *file)
474{
475 struct o2net_sock_debug *sd;
476
477 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
478 if (sd == NULL)
479 return -ENOMEM;
480
481 sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
482 sd->dbg_sock = NULL;
483
484 return sc_common_open(file, sd);
485}
486
391static const struct file_operations sc_seq_fops = { 487static const struct file_operations sc_seq_fops = {
392 .open = sc_fop_open, 488 .open = sc_fop_open,
393 .read = seq_read, 489 .read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
419 goto bail; 515 goto bail;
420 } 516 }
421 517
518 stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
519 o2net_dentry, NULL,
520 &stats_seq_fops);
521 if (!stats_dentry) {
522 mlog_errno(-ENOMEM);
523 goto bail;
524 }
525
422 return 0; 526 return 0;
423bail: 527bail:
424 if (sc_dentry) 528 debugfs_remove(stats_dentry);
425 debugfs_remove(sc_dentry); 529 debugfs_remove(sc_dentry);
426 if (nst_dentry) 530 debugfs_remove(nst_dentry);
427 debugfs_remove(nst_dentry); 531 debugfs_remove(o2net_dentry);
428 if (o2net_dentry)
429 debugfs_remove(o2net_dentry);
430 return -ENOMEM; 532 return -ENOMEM;
431} 533}
432 534
433void o2net_debugfs_exit(void) 535void o2net_debugfs_exit(void)
434{ 536{
435 if (sc_dentry) 537 debugfs_remove(stats_dentry);
436 debugfs_remove(sc_dentry); 538 debugfs_remove(sc_dentry);
437 if (nst_dentry) 539 debugfs_remove(nst_dentry);
438 debugfs_remove(nst_dentry); 540 debugfs_remove(o2net_dentry);
439 if (o2net_dentry)
440 debugfs_remove(o2net_dentry);
441} 541}
442 542
443#endif /* CONFIG_DEBUG_FS */ 543#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e42123..3b11cb1e38fc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 nst->st_sock_time = ktime_get();
159} 159}
160 160
161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 nst->st_send_time = ktime_get();
164} 164}
165 165
166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 nst->st_status_time = ktime_get();
169} 169}
170 170
171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
178 u32 msg_id)
178{ 179{
179 nst->st_id = msg_id; 180 nst->st_id = msg_id;
180} 181}
181 182
182#else /* CONFIG_DEBUG_FS */ 183static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{ 184{
185 sc->sc_tv_timer = ktime_get();
187} 186}
188 187
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 188static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
190{ 189{
190 sc->sc_tv_data_ready = ktime_get();
191} 191}
192 192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 193static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
194{ 194{
195 sc->sc_tv_advance_start = ktime_get();
195} 196}
196 197
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 198static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
198{ 199{
200 sc->sc_tv_advance_stop = ktime_get();
199} 201}
200 202
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 203static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
202 struct o2net_sock_container *sc)
203{ 204{
205 sc->sc_tv_func_start = ktime_get();
204} 206}
205 207
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, 208static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
207 u32 msg_id)
208{ 209{
210 sc->sc_tv_func_stop = ktime_get();
209} 211}
210 212
213static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
214{
215 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
216}
217#else /* CONFIG_DEBUG_FS */
/*
 * CONFIG_DEBUG_FS is off: the tracking helpers expand to nothing so that
 * call sites need no #ifdefs.  The macro arguments are not evaluated.
 */
# define o2net_init_nst(a, b, c, d, e)
# define o2net_set_nst_sock_time(a)
# define o2net_set_nst_send_time(a)
# define o2net_set_nst_status_time(a)
# define o2net_set_nst_sock_container(a, b)
# define o2net_set_nst_msg_id(a, b)
# define o2net_set_sock_timer(a)
# define o2net_set_data_ready_time(a)
# define o2net_set_advance_start_time(a)
# define o2net_set_advance_stop_time(a)
# define o2net_set_func_start_time(a)
# define o2net_set_func_stop_time(a)
/*
 * Build the zero value with ktime_set() rather than a bare "(ktime_t)0"
 * cast: the cast is only valid while ktime_t is a plain scalar, and the
 * unparenthesized form could bind unexpectedly inside a larger expression.
 */
# define o2net_get_func_run_time(a)		ktime_set(0, 0)
211#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
212 232
233#ifdef CONFIG_OCFS2_FS_STATS
234static void o2net_update_send_stats(struct o2net_send_tracking *nst,
235 struct o2net_sock_container *sc)
236{
237 sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
238 ktime_sub(ktime_get(),
239 nst->st_status_time));
240 sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
241 ktime_sub(nst->st_status_time,
242 nst->st_send_time));
243 sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
244 ktime_sub(nst->st_send_time,
245 nst->st_sock_time));
246 sc->sc_send_count++;
247}
248
249static void o2net_update_recv_stats(struct o2net_sock_container *sc)
250{
251 sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
252 o2net_get_func_run_time(sc));
253 sc->sc_recv_count++;
254}
255
256#else
257
/*
 * Statistics are compiled out: both update hooks expand to nothing and do
 * not evaluate their arguments.
 */
# define o2net_update_send_stats(a, b)
# define o2net_update_recv_stats(sc)
261
262#endif /* CONFIG_OCFS2_FS_STATS */
263
213static inline int o2net_reconnect_delay(void) 264static inline int o2net_reconnect_delay(void)
214{ 265{
215 return o2nm_single_cluster->cl_reconnect_delay_ms; 266 return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
355 sc->sc_sock = NULL; 406 sc->sc_sock = NULL;
356 } 407 }
357 408
409 o2nm_undepend_item(&sc->sc_node->nd_item);
358 o2nm_node_put(sc->sc_node); 410 o2nm_node_put(sc->sc_node);
359 sc->sc_node = NULL; 411 sc->sc_node = NULL;
360 412
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
376{ 428{
377 struct o2net_sock_container *sc, *ret = NULL; 429 struct o2net_sock_container *sc, *ret = NULL;
378 struct page *page = NULL; 430 struct page *page = NULL;
431 int status = 0;
379 432
380 page = alloc_page(GFP_NOFS); 433 page = alloc_page(GFP_NOFS);
381 sc = kzalloc(sizeof(*sc), GFP_NOFS); 434 sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
386 o2nm_node_get(node); 439 o2nm_node_get(node);
387 sc->sc_node = node; 440 sc->sc_node = node;
388 441
442 /* pin the node item of the remote node */
443 status = o2nm_depend_item(&node->nd_item);
444 if (status) {
445 mlog_errno(status);
446 o2nm_node_put(node);
447 goto out;
448 }
389 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); 449 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
390 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); 450 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
391 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); 451 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
546 if (sk->sk_user_data) { 606 if (sk->sk_user_data) {
547 struct o2net_sock_container *sc = sk->sk_user_data; 607 struct o2net_sock_container *sc = sk->sk_user_data;
548 sclog(sc, "data_ready hit\n"); 608 sclog(sc, "data_ready hit\n");
549 do_gettimeofday(&sc->sc_tv_data_ready); 609 o2net_set_data_ready_time(sc);
550 o2net_sc_queue_work(sc, &sc->sc_rx_work); 610 o2net_sc_queue_work(sc, &sc->sc_rx_work);
551 ready = sc->sc_data_ready; 611 ready = sc->sc_data_ready;
552 } else { 612 } else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
1070 o2net_set_nst_status_time(&nst); 1130 o2net_set_nst_status_time(&nst);
1071 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 1131 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
1072 1132
1133 o2net_update_send_stats(&nst, sc);
1134
1073 /* Note that we avoid overwriting the callers status return 1135 /* Note that we avoid overwriting the callers status return
1074 * variable if a system error was reported on the other 1136 * variable if a system error was reported on the other
1075 * side. Callers beware. */ 1137 * side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
1183 if (syserr != O2NET_ERR_NONE) 1245 if (syserr != O2NET_ERR_NONE)
1184 goto out_respond; 1246 goto out_respond;
1185 1247
1186 do_gettimeofday(&sc->sc_tv_func_start); 1248 o2net_set_func_start_time(sc);
1187 sc->sc_msg_key = be32_to_cpu(hdr->key); 1249 sc->sc_msg_key = be32_to_cpu(hdr->key);
1188 sc->sc_msg_type = be16_to_cpu(hdr->msg_type); 1250 sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
1189 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + 1251 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
1190 be16_to_cpu(hdr->data_len), 1252 be16_to_cpu(hdr->data_len),
1191 nmh->nh_func_data, &ret_data); 1253 nmh->nh_func_data, &ret_data);
1192 do_gettimeofday(&sc->sc_tv_func_stop); 1254 o2net_set_func_stop_time(sc);
1255
1256 o2net_update_recv_stats(sc);
1193 1257
1194out_respond: 1258out_respond:
1195 /* this destroys the hdr, so don't use it after this */ 1259 /* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1300 size_t datalen; 1364 size_t datalen;
1301 1365
1302 sclog(sc, "receiving\n"); 1366 sclog(sc, "receiving\n");
1303 do_gettimeofday(&sc->sc_tv_advance_start); 1367 o2net_set_advance_start_time(sc);
1304 1368
1305 if (unlikely(sc->sc_handshake_ok == 0)) { 1369 if (unlikely(sc->sc_handshake_ok == 0)) {
1306 if(sc->sc_page_off < sizeof(struct o2net_handshake)) { 1370 if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1375 1439
1376out: 1440out:
1377 sclog(sc, "ret = %d\n", ret); 1441 sclog(sc, "ret = %d\n", ret);
1378 do_gettimeofday(&sc->sc_tv_advance_stop); 1442 o2net_set_advance_stop_time(sc);
1379 return ret; 1443 return ret;
1380} 1444}
1381 1445
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
1475{ 1539{
1476 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1540 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1477 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 1541 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1478 struct timeval now;
1479 1542
1480 do_gettimeofday(&now); 1543#ifdef CONFIG_DEBUG_FS
1544 ktime_t now = ktime_get();
1545#endif
1481 1546
1482 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1547 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1483 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1548 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1484 o2net_idle_timeout() / 1000, 1549 o2net_idle_timeout() / 1000,
1485 o2net_idle_timeout() % 1000); 1550 o2net_idle_timeout() % 1000);
1486 mlog(ML_NOTICE, "here are some times that might help debug the " 1551
1487 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1552#ifdef CONFIG_DEBUG_FS
1488 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1553 mlog(ML_NOTICE, "Here are some times that might help debug the "
1489 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 1554 "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
1490 now.tv_sec, (long) now.tv_usec, 1555 "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
1491 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, 1556 (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
1492 sc->sc_tv_advance_start.tv_sec, 1557 (long long)ktime_to_us(sc->sc_tv_data_ready),
1493 (long) sc->sc_tv_advance_start.tv_usec, 1558 (long long)ktime_to_us(sc->sc_tv_advance_start),
1494 sc->sc_tv_advance_stop.tv_sec, 1559 (long long)ktime_to_us(sc->sc_tv_advance_stop),
1495 (long) sc->sc_tv_advance_stop.tv_usec,
1496 sc->sc_msg_key, sc->sc_msg_type, 1560 sc->sc_msg_key, sc->sc_msg_type,
1497 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, 1561 (long long)ktime_to_us(sc->sc_tv_func_start),
1498 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); 1562 (long long)ktime_to_us(sc->sc_tv_func_stop));
1563#endif
1499 1564
1500 /* 1565 /*
1501 * Initialize the nn_timeout so that the next connection attempt 1566 * Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1511 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1576 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1512 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1577 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1513 msecs_to_jiffies(o2net_keepalive_delay())); 1578 msecs_to_jiffies(o2net_keepalive_delay()));
1514 do_gettimeofday(&sc->sc_tv_timer); 1579 o2net_set_sock_timer(sc);
1515 mod_timer(&sc->sc_idle_timeout, 1580 mod_timer(&sc->sc_idle_timeout,
1516 jiffies + msecs_to_jiffies(o2net_idle_timeout())); 1581 jiffies + msecs_to_jiffies(o2net_idle_timeout()));
1517} 1582}
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf9eb4b..4cbcb65784a3 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,27 @@ struct o2net_sock_container {
166 /* original handlers for the sockets */ 166 /* original handlers for the sockets */
167 void (*sc_state_change)(struct sock *sk); 167 void (*sc_state_change)(struct sock *sk);
168 void (*sc_data_ready)(struct sock *sk, int bytes); 168 void (*sc_data_ready)(struct sock *sk, int bytes);
169#ifdef CONFIG_DEBUG_FS 169
170 struct list_head sc_net_debug_item;
171#endif
172 struct timeval sc_tv_timer;
173 struct timeval sc_tv_data_ready;
174 struct timeval sc_tv_advance_start;
175 struct timeval sc_tv_advance_stop;
176 struct timeval sc_tv_func_start;
177 struct timeval sc_tv_func_stop;
178 u32 sc_msg_key; 170 u32 sc_msg_key;
179 u16 sc_msg_type; 171 u16 sc_msg_type;
180 172
173#ifdef CONFIG_DEBUG_FS
174 struct list_head sc_net_debug_item;
175 ktime_t sc_tv_timer;
176 ktime_t sc_tv_data_ready;
177 ktime_t sc_tv_advance_start;
178 ktime_t sc_tv_advance_stop;
179 ktime_t sc_tv_func_start;
180 ktime_t sc_tv_func_stop;
181#endif
182#ifdef CONFIG_OCFS2_FS_STATS
183 ktime_t sc_tv_acquiry_total;
184 ktime_t sc_tv_send_total;
185 ktime_t sc_tv_status_total;
186 u32 sc_send_count;
187 u32 sc_recv_count;
188 ktime_t sc_tv_process_total;
189#endif
181 struct mutex sc_send_lock; 190 struct mutex sc_send_lock;
182}; 191};
183 192
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
220 u32 st_msg_type; 229 u32 st_msg_type;
221 u32 st_msg_key; 230 u32 st_msg_key;
222 u8 st_node; 231 u8 st_node;
223 struct timeval st_sock_time; 232 ktime_t st_sock_time;
224 struct timeval st_send_time; 233 ktime_t st_send_time;
225 struct timeval st_status_time; 234 ktime_t st_status_time;
226}; 235};
227#else 236#else
228struct o2net_send_tracking { 237struct o2net_send_tracking {