aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/cluster/heartbeat.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster/heartbeat.c')
-rw-r--r--fs/ocfs2/cluster/heartbeat.c248
1 files changed, 209 insertions, 39 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9e3d45bcb5fd..b108e863d8f6 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
82#define O2HB_DB_TYPE_REGION_LIVENODES 4 82#define O2HB_DB_TYPE_REGION_LIVENODES 4
83#define O2HB_DB_TYPE_REGION_NUMBER 5 83#define O2HB_DB_TYPE_REGION_NUMBER 5
84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
85#define O2HB_DB_TYPE_REGION_PINNED 7
85struct o2hb_debug_buf { 86struct o2hb_debug_buf {
86 int db_type; 87 int db_type;
87 int db_size; 88 int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
101#define O2HB_DEBUG_FAILEDREGIONS "failed_regions" 102#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
102#define O2HB_DEBUG_REGION_NUMBER "num" 103#define O2HB_DEBUG_REGION_NUMBER "num"
103#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" 104#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
105#define O2HB_DEBUG_REGION_PINNED "pinned"
104 106
105static struct dentry *o2hb_debug_dir; 107static struct dentry *o2hb_debug_dir;
106static struct dentry *o2hb_debug_livenodes; 108static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
132unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 134unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
133unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; 135unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
134 136
137/*
138 * o2hb_dependent_users tracks the number of registered callbacks that depend
139 * on heartbeat. o2net and o2dlm are two entities that register this callback.
140 * However only o2dlm depends on the heartbeat. It does not want the heartbeat
141 * to stop while a dlm domain is still active.
142 */
143unsigned int o2hb_dependent_users;
144
145/*
146 * In global heartbeat mode, all regions are pinned if there are one or more
147 * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
148 * regions are unpinned if the region count exceeds the cut off or the number
149 * of dependent users falls to zero.
150 */
151#define O2HB_PIN_CUT_OFF 3
152
153/*
154 * In local heartbeat mode, we assume the dlm domain name to be the same as
155 * region uuid. This is true for domains created for the file system but not
156 * necessarily true for userdlm domains. This is a known limitation.
157 *
158 * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
159 * works for both file system and userdlm domains.
160 */
161static int o2hb_region_pin(const char *region_uuid);
162static void o2hb_region_unpin(const char *region_uuid);
163
135/* Only sets a new threshold if there are no active regions. 164/* Only sets a new threshold if there are no active regions.
136 * 165 *
137 * No locking or otherwise interesting code is required for reading 166 * No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
186 struct config_item hr_item; 215 struct config_item hr_item;
187 216
188 struct list_head hr_all_item; 217 struct list_head hr_all_item;
189 unsigned hr_unclean_stop:1; 218 unsigned hr_unclean_stop:1,
219 hr_item_pinned:1,
220 hr_item_dropped:1;
190 221
191 /* protected by the hr_callback_sem */ 222 /* protected by the hr_callback_sem */
192 struct task_struct *hr_task; 223 struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
212 struct dentry *hr_debug_livenodes; 243 struct dentry *hr_debug_livenodes;
213 struct dentry *hr_debug_regnum; 244 struct dentry *hr_debug_regnum;
214 struct dentry *hr_debug_elapsed_time; 245 struct dentry *hr_debug_elapsed_time;
246 struct dentry *hr_debug_pinned;
215 struct o2hb_debug_buf *hr_db_livenodes; 247 struct o2hb_debug_buf *hr_db_livenodes;
216 struct o2hb_debug_buf *hr_db_regnum; 248 struct o2hb_debug_buf *hr_db_regnum;
217 struct o2hb_debug_buf *hr_db_elapsed_time; 249 struct o2hb_debug_buf *hr_db_elapsed_time;
250 struct o2hb_debug_buf *hr_db_pinned;
218 251
219 /* let the person setting up hb wait for it to return until it 252 /* let the person setting up hb wait for it to return until it
220 * has reached a 'steady' state. This will be fixed when we have 253 * has reached a 'steady' state. This will be fixed when we have
@@ -701,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
701 config_item_name(&reg->hr_item)); 734 config_item_name(&reg->hr_item));
702 735
703 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); 736 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
737
738 /*
739 * If global heartbeat active, unpin all regions if the
740 * region count > CUT_OFF
741 */
742 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
743 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
744 o2hb_region_unpin(NULL);
704} 745}
705 746
706static int o2hb_check_slot(struct o2hb_region *reg, 747static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1041,6 +1082,9 @@ static int o2hb_thread(void *data)
1041 1082
1042 set_user_nice(current, -20); 1083 set_user_nice(current, -20);
1043 1084
1085 /* Pin node */
1086 o2nm_depend_this_node();
1087
1044 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 1088 while (!kthread_should_stop() && !reg->hr_unclean_stop) {
1045 /* We track the time spent inside 1089 /* We track the time spent inside
1046 * o2hb_do_disk_heartbeat so that we avoid more than 1090 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1090,6 +1134,9 @@ static int o2hb_thread(void *data)
1090 mlog_errno(ret); 1134 mlog_errno(ret);
1091 } 1135 }
1092 1136
1137 /* Unpin node */
1138 o2nm_undepend_this_node();
1139
1093 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); 1140 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
1094 1141
1095 return 0; 1142 return 0;
@@ -1142,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1142 reg->hr_last_timeout_start)); 1189 reg->hr_last_timeout_start));
1143 goto done; 1190 goto done;
1144 1191
1192 case O2HB_DB_TYPE_REGION_PINNED:
1193 reg = (struct o2hb_region *)db->db_data;
1194 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
1195 !!reg->hr_item_pinned);
1196 goto done;
1197
1145 default: 1198 default:
1146 goto done; 1199 goto done;
1147 } 1200 }
@@ -1315,6 +1368,8 @@ int o2hb_init(void)
1315 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); 1368 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
1316 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); 1369 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
1317 1370
1371 o2hb_dependent_users = 0;
1372
1318 return o2hb_debug_init(); 1373 return o2hb_debug_init();
1319} 1374}
1320 1375
@@ -1384,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
1384 debugfs_remove(reg->hr_debug_livenodes); 1439 debugfs_remove(reg->hr_debug_livenodes);
1385 debugfs_remove(reg->hr_debug_regnum); 1440 debugfs_remove(reg->hr_debug_regnum);
1386 debugfs_remove(reg->hr_debug_elapsed_time); 1441 debugfs_remove(reg->hr_debug_elapsed_time);
1442 debugfs_remove(reg->hr_debug_pinned);
1387 debugfs_remove(reg->hr_debug_dir); 1443 debugfs_remove(reg->hr_debug_dir);
1388 1444
1389 spin_lock(&o2hb_live_lock); 1445 spin_lock(&o2hb_live_lock);
@@ -1673,7 +1729,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1673 goto out; 1729 goto out;
1674 1730
1675 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1731 reg->hr_bdev = I_BDEV(filp->f_mapping->host);
1676 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); 1732 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1677 if (ret) { 1733 if (ret) {
1678 reg->hr_bdev = NULL; 1734 reg->hr_bdev = NULL;
1679 goto out; 1735 goto out;
@@ -1948,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
1948 goto bail; 2004 goto bail;
1949 } 2005 }
1950 2006
2007 reg->hr_debug_pinned =
2008 o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
2009 reg->hr_debug_dir,
2010 &(reg->hr_db_pinned),
2011 sizeof(*(reg->hr_db_pinned)),
2012 O2HB_DB_TYPE_REGION_PINNED,
2013 0, 0, reg);
2014 if (!reg->hr_debug_pinned) {
2015 mlog_errno(ret);
2016 goto bail;
2017 }
2018
1951 ret = 0; 2019 ret = 0;
1952bail: 2020bail:
1953 return ret; 2021 return ret;
@@ -2002,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2002{ 2070{
2003 struct task_struct *hb_task; 2071 struct task_struct *hb_task;
2004 struct o2hb_region *reg = to_o2hb_region(item); 2072 struct o2hb_region *reg = to_o2hb_region(item);
2073 int quorum_region = 0;
2005 2074
2006 /* stop the thread when the user removes the region dir */ 2075 /* stop the thread when the user removes the region dir */
2007 spin_lock(&o2hb_live_lock); 2076 spin_lock(&o2hb_live_lock);
2008 if (o2hb_global_heartbeat_active()) { 2077 if (o2hb_global_heartbeat_active()) {
2009 clear_bit(reg->hr_region_num, o2hb_region_bitmap); 2078 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2010 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); 2079 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2080 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2081 quorum_region = 1;
2082 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2011 } 2083 }
2012 hb_task = reg->hr_task; 2084 hb_task = reg->hr_task;
2013 reg->hr_task = NULL; 2085 reg->hr_task = NULL;
2086 reg->hr_item_dropped = 1;
2014 spin_unlock(&o2hb_live_lock); 2087 spin_unlock(&o2hb_live_lock);
2015 2088
2016 if (hb_task) 2089 if (hb_task)
@@ -2028,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2028 if (o2hb_global_heartbeat_active()) 2101 if (o2hb_global_heartbeat_active())
2029 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", 2102 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2030 config_item_name(&reg->hr_item)); 2103 config_item_name(&reg->hr_item));
2104
2031 config_item_put(item); 2105 config_item_put(item);
2106
2107 if (!o2hb_global_heartbeat_active() || !quorum_region)
2108 return;
2109
2110 /*
2111 * If global heartbeat active and there are dependent users,
2112 * pin all regions if quorum region count <= CUT_OFF
2113 */
2114 spin_lock(&o2hb_live_lock);
2115
2116 if (!o2hb_dependent_users)
2117 goto unlock;
2118
2119 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2120 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2121 o2hb_region_pin(NULL);
2122
2123unlock:
2124 spin_unlock(&o2hb_live_lock);
2032} 2125}
2033 2126
2034struct o2hb_heartbeat_group_attribute { 2127struct o2hb_heartbeat_group_attribute {
@@ -2214,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
2214} 2307}
2215EXPORT_SYMBOL_GPL(o2hb_setup_callback); 2308EXPORT_SYMBOL_GPL(o2hb_setup_callback);
2216 2309
2217static struct o2hb_region *o2hb_find_region(const char *region_uuid) 2310/*
2311 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2312 * In global heartbeat mode, region_uuid passed is NULL.
2313 *
2314 * In local, we only pin the matching region. In global we pin all the active
2315 * regions.
2316 */
2317static int o2hb_region_pin(const char *region_uuid)
2218{ 2318{
2219 struct o2hb_region *p, *reg = NULL; 2319 int ret = 0, found = 0;
2320 struct o2hb_region *reg;
2321 char *uuid;
2220 2322
2221 assert_spin_locked(&o2hb_live_lock); 2323 assert_spin_locked(&o2hb_live_lock);
2222 2324
2223 list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { 2325 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2224 if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { 2326 uuid = config_item_name(&reg->hr_item);
2225 reg = p; 2327
2226 break; 2328 /* local heartbeat */
2329 if (region_uuid) {
2330 if (strcmp(region_uuid, uuid))
2331 continue;
2332 found = 1;
2333 }
2334
2335 if (reg->hr_item_pinned || reg->hr_item_dropped)
2336 goto skip_pin;
2337
2338 /* Ignore ENOENT only for local hb (userdlm domain) */
2339 ret = o2nm_depend_item(&reg->hr_item);
2340 if (!ret) {
2341 mlog(ML_CLUSTER, "Pin region %s\n", uuid);
2342 reg->hr_item_pinned = 1;
2343 } else {
2344 if (ret == -ENOENT && found)
2345 ret = 0;
2346 else {
2347 mlog(ML_ERROR, "Pin region %s fails with %d\n",
2348 uuid, ret);
2349 break;
2350 }
2227 } 2351 }
2352skip_pin:
2353 if (found)
2354 break;
2228 } 2355 }
2229 2356
2230 return reg; 2357 return ret;
2231} 2358}
2232 2359
2233static int o2hb_region_get(const char *region_uuid) 2360/*
2361 * In local heartbeat mode, region_uuid passed matches the dlm domain name.
2362 * In global heartbeat mode, region_uuid passed is NULL.
2363 *
2364 * In local, we only unpin the matching region. In global we unpin all the
2365 * active regions.
2366 */
2367static void o2hb_region_unpin(const char *region_uuid)
2234{ 2368{
2235 int ret = 0;
2236 struct o2hb_region *reg; 2369 struct o2hb_region *reg;
2370 char *uuid;
2371 int found = 0;
2237 2372
2238 spin_lock(&o2hb_live_lock); 2373 assert_spin_locked(&o2hb_live_lock);
2239 2374
2240 reg = o2hb_find_region(region_uuid); 2375 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2241 if (!reg) 2376 uuid = config_item_name(&reg->hr_item);
2242 ret = -ENOENT; 2377 if (region_uuid) {
2243 spin_unlock(&o2hb_live_lock); 2378 if (strcmp(region_uuid, uuid))
2379 continue;
2380 found = 1;
2381 }
2244 2382
2245 if (ret) 2383 if (reg->hr_item_pinned) {
2246 goto out; 2384 mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
2385 o2nm_undepend_item(&reg->hr_item);
2386 reg->hr_item_pinned = 0;
2387 }
2388 if (found)
2389 break;
2390 }
2391}
2247 2392
2248 ret = o2nm_depend_this_node(); 2393static int o2hb_region_inc_user(const char *region_uuid)
2249 if (ret) 2394{
2250 goto out; 2395 int ret = 0;
2251 2396
2252 ret = o2nm_depend_item(&reg->hr_item); 2397 spin_lock(&o2hb_live_lock);
2253 if (ret)
2254 o2nm_undepend_this_node();
2255 2398
2256out: 2399 /* local heartbeat */
2400 if (!o2hb_global_heartbeat_active()) {
2401 ret = o2hb_region_pin(region_uuid);
2402 goto unlock;
2403 }
2404
2405 /*
2406 * if global heartbeat active and this is the first dependent user,
2407 * pin all regions if quorum region count <= CUT_OFF
2408 */
2409 o2hb_dependent_users++;
2410 if (o2hb_dependent_users > 1)
2411 goto unlock;
2412
2413 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2414 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2415 ret = o2hb_region_pin(NULL);
2416
2417unlock:
2418 spin_unlock(&o2hb_live_lock);
2257 return ret; 2419 return ret;
2258} 2420}
2259 2421
2260static void o2hb_region_put(const char *region_uuid) 2422void o2hb_region_dec_user(const char *region_uuid)
2261{ 2423{
2262 struct o2hb_region *reg;
2263
2264 spin_lock(&o2hb_live_lock); 2424 spin_lock(&o2hb_live_lock);
2265 2425
2266 reg = o2hb_find_region(region_uuid); 2426 /* local heartbeat */
2427 if (!o2hb_global_heartbeat_active()) {
2428 o2hb_region_unpin(region_uuid);
2429 goto unlock;
2430 }
2267 2431
2268 spin_unlock(&o2hb_live_lock); 2432 /*
2433 * if global heartbeat active and there are no dependent users,
2434 * unpin all quorum regions
2435 */
2436 o2hb_dependent_users--;
2437 if (!o2hb_dependent_users)
2438 o2hb_region_unpin(NULL);
2269 2439
2270 if (reg) { 2440unlock:
2271 o2nm_undepend_item(&reg->hr_item); 2441 spin_unlock(&o2hb_live_lock);
2272 o2nm_undepend_this_node();
2273 }
2274} 2442}
2275 2443
2276int o2hb_register_callback(const char *region_uuid, 2444int o2hb_register_callback(const char *region_uuid,
@@ -2291,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
2291 } 2459 }
2292 2460
2293 if (region_uuid) { 2461 if (region_uuid) {
2294 ret = o2hb_region_get(region_uuid); 2462 ret = o2hb_region_inc_user(region_uuid);
2295 if (ret) 2463 if (ret) {
2464 mlog_errno(ret);
2296 goto out; 2465 goto out;
2466 }
2297 } 2467 }
2298 2468
2299 down_write(&o2hb_callback_sem); 2469 down_write(&o2hb_callback_sem);
@@ -2311,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
2311 up_write(&o2hb_callback_sem); 2481 up_write(&o2hb_callback_sem);
2312 ret = 0; 2482 ret = 0;
2313out: 2483out:
2314 mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", 2484 mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
2315 ret, __builtin_return_address(0), hc); 2485 ret, __builtin_return_address(0), hc);
2316 return ret; 2486 return ret;
2317} 2487}
@@ -2322,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2322{ 2492{
2323 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); 2493 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
2324 2494
2325 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 2495 mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
2326 __builtin_return_address(0), hc); 2496 __builtin_return_address(0), hc);
2327 2497
2328 /* XXX Can this happen _with_ a region reference? */ 2498 /* XXX Can this happen _with_ a region reference? */
@@ -2330,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2330 return; 2500 return;
2331 2501
2332 if (region_uuid) 2502 if (region_uuid)
2333 o2hb_region_put(region_uuid); 2503 o2hb_region_dec_user(region_uuid);
2334 2504
2335 down_write(&o2hb_callback_sem); 2505 down_write(&o2hb_callback_sem);
2336 2506