aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2014-03-11 08:47:55 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2014-07-10 12:34:59 -0400
commit: a5655dac75b6c572e1ef430b61ad55245fffd523 (patch)
tree: 50636b7490394d34029daed62e4d023eccadcbf5
parent: caa3db0e14cc301f07e758f4cadc36d4dead145a (diff)
drbd: fix bogus resync stats in /proc/drbd
We intentionally do not serialize /proc/drbd access with internal state
changes or statistic updates.

Because of that, cat /proc/drbd may race with a resync that is just
finishing: it may still see the sync state and find the number of blocks
still to go, but then find that the total number of blocks within this
resync has just been reset to 0 by the time it accesses it.

This then produces bogus numbers in the resync speed estimates.

Fix by accessing all relevant data only once, and fixing it up if
"still to go" happens to be more than "total".

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- drivers/block/drbd/drbd_int.h 48
-rw-r--r-- drivers/block/drbd/drbd_proc.c 103
2 files changed, 75 insertions, 76 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e306a22a60f1..abf5aefd9790 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1982,54 +1982,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_
1982extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); 1982extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
1983#endif 1983#endif
1984 1984
1985/* you must have an "get_ldev" reference */
1986static inline void drbd_get_syncer_progress(struct drbd_device *device,
1987 unsigned long *bits_left, unsigned int *per_mil_done)
1988{
1989 /* this is to break it at compile time when we change that, in case we
1990 * want to support more than (1<<32) bits on a 32bit arch. */
1991 typecheck(unsigned long, device->rs_total);
1992
1993 /* note: both rs_total and rs_left are in bits, i.e. in
1994 * units of BM_BLOCK_SIZE.
1995 * for the percentage, we don't care. */
1996
1997 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
1998 *bits_left = device->ov_left;
1999 else
2000 *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
2001 /* >> 10 to prevent overflow,
2002 * +1 to prevent division by zero */
2003 if (*bits_left > device->rs_total) {
2004 /* doh. maybe a logic bug somewhere.
2005 * may also be just a race condition
2006 * between this and a disconnect during sync.
2007 * for now, just prevent in-kernel buffer overflow.
2008 */
2009 smp_rmb();
2010 drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
2011 drbd_conn_str(device->state.conn),
2012 *bits_left, device->rs_total, device->rs_failed);
2013 *per_mil_done = 0;
2014 } else {
2015 /* Make sure the division happens in long context.
2016 * We allow up to one petabyte storage right now,
2017 * at a granularity of 4k per bit that is 2**38 bits.
2018 * After shift right and multiplication by 1000,
2019 * this should still fit easily into a 32bit long,
2020 * so we don't need a 64bit division on 32bit arch.
2021 * Note: currently we don't support such large bitmaps on 32bit
2022 * arch anyways, but no harm done to be prepared for it here.
2023 */
2024 unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
2025 unsigned long left = *bits_left >> shift;
2026 unsigned long total = 1UL + (device->rs_total >> shift);
2027 unsigned long tmp = 1000UL - left * 1000UL/total;
2028 *per_mil_done = tmp;
2029 }
2030}
2031
2032
2033/* this throttles on-the-fly application requests 1985/* this throttles on-the-fly application requests
2034 * according to max_buffers settings; 1986 * according to max_buffers settings;
2035 * maybe re-implement using semaphores? */ 1987 * maybe re-implement using semaphores? */
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 886f6bef70dc..9059d7bf8a36 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
60 seq_printf(seq, "%ld", v); 60 seq_printf(seq, "%ld", v);
61} 61}
62 62
63static void drbd_get_syncer_progress(struct drbd_device *device,
64 union drbd_dev_state state, unsigned long *rs_total,
65 unsigned long *bits_left, unsigned int *per_mil_done)
66{
67 /* this is to break it at compile time when we change that, in case we
68 * want to support more than (1<<32) bits on a 32bit arch. */
69 typecheck(unsigned long, device->rs_total);
70 *rs_total = device->rs_total;
71
72 /* note: both rs_total and rs_left are in bits, i.e. in
73 * units of BM_BLOCK_SIZE.
74 * for the percentage, we don't care. */
75
76 if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
77 *bits_left = device->ov_left;
78 else
79 *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
80 /* >> 10 to prevent overflow,
81 * +1 to prevent division by zero */
82 if (*bits_left > *rs_total) {
83 /* D'oh. Maybe a logic bug somewhere. More likely just a race
84 * between state change and reset of rs_total.
85 */
86 *bits_left = *rs_total;
87 *per_mil_done = *rs_total ? 0 : 1000;
88 } else {
89 /* Make sure the division happens in long context.
90 * We allow up to one petabyte storage right now,
91 * at a granularity of 4k per bit that is 2**38 bits.
92 * After shift right and multiplication by 1000,
93 * this should still fit easily into a 32bit long,
94 * so we don't need a 64bit division on 32bit arch.
95 * Note: currently we don't support such large bitmaps on 32bit
96 * arch anyways, but no harm done to be prepared for it here.
97 */
98 unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
99 unsigned long left = *bits_left >> shift;
100 unsigned long total = 1UL + (*rs_total >> shift);
101 unsigned long tmp = 1000UL - left * 1000UL/total;
102 *per_mil_done = tmp;
103 }
104}
105
106
63/*lge 107/*lge
64 * progress bars shamelessly adapted from driver/md/md.c 108 * progress bars shamelessly adapted from driver/md/md.c
65 * output looks like 109 * output looks like
66 * [=====>..............] 33.5% (23456/123456) 110 * [=====>..............] 33.5% (23456/123456)
67 * finish: 2:20:20 speed: 6,345 (6,456) K/sec 111 * finish: 2:20:20 speed: 6,345 (6,456) K/sec
68 */ 112 */
69static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq) 113static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
114 union drbd_dev_state state)
70{ 115{
71 unsigned long db, dt, dbdt, rt, rs_left; 116 unsigned long db, dt, dbdt, rt, rs_total, rs_left;
72 unsigned int res; 117 unsigned int res;
73 int i, x, y; 118 int i, x, y;
74 int stalled = 0; 119 int stalled = 0;
75 120
76 drbd_get_syncer_progress(device, &rs_left, &res); 121 drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
77 122
78 x = res/50; 123 x = res/50;
79 y = 20-x; 124 y = 20-x;
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
85 seq_printf(seq, "."); 130 seq_printf(seq, ".");
86 seq_printf(seq, "] "); 131 seq_printf(seq, "] ");
87 132
88 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 133 if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
89 seq_printf(seq, "verified:"); 134 seq_printf(seq, "verified:");
90 else 135 else
91 seq_printf(seq, "sync'ed:"); 136 seq_printf(seq, "sync'ed:");
92 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); 137 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
93 138
94 /* if more than a few GB, display in MB */ 139 /* if more than a few GB, display in MB */
95 if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) 140 if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
96 seq_printf(seq, "(%lu/%lu)M", 141 seq_printf(seq, "(%lu/%lu)M",
97 (unsigned long) Bit2KB(rs_left >> 10), 142 (unsigned long) Bit2KB(rs_left >> 10),
98 (unsigned long) Bit2KB(device->rs_total >> 10)); 143 (unsigned long) Bit2KB(rs_total >> 10));
99 else 144 else
100 seq_printf(seq, "(%lu/%lu)K\n\t", 145 seq_printf(seq, "(%lu/%lu)K\n\t",
101 (unsigned long) Bit2KB(rs_left), 146 (unsigned long) Bit2KB(rs_left),
102 (unsigned long) Bit2KB(device->rs_total)); 147 (unsigned long) Bit2KB(rs_total));
103 148
104 /* see drivers/md/md.c 149 /* see drivers/md/md.c
105 * We do not want to overflow, so the order of operands and 150 * We do not want to overflow, so the order of operands and
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
150 dt = (jiffies - device->rs_start - device->rs_paused) / HZ; 195 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
151 if (dt == 0) 196 if (dt == 0)
152 dt = 1; 197 dt = 1;
153 db = device->rs_total - rs_left; 198 db = rs_total - rs_left;
154 dbdt = Bit2KB(db/dt); 199 dbdt = Bit2KB(db/dt);
155 seq_printf_with_thousands_grouping(seq, dbdt); 200 seq_printf_with_thousands_grouping(seq, dbdt);
156 seq_printf(seq, ")"); 201 seq_printf(seq, ")");
157 202
158 if (device->state.conn == C_SYNC_TARGET || 203 if (state.conn == C_SYNC_TARGET ||
159 device->state.conn == C_VERIFY_S) { 204 state.conn == C_VERIFY_S) {
160 seq_printf(seq, " want: "); 205 seq_printf(seq, " want: ");
161 seq_printf_with_thousands_grouping(seq, device->c_sync_rate); 206 seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
162 } 207 }
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
168 unsigned long bm_bits = drbd_bm_bits(device); 213 unsigned long bm_bits = drbd_bm_bits(device);
169 unsigned long bit_pos; 214 unsigned long bit_pos;
170 unsigned long long stop_sector = 0; 215 unsigned long long stop_sector = 0;
171 if (device->state.conn == C_VERIFY_S || 216 if (state.conn == C_VERIFY_S ||
172 device->state.conn == C_VERIFY_T) { 217 state.conn == C_VERIFY_T) {
173 bit_pos = bm_bits - device->ov_left; 218 bit_pos = bm_bits - device->ov_left;
174 if (verify_can_do_stop_sector(device)) 219 if (verify_can_do_stop_sector(device))
175 stop_sector = device->ov_stop_sector; 220 stop_sector = device->ov_stop_sector;
@@ -194,6 +239,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
194 const char *sn; 239 const char *sn;
195 struct drbd_device *device; 240 struct drbd_device *device;
196 struct net_conf *nc; 241 struct net_conf *nc;
242 union drbd_dev_state state;
197 char wp; 243 char wp;
198 244
199 static char write_ordering_chars[] = { 245 static char write_ordering_chars[] = {
@@ -231,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
231 seq_printf(seq, "\n"); 277 seq_printf(seq, "\n");
232 prev_i = i; 278 prev_i = i;
233 279
234 sn = drbd_conn_str(device->state.conn); 280 state = device->state;
281 sn = drbd_conn_str(state.conn);
235 282
236 if (device->state.conn == C_STANDALONE && 283 if (state.conn == C_STANDALONE &&
237 device->state.disk == D_DISKLESS && 284 state.disk == D_DISKLESS &&
238 device->state.role == R_SECONDARY) { 285 state.role == R_SECONDARY) {
239 seq_printf(seq, "%2d: cs:Unconfigured\n", i); 286 seq_printf(seq, "%2d: cs:Unconfigured\n", i);
240 } else { 287 } else {
241 /* reset device->congestion_reason */ 288 /* reset device->congestion_reason */
@@ -248,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
248 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " 295 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
249 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", 296 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
250 i, sn, 297 i, sn,
251 drbd_role_str(device->state.role), 298 drbd_role_str(state.role),
252 drbd_role_str(device->state.peer), 299 drbd_role_str(state.peer),
253 drbd_disk_str(device->state.disk), 300 drbd_disk_str(state.disk),
254 drbd_disk_str(device->state.pdsk), 301 drbd_disk_str(state.pdsk),
255 wp, 302 wp,
256 drbd_suspended(device) ? 's' : 'r', 303 drbd_suspended(device) ? 's' : 'r',
257 device->state.aftr_isp ? 'a' : '-', 304 state.aftr_isp ? 'a' : '-',
258 device->state.peer_isp ? 'p' : '-', 305 state.peer_isp ? 'p' : '-',
259 device->state.user_isp ? 'u' : '-', 306 state.user_isp ? 'u' : '-',
260 device->congestion_reason ?: '-', 307 device->congestion_reason ?: '-',
261 test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', 308 test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
262 device->send_cnt/2, 309 device->send_cnt/2,
@@ -277,11 +324,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
277 Bit2KB((unsigned long long) 324 Bit2KB((unsigned long long)
278 drbd_bm_total_weight(device))); 325 drbd_bm_total_weight(device)));
279 } 326 }
280 if (device->state.conn == C_SYNC_SOURCE || 327 if (state.conn == C_SYNC_SOURCE ||
281 device->state.conn == C_SYNC_TARGET || 328 state.conn == C_SYNC_TARGET ||
282 device->state.conn == C_VERIFY_S || 329 state.conn == C_VERIFY_S ||
283 device->state.conn == C_VERIFY_T) 330 state.conn == C_VERIFY_T)
284 drbd_syncer_progress(device, seq); 331 drbd_syncer_progress(device, seq, state);
285 332
286 if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { 333 if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
287 lc_seq_printf_stats(seq, device->resync); 334 lc_seq_printf_stats(seq, device->resync);