diff options
Diffstat (limited to 'drivers/block/drbd/drbd_state.c')
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 1856 |
1 files changed, 1856 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c new file mode 100644 index 000000000000..53bf6182bac4 --- /dev/null +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -0,0 +1,1856 @@ | |||
1 | /* | ||
2 | drbd_state.c | ||
3 | |||
4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
5 | |||
6 | Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. | ||
7 | Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
8 | Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
9 | |||
10 | Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev | ||
11 | from Logicworks, Inc. for making SDP replication support possible. | ||
12 | |||
13 | drbd is free software; you can redistribute it and/or modify | ||
14 | it under the terms of the GNU General Public License as published by | ||
15 | the Free Software Foundation; either version 2, or (at your option) | ||
16 | any later version. | ||
17 | |||
18 | drbd is distributed in the hope that it will be useful, | ||
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | GNU General Public License for more details. | ||
22 | |||
23 | You should have received a copy of the GNU General Public License | ||
24 | along with drbd; see the file COPYING. If not, write to | ||
25 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
26 | */ | ||
27 | |||
28 | #include <linux/drbd_limits.h> | ||
29 | #include "drbd_int.h" | ||
30 | #include "drbd_req.h" | ||
31 | |||
32 | /* in drbd_main.c */ | ||
33 | extern void tl_abort_disk_io(struct drbd_conf *mdev); | ||
34 | |||
35 | struct after_state_chg_work { | ||
36 | struct drbd_work w; | ||
37 | union drbd_state os; | ||
38 | union drbd_state ns; | ||
39 | enum chg_state_flags flags; | ||
40 | struct completion *done; | ||
41 | }; | ||
42 | |||
/*
 * Warning codes reported by sanitize_state() through its @warn argument.
 * print_sanitize_warnings() indexes its message table with these values,
 * so they must stay dense and start at zero.
 */
enum sanitize_state_warnings {
	NO_WARNING = 0,			/* nothing to report */
	ABORTED_ONLINE_VERIFY,		/* "Online-verify aborted." */
	ABORTED_RESYNC,			/* "Resync aborted." */
	CONNECTION_LOST_NEGOTIATING,	/* connection lost while negotiating */
	IMPLICITLY_UPGRADED_DISK,	/* local disk state was raised */
	IMPLICITLY_UPGRADED_PDSK,	/* peer disk state was raised */
};
51 | |||
52 | static int w_after_state_ch(struct drbd_work *w, int unused); | ||
53 | static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | ||
54 | union drbd_state ns, enum chg_state_flags flags); | ||
55 | static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); | ||
56 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_tconn *); | ||
57 | static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); | ||
58 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, | ||
59 | enum sanitize_state_warnings *warn); | ||
60 | |||
61 | static inline bool is_susp(union drbd_state s) | ||
62 | { | ||
63 | return s.susp || s.susp_nod || s.susp_fen; | ||
64 | } | ||
65 | |||
66 | bool conn_all_vols_unconf(struct drbd_tconn *tconn) | ||
67 | { | ||
68 | struct drbd_conf *mdev; | ||
69 | bool rv = true; | ||
70 | int vnr; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | idr_for_each_entry(&tconn->volumes, mdev, vnr) { | ||
74 | if (mdev->state.disk != D_DISKLESS || | ||
75 | mdev->state.conn != C_STANDALONE || | ||
76 | mdev->state.role != R_SECONDARY) { | ||
77 | rv = false; | ||
78 | break; | ||
79 | } | ||
80 | } | ||
81 | rcu_read_unlock(); | ||
82 | |||
83 | return rv; | ||
84 | } | ||
85 | |||
86 | /* Unfortunately the states where not correctly ordered, when | ||
87 | they where defined. therefore can not use max_t() here. */ | ||
88 | static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) | ||
89 | { | ||
90 | if (role1 == R_PRIMARY || role2 == R_PRIMARY) | ||
91 | return R_PRIMARY; | ||
92 | if (role1 == R_SECONDARY || role2 == R_SECONDARY) | ||
93 | return R_SECONDARY; | ||
94 | return R_UNKNOWN; | ||
95 | } | ||
96 | static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) | ||
97 | { | ||
98 | if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) | ||
99 | return R_UNKNOWN; | ||
100 | if (role1 == R_SECONDARY || role2 == R_SECONDARY) | ||
101 | return R_SECONDARY; | ||
102 | return R_PRIMARY; | ||
103 | } | ||
104 | |||
105 | enum drbd_role conn_highest_role(struct drbd_tconn *tconn) | ||
106 | { | ||
107 | enum drbd_role role = R_UNKNOWN; | ||
108 | struct drbd_conf *mdev; | ||
109 | int vnr; | ||
110 | |||
111 | rcu_read_lock(); | ||
112 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
113 | role = max_role(role, mdev->state.role); | ||
114 | rcu_read_unlock(); | ||
115 | |||
116 | return role; | ||
117 | } | ||
118 | |||
119 | enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) | ||
120 | { | ||
121 | enum drbd_role peer = R_UNKNOWN; | ||
122 | struct drbd_conf *mdev; | ||
123 | int vnr; | ||
124 | |||
125 | rcu_read_lock(); | ||
126 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
127 | peer = max_role(peer, mdev->state.peer); | ||
128 | rcu_read_unlock(); | ||
129 | |||
130 | return peer; | ||
131 | } | ||
132 | |||
133 | enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) | ||
134 | { | ||
135 | enum drbd_disk_state ds = D_DISKLESS; | ||
136 | struct drbd_conf *mdev; | ||
137 | int vnr; | ||
138 | |||
139 | rcu_read_lock(); | ||
140 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
141 | ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); | ||
142 | rcu_read_unlock(); | ||
143 | |||
144 | return ds; | ||
145 | } | ||
146 | |||
147 | enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn) | ||
148 | { | ||
149 | enum drbd_disk_state ds = D_MASK; | ||
150 | struct drbd_conf *mdev; | ||
151 | int vnr; | ||
152 | |||
153 | rcu_read_lock(); | ||
154 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
155 | ds = min_t(enum drbd_disk_state, ds, mdev->state.disk); | ||
156 | rcu_read_unlock(); | ||
157 | |||
158 | return ds; | ||
159 | } | ||
160 | |||
161 | enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) | ||
162 | { | ||
163 | enum drbd_disk_state ds = D_DISKLESS; | ||
164 | struct drbd_conf *mdev; | ||
165 | int vnr; | ||
166 | |||
167 | rcu_read_lock(); | ||
168 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
169 | ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); | ||
170 | rcu_read_unlock(); | ||
171 | |||
172 | return ds; | ||
173 | } | ||
174 | |||
175 | enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) | ||
176 | { | ||
177 | enum drbd_conns conn = C_MASK; | ||
178 | struct drbd_conf *mdev; | ||
179 | int vnr; | ||
180 | |||
181 | rcu_read_lock(); | ||
182 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
183 | conn = min_t(enum drbd_conns, conn, mdev->state.conn); | ||
184 | rcu_read_unlock(); | ||
185 | |||
186 | return conn; | ||
187 | } | ||
188 | |||
189 | static bool no_peer_wf_report_params(struct drbd_tconn *tconn) | ||
190 | { | ||
191 | struct drbd_conf *mdev; | ||
192 | int vnr; | ||
193 | bool rv = true; | ||
194 | |||
195 | rcu_read_lock(); | ||
196 | idr_for_each_entry(&tconn->volumes, mdev, vnr) | ||
197 | if (mdev->state.conn == C_WF_REPORT_PARAMS) { | ||
198 | rv = false; | ||
199 | break; | ||
200 | } | ||
201 | rcu_read_unlock(); | ||
202 | |||
203 | return rv; | ||
204 | } | ||
205 | |||
206 | |||
207 | /** | ||
208 | * cl_wide_st_chg() - true if the state change is a cluster wide one | ||
209 | * @mdev: DRBD device. | ||
210 | * @os: old (current) state. | ||
211 | * @ns: new (wanted) state. | ||
212 | */ | ||
213 | static int cl_wide_st_chg(struct drbd_conf *mdev, | ||
214 | union drbd_state os, union drbd_state ns) | ||
215 | { | ||
216 | return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && | ||
217 | ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || | ||
218 | (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | ||
219 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || | ||
220 | (os.disk != D_FAILED && ns.disk == D_FAILED))) || | ||
221 | (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || | ||
222 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) || | ||
223 | (os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS); | ||
224 | } | ||
225 | |||
226 | static union drbd_state | ||
227 | apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val) | ||
228 | { | ||
229 | union drbd_state ns; | ||
230 | ns.i = (os.i & ~mask.i) | val.i; | ||
231 | return ns; | ||
232 | } | ||
233 | |||
234 | enum drbd_state_rv | ||
235 | drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | ||
236 | union drbd_state mask, union drbd_state val) | ||
237 | { | ||
238 | unsigned long flags; | ||
239 | union drbd_state ns; | ||
240 | enum drbd_state_rv rv; | ||
241 | |||
242 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
243 | ns = apply_mask_val(drbd_read_state(mdev), mask, val); | ||
244 | rv = _drbd_set_state(mdev, ns, f, NULL); | ||
245 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
246 | |||
247 | return rv; | ||
248 | } | ||
249 | |||
250 | /** | ||
251 | * drbd_force_state() - Impose a change which happens outside our control on our state | ||
252 | * @mdev: DRBD device. | ||
253 | * @mask: mask of state bits to change. | ||
254 | * @val: value of new state bits. | ||
255 | */ | ||
256 | void drbd_force_state(struct drbd_conf *mdev, | ||
257 | union drbd_state mask, union drbd_state val) | ||
258 | { | ||
259 | drbd_change_state(mdev, CS_HARD, mask, val); | ||
260 | } | ||
261 | |||
262 | static enum drbd_state_rv | ||
263 | _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, | ||
264 | union drbd_state val) | ||
265 | { | ||
266 | union drbd_state os, ns; | ||
267 | unsigned long flags; | ||
268 | enum drbd_state_rv rv; | ||
269 | |||
270 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) | ||
271 | return SS_CW_SUCCESS; | ||
272 | |||
273 | if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) | ||
274 | return SS_CW_FAILED_BY_PEER; | ||
275 | |||
276 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
277 | os = drbd_read_state(mdev); | ||
278 | ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); | ||
279 | rv = is_valid_transition(os, ns); | ||
280 | if (rv >= SS_SUCCESS) | ||
281 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ | ||
282 | |||
283 | if (!cl_wide_st_chg(mdev, os, ns)) | ||
284 | rv = SS_CW_NO_NEED; | ||
285 | if (rv == SS_UNKNOWN_ERROR) { | ||
286 | rv = is_valid_state(mdev, ns); | ||
287 | if (rv >= SS_SUCCESS) { | ||
288 | rv = is_valid_soft_transition(os, ns, mdev->tconn); | ||
289 | if (rv >= SS_SUCCESS) | ||
290 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ | ||
291 | } | ||
292 | } | ||
293 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
294 | |||
295 | return rv; | ||
296 | } | ||
297 | |||
298 | /** | ||
299 | * drbd_req_state() - Perform an eventually cluster wide state change | ||
300 | * @mdev: DRBD device. | ||
301 | * @mask: mask of state bits to change. | ||
302 | * @val: value of new state bits. | ||
303 | * @f: flags | ||
304 | * | ||
305 | * Should not be called directly, use drbd_request_state() or | ||
306 | * _drbd_request_state(). | ||
307 | */ | ||
308 | static enum drbd_state_rv | ||
309 | drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, | ||
310 | union drbd_state val, enum chg_state_flags f) | ||
311 | { | ||
312 | struct completion done; | ||
313 | unsigned long flags; | ||
314 | union drbd_state os, ns; | ||
315 | enum drbd_state_rv rv; | ||
316 | |||
317 | init_completion(&done); | ||
318 | |||
319 | if (f & CS_SERIALIZE) | ||
320 | mutex_lock(mdev->state_mutex); | ||
321 | |||
322 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
323 | os = drbd_read_state(mdev); | ||
324 | ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); | ||
325 | rv = is_valid_transition(os, ns); | ||
326 | if (rv < SS_SUCCESS) { | ||
327 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
328 | goto abort; | ||
329 | } | ||
330 | |||
331 | if (cl_wide_st_chg(mdev, os, ns)) { | ||
332 | rv = is_valid_state(mdev, ns); | ||
333 | if (rv == SS_SUCCESS) | ||
334 | rv = is_valid_soft_transition(os, ns, mdev->tconn); | ||
335 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
336 | |||
337 | if (rv < SS_SUCCESS) { | ||
338 | if (f & CS_VERBOSE) | ||
339 | print_st_err(mdev, os, ns, rv); | ||
340 | goto abort; | ||
341 | } | ||
342 | |||
343 | if (drbd_send_state_req(mdev, mask, val)) { | ||
344 | rv = SS_CW_FAILED_BY_PEER; | ||
345 | if (f & CS_VERBOSE) | ||
346 | print_st_err(mdev, os, ns, rv); | ||
347 | goto abort; | ||
348 | } | ||
349 | |||
350 | wait_event(mdev->state_wait, | ||
351 | (rv = _req_st_cond(mdev, mask, val))); | ||
352 | |||
353 | if (rv < SS_SUCCESS) { | ||
354 | if (f & CS_VERBOSE) | ||
355 | print_st_err(mdev, os, ns, rv); | ||
356 | goto abort; | ||
357 | } | ||
358 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
359 | ns = apply_mask_val(drbd_read_state(mdev), mask, val); | ||
360 | rv = _drbd_set_state(mdev, ns, f, &done); | ||
361 | } else { | ||
362 | rv = _drbd_set_state(mdev, ns, f, &done); | ||
363 | } | ||
364 | |||
365 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
366 | |||
367 | if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { | ||
368 | D_ASSERT(current != mdev->tconn->worker.task); | ||
369 | wait_for_completion(&done); | ||
370 | } | ||
371 | |||
372 | abort: | ||
373 | if (f & CS_SERIALIZE) | ||
374 | mutex_unlock(mdev->state_mutex); | ||
375 | |||
376 | return rv; | ||
377 | } | ||
378 | |||
379 | /** | ||
380 | * _drbd_request_state() - Request a state change (with flags) | ||
381 | * @mdev: DRBD device. | ||
382 | * @mask: mask of state bits to change. | ||
383 | * @val: value of new state bits. | ||
384 | * @f: flags | ||
385 | * | ||
386 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE | ||
387 | * flag, or when logging of failed state change requests is not desired. | ||
388 | */ | ||
389 | enum drbd_state_rv | ||
390 | _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, | ||
391 | union drbd_state val, enum chg_state_flags f) | ||
392 | { | ||
393 | enum drbd_state_rv rv; | ||
394 | |||
395 | wait_event(mdev->state_wait, | ||
396 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); | ||
397 | |||
398 | return rv; | ||
399 | } | ||
400 | |||
401 | static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) | ||
402 | { | ||
403 | dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", | ||
404 | name, | ||
405 | drbd_conn_str(ns.conn), | ||
406 | drbd_role_str(ns.role), | ||
407 | drbd_role_str(ns.peer), | ||
408 | drbd_disk_str(ns.disk), | ||
409 | drbd_disk_str(ns.pdsk), | ||
410 | is_susp(ns) ? 's' : 'r', | ||
411 | ns.aftr_isp ? 'a' : '-', | ||
412 | ns.peer_isp ? 'p' : '-', | ||
413 | ns.user_isp ? 'u' : '-', | ||
414 | ns.susp_fen ? 'F' : '-', | ||
415 | ns.susp_nod ? 'N' : '-' | ||
416 | ); | ||
417 | } | ||
418 | |||
419 | void print_st_err(struct drbd_conf *mdev, union drbd_state os, | ||
420 | union drbd_state ns, enum drbd_state_rv err) | ||
421 | { | ||
422 | if (err == SS_IN_TRANSIENT_STATE) | ||
423 | return; | ||
424 | dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); | ||
425 | print_st(mdev, " state", os); | ||
426 | print_st(mdev, "wanted", ns); | ||
427 | } | ||
428 | |||
429 | static long print_state_change(char *pb, union drbd_state os, union drbd_state ns, | ||
430 | enum chg_state_flags flags) | ||
431 | { | ||
432 | char *pbp; | ||
433 | pbp = pb; | ||
434 | *pbp = 0; | ||
435 | |||
436 | if (ns.role != os.role && flags & CS_DC_ROLE) | ||
437 | pbp += sprintf(pbp, "role( %s -> %s ) ", | ||
438 | drbd_role_str(os.role), | ||
439 | drbd_role_str(ns.role)); | ||
440 | if (ns.peer != os.peer && flags & CS_DC_PEER) | ||
441 | pbp += sprintf(pbp, "peer( %s -> %s ) ", | ||
442 | drbd_role_str(os.peer), | ||
443 | drbd_role_str(ns.peer)); | ||
444 | if (ns.conn != os.conn && flags & CS_DC_CONN) | ||
445 | pbp += sprintf(pbp, "conn( %s -> %s ) ", | ||
446 | drbd_conn_str(os.conn), | ||
447 | drbd_conn_str(ns.conn)); | ||
448 | if (ns.disk != os.disk && flags & CS_DC_DISK) | ||
449 | pbp += sprintf(pbp, "disk( %s -> %s ) ", | ||
450 | drbd_disk_str(os.disk), | ||
451 | drbd_disk_str(ns.disk)); | ||
452 | if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK) | ||
453 | pbp += sprintf(pbp, "pdsk( %s -> %s ) ", | ||
454 | drbd_disk_str(os.pdsk), | ||
455 | drbd_disk_str(ns.pdsk)); | ||
456 | |||
457 | return pbp - pb; | ||
458 | } | ||
459 | |||
460 | static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, | ||
461 | enum chg_state_flags flags) | ||
462 | { | ||
463 | char pb[300]; | ||
464 | char *pbp = pb; | ||
465 | |||
466 | pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK); | ||
467 | |||
468 | if (ns.aftr_isp != os.aftr_isp) | ||
469 | pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", | ||
470 | os.aftr_isp, | ||
471 | ns.aftr_isp); | ||
472 | if (ns.peer_isp != os.peer_isp) | ||
473 | pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", | ||
474 | os.peer_isp, | ||
475 | ns.peer_isp); | ||
476 | if (ns.user_isp != os.user_isp) | ||
477 | pbp += sprintf(pbp, "user_isp( %d -> %d ) ", | ||
478 | os.user_isp, | ||
479 | ns.user_isp); | ||
480 | |||
481 | if (pbp != pb) | ||
482 | dev_info(DEV, "%s\n", pb); | ||
483 | } | ||
484 | |||
485 | static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, | ||
486 | enum chg_state_flags flags) | ||
487 | { | ||
488 | char pb[300]; | ||
489 | char *pbp = pb; | ||
490 | |||
491 | pbp += print_state_change(pbp, os, ns, flags); | ||
492 | |||
493 | if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP) | ||
494 | pbp += sprintf(pbp, "susp( %d -> %d ) ", | ||
495 | is_susp(os), | ||
496 | is_susp(ns)); | ||
497 | |||
498 | if (pbp != pb) | ||
499 | conn_info(tconn, "%s\n", pb); | ||
500 | } | ||
501 | |||
502 | |||
503 | /** | ||
504 | * is_valid_state() - Returns an SS_ error code if ns is not valid | ||
505 | * @mdev: DRBD device. | ||
506 | * @ns: State to consider. | ||
507 | */ | ||
508 | static enum drbd_state_rv | ||
509 | is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | ||
510 | { | ||
511 | /* See drbd_state_sw_errors in drbd_strings.c */ | ||
512 | |||
513 | enum drbd_fencing_p fp; | ||
514 | enum drbd_state_rv rv = SS_SUCCESS; | ||
515 | struct net_conf *nc; | ||
516 | |||
517 | rcu_read_lock(); | ||
518 | fp = FP_DONT_CARE; | ||
519 | if (get_ldev(mdev)) { | ||
520 | fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; | ||
521 | put_ldev(mdev); | ||
522 | } | ||
523 | |||
524 | nc = rcu_dereference(mdev->tconn->net_conf); | ||
525 | if (nc) { | ||
526 | if (!nc->two_primaries && ns.role == R_PRIMARY) { | ||
527 | if (ns.peer == R_PRIMARY) | ||
528 | rv = SS_TWO_PRIMARIES; | ||
529 | else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) | ||
530 | rv = SS_O_VOL_PEER_PRI; | ||
531 | } | ||
532 | } | ||
533 | |||
534 | if (rv <= 0) | ||
535 | /* already found a reason to abort */; | ||
536 | else if (ns.role == R_SECONDARY && mdev->open_cnt) | ||
537 | rv = SS_DEVICE_IN_USE; | ||
538 | |||
539 | else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) | ||
540 | rv = SS_NO_UP_TO_DATE_DISK; | ||
541 | |||
542 | else if (fp >= FP_RESOURCE && | ||
543 | ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) | ||
544 | rv = SS_PRIMARY_NOP; | ||
545 | |||
546 | else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) | ||
547 | rv = SS_NO_UP_TO_DATE_DISK; | ||
548 | |||
549 | else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) | ||
550 | rv = SS_NO_LOCAL_DISK; | ||
551 | |||
552 | else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) | ||
553 | rv = SS_NO_REMOTE_DISK; | ||
554 | |||
555 | else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) | ||
556 | rv = SS_NO_UP_TO_DATE_DISK; | ||
557 | |||
558 | else if ((ns.conn == C_CONNECTED || | ||
559 | ns.conn == C_WF_BITMAP_S || | ||
560 | ns.conn == C_SYNC_SOURCE || | ||
561 | ns.conn == C_PAUSED_SYNC_S) && | ||
562 | ns.disk == D_OUTDATED) | ||
563 | rv = SS_CONNECTED_OUTDATES; | ||
564 | |||
565 | else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
566 | (nc->verify_alg[0] == 0)) | ||
567 | rv = SS_NO_VERIFY_ALG; | ||
568 | |||
569 | else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
570 | mdev->tconn->agreed_pro_version < 88) | ||
571 | rv = SS_NOT_SUPPORTED; | ||
572 | |||
573 | else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) | ||
574 | rv = SS_CONNECTED_OUTDATES; | ||
575 | |||
576 | rcu_read_unlock(); | ||
577 | |||
578 | return rv; | ||
579 | } | ||
580 | |||
581 | /** | ||
582 | * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible | ||
583 | * This function limits state transitions that may be declined by DRBD. I.e. | ||
584 | * user requests (aka soft transitions). | ||
585 | * @mdev: DRBD device. | ||
586 | * @ns: new state. | ||
587 | * @os: old state. | ||
588 | */ | ||
589 | static enum drbd_state_rv | ||
590 | is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn) | ||
591 | { | ||
592 | enum drbd_state_rv rv = SS_SUCCESS; | ||
593 | |||
594 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && | ||
595 | os.conn > C_CONNECTED) | ||
596 | rv = SS_RESYNC_RUNNING; | ||
597 | |||
598 | if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) | ||
599 | rv = SS_ALREADY_STANDALONE; | ||
600 | |||
601 | if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) | ||
602 | rv = SS_IS_DISKLESS; | ||
603 | |||
604 | if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) | ||
605 | rv = SS_NO_NET_CONFIG; | ||
606 | |||
607 | if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) | ||
608 | rv = SS_LOWER_THAN_OUTDATED; | ||
609 | |||
610 | if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) | ||
611 | rv = SS_IN_TRANSIENT_STATE; | ||
612 | |||
613 | /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) | ||
614 | rv = SS_IN_TRANSIENT_STATE; */ | ||
615 | |||
616 | /* While establishing a connection only allow cstate to change. | ||
617 | Delay/refuse role changes, detach attach etc... */ | ||
618 | if (test_bit(STATE_SENT, &tconn->flags) && | ||
619 | !(os.conn == C_WF_REPORT_PARAMS || | ||
620 | (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) | ||
621 | rv = SS_IN_TRANSIENT_STATE; | ||
622 | |||
623 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) | ||
624 | rv = SS_NEED_CONNECTION; | ||
625 | |||
626 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
627 | ns.conn != os.conn && os.conn > C_CONNECTED) | ||
628 | rv = SS_RESYNC_RUNNING; | ||
629 | |||
630 | if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && | ||
631 | os.conn < C_CONNECTED) | ||
632 | rv = SS_NEED_CONNECTION; | ||
633 | |||
634 | if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) | ||
635 | && os.conn < C_WF_REPORT_PARAMS) | ||
636 | rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ | ||
637 | |||
638 | return rv; | ||
639 | } | ||
640 | |||
641 | static enum drbd_state_rv | ||
642 | is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) | ||
643 | { | ||
644 | /* no change -> nothing to do, at least for the connection part */ | ||
645 | if (oc == nc) | ||
646 | return SS_NOTHING_TO_DO; | ||
647 | |||
648 | /* disconnect of an unconfigured connection does not make sense */ | ||
649 | if (oc == C_STANDALONE && nc == C_DISCONNECTING) | ||
650 | return SS_ALREADY_STANDALONE; | ||
651 | |||
652 | /* from C_STANDALONE, we start with C_UNCONNECTED */ | ||
653 | if (oc == C_STANDALONE && nc != C_UNCONNECTED) | ||
654 | return SS_NEED_CONNECTION; | ||
655 | |||
656 | /* When establishing a connection we need to go through WF_REPORT_PARAMS! | ||
657 | Necessary to do the right thing upon invalidate-remote on a disconnected resource */ | ||
658 | if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED) | ||
659 | return SS_NEED_CONNECTION; | ||
660 | |||
661 | /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ | ||
662 | if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) | ||
663 | return SS_IN_TRANSIENT_STATE; | ||
664 | |||
665 | /* After C_DISCONNECTING only C_STANDALONE may follow */ | ||
666 | if (oc == C_DISCONNECTING && nc != C_STANDALONE) | ||
667 | return SS_IN_TRANSIENT_STATE; | ||
668 | |||
669 | return SS_SUCCESS; | ||
670 | } | ||
671 | |||
672 | |||
673 | /** | ||
674 | * is_valid_transition() - Returns an SS_ error code if the state transition is not possible | ||
675 | * This limits hard state transitions. Hard state transitions are facts there are | ||
676 | * imposed on DRBD by the environment. E.g. disk broke or network broke down. | ||
677 | * But those hard state transitions are still not allowed to do everything. | ||
678 | * @ns: new state. | ||
679 | * @os: old state. | ||
680 | */ | ||
681 | static enum drbd_state_rv | ||
682 | is_valid_transition(union drbd_state os, union drbd_state ns) | ||
683 | { | ||
684 | enum drbd_state_rv rv; | ||
685 | |||
686 | rv = is_valid_conn_transition(os.conn, ns.conn); | ||
687 | |||
688 | /* we cannot fail (again) if we already detached */ | ||
689 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) | ||
690 | rv = SS_IS_DISKLESS; | ||
691 | |||
692 | return rv; | ||
693 | } | ||
694 | |||
695 | static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) | ||
696 | { | ||
697 | static const char *msg_table[] = { | ||
698 | [NO_WARNING] = "", | ||
699 | [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", | ||
700 | [ABORTED_RESYNC] = "Resync aborted.", | ||
701 | [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", | ||
702 | [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", | ||
703 | [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", | ||
704 | }; | ||
705 | |||
706 | if (warn != NO_WARNING) | ||
707 | dev_warn(DEV, "%s\n", msg_table[warn]); | ||
708 | } | ||
709 | |||
710 | /** | ||
711 | * sanitize_state() - Resolves implicitly necessary additional changes to a state transition | ||
712 | * @mdev: DRBD device. | ||
713 | * @os: old state. | ||
714 | * @ns: new state. | ||
715 | * @warn_sync_abort: | ||
716 | * | ||
717 | * When we loose connection, we have to set the state of the peers disk (pdsk) | ||
718 | * to D_UNKNOWN. This rule and many more along those lines are in this function. | ||
719 | */ | ||
720 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, | ||
721 | enum sanitize_state_warnings *warn) | ||
722 | { | ||
723 | enum drbd_fencing_p fp; | ||
724 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | ||
725 | |||
726 | if (warn) | ||
727 | *warn = NO_WARNING; | ||
728 | |||
729 | fp = FP_DONT_CARE; | ||
730 | if (get_ldev(mdev)) { | ||
731 | rcu_read_lock(); | ||
732 | fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; | ||
733 | rcu_read_unlock(); | ||
734 | put_ldev(mdev); | ||
735 | } | ||
736 | |||
737 | /* Implications from connection to peer and peer_isp */ | ||
738 | if (ns.conn < C_CONNECTED) { | ||
739 | ns.peer_isp = 0; | ||
740 | ns.peer = R_UNKNOWN; | ||
741 | if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) | ||
742 | ns.pdsk = D_UNKNOWN; | ||
743 | } | ||
744 | |||
745 | /* Clear the aftr_isp when becoming unconfigured */ | ||
746 | if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) | ||
747 | ns.aftr_isp = 0; | ||
748 | |||
749 | /* An implication of the disk states onto the connection state */ | ||
750 | /* Abort resync if a disk fails/detaches */ | ||
751 | if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { | ||
752 | if (warn) | ||
753 | *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? | ||
754 | ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; | ||
755 | ns.conn = C_CONNECTED; | ||
756 | } | ||
757 | |||
758 | /* Connection breaks down before we finished "Negotiating" */ | ||
759 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && | ||
760 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | ||
761 | if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { | ||
762 | ns.disk = mdev->new_state_tmp.disk; | ||
763 | ns.pdsk = mdev->new_state_tmp.pdsk; | ||
764 | } else { | ||
765 | if (warn) | ||
766 | *warn = CONNECTION_LOST_NEGOTIATING; | ||
767 | ns.disk = D_DISKLESS; | ||
768 | ns.pdsk = D_UNKNOWN; | ||
769 | } | ||
770 | put_ldev(mdev); | ||
771 | } | ||
772 | |||
773 | /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ | ||
774 | if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { | ||
775 | if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) | ||
776 | ns.disk = D_UP_TO_DATE; | ||
777 | if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) | ||
778 | ns.pdsk = D_UP_TO_DATE; | ||
779 | } | ||
780 | |||
781 | /* Implications of the connection stat on the disk states */ | ||
782 | disk_min = D_DISKLESS; | ||
783 | disk_max = D_UP_TO_DATE; | ||
784 | pdsk_min = D_INCONSISTENT; | ||
785 | pdsk_max = D_UNKNOWN; | ||
786 | switch ((enum drbd_conns)ns.conn) { | ||
787 | case C_WF_BITMAP_T: | ||
788 | case C_PAUSED_SYNC_T: | ||
789 | case C_STARTING_SYNC_T: | ||
790 | case C_WF_SYNC_UUID: | ||
791 | case C_BEHIND: | ||
792 | disk_min = D_INCONSISTENT; | ||
793 | disk_max = D_OUTDATED; | ||
794 | pdsk_min = D_UP_TO_DATE; | ||
795 | pdsk_max = D_UP_TO_DATE; | ||
796 | break; | ||
797 | case C_VERIFY_S: | ||
798 | case C_VERIFY_T: | ||
799 | disk_min = D_UP_TO_DATE; | ||
800 | disk_max = D_UP_TO_DATE; | ||
801 | pdsk_min = D_UP_TO_DATE; | ||
802 | pdsk_max = D_UP_TO_DATE; | ||
803 | break; | ||
804 | case C_CONNECTED: | ||
805 | disk_min = D_DISKLESS; | ||
806 | disk_max = D_UP_TO_DATE; | ||
807 | pdsk_min = D_DISKLESS; | ||
808 | pdsk_max = D_UP_TO_DATE; | ||
809 | break; | ||
810 | case C_WF_BITMAP_S: | ||
811 | case C_PAUSED_SYNC_S: | ||
812 | case C_STARTING_SYNC_S: | ||
813 | case C_AHEAD: | ||
814 | disk_min = D_UP_TO_DATE; | ||
815 | disk_max = D_UP_TO_DATE; | ||
816 | pdsk_min = D_INCONSISTENT; | ||
817 | pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/ | ||
818 | break; | ||
819 | case C_SYNC_TARGET: | ||
820 | disk_min = D_INCONSISTENT; | ||
821 | disk_max = D_INCONSISTENT; | ||
822 | pdsk_min = D_UP_TO_DATE; | ||
823 | pdsk_max = D_UP_TO_DATE; | ||
824 | break; | ||
825 | case C_SYNC_SOURCE: | ||
826 | disk_min = D_UP_TO_DATE; | ||
827 | disk_max = D_UP_TO_DATE; | ||
828 | pdsk_min = D_INCONSISTENT; | ||
829 | pdsk_max = D_INCONSISTENT; | ||
830 | break; | ||
831 | case C_STANDALONE: | ||
832 | case C_DISCONNECTING: | ||
833 | case C_UNCONNECTED: | ||
834 | case C_TIMEOUT: | ||
835 | case C_BROKEN_PIPE: | ||
836 | case C_NETWORK_FAILURE: | ||
837 | case C_PROTOCOL_ERROR: | ||
838 | case C_TEAR_DOWN: | ||
839 | case C_WF_CONNECTION: | ||
840 | case C_WF_REPORT_PARAMS: | ||
841 | case C_MASK: | ||
842 | break; | ||
843 | } | ||
844 | if (ns.disk > disk_max) | ||
845 | ns.disk = disk_max; | ||
846 | |||
847 | if (ns.disk < disk_min) { | ||
848 | if (warn) | ||
849 | *warn = IMPLICITLY_UPGRADED_DISK; | ||
850 | ns.disk = disk_min; | ||
851 | } | ||
852 | if (ns.pdsk > pdsk_max) | ||
853 | ns.pdsk = pdsk_max; | ||
854 | |||
855 | if (ns.pdsk < pdsk_min) { | ||
856 | if (warn) | ||
857 | *warn = IMPLICITLY_UPGRADED_PDSK; | ||
858 | ns.pdsk = pdsk_min; | ||
859 | } | ||
860 | |||
861 | if (fp == FP_STONITH && | ||
862 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) | ||
863 | ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ | ||
864 | |||
865 | if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && | ||
866 | (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) | ||
867 | ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ | ||
868 | |||
869 | if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { | ||
870 | if (ns.conn == C_SYNC_SOURCE) | ||
871 | ns.conn = C_PAUSED_SYNC_S; | ||
872 | if (ns.conn == C_SYNC_TARGET) | ||
873 | ns.conn = C_PAUSED_SYNC_T; | ||
874 | } else { | ||
875 | if (ns.conn == C_PAUSED_SYNC_S) | ||
876 | ns.conn = C_SYNC_SOURCE; | ||
877 | if (ns.conn == C_PAUSED_SYNC_T) | ||
878 | ns.conn = C_SYNC_TARGET; | ||
879 | } | ||
880 | |||
881 | return ns; | ||
882 | } | ||
883 | |||
884 | void drbd_resume_al(struct drbd_conf *mdev) | ||
885 | { | ||
886 | if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) | ||
887 | dev_info(DEV, "Resumed AL updates\n"); | ||
888 | } | ||
889 | |||
890 | /* helper for __drbd_set_state */ | ||
891 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | ||
892 | { | ||
893 | if (mdev->tconn->agreed_pro_version < 90) | ||
894 | mdev->ov_start_sector = 0; | ||
895 | mdev->rs_total = drbd_bm_bits(mdev); | ||
896 | mdev->ov_position = 0; | ||
897 | if (cs == C_VERIFY_T) { | ||
898 | /* starting online verify from an arbitrary position | ||
899 | * does not fit well into the existing protocol. | ||
900 | * on C_VERIFY_T, we initialize ov_left and friends | ||
901 | * implicitly in receive_DataRequest once the | ||
902 | * first P_OV_REQUEST is received */ | ||
903 | mdev->ov_start_sector = ~(sector_t)0; | ||
904 | } else { | ||
905 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); | ||
906 | if (bit >= mdev->rs_total) { | ||
907 | mdev->ov_start_sector = | ||
908 | BM_BIT_TO_SECT(mdev->rs_total - 1); | ||
909 | mdev->rs_total = 1; | ||
910 | } else | ||
911 | mdev->rs_total -= bit; | ||
912 | mdev->ov_position = mdev->ov_start_sector; | ||
913 | } | ||
914 | mdev->ov_left = mdev->rs_total; | ||
915 | } | ||
916 | |||
917 | /** | ||
918 | * __drbd_set_state() - Set a new DRBD state | ||
919 | * @mdev: DRBD device. | ||
920 | * @ns: new state. | ||
921 | * @flags: Flags | ||
922 | * @done: Optional completion, that will get completed after the after_state_ch() finished | ||
923 | * | ||
924 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. | ||
925 | */ | ||
926 | enum drbd_state_rv | ||
927 | __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | ||
928 | enum chg_state_flags flags, struct completion *done) | ||
929 | { | ||
930 | union drbd_state os; | ||
931 | enum drbd_state_rv rv = SS_SUCCESS; | ||
932 | enum sanitize_state_warnings ssw; | ||
933 | struct after_state_chg_work *ascw; | ||
934 | |||
935 | os = drbd_read_state(mdev); | ||
936 | |||
937 | ns = sanitize_state(mdev, ns, &ssw); | ||
938 | if (ns.i == os.i) | ||
939 | return SS_NOTHING_TO_DO; | ||
940 | |||
941 | rv = is_valid_transition(os, ns); | ||
942 | if (rv < SS_SUCCESS) | ||
943 | return rv; | ||
944 | |||
945 | if (!(flags & CS_HARD)) { | ||
946 | /* pre-state-change checks ; only look at ns */ | ||
947 | /* See drbd_state_sw_errors in drbd_strings.c */ | ||
948 | |||
949 | rv = is_valid_state(mdev, ns); | ||
950 | if (rv < SS_SUCCESS) { | ||
951 | /* If the old state was illegal as well, then let | ||
952 | this happen...*/ | ||
953 | |||
954 | if (is_valid_state(mdev, os) == rv) | ||
955 | rv = is_valid_soft_transition(os, ns, mdev->tconn); | ||
956 | } else | ||
957 | rv = is_valid_soft_transition(os, ns, mdev->tconn); | ||
958 | } | ||
959 | |||
960 | if (rv < SS_SUCCESS) { | ||
961 | if (flags & CS_VERBOSE) | ||
962 | print_st_err(mdev, os, ns, rv); | ||
963 | return rv; | ||
964 | } | ||
965 | |||
966 | print_sanitize_warnings(mdev, ssw); | ||
967 | |||
968 | drbd_pr_state_change(mdev, os, ns, flags); | ||
969 | |||
970 | /* Display changes to the susp* flags that where caused by the call to | ||
971 | sanitize_state(). Only display it here if we where not called from | ||
972 | _conn_request_state() */ | ||
973 | if (!(flags & CS_DC_SUSP)) | ||
974 | conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); | ||
975 | |||
976 | /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference | ||
977 | * on the ldev here, to be sure the transition -> D_DISKLESS resp. | ||
978 | * drbd_ldev_destroy() won't happen before our corresponding | ||
979 | * after_state_ch works run, where we put_ldev again. */ | ||
980 | if ((os.disk != D_FAILED && ns.disk == D_FAILED) || | ||
981 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) | ||
982 | atomic_inc(&mdev->local_cnt); | ||
983 | |||
984 | mdev->state.i = ns.i; | ||
985 | mdev->tconn->susp = ns.susp; | ||
986 | mdev->tconn->susp_nod = ns.susp_nod; | ||
987 | mdev->tconn->susp_fen = ns.susp_fen; | ||
988 | |||
989 | if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) | ||
990 | drbd_print_uuids(mdev, "attached to UUIDs"); | ||
991 | |||
992 | /* Wake up role changes, that were delayed because of connection establishing */ | ||
993 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && | ||
994 | no_peer_wf_report_params(mdev->tconn)) | ||
995 | clear_bit(STATE_SENT, &mdev->tconn->flags); | ||
996 | |||
997 | wake_up(&mdev->misc_wait); | ||
998 | wake_up(&mdev->state_wait); | ||
999 | wake_up(&mdev->tconn->ping_wait); | ||
1000 | |||
1001 | /* Aborted verify run, or we reached the stop sector. | ||
1002 | * Log the last position, unless end-of-device. */ | ||
1003 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && | ||
1004 | ns.conn <= C_CONNECTED) { | ||
1005 | mdev->ov_start_sector = | ||
1006 | BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); | ||
1007 | if (mdev->ov_left) | ||
1008 | dev_info(DEV, "Online Verify reached sector %llu\n", | ||
1009 | (unsigned long long)mdev->ov_start_sector); | ||
1010 | } | ||
1011 | |||
1012 | if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && | ||
1013 | (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { | ||
1014 | dev_info(DEV, "Syncer continues.\n"); | ||
1015 | mdev->rs_paused += (long)jiffies | ||
1016 | -(long)mdev->rs_mark_time[mdev->rs_last_mark]; | ||
1017 | if (ns.conn == C_SYNC_TARGET) | ||
1018 | mod_timer(&mdev->resync_timer, jiffies); | ||
1019 | } | ||
1020 | |||
1021 | if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && | ||
1022 | (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { | ||
1023 | dev_info(DEV, "Resync suspended\n"); | ||
1024 | mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; | ||
1025 | } | ||
1026 | |||
1027 | if (os.conn == C_CONNECTED && | ||
1028 | (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { | ||
1029 | unsigned long now = jiffies; | ||
1030 | int i; | ||
1031 | |||
1032 | set_ov_position(mdev, ns.conn); | ||
1033 | mdev->rs_start = now; | ||
1034 | mdev->rs_last_events = 0; | ||
1035 | mdev->rs_last_sect_ev = 0; | ||
1036 | mdev->ov_last_oos_size = 0; | ||
1037 | mdev->ov_last_oos_start = 0; | ||
1038 | |||
1039 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { | ||
1040 | mdev->rs_mark_left[i] = mdev->ov_left; | ||
1041 | mdev->rs_mark_time[i] = now; | ||
1042 | } | ||
1043 | |||
1044 | drbd_rs_controller_reset(mdev); | ||
1045 | |||
1046 | if (ns.conn == C_VERIFY_S) { | ||
1047 | dev_info(DEV, "Starting Online Verify from sector %llu\n", | ||
1048 | (unsigned long long)mdev->ov_position); | ||
1049 | mod_timer(&mdev->resync_timer, jiffies); | ||
1050 | } | ||
1051 | } | ||
1052 | |||
1053 | if (get_ldev(mdev)) { | ||
1054 | u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| | ||
1055 | MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| | ||
1056 | MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); | ||
1057 | |||
1058 | mdf &= ~MDF_AL_CLEAN; | ||
1059 | if (test_bit(CRASHED_PRIMARY, &mdev->flags)) | ||
1060 | mdf |= MDF_CRASHED_PRIMARY; | ||
1061 | if (mdev->state.role == R_PRIMARY || | ||
1062 | (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) | ||
1063 | mdf |= MDF_PRIMARY_IND; | ||
1064 | if (mdev->state.conn > C_WF_REPORT_PARAMS) | ||
1065 | mdf |= MDF_CONNECTED_IND; | ||
1066 | if (mdev->state.disk > D_INCONSISTENT) | ||
1067 | mdf |= MDF_CONSISTENT; | ||
1068 | if (mdev->state.disk > D_OUTDATED) | ||
1069 | mdf |= MDF_WAS_UP_TO_DATE; | ||
1070 | if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) | ||
1071 | mdf |= MDF_PEER_OUT_DATED; | ||
1072 | if (mdf != mdev->ldev->md.flags) { | ||
1073 | mdev->ldev->md.flags = mdf; | ||
1074 | drbd_md_mark_dirty(mdev); | ||
1075 | } | ||
1076 | if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) | ||
1077 | drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); | ||
1078 | put_ldev(mdev); | ||
1079 | } | ||
1080 | |||
1081 | /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ | ||
1082 | if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && | ||
1083 | os.peer == R_SECONDARY && ns.peer == R_PRIMARY) | ||
1084 | set_bit(CONSIDER_RESYNC, &mdev->flags); | ||
1085 | |||
1086 | /* Receiver should clean up itself */ | ||
1087 | if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) | ||
1088 | drbd_thread_stop_nowait(&mdev->tconn->receiver); | ||
1089 | |||
1090 | /* Now the receiver finished cleaning up itself, it should die */ | ||
1091 | if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) | ||
1092 | drbd_thread_stop_nowait(&mdev->tconn->receiver); | ||
1093 | |||
1094 | /* Upon network failure, we need to restart the receiver. */ | ||
1095 | if (os.conn > C_WF_CONNECTION && | ||
1096 | ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) | ||
1097 | drbd_thread_restart_nowait(&mdev->tconn->receiver); | ||
1098 | |||
1099 | /* Resume AL writing if we get a connection */ | ||
1100 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) | ||
1101 | drbd_resume_al(mdev); | ||
1102 | |||
1103 | /* remember last attach time so request_timer_fn() won't | ||
1104 | * kill newly established sessions while we are still trying to thaw | ||
1105 | * previously frozen IO */ | ||
1106 | if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && | ||
1107 | ns.disk > D_NEGOTIATING) | ||
1108 | mdev->last_reattach_jif = jiffies; | ||
1109 | |||
1110 | ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); | ||
1111 | if (ascw) { | ||
1112 | ascw->os = os; | ||
1113 | ascw->ns = ns; | ||
1114 | ascw->flags = flags; | ||
1115 | ascw->w.cb = w_after_state_ch; | ||
1116 | ascw->w.mdev = mdev; | ||
1117 | ascw->done = done; | ||
1118 | drbd_queue_work(&mdev->tconn->sender_work, &ascw->w); | ||
1119 | } else { | ||
1120 | dev_err(DEV, "Could not kmalloc an ascw\n"); | ||
1121 | } | ||
1122 | |||
1123 | return rv; | ||
1124 | } | ||
1125 | |||
1126 | static int w_after_state_ch(struct drbd_work *w, int unused) | ||
1127 | { | ||
1128 | struct after_state_chg_work *ascw = | ||
1129 | container_of(w, struct after_state_chg_work, w); | ||
1130 | struct drbd_conf *mdev = w->mdev; | ||
1131 | |||
1132 | after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); | ||
1133 | if (ascw->flags & CS_WAIT_COMPLETE) { | ||
1134 | D_ASSERT(ascw->done != NULL); | ||
1135 | complete(ascw->done); | ||
1136 | } | ||
1137 | kfree(ascw); | ||
1138 | |||
1139 | return 0; | ||
1140 | } | ||
1141 | |||
1142 | static void abw_start_sync(struct drbd_conf *mdev, int rv) | ||
1143 | { | ||
1144 | if (rv) { | ||
1145 | dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); | ||
1146 | _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); | ||
1147 | return; | ||
1148 | } | ||
1149 | |||
1150 | switch (mdev->state.conn) { | ||
1151 | case C_STARTING_SYNC_T: | ||
1152 | _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); | ||
1153 | break; | ||
1154 | case C_STARTING_SYNC_S: | ||
1155 | drbd_start_resync(mdev, C_SYNC_SOURCE); | ||
1156 | break; | ||
1157 | } | ||
1158 | } | ||
1159 | |||
1160 | int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, | ||
1161 | int (*io_fn)(struct drbd_conf *), | ||
1162 | char *why, enum bm_flag flags) | ||
1163 | { | ||
1164 | int rv; | ||
1165 | |||
1166 | D_ASSERT(current == mdev->tconn->worker.task); | ||
1167 | |||
1168 | /* open coded non-blocking drbd_suspend_io(mdev); */ | ||
1169 | set_bit(SUSPEND_IO, &mdev->flags); | ||
1170 | |||
1171 | drbd_bm_lock(mdev, why, flags); | ||
1172 | rv = io_fn(mdev); | ||
1173 | drbd_bm_unlock(mdev); | ||
1174 | |||
1175 | drbd_resume_io(mdev); | ||
1176 | |||
1177 | return rv; | ||
1178 | } | ||
1179 | |||
/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @flags:	Flags
 *
 * Called from w_after_state_ch() with the old/new state pair recorded by
 * __drbd_set_state().  Each block below reacts to one specific kind of
 * transition; the order of the blocks matters in several places (see the
 * comments), so do not reorder them.
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	/* Connection (re-)established: forget the crashed-primary marker
	 * and clear bit value 2 in the peer's UI_FLAGS. */
	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	/* Inform userspace about the change... */
	drbd_bcast_event(mdev, &sib);

	/* Newly became Primary without up-to-date data on either side:
	 * call the "pri-on-incon-degr" helper. */
	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	/* IO was suspended for lack of data: if a connection or the local
	 * disk came back, restart the frozen requests and clear susp_nod. */
	if (ns.susp_nod) {
		struct drbd_tconn *tconn = mdev->tconn;
		enum drbd_req_event what = NOTHING;

		spin_lock_irq(&tconn->req_lock);
		if (os.conn < C_CONNECTED && conn_lowest_conn(tconn) >= C_CONNECTED)
			what = RESEND;

		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
		    conn_lowest_disk(tconn) > D_NEGOTIATING)
			what = RESTART_FROZEN_DISK_IO;

		if (tconn->susp_nod && what != NOTHING) {
			_tl_restart(tconn, what);
			_conn_request_state(tconn,
					    (union drbd_state) { { .susp_nod = 1 } },
					    (union drbd_state) { { .susp_nod = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&tconn->req_lock);
	}

	/* IO was suspended for fencing: handle the "connection is back" case. */
	if (ns.susp_fen) {
		struct drbd_tconn *tconn = mdev->tconn;

		spin_lock_irq(&tconn->req_lock);
		if (tconn->susp_fen && conn_lowest_conn(tconn) >= C_CONNECTED) {
			/* case2: The connection was established again: */
			struct drbd_conf *odev;
			int vnr;

			rcu_read_lock();
			idr_for_each_entry(&tconn->volumes, odev, vnr)
				clear_bit(NEW_CUR_UUID, &odev->flags);
			rcu_read_unlock();
			_tl_restart(tconn, RESEND);
			_conn_request_state(tconn,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&tconn->req_lock);
	}

	/* Became sync source.  With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS &&
	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
		/* we probably will start a resync soon.
		 * make sure those things are properly reset. */
		mdev->rs_total = 0;
		mdev->rs_failed = 0;
		atomic_set(&mdev->rs_pending_cnt, 0);
		drbd_rs_cancel_all(mdev);

		drbd_send_uuids(mdev);
		drbd_send_state(mdev, ns);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				/* While IO is suspended only remember that a new
				 * current UUID is needed; otherwise create and
				 * send it right away. */
				if (drbd_suspended(mdev)) {
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		/* Peer without usable disk got promoted */
		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(mdev);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev, ns);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(mdev, ns);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(mdev);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev, ns);

	/* Entering C_AHEAD: tell the peer. */
	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(mdev, ns);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(mdev,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* We are invalidating our self... */
	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
		/* other bitmap operation expected during this phase */
		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			"set_n_write from invalidate", BM_LOCKED_MASK);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh = EP_PASS_ON;
		int was_io_error = 0;
		/* corresponding get_ldev was in __drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS.
		 * But it is still not safe to dereference ldev here, since
		 * we might come from a failed Attach before ldev was set. */
		if (mdev->ldev) {
			rcu_read_lock();
			eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
			rcu_read_unlock();

			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

			if (was_io_error && eh == EP_CALL_HELPER)
				drbd_khelper(mdev, "local-io-error");

			/* Immediately allow completion of all application IO,
			 * that waits for completion from the local disk,
			 * if this was a force-detach due to disk_timeout
			 * or administrator request (drbdsetup detach --force).
			 * Do NOT abort otherwise.
			 * Aborting local requests may cause serious problems,
			 * if requests are completed to upper layers already,
			 * and then later the already submitted local bio completes.
			 * This can cause DMA into former bio pages that meanwhile
			 * have been re-used for other things.
			 * So aborting local requests may cause crashes,
			 * or even worse, silent data corruption.
			 */
			if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
				tl_abort_disk_io(mdev);

			/* current state still has to be D_FAILED,
			 * there is only one way out: to D_DISKLESS,
			 * and that may only happen after our put_ldev below. */
			if (mdev->state.disk != D_FAILED)
				dev_err(DEV,
					"ASSERT FAILED: disk is %s during detach\n",
					drbd_disk_str(mdev->state.disk));

			if (ns.conn >= C_CONNECTED)
				drbd_send_state(mdev, ns);

			drbd_rs_cancel_all(mdev);

			/* In case we want to get something to stable storage still,
			 * this may be the last chance.
			 * Following put_ldev may transition to D_DISKLESS. */
			drbd_md_sync(mdev);
		}
		put_ldev(mdev);
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (mdev->state.disk != D_DISKLESS)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s while going diskless\n",
				drbd_disk_str(mdev->state.disk));

		if (ns.conn >= C_CONNECTED)
			drbd_send_state(mdev, ns);
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(mdev);
	}

	/* Notify peer that I had a local IO error, and did not detach. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev, ns);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(mdev);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(mdev);

	/* sync target done with resync.  Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(mdev, ns);

	/* Verify finished, or reached stop sector.  Peer did not know about
	 * the stop sector, and we may even have changed the stop sector during
	 * verify to interrupt/stop early.  Send the new state. */
	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
	&& verify_can_do_stop_sector(mdev))
		drbd_send_state(mdev, ns);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or because of connection loss.
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 * No harm done if some bits change during this phase.
	 */
	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(mdev);
	}

	/* Diskless, standalone and Secondary: a change of aftr_isp still
	 * has to wake up the next device waiting to resync. */
	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(mdev);
	}

	drbd_md_sync(mdev);
}
1505 | |||
/* Work item carrying a connection-wide state change to
 * w_after_conn_state_ch(), which recovers it from @w via container_of(). */
struct after_conn_state_chg_work {
	struct drbd_work w;	/* embedded work; w.tconn names the connection */
	enum drbd_conns oc;	/* connection state before the change */
	union drbd_state ns_min;	/* presumably new, min state, over all mdevs - confirm */
	union drbd_state ns_max; /* new, max state, over all mdevs */
	enum chg_state_flags flags;	/* flags of the state change request */
};
1513 | |||
1514 | static int w_after_conn_state_ch(struct drbd_work *w, int unused) | ||
1515 | { | ||
1516 | struct after_conn_state_chg_work *acscw = | ||
1517 | container_of(w, struct after_conn_state_chg_work, w); | ||
1518 | struct drbd_tconn *tconn = w->tconn; | ||
1519 | enum drbd_conns oc = acscw->oc; | ||
1520 | union drbd_state ns_max = acscw->ns_max; | ||
1521 | struct drbd_conf *mdev; | ||
1522 | int vnr; | ||
1523 | |||
1524 | kfree(acscw); | ||
1525 | |||
1526 | /* Upon network configuration, we need to start the receiver */ | ||
1527 | if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) | ||
1528 | drbd_thread_start(&tconn->receiver); | ||
1529 | |||
1530 | if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { | ||
1531 | struct net_conf *old_conf; | ||
1532 | |||
1533 | mutex_lock(&tconn->conf_update); | ||
1534 | old_conf = tconn->net_conf; | ||
1535 | tconn->my_addr_len = 0; | ||
1536 | tconn->peer_addr_len = 0; | ||
1537 | rcu_assign_pointer(tconn->net_conf, NULL); | ||
1538 | conn_free_crypto(tconn); | ||
1539 | mutex_unlock(&tconn->conf_update); | ||
1540 | |||
1541 | synchronize_rcu(); | ||
1542 | kfree(old_conf); | ||
1543 | } | ||
1544 | |||
1545 | if (ns_max.susp_fen) { | ||
1546 | /* case1: The outdate peer handler is successful: */ | ||
1547 | if (ns_max.pdsk <= D_OUTDATED) { | ||
1548 | rcu_read_lock(); | ||
1549 | idr_for_each_entry(&tconn->volumes, mdev, vnr) { | ||
1550 | if (test_bit(NEW_CUR_UUID, &mdev->flags)) { | ||
1551 | drbd_uuid_new_current(mdev); | ||
1552 | clear_bit(NEW_CUR_UUID, &mdev->flags); | ||
1553 | } | ||
1554 | } | ||
1555 | rcu_read_unlock(); | ||
1556 | spin_lock_irq(&tconn->req_lock); | ||
1557 | _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); | ||
1558 | _conn_request_state(tconn, | ||
1559 | (union drbd_state) { { .susp_fen = 1 } }, | ||
1560 | (union drbd_state) { { .susp_fen = 0 } }, | ||
1561 | CS_VERBOSE); | ||
1562 | spin_unlock_irq(&tconn->req_lock); | ||
1563 | } | ||
1564 | } | ||
1565 | kref_put(&tconn->kref, &conn_destroy); | ||
1566 | |||
1567 | conn_md_sync(tconn); | ||
1568 | |||
1569 | return 0; | ||
1570 | } | ||
1571 | |||
1572 | void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) | ||
1573 | { | ||
1574 | enum chg_state_flags flags = ~0; | ||
1575 | struct drbd_conf *mdev; | ||
1576 | int vnr, first_vol = 1; | ||
1577 | union drbd_dev_state os, cs = { | ||
1578 | { .role = R_SECONDARY, | ||
1579 | .peer = R_UNKNOWN, | ||
1580 | .conn = tconn->cstate, | ||
1581 | .disk = D_DISKLESS, | ||
1582 | .pdsk = D_UNKNOWN, | ||
1583 | } }; | ||
1584 | |||
1585 | rcu_read_lock(); | ||
1586 | idr_for_each_entry(&tconn->volumes, mdev, vnr) { | ||
1587 | os = mdev->state; | ||
1588 | |||
1589 | if (first_vol) { | ||
1590 | cs = os; | ||
1591 | first_vol = 0; | ||
1592 | continue; | ||
1593 | } | ||
1594 | |||
1595 | if (cs.role != os.role) | ||
1596 | flags &= ~CS_DC_ROLE; | ||
1597 | |||
1598 | if (cs.peer != os.peer) | ||
1599 | flags &= ~CS_DC_PEER; | ||
1600 | |||
1601 | if (cs.conn != os.conn) | ||
1602 | flags &= ~CS_DC_CONN; | ||
1603 | |||
1604 | if (cs.disk != os.disk) | ||
1605 | flags &= ~CS_DC_DISK; | ||
1606 | |||
1607 | if (cs.pdsk != os.pdsk) | ||
1608 | flags &= ~CS_DC_PDSK; | ||
1609 | } | ||
1610 | rcu_read_unlock(); | ||
1611 | |||
1612 | *pf |= CS_DC_MASK; | ||
1613 | *pf &= flags; | ||
1614 | (*pcs).i = cs.i; | ||
1615 | } | ||
1616 | |||
1617 | static enum drbd_state_rv | ||
1618 | conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||
1619 | enum chg_state_flags flags) | ||
1620 | { | ||
1621 | enum drbd_state_rv rv = SS_SUCCESS; | ||
1622 | union drbd_state ns, os; | ||
1623 | struct drbd_conf *mdev; | ||
1624 | int vnr; | ||
1625 | |||
1626 | rcu_read_lock(); | ||
1627 | idr_for_each_entry(&tconn->volumes, mdev, vnr) { | ||
1628 | os = drbd_read_state(mdev); | ||
1629 | ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); | ||
1630 | |||
1631 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) | ||
1632 | ns.disk = os.disk; | ||
1633 | |||
1634 | if (ns.i == os.i) | ||
1635 | continue; | ||
1636 | |||
1637 | rv = is_valid_transition(os, ns); | ||
1638 | if (rv < SS_SUCCESS) | ||
1639 | break; | ||
1640 | |||
1641 | if (!(flags & CS_HARD)) { | ||
1642 | rv = is_valid_state(mdev, ns); | ||
1643 | if (rv < SS_SUCCESS) { | ||
1644 | if (is_valid_state(mdev, os) == rv) | ||
1645 | rv = is_valid_soft_transition(os, ns, tconn); | ||
1646 | } else | ||
1647 | rv = is_valid_soft_transition(os, ns, tconn); | ||
1648 | } | ||
1649 | if (rv < SS_SUCCESS) | ||
1650 | break; | ||
1651 | } | ||
1652 | rcu_read_unlock(); | ||
1653 | |||
1654 | if (rv < SS_SUCCESS && flags & CS_VERBOSE) | ||
1655 | print_st_err(mdev, os, ns, rv); | ||
1656 | |||
1657 | return rv; | ||
1658 | } | ||
1659 | |||
1660 | void | ||
1661 | conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||
1662 | union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) | ||
1663 | { | ||
1664 | union drbd_state ns, os, ns_max = { }; | ||
1665 | union drbd_state ns_min = { | ||
1666 | { .role = R_MASK, | ||
1667 | .peer = R_MASK, | ||
1668 | .conn = val.conn, | ||
1669 | .disk = D_MASK, | ||
1670 | .pdsk = D_MASK | ||
1671 | } }; | ||
1672 | struct drbd_conf *mdev; | ||
1673 | enum drbd_state_rv rv; | ||
1674 | int vnr, number_of_volumes = 0; | ||
1675 | |||
1676 | if (mask.conn == C_MASK) { | ||
1677 | /* remember last connect time so request_timer_fn() won't | ||
1678 | * kill newly established sessions while we are still trying to thaw | ||
1679 | * previously frozen IO */ | ||
1680 | if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS) | ||
1681 | tconn->last_reconnect_jif = jiffies; | ||
1682 | |||
1683 | tconn->cstate = val.conn; | ||
1684 | } | ||
1685 | |||
1686 | rcu_read_lock(); | ||
1687 | idr_for_each_entry(&tconn->volumes, mdev, vnr) { | ||
1688 | number_of_volumes++; | ||
1689 | os = drbd_read_state(mdev); | ||
1690 | ns = apply_mask_val(os, mask, val); | ||
1691 | ns = sanitize_state(mdev, ns, NULL); | ||
1692 | |||
1693 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) | ||
1694 | ns.disk = os.disk; | ||
1695 | |||
1696 | rv = __drbd_set_state(mdev, ns, flags, NULL); | ||
1697 | if (rv < SS_SUCCESS) | ||
1698 | BUG(); | ||
1699 | |||
1700 | ns.i = mdev->state.i; | ||
1701 | ns_max.role = max_role(ns.role, ns_max.role); | ||
1702 | ns_max.peer = max_role(ns.peer, ns_max.peer); | ||
1703 | ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn); | ||
1704 | ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk); | ||
1705 | ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk); | ||
1706 | |||
1707 | ns_min.role = min_role(ns.role, ns_min.role); | ||
1708 | ns_min.peer = min_role(ns.peer, ns_min.peer); | ||
1709 | ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn); | ||
1710 | ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk); | ||
1711 | ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk); | ||
1712 | } | ||
1713 | rcu_read_unlock(); | ||
1714 | |||
1715 | if (number_of_volumes == 0) { | ||
1716 | ns_min = ns_max = (union drbd_state) { { | ||
1717 | .role = R_SECONDARY, | ||
1718 | .peer = R_UNKNOWN, | ||
1719 | .conn = val.conn, | ||
1720 | .disk = D_DISKLESS, | ||
1721 | .pdsk = D_UNKNOWN | ||
1722 | } }; | ||
1723 | } | ||
1724 | |||
1725 | ns_min.susp = ns_max.susp = tconn->susp; | ||
1726 | ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; | ||
1727 | ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen; | ||
1728 | |||
1729 | *pns_min = ns_min; | ||
1730 | *pns_max = ns_max; | ||
1731 | } | ||
1732 | |||
1733 | static enum drbd_state_rv | ||
1734 | _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) | ||
1735 | { | ||
1736 | enum drbd_state_rv rv; | ||
1737 | |||
1738 | if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) | ||
1739 | return SS_CW_SUCCESS; | ||
1740 | |||
1741 | if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) | ||
1742 | return SS_CW_FAILED_BY_PEER; | ||
1743 | |||
1744 | rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; | ||
1745 | |||
1746 | if (rv == SS_UNKNOWN_ERROR) | ||
1747 | rv = conn_is_valid_transition(tconn, mask, val, 0); | ||
1748 | |||
1749 | if (rv == SS_SUCCESS) | ||
1750 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ | ||
1751 | |||
1752 | return rv; | ||
1753 | } | ||
1754 | |||
1755 | enum drbd_state_rv | ||
1756 | _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||
1757 | enum chg_state_flags flags) | ||
1758 | { | ||
1759 | enum drbd_state_rv rv = SS_SUCCESS; | ||
1760 | struct after_conn_state_chg_work *acscw; | ||
1761 | enum drbd_conns oc = tconn->cstate; | ||
1762 | union drbd_state ns_max, ns_min, os; | ||
1763 | bool have_mutex = false; | ||
1764 | |||
1765 | if (mask.conn) { | ||
1766 | rv = is_valid_conn_transition(oc, val.conn); | ||
1767 | if (rv < SS_SUCCESS) | ||
1768 | goto abort; | ||
1769 | } | ||
1770 | |||
1771 | rv = conn_is_valid_transition(tconn, mask, val, flags); | ||
1772 | if (rv < SS_SUCCESS) | ||
1773 | goto abort; | ||
1774 | |||
1775 | if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING && | ||
1776 | !(flags & (CS_LOCAL_ONLY | CS_HARD))) { | ||
1777 | |||
1778 | /* This will be a cluster-wide state change. | ||
1779 | * Need to give up the spinlock, grab the mutex, | ||
1780 | * then send the state change request, ... */ | ||
1781 | spin_unlock_irq(&tconn->req_lock); | ||
1782 | mutex_lock(&tconn->cstate_mutex); | ||
1783 | have_mutex = true; | ||
1784 | |||
1785 | set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); | ||
1786 | if (conn_send_state_req(tconn, mask, val)) { | ||
1787 | /* sending failed. */ | ||
1788 | clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); | ||
1789 | rv = SS_CW_FAILED_BY_PEER; | ||
1790 | /* need to re-aquire the spin lock, though */ | ||
1791 | goto abort_unlocked; | ||
1792 | } | ||
1793 | |||
1794 | if (val.conn == C_DISCONNECTING) | ||
1795 | set_bit(DISCONNECT_SENT, &tconn->flags); | ||
1796 | |||
1797 | /* ... and re-aquire the spinlock. | ||
1798 | * If _conn_rq_cond() returned >= SS_SUCCESS, we must call | ||
1799 | * conn_set_state() within the same spinlock. */ | ||
1800 | spin_lock_irq(&tconn->req_lock); | ||
1801 | wait_event_lock_irq(tconn->ping_wait, | ||
1802 | (rv = _conn_rq_cond(tconn, mask, val)), | ||
1803 | tconn->req_lock); | ||
1804 | clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); | ||
1805 | if (rv < SS_SUCCESS) | ||
1806 | goto abort; | ||
1807 | } | ||
1808 | |||
1809 | conn_old_common_state(tconn, &os, &flags); | ||
1810 | flags |= CS_DC_SUSP; | ||
1811 | conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags); | ||
1812 | conn_pr_state_change(tconn, os, ns_max, flags); | ||
1813 | |||
1814 | acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); | ||
1815 | if (acscw) { | ||
1816 | acscw->oc = os.conn; | ||
1817 | acscw->ns_min = ns_min; | ||
1818 | acscw->ns_max = ns_max; | ||
1819 | acscw->flags = flags; | ||
1820 | acscw->w.cb = w_after_conn_state_ch; | ||
1821 | kref_get(&tconn->kref); | ||
1822 | acscw->w.tconn = tconn; | ||
1823 | drbd_queue_work(&tconn->sender_work, &acscw->w); | ||
1824 | } else { | ||
1825 | conn_err(tconn, "Could not kmalloc an acscw\n"); | ||
1826 | } | ||
1827 | |||
1828 | abort: | ||
1829 | if (have_mutex) { | ||
1830 | /* mutex_unlock() "... must not be used in interrupt context.", | ||
1831 | * so give up the spinlock, then re-aquire it */ | ||
1832 | spin_unlock_irq(&tconn->req_lock); | ||
1833 | abort_unlocked: | ||
1834 | mutex_unlock(&tconn->cstate_mutex); | ||
1835 | spin_lock_irq(&tconn->req_lock); | ||
1836 | } | ||
1837 | if (rv < SS_SUCCESS && flags & CS_VERBOSE) { | ||
1838 | conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); | ||
1839 | conn_err(tconn, " mask = 0x%x val = 0x%x\n", mask.i, val.i); | ||
1840 | conn_err(tconn, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn)); | ||
1841 | } | ||
1842 | return rv; | ||
1843 | } | ||
1844 | |||
1845 | enum drbd_state_rv | ||
1846 | conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||
1847 | enum chg_state_flags flags) | ||
1848 | { | ||
1849 | enum drbd_state_rv rv; | ||
1850 | |||
1851 | spin_lock_irq(&tconn->req_lock); | ||
1852 | rv = _conn_request_state(tconn, mask, val, flags); | ||
1853 | spin_unlock_irq(&tconn->req_lock); | ||
1854 | |||
1855 | return rv; | ||
1856 | } | ||