Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r--	drivers/block/drbd/drbd_req.c	132
1 file changed, 83 insertions(+), 49 deletions(-)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4a0f314086e5..9c5c84946b05 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -37,6 +37,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req
 	const int rw = bio_data_dir(bio);
 	int cpu;
 	cpu = part_stat_lock();
+	part_round_stats(cpu, &mdev->vdisk->part0);
 	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
 	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
 	part_inc_in_flight(&mdev->vdisk->part0, rw);
@@ -214,8 +215,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 {
 	const unsigned long s = req->rq_state;
 	struct drbd_conf *mdev = req->mdev;
-	/* only WRITES may end up here without a master bio (on barrier ack) */
-	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
 	/* we must not complete the master bio, while it is
 	 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -230,7 +230,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 		return;
 	if (s & RQ_NET_PENDING)
 		return;
-	if (s & RQ_LOCAL_PENDING)
+	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
 		return;
 
 	if (req->master_bio) {
@@ -277,6 +277,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 		req->master_bio = NULL;
 	}
 
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
 	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
 		/* this is disconnected (local only) operation,
 		 * or protocol C P_WRITE_ACK,
@@ -429,7 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;
 
 	case completed_ok:
-		if (bio_data_dir(req->master_bio) == WRITE)
+		if (req->rq_state & RQ_WRITE)
 			mdev->writ_cnt += req->size>>9;
 		else
 			mdev->read_cnt += req->size>>9;
@@ -438,7 +441,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
 		_req_may_be_done_not_susp(req, m);
-		put_ldev(mdev);
+		break;
+
+	case abort_disk_io:
+		req->rq_state |= RQ_LOCAL_ABORTED;
+		if (req->rq_state & RQ_WRITE)
+			_req_may_be_done_not_susp(req, m);
+		else
+			goto goto_queue_for_net_read;
 		break;
 
 	case write_completed_with_error:
@@ -447,7 +457,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 		__drbd_chk_io_error(mdev, false);
 		_req_may_be_done_not_susp(req, m);
-		put_ldev(mdev);
 		break;
 
 	case read_ahead_completed_with_error:
@@ -455,7 +464,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 		_req_may_be_done_not_susp(req, m);
-		put_ldev(mdev);
 		break;
 
 	case read_completed_with_error:
@@ -467,7 +475,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
 
 		__drbd_chk_io_error(mdev, false);
-		put_ldev(mdev);
+
+	goto_queue_for_net_read:
 
 		/* no point in retrying if there is no good remote data,
 		 * or we have no connection. */
@@ -556,10 +565,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		drbd_queue_work(&mdev->data.work, &req->w);
 		break;
 
-	case oos_handed_to_network:
-		/* actually the same */
+	case read_retry_remote_canceled:
 	case send_canceled:
-		/* treat it the same */
 	case send_failed:
 		/* real cleanup will be done from tl_clear. just update flags
 		 * so it is no longer marked as on the worker queue */
@@ -589,17 +596,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		}
 		req->rq_state &= ~RQ_NET_QUEUED;
 		req->rq_state |= RQ_NET_SENT;
-		/* because _drbd_send_zc_bio could sleep, and may want to
-		 * dereference the bio even after the "write_acked_by_peer" and
-		 * "completed_ok" events came in, once we return from
-		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
-		 * whether it is done already, and end it. */
 		_req_may_be_done_not_susp(req, m);
 		break;
 
-	case read_retry_remote_canceled:
+	case oos_handed_to_network:
+		/* Was not set PENDING, no longer QUEUED, so is now DONE
+		 * as far as this connection is concerned. */
 		req->rq_state &= ~RQ_NET_QUEUED;
-		/* fall through, in case we raced with drbd_disconnect */
+		req->rq_state |= RQ_NET_DONE;
+		_req_may_be_done_not_susp(req, m);
+		break;
+
 	case connection_lost_while_pending:
 		/* transfer log cleanup after connection loss */
 		/* assert something? */
@@ -616,8 +623,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		_req_may_be_done(req, m); /* Allowed while state.susp */
 		break;
 
-	case write_acked_by_peer_and_sis:
-		req->rq_state |= RQ_NET_SIS;
 	case conflict_discarded_by_peer:
 		/* for discarded conflicting writes of multiple primaries,
 		 * there is no need to keep anything in the tl, potential
@@ -628,18 +633,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 			      (unsigned long long)req->sector, req->size);
 		req->rq_state |= RQ_NET_DONE;
 		/* fall through */
+	case write_acked_by_peer_and_sis:
 	case write_acked_by_peer:
+		if (what == write_acked_by_peer_and_sis)
+			req->rq_state |= RQ_NET_SIS;
 		/* protocol C; successfully written on peer.
-		 * Nothing to do here.
+		 * Nothing more to do here.
 		 * We want to keep the tl in place for all protocols, to cater
-		 * for volatile write-back caches on lower level devices.
-		 *
-		 * A barrier request is expected to have forced all prior
-		 * requests onto stable storage, so completion of a barrier
-		 * request could set NET_DONE right here, and not wait for the
-		 * P_BARRIER_ACK, but that is an unnecessary optimization. */
+		 * for volatile write-back caches on lower level devices. */
 
-		/* this makes it effectively the same as for: */
 	case recv_acked_by_peer:
 		/* protocol B; pretends to be successfully written on peer.
 		 * see also notes above in handed_over_to_network about
@@ -773,6 +775,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 	int local, remote, send_oos = 0;
 	int err = -EIO;
 	int ret = 0;
+	union drbd_state s;
 
 	/* allocate outside of all locks; */
 	req = drbd_req_new(mdev, bio);
@@ -834,8 +837,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 		drbd_al_begin_io(mdev, sector);
 	}
 
-	remote = remote && drbd_should_do_remote(mdev->state);
-	send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+	s = mdev->state;
+	remote = remote && drbd_should_do_remote(s);
+	send_oos = rw == WRITE && drbd_should_send_oos(s);
 	D_ASSERT(!(remote && send_oos));
 
 	if (!(local || remote) && !is_susp(mdev->state)) {
@@ -867,7 +871,7 @@ allocate_barrier:
 
 	if (is_susp(mdev->state)) {
 		/* If we got suspended, use the retry mechanism of
-		   generic_make_request() to restart processing of this
+		   drbd_make_request() to restart processing of this
 		   bio. In the next call to drbd_make_request
 		   we sleep in inc_ap_bio() */
 		ret = 1;
@@ -1091,7 +1095,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
 	 */
 	D_ASSERT(bio->bi_size > 0);
 	D_ASSERT((bio->bi_size & 0x1ff) == 0);
-	D_ASSERT(bio->bi_idx == 0);
 
 	/* to make some things easier, force alignment of requests within the
 	 * granularity of our hash tables */
@@ -1099,8 +1102,9 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
 	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
 
 	if (likely(s_enr == e_enr)) {
-		inc_ap_bio(mdev, 1);
-		drbd_make_request_common(mdev, bio, start_time);
+		do {
+			inc_ap_bio(mdev, 1);
+		} while (drbd_make_request_common(mdev, bio, start_time));
 		return;
 	}
 
@@ -1196,36 +1200,66 @@ void request_timer_fn(unsigned long data)
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 	struct drbd_request *req; /* oldest request */
 	struct list_head *le;
-	unsigned long et = 0; /* effective timeout = ko_count * timeout */
+	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
+	unsigned long now;
 
 	if (get_net_conf(mdev)) {
-		et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+		if (mdev->state.conn >= C_WF_REPORT_PARAMS)
+			ent = mdev->net_conf->timeout*HZ/10
+				* mdev->net_conf->ko_count;
 		put_net_conf(mdev);
 	}
-	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
+		dt = mdev->ldev->dc.disk_timeout * HZ / 10;
+		put_ldev(mdev);
+	}
+	et = min_not_zero(dt, ent);
+
+	if (!et)
 		return; /* Recurring timer stopped */
 
+	now = jiffies;
+
 	spin_lock_irq(&mdev->req_lock);
 	le = &mdev->oldest_tle->requests;
 	if (list_empty(le)) {
 		spin_unlock_irq(&mdev->req_lock);
-		mod_timer(&mdev->request_timer, jiffies + et);
+		mod_timer(&mdev->request_timer, now + et);
 		return;
 	}
 
 	le = le->prev;
 	req = list_entry(le, struct drbd_request, tl_requests);
-	if (time_is_before_eq_jiffies(req->start_time + et)) {
-		if (req->rq_state & RQ_NET_PENDING) {
-			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-		} else {
-			dev_warn(DEV, "Local backing block device frozen?\n");
-			mod_timer(&mdev->request_timer, jiffies + et);
-		}
-	} else {
-		mod_timer(&mdev->request_timer, req->start_time + et);
-	}
 
+	/* The request is considered timed out, if
+	 * - we have some effective timeout from the configuration,
+	 *   with above state restrictions applied,
+	 * - the oldest request is waiting for a response from the network
+	 *   resp. the local disk,
+	 * - the oldest request is in fact older than the effective timeout,
+	 * - the connection was established (resp. disk was attached)
+	 *   for longer than the timeout already.
+	 * Note that for 32bit jiffies and very stable connections/disks,
+	 * we may have a wrap around, which is catched by
+	 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+	 *
+	 * Side effect: once per 32bit wrap-around interval, which means every
+	 * ~198 days with 250 HZ, we have a window where the timeout would need
+	 * to expire twice (worst case) to become effective. Good enough.
+	 */
+	if (ent && req->rq_state & RQ_NET_PENDING &&
+		 time_after(now, req->start_time + ent) &&
+		!time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
+		dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+		_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+	}
+	if (dt && req->rq_state & RQ_LOCAL_PENDING &&
+		 time_after(now, req->start_time + dt) &&
+		!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
+		dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+		__drbd_chk_io_error(mdev, 1);
+	}
+	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
 	spin_unlock_irq(&mdev->req_lock);
+	mod_timer(&mdev->request_timer, nt);
 }
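
The timeout handling added to request_timer_fn() above boils down to two ideas: take the smaller non-zero of the network timeout (timeout * ko-count) and the new disk-timeout, and guard every age comparison against jiffies wrap-around and against a connection (or disk) that is itself younger than the timeout. The following minimal userspace sketch illustrates that arithmetic only; min_not_zero(), time_after() and time_in_range() are simplified stand-ins for the kernel macros of the same names, and the example values are made up in place of the mdev/req fields. It is not drbd code.

/* Sketch of the effective-timeout computation and wrap-safe checks. */
#include <stdio.h>

#define HZ 250UL

/* smaller of two timeouts, treating 0 as "not configured"
 * (simplified stand-in for min_not_zero() from linux/kernel.h) */
#define min_not_zero(x, y) \
	((x) == 0 ? (y) : ((y) == 0 ? (x) : ((x) < (y) ? (x) : (y))))

/* wrap-safe comparisons on a free-running tick counter
 * (simplified stand-ins for the helpers in linux/jiffies.h) */
#define time_after(a, b)	((long)((b) - (a)) < 0)
#define time_after_eq(a, b)	((long)((a) - (b)) >= 0)
#define time_in_range(a, b, c)	(time_after_eq(a, b) && !time_after(a, c))

int main(void)
{
	/* hypothetical configuration: timeout * ko-count, and disk-timeout */
	unsigned long ent = 6 * (6 * HZ / 10);	/* timeout=0.6s, ko_count=6 -> 3.6s */
	unsigned long dt  = 30 * HZ / 10;	/* disk-timeout 3s */
	unsigned long et  = min_not_zero(dt, ent);

	/* hypothetical ages, in ticks */
	unsigned long now            = 100000;
	unsigned long req_start      = now - 10 * HZ;	/* oldest request is 10s old */
	unsigned long last_reconnect = now - 60 * HZ;	/* connection is 60s old */

	if (!et) {
		puts("no effective timeout configured; recurring timer would stop");
		return 0;
	}

	/* peer side: the oldest request is older than ent, and the connection
	 * itself has been up longer than ent; the time_in_range() test filters
	 * out a fresh reconnect as well as a jiffies wrap */
	if (ent && time_after(now, req_start + ent) &&
	    !time_in_range(now, last_reconnect, last_reconnect + ent))
		puts("peer timeout: driver would force C_TIMEOUT");

	/* disk side: same pattern against the disk-timeout
	 * (the analogous last-attach guard is omitted here for brevity) */
	if (dt && time_after(now, req_start + dt))
		puts("disk timeout: driver would call __drbd_chk_io_error()");

	/* re-arm relative to the oldest request, like the new nt computation */
	unsigned long nt = (time_after(now, req_start + et) ? now : req_start) + et;
	printf("next timer run at tick %lu\n", nt);
	return 0;
}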