author		Lars Ellenberg <lars.ellenberg@linbit.com>	2013-11-22 07:00:12 -0500
committer	Philipp Reisner <philipp.reisner@linbit.com>	2014-07-10 12:35:12 -0400
commit		7753a4c17f9e305ed19d8851e1a3154c8c9abaaf (patch)
tree		482711e3597d25d11d661514d60b8b3e4d352ced /drivers/block
parent		844a6ae7358df3261daec25e0d3a510f3d4152f2 (diff)
drbd: add caching oldest request pointers for replication stages
A request that is to be shipped to the peer goes through a few stages:
- queued
- sent, waiting for ack
- ack received, waiting for "barrier ack", which is the reorder epoch being
  closed on the peer by acknowledging a "cache flush" equivalent
  on the lower level device.

In the latter two stages, depending on protocol, we may have already
completed this request to the upper layers, so it won't be found anymore
on device->pending_master_completion[] lists.

Track the oldest request yet to be sent (req_next), the oldest not yet
acknowledged (req_ack_pending) and the oldest "still waiting for
something from the peer" (req_not_net_done), doing short list walks on
the transfer log to find the next pending one whenever such a request
makes progress.

Now we have a fast way to look up the oldest requests, so we no longer
do a transfer log walk every time.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
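The pattern is easier to see outside the kernel. Below is a minimal,
self-contained userspace sketch of the idea: keep a cached pointer to the
oldest list entry still in a given stage, set it only while it is NULL, and
advance it with a short forward walk when that entry makes progress. All
names here (struct req, struct tlog, tl_advance, the single "pending" flag)
are invented for this sketch; the real code in the diff below keeps one such
cache per stage (req_next, req_ack_pending, req_not_net_done), keyed on the
RQ_NET_* state bits and protected by resource->req_lock.

#include <stdbool.h>
#include <stdio.h>

struct req {
	struct req *prev, *next;
	bool pending;		/* stand-in for the RQ_NET_* stage bits */
	int id;
};

struct tlog {
	struct req head;	/* sentinel node of the "transfer log" */
	struct req *oldest;	/* cached oldest request still pending */
};

static void tl_init(struct tlog *tl)
{
	tl->head.prev = tl->head.next = &tl->head;
	tl->oldest = NULL;
}

static void tl_add(struct tlog *tl, struct req *r)
{
	/* append at the tail; requests age from head to tail */
	r->pending = true;
	r->prev = tl->head.prev;
	r->next = &tl->head;
	tl->head.prev->next = r;
	tl->head.prev = r;
	/* "set if null": only cache the first still-pending request */
	if (tl->oldest == NULL)
		tl->oldest = r;
}

/* Called when r makes progress.  Cheap in the common case: if r is not
 * the cached oldest, a single pointer comparison suffices.  Otherwise
 * walk forward from r to the next still-pending request -- a short walk,
 * because it resumes where the previous oldest left off instead of
 * starting at the head of the log. */
static void tl_advance(struct tlog *tl, struct req *r)
{
	r->pending = false;
	if (tl->oldest != r)
		return;
	for (r = r->next; r != &tl->head; r = r->next)
		if (r->pending)
			break;
	tl->oldest = (r == &tl->head) ? NULL : r;
}

int main(void)
{
	struct tlog tl;
	struct req r[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
	int i;

	tl_init(&tl);
	for (i = 0; i < 3; i++)
		tl_add(&tl, &r[i]);

	tl_advance(&tl, &r[0]);	/* cached oldest moves on to r[1] */
	tl_advance(&tl, &r[2]);	/* not the oldest: O(1), no walk */
	printf("oldest pending: %d\n", tl.oldest ? tl.oldest->id : -1);
	return 0;
}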
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/drbd/drbd_int.h	  7
-rw-r--r--	drivers/block/drbd/drbd_req.c	169
2 files changed, 136 insertions(+), 40 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f29f107be9b8..fa010ea3a4bf 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -720,6 +720,13 @@ struct drbd_connection {
 	struct drbd_thread worker;
 	struct drbd_thread asender;
 
+	/* cached pointers,
+	 * so we can look up the oldest pending requests more quickly.
+	 * protected by resource->req_lock */
+	struct drbd_request *req_next; /* DRBD 9: todo.req_next */
+	struct drbd_request *req_ack_pending;
+	struct drbd_request *req_not_net_done;
+
 	/* sender side */
 	struct drbd_work_queue sender_work;
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 23cd909dc7f1..3f6a6ed2fd03 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -345,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
 	return 1;
 }
 
+static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next == NULL)
+		connection->req_next = req;
+}
+
+static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if (s & RQ_NET_QUEUED)
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_next = req;
+}
+
+static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending == NULL)
+		connection->req_ack_pending = req;
+}
+
+static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_ack_pending = req;
+}
+
+static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done == NULL)
+		connection->req_not_net_done = req;
+}
+
+static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_not_net_done = req;
+}
+
 /* I'd like this to be the only place that manipulates
  * req->completion_ref and req->kref. */
 static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		int clear, int set)
 {
 	struct drbd_device *device = req->device;
+	struct drbd_peer_device *peer_device = first_peer_device(device);
 	unsigned s = req->rq_state;
 	int c_put = 0;
 	int k_put = 0;
@@ -379,6 +458,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 
 	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
 		atomic_inc(&req->completion_ref);
+		set_if_null_req_next(peer_device, req);
 	}
 
 	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
@@ -386,8 +466,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 
 	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
 		/* potentially already completed in the asender thread */
-		if (!(s & RQ_NET_DONE))
+		if (!(s & RQ_NET_DONE)) {
 			atomic_add(req->i.size >> 9, &device->ap_in_flight);
+			set_if_null_req_not_net_done(peer_device, req);
+		}
+		if (s & RQ_NET_PENDING)
+			set_if_null_req_ack_pending(peer_device, req);
 	}
 
 	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
@@ -418,10 +502,13 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		dec_ap_pending(device);
 		++c_put;
 		req->acked_jif = jiffies;
+		advance_conn_req_ack_pending(peer_device, req);
 	}
 
-	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
 		++c_put;
+		advance_conn_req_next(peer_device, req);
+	}
 
 	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
 		if (s & RQ_NET_SENT)
@@ -429,6 +516,13 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		if (s & RQ_EXP_BARR_ACK)
 			++k_put;
 		req->net_done_jif = jiffies;
+
+		/* in ahead/behind mode, or just in case,
+		 * before we finally destroy this request,
+		 * the caching pointers must not reference it anymore */
+		advance_conn_req_next(peer_device, req);
+		advance_conn_req_ack_pending(peer_device, req);
+		advance_conn_req_not_net_done(peer_device, req);
 	}
 
 	/* potentially complete and destroy */
@@ -1423,36 +1517,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
 	return limit;
 }
 
-static void find_oldest_requests(
-	struct drbd_connection *connection,
-	struct drbd_device *device,
-	struct drbd_request **oldest_req_waiting_for_peer,
-	struct drbd_request **oldest_req_waiting_for_disk)
-{
-	struct drbd_request *r;
-	*oldest_req_waiting_for_peer = NULL;
-	*oldest_req_waiting_for_disk = NULL;
-	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
-		const unsigned s = r->rq_state;
-		if (!*oldest_req_waiting_for_peer
-		&& ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
-			*oldest_req_waiting_for_peer = r;
-
-		if (!*oldest_req_waiting_for_disk
-		&& (s & RQ_LOCAL_PENDING) && r->device == device)
-			*oldest_req_waiting_for_disk = r;
-
-		if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
-			break;
-	}
-}
-
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 	struct drbd_connection *connection = first_peer_device(device)->connection;
-	struct drbd_request *req_disk, *req_peer; /* oldest request */
+	struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
 	struct net_conf *nc;
+	unsigned long oldest_submit_jif;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
 
@@ -1473,14 +1544,31 @@ void request_timer_fn(unsigned long data)
 		return; /* Recurring timer stopped */
 
 	now = jiffies;
+	nt = now + et;
 
 	spin_lock_irq(&device->resource->req_lock);
-	find_oldest_requests(connection, device, &req_peer, &req_disk);
-	if (req_peer == NULL && req_disk == NULL) {
-		spin_unlock_irq(&device->resource->req_lock);
-		mod_timer(&device->request_timer, now + et);
-		return;
-	}
+	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
+	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
+	req_peer = connection->req_not_net_done;
+	/* maybe the oldest request waiting for the peer is in fact still
+	 * blocking in tcp sendmsg */
+	if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
+		req_peer = connection->req_next;
+
+	/* evaluate the oldest peer request only in one timer! */
+	if (req_peer && req_peer->device != device)
+		req_peer = NULL;
+
+	/* do we have something to evaluate? */
+	if (req_peer == NULL && req_write == NULL && req_read == NULL)
+		goto out;
+
+	oldest_submit_jif =
+		(req_write && req_read)
+		? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
+		  ? req_write->pre_submit_jif : req_read->pre_submit_jif )
+		: req_write ? req_write->pre_submit_jif
+		: req_read ? req_read->pre_submit_jif : now;
 
 	/* The request is considered timed out, if
 	 * - we have some effective timeout from the configuration,
@@ -1499,13 +1587,13 @@ void request_timer_fn(unsigned long data)
 	 * to expire twice (worst case) to become effective. Good enough.
 	 */
 	if (ent && req_peer &&
-	    time_after(now, req_peer->start_jif + ent) &&
+	    time_after(now, req_peer->pre_send_jif + ent) &&
 	    !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
 		drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req_disk &&
-	    time_after(now, req_disk->start_jif + dt) &&
+	if (dt && oldest_submit_jif != now &&
+	    time_after(now, oldest_submit_jif + dt) &&
 	    !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
 		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1513,11 +1601,12 @@ void request_timer_fn(unsigned long data)
 
 	/* Reschedule timer for the nearest not already expired timeout.
 	 * Fallback to now + min(effective network timeout, disk timeout). */
-	ent = (ent && req_peer && time_before(now, req_peer->start_jif + ent))
-		? req_peer->start_jif + ent : now + et;
-	dt = (dt && req_disk && time_before(now, req_disk->start_jif + dt))
-		? req_disk->start_jif + dt : now + et;
+	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
+		? req_peer->pre_send_jif + ent : now + et;
+	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
+		? oldest_submit_jif + dt : now + et;
 	nt = time_before(ent, dt) ? ent : dt;
+out:
 	spin_unlock_irq(&connection->resource->req_lock);
 	mod_timer(&device->request_timer, nt);
 }
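To make the reworked timeout logic in request_timer_fn easier to follow
outside kernel context, here is the disk-timeout decision reduced to a
standalone predicate. This is a sketch under assumptions: disk_timed_out is
an invented name, time_after/time_in_range are simplified re-implementations
of the jiffies helpers, and the grace-window check mirrors the
last_reattach_jif test in the hunk above.

#include <stdbool.h>
#include <stdio.h>

/* simplified versions of the kernel's jiffies comparison helpers */
#define time_after(a, b)	((long)((b) - (a)) < 0)
#define time_in_range(a, b, c)	(!time_after(b, a) && !time_after(a, c))

/* True if the oldest locally submitted request exceeded the configured
 * disk timeout "dt", unless the backing device was just reattached
 * (one timeout period of grace, like last_reattach_jif above). */
static bool disk_timed_out(unsigned long now, unsigned long dt,
			   unsigned long oldest_submit_jif,
			   unsigned long last_reattach_jif)
{
	if (!dt)			/* disk-timeout not configured */
		return false;
	if (oldest_submit_jif == now)	/* nothing pending locally */
		return false;
	if (!time_after(now, oldest_submit_jif + dt))
		return false;		/* oldest request not yet expired */
	if (time_in_range(now, last_reattach_jif, last_reattach_jif + dt))
		return false;		/* still in post-reattach grace window */
	return true;
}

int main(void)
{
	/* submitted 150 jiffies ago with a 100 jiffy timeout: fires */
	printf("%d\n", disk_timed_out(1000, 100, 850, 0));
	return 0;
}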