author		Lars Ellenberg <lars.ellenberg@linbit.com>	2014-05-07 16:41:28 -0400
committer	Philipp Reisner <philipp.reisner@linbit.com>	2014-07-10 12:35:21 -0400
commit		f5b90b6bf0cf29a85ceaa8ce334b17814cd5d39b (patch)
tree		d10cc5f07027af4640fe9fe151f6e20c8edd3f84
parent		cc356f85bb57a3587e4b1d944e669fe39b37cdf2 (diff)
drbd: resync should only lock out specific ranges
During resync, if we need to block some specific incoming write because
of active resync requests to that same range, we potentially caused
*all* new application writes (to "cold" activity log extents) to block
until this one request has been processed.

Improve the do_submit() logic to
 * grab all incoming requests to some "incoming" list
 * process this list
   - move aside requests that are blocked by resync
   - prepare activity log transactions,
   - commit transactions and submit corresponding requests
   - if there are remaining requests that only wait for activity log
     extents to become free, stop the fast path
     (mark activity log as "starving")
   - iterate until no more requests are waiting for the activity log,
     but all potentially remaining requests are only blocked by resync
 * only then grab new incoming requests

That way, very busy IO on currently "hot" activity log extents cannot
starve scattered IO to "cold" extents. And blocked-by-resync requests
are processed once resync traffic on the affected region has ceased,
without blocking anything else.

The only blocking mode left is when we cannot start requests to "cold"
extents because all currently "hot" extents are actually used.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
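To illustrate the batching scheme described above, here is a minimal
userspace sketch (hypothetical code, not DRBD's: the request model, the
slot count, and all toy_* names are invented for illustration). It shows
the three-way classification the patch introduces: a request either gets
an activity log slot now ("pending"), is parked because resync owns its
range ("busy"), or finds the transaction full ("deferred"). Unlike the
real code, which stops preparing at the first full-transaction hit, the
toy classifies every request independently.

/* toy_do_submit.c -- hypothetical sketch of the classification scheme;
 * compile with: cc -std=c99 toy_do_submit.c */
#include <stdio.h>
#include <stdbool.h>

enum toy_err { TOY_OK, TOY_EBUSY, TOY_ENOBUFS };

struct toy_req {
	const char *name;
	int extent;             /* activity log extent this write touches */
	bool blocked_by_resync; /* is resync active on that range? */
};

#define AL_SLOTS 2
static int hot[AL_SLOTS] = { -1, -1 };  /* currently "hot" extents */

/* Mirrors the 0 / -EBUSY / -ENOBUFS contract of the patched
 * drbd_al_begin_io_nonblock(), reduced to a toy. */
static enum toy_err toy_al_begin_io_nonblock(const struct toy_req *req)
{
	if (req->blocked_by_resync)
		return TOY_EBUSY;       /* move aside; resync owns the range */
	for (int i = 0; i < AL_SLOTS; i++) {
		if (hot[i] == req->extent || hot[i] < 0) {
			hot[i] = req->extent;   /* extent is (now) hot */
			return TOY_OK;
		}
	}
	return TOY_ENOBUFS;             /* all slots hot: transaction full */
}

int main(void)
{
	struct toy_req incoming[] = {
		{ "w0", 7, false },     /* gets slot 0 */
		{ "w1", 9, false },     /* gets slot 1 */
		{ "w2", 3, true  },     /* resync active: parked as busy */
		{ "w3", 5, false },     /* no slot left: deferred */
	};

	for (unsigned i = 0; i < sizeof(incoming) / sizeof(incoming[0]); i++) {
		static const char *verdict[] = {
			"pending (submit with this commit)",
			"busy (retry once resync has ceased)",
			"deferred (wait for a slot to cool down)",
		};
		printf("%s -> %s\n", incoming[i].name,
		       verdict[toy_al_begin_io_nonblock(&incoming[i])]);
	}
	return 0;
}

The key property this models: w2 and w3 are held back for different
reasons and on different lists, so neither blocks w0 and w1 from being
committed and submitted.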
-rw-r--r--	drivers/block/drbd/drbd_actlog.c	15
-rw-r--r--	drivers/block/drbd/drbd_req.c		136
2 files changed, 98 insertions(+), 53 deletions(-)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6ce5c76d642b..e9fbcafaccdc 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -357,8 +357,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
 	/* We want all necessary updates for a given request within the same transaction
 	 * We could first check how many updates are *actually* needed,
 	 * and use that instead of the worst-case nr_al_extents */
-	if (available_update_slots < nr_al_extents)
-		return -EWOULDBLOCK;
+	if (available_update_slots < nr_al_extents) {
+		/* Too many activity log extents are currently "hot".
+		 *
+		 * If we have accumulated pending changes already,
+		 * we made progress.
+		 *
+		 * If we cannot get even a single pending change through,
+		 * stop the fast path until we made some progress,
+		 * or requests to "cold" extents could be starved. */
+		if (!al->pending_changes)
+			__set_bit(__LC_STARVING, &device->act_log->flags);
+		return -ENOBUFS;
+	}
 
 	/* Is resync active in this area? */
 	for (enr = first; enr <= last; enr++) {
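Two things change in this hunk: the "transaction full" case now returns
-ENOBUFS instead of -EWOULDBLOCK, so callers can tell it apart from the
resync-blocked -EBUSY case, and if not even one change made it into the
pending transaction, the activity log is flagged as starving
(__LC_STARVING) so the fast path is switched off. As a reading aid, a
hypothetical userspace model of that starving gate (all names invented;
this is not the lib/lru_cache API):

#include <stdbool.h>
#include <stdio.h>

struct toy_al {
	int free_slots;       /* update slots still available */
	int pending_changes;  /* changes already in this transaction */
	bool starving;        /* models __LC_STARVING */
};

static int toy_begin_nonblock(struct toy_al *al, int nr_al_extents)
{
	if (al->free_slots < nr_al_extents) {
		if (!al->pending_changes)
			al->starving = true;  /* no progress at all */
		return -1;                    /* -ENOBUFS in the real code */
	}
	al->free_slots -= nr_al_extents;
	al->pending_changes += nr_al_extents;
	return 0;
}

int main(void)
{
	/* first request fits; second finds the transaction full, but
	 * progress was made, so the fast path stays enabled */
	struct toy_al al = { .free_slots = 1 };
	toy_begin_nonblock(&al, 1);
	toy_begin_nonblock(&al, 1);
	printf("after progress: starving=%d\n", al.starving);   /* 0 */

	/* an already-exhausted log cannot take even one change:
	 * mark it starving so "cold" requests do not wait forever */
	struct toy_al cold = { .free_slots = 0 };
	toy_begin_nonblock(&cold, 1);
	printf("no progress:    starving=%d\n", cold.starving); /* 1 */
	return 0;
}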
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 74ebef101dc7..c67717d572d1 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1182,6 +1182,8 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
 		&device->pending_master_completion[1 /* WRITE */]);
 	spin_unlock_irq(&device->resource->req_lock);
 	queue_work(device->submit.wq, &device->submit.worker);
+	/* do_submit() may sleep internally on al_wait, too */
+	wake_up(&device->al_wait);
 }
 
 /* returns the new drbd_request pointer, if the caller is expected to
@@ -1365,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
 
 static bool prepare_al_transaction_nonblock(struct drbd_device *device,
 					    struct list_head *incoming,
-					    struct list_head *pending)
+					    struct list_head *pending,
+					    struct list_head *later)
 {
 	struct drbd_request *req, *tmp;
 	int wake = 0;
@@ -1374,44 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device,
 	spin_lock_irq(&device->al_lock);
 	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
 		err = drbd_al_begin_io_nonblock(device, &req->i);
+		if (err == -ENOBUFS)
+			break;
 		if (err == -EBUSY)
 			wake = 1;
 		if (err)
-			continue;
-		list_move_tail(&req->tl_requests, pending);
+			list_move_tail(&req->tl_requests, later);
+		else
+			list_move_tail(&req->tl_requests, pending);
 	}
 	spin_unlock_irq(&device->al_lock);
 	if (wake)
 		wake_up(&device->al_wait);
-
 	return !list_empty(pending);
 }
 
+void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+{
+	struct drbd_request *req, *tmp;
+
+	list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+		req->rq_state |= RQ_IN_ACT_LOG;
+		req->in_actlog_jif = jiffies;
+		atomic_dec(&device->ap_actlog_cnt);
+		list_del_init(&req->tl_requests);
+		drbd_send_and_submit(device, req);
+	}
+}
+
 void do_submit(struct work_struct *ws)
 {
 	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
-	LIST_HEAD(incoming);
-	LIST_HEAD(pending);
-	struct drbd_request *req, *tmp;
+	LIST_HEAD(incoming);	/* from drbd_make_request() */
+	LIST_HEAD(pending);	/* to be submitted after next AL-transaction commit */
+	LIST_HEAD(busy);	/* blocked by resync requests */
+
+	/* grab new incoming requests */
+	spin_lock_irq(&device->resource->req_lock);
+	list_splice_tail_init(&device->submit.writes, &incoming);
+	spin_unlock_irq(&device->resource->req_lock);
 
 	for (;;) {
-		spin_lock_irq(&device->resource->req_lock);
-		list_splice_tail_init(&device->submit.writes, &incoming);
-		spin_unlock_irq(&device->resource->req_lock);
+		DEFINE_WAIT(wait);
 
+		/* move used-to-be-busy back to front of incoming */
+		list_splice_init(&busy, &incoming);
 		submit_fast_path(device, &incoming);
 		if (list_empty(&incoming))
 			break;
 
-skip_fast_path:
-		wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
-		/* Maybe more was queued, while we prepared the transaction?
-		 * Try to stuff them into this transaction as well.
-		 * Be strictly non-blocking here, no wait_event, we already
-		 * have something to commit.
-		 * Stop if we don't make any more progres.
-		 */
 		for (;;) {
+			prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+			list_splice_init(&busy, &incoming);
+			prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
+			if (!list_empty(&pending))
+				break;
+
+			schedule();
+
+			/* If all currently "hot" activity log extents are kept busy by
+			 * incoming requests, we still must not totally starve new
+			 * requests to "cold" extents.
+			 * Something left on &incoming means there had not been
+			 * enough update slots available, and the activity log
+			 * has been marked as "starving".
+			 *
+			 * Try again now, without looking for new requests,
+			 * effectively blocking all new requests until we made
+			 * at least _some_ progress with what we currently have.
+			 */
+			if (!list_empty(&incoming))
+				continue;
+
+			/* Nothing moved to pending, but nothing left
+			 * on incoming: all moved to busy!
+			 * Grab new and iterate. */
+			spin_lock_irq(&device->resource->req_lock);
+			list_splice_tail_init(&device->submit.writes, &incoming);
+			spin_unlock_irq(&device->resource->req_lock);
+		}
+		finish_wait(&device->al_wait, &wait);
+
+		/* If the transaction was full, before all incoming requests
+		 * had been processed, skip ahead to commit, and iterate
+		 * without splicing in more incoming requests from upper layers.
+		 *
+		 * Else, if all incoming have been processed,
+		 * they have become either "pending" (to be submitted after
+		 * next transaction commit) or "busy" (blocked by resync).
+		 *
+		 * Maybe more was queued, while we prepared the transaction?
+		 * Try to stuff those into this transaction as well.
+		 * Be strictly non-blocking here,
+		 * we already have something to commit.
+		 *
+		 * Commit if we don't make any more progress.
+		 */
+
+		while (list_empty(&incoming)) {
 			LIST_HEAD(more_pending);
 			LIST_HEAD(more_incoming);
 			bool made_progress;
@@ -1428,46 +1492,16 @@ skip_fast_path:
 			if (list_empty(&more_incoming))
 				break;
 
-			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending);
+			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
 
 			list_splice_tail_init(&more_pending, &pending);
 			list_splice_tail_init(&more_incoming, &incoming);
-
 			if (!made_progress)
 				break;
 		}
-		drbd_al_begin_io_commit(device);
 
-		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
-			req->rq_state |= RQ_IN_ACT_LOG;
-			req->in_actlog_jif = jiffies;
-			atomic_dec(&device->ap_actlog_cnt);
-			list_del_init(&req->tl_requests);
-			drbd_send_and_submit(device, req);
-		}
-
-		/* If all currently hot activity log extents are kept busy by
-		 * incoming requests, we still must not totally starve new
-		 * requests to cold extents. In that case, prepare one request
-		 * in blocking mode. */
-		list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
-			bool was_cold;
-			list_del_init(&req->tl_requests);
-			was_cold = drbd_al_begin_io_prepare(device, &req->i);
-			if (!was_cold) {
-				req->rq_state |= RQ_IN_ACT_LOG;
-				req->in_actlog_jif = jiffies;
-				atomic_dec(&device->ap_actlog_cnt);
-				/* Corresponding extent was hot after all? */
-				drbd_send_and_submit(device, req);
-			} else {
-				/* Found a request to a cold extent.
-				 * Put on "pending" list,
-				 * and try to cumulate with more. */
-				list_add(&req->tl_requests, &pending);
-				goto skip_fast_path;
-			}
-		}
+		drbd_al_begin_io_commit(device);
+		send_and_submit_pending(device, &pending);
 	}
 }
 
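A closing note on the open-coded wait in the reworked do_submit(): a
plain wait_event() can only re-evaluate a boolean condition, but here
each retry must also re-splice the busy list and, in one corner case,
pull in fresh writes under req_lock. The patch therefore expands it into
the standard prepare_to_wait()/schedule()/finish_wait() idiom. Its
skeleton, stripped of the list handling (a sketch of the pattern, not
compilable on its own and not the exact DRBD code):

	/* skeleton of the open-coded wait_event() in do_submit() */
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
		/* re-check the condition, with side effects:
		 * re-splice &busy, try to prepare an AL transaction */
		if (!list_empty(&pending))
			break;          /* made progress: stop waiting */
		schedule();             /* sleep until wake_up(&device->al_wait) */
		/* optionally refill &incoming before the next attempt */
	}
	finish_wait(&device->al_wait, &wait);

The condition is re-checked only after prepare_to_wait() has put the
task on the wait queue and set its state, so a wake_up() arriving
between the check and schedule() cannot be lost; this is exactly what
wait_event() does internally, just with room for the list surgery
between the steps.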