 Documentation/block/null_blk.txt |  72
 block/blk-mq-sysfs.c             |  13
 drivers/block/null_blk.c         | 102
 drivers/block/skd_main.c         |   4
 drivers/md/bcache/alloc.c        |   2
 drivers/md/bcache/bcache.h       |  12
 drivers/md/bcache/btree.c        |  27
 drivers/md/bcache/movinggc.c     |  21
 drivers/md/bcache/super.c        |   2
 drivers/md/bcache/sysfs.c        |  50
 drivers/md/bcache/util.c         |   8
 drivers/md/bcache/util.h         |   2
 drivers/md/bcache/writeback.c    |  53
 13 files changed, 274 insertions(+), 94 deletions(-)
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
new file mode 100644
index 000000000000..b2830b435895
--- /dev/null
+++ b/Documentation/block/null_blk.txt
@@ -0,0 +1,72 @@
+Null block device driver
+================================================================================
+
+I. Overview
+
+The null block device (/dev/nullb*) is used for benchmarking the various
+block-layer implementations. It emulates a block device of X gigabytes in size.
+The following instances are possible:
+
+  Single-queue block-layer
+    - Request-based.
+    - Single submission queue per device.
+    - Implements IO scheduling algorithms (CFQ, Deadline, noop).
+  Multi-queue block-layer
+    - Request-based.
+    - Configurable submission queues per device.
+  No block-layer (known as bio-based)
+    - Bio-based. IO requests are submitted directly to the device driver.
+    - Directly accepts bio data structures and returns them.
+
+All of them have a completion queue for each core in the system.
+
+II. Module parameters applicable for all instances:
+
+queue_mode=[0-2]: Default: 2-Multi-queue
+  Selects which block-layer the module should instantiate with.
+
+  0: Bio-based.
+  1: Single-queue.
+  2: Multi-queue.
+
+home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
+  Selects what CPU node the data structures are allocated from.
+
+gb=[Size in GB]: Default: 250GB
+  The size of the device reported to the system.
+
+bs=[Block size (in bytes)]: Default: 512 bytes
+  The block size reported to the system.
+
+nr_devices=[Number of devices]: Default: 2
+  Number of block devices instantiated. They are instantiated as /dev/nullb0,
+  etc.
+
+irq_mode=[0-2]: Default: 1-Soft-irq
+  The completion mode used for completing IOs to the block-layer.
+
+  0: None.
+  1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+     when IOs are issued from a CPU node other than the node the device is
+     connected to.
+  2: Timer: Waits a specific period (completion_nsec) for each IO before
+     completion.
+
+completion_nsec=[ns]: Default: 10,000ns
+  Combined with irq_mode=2 (timer). The time each completion event must wait.
+
+submit_queues=[0..nr_cpus]:
+  The number of submission queues attached to the device driver. If unset, it
+  defaults to 1 on single-queue and bio-based instances. For multi-queue,
+  it is ignored when the use_per_node_hctx module parameter is 1.
+
+hw_queue_depth=[0..qdepth]: Default: 64
+  The hardware queue depth of the device.
+
+III: Multi-queue specific parameters
+
+use_per_node_hctx=[0/1]: Default: 0
+  0: The number of submit queues is set to the value of the submit_queues
+     parameter.
+  1: The multi-queue block layer is instantiated with a hardware dispatch
+     queue for each CPU node in the system.
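As an illustration of the parameters above (an example invocation with arbitrary values, not part of the patch), a 4 GB multi-queue instance with four submission queues and a 4096-byte block size could be loaded as:

	modprobe null_blk queue_mode=2 submit_queues=4 gb=4 bs=4096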
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ba6cf8e9aa0a..b91ce75bd35d 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = {
 void blk_mq_unregister_disk(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	int i, j;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx_for_each_ctx(hctx, ctx, j) {
+			kobject_del(&ctx->kobj);
+			kobject_put(&ctx->kobj);
+		}
+		kobject_del(&hctx->kobj);
+		kobject_put(&hctx->kobj);
+	}
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
+	kobject_put(&q->mq_kobj);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
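The hunk above fixes a sysfs object leak on teardown: kobject_del() only removes the sysfs entry, while kobject_put() is what drops the reference taken at registration time and lets the ktype's release() run. A minimal sketch of that del-then-put pattern (the demo_obj type is hypothetical; the kobject calls are real kernel API):

	#include <linux/kobject.h>
	#include <linux/slab.h>

	/* Hypothetical object embedding a kobject, like blk_mq_hw_ctx above. */
	struct demo_obj {
		struct kobject kobj;
	};

	static void demo_release(struct kobject *kobj)
	{
		kfree(container_of(kobj, struct demo_obj, kobj));
	}

	static struct kobj_type demo_ktype = {
		.release = demo_release,
	};

	static int demo_register(struct demo_obj *obj, struct kobject *parent)
	{
		/* Takes the reference that demo_unregister() must drop. */
		return kobject_init_and_add(&obj->kobj, &demo_ktype, parent, "demo");
	}

	/*
	 * Mirror of blk_mq_unregister_disk(): kobject_del() unhooks the sysfs
	 * entry, but without the matching kobject_put() the refcount never
	 * reaches zero, demo_release() never runs, and the memory leaks.
	 */
	static void demo_unregister(struct demo_obj *obj)
	{
		kobject_del(&obj->kobj);
		kobject_put(&obj->kobj);
	}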
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index f370fc13aea5..a2e69d26266d 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1,4 +1,5 @@
 #include <linux/module.h>
+
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -65,7 +66,7 @@ enum {
 	NULL_Q_MQ = 2,
 };
 
-static int submit_queues = 1;
+static int submit_queues;
 module_param(submit_queues, int, S_IRUGO);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
@@ -101,9 +102,9 @@ static int hw_queue_depth = 64;
 module_param(hw_queue_depth, int, S_IRUGO);
 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
 
-static bool use_per_node_hctx = true;
+static bool use_per_node_hctx = false;
 module_param(use_per_node_hctx, bool, S_IRUGO);
-MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
 static void put_tag(struct nullb_queue *nq, unsigned int tag)
 {
@@ -346,8 +347,37 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 
 static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
 {
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
-				hctx_index);
+	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
+	int tip = (reg->nr_hw_queues % nr_online_nodes);
+	int node = 0, i, n;
+
+	/*
+	 * Split submit queues evenly wrt to the number of nodes. If uneven,
+	 * fill the first buckets with one extra, until the rest is filled with
+	 * no extra.
+	 */
+	for (i = 0, n = 1; i < hctx_index; i++, n++) {
+		if (n % b_size == 0) {
+			n = 0;
+			node++;
+
+			tip--;
+			if (!tip)
+				b_size = reg->nr_hw_queues / nr_online_nodes;
+		}
+	}
+
+	/*
+	 * A node might not be online, therefore map the relative node id to the
+	 * real node id.
+	 */
+	for_each_online_node(n) {
+		if (!node)
+			break;
+		node--;
+	}
+
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
 }
 
 static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
@@ -355,16 +385,24 @@ static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
 	kfree(hctx);
 }
 
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+	BUG_ON(!nullb);
+	BUG_ON(!nq);
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+}
+
 static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int index)
 {
 	struct nullb *nullb = data;
 	struct nullb_queue *nq = &nullb->queues[index];
 
-	init_waitqueue_head(&nq->wait);
-	nq->queue_depth = nullb->queue_depth;
-	nullb->nr_queues++;
 	hctx->driver_data = nq;
+	null_init_queue(nullb, nq);
+	nullb->nr_queues++;
 
 	return 0;
 }
@@ -417,13 +455,13 @@ static int setup_commands(struct nullb_queue *nq)
 
 	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
 	if (!nq->cmds)
-		return 1;
+		return -ENOMEM;
 
 	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
 	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
 	if (!nq->tag_map) {
 		kfree(nq->cmds);
-		return 1;
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < nq->queue_depth; i++) {
@@ -454,33 +492,37 @@ static void cleanup_queues(struct nullb *nullb)
 
 static int setup_queues(struct nullb *nullb)
 {
-	struct nullb_queue *nq;
-	int i;
-
-	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
+	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
+				GFP_KERNEL);
 	if (!nullb->queues)
-		return 1;
+		return -ENOMEM;
 
 	nullb->nr_queues = 0;
 	nullb->queue_depth = hw_queue_depth;
 
-	if (queue_mode == NULL_Q_MQ)
-		return 0;
+	return 0;
+}
+
+static int init_driver_queues(struct nullb *nullb)
+{
+	struct nullb_queue *nq;
+	int i, ret = 0;
 
 	for (i = 0; i < submit_queues; i++) {
 		nq = &nullb->queues[i];
-		init_waitqueue_head(&nq->wait);
-		nq->queue_depth = hw_queue_depth;
-		if (setup_commands(nq))
-			break;
+
+		null_init_queue(nullb, nq);
+
+		ret = setup_commands(nq);
+		if (ret)
+			goto err_queue;
 		nullb->nr_queues++;
 	}
 
-	if (i == submit_queues)
-		return 0;
-
+	return 0;
+err_queue:
 	cleanup_queues(nullb);
-	return 1;
+	return ret;
 }
 
 static int null_add_dev(void)
@@ -518,11 +560,13 @@ static int null_add_dev(void)
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 		blk_queue_make_request(nullb->q, null_queue_bio);
+		init_driver_queues(nullb);
 	} else {
 		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
 		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
 		if (nullb->q)
 			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+		init_driver_queues(nullb);
 	}
 
 	if (!nullb->q)
@@ -579,7 +623,13 @@ static int __init null_init(void)
 	}
 #endif
 
-	if (submit_queues > nr_cpu_ids)
+	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
+		if (submit_queues < nr_online_nodes) {
+			pr_warn("null_blk: submit_queues param is set to %u.",
+				nr_online_nodes);
+			submit_queues = nr_online_nodes;
+		}
+	} else if (submit_queues > nr_cpu_ids)
 		submit_queues = nr_cpu_ids;
 	else if (!submit_queues)
 		submit_queues = 1;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 9199c93be926..eb6e1e0e8db2 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -5269,7 +5269,7 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
 	}
 }
 
-const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
+static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
 {
 	switch (state) {
 	case SKD_MSG_STATE_IDLE:
@@ -5281,7 +5281,7 @@ const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
 	}
 }
 
-const char *skd_skreq_state_to_str(enum skd_req_state state)
+static const char *skd_skreq_state_to_str(enum skd_req_state state)
 {
 	switch (state) {
 	case SKD_REQ_STATE_IDLE:
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 2b46bf1d7e40..4c9852d92b0a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -421,9 +421,11 @@ out:
 
 	if (watermark <= WATERMARK_METADATA) {
 		SET_GC_MARK(b, GC_MARK_METADATA);
+		SET_GC_MOVE(b, 0);
 		b->prio = BTREE_PRIO;
 	} else {
 		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+		SET_GC_MOVE(b, 0);
 		b->prio = INITIAL_PRIO;
 	}
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 4beb55a0ff30..754f43177483 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -197,7 +197,7 @@ struct bucket {
 	uint8_t		disk_gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
 	uint8_t		gc_gen;
-	uint16_t	gc_mark;
+	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
 };
 
 /*
@@ -209,7 +209,8 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
 #define GC_MARK_RECLAIMABLE	0
 #define GC_MARK_DIRTY		1
 #define GC_MARK_METADATA	2
-BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
+BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
 
 #include "journal.h"
 #include "stats.h"
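After this change the 16-bit gc_mark word packs three fields: bits 0-1 hold the GC mark, bits 2-14 the sectors-used count (now 13 bits), and bit 15 the new GC_MOVE flag. A user-space sketch of how such BITMASK()-generated accessors behave (the macro body below paraphrases the pattern, it is not bcache's exact definition):

	#include <stdint.h>
	#include <stdio.h>

	struct bucket {
		uint16_t gc_mark;
	};

	/* Generate get/set accessors for a size-bit field at a bit offset. */
	#define BITMASK(name, type, field, offset, size)		\
	static uint64_t name(const struct type *t)			\
	{								\
		return (t->field >> offset) & ~(~0ULL << size);		\
	}								\
	static void SET_##name(struct type *t, uint64_t v)		\
	{								\
		t->field &= ~(~(~0ULL << size) << offset);		\
		t->field |= (v & ~(~0ULL << size)) << offset;		\
	}

	/* Same layout as the patched header: 2 + 13 + 1 = 16 bits. */
	BITMASK(GC_MARK, bucket, gc_mark, 0, 2)
	BITMASK(GC_SECTORS_USED, bucket, gc_mark, 2, 13)
	BITMASK(GC_MOVE, bucket, gc_mark, 15, 1)

	int main(void)
	{
		struct bucket b = { 0 };

		SET_GC_SECTORS_USED(&b, 4096);
		SET_GC_MOVE(&b, 1);
		printf("mark=%llu used=%llu move=%llu\n",
		       (unsigned long long)GC_MARK(&b),
		       (unsigned long long)GC_SECTORS_USED(&b),
		       (unsigned long long)GC_MOVE(&b));
		return 0;
	}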
@@ -372,14 +373,14 @@ struct cached_dev {
 	unsigned char		writeback_percent;
 	unsigned		writeback_delay;
 
-	int			writeback_rate_change;
-	int64_t			writeback_rate_derivative;
 	uint64_t		writeback_rate_target;
+	int64_t			writeback_rate_proportional;
+	int64_t			writeback_rate_derivative;
+	int64_t			writeback_rate_change;
 
 	unsigned		writeback_rate_update_seconds;
 	unsigned		writeback_rate_d_term;
 	unsigned		writeback_rate_p_term_inverse;
-	unsigned		writeback_rate_d_smooth;
 };
 
 enum alloc_watermarks {
@@ -445,7 +446,6 @@ struct cache {
 	 * call prio_write() to keep gens from wrapping.
 	 */
 	uint8_t			need_save_prio;
-	unsigned		gc_move_threshold;
 
 	/*
 	 * If nonzero, we know we aren't going to find any buckets to invalidate
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 5e2765aadce1..31bb53fcc67a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1561,6 +1561,28 @@ size_t bch_btree_gc_finish(struct cache_set *c)
 		SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
 			    GC_MARK_METADATA);
 
+	/* don't reclaim buckets to which writeback keys point */
+	rcu_read_lock();
+	for (i = 0; i < c->nr_uuids; i++) {
+		struct bcache_device *d = c->devices[i];
+		struct cached_dev *dc;
+		struct keybuf_key *w, *n;
+		unsigned j;
+
+		if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
+			continue;
+		dc = container_of(d, struct cached_dev, disk);
+
+		spin_lock(&dc->writeback_keys.lock);
+		rbtree_postorder_for_each_entry_safe(w, n,
+					&dc->writeback_keys.keys, node)
+			for (j = 0; j < KEY_PTRS(&w->key); j++)
+				SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
+					    GC_MARK_DIRTY);
+		spin_unlock(&dc->writeback_keys.lock);
+	}
+	rcu_read_unlock();
+
 	for_each_cache(ca, c, i) {
 		uint64_t *i;
 
@@ -1817,7 +1839,8 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
 		if (KEY_START(k) > KEY_START(insert) + sectors_found)
 			goto check_failed;
 
-		if (KEY_PTRS(replace_key) != KEY_PTRS(k))
+		if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
+		    KEY_DIRTY(k) != KEY_DIRTY(replace_key))
 			goto check_failed;
 
 		/* skip past gen */
@@ -2217,7 +2240,7 @@ struct btree_insert_op {
 	struct bkey	*replace_key;
 };
 
-int btree_insert_fn(struct btree_op *b_op, struct btree *b)
+static int btree_insert_fn(struct btree_op *b_op, struct btree *b)
 {
 	struct btree_insert_op *op = container_of(b_op,
 					struct btree_insert_op, op);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 7c1275e66025..f2f0998c4a91 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -25,10 +25,9 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 	unsigned i;
 
 	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct cache *ca = PTR_CACHE(c, k, i);
 		struct bucket *g = PTR_BUCKET(c, k, i);
 
-		if (GC_SECTORS_USED(g) < ca->gc_move_threshold)
+		if (GC_MOVE(g))
 			return true;
 	}
 
@@ -65,11 +64,16 @@ static void write_moving_finish(struct closure *cl)
 
 static void read_moving_endio(struct bio *bio, int error)
 {
+	struct bbio *b = container_of(bio, struct bbio, bio);
 	struct moving_io *io = container_of(bio->bi_private,
 					    struct moving_io, cl);
 
 	if (error)
 		io->op.error = error;
+	else if (!KEY_DIRTY(&b->key) &&
+		 ptr_stale(io->op.c, &b->key, 0)) {
+		io->op.error = -EINTR;
+	}
 
 	bch_bbio_endio(io->op.c, bio, error, "reading data to move");
 }
@@ -141,6 +145,11 @@ static void read_moving(struct cache_set *c)
 		if (!w)
 			break;
 
+		if (ptr_stale(c, &w->key, 0)) {
+			bch_keybuf_del(&c->moving_gc_keys, w);
+			continue;
+		}
+
 		io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
			     * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
			     GFP_KERNEL);
@@ -184,7 +193,8 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
 
 static unsigned bucket_heap_top(struct cache *ca)
 {
-	return GC_SECTORS_USED(heap_peek(&ca->heap));
+	struct bucket *b;
+	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
 }
 
 void bch_moving_gc(struct cache_set *c)
@@ -226,9 +236,8 @@ void bch_moving_gc(struct cache_set *c)
 			sectors_to_move -= GC_SECTORS_USED(b);
 		}
 
-		ca->gc_move_threshold = bucket_heap_top(ca);
-
-		pr_debug("threshold %u", ca->gc_move_threshold);
+		while (heap_pop(&ca->heap, b, bucket_cmp))
+			SET_GC_MOVE(b, 1);
 	}
 
 	mutex_unlock(&c->bucket_lock);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dec15cd2d797..c57bfa071a57 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1676,7 +1676,7 @@ err:
 static bool can_attach_cache(struct cache *ca, struct cache_set *c)
 {
 	return ca->sb.block_size	== c->sb.block_size &&
-		ca->sb.bucket_size	== c->sb.block_size &&
+		ca->sb.bucket_size	== c->sb.bucket_size &&
 		ca->sb.nr_in_set	== c->sb.nr_in_set;
 }
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 80d4c2bee18a..a1f85612f0b3 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -83,7 +83,6 @@ rw_attribute(writeback_rate);
 rw_attribute(writeback_rate_update_seconds);
 rw_attribute(writeback_rate_d_term);
 rw_attribute(writeback_rate_p_term_inverse);
-rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
 read_attribute(stripe_size);
@@ -129,31 +128,41 @@ SHOW(__bch_cached_dev)
 	var_printf(writeback_running,	"%i");
 	var_print(writeback_delay);
 	var_print(writeback_percent);
-	sysfs_print(writeback_rate,	dc->writeback_rate.rate);
+	sysfs_hprint(writeback_rate,	dc->writeback_rate.rate << 9);
 
 	var_print(writeback_rate_update_seconds);
 	var_print(writeback_rate_d_term);
 	var_print(writeback_rate_p_term_inverse);
-	var_print(writeback_rate_d_smooth);
 
 	if (attr == &sysfs_writeback_rate_debug) {
+		char rate[20];
 		char dirty[20];
-		char derivative[20];
 		char target[20];
-		bch_hprint(dirty,
-			   bcache_dev_sectors_dirty(&dc->disk) << 9);
-		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
+		char proportional[20];
+		char derivative[20];
+		char change[20];
+		s64 next_io;
+
+		bch_hprint(rate,	dc->writeback_rate.rate << 9);
+		bch_hprint(dirty,	bcache_dev_sectors_dirty(&dc->disk) << 9);
 		bch_hprint(target,	dc->writeback_rate_target << 9);
+		bch_hprint(proportional,dc->writeback_rate_proportional << 9);
+		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
+		bch_hprint(change,	dc->writeback_rate_change << 9);
+
+		next_io = div64_s64(dc->writeback_rate.next - local_clock(),
+				    NSEC_PER_MSEC);
 
 		return sprintf(buf,
-			       "rate:\t\t%u\n"
-			       "change:\t\t%i\n"
+			       "rate:\t\t%s/sec\n"
 			       "dirty:\t\t%s\n"
+			       "target:\t\t%s\n"
+			       "proportional:\t%s\n"
 			       "derivative:\t%s\n"
-			       "target:\t\t%s\n",
-			       dc->writeback_rate.rate,
-			       dc->writeback_rate_change,
-			       dirty, derivative, target);
+			       "change:\t\t%s/sec\n"
+			       "next io:\t%llims\n",
+			       rate, dirty, target, proportional,
+			       derivative, change, next_io);
 	}
 
 	sysfs_hprint(dirty_data,
@@ -189,6 +198,7 @@ STORE(__cached_dev)
 	struct kobj_uevent_env *env;
 
 #define d_strtoul(var)		sysfs_strtoul(var, dc->var)
+#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
 #define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
 
 	sysfs_strtoul(data_csum,	dc->disk.data_csum);
@@ -197,16 +207,15 @@ STORE(__cached_dev)
 	d_strtoul(writeback_metadata);
 	d_strtoul(writeback_running);
 	d_strtoul(writeback_delay);
-	sysfs_strtoul_clamp(writeback_rate,
-			    dc->writeback_rate.rate, 1, 1000000);
+
 	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
 
-	d_strtoul(writeback_rate_update_seconds);
+	sysfs_strtoul_clamp(writeback_rate,
+			    dc->writeback_rate.rate, 1, INT_MAX);
+
+	d_strtoul_nonzero(writeback_rate_update_seconds);
 	d_strtoul(writeback_rate_d_term);
-	d_strtoul(writeback_rate_p_term_inverse);
-	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
-			    dc->writeback_rate_p_term_inverse, 1, INT_MAX);
-	d_strtoul(writeback_rate_d_smooth);
+	d_strtoul_nonzero(writeback_rate_p_term_inverse);
 
 	d_strtoi_h(sequential_cutoff);
 	d_strtoi_h(readahead);
@@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_rate_update_seconds,
 	&sysfs_writeback_rate_d_term,
 	&sysfs_writeback_rate_p_term_inverse,
-	&sysfs_writeback_rate_d_smooth,
 	&sysfs_writeback_rate_debug,
 	&sysfs_dirty_data,
 	&sysfs_stripe_size,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 462214eeacbe..bb37618e7664 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
 	uint64_t now = local_clock();
 
-	d->next += div_u64(done, d->rate);
+	d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+
+	if (time_before64(now + NSEC_PER_SEC, d->next))
+		d->next = now + NSEC_PER_SEC;
+
+	if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+		d->next = now - NSEC_PER_SEC * 2;
 
 	return time_after64(d->next, now)
 		? div_u64(d->next - now, NSEC_PER_SEC / HZ)
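With this change d->rate is interpreted as units per second (writeback now passes plain sectors rather than a pre-scaled value), and the virtual deadline d->next is clamped to at most one second ahead of the clock and at most two seconds behind it, so an idle stretch cannot bank unbounded credit. A rough user-space model of the scheme (plain comparisons stand in for the kernel's wraparound-safe time_before64()/time_after64()):

	#include <stdint.h>

	#define NSEC_PER_SEC	1000000000ULL
	#define HZ		100	/* stand-in tick rate */

	struct ratelimit {
		uint64_t next;	/* virtual deadline, in ns */
		unsigned rate;	/* units per second */
	};

	/*
	 * Charge 'done' units and return how many ticks the caller should
	 * sleep, mirroring the clamping added to bch_next_delay() above.
	 */
	static uint64_t next_delay(struct ratelimit *d, uint64_t done,
				   uint64_t now)
	{
		/* Each unit of work pushes the deadline out by 1s / rate. */
		d->next += done * NSEC_PER_SEC / d->rate;

		/* Never schedule more than one second ahead... */
		if (d->next > now + NSEC_PER_SEC)
			d->next = now + NSEC_PER_SEC;

		/* ...and never fall more than two seconds behind. */
		if (d->next + 2 * NSEC_PER_SEC < now)
			d->next = now - 2 * NSEC_PER_SEC;

		return d->next > now
			? (d->next - now) / (NSEC_PER_SEC / HZ) : 0;
	}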
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 362c4b3f8b4a..1030c6020e98 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -110,7 +110,7 @@ do {									\
 	_r;								\
 })
 
-#define heap_peek(h)	((h)->size ? (h)->data[0] : NULL)
+#define heap_peek(h)	((h)->used ? (h)->data[0] : NULL)
 
 #define heap_full(h)	((h)->used == (h)->size)
 
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 99053b1251be..6c44fe059c27 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
 	/* PD controller */
 
-	int change = 0;
-	int64_t error;
 	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
 	int64_t derivative = dirty - dc->disk.sectors_dirty_last;
+	int64_t proportional = dirty - target;
+	int64_t change;
 
 	dc->disk.sectors_dirty_last = dirty;
 
-	derivative *= dc->writeback_rate_d_term;
-	derivative = clamp(derivative, -dirty, dirty);
+	/* Scale to sectors per second */
 
-	derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
-			      dc->writeback_rate_d_smooth, 0);
+	proportional *= dc->writeback_rate_update_seconds;
+	proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
 
-	/* Avoid divide by zero */
-	if (!target)
-		goto out;
+	derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
 
-	error = div64_s64((dirty + derivative - target) << 8, target);
+	derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
+			      (dc->writeback_rate_d_term /
+			       dc->writeback_rate_update_seconds) ?: 1, 0);
+
+	derivative *= dc->writeback_rate_d_term;
+	derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
 
-	change = div_s64((dc->writeback_rate.rate * error) >> 8,
-			 dc->writeback_rate_p_term_inverse);
+	change = proportional + derivative;
 
 	/* Don't increase writeback rate if the device isn't keeping up */
 	if (change > 0 &&
 	    time_after64(local_clock(),
-			 dc->writeback_rate.next + 10 * NSEC_PER_MSEC))
+			 dc->writeback_rate.next + NSEC_PER_MSEC))
 		change = 0;
 
 	dc->writeback_rate.rate =
-		clamp_t(int64_t, dc->writeback_rate.rate + change,
+		clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
 			1, NSEC_PER_MSEC);
-out:
+
+	dc->writeback_rate_proportional = proportional;
 	dc->writeback_rate_derivative = derivative;
 	dc->writeback_rate_change = change;
 	dc->writeback_rate_target = target;
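The rewritten controller is a plain PD loop: change = P + D, with P = (dirty - target) * update_seconds / p_term_inverse and D a smoothed per-second derivative of the dirty count scaled by d_term / p_term_inverse. A compact user-space model of one update step, using the new defaults set in bch_cached_dev_writeback_init() below (the EWMA form is a stand-in for bcache's ewma_add() macro, not its exact arithmetic):

	#include <stdint.h>

	struct pd_state {
		int64_t rate;		/* output: sectors per second */
		int64_t dirty_last;	/* dirty count at previous update */
		int64_t deriv_ewma;	/* smoothed derivative */
	};

	static int64_t update_rate(struct pd_state *s, int64_t dirty,
				   int64_t target)
	{
		/* New defaults: 5s period, d_term 30, p_term_inverse 6000. */
		const int64_t secs = 5, d_term = 30, p_inv = 6000;

		/* P term: distance from target, scaled down by p_inv. */
		int64_t proportional = (dirty - target) * secs / p_inv;

		/* D term: per-second change of dirty data, EWMA-smoothed. */
		int64_t deriv = (dirty - s->dirty_last) / secs;
		int64_t weight = (d_term / secs) ? (d_term / secs) : 1;

		s->deriv_ewma += (deriv - s->deriv_ewma) / weight;
		deriv = s->deriv_ewma * d_term / p_inv;

		s->dirty_last = dirty;
		s->rate += proportional + deriv;
		if (s->rate < 1)	/* same lower clamp as the driver */
			s->rate = 1;
		return s->rate;
	}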
@@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
-	uint64_t ret;
-
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
-
-	return min_t(uint64_t, ret, HZ);
+	return bch_next_delay(&dc->writeback_rate, sectors);
 }
 
 struct dirty_io {
@@ -241,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
 		if (KEY_START(&w->key) != dc->last_read ||
 		    jiffies_to_msecs(delay) > 50)
 			while (!kthread_should_stop() && delay)
-				delay = schedule_timeout_interruptible(delay);
+				delay = schedule_timeout_uninterruptible(delay);
 
 		dc->last_read	= KEY_OFFSET(&w->key);
 
@@ -438,7 +436,7 @@ static int bch_writeback_thread(void *arg)
 		while (delay &&
 		       !kthread_should_stop() &&
 		       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-			delay = schedule_timeout_interruptible(delay);
+			delay = schedule_timeout_uninterruptible(delay);
 	}
 }
 
@@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 	bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
 			   sectors_dirty_init_fn, 0);
+
+	dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
 }
 
 int bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -490,18 +490,15 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_delay	= 30;
 	dc->writeback_rate.rate	= 1024;
 
-	dc->writeback_rate_update_seconds = 30;
-	dc->writeback_rate_d_term	= 16;
-	dc->writeback_rate_p_term_inverse = 64;
-	dc->writeback_rate_d_smooth	= 8;
+	dc->writeback_rate_update_seconds = 5;
+	dc->writeback_rate_d_term	= 30;
+	dc->writeback_rate_p_term_inverse = 6000;
 
 	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
 					      "bcache_writeback");
 	if (IS_ERR(dc->writeback_thread))
 		return PTR_ERR(dc->writeback_thread);
 
-	set_task_state(dc->writeback_thread, TASK_INTERRUPTIBLE);
-
 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 	schedule_delayed_work(&dc->writeback_rate_update,
 			      dc->writeback_rate_update_seconds * HZ);