summary | refs | log | tree | commit | diff | stats
path: root/block
diff options
context:
space:
mode:
author: Mikulas Patocka <mpatocka@redhat.com> 2018-12-06 11:41:19 -0500
committer: Jens Axboe <axboe@kernel.dk> 2018-12-10 10:30:37 -0500
commit: 5b18b5a737600fd20ba2045f320d5926ebbf341a (patch)
tree: fe35e150bf36785dfe3ed8c845e3b043e56f9f90 /block
parent: 112f158f66cbe25fd561a5dfe9c3826e06abf757 (diff)
block: delete part_round_stats and switch to less precise counting
We want to convert to per-cpu in_flight counters.

The function part_round_stats needs the in_flight counter every jiffy, it would be too costly to sum all the percpu variables every jiffy, so it must be deleted. part_round_stats is used to calculate two counters - time_in_queue and io_ticks.

time_in_queue can be calculated without part_round_stats, by adding the duration of the I/O when the I/O ends (the value is almost as exact as the previously calculated value, except that time for in-progress I/Os is not counted).

io_ticks can be approximated by increasing the value when I/O is started or ended and the jiffies value has changed. If the I/Os take less than a jiffy, the value is as exact as the previously calculated value. If the I/Os take more than a jiffy, io_ticks can drift behind the previously calculated value.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r-- block/bio.c 24
-rw-r--r-- block/blk-core.c 62
-rw-r--r-- block/blk-merge.c 1
-rw-r--r-- block/genhd.c 3
-rw-r--r-- block/partition-generic.c 3
5 files changed, 25 insertions, 68 deletions
diff --git a/block/bio.c b/block/bio.c
index 0aca870331c3..036e3f0cc736 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1664,6 +1664,22 @@ defer:
1664} 1664}
1665EXPORT_SYMBOL_GPL(bio_check_pages_dirty); 1665EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
1666 1666
1667void update_io_ticks(struct hd_struct *part, unsigned long now)
1668{
1669 unsigned long stamp;
1670again:
1671 stamp = READ_ONCE(part->stamp);
1672 if (unlikely(stamp != now)) {
1673 if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
1674 __part_stat_add(part, io_ticks, 1);
1675 }
1676 }
1677 if (part->partno) {
1678 part = &part_to_disk(part)->part0;
1679 goto again;
1680 }
1681}
1682
1667void generic_start_io_acct(struct request_queue *q, int op, 1683void generic_start_io_acct(struct request_queue *q, int op,
1668 unsigned long sectors, struct hd_struct *part) 1684 unsigned long sectors, struct hd_struct *part)
1669{ 1685{
@@ -1671,7 +1687,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
1671 1687
1672 part_stat_lock(); 1688 part_stat_lock();
1673 1689
1674 part_round_stats(q, part); 1690 update_io_ticks(part, jiffies);
1675 part_stat_inc(part, ios[sgrp]); 1691 part_stat_inc(part, ios[sgrp]);
1676 part_stat_add(part, sectors[sgrp], sectors); 1692 part_stat_add(part, sectors[sgrp], sectors);
1677 part_inc_in_flight(q, part, op_is_write(op)); 1693 part_inc_in_flight(q, part, op_is_write(op));
@@ -1683,13 +1699,15 @@ EXPORT_SYMBOL(generic_start_io_acct);
1683void generic_end_io_acct(struct request_queue *q, int req_op, 1699void generic_end_io_acct(struct request_queue *q, int req_op,
1684 struct hd_struct *part, unsigned long start_time) 1700 struct hd_struct *part, unsigned long start_time)
1685{ 1701{
1686 unsigned long duration = jiffies - start_time; 1702 unsigned long now = jiffies;
1703 unsigned long duration = now - start_time;
1687 const int sgrp = op_stat_group(req_op); 1704 const int sgrp = op_stat_group(req_op);
1688 1705
1689 part_stat_lock(); 1706 part_stat_lock();
1690 1707
1708 update_io_ticks(part, now);
1691 part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); 1709 part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
1692 part_round_stats(q, part); 1710 part_stat_add(part, time_in_queue, duration);
1693 part_dec_in_flight(q, part, op_is_write(req_op)); 1711 part_dec_in_flight(q, part, op_is_write(req_op));
1694 1712
1695 part_stat_unlock(); 1713 part_stat_unlock();
diff --git a/block/blk-core.c b/block/blk-core.c
index 734b768c9d9d..268d2b8e9843 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -584,62 +584,6 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op,
584} 584}
585EXPORT_SYMBOL(blk_get_request); 585EXPORT_SYMBOL(blk_get_request);
586 586
587static void part_round_stats_single(struct request_queue *q,
588 struct hd_struct *part, unsigned long now,
589 unsigned int inflight)
590{
591 if (inflight) {
592 __part_stat_add(part, time_in_queue,
593 inflight * (now - part->stamp));
594 __part_stat_add(part, io_ticks, (now - part->stamp));
595 }
596 part->stamp = now;
597}
598
599/**
600 * part_round_stats() - Round off the performance stats on a struct disk_stats.
601 * @q: target block queue
602 * @part: target partition
603 *
604 * The average IO queue length and utilisation statistics are maintained
605 * by observing the current state of the queue length and the amount of
606 * time it has been in this state for.
607 *
608 * Normally, that accounting is done on IO completion, but that can result
609 * in more than a second's worth of IO being accounted for within any one
610 * second, leading to >100% utilisation. To deal with that, we call this
611 * function to do a round-off before returning the results when reading
612 * /proc/diskstats. This accounts immediately for all queue usage up to
613 * the current jiffies and restarts the counters again.
614 */
615void part_round_stats(struct request_queue *q, struct hd_struct *part)
616{
617 struct hd_struct *part2 = NULL;
618 unsigned long now = jiffies;
619 unsigned int inflight[2];
620 int stats = 0;
621
622 if (part->stamp != now)
623 stats |= 1;
624
625 if (part->partno) {
626 part2 = &part_to_disk(part)->part0;
627 if (part2->stamp != now)
628 stats |= 2;
629 }
630
631 if (!stats)
632 return;
633
634 part_in_flight(q, part, inflight);
635
636 if (stats & 2)
637 part_round_stats_single(q, part2, now, inflight[1]);
638 if (stats & 1)
639 part_round_stats_single(q, part, now, inflight[0]);
640}
641EXPORT_SYMBOL_GPL(part_round_stats);
642
643void blk_put_request(struct request *req) 587void blk_put_request(struct request *req)
644{ 588{
645 blk_mq_free_request(req); 589 blk_mq_free_request(req);
@@ -1383,9 +1327,10 @@ void blk_account_io_done(struct request *req, u64 now)
1383 part_stat_lock(); 1327 part_stat_lock();
1384 part = req->part; 1328 part = req->part;
1385 1329
1330 update_io_ticks(part, jiffies);
1386 part_stat_inc(part, ios[sgrp]); 1331 part_stat_inc(part, ios[sgrp]);
1387 part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); 1332 part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
1388 part_round_stats(req->q, part); 1333 part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
1389 part_dec_in_flight(req->q, part, rq_data_dir(req)); 1334 part_dec_in_flight(req->q, part, rq_data_dir(req));
1390 1335
1391 hd_struct_put(part); 1336 hd_struct_put(part);
@@ -1420,11 +1365,12 @@ void blk_account_io_start(struct request *rq, bool new_io)
1420 part = &rq->rq_disk->part0; 1365 part = &rq->rq_disk->part0;
1421 hd_struct_get(part); 1366 hd_struct_get(part);
1422 } 1367 }
1423 part_round_stats(rq->q, part);
1424 part_inc_in_flight(rq->q, part, rw); 1368 part_inc_in_flight(rq->q, part, rw);
1425 rq->part = part; 1369 rq->part = part;
1426 } 1370 }
1427 1371
1372 update_io_ticks(part, jiffies);
1373
1428 part_stat_unlock(); 1374 part_stat_unlock();
1429} 1375}
1430 1376
diff --git a/block/blk-merge.c b/block/blk-merge.c
index a120d59b9705..9da5629d0887 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -689,7 +689,6 @@ static void blk_account_io_merge(struct request *req)
689 part_stat_lock(); 689 part_stat_lock();
690 part = req->part; 690 part = req->part;
691 691
692 part_round_stats(req->q, part);
693 part_dec_in_flight(req->q, part, rq_data_dir(req)); 692 part_dec_in_flight(req->q, part, rq_data_dir(req));
694 693
695 hd_struct_put(part); 694 hd_struct_put(part);
diff --git a/block/genhd.c b/block/genhd.c
index 2fe00cf32b93..cdf174d7d329 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1337,9 +1337,6 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1337 1337
1338 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1338 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1339 while ((hd = disk_part_iter_next(&piter))) { 1339 while ((hd = disk_part_iter_next(&piter))) {
1340 part_stat_lock();
1341 part_round_stats(gp->queue, hd);
1342 part_stat_unlock();
1343 part_in_flight(gp->queue, hd, inflight); 1340 part_in_flight(gp->queue, hd, inflight);
1344 seq_printf(seqf, "%4d %7d %s " 1341 seq_printf(seqf, "%4d %7d %s "
1345 "%lu %lu %lu %u " 1342 "%lu %lu %lu %u "
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 7e663cfb1487..42d6138ac876 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -122,9 +122,6 @@ ssize_t part_stat_show(struct device *dev,
122 struct request_queue *q = part_to_disk(p)->queue; 122 struct request_queue *q = part_to_disk(p)->queue;
123 unsigned int inflight[2]; 123 unsigned int inflight[2];
124 124
125 part_stat_lock();
126 part_round_stats(q, p);
127 part_stat_unlock();
128 part_in_flight(q, p, inflight); 125 part_in_flight(q, p, inflight);
129 return sprintf(buf, 126 return sprintf(buf,
130 "%8lu %8lu %8llu %8u " 127 "%8lu %8lu %8llu %8u "