about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mikulas Patocka <mpatocka@redhat.com> 2015-06-09 17:21:39 -0400
committer Mike Snitzer <snitzer@redhat.com> 2015-06-17 12:40:40 -0400
commit c96aec344de0de857ef3d7fba53992c7ba311e1e (patch)
tree 09b6b651366fd853dc4c73a97ee0598865bd4e06
parent dd4c1b7d0c95be1c9245118a3accc41a16f1db67 (diff)
dm stats: support precise timestamps
Make it possible to use precise timestamps with nanosecond granularity in dm statistics.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- Documentation/device-mapper/statistics.txt 28
-rw-r--r-- drivers/md/dm-stats.c 138
-rw-r--r-- drivers/md/dm-stats.h 4
3 files changed, 127 insertions, 43 deletions
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
index 2a1673adc200..ff6baeaa71f7 100644
--- a/Documentation/device-mapper/statistics.txt
+++ b/Documentation/device-mapper/statistics.txt
@@ -13,9 +13,13 @@ the range specified.
13The I/O statistics counters for each step-sized area of a region are 13The I/O statistics counters for each step-sized area of a region are
14in the same format as /sys/block/*/stat or /proc/diskstats (see: 14in the same format as /sys/block/*/stat or /proc/diskstats (see:
15Documentation/iostats.txt). But two extra counters (12 and 13) are 15Documentation/iostats.txt). But two extra counters (12 and 13) are
16provided: total time spent reading and writing in milliseconds. All 16provided: total time spent reading and writing. All these counters may
17these counters may be accessed by sending the @stats_print message to 17be accessed by sending the @stats_print message to the appropriate DM
18the appropriate DM device via dmsetup. 18device via dmsetup.
19
20The reported times are in milliseconds and the granularity depends on
21the kernel ticks. When the option precise_timestamps is used, the
22reported times are in nanoseconds.
19 23
20Each region has a corresponding unique identifier, which we call a 24Each region has a corresponding unique identifier, which we call a
21region_id, that is assigned when the region is created. The region_id 25region_id, that is assigned when the region is created. The region_id
@@ -33,7 +37,9 @@ memory is used by reading
33Messages 37Messages
34======== 38========
35 39
36 @stats_create <range> <step> [<program_id> [<aux_data>]] 40 @stats_create <range> <step>
41 [<number_of_optional_arguments> <optional_arguments>...]
42 [<program_id> [<aux_data>]]
37 43
38 Create a new region and return the region_id. 44 Create a new region and return the region_id.
39 45
@@ -48,6 +54,17 @@ Messages
48 "/<number_of_areas>" - the range is subdivided into the specified 54 "/<number_of_areas>" - the range is subdivided into the specified
49 number of areas. 55 number of areas.
50 56
57 <number_of_optional_arguments>
58 The number of optional arguments
59
60 <optional_arguments>
61 The following optional arguments are supported
62 precise_timestamps - use precise timer with nanosecond resolution
63 instead of the "jiffies" variable. When this argument is
64 used, the resulting times are in nanoseconds instead of
65 milliseconds. Precise timestamps are a little bit slower
66 to obtain than jiffies-based timestamps.
67
51 <program_id> 68 <program_id>
52 An optional parameter. A name that uniquely identifies 69 An optional parameter. A name that uniquely identifies
53 the userspace owner of the range. This groups ranges together 70 the userspace owner of the range. This groups ranges together
@@ -55,6 +72,9 @@ Messages
55 created and ignore those created by others. 72 created and ignore those created by others.
56 The kernel returns this string back in the output of 73 The kernel returns this string back in the output of
57 @stats_list message, but it doesn't use it for anything else. 74 @stats_list message, but it doesn't use it for anything else.
75 If we omit the number of optional arguments, program id must not
76 be a number, otherwise it would be interpreted as the number of
77 optional arguments.
58 78
59 <aux_data> 79 <aux_data>
60 An optional parameter. A word that provides auxiliary data 80 An optional parameter. A word that provides auxiliary data
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index d1fd31a6dd1a..4bfd84ab1d4a 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -33,13 +33,14 @@ struct dm_stat_percpu {
33 33
34struct dm_stat_shared { 34struct dm_stat_shared {
35 atomic_t in_flight[2]; 35 atomic_t in_flight[2];
36 unsigned long stamp; 36 unsigned long long stamp;
37 struct dm_stat_percpu tmp; 37 struct dm_stat_percpu tmp;
38}; 38};
39 39
40struct dm_stat { 40struct dm_stat {
41 struct list_head list_entry; 41 struct list_head list_entry;
42 int id; 42 int id;
43 unsigned stat_flags;
43 size_t n_entries; 44 size_t n_entries;
44 sector_t start; 45 sector_t start;
45 sector_t end; 46 sector_t end;
@@ -53,6 +54,8 @@ struct dm_stat {
53 struct dm_stat_shared stat_shared[0]; 54 struct dm_stat_shared stat_shared[0];
54}; 55};
55 56
57#define STAT_PRECISE_TIMESTAMPS 1
58
56struct dm_stats_last_position { 59struct dm_stats_last_position {
57 sector_t last_sector; 60 sector_t last_sector;
58 unsigned last_rw; 61 unsigned last_rw;
@@ -224,7 +227,8 @@ void dm_stats_cleanup(struct dm_stats *stats)
224} 227}
225 228
226static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, 229static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
227 sector_t step, const char *program_id, const char *aux_data, 230 sector_t step, unsigned stat_flags,
231 const char *program_id, const char *aux_data,
228 void (*suspend_callback)(struct mapped_device *), 232 void (*suspend_callback)(struct mapped_device *),
229 void (*resume_callback)(struct mapped_device *), 233 void (*resume_callback)(struct mapped_device *),
230 struct mapped_device *md) 234 struct mapped_device *md)
@@ -265,6 +269,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
265 if (!s) 269 if (!s)
266 return -ENOMEM; 270 return -ENOMEM;
267 271
272 s->stat_flags = stat_flags;
268 s->n_entries = n_entries; 273 s->n_entries = n_entries;
269 s->start = start; 274 s->start = start;
270 s->end = end; 275 s->end = end;
@@ -414,18 +419,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
414 return 1; 419 return 1;
415} 420}
416 421
417static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p) 422static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
423 struct dm_stat_percpu *p)
418{ 424{
419 /* 425 /*
420 * This is racy, but so is part_round_stats_single. 426 * This is racy, but so is part_round_stats_single.
421 */ 427 */
422 unsigned long now = jiffies; 428 unsigned long long now, difference;
423 unsigned in_flight_read; 429 unsigned in_flight_read, in_flight_write;
424 unsigned in_flight_write; 430
425 unsigned long difference = now - shared->stamp; 431 if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
432 now = jiffies;
433 else
434 now = ktime_to_ns(ktime_get());
426 435
436 difference = now - shared->stamp;
427 if (!difference) 437 if (!difference)
428 return; 438 return;
439
429 in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]); 440 in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
430 in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]); 441 in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
431 if (in_flight_read) 442 if (in_flight_read)
@@ -440,8 +451,9 @@ static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *
440} 451}
441 452
442static void dm_stat_for_entry(struct dm_stat *s, size_t entry, 453static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
443 unsigned long bi_rw, sector_t len, bool merged, 454 unsigned long bi_rw, sector_t len,
444 bool end, unsigned long duration) 455 struct dm_stats_aux *stats_aux, bool end,
456 unsigned long duration_jiffies)
445{ 457{
446 unsigned long idx = bi_rw & REQ_WRITE; 458 unsigned long idx = bi_rw & REQ_WRITE;
447 struct dm_stat_shared *shared = &s->stat_shared[entry]; 459 struct dm_stat_shared *shared = &s->stat_shared[entry];
@@ -471,15 +483,18 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
471 p = &s->stat_percpu[smp_processor_id()][entry]; 483 p = &s->stat_percpu[smp_processor_id()][entry];
472 484
473 if (!end) { 485 if (!end) {
474 dm_stat_round(shared, p); 486 dm_stat_round(s, shared, p);
475 atomic_inc(&shared->in_flight[idx]); 487 atomic_inc(&shared->in_flight[idx]);
476 } else { 488 } else {
477 dm_stat_round(shared, p); 489 dm_stat_round(s, shared, p);
478 atomic_dec(&shared->in_flight[idx]); 490 atomic_dec(&shared->in_flight[idx]);
479 p->sectors[idx] += len; 491 p->sectors[idx] += len;
480 p->ios[idx] += 1; 492 p->ios[idx] += 1;
481 p->merges[idx] += merged; 493 p->merges[idx] += stats_aux->merged;
482 p->ticks[idx] += duration; 494 if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS))
495 p->ticks[idx] += duration_jiffies;
496 else
497 p->ticks[idx] += stats_aux->duration_ns;
483 } 498 }
484 499
485#if BITS_PER_LONG == 32 500#if BITS_PER_LONG == 32
@@ -491,7 +506,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
491 506
492static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw, 507static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
493 sector_t bi_sector, sector_t end_sector, 508 sector_t bi_sector, sector_t end_sector,
494 bool end, unsigned long duration, 509 bool end, unsigned long duration_jiffies,
495 struct dm_stats_aux *stats_aux) 510 struct dm_stats_aux *stats_aux)
496{ 511{
497 sector_t rel_sector, offset, todo, fragment_len; 512 sector_t rel_sector, offset, todo, fragment_len;
@@ -520,7 +535,7 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
520 if (fragment_len > s->step - offset) 535 if (fragment_len > s->step - offset)
521 fragment_len = s->step - offset; 536 fragment_len = s->step - offset;
522 dm_stat_for_entry(s, entry, bi_rw, fragment_len, 537 dm_stat_for_entry(s, entry, bi_rw, fragment_len,
523 stats_aux->merged, end, duration); 538 stats_aux, end, duration_jiffies);
524 todo -= fragment_len; 539 todo -= fragment_len;
525 entry++; 540 entry++;
526 offset = 0; 541 offset = 0;
@@ -529,11 +544,13 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
529 544
530void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, 545void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
531 sector_t bi_sector, unsigned bi_sectors, bool end, 546 sector_t bi_sector, unsigned bi_sectors, bool end,
532 unsigned long duration, struct dm_stats_aux *stats_aux) 547 unsigned long duration_jiffies,
548 struct dm_stats_aux *stats_aux)
533{ 549{
534 struct dm_stat *s; 550 struct dm_stat *s;
535 sector_t end_sector; 551 sector_t end_sector;
536 struct dm_stats_last_position *last; 552 struct dm_stats_last_position *last;
553 bool got_precise_time;
537 554
538 if (unlikely(!bi_sectors)) 555 if (unlikely(!bi_sectors))
539 return; 556 return;
@@ -557,8 +574,17 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
557 574
558 rcu_read_lock(); 575 rcu_read_lock();
559 576
560 list_for_each_entry_rcu(s, &stats->list, list_entry) 577 got_precise_time = false;
561 __dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux); 578 list_for_each_entry_rcu(s, &stats->list, list_entry) {
579 if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
580 if (!end)
581 stats_aux->duration_ns = ktime_to_ns(ktime_get());
582 else
583 stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
584 got_precise_time = true;
585 }
586 __dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
587 }
562 588
563 rcu_read_unlock(); 589 rcu_read_unlock();
564} 590}
@@ -571,7 +597,7 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared
571 597
572 local_irq_disable(); 598 local_irq_disable();
573 p = &s->stat_percpu[smp_processor_id()][x]; 599 p = &s->stat_percpu[smp_processor_id()][x];
574 dm_stat_round(shared, p); 600 dm_stat_round(s, shared, p);
575 local_irq_enable(); 601 local_irq_enable();
576 602
577 memset(&shared->tmp, 0, sizeof(shared->tmp)); 603 memset(&shared->tmp, 0, sizeof(shared->tmp));
@@ -643,11 +669,15 @@ static int dm_stats_clear(struct dm_stats *stats, int id)
643/* 669/*
644 * This is like jiffies_to_msec, but works for 64-bit values. 670 * This is like jiffies_to_msec, but works for 64-bit values.
645 */ 671 */
646static unsigned long long dm_jiffies_to_msec64(unsigned long long j) 672static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
647{ 673{
648 unsigned long long result = 0; 674 unsigned long long result;
649 unsigned mult; 675 unsigned mult;
650 676
677 if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
678 return j;
679
680 result = 0;
651 if (j) 681 if (j)
652 result = jiffies_to_msecs(j & 0x3fffff); 682 result = jiffies_to_msecs(j & 0x3fffff);
653 if (j >= 1 << 22) { 683 if (j >= 1 << 22) {
@@ -709,16 +739,16 @@ static int dm_stats_print(struct dm_stats *stats, int id,
709 shared->tmp.ios[READ], 739 shared->tmp.ios[READ],
710 shared->tmp.merges[READ], 740 shared->tmp.merges[READ],
711 shared->tmp.sectors[READ], 741 shared->tmp.sectors[READ],
712 dm_jiffies_to_msec64(shared->tmp.ticks[READ]), 742 dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
713 shared->tmp.ios[WRITE], 743 shared->tmp.ios[WRITE],
714 shared->tmp.merges[WRITE], 744 shared->tmp.merges[WRITE],
715 shared->tmp.sectors[WRITE], 745 shared->tmp.sectors[WRITE],
716 dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]), 746 dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
717 dm_stat_in_flight(shared), 747 dm_stat_in_flight(shared),
718 dm_jiffies_to_msec64(shared->tmp.io_ticks_total), 748 dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
719 dm_jiffies_to_msec64(shared->tmp.time_in_queue), 749 dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
720 dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]), 750 dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
721 dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE])); 751 dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
722 752
723 if (unlikely(sz + 1 >= maxlen)) 753 if (unlikely(sz + 1 >= maxlen))
724 goto buffer_overflow; 754 goto buffer_overflow;
@@ -769,21 +799,31 @@ static int message_stats_create(struct mapped_device *md,
769 unsigned long long start, end, len, step; 799 unsigned long long start, end, len, step;
770 unsigned divisor; 800 unsigned divisor;
771 const char *program_id, *aux_data; 801 const char *program_id, *aux_data;
802 unsigned stat_flags = 0;
803
804 struct dm_arg_set as, as_backup;
805 const char *a;
806 unsigned feature_args;
772 807
773 /* 808 /*
774 * Input format: 809 * Input format:
775 * <range> <step> [<program_id> [<aux_data>]] 810 * <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
776 */ 811 */
777 812
778 if (argc < 3 || argc > 5) 813 if (argc < 3)
779 return -EINVAL; 814 return -EINVAL;
780 815
781 if (!strcmp(argv[1], "-")) { 816 as.argc = argc;
817 as.argv = argv;
818 dm_consume_args(&as, 1);
819
820 a = dm_shift_arg(&as);
821 if (!strcmp(a, "-")) {
782 start = 0; 822 start = 0;
783 len = dm_get_size(md); 823 len = dm_get_size(md);
784 if (!len) 824 if (!len)
785 len = 1; 825 len = 1;
786 } else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 || 826 } else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
787 start != (sector_t)start || len != (sector_t)len) 827 start != (sector_t)start || len != (sector_t)len)
788 return -EINVAL; 828 return -EINVAL;
789 829
@@ -791,7 +831,8 @@ static int message_stats_create(struct mapped_device *md,
791 if (start >= end) 831 if (start >= end)
792 return -EINVAL; 832 return -EINVAL;
793 833
794 if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) { 834 a = dm_shift_arg(&as);
835 if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
795 if (!divisor) 836 if (!divisor)
796 return -EINVAL; 837 return -EINVAL;
797 step = end - start; 838 step = end - start;
@@ -799,18 +840,39 @@ static int message_stats_create(struct mapped_device *md,
799 step++; 840 step++;
800 if (!step) 841 if (!step)
801 step = 1; 842 step = 1;
802 } else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 || 843 } else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
803 step != (sector_t)step || !step) 844 step != (sector_t)step || !step)
804 return -EINVAL; 845 return -EINVAL;
805 846
847 as_backup = as;
848 a = dm_shift_arg(&as);
849 if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
850 while (feature_args--) {
851 a = dm_shift_arg(&as);
852 if (!a)
853 return -EINVAL;
854 if (!strcasecmp(a, "precise_timestamps"))
855 stat_flags |= STAT_PRECISE_TIMESTAMPS;
856 else
857 return -EINVAL;
858 }
859 } else {
860 as = as_backup;
861 }
862
806 program_id = "-"; 863 program_id = "-";
807 aux_data = "-"; 864 aux_data = "-";
808 865
809 if (argc > 3) 866 a = dm_shift_arg(&as);
810 program_id = argv[3]; 867 if (a)
868 program_id = a;
869
870 a = dm_shift_arg(&as);
871 if (a)
872 aux_data = a;
811 873
812 if (argc > 4) 874 if (as.argc)
813 aux_data = argv[4]; 875 return -EINVAL;
814 876
815 /* 877 /*
816 * If a buffer overflow happens after we created the region, 878 * If a buffer overflow happens after we created the region,
@@ -822,7 +884,7 @@ static int message_stats_create(struct mapped_device *md,
822 if (dm_message_test_buffer_overflow(result, maxlen)) 884 if (dm_message_test_buffer_overflow(result, maxlen))
823 return 1; 885 return 1;
824 886
825 id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data, 887 id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags, program_id, aux_data,
826 dm_internal_suspend_fast, dm_internal_resume_fast, md); 888 dm_internal_suspend_fast, dm_internal_resume_fast, md);
827 if (id < 0) 889 if (id < 0)
828 return id; 890 return id;
diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h
index e7c4984bf235..f1c0956e3843 100644
--- a/drivers/md/dm-stats.h
+++ b/drivers/md/dm-stats.h
@@ -18,6 +18,7 @@ struct dm_stats {
18 18
19struct dm_stats_aux { 19struct dm_stats_aux {
20 bool merged; 20 bool merged;
21 unsigned long long duration_ns;
21}; 22};
22 23
23void dm_stats_init(struct dm_stats *st); 24void dm_stats_init(struct dm_stats *st);
@@ -30,7 +31,8 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
30 31
31void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, 32void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
32 sector_t bi_sector, unsigned bi_sectors, bool end, 33 sector_t bi_sector, unsigned bi_sectors, bool end,
33 unsigned long duration, struct dm_stats_aux *aux); 34 unsigned long duration_jiffies,
35 struct dm_stats_aux *aux);
34 36
35static inline bool dm_stats_used(struct dm_stats *st) 37static inline bool dm_stats_used(struct dm_stats *st)
36{ 38{