diff options
Diffstat (limited to 'block')
| -rw-r--r-- | block/Kconfig | 23 | ||||
| -rw-r--r-- | block/Kconfig.iosched | 16 | ||||
| -rw-r--r-- | block/Makefile | 2 | ||||
| -rw-r--r-- | block/blk-barrier.c | 147 | ||||
| -rw-r--r-- | block/blk-cgroup.c | 791 | ||||
| -rw-r--r-- | block/blk-cgroup.h | 178 | ||||
| -rw-r--r-- | block/blk-core.c | 31 | ||||
| -rw-r--r-- | block/blk-lib.c | 233 | ||||
| -rw-r--r-- | block/cfq-iosched.c | 81 | ||||
| -rw-r--r-- | block/elevator.c | 11 | ||||
| -rw-r--r-- | block/genhd.c | 2 | ||||
| -rw-r--r-- | block/ioctl.c | 2 |
12 files changed, 1252 insertions, 265 deletions
diff --git a/block/Kconfig b/block/Kconfig index f9e89f4d94bb..9be0b56eaee1 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
| @@ -77,29 +77,6 @@ config BLK_DEV_INTEGRITY | |||
| 77 | T10/SCSI Data Integrity Field or the T13/ATA External Path | 77 | T10/SCSI Data Integrity Field or the T13/ATA External Path |
| 78 | Protection. If in doubt, say N. | 78 | Protection. If in doubt, say N. |
| 79 | 79 | ||
| 80 | config BLK_CGROUP | ||
| 81 | tristate "Block cgroup support" | ||
| 82 | depends on CGROUPS | ||
| 83 | depends on CFQ_GROUP_IOSCHED | ||
| 84 | default n | ||
| 85 | ---help--- | ||
| 86 | Generic block IO controller cgroup interface. This is the common | ||
| 87 | cgroup interface which should be used by various IO controlling | ||
| 88 | policies. | ||
| 89 | |||
| 90 | Currently, CFQ IO scheduler uses it to recognize task groups and | ||
| 91 | control disk bandwidth allocation (proportional time slice allocation) | ||
| 92 | to such task groups. | ||
| 93 | |||
| 94 | config DEBUG_BLK_CGROUP | ||
| 95 | bool | ||
| 96 | depends on BLK_CGROUP | ||
| 97 | default n | ||
| 98 | ---help--- | ||
| 99 | Enable some debugging help. Currently it stores the cgroup path | ||
| 100 | in the blk group which can be used by cfq for tracing various | ||
| 101 | group related activity. | ||
| 102 | |||
| 103 | endif # BLOCK | 80 | endif # BLOCK |
| 104 | 81 | ||
| 105 | config BLOCK_COMPAT | 82 | config BLOCK_COMPAT |
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index fc71cf071fb2..3199b76f795d 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched | |||
| @@ -23,7 +23,8 @@ config IOSCHED_DEADLINE | |||
| 23 | 23 | ||
| 24 | config IOSCHED_CFQ | 24 | config IOSCHED_CFQ |
| 25 | tristate "CFQ I/O scheduler" | 25 | tristate "CFQ I/O scheduler" |
| 26 | select BLK_CGROUP if CFQ_GROUP_IOSCHED | 26 | # If BLK_CGROUP is a module, CFQ has to be built as module. |
| 27 | depends on (BLK_CGROUP=m && m) || !BLK_CGROUP || BLK_CGROUP=y | ||
| 27 | default y | 28 | default y |
| 28 | ---help--- | 29 | ---help--- |
| 29 | The CFQ I/O scheduler tries to distribute bandwidth equally | 30 | The CFQ I/O scheduler tries to distribute bandwidth equally |
| @@ -33,22 +34,15 @@ config IOSCHED_CFQ | |||
| 33 | 34 | ||
| 34 | This is the default I/O scheduler. | 35 | This is the default I/O scheduler. |
| 35 | 36 | ||
| 37 | Note: If BLK_CGROUP=m, then CFQ can be built only as module. | ||
| 38 | |||
| 36 | config CFQ_GROUP_IOSCHED | 39 | config CFQ_GROUP_IOSCHED |
| 37 | bool "CFQ Group Scheduling support" | 40 | bool "CFQ Group Scheduling support" |
| 38 | depends on IOSCHED_CFQ && CGROUPS | 41 | depends on IOSCHED_CFQ && BLK_CGROUP |
| 39 | default n | 42 | default n |
| 40 | ---help--- | 43 | ---help--- |
| 41 | Enable group IO scheduling in CFQ. | 44 | Enable group IO scheduling in CFQ. |
| 42 | 45 | ||
| 43 | config DEBUG_CFQ_IOSCHED | ||
| 44 | bool "Debug CFQ Scheduling" | ||
| 45 | depends on CFQ_GROUP_IOSCHED | ||
| 46 | select DEBUG_BLK_CGROUP | ||
| 47 | default n | ||
| 48 | ---help--- | ||
| 49 | Enable CFQ IO scheduling debugging in CFQ. Currently it makes | ||
| 50 | blktrace output more verbose. | ||
| 51 | |||
| 52 | choice | 46 | choice |
| 53 | prompt "Default I/O scheduler" | 47 | prompt "Default I/O scheduler" |
| 54 | default DEFAULT_CFQ | 48 | default DEFAULT_CFQ |
diff --git a/block/Makefile b/block/Makefile index cb2d515ebd6e..0bb499a739cd 100644 --- a/block/Makefile +++ b/block/Makefile | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
| 6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ |
| 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
| 8 | blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
| 9 | 9 | ||
| 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
| 11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o | 11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6d88544b677f..0d710c9d403b 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c | |||
| @@ -286,26 +286,31 @@ static void bio_end_empty_barrier(struct bio *bio, int err) | |||
| 286 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | 286 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); |
| 287 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 287 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
| 288 | } | 288 | } |
| 289 | 289 | if (bio->bi_private) | |
| 290 | complete(bio->bi_private); | 290 | complete(bio->bi_private); |
| 291 | bio_put(bio); | ||
| 291 | } | 292 | } |
| 292 | 293 | ||
| 293 | /** | 294 | /** |
| 294 | * blkdev_issue_flush - queue a flush | 295 | * blkdev_issue_flush - queue a flush |
| 295 | * @bdev: blockdev to issue flush for | 296 | * @bdev: blockdev to issue flush for |
| 297 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 296 | * @error_sector: error sector | 298 | * @error_sector: error sector |
| 299 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
| 297 | * | 300 | * |
| 298 | * Description: | 301 | * Description: |
| 299 | * Issue a flush for the block device in question. Caller can supply | 302 | * Issue a flush for the block device in question. Caller can supply |
| 300 | * room for storing the error offset in case of a flush error, if they | 303 | * room for storing the error offset in case of a flush error, if they |
| 301 | * wish to. | 304 | * wish to. If WAIT flag is not passed then caller may check only that |
| 305 | * the request was pushed in some internal queue for later handling. | ||
| 302 | */ | 306 | */ |
| 303 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | 307 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, |
| 308 | sector_t *error_sector, unsigned long flags) | ||
| 304 | { | 309 | { |
| 305 | DECLARE_COMPLETION_ONSTACK(wait); | 310 | DECLARE_COMPLETION_ONSTACK(wait); |
| 306 | struct request_queue *q; | 311 | struct request_queue *q; |
| 307 | struct bio *bio; | 312 | struct bio *bio; |
| 308 | int ret; | 313 | int ret = 0; |
| 309 | 314 | ||
| 310 | if (bdev->bd_disk == NULL) | 315 | if (bdev->bd_disk == NULL) |
| 311 | return -ENXIO; | 316 | return -ENXIO; |
| @@ -314,23 +319,25 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
| 314 | if (!q) | 319 | if (!q) |
| 315 | return -ENXIO; | 320 | return -ENXIO; |
| 316 | 321 | ||
| 317 | bio = bio_alloc(GFP_KERNEL, 0); | 322 | bio = bio_alloc(gfp_mask, 0); |
| 318 | bio->bi_end_io = bio_end_empty_barrier; | 323 | bio->bi_end_io = bio_end_empty_barrier; |
| 319 | bio->bi_private = &wait; | ||
| 320 | bio->bi_bdev = bdev; | 324 | bio->bi_bdev = bdev; |
| 321 | submit_bio(WRITE_BARRIER, bio); | 325 | if (test_bit(BLKDEV_WAIT, &flags)) |
| 322 | 326 | bio->bi_private = &wait; | |
| 323 | wait_for_completion(&wait); | ||
| 324 | 327 | ||
| 325 | /* | 328 | bio_get(bio); |
| 326 | * The driver must store the error location in ->bi_sector, if | 329 | submit_bio(WRITE_BARRIER, bio); |
| 327 | * it supports it. For non-stacked drivers, this should be copied | 330 | if (test_bit(BLKDEV_WAIT, &flags)) { |
| 328 | * from blk_rq_pos(rq). | 331 | wait_for_completion(&wait); |
| 329 | */ | 332 | /* |
| 330 | if (error_sector) | 333 | * The driver must store the error location in ->bi_sector, if |
| 331 | *error_sector = bio->bi_sector; | 334 | * it supports it. For non-stacked drivers, this should be |
| 335 | * copied from blk_rq_pos(rq). | ||
| 336 | */ | ||
| 337 | if (error_sector) | ||
| 338 | *error_sector = bio->bi_sector; | ||
| 339 | } | ||
| 332 | 340 | ||
| 333 | ret = 0; | ||
| 334 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 341 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| 335 | ret = -EOPNOTSUPP; | 342 | ret = -EOPNOTSUPP; |
| 336 | else if (!bio_flagged(bio, BIO_UPTODATE)) | 343 | else if (!bio_flagged(bio, BIO_UPTODATE)) |
| @@ -340,107 +347,3 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
| 340 | return ret; | 347 | return ret; |
| 341 | } | 348 | } |
| 342 | EXPORT_SYMBOL(blkdev_issue_flush); | 349 | EXPORT_SYMBOL(blkdev_issue_flush); |
| 343 | |||
| 344 | static void blkdev_discard_end_io(struct bio *bio, int err) | ||
| 345 | { | ||
| 346 | if (err) { | ||
| 347 | if (err == -EOPNOTSUPP) | ||
| 348 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
| 349 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 350 | } | ||
| 351 | |||
| 352 | if (bio->bi_private) | ||
| 353 | complete(bio->bi_private); | ||
| 354 | __free_page(bio_page(bio)); | ||
| 355 | |||
| 356 | bio_put(bio); | ||
| 357 | } | ||
| 358 | |||
| 359 | /** | ||
| 360 | * blkdev_issue_discard - queue a discard | ||
| 361 | * @bdev: blockdev to issue discard for | ||
| 362 | * @sector: start sector | ||
| 363 | * @nr_sects: number of sectors to discard | ||
| 364 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 365 | * @flags: DISCARD_FL_* flags to control behaviour | ||
| 366 | * | ||
| 367 | * Description: | ||
| 368 | * Issue a discard request for the sectors in question. | ||
| 369 | */ | ||
| 370 | int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
| 371 | sector_t nr_sects, gfp_t gfp_mask, int flags) | ||
| 372 | { | ||
| 373 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 374 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 375 | int type = flags & DISCARD_FL_BARRIER ? | ||
| 376 | DISCARD_BARRIER : DISCARD_NOBARRIER; | ||
| 377 | struct bio *bio; | ||
| 378 | struct page *page; | ||
| 379 | int ret = 0; | ||
| 380 | |||
| 381 | if (!q) | ||
| 382 | return -ENXIO; | ||
| 383 | |||
| 384 | if (!blk_queue_discard(q)) | ||
| 385 | return -EOPNOTSUPP; | ||
| 386 | |||
| 387 | while (nr_sects && !ret) { | ||
| 388 | unsigned int sector_size = q->limits.logical_block_size; | ||
| 389 | unsigned int max_discard_sectors = | ||
| 390 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); | ||
| 391 | |||
| 392 | bio = bio_alloc(gfp_mask, 1); | ||
| 393 | if (!bio) | ||
| 394 | goto out; | ||
| 395 | bio->bi_sector = sector; | ||
| 396 | bio->bi_end_io = blkdev_discard_end_io; | ||
| 397 | bio->bi_bdev = bdev; | ||
| 398 | if (flags & DISCARD_FL_WAIT) | ||
| 399 | bio->bi_private = &wait; | ||
| 400 | |||
| 401 | /* | ||
| 402 | * Add a zeroed one-sector payload as that's what | ||
| 403 | * our current implementations need. If we'll ever need | ||
| 404 | * more the interface will need revisiting. | ||
| 405 | */ | ||
| 406 | page = alloc_page(gfp_mask | __GFP_ZERO); | ||
| 407 | if (!page) | ||
| 408 | goto out_free_bio; | ||
| 409 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
| 410 | goto out_free_page; | ||
| 411 | |||
| 412 | /* | ||
| 413 | * And override the bio size - the way discard works we | ||
| 414 | * touch many more blocks on disk than the actual payload | ||
| 415 | * length. | ||
| 416 | */ | ||
| 417 | if (nr_sects > max_discard_sectors) { | ||
| 418 | bio->bi_size = max_discard_sectors << 9; | ||
| 419 | nr_sects -= max_discard_sectors; | ||
| 420 | sector += max_discard_sectors; | ||
| 421 | } else { | ||
| 422 | bio->bi_size = nr_sects << 9; | ||
| 423 | nr_sects = 0; | ||
| 424 | } | ||
| 425 | |||
| 426 | bio_get(bio); | ||
| 427 | submit_bio(type, bio); | ||
| 428 | |||
| 429 | if (flags & DISCARD_FL_WAIT) | ||
| 430 | wait_for_completion(&wait); | ||
| 431 | |||
| 432 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
| 433 | ret = -EOPNOTSUPP; | ||
| 434 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
| 435 | ret = -EIO; | ||
| 436 | bio_put(bio); | ||
| 437 | } | ||
| 438 | return ret; | ||
| 439 | out_free_page: | ||
| 440 | __free_page(page); | ||
| 441 | out_free_bio: | ||
| 442 | bio_put(bio); | ||
| 443 | out: | ||
| 444 | return -ENOMEM; | ||
| 445 | } | ||
| 446 | EXPORT_SYMBOL(blkdev_issue_discard); | ||
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2cc682b860ea..a6809645d212 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
| @@ -15,8 +15,12 @@ | |||
| 15 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
| 18 | #include <linux/blkdev.h> | ||
| 18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| 19 | #include "blk-cgroup.h" | 20 | #include "blk-cgroup.h" |
| 21 | #include <linux/genhd.h> | ||
| 22 | |||
| 23 | #define MAX_KEY_LEN 100 | ||
| 20 | 24 | ||
| 21 | static DEFINE_SPINLOCK(blkio_list_lock); | 25 | static DEFINE_SPINLOCK(blkio_list_lock); |
| 22 | static LIST_HEAD(blkio_list); | 26 | static LIST_HEAD(blkio_list); |
| @@ -49,6 +53,32 @@ struct cgroup_subsys blkio_subsys = { | |||
| 49 | }; | 53 | }; |
| 50 | EXPORT_SYMBOL_GPL(blkio_subsys); | 54 | EXPORT_SYMBOL_GPL(blkio_subsys); |
| 51 | 55 | ||
| 56 | static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, | ||
| 57 | struct blkio_policy_node *pn) | ||
| 58 | { | ||
| 59 | list_add(&pn->node, &blkcg->policy_list); | ||
| 60 | } | ||
| 61 | |||
| 62 | /* Must be called with blkcg->lock held */ | ||
| 63 | static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) | ||
| 64 | { | ||
| 65 | list_del(&pn->node); | ||
| 66 | } | ||
| 67 | |||
| 68 | /* Must be called with blkcg->lock held */ | ||
| 69 | static struct blkio_policy_node * | ||
| 70 | blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev) | ||
| 71 | { | ||
| 72 | struct blkio_policy_node *pn; | ||
| 73 | |||
| 74 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
| 75 | if (pn->dev == dev) | ||
| 76 | return pn; | ||
| 77 | } | ||
| 78 | |||
| 79 | return NULL; | ||
| 80 | } | ||
| 81 | |||
| 52 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | 82 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) |
| 53 | { | 83 | { |
| 54 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | 84 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), |
| @@ -56,13 +86,259 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | |||
| 56 | } | 86 | } |
| 57 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); | 87 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); |
| 58 | 88 | ||
| 59 | void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 89 | /* |
| 60 | unsigned long time, unsigned long sectors) | 90 | * Add to the appropriate stat variable depending on the request type. |
| 91 | * This should be called with the blkg->stats_lock held. | ||
| 92 | */ | ||
| 93 | static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, | ||
| 94 | bool sync) | ||
| 95 | { | ||
| 96 | if (direction) | ||
| 97 | stat[BLKIO_STAT_WRITE] += add; | ||
| 98 | else | ||
| 99 | stat[BLKIO_STAT_READ] += add; | ||
| 100 | if (sync) | ||
| 101 | stat[BLKIO_STAT_SYNC] += add; | ||
| 102 | else | ||
| 103 | stat[BLKIO_STAT_ASYNC] += add; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Decrements the appropriate stat variable if non-zero depending on the | ||
| 108 | * request type. Panics on value being zero. | ||
| 109 | * This should be called with the blkg->stats_lock held. | ||
| 110 | */ | ||
| 111 | static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) | ||
| 112 | { | ||
| 113 | if (direction) { | ||
| 114 | BUG_ON(stat[BLKIO_STAT_WRITE] == 0); | ||
| 115 | stat[BLKIO_STAT_WRITE]--; | ||
| 116 | } else { | ||
| 117 | BUG_ON(stat[BLKIO_STAT_READ] == 0); | ||
| 118 | stat[BLKIO_STAT_READ]--; | ||
| 119 | } | ||
| 120 | if (sync) { | ||
| 121 | BUG_ON(stat[BLKIO_STAT_SYNC] == 0); | ||
| 122 | stat[BLKIO_STAT_SYNC]--; | ||
| 123 | } else { | ||
| 124 | BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); | ||
| 125 | stat[BLKIO_STAT_ASYNC]--; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 130 | /* This should be called with the blkg->stats_lock held. */ | ||
| 131 | static void blkio_set_start_group_wait_time(struct blkio_group *blkg, | ||
| 132 | struct blkio_group *curr_blkg) | ||
| 133 | { | ||
| 134 | if (blkio_blkg_waiting(&blkg->stats)) | ||
| 135 | return; | ||
| 136 | if (blkg == curr_blkg) | ||
| 137 | return; | ||
| 138 | blkg->stats.start_group_wait_time = sched_clock(); | ||
| 139 | blkio_mark_blkg_waiting(&blkg->stats); | ||
| 140 | } | ||
| 141 | |||
| 142 | /* This should be called with the blkg->stats_lock held. */ | ||
| 143 | static void blkio_update_group_wait_time(struct blkio_group_stats *stats) | ||
| 144 | { | ||
| 145 | unsigned long long now; | ||
| 146 | |||
| 147 | if (!blkio_blkg_waiting(stats)) | ||
| 148 | return; | ||
| 149 | |||
| 150 | now = sched_clock(); | ||
| 151 | if (time_after64(now, stats->start_group_wait_time)) | ||
| 152 | stats->group_wait_time += now - stats->start_group_wait_time; | ||
| 153 | blkio_clear_blkg_waiting(stats); | ||
| 154 | } | ||
| 155 | |||
| 156 | /* This should be called with the blkg->stats_lock held. */ | ||
| 157 | static void blkio_end_empty_time(struct blkio_group_stats *stats) | ||
| 158 | { | ||
| 159 | unsigned long long now; | ||
| 160 | |||
| 161 | if (!blkio_blkg_empty(stats)) | ||
| 162 | return; | ||
| 163 | |||
| 164 | now = sched_clock(); | ||
| 165 | if (time_after64(now, stats->start_empty_time)) | ||
| 166 | stats->empty_time += now - stats->start_empty_time; | ||
| 167 | blkio_clear_blkg_empty(stats); | ||
| 168 | } | ||
| 169 | |||
| 170 | void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) | ||
| 171 | { | ||
| 172 | unsigned long flags; | ||
| 173 | |||
| 174 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 175 | BUG_ON(blkio_blkg_idling(&blkg->stats)); | ||
| 176 | blkg->stats.start_idle_time = sched_clock(); | ||
| 177 | blkio_mark_blkg_idling(&blkg->stats); | ||
| 178 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 179 | } | ||
| 180 | EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); | ||
| 181 | |||
| 182 | void blkiocg_update_idle_time_stats(struct blkio_group *blkg) | ||
| 183 | { | ||
| 184 | unsigned long flags; | ||
| 185 | unsigned long long now; | ||
| 186 | struct blkio_group_stats *stats; | ||
| 187 | |||
| 188 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 189 | stats = &blkg->stats; | ||
| 190 | if (blkio_blkg_idling(stats)) { | ||
| 191 | now = sched_clock(); | ||
| 192 | if (time_after64(now, stats->start_idle_time)) | ||
| 193 | stats->idle_time += now - stats->start_idle_time; | ||
| 194 | blkio_clear_blkg_idling(stats); | ||
| 195 | } | ||
| 196 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 197 | } | ||
| 198 | EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); | ||
| 199 | |||
| 200 | void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) | ||
| 201 | { | ||
| 202 | unsigned long flags; | ||
| 203 | struct blkio_group_stats *stats; | ||
| 204 | |||
| 205 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 206 | stats = &blkg->stats; | ||
| 207 | stats->avg_queue_size_sum += | ||
| 208 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + | ||
| 209 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; | ||
| 210 | stats->avg_queue_size_samples++; | ||
| 211 | blkio_update_group_wait_time(stats); | ||
| 212 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 213 | } | ||
| 214 | EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); | ||
| 215 | |||
| 216 | void blkiocg_set_start_empty_time(struct blkio_group *blkg) | ||
| 217 | { | ||
| 218 | unsigned long flags; | ||
| 219 | struct blkio_group_stats *stats; | ||
| 220 | |||
| 221 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 222 | stats = &blkg->stats; | ||
| 223 | |||
| 224 | if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || | ||
| 225 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { | ||
| 226 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 227 | return; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * group is already marked empty. This can happen if cfqq got new | ||
| 232 | * request in parent group and moved to this group while being added | ||
| 233 | * to service tree. Just ignore the event and move on. | ||
| 234 | */ | ||
| 235 | if(blkio_blkg_empty(stats)) { | ||
| 236 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | |||
| 240 | stats->start_empty_time = sched_clock(); | ||
| 241 | blkio_mark_blkg_empty(stats); | ||
| 242 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 243 | } | ||
| 244 | EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); | ||
| 245 | |||
| 246 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, | ||
| 247 | unsigned long dequeue) | ||
| 248 | { | ||
| 249 | blkg->stats.dequeue += dequeue; | ||
| 250 | } | ||
| 251 | EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); | ||
| 252 | #else | ||
| 253 | static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg, | ||
| 254 | struct blkio_group *curr_blkg) {} | ||
| 255 | static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {} | ||
| 256 | #endif | ||
| 257 | |||
| 258 | void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
| 259 | struct blkio_group *curr_blkg, bool direction, | ||
| 260 | bool sync) | ||
| 261 | { | ||
| 262 | unsigned long flags; | ||
| 263 | |||
| 264 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 265 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, | ||
| 266 | sync); | ||
| 267 | blkio_end_empty_time(&blkg->stats); | ||
| 268 | blkio_set_start_group_wait_time(blkg, curr_blkg); | ||
| 269 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 270 | } | ||
| 271 | EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); | ||
| 272 | |||
| 273 | void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
| 274 | bool direction, bool sync) | ||
| 275 | { | ||
| 276 | unsigned long flags; | ||
| 277 | |||
| 278 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 279 | blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], | ||
| 280 | direction, sync); | ||
| 281 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 282 | } | ||
| 283 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); | ||
| 284 | |||
| 285 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | ||
| 286 | { | ||
| 287 | unsigned long flags; | ||
| 288 | |||
| 289 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 290 | blkg->stats.time += time; | ||
| 291 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 292 | } | ||
| 293 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | ||
| 294 | |||
| 295 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, | ||
| 296 | uint64_t bytes, bool direction, bool sync) | ||
| 61 | { | 297 | { |
| 62 | blkg->time += time; | 298 | struct blkio_group_stats *stats; |
| 63 | blkg->sectors += sectors; | 299 | unsigned long flags; |
| 300 | |||
| 301 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 302 | stats = &blkg->stats; | ||
| 303 | stats->sectors += bytes >> 9; | ||
| 304 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction, | ||
| 305 | sync); | ||
| 306 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes, | ||
| 307 | direction, sync); | ||
| 308 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 64 | } | 309 | } |
| 65 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats); | 310 | EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); |
| 311 | |||
| 312 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
| 313 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) | ||
| 314 | { | ||
| 315 | struct blkio_group_stats *stats; | ||
| 316 | unsigned long flags; | ||
| 317 | unsigned long long now = sched_clock(); | ||
| 318 | |||
| 319 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 320 | stats = &blkg->stats; | ||
| 321 | if (time_after64(now, io_start_time)) | ||
| 322 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], | ||
| 323 | now - io_start_time, direction, sync); | ||
| 324 | if (time_after64(io_start_time, start_time)) | ||
| 325 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], | ||
| 326 | io_start_time - start_time, direction, sync); | ||
| 327 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 328 | } | ||
| 329 | EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); | ||
| 330 | |||
| 331 | void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, | ||
| 332 | bool sync) | ||
| 333 | { | ||
| 334 | unsigned long flags; | ||
| 335 | |||
| 336 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
| 337 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction, | ||
| 338 | sync); | ||
| 339 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
| 340 | } | ||
| 341 | EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); | ||
| 66 | 342 | ||
| 67 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 343 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
| 68 | struct blkio_group *blkg, void *key, dev_t dev) | 344 | struct blkio_group *blkg, void *key, dev_t dev) |
| @@ -70,14 +346,13 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |||
| 70 | unsigned long flags; | 346 | unsigned long flags; |
| 71 | 347 | ||
| 72 | spin_lock_irqsave(&blkcg->lock, flags); | 348 | spin_lock_irqsave(&blkcg->lock, flags); |
| 349 | spin_lock_init(&blkg->stats_lock); | ||
| 73 | rcu_assign_pointer(blkg->key, key); | 350 | rcu_assign_pointer(blkg->key, key); |
| 74 | blkg->blkcg_id = css_id(&blkcg->css); | 351 | blkg->blkcg_id = css_id(&blkcg->css); |
| 75 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); | 352 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
| 76 | spin_unlock_irqrestore(&blkcg->lock, flags); | 353 | spin_unlock_irqrestore(&blkcg->lock, flags); |
| 77 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 78 | /* Need to take css reference ? */ | 354 | /* Need to take css reference ? */ |
| 79 | cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); | 355 | cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); |
| 80 | #endif | ||
| 81 | blkg->dev = dev; | 356 | blkg->dev = dev; |
| 82 | } | 357 | } |
| 83 | EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); | 358 | EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); |
| @@ -101,17 +376,16 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg) | |||
| 101 | 376 | ||
| 102 | rcu_read_lock(); | 377 | rcu_read_lock(); |
| 103 | css = css_lookup(&blkio_subsys, blkg->blkcg_id); | 378 | css = css_lookup(&blkio_subsys, blkg->blkcg_id); |
| 104 | if (!css) | 379 | if (css) { |
| 105 | goto out; | 380 | blkcg = container_of(css, struct blkio_cgroup, css); |
| 106 | 381 | spin_lock_irqsave(&blkcg->lock, flags); | |
| 107 | blkcg = container_of(css, struct blkio_cgroup, css); | 382 | if (!hlist_unhashed(&blkg->blkcg_node)) { |
| 108 | spin_lock_irqsave(&blkcg->lock, flags); | 383 | __blkiocg_del_blkio_group(blkg); |
| 109 | if (!hlist_unhashed(&blkg->blkcg_node)) { | 384 | ret = 0; |
| 110 | __blkiocg_del_blkio_group(blkg); | 385 | } |
| 111 | ret = 0; | 386 | spin_unlock_irqrestore(&blkcg->lock, flags); |
| 112 | } | 387 | } |
| 113 | spin_unlock_irqrestore(&blkcg->lock, flags); | 388 | |
| 114 | out: | ||
| 115 | rcu_read_unlock(); | 389 | rcu_read_unlock(); |
| 116 | return ret; | 390 | return ret; |
| 117 | } | 391 | } |
| @@ -154,6 +428,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
| 154 | struct blkio_group *blkg; | 428 | struct blkio_group *blkg; |
| 155 | struct hlist_node *n; | 429 | struct hlist_node *n; |
| 156 | struct blkio_policy_type *blkiop; | 430 | struct blkio_policy_type *blkiop; |
| 431 | struct blkio_policy_node *pn; | ||
| 157 | 432 | ||
| 158 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) | 433 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) |
| 159 | return -EINVAL; | 434 | return -EINVAL; |
| @@ -162,7 +437,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
| 162 | spin_lock(&blkio_list_lock); | 437 | spin_lock(&blkio_list_lock); |
| 163 | spin_lock_irq(&blkcg->lock); | 438 | spin_lock_irq(&blkcg->lock); |
| 164 | blkcg->weight = (unsigned int)val; | 439 | blkcg->weight = (unsigned int)val; |
| 440 | |||
| 165 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 441 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
| 442 | pn = blkio_policy_search_node(blkcg, blkg->dev); | ||
| 443 | |||
| 444 | if (pn) | ||
| 445 | continue; | ||
| 446 | |||
| 166 | list_for_each_entry(blkiop, &blkio_list, list) | 447 | list_for_each_entry(blkiop, &blkio_list, list) |
| 167 | blkiop->ops.blkio_update_group_weight_fn(blkg, | 448 | blkiop->ops.blkio_update_group_weight_fn(blkg, |
| 168 | blkcg->weight); | 449 | blkcg->weight); |
| @@ -172,13 +453,154 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
| 172 | return 0; | 453 | return 0; |
| 173 | } | 454 | } |
| 174 | 455 | ||
| 175 | #define SHOW_FUNCTION_PER_GROUP(__VAR) \ | 456 | static int |
| 457 | blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) | ||
| 458 | { | ||
| 459 | struct blkio_cgroup *blkcg; | ||
| 460 | struct blkio_group *blkg; | ||
| 461 | struct blkio_group_stats *stats; | ||
| 462 | struct hlist_node *n; | ||
| 463 | uint64_t queued[BLKIO_STAT_TOTAL]; | ||
| 464 | int i; | ||
| 465 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 466 | bool idling, waiting, empty; | ||
| 467 | unsigned long long now = sched_clock(); | ||
| 468 | #endif | ||
| 469 | |||
| 470 | blkcg = cgroup_to_blkio_cgroup(cgroup); | ||
| 471 | spin_lock_irq(&blkcg->lock); | ||
| 472 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
| 473 | spin_lock(&blkg->stats_lock); | ||
| 474 | stats = &blkg->stats; | ||
| 475 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 476 | idling = blkio_blkg_idling(stats); | ||
| 477 | waiting = blkio_blkg_waiting(stats); | ||
| 478 | empty = blkio_blkg_empty(stats); | ||
| 479 | #endif | ||
| 480 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
| 481 | queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; | ||
| 482 | memset(stats, 0, sizeof(struct blkio_group_stats)); | ||
| 483 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
| 484 | stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; | ||
| 485 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 486 | if (idling) { | ||
| 487 | blkio_mark_blkg_idling(stats); | ||
| 488 | stats->start_idle_time = now; | ||
| 489 | } | ||
| 490 | if (waiting) { | ||
| 491 | blkio_mark_blkg_waiting(stats); | ||
| 492 | stats->start_group_wait_time = now; | ||
| 493 | } | ||
| 494 | if (empty) { | ||
| 495 | blkio_mark_blkg_empty(stats); | ||
| 496 | stats->start_empty_time = now; | ||
| 497 | } | ||
| 498 | #endif | ||
| 499 | spin_unlock(&blkg->stats_lock); | ||
| 500 | } | ||
| 501 | spin_unlock_irq(&blkcg->lock); | ||
| 502 | return 0; | ||
| 503 | } | ||
| 504 | |||
| 505 | static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, | ||
| 506 | int chars_left, bool diskname_only) | ||
| 507 | { | ||
| 508 | snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); | ||
| 509 | chars_left -= strlen(str); | ||
| 510 | if (chars_left <= 0) { | ||
| 511 | printk(KERN_WARNING | ||
| 512 | "Possibly incorrect cgroup stat display format"); | ||
| 513 | return; | ||
| 514 | } | ||
| 515 | if (diskname_only) | ||
| 516 | return; | ||
| 517 | switch (type) { | ||
| 518 | case BLKIO_STAT_READ: | ||
| 519 | strlcat(str, " Read", chars_left); | ||
| 520 | break; | ||
| 521 | case BLKIO_STAT_WRITE: | ||
| 522 | strlcat(str, " Write", chars_left); | ||
| 523 | break; | ||
| 524 | case BLKIO_STAT_SYNC: | ||
| 525 | strlcat(str, " Sync", chars_left); | ||
| 526 | break; | ||
| 527 | case BLKIO_STAT_ASYNC: | ||
| 528 | strlcat(str, " Async", chars_left); | ||
| 529 | break; | ||
| 530 | case BLKIO_STAT_TOTAL: | ||
| 531 | strlcat(str, " Total", chars_left); | ||
| 532 | break; | ||
| 533 | default: | ||
| 534 | strlcat(str, " Invalid", chars_left); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, | ||
| 539 | struct cgroup_map_cb *cb, dev_t dev) | ||
| 540 | { | ||
| 541 | blkio_get_key_name(0, dev, str, chars_left, true); | ||
| 542 | cb->fill(cb, str, val); | ||
| 543 | return val; | ||
| 544 | } | ||
| 545 | |||
| 546 | /* This should be called with blkg->stats_lock held */ | ||
| 547 | static uint64_t blkio_get_stat(struct blkio_group *blkg, | ||
| 548 | struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) | ||
| 549 | { | ||
| 550 | uint64_t disk_total; | ||
| 551 | char key_str[MAX_KEY_LEN]; | ||
| 552 | enum stat_sub_type sub_type; | ||
| 553 | |||
| 554 | if (type == BLKIO_STAT_TIME) | ||
| 555 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 556 | blkg->stats.time, cb, dev); | ||
| 557 | if (type == BLKIO_STAT_SECTORS) | ||
| 558 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 559 | blkg->stats.sectors, cb, dev); | ||
| 560 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 561 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | ||
| 562 | uint64_t sum = blkg->stats.avg_queue_size_sum; | ||
| 563 | uint64_t samples = blkg->stats.avg_queue_size_samples; | ||
| 564 | if (samples) | ||
| 565 | do_div(sum, samples); | ||
| 566 | else | ||
| 567 | sum = 0; | ||
| 568 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev); | ||
| 569 | } | ||
| 570 | if (type == BLKIO_STAT_GROUP_WAIT_TIME) | ||
| 571 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 572 | blkg->stats.group_wait_time, cb, dev); | ||
| 573 | if (type == BLKIO_STAT_IDLE_TIME) | ||
| 574 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 575 | blkg->stats.idle_time, cb, dev); | ||
| 576 | if (type == BLKIO_STAT_EMPTY_TIME) | ||
| 577 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 578 | blkg->stats.empty_time, cb, dev); | ||
| 579 | if (type == BLKIO_STAT_DEQUEUE) | ||
| 580 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
| 581 | blkg->stats.dequeue, cb, dev); | ||
| 582 | #endif | ||
| 583 | |||
| 584 | for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; | ||
| 585 | sub_type++) { | ||
| 586 | blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); | ||
| 587 | cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); | ||
| 588 | } | ||
| 589 | disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + | ||
| 590 | blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; | ||
| 591 | blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); | ||
| 592 | cb->fill(cb, key_str, disk_total); | ||
| 593 | return disk_total; | ||
| 594 | } | ||
| 595 | |||
| 596 | #define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \ | ||
| 176 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | 597 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ |
| 177 | struct cftype *cftype, struct seq_file *m) \ | 598 | struct cftype *cftype, struct cgroup_map_cb *cb) \ |
| 178 | { \ | 599 | { \ |
| 179 | struct blkio_cgroup *blkcg; \ | 600 | struct blkio_cgroup *blkcg; \ |
| 180 | struct blkio_group *blkg; \ | 601 | struct blkio_group *blkg; \ |
| 181 | struct hlist_node *n; \ | 602 | struct hlist_node *n; \ |
| 603 | uint64_t cgroup_total = 0; \ | ||
| 182 | \ | 604 | \ |
| 183 | if (!cgroup_lock_live_group(cgroup)) \ | 605 | if (!cgroup_lock_live_group(cgroup)) \ |
| 184 | return -ENODEV; \ | 606 | return -ENODEV; \ |
| @@ -186,50 +608,293 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
| 186 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | 608 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ |
| 187 | rcu_read_lock(); \ | 609 | rcu_read_lock(); \ |
| 188 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | 610 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ |
| 189 | if (blkg->dev) \ | 611 | if (blkg->dev) { \ |
| 190 | seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \ | 612 | spin_lock_irq(&blkg->stats_lock); \ |
| 191 | MINOR(blkg->dev), blkg->__VAR); \ | 613 | cgroup_total += blkio_get_stat(blkg, cb, \ |
| 614 | blkg->dev, type); \ | ||
| 615 | spin_unlock_irq(&blkg->stats_lock); \ | ||
| 616 | } \ | ||
| 192 | } \ | 617 | } \ |
| 618 | if (show_total) \ | ||
| 619 | cb->fill(cb, "Total", cgroup_total); \ | ||
| 193 | rcu_read_unlock(); \ | 620 | rcu_read_unlock(); \ |
| 194 | cgroup_unlock(); \ | 621 | cgroup_unlock(); \ |
| 195 | return 0; \ | 622 | return 0; \ |
| 196 | } | 623 | } |
| 197 | 624 | ||
| 198 | SHOW_FUNCTION_PER_GROUP(time); | 625 | SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); |
| 199 | SHOW_FUNCTION_PER_GROUP(sectors); | 626 | SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); |
| 627 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); | ||
| 628 | SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); | ||
| 629 | SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); | ||
| 630 | SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); | ||
| 631 | SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1); | ||
| 632 | SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1); | ||
| 200 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 633 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
| 201 | SHOW_FUNCTION_PER_GROUP(dequeue); | 634 | SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); |
| 635 | SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0); | ||
| 636 | SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0); | ||
| 637 | SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0); | ||
| 638 | SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0); | ||
| 202 | #endif | 639 | #endif |
| 203 | #undef SHOW_FUNCTION_PER_GROUP | 640 | #undef SHOW_FUNCTION_PER_GROUP |
| 204 | 641 | ||
/*
 * Check that @dev can be used as the target of a per-device rule.
 *
 * Returns 0 when get_gendisk() finds a disk and leaves @part at 0, and
 * -ENODEV when no disk is found or @part is non-zero (presumably meaning
 * that @dev names a partition rather than a whole disk).
 *
 * NOTE(review): get_gendisk() normally hands back a referenced disk and
 * nothing here drops it -- confirm whether a matching put is required.
 */
static int blkio_check_dev_num(dev_t dev)
{
	int part = 0;
	struct gendisk *disk;

	disk = get_gendisk(dev, &part);
	if (!disk || part)
		return -ENODEV;

	return 0;
}
| 653 | |||
| 654 | static int blkio_policy_parse_and_set(char *buf, | ||
| 655 | struct blkio_policy_node *newpn) | ||
| 656 | { | ||
| 657 | char *s[4], *p, *major_s = NULL, *minor_s = NULL; | ||
| 658 | int ret; | ||
| 659 | unsigned long major, minor, temp; | ||
| 660 | int i = 0; | ||
| 661 | dev_t dev; | ||
| 662 | |||
| 663 | memset(s, 0, sizeof(s)); | ||
| 664 | |||
| 665 | while ((p = strsep(&buf, " ")) != NULL) { | ||
| 666 | if (!*p) | ||
| 667 | continue; | ||
| 668 | |||
| 669 | s[i++] = p; | ||
| 670 | |||
| 671 | /* Prevent from inputing too many things */ | ||
| 672 | if (i == 3) | ||
| 673 | break; | ||
| 674 | } | ||
| 675 | |||
| 676 | if (i != 2) | ||
| 677 | return -EINVAL; | ||
| 678 | |||
| 679 | p = strsep(&s[0], ":"); | ||
| 680 | if (p != NULL) | ||
| 681 | major_s = p; | ||
| 682 | else | ||
| 683 | return -EINVAL; | ||
| 684 | |||
| 685 | minor_s = s[0]; | ||
| 686 | if (!minor_s) | ||
| 687 | return -EINVAL; | ||
| 688 | |||
| 689 | ret = strict_strtoul(major_s, 10, &major); | ||
| 690 | if (ret) | ||
| 691 | return -EINVAL; | ||
| 692 | |||
| 693 | ret = strict_strtoul(minor_s, 10, &minor); | ||
| 694 | if (ret) | ||
| 695 | return -EINVAL; | ||
| 696 | |||
| 697 | dev = MKDEV(major, minor); | ||
| 698 | |||
| 699 | ret = blkio_check_dev_num(dev); | ||
| 700 | if (ret) | ||
| 701 | return ret; | ||
| 702 | |||
| 703 | newpn->dev = dev; | ||
| 704 | |||
| 705 | if (s[1] == NULL) | ||
| 706 | return -EINVAL; | ||
| 707 | |||
| 708 | ret = strict_strtoul(s[1], 10, &temp); | ||
| 709 | if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || | ||
| 710 | temp > BLKIO_WEIGHT_MAX) | ||
| 711 | return -EINVAL; | ||
| 712 | |||
| 713 | newpn->weight = temp; | ||
| 714 | |||
| 715 | return 0; | ||
| 716 | } | ||
| 717 | |||
| 718 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | ||
| 719 | dev_t dev) | ||
| 720 | { | ||
| 721 | struct blkio_policy_node *pn; | ||
| 722 | |||
| 723 | pn = blkio_policy_search_node(blkcg, dev); | ||
| 724 | if (pn) | ||
| 725 | return pn->weight; | ||
| 726 | else | ||
| 727 | return blkcg->weight; | ||
| 728 | } | ||
| 729 | EXPORT_SYMBOL_GPL(blkcg_get_weight); | ||
| 730 | |||
| 731 | |||
| 732 | static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, | ||
| 733 | const char *buffer) | ||
| 734 | { | ||
| 735 | int ret = 0; | ||
| 736 | char *buf; | ||
| 737 | struct blkio_policy_node *newpn, *pn; | ||
| 738 | struct blkio_cgroup *blkcg; | ||
| 739 | struct blkio_group *blkg; | ||
| 740 | int keep_newpn = 0; | ||
| 741 | struct hlist_node *n; | ||
| 742 | struct blkio_policy_type *blkiop; | ||
| 743 | |||
| 744 | buf = kstrdup(buffer, GFP_KERNEL); | ||
| 745 | if (!buf) | ||
| 746 | return -ENOMEM; | ||
| 747 | |||
| 748 | newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); | ||
| 749 | if (!newpn) { | ||
| 750 | ret = -ENOMEM; | ||
| 751 | goto free_buf; | ||
| 752 | } | ||
| 753 | |||
| 754 | ret = blkio_policy_parse_and_set(buf, newpn); | ||
| 755 | if (ret) | ||
| 756 | goto free_newpn; | ||
| 757 | |||
| 758 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
| 759 | |||
| 760 | spin_lock_irq(&blkcg->lock); | ||
| 761 | |||
| 762 | pn = blkio_policy_search_node(blkcg, newpn->dev); | ||
| 763 | if (!pn) { | ||
| 764 | if (newpn->weight != 0) { | ||
| 765 | blkio_policy_insert_node(blkcg, newpn); | ||
| 766 | keep_newpn = 1; | ||
| 767 | } | ||
| 768 | spin_unlock_irq(&blkcg->lock); | ||
| 769 | goto update_io_group; | ||
| 770 | } | ||
| 771 | |||
| 772 | if (newpn->weight == 0) { | ||
| 773 | /* weight == 0 means deleteing a specific weight */ | ||
| 774 | blkio_policy_delete_node(pn); | ||
| 775 | spin_unlock_irq(&blkcg->lock); | ||
| 776 | goto update_io_group; | ||
| 777 | } | ||
| 778 | spin_unlock_irq(&blkcg->lock); | ||
| 779 | |||
| 780 | pn->weight = newpn->weight; | ||
| 781 | |||
| 782 | update_io_group: | ||
| 783 | /* update weight for each cfqg */ | ||
| 784 | spin_lock(&blkio_list_lock); | ||
| 785 | spin_lock_irq(&blkcg->lock); | ||
| 786 | |||
| 787 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
| 788 | if (newpn->dev == blkg->dev) { | ||
| 789 | list_for_each_entry(blkiop, &blkio_list, list) | ||
| 790 | blkiop->ops.blkio_update_group_weight_fn(blkg, | ||
| 791 | newpn->weight ? | ||
| 792 | newpn->weight : | ||
| 793 | blkcg->weight); | ||
| 794 | } | ||
| 795 | } | ||
| 796 | |||
| 797 | spin_unlock_irq(&blkcg->lock); | ||
| 798 | spin_unlock(&blkio_list_lock); | ||
| 799 | |||
| 800 | free_newpn: | ||
| 801 | if (!keep_newpn) | ||
| 802 | kfree(newpn); | ||
| 803 | free_buf: | ||
| 804 | kfree(buf); | ||
| 805 | return ret; | ||
| 806 | } | ||
| 807 | |||
| 808 | static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, | ||
| 809 | struct seq_file *m) | ||
| 810 | { | ||
| 811 | struct blkio_cgroup *blkcg; | ||
| 812 | struct blkio_policy_node *pn; | ||
| 813 | |||
| 814 | seq_printf(m, "dev\tweight\n"); | ||
| 815 | |||
| 816 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
| 817 | if (!list_empty(&blkcg->policy_list)) { | ||
| 818 | spin_lock_irq(&blkcg->lock); | ||
| 819 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
| 820 | seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), | ||
| 821 | MINOR(pn->dev), pn->weight); | ||
| 822 | } | ||
| 823 | spin_unlock_irq(&blkcg->lock); | ||
| 824 | } | ||
| 825 | |||
| 826 | return 0; | ||
| 210 | } | 827 | } |
| 211 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats); | ||
| 212 | #endif | ||
| 213 | 828 | ||
/*
 * Control files of the blkio cgroup controller.  Each entry binds a file
 * name to the handlers defined in this file.
 */
struct cftype blkio_files[] = {
	{
		/* Per-device weight rules, written as "major:minor weight" */
		.name = "weight_device",
		.read_seq_string = blkiocg_weight_device_read,
		.write_string = blkiocg_weight_device_write,
		.max_write_len = 256,
	},
	{
		/* Cgroup-wide weight */
		.name = "weight",
		.read_u64 = blkiocg_weight_read,
		.write_u64 = blkiocg_weight_write,
	},
	/* Single-valued per-device statistics */
	{
		.name = "time",
		.read_map = blkiocg_time_read,
	},
	{
		.name = "sectors",
		.read_map = blkiocg_sectors_read,
	},
	/* Per-device statistics broken down by Read/Write/Sync/Async */
	{
		.name = "io_service_bytes",
		.read_map = blkiocg_io_service_bytes_read,
	},
	{
		.name = "io_serviced",
		.read_map = blkiocg_io_serviced_read,
	},
	{
		.name = "io_service_time",
		.read_map = blkiocg_io_service_time_read,
	},
	{
		.name = "io_wait_time",
		.read_map = blkiocg_io_wait_time_read,
	},
	{
		.name = "io_merged",
		.read_map = blkiocg_io_merged_read,
	},
	{
		.name = "io_queued",
		.read_map = blkiocg_io_queued_read,
	},
	{
		/* Write-only: handled by blkiocg_reset_stats() */
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Debug-only statistics */
	{
		.name = "avg_queue_size",
		.read_map = blkiocg_avg_queue_size_read,
	},
	{
		.name = "group_wait_time",
		.read_map = blkiocg_group_wait_time_read,
	},
	{
		.name = "idle_time",
		.read_map = blkiocg_idle_time_read,
	},
	{
		.name = "empty_time",
		.read_map = blkiocg_empty_time_read,
	},
	{
		.name = "dequeue",
		.read_map = blkiocg_dequeue_read,
	},
#endif
};
| 235 | 900 | ||
| @@ -246,37 +911,42 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |||
| 246 | struct blkio_group *blkg; | 911 | struct blkio_group *blkg; |
| 247 | void *key; | 912 | void *key; |
| 248 | struct blkio_policy_type *blkiop; | 913 | struct blkio_policy_type *blkiop; |
| 914 | struct blkio_policy_node *pn, *pntmp; | ||
| 249 | 915 | ||
| 250 | rcu_read_lock(); | 916 | rcu_read_lock(); |
| 251 | remove_entry: | 917 | do { |
| 252 | spin_lock_irqsave(&blkcg->lock, flags); | 918 | spin_lock_irqsave(&blkcg->lock, flags); |
| 919 | |||
| 920 | if (hlist_empty(&blkcg->blkg_list)) { | ||
| 921 | spin_unlock_irqrestore(&blkcg->lock, flags); | ||
| 922 | break; | ||
| 923 | } | ||
| 924 | |||
| 925 | blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, | ||
| 926 | blkcg_node); | ||
| 927 | key = rcu_dereference(blkg->key); | ||
| 928 | __blkiocg_del_blkio_group(blkg); | ||
| 253 | 929 | ||
| 254 | if (hlist_empty(&blkcg->blkg_list)) { | ||
| 255 | spin_unlock_irqrestore(&blkcg->lock, flags); | 930 | spin_unlock_irqrestore(&blkcg->lock, flags); |
| 256 | goto done; | ||
| 257 | } | ||
| 258 | 931 | ||
| 259 | blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, | 932 | /* |
| 260 | blkcg_node); | 933 | * This blkio_group is being unlinked as associated cgroup is |
| 261 | key = rcu_dereference(blkg->key); | 934 | * going away. Let all the IO controlling policies know about |
| 262 | __blkiocg_del_blkio_group(blkg); | 935 | * this event. Currently this is static call to one io |
| 936 | * controlling policy. Once we have more policies in place, we | ||
| 937 | * need some dynamic registration of callback function. | ||
| 938 | */ | ||
| 939 | spin_lock(&blkio_list_lock); | ||
| 940 | list_for_each_entry(blkiop, &blkio_list, list) | ||
| 941 | blkiop->ops.blkio_unlink_group_fn(key, blkg); | ||
| 942 | spin_unlock(&blkio_list_lock); | ||
| 943 | } while (1); | ||
| 263 | 944 | ||
| 264 | spin_unlock_irqrestore(&blkcg->lock, flags); | 945 | list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) { |
| 946 | blkio_policy_delete_node(pn); | ||
| 947 | kfree(pn); | ||
| 948 | } | ||
| 265 | 949 | ||
| 266 | /* | ||
| 267 | * This blkio_group is being unlinked as associated cgroup is going | ||
| 268 | * away. Let all the IO controlling policies know about this event. | ||
| 269 | * | ||
| 270 | * Currently this is static call to one io controlling policy. Once | ||
| 271 | * we have more policies in place, we need some dynamic registration | ||
| 272 | * of callback function. | ||
| 273 | */ | ||
| 274 | spin_lock(&blkio_list_lock); | ||
| 275 | list_for_each_entry(blkiop, &blkio_list, list) | ||
| 276 | blkiop->ops.blkio_unlink_group_fn(key, blkg); | ||
| 277 | spin_unlock(&blkio_list_lock); | ||
| 278 | goto remove_entry; | ||
| 279 | done: | ||
| 280 | free_css_id(&blkio_subsys, &blkcg->css); | 950 | free_css_id(&blkio_subsys, &blkcg->css); |
| 281 | rcu_read_unlock(); | 951 | rcu_read_unlock(); |
| 282 | if (blkcg != &blkio_root_cgroup) | 952 | if (blkcg != &blkio_root_cgroup) |
| @@ -307,6 +977,7 @@ done: | |||
| 307 | spin_lock_init(&blkcg->lock); | 977 | spin_lock_init(&blkcg->lock); |
| 308 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 978 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
| 309 | 979 | ||
| 980 | INIT_LIST_HEAD(&blkcg->policy_list); | ||
| 310 | return &blkcg->css; | 981 | return &blkcg->css; |
| 311 | } | 982 | } |
| 312 | 983 | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 8ccc20464dae..2b866ec1dcea 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
| @@ -23,11 +23,84 @@ extern struct cgroup_subsys blkio_subsys; | |||
| 23 | #define blkio_subsys_id blkio_subsys.subsys_id | 23 | #define blkio_subsys_id blkio_subsys.subsys_id |
| 24 | #endif | 24 | #endif |
| 25 | 25 | ||
/*
 * Statistic kinds.  Values up to and including BLKIO_STAT_QUEUED index the
 * first dimension of blkio_group_stats.stat_arr; the remaining values are
 * single-valued statistics kept in dedicated fields.
 */
enum stat_type {
	/* Total time spent (in ns) between request dispatch to the driver and
	 * request completion for IOs done by this cgroup. This may not be
	 * accurate when NCQ is turned on. */
	BLKIO_STAT_SERVICE_TIME = 0,
	/* Total bytes transferred */
	BLKIO_STAT_SERVICE_BYTES,
	/* Total IOs serviced, post merge */
	BLKIO_STAT_SERVICED,
	/* Total time spent waiting in scheduler queue in ns */
	BLKIO_STAT_WAIT_TIME,
	/* Number of IOs merged */
	BLKIO_STAT_MERGED,
	/* Number of IOs queued up */
	BLKIO_STAT_QUEUED,
	/* All the single valued stats go below this */
	BLKIO_STAT_TIME,
	BLKIO_STAT_SECTORS,
#ifdef CONFIG_DEBUG_BLK_CGROUP
	BLKIO_STAT_AVG_QUEUE_SIZE,
	BLKIO_STAT_IDLE_TIME,
	BLKIO_STAT_EMPTY_TIME,
	BLKIO_STAT_GROUP_WAIT_TIME,
	BLKIO_STAT_DEQUEUE
#endif
};
| 52 | |||
/*
 * Breakdown of a multi-valued statistic.  The values before
 * BLKIO_STAT_TOTAL index the second dimension of
 * blkio_group_stats.stat_arr; BLKIO_STAT_TOTAL itself is that dimension's
 * size.
 */
enum stat_sub_type {
	BLKIO_STAT_READ = 0,
	BLKIO_STAT_WRITE,
	BLKIO_STAT_SYNC,
	BLKIO_STAT_ASYNC,
	BLKIO_STAT_TOTAL
};
| 60 | |||
/*
 * blkg state flags.  Each value is a bit position within
 * blkio_group_stats.flags, accessed through the helpers generated by
 * BLKG_FLAG_FNS().
 */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};
| 67 | |||
/* Per-cgroup state of the blkio controller. */
struct blkio_cgroup {
	struct cgroup_subsys_state css;
	/* Cgroup-wide weight, used for devices without a per-device rule */
	unsigned int weight;
	/* Taken around walks and updates of the two lists below */
	spinlock_t lock;
	/* blkio_group entries, linked through blkio_group.blkcg_node */
	struct hlist_head blkg_list;
	struct list_head policy_list; /* list of blkio_policy_node */
};
| 75 | |||
/*
 * Statistics kept for one blkio_group; embedded in struct blkio_group and
 * serialized by its stats_lock.
 */
struct blkio_group_stats {
	/* total disk time and nr sectors dispatched by this group */
	uint64_t time;
	uint64_t sectors;
	/* Multi-valued stats, indexed [enum stat_type][enum stat_sub_type] */
	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Sum of number of IOs queued across all samples */
	uint64_t avg_queue_size_sum;
	/* Count of samples taken for average */
	uint64_t avg_queue_size_samples;
	/* How many times this group has been removed from service tree */
	unsigned long dequeue;

	/* Total time spent waiting for it to be assigned a timeslice. */
	uint64_t group_wait_time;
	uint64_t start_group_wait_time;

	/* Time spent idling for this blkio_group */
	uint64_t idle_time;
	uint64_t start_idle_time;
	/*
	 * Total time when we have requests queued and do not contain the
	 * current active queue.
	 */
	uint64_t empty_time;
	uint64_t start_empty_time;
	/* Bitmask of (1 << BLKG_*) state flags, see enum blkg_state_flags */
	uint16_t flags;
#endif
};
| 32 | 105 | ||
| 33 | struct blkio_group { | 106 | struct blkio_group { |
| @@ -35,20 +108,25 @@ struct blkio_group { | |||
| 35 | void *key; | 108 | void *key; |
| 36 | struct hlist_node blkcg_node; | 109 | struct hlist_node blkcg_node; |
| 37 | unsigned short blkcg_id; | 110 | unsigned short blkcg_id; |
| 38 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
| 39 | /* Store cgroup path */ | 111 | /* Store cgroup path */ |
| 40 | char path[128]; | 112 | char path[128]; |
| 41 | /* How many times this group has been removed from service tree */ | ||
| 42 | unsigned long dequeue; | ||
| 43 | #endif | ||
| 44 | /* The device MKDEV(major, minor), this group has been created for */ | 113 | /* The device MKDEV(major, minor), this group has been created for */ |
| 45 | dev_t dev; | 114 | dev_t dev; |
| 46 | 115 | ||
| 47 | /* total disk time and nr sectors dispatched by this group */ | 116 | /* Need to serialize the stats in the case of reset/update */ |
| 48 | unsigned long time; | 117 | spinlock_t stats_lock; |
| 49 | unsigned long sectors; | 118 | struct blkio_group_stats stats; |
| 50 | }; | 119 | }; |
| 51 | 120 | ||
/* One per-device weight rule, linked on blkio_cgroup.policy_list. */
struct blkio_policy_node {
	struct list_head node;
	/* Device the rule applies to */
	dev_t dev;
	/* Weight used for that device instead of blkio_cgroup.weight */
	unsigned int weight;
};
| 126 | |||
| 127 | extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | ||
| 128 | dev_t dev); | ||
| 129 | |||
| 52 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); | 130 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); |
| 53 | typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, | 131 | typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, |
| 54 | unsigned int weight); | 132 | unsigned int weight); |
| @@ -67,6 +145,11 @@ struct blkio_policy_type { | |||
| 67 | extern void blkio_policy_register(struct blkio_policy_type *); | 145 | extern void blkio_policy_register(struct blkio_policy_type *); |
| 68 | extern void blkio_policy_unregister(struct blkio_policy_type *); | 146 | extern void blkio_policy_unregister(struct blkio_policy_type *); |
| 69 | 147 | ||
/* Return the cgroup path string stored in @blkg. */
static inline char *blkg_path(struct blkio_group *blkg)
{
	return blkg->path;
}
| 152 | |||
| 70 | #else | 153 | #else |
| 71 | 154 | ||
| 72 | struct blkio_group { | 155 | struct blkio_group { |
| @@ -78,6 +161,8 @@ struct blkio_policy_type { | |||
| 78 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } | 161 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } |
| 79 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } | 162 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } |
| 80 | 163 | ||
| 164 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | ||
| 165 | |||
| 81 | #endif | 166 | #endif |
| 82 | 167 | ||
| 83 | #define BLKIO_WEIGHT_MIN 100 | 168 | #define BLKIO_WEIGHT_MIN 100 |
| @@ -85,16 +170,42 @@ static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } | |||
| 85 | #define BLKIO_WEIGHT_DEFAULT 500 | 170 | #define BLKIO_WEIGHT_DEFAULT 500 |
| 86 | 171 | ||
| 87 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 172 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
| 88 | static inline char *blkg_path(struct blkio_group *blkg) | 173 | void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg); |
| 89 | { | 174 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, |
| 90 | return blkg->path; | ||
| 91 | } | ||
| 92 | void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg, | ||
| 93 | unsigned long dequeue); | 175 | unsigned long dequeue); |
| 176 | void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg); | ||
| 177 | void blkiocg_update_idle_time_stats(struct blkio_group *blkg); | ||
| 178 | void blkiocg_set_start_empty_time(struct blkio_group *blkg); | ||
| 179 | |||
/*
 * Generate the helpers for one state flag:
 *   blkio_mark_blkg_<name>()  - set bit BLKG_<name> in stats->flags
 *   blkio_clear_blkg_<name>() - clear that bit
 *   blkio_blkg_<name>()       - return 1 if the bit is set, 0 otherwise
 */
#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
| 94 | #else | 200 | #else |
| 95 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | 201 | static inline void blkiocg_update_avg_queue_size_stats( |
| 96 | static inline void blkiocg_update_blkio_group_dequeue_stats( | 202 | struct blkio_group *blkg) {} |
| 97 | struct blkio_group *blkg, unsigned long dequeue) {} | 203 | static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg, |
| 204 | unsigned long dequeue) {} | ||
| 205 | static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) | ||
| 206 | {} | ||
| 207 | static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {} | ||
| 208 | static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} | ||
| 98 | #endif | 209 | #endif |
| 99 | 210 | ||
| 100 | #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | 211 | #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) |
| @@ -105,26 +216,43 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |||
| 105 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | 216 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); |
| 106 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | 217 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, |
| 107 | void *key); | 218 | void *key); |
| 108 | void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 219 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
| 109 | unsigned long time, unsigned long sectors); | 220 | unsigned long time); |
| 221 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, | ||
| 222 | bool direction, bool sync); | ||
| 223 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
| 224 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync); | ||
| 225 | void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, | ||
| 226 | bool sync); | ||
| 227 | void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
| 228 | struct blkio_group *curr_blkg, bool direction, bool sync); | ||
| 229 | void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
| 230 | bool direction, bool sync); | ||
| 110 | #else | 231 | #else |
| 111 | struct cgroup; | 232 | struct cgroup; |
| 112 | static inline struct blkio_cgroup * | 233 | static inline struct blkio_cgroup * |
| 113 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } | 234 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } |
| 114 | 235 | ||
| 115 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 236 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
| 116 | struct blkio_group *blkg, void *key, dev_t dev) | 237 | struct blkio_group *blkg, void *key, dev_t dev) {} |
| 117 | { | ||
| 118 | } | ||
| 119 | 238 | ||
| 120 | static inline int | 239 | static inline int |
| 121 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | 240 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } |
| 122 | 241 | ||
| 123 | static inline struct blkio_group * | 242 | static inline struct blkio_group * |
| 124 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | 243 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } |
| 125 | static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 244 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
| 126 | unsigned long time, unsigned long sectors) | 245 | unsigned long time) {} |
| 127 | { | 246 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
| 128 | } | 247 | uint64_t bytes, bool direction, bool sync) {} |
| 248 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
| 249 | uint64_t start_time, uint64_t io_start_time, bool direction, | ||
| 250 | bool sync) {} | ||
| 251 | static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg, | ||
| 252 | bool direction, bool sync) {} | ||
| 253 | static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
| 254 | struct blkio_group *curr_blkg, bool direction, bool sync) {} | ||
| 255 | static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
| 256 | bool direction, bool sync) {} | ||
| 129 | #endif | 257 | #endif |
| 130 | #endif /* _BLK_CGROUP_H */ | 258 | #endif /* _BLK_CGROUP_H */ |
diff --git a/block/blk-core.c b/block/blk-core.c index 9fe174dc74d1..3bc5579d6f54 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |||
| 127 | rq->tag = -1; | 127 | rq->tag = -1; |
| 128 | rq->ref_count = 1; | 128 | rq->ref_count = 1; |
| 129 | rq->start_time = jiffies; | 129 | rq->start_time = jiffies; |
| 130 | set_start_time_ns(rq); | ||
| 130 | } | 131 | } |
| 131 | EXPORT_SYMBOL(blk_rq_init); | 132 | EXPORT_SYMBOL(blk_rq_init); |
| 132 | 133 | ||
| @@ -450,6 +451,7 @@ void blk_cleanup_queue(struct request_queue *q) | |||
| 450 | */ | 451 | */ |
| 451 | blk_sync_queue(q); | 452 | blk_sync_queue(q); |
| 452 | 453 | ||
| 454 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | ||
| 453 | mutex_lock(&q->sysfs_lock); | 455 | mutex_lock(&q->sysfs_lock); |
| 454 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 456 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
| 455 | mutex_unlock(&q->sysfs_lock); | 457 | mutex_unlock(&q->sysfs_lock); |
| @@ -510,6 +512,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
| 510 | return NULL; | 512 | return NULL; |
| 511 | } | 513 | } |
| 512 | 514 | ||
| 515 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | ||
| 516 | laptop_mode_timer_fn, (unsigned long) q); | ||
| 513 | init_timer(&q->unplug_timer); | 517 | init_timer(&q->unplug_timer); |
| 514 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 518 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
| 515 | INIT_LIST_HEAD(&q->timeout_list); | 519 | INIT_LIST_HEAD(&q->timeout_list); |
| @@ -568,6 +572,22 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
| 568 | { | 572 | { |
| 569 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 573 | struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
| 570 | 574 | ||
| 575 | return blk_init_allocated_queue_node(q, rfn, lock, node_id); | ||
| 576 | } | ||
| 577 | EXPORT_SYMBOL(blk_init_queue_node); | ||
| 578 | |||
| 579 | struct request_queue * | ||
| 580 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | ||
| 581 | spinlock_t *lock) | ||
| 582 | { | ||
| 583 | return blk_init_allocated_queue_node(q, rfn, lock, -1); | ||
| 584 | } | ||
| 585 | EXPORT_SYMBOL(blk_init_allocated_queue); | ||
| 586 | |||
| 587 | struct request_queue * | ||
| 588 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | ||
| 589 | spinlock_t *lock, int node_id) | ||
| 590 | { | ||
| 571 | if (!q) | 591 | if (!q) |
| 572 | return NULL; | 592 | return NULL; |
| 573 | 593 | ||
| @@ -601,7 +621,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
| 601 | blk_put_queue(q); | 621 | blk_put_queue(q); |
| 602 | return NULL; | 622 | return NULL; |
| 603 | } | 623 | } |
| 604 | EXPORT_SYMBOL(blk_init_queue_node); | 624 | EXPORT_SYMBOL(blk_init_allocated_queue_node); |
| 605 | 625 | ||
| 606 | int blk_get_queue(struct request_queue *q) | 626 | int blk_get_queue(struct request_queue *q) |
| 607 | { | 627 | { |
| @@ -1198,6 +1218,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
| 1198 | if (!blk_rq_cpu_valid(req)) | 1218 | if (!blk_rq_cpu_valid(req)) |
| 1199 | req->cpu = bio->bi_comp_cpu; | 1219 | req->cpu = bio->bi_comp_cpu; |
| 1200 | drive_stat_acct(req, 0); | 1220 | drive_stat_acct(req, 0); |
| 1221 | elv_bio_merged(q, req, bio); | ||
| 1201 | if (!attempt_back_merge(q, req)) | 1222 | if (!attempt_back_merge(q, req)) |
| 1202 | elv_merged_request(q, req, el_ret); | 1223 | elv_merged_request(q, req, el_ret); |
| 1203 | goto out; | 1224 | goto out; |
| @@ -1231,6 +1252,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
| 1231 | if (!blk_rq_cpu_valid(req)) | 1252 | if (!blk_rq_cpu_valid(req)) |
| 1232 | req->cpu = bio->bi_comp_cpu; | 1253 | req->cpu = bio->bi_comp_cpu; |
| 1233 | drive_stat_acct(req, 0); | 1254 | drive_stat_acct(req, 0); |
| 1255 | elv_bio_merged(q, req, bio); | ||
| 1234 | if (!attempt_front_merge(q, req)) | 1256 | if (!attempt_front_merge(q, req)) |
| 1235 | elv_merged_request(q, req, el_ret); | 1257 | elv_merged_request(q, req, el_ret); |
| 1236 | goto out; | 1258 | goto out; |
| @@ -1855,8 +1877,10 @@ void blk_dequeue_request(struct request *rq) | |||
| 1855 | * and to it is freed is accounted as io that is in progress at | 1877 | * and to it is freed is accounted as io that is in progress at |
| 1856 | * the driver side. | 1878 | * the driver side. |
| 1857 | */ | 1879 | */ |
| 1858 | if (blk_account_rq(rq)) | 1880 | if (blk_account_rq(rq)) { |
| 1859 | q->in_flight[rq_is_sync(rq)]++; | 1881 | q->in_flight[rq_is_sync(rq)]++; |
| 1882 | set_io_start_time_ns(rq); | ||
| 1883 | } | ||
| 1860 | } | 1884 | } |
| 1861 | 1885 | ||
| 1862 | /** | 1886 | /** |
| @@ -2098,7 +2122,7 @@ static void blk_finish_request(struct request *req, int error) | |||
| 2098 | BUG_ON(blk_queued_rq(req)); | 2122 | BUG_ON(blk_queued_rq(req)); |
| 2099 | 2123 | ||
| 2100 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 2124 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
| 2101 | laptop_io_completion(); | 2125 | laptop_io_completion(&req->q->backing_dev_info); |
| 2102 | 2126 | ||
| 2103 | blk_delete_timer(req); | 2127 | blk_delete_timer(req); |
| 2104 | 2128 | ||
| @@ -2517,4 +2541,3 @@ int __init blk_dev_init(void) | |||
| 2517 | 2541 | ||
| 2518 | return 0; | 2542 | return 0; |
| 2519 | } | 2543 | } |
| 2520 | |||
diff --git a/block/blk-lib.c b/block/blk-lib.c new file mode 100644 index 000000000000..d0216b9f22d4 --- /dev/null +++ b/block/blk-lib.c | |||
| @@ -0,0 +1,233 @@ | |||
| 1 | /* | ||
| 2 | * Functions related to generic helper functions | ||
| 3 | */ | ||
| 4 | #include <linux/kernel.h> | ||
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/bio.h> | ||
| 7 | #include <linux/blkdev.h> | ||
| 8 | #include <linux/scatterlist.h> | ||
| 9 | |||
| 10 | #include "blk.h" | ||
| 11 | |||
| 12 | static void blkdev_discard_end_io(struct bio *bio, int err) | ||
| 13 | { | ||
| 14 | if (err) { | ||
| 15 | if (err == -EOPNOTSUPP) | ||
| 16 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
| 17 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 18 | } | ||
| 19 | |||
| 20 | if (bio->bi_private) | ||
| 21 | complete(bio->bi_private); | ||
| 22 | __free_page(bio_page(bio)); | ||
| 23 | |||
| 24 | bio_put(bio); | ||
| 25 | } | ||
| 26 | |||
| 27 | /** | ||
| 28 | * blkdev_issue_discard - queue a discard | ||
| 29 | * @bdev: blockdev to issue discard for | ||
| 30 | * @sector: start sector | ||
| 31 | * @nr_sects: number of sectors to discard | ||
| 32 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 33 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
| 34 | * | ||
| 35 | * Description: | ||
| 36 | * Issue a discard request for the sectors in question. | ||
| 37 | */ | ||
| 38 | int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
| 39 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) | ||
| 40 | { | ||
| 41 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 42 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 43 | int type = flags & BLKDEV_IFL_BARRIER ? | ||
| 44 | DISCARD_BARRIER : DISCARD_NOBARRIER; | ||
| 45 | struct bio *bio; | ||
| 46 | struct page *page; | ||
| 47 | int ret = 0; | ||
| 48 | |||
| 49 | if (!q) | ||
| 50 | return -ENXIO; | ||
| 51 | |||
| 52 | if (!blk_queue_discard(q)) | ||
| 53 | return -EOPNOTSUPP; | ||
| 54 | |||
| 55 | while (nr_sects && !ret) { | ||
| 56 | unsigned int sector_size = q->limits.logical_block_size; | ||
| 57 | unsigned int max_discard_sectors = | ||
| 58 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); | ||
| 59 | |||
| 60 | bio = bio_alloc(gfp_mask, 1); | ||
| 61 | if (!bio) | ||
| 62 | goto out; | ||
| 63 | bio->bi_sector = sector; | ||
| 64 | bio->bi_end_io = blkdev_discard_end_io; | ||
| 65 | bio->bi_bdev = bdev; | ||
| 66 | if (flags & BLKDEV_IFL_WAIT) | ||
| 67 | bio->bi_private = &wait; | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Add a zeroed one-sector payload as that's what | ||
| 71 | * our current implementations need. If we'll ever need | ||
| 72 | * more the interface will need revisiting. | ||
| 73 | */ | ||
| 74 | page = alloc_page(gfp_mask | __GFP_ZERO); | ||
| 75 | if (!page) | ||
| 76 | goto out_free_bio; | ||
| 77 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
| 78 | goto out_free_page; | ||
| 79 | |||
| 80 | /* | ||
| 81 | * And override the bio size - the way discard works we | ||
| 82 | * touch many more blocks on disk than the actual payload | ||
| 83 | * length. | ||
| 84 | */ | ||
| 85 | if (nr_sects > max_discard_sectors) { | ||
| 86 | bio->bi_size = max_discard_sectors << 9; | ||
| 87 | nr_sects -= max_discard_sectors; | ||
| 88 | sector += max_discard_sectors; | ||
| 89 | } else { | ||
| 90 | bio->bi_size = nr_sects << 9; | ||
| 91 | nr_sects = 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | bio_get(bio); | ||
| 95 | submit_bio(type, bio); | ||
| 96 | |||
| 97 | if (flags & BLKDEV_IFL_WAIT) | ||
| 98 | wait_for_completion(&wait); | ||
| 99 | |||
| 100 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
| 101 | ret = -EOPNOTSUPP; | ||
| 102 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
| 103 | ret = -EIO; | ||
| 104 | bio_put(bio); | ||
| 105 | } | ||
| 106 | return ret; | ||
| 107 | out_free_page: | ||
| 108 | __free_page(page); | ||
| 109 | out_free_bio: | ||
| 110 | bio_put(bio); | ||
| 111 | out: | ||
| 112 | return -ENOMEM; | ||
| 113 | } | ||
| 114 | EXPORT_SYMBOL(blkdev_issue_discard); | ||
| 115 | |||
| 116 | struct bio_batch | ||
| 117 | { | ||
| 118 | atomic_t done; | ||
| 119 | unsigned long flags; | ||
| 120 | struct completion *wait; | ||
| 121 | bio_end_io_t *end_io; | ||
| 122 | }; | ||
| 123 | |||
| 124 | static void bio_batch_end_io(struct bio *bio, int err) | ||
| 125 | { | ||
| 126 | struct bio_batch *bb = bio->bi_private; | ||
| 127 | |||
| 128 | if (err) { | ||
| 129 | if (err == -EOPNOTSUPP) | ||
| 130 | set_bit(BIO_EOPNOTSUPP, &bb->flags); | ||
| 131 | else | ||
| 132 | clear_bit(BIO_UPTODATE, &bb->flags); | ||
| 133 | } | ||
| 134 | if (bb) { | ||
| 135 | if (bb->end_io) | ||
| 136 | bb->end_io(bio, err); | ||
| 137 | atomic_inc(&bb->done); | ||
| 138 | complete(bb->wait); | ||
| 139 | } | ||
| 140 | bio_put(bio); | ||
| 141 | } | ||
| 142 | |||
| 143 | /** | ||
| 144 | * blkdev_issue_zeroout - generate a number of zero-filled write bios | ||
| 145 | * @bdev: blockdev to issue | ||
| 146 | * @sector: start sector | ||
| 147 | * @nr_sects: number of sectors to write | ||
| 148 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 149 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
| 150 | * | ||
| 151 | * Description: | ||
| 152 | * Generate and issue a number of bios with zero-filled pages. | ||
| 153 | * Send a barrier at the beginning and at the end if requested. This guarantees | ||
| 154 | * correct request ordering. An empty barrier allows us to avoid a post queue flush. | ||
| 155 | */ | ||
| 156 | |||
| 157 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | ||
| 158 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) | ||
| 159 | { | ||
| 160 | int ret = 0; | ||
| 161 | struct bio *bio; | ||
| 162 | struct bio_batch bb; | ||
| 163 | unsigned int sz, issued = 0; | ||
| 164 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 165 | |||
| 166 | atomic_set(&bb.done, 0); | ||
| 167 | bb.flags = 1 << BIO_UPTODATE; | ||
| 168 | bb.wait = &wait; | ||
| 169 | bb.end_io = NULL; | ||
| 170 | |||
| 171 | if (flags & BLKDEV_IFL_BARRIER) { | ||
| 172 | /* issue async barrier before the data */ | ||
| 173 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); | ||
| 174 | if (ret) | ||
| 175 | return ret; | ||
| 176 | } | ||
| 177 | submit: | ||
| 178 | while (nr_sects != 0) { | ||
| 179 | bio = bio_alloc(gfp_mask, | ||
| 180 | min(nr_sects, (sector_t)BIO_MAX_PAGES)); | ||
| 181 | if (!bio) | ||
| 182 | break; | ||
| 183 | |||
| 184 | bio->bi_sector = sector; | ||
| 185 | bio->bi_bdev = bdev; | ||
| 186 | bio->bi_end_io = bio_batch_end_io; | ||
| 187 | if (flags & BLKDEV_IFL_WAIT) | ||
| 188 | bio->bi_private = &bb; | ||
| 189 | |||
| 190 | while (nr_sects != 0) { | ||
| 191 | sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); | ||
| 192 | if (sz == 0) | ||
| 193 | /* bio has maximum size possible */ | ||
| 194 | break; | ||
| 195 | ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0); | ||
| 196 | nr_sects -= ret >> 9; | ||
| 197 | sector += ret >> 9; | ||
| 198 | if (ret < (sz << 9)) | ||
| 199 | break; | ||
| 200 | } | ||
| 201 | issued++; | ||
| 202 | submit_bio(WRITE, bio); | ||
| 203 | } | ||
| 204 | /* | ||
| 205 | * Once all data bios are in flight, send the final barrier if requested. | ||
| 206 | */ | ||
| 207 | if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) | ||
| 208 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, | ||
| 209 | flags & BLKDEV_IFL_WAIT); | ||
| 210 | |||
| 211 | |||
| 212 | if (flags & BLKDEV_IFL_WAIT) | ||
| 213 | /* Wait for bios in-flight */ | ||
| 214 | while ( issued != atomic_read(&bb.done)) | ||
| 215 | wait_for_completion(&wait); | ||
| 216 | |||
| 217 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | ||
| 218 | /* One of the bios in the batch completed with an error. */ | ||
| 219 | ret = -EIO; | ||
| 220 | |||
| 221 | if (ret) | ||
| 222 | goto out; | ||
| 223 | |||
| 224 | if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) { | ||
| 225 | ret = -EOPNOTSUPP; | ||
| 226 | goto out; | ||
| 227 | } | ||
| 228 | if (nr_sects != 0) | ||
| 229 | goto submit; | ||
| 230 | out: | ||
| 231 | return ret; | ||
| 232 | } | ||
| 233 | EXPORT_SYMBOL(blkdev_issue_zeroout); | ||
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5f127cfb2e92..ed897b5ef315 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
| @@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4; | |||
| 55 | #define RQ_CIC(rq) \ | 55 | #define RQ_CIC(rq) \ |
| 56 | ((struct cfq_io_context *) (rq)->elevator_private) | 56 | ((struct cfq_io_context *) (rq)->elevator_private) |
| 57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) | 57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) |
| 58 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) | ||
| 58 | 59 | ||
| 59 | static struct kmem_cache *cfq_pool; | 60 | static struct kmem_cache *cfq_pool; |
| 60 | static struct kmem_cache *cfq_ioc_pool; | 61 | static struct kmem_cache *cfq_ioc_pool; |
| @@ -143,8 +144,6 @@ struct cfq_queue { | |||
| 143 | struct cfq_queue *new_cfqq; | 144 | struct cfq_queue *new_cfqq; |
| 144 | struct cfq_group *cfqg; | 145 | struct cfq_group *cfqg; |
| 145 | struct cfq_group *orig_cfqg; | 146 | struct cfq_group *orig_cfqg; |
| 146 | /* Sectors dispatched in current dispatch round */ | ||
| 147 | unsigned long nr_sectors; | ||
| 148 | }; | 147 | }; |
| 149 | 148 | ||
| 150 | /* | 149 | /* |
| @@ -346,7 +345,7 @@ CFQ_CFQQ_FNS(deep); | |||
| 346 | CFQ_CFQQ_FNS(wait_busy); | 345 | CFQ_CFQQ_FNS(wait_busy); |
| 347 | #undef CFQ_CFQQ_FNS | 346 | #undef CFQ_CFQQ_FNS |
| 348 | 347 | ||
| 349 | #ifdef CONFIG_DEBUG_CFQ_IOSCHED | 348 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
| 350 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 349 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
| 351 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ | 350 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ |
| 352 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ | 351 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ |
| @@ -858,7 +857,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
| 858 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 857 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
| 859 | cfq_rb_erase(&cfqg->rb_node, st); | 858 | cfq_rb_erase(&cfqg->rb_node, st); |
| 860 | cfqg->saved_workload_slice = 0; | 859 | cfqg->saved_workload_slice = 0; |
| 861 | blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1); | 860 | blkiocg_update_dequeue_stats(&cfqg->blkg, 1); |
| 862 | } | 861 | } |
| 863 | 862 | ||
| 864 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | 863 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) |
| @@ -884,8 +883,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | |||
| 884 | slice_used = cfqq->allocated_slice; | 883 | slice_used = cfqq->allocated_slice; |
| 885 | } | 884 | } |
| 886 | 885 | ||
| 887 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used, | 886 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used); |
| 888 | cfqq->nr_sectors); | ||
| 889 | return slice_used; | 887 | return slice_used; |
| 890 | } | 888 | } |
| 891 | 889 | ||
| @@ -919,8 +917,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
| 919 | 917 | ||
| 920 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, | 918 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, |
| 921 | st->min_vdisktime); | 919 | st->min_vdisktime); |
| 922 | blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl, | 920 | blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); |
| 923 | cfqq->nr_sectors); | 921 | blkiocg_set_start_empty_time(&cfqg->blkg); |
| 924 | } | 922 | } |
| 925 | 923 | ||
| 926 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 924 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
| @@ -961,7 +959,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
| 961 | if (!cfqg) | 959 | if (!cfqg) |
| 962 | goto done; | 960 | goto done; |
| 963 | 961 | ||
| 964 | cfqg->weight = blkcg->weight; | ||
| 965 | for_each_cfqg_st(cfqg, i, j, st) | 962 | for_each_cfqg_st(cfqg, i, j, st) |
| 966 | *st = CFQ_RB_ROOT; | 963 | *st = CFQ_RB_ROOT; |
| 967 | RB_CLEAR_NODE(&cfqg->rb_node); | 964 | RB_CLEAR_NODE(&cfqg->rb_node); |
| @@ -978,6 +975,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
| 978 | sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); | 975 | sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); |
| 979 | blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, | 976 | blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, |
| 980 | MKDEV(major, minor)); | 977 | MKDEV(major, minor)); |
| 978 | cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); | ||
| 981 | 979 | ||
| 982 | /* Add group on cfqd list */ | 980 | /* Add group on cfqd list */ |
| 983 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); | 981 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); |
| @@ -1004,6 +1002,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
| 1004 | return cfqg; | 1002 | return cfqg; |
| 1005 | } | 1003 | } |
| 1006 | 1004 | ||
| 1005 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
| 1006 | { | ||
| 1007 | atomic_inc(&cfqg->ref); | ||
| 1008 | return cfqg; | ||
| 1009 | } | ||
| 1010 | |||
| 1007 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | 1011 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) |
| 1008 | { | 1012 | { |
| 1009 | /* Currently, all async queues are mapped to root group */ | 1013 | /* Currently, all async queues are mapped to root group */ |
| @@ -1087,6 +1091,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
| 1087 | { | 1091 | { |
| 1088 | return &cfqd->root_group; | 1092 | return &cfqd->root_group; |
| 1089 | } | 1093 | } |
| 1094 | |||
| 1095 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
| 1096 | { | ||
| 1097 | return cfqg; | ||
| 1098 | } | ||
| 1099 | |||
| 1090 | static inline void | 1100 | static inline void |
| 1091 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { | 1101 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { |
| 1092 | cfqq->cfqg = cfqg; | 1102 | cfqq->cfqg = cfqg; |
| @@ -1389,7 +1399,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | |||
| 1389 | { | 1399 | { |
| 1390 | elv_rb_del(&cfqq->sort_list, rq); | 1400 | elv_rb_del(&cfqq->sort_list, rq); |
| 1391 | cfqq->queued[rq_is_sync(rq)]--; | 1401 | cfqq->queued[rq_is_sync(rq)]--; |
| 1402 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), | ||
| 1403 | rq_is_sync(rq)); | ||
| 1392 | cfq_add_rq_rb(rq); | 1404 | cfq_add_rq_rb(rq); |
| 1405 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, | ||
| 1406 | &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), | ||
| 1407 | rq_is_sync(rq)); | ||
| 1393 | } | 1408 | } |
| 1394 | 1409 | ||
| 1395 | static struct request * | 1410 | static struct request * |
| @@ -1445,6 +1460,8 @@ static void cfq_remove_request(struct request *rq) | |||
| 1445 | cfq_del_rq_rb(rq); | 1460 | cfq_del_rq_rb(rq); |
| 1446 | 1461 | ||
| 1447 | cfqq->cfqd->rq_queued--; | 1462 | cfqq->cfqd->rq_queued--; |
| 1463 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), | ||
| 1464 | rq_is_sync(rq)); | ||
| 1448 | if (rq_is_meta(rq)) { | 1465 | if (rq_is_meta(rq)) { |
| 1449 | WARN_ON(!cfqq->meta_pending); | 1466 | WARN_ON(!cfqq->meta_pending); |
| 1450 | cfqq->meta_pending--; | 1467 | cfqq->meta_pending--; |
| @@ -1476,6 +1493,13 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, | |||
| 1476 | } | 1493 | } |
| 1477 | } | 1494 | } |
| 1478 | 1495 | ||
| 1496 | static void cfq_bio_merged(struct request_queue *q, struct request *req, | ||
| 1497 | struct bio *bio) | ||
| 1498 | { | ||
| 1499 | blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio), | ||
| 1500 | cfq_bio_sync(bio)); | ||
| 1501 | } | ||
| 1502 | |||
| 1479 | static void | 1503 | static void |
| 1480 | cfq_merged_requests(struct request_queue *q, struct request *rq, | 1504 | cfq_merged_requests(struct request_queue *q, struct request *rq, |
| 1481 | struct request *next) | 1505 | struct request *next) |
| @@ -1493,6 +1517,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, | |||
| 1493 | if (cfqq->next_rq == next) | 1517 | if (cfqq->next_rq == next) |
| 1494 | cfqq->next_rq = rq; | 1518 | cfqq->next_rq = rq; |
| 1495 | cfq_remove_request(next); | 1519 | cfq_remove_request(next); |
| 1520 | blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), | ||
| 1521 | rq_is_sync(next)); | ||
| 1496 | } | 1522 | } |
| 1497 | 1523 | ||
| 1498 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, | 1524 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, |
| @@ -1520,18 +1546,24 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
| 1520 | return cfqq == RQ_CFQQ(rq); | 1546 | return cfqq == RQ_CFQQ(rq); |
| 1521 | } | 1547 | } |
| 1522 | 1548 | ||
| 1549 | static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
| 1550 | { | ||
| 1551 | del_timer(&cfqd->idle_slice_timer); | ||
| 1552 | blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); | ||
| 1553 | } | ||
| 1554 | |||
| 1523 | static void __cfq_set_active_queue(struct cfq_data *cfqd, | 1555 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
| 1524 | struct cfq_queue *cfqq) | 1556 | struct cfq_queue *cfqq) |
| 1525 | { | 1557 | { |
| 1526 | if (cfqq) { | 1558 | if (cfqq) { |
| 1527 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", | 1559 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", |
| 1528 | cfqd->serving_prio, cfqd->serving_type); | 1560 | cfqd->serving_prio, cfqd->serving_type); |
| 1561 | blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg); | ||
| 1529 | cfqq->slice_start = 0; | 1562 | cfqq->slice_start = 0; |
| 1530 | cfqq->dispatch_start = jiffies; | 1563 | cfqq->dispatch_start = jiffies; |
| 1531 | cfqq->allocated_slice = 0; | 1564 | cfqq->allocated_slice = 0; |
| 1532 | cfqq->slice_end = 0; | 1565 | cfqq->slice_end = 0; |
| 1533 | cfqq->slice_dispatch = 0; | 1566 | cfqq->slice_dispatch = 0; |
| 1534 | cfqq->nr_sectors = 0; | ||
| 1535 | 1567 | ||
| 1536 | cfq_clear_cfqq_wait_request(cfqq); | 1568 | cfq_clear_cfqq_wait_request(cfqq); |
| 1537 | cfq_clear_cfqq_must_dispatch(cfqq); | 1569 | cfq_clear_cfqq_must_dispatch(cfqq); |
| @@ -1539,7 +1571,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, | |||
| 1539 | cfq_clear_cfqq_fifo_expire(cfqq); | 1571 | cfq_clear_cfqq_fifo_expire(cfqq); |
| 1540 | cfq_mark_cfqq_slice_new(cfqq); | 1572 | cfq_mark_cfqq_slice_new(cfqq); |
| 1541 | 1573 | ||
| 1542 | del_timer(&cfqd->idle_slice_timer); | 1574 | cfq_del_timer(cfqd, cfqq); |
| 1543 | } | 1575 | } |
| 1544 | 1576 | ||
| 1545 | cfqd->active_queue = cfqq; | 1577 | cfqd->active_queue = cfqq; |
| @@ -1555,7 +1587,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
| 1555 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); | 1587 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); |
| 1556 | 1588 | ||
| 1557 | if (cfq_cfqq_wait_request(cfqq)) | 1589 | if (cfq_cfqq_wait_request(cfqq)) |
| 1558 | del_timer(&cfqd->idle_slice_timer); | 1590 | cfq_del_timer(cfqd, cfqq); |
| 1559 | 1591 | ||
| 1560 | cfq_clear_cfqq_wait_request(cfqq); | 1592 | cfq_clear_cfqq_wait_request(cfqq); |
| 1561 | cfq_clear_cfqq_wait_busy(cfqq); | 1593 | cfq_clear_cfqq_wait_busy(cfqq); |
| @@ -1857,6 +1889,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
| 1857 | sl = cfqd->cfq_slice_idle; | 1889 | sl = cfqd->cfq_slice_idle; |
| 1858 | 1890 | ||
| 1859 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 1891 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); |
| 1892 | blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); | ||
| 1860 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); | 1893 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); |
| 1861 | } | 1894 | } |
| 1862 | 1895 | ||
| @@ -1876,7 +1909,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
| 1876 | elv_dispatch_sort(q, rq); | 1909 | elv_dispatch_sort(q, rq); |
| 1877 | 1910 | ||
| 1878 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; | 1911 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; |
| 1879 | cfqq->nr_sectors += blk_rq_sectors(rq); | 1912 | blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), |
| 1913 | rq_data_dir(rq), rq_is_sync(rq)); | ||
| 1880 | } | 1914 | } |
| 1881 | 1915 | ||
| 1882 | /* | 1916 | /* |
| @@ -3185,11 +3219,14 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
| 3185 | if (cfq_cfqq_wait_request(cfqq)) { | 3219 | if (cfq_cfqq_wait_request(cfqq)) { |
| 3186 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || | 3220 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || |
| 3187 | cfqd->busy_queues > 1) { | 3221 | cfqd->busy_queues > 1) { |
| 3188 | del_timer(&cfqd->idle_slice_timer); | 3222 | cfq_del_timer(cfqd, cfqq); |
| 3189 | cfq_clear_cfqq_wait_request(cfqq); | 3223 | cfq_clear_cfqq_wait_request(cfqq); |
| 3190 | __blk_run_queue(cfqd->queue); | 3224 | __blk_run_queue(cfqd->queue); |
| 3191 | } else | 3225 | } else { |
| 3226 | blkiocg_update_idle_time_stats( | ||
| 3227 | &cfqq->cfqg->blkg); | ||
| 3192 | cfq_mark_cfqq_must_dispatch(cfqq); | 3228 | cfq_mark_cfqq_must_dispatch(cfqq); |
| 3229 | } | ||
| 3193 | } | 3230 | } |
| 3194 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { | 3231 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { |
| 3195 | /* | 3232 | /* |
| @@ -3214,7 +3251,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
| 3214 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 3251 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
| 3215 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 3252 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
| 3216 | cfq_add_rq_rb(rq); | 3253 | cfq_add_rq_rb(rq); |
| 3217 | 3254 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, | |
| 3255 | &cfqd->serving_group->blkg, rq_data_dir(rq), | ||
| 3256 | rq_is_sync(rq)); | ||
| 3218 | cfq_rq_enqueued(cfqd, cfqq, rq); | 3257 | cfq_rq_enqueued(cfqd, cfqq, rq); |
| 3219 | } | 3258 | } |
| 3220 | 3259 | ||
| @@ -3300,6 +3339,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
| 3300 | WARN_ON(!cfqq->dispatched); | 3339 | WARN_ON(!cfqq->dispatched); |
| 3301 | cfqd->rq_in_driver--; | 3340 | cfqd->rq_in_driver--; |
| 3302 | cfqq->dispatched--; | 3341 | cfqq->dispatched--; |
| 3342 | blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq), | ||
| 3343 | rq_io_start_time_ns(rq), rq_data_dir(rq), | ||
| 3344 | rq_is_sync(rq)); | ||
| 3303 | 3345 | ||
| 3304 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; | 3346 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; |
| 3305 | 3347 | ||
| @@ -3440,6 +3482,10 @@ static void cfq_put_request(struct request *rq) | |||
| 3440 | rq->elevator_private = NULL; | 3482 | rq->elevator_private = NULL; |
| 3441 | rq->elevator_private2 = NULL; | 3483 | rq->elevator_private2 = NULL; |
| 3442 | 3484 | ||
| 3485 | /* Put down rq reference on cfqg */ | ||
| 3486 | cfq_put_cfqg(RQ_CFQG(rq)); | ||
| 3487 | rq->elevator_private3 = NULL; | ||
| 3488 | |||
| 3443 | cfq_put_queue(cfqq); | 3489 | cfq_put_queue(cfqq); |
| 3444 | } | 3490 | } |
| 3445 | } | 3491 | } |
| @@ -3528,6 +3574,7 @@ new_queue: | |||
| 3528 | 3574 | ||
| 3529 | rq->elevator_private = cic; | 3575 | rq->elevator_private = cic; |
| 3530 | rq->elevator_private2 = cfqq; | 3576 | rq->elevator_private2 = cfqq; |
| 3577 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
| 3531 | return 0; | 3578 | return 0; |
| 3532 | 3579 | ||
| 3533 | queue_fail: | 3580 | queue_fail: |
| @@ -3743,7 +3790,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
| 3743 | * second, in order to have larger depth for async operations. | 3790 | * second, in order to have larger depth for async operations. |
| 3744 | */ | 3791 | */ |
| 3745 | cfqd->last_delayed_sync = jiffies - HZ; | 3792 | cfqd->last_delayed_sync = jiffies - HZ; |
| 3746 | INIT_RCU_HEAD(&cfqd->rcu); | ||
| 3747 | return cfqd; | 3793 | return cfqd; |
| 3748 | } | 3794 | } |
| 3749 | 3795 | ||
| @@ -3872,6 +3918,7 @@ static struct elevator_type iosched_cfq = { | |||
| 3872 | .elevator_merged_fn = cfq_merged_request, | 3918 | .elevator_merged_fn = cfq_merged_request, |
| 3873 | .elevator_merge_req_fn = cfq_merged_requests, | 3919 | .elevator_merge_req_fn = cfq_merged_requests, |
| 3874 | .elevator_allow_merge_fn = cfq_allow_merge, | 3920 | .elevator_allow_merge_fn = cfq_allow_merge, |
| 3921 | .elevator_bio_merged_fn = cfq_bio_merged, | ||
| 3875 | .elevator_dispatch_fn = cfq_dispatch_requests, | 3922 | .elevator_dispatch_fn = cfq_dispatch_requests, |
| 3876 | .elevator_add_req_fn = cfq_insert_request, | 3923 | .elevator_add_req_fn = cfq_insert_request, |
| 3877 | .elevator_activate_req_fn = cfq_activate_request, | 3924 | .elevator_activate_req_fn = cfq_activate_request, |
diff --git a/block/elevator.c b/block/elevator.c index 76e3702d5381..6df2b5056b51 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
| @@ -539,6 +539,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
| 539 | q->last_merge = rq; | 539 | q->last_merge = rq; |
| 540 | } | 540 | } |
| 541 | 541 | ||
| 542 | void elv_bio_merged(struct request_queue *q, struct request *rq, | ||
| 543 | struct bio *bio) | ||
| 544 | { | ||
| 545 | struct elevator_queue *e = q->elevator; | ||
| 546 | |||
| 547 | if (e->ops->elevator_bio_merged_fn) | ||
| 548 | e->ops->elevator_bio_merged_fn(q, rq, bio); | ||
| 549 | } | ||
| 550 | |||
| 542 | void elv_requeue_request(struct request_queue *q, struct request *rq) | 551 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
| 543 | { | 552 | { |
| 544 | /* | 553 | /* |
| @@ -921,6 +930,7 @@ int elv_register_queue(struct request_queue *q) | |||
| 921 | } | 930 | } |
| 922 | return error; | 931 | return error; |
| 923 | } | 932 | } |
| 933 | EXPORT_SYMBOL(elv_register_queue); | ||
| 924 | 934 | ||
| 925 | static void __elv_unregister_queue(struct elevator_queue *e) | 935 | static void __elv_unregister_queue(struct elevator_queue *e) |
| 926 | { | 936 | { |
| @@ -933,6 +943,7 @@ void elv_unregister_queue(struct request_queue *q) | |||
| 933 | if (q) | 943 | if (q) |
| 934 | __elv_unregister_queue(q->elevator); | 944 | __elv_unregister_queue(q->elevator); |
| 935 | } | 945 | } |
| 946 | EXPORT_SYMBOL(elv_unregister_queue); | ||
| 936 | 947 | ||
| 937 | void elv_register(struct elevator_type *e) | 948 | void elv_register(struct elevator_type *e) |
| 938 | { | 949 | { |
diff --git a/block/genhd.c b/block/genhd.c index d13ba76a169c..59a2db6fecef 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
| @@ -596,6 +596,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) | |||
| 596 | 596 | ||
| 597 | return disk; | 597 | return disk; |
| 598 | } | 598 | } |
| 599 | EXPORT_SYMBOL(get_gendisk); | ||
| 599 | 600 | ||
| 600 | /** | 601 | /** |
| 601 | * bdget_disk - do bdget() by gendisk and partition number | 602 | * bdget_disk - do bdget() by gendisk and partition number |
| @@ -987,7 +988,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) | |||
| 987 | if (!new_ptbl) | 988 | if (!new_ptbl) |
| 988 | return -ENOMEM; | 989 | return -ENOMEM; |
| 989 | 990 | ||
| 990 | INIT_RCU_HEAD(&new_ptbl->rcu_head); | ||
| 991 | new_ptbl->len = target; | 991 | new_ptbl->len = target; |
| 992 | 992 | ||
| 993 | for (i = 0; i < len; i++) | 993 | for (i = 0; i < len; i++) |
diff --git a/block/ioctl.c b/block/ioctl.c index 8905d2a2a717..e8eb679f2f9b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
| @@ -126,7 +126,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | |||
| 126 | if (start + len > (bdev->bd_inode->i_size >> 9)) | 126 | if (start + len > (bdev->bd_inode->i_size >> 9)) |
| 127 | return -EINVAL; | 127 | return -EINVAL; |
| 128 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, | 128 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, |
| 129 | DISCARD_FL_WAIT); | 129 | BLKDEV_IFL_WAIT); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | static int put_ushort(unsigned long arg, unsigned short val) | 132 | static int put_ushort(unsigned long arg, unsigned short val) |
