34 files changed, 1699 insertions, 333 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 630879cd9a42..48e0b21b0059 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -17,6 +17,9 @@ HOWTO | |||
17 | You can do a very simple testing of running two dd threads in two different | 17 | You can do a very simple testing of running two dd threads in two different |
18 | cgroups. Here is what you can do. | 18 | cgroups. Here is what you can do. |
19 | 19 | ||
20 | - Enable Block IO controller | ||
21 | CONFIG_BLK_CGROUP=y | ||
22 | |||
20 | - Enable group scheduling in CFQ | 23 | - Enable group scheduling in CFQ |
21 | CONFIG_CFQ_GROUP_IOSCHED=y | 24 | CONFIG_CFQ_GROUP_IOSCHED=y |
22 | 25 | ||
@@ -54,32 +57,52 @@ cgroups. Here is what you can do. | |||
54 | 57 | ||
55 | Various user visible config options | 58 | Various user visible config options |
56 | =================================== | 59 | =================================== |
57 | CONFIG_CFQ_GROUP_IOSCHED | ||
58 | - Enables group scheduling in CFQ. Currently only 1 level of group | ||
59 | creation is allowed. | ||
60 | |||
61 | CONFIG_DEBUG_CFQ_IOSCHED | ||
62 | - Enables some debugging messages in blktrace. Also creates extra | ||
63 | cgroup file blkio.dequeue. | ||
64 | |||
65 | Config options selected automatically | ||
66 | ===================================== | ||
67 | These config options are not user visible and are selected/deselected | ||
68 | automatically based on IO scheduler configuration. | ||
69 | |||
70 | CONFIG_BLK_CGROUP | 60 | CONFIG_BLK_CGROUP |
71 | - Block IO controller. Selected by CONFIG_CFQ_GROUP_IOSCHED. | 61 | - Block IO controller. |
72 | 62 | ||
73 | CONFIG_DEBUG_BLK_CGROUP | 63 | CONFIG_DEBUG_BLK_CGROUP |
74 | - Debug help. Selected by CONFIG_DEBUG_CFQ_IOSCHED. | 64 | - Debug help. Right now some additional stats files show up in the cgroup
65 | if this option is enabled. | ||
66 | |||
67 | CONFIG_CFQ_GROUP_IOSCHED | ||
68 | - Enables group scheduling in CFQ. Currently only 1 level of group | ||
69 | creation is allowed. | ||
75 | 70 | ||
76 | Details of cgroup files | 71 | Details of cgroup files |
77 | ======================= | 72 | ======================= |
78 | - blkio.weight | 73 | - blkio.weight |
79 | - Specifies per cgroup weight. | 74 | - Specifies per cgroup weight. This is the default weight of the group |
80 | 75 | on all devices, unless overridden by a per-device rule. | |
76 | (See blkio.weight_device). | ||
81 | Currently allowed range of weights is from 100 to 1000. | 77 | Currently allowed range of weights is from 100 to 1000. |
82 | 78 | ||
79 | - blkio.weight_device | ||
80 | - One can specify per cgroup per device rules using this interface. | ||
81 | These rules override the default value of group weight as specified | ||
82 | by blkio.weight. | ||
83 | |||
84 | Following is the format. | ||
85 | |||
86 | # echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device ||
87 | Configure weight=300 on /dev/sdb (8:16) in this cgroup | ||
88 | # echo 8:16 300 > blkio.weight_device | ||
89 | # cat blkio.weight_device | ||
90 | dev weight | ||
91 | 8:16 300 | ||
92 | |||
93 | Configure weight=500 on /dev/sda (8:0) in this cgroup | ||
94 | # echo 8:0 500 > blkio.weight_device | ||
95 | # cat blkio.weight_device | ||
96 | dev weight | ||
97 | 8:0 500 | ||
98 | 8:16 300 | ||
99 | |||
100 | Remove specific weight for /dev/sda in this cgroup | ||
101 | # echo 8:0 0 > blkio.weight_device | ||
102 | # cat blkio.weight_device | ||
103 | dev weight | ||
104 | 8:16 300 | ||
105 | |||
83 | - blkio.time | 106 | - blkio.time |
84 | - disk time allocated to cgroup per device in milliseconds. First | 107 | - disk time allocated to cgroup per device in milliseconds. First |
85 | two fields specify the major and minor number of the device and | 108 | two fields specify the major and minor number of the device and |
@@ -92,13 +115,105 @@ Details of cgroup files | |||
92 | third field specifies the number of sectors transferred by the | 115 | third field specifies the number of sectors transferred by the |
93 | group to/from the device. | 116 | group to/from the device. |
94 | 117 | ||
118 | - blkio.io_service_bytes | ||
119 | - Number of bytes transferred to/from the disk by the group. These | ||
120 | are further divided by the type of operation - read or write, sync | ||
121 | or async. First two fields specify the major and minor number of the | ||
122 | device, third field specifies the operation type and the fourth field | ||
123 | specifies the number of bytes. | ||
124 | |||
125 | - blkio.io_serviced | ||
126 | - Number of IOs completed to/from the disk by the group. These | ||
127 | are further divided by the type of operation - read or write, sync | ||
128 | or async. First two fields specify the major and minor number of the | ||
129 | device, third field specifies the operation type and the fourth field | ||
130 | specifies the number of IOs. | ||
131 | |||
132 | - blkio.io_service_time | ||
133 | - Total amount of time between request dispatch and request completion | ||
134 | for the IOs done by this cgroup. This is in nanoseconds to make it | ||
135 | meaningful for flash devices too. For devices with queue depth of 1, | ||
136 | this time represents the actual service time. When queue_depth > 1, | ||
137 | that is no longer true as requests may be served out of order. This | ||
138 | may cause the service time for a given IO to include the service time | ||
139 | of multiple IOs when served out of order which may result in total | ||
140 | io_service_time > actual time elapsed. This time is further divided by | ||
141 | the type of operation - read or write, sync or async. First two fields | ||
142 | specify the major and minor number of the device, third field | ||
143 | specifies the operation type and the fourth field specifies the | ||
144 | io_service_time in ns. | ||
145 | |||
146 | - blkio.io_wait_time | ||
147 | - Total amount of time the IOs for this cgroup spent waiting in the | ||
148 | scheduler queues for service. This can be greater than the total time | ||
149 | elapsed since it is cumulative io_wait_time for all IOs. It is not a | ||
150 | measure of total time the cgroup spent waiting but rather a measure of | ||
151 | the wait_time for its individual IOs. For devices with queue_depth > 1 | ||
152 | this metric does not include the time spent waiting for service once | ||
153 | the IO is dispatched to the device until it actually gets serviced ||
154 | (there might be a time lag here due to re-ordering of requests by the | ||
155 | device). This is in nanoseconds to make it meaningful for flash | ||
156 | devices too. This time is further divided by the type of operation - | ||
157 | read or write, sync or async. First two fields specify the major and | ||
158 | minor number of the device, third field specifies the operation type | ||
159 | and the fourth field specifies the io_wait_time in ns. | ||
160 | |||
161 | - blkio.io_merged | ||
162 | - Total number of bios/requests merged into requests belonging to this | ||
163 | cgroup. This is further divided by the type of operation - read or | ||
164 | write, sync or async. | ||
165 | |||
166 | - blkio.io_queued | ||
167 | - Total number of requests queued up at any given instant for this | ||
168 | cgroup. This is further divided by the type of operation - read or | ||
169 | write, sync or async. | ||
170 | |||
171 | - blkio.avg_queue_size | ||
172 | - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. | ||
173 | The average queue size for this cgroup over the entire time of this | ||
174 | cgroup's existence. Queue size samples are taken each time one of the | ||
175 | queues of this cgroup gets a timeslice. | ||
176 | |||
177 | - blkio.group_wait_time | ||
178 | - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. | ||
179 | This is the amount of time the cgroup had to wait since it became busy | ||
180 | (i.e., went from 0 to 1 request queued) to get a timeslice for one of | ||
181 | its queues. This is different from the io_wait_time which is the | ||
182 | cumulative total of the amount of time spent by each IO in that cgroup | ||
183 | waiting in the scheduler queue. This is in nanoseconds. If this is | ||
184 | read when the cgroup is in a waiting (for timeslice) state, the stat | ||
185 | will only report the group_wait_time accumulated till the last time it | ||
186 | got a timeslice and will not include the current delta. | ||
187 | |||
188 | - blkio.empty_time | ||
189 | - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. | ||
190 | This is the amount of time a cgroup spends without any pending | ||
191 | requests when not being served, i.e., it does not include any time | ||
192 | spent idling for one of the queues of the cgroup. This is in | ||
193 | nanoseconds. If this is read when the cgroup is in an empty state, | ||
194 | the stat will only report the empty_time accumulated till the last | ||
195 | time it had a pending request and will not include the current delta. | ||
196 | |||
197 | - blkio.idle_time | ||
198 | - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. | ||
199 | This is the amount of time spent by the IO scheduler idling for a | ||
200 | given cgroup in anticipation of a better request than the existing ones ||
201 | from other queues/cgroups. This is in nanoseconds. If this is read | ||
202 | when the cgroup is in an idling state, the stat will only report the | ||
203 | idle_time accumulated till the last idle period and will not include | ||
204 | the current delta. | ||
205 | |||
95 | - blkio.dequeue | 206 | - blkio.dequeue |
96 | - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This | 207 | - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This |
97 | gives the statistics about how many times a group was dequeued | 208 | gives the statistics about how many times a group was dequeued |
98 | from the service tree of the device. First two fields specify the major | 209 | from the service tree of the device. First two fields specify the major |
99 | and minor number of the device and third field specifies the number | 210 | and minor number of the device and third field specifies the number |
100 | of times a group was dequeued from a particular device. | 211 | of times a group was dequeued from a particular device. |
101 | 212 | ||
213 | - blkio.reset_stats | ||
214 | - Writing an int to this file will result in resetting all the stats | ||
215 | for that cgroup. | ||
216 | |||
102 | CFQ sysfs tunable | 217 | CFQ sysfs tunable |
103 | ================= | 218 | ================= |
104 | /sys/block/<disk>/queue/iosched/group_isolation | 219 | /sys/block/<disk>/queue/iosched/group_isolation |
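The cgroup files documented above (blkio.weight_device, blkio.reset_stats) are ordinary cgroupfs files, so they can be driven from userspace with nothing more than open/write. The following sketch is illustrative only and not part of the patch; the mount point /cgroup/blkio and the group name test1 are assumptions.

	/* Illustrative userspace sketch (not from the patch): set a per-device
	 * weight and reset a blkio cgroup's stats. Paths are assumed. */
	#include <stdio.h>
	#include <stdlib.h>

	static void write_cgroup_file(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f) {
			perror(path);
			exit(1);
		}
		fputs(val, f);
		fclose(f);
	}

	int main(void)
	{
		/* weight=300 for /dev/sdb (8:16), as in the example above */
		write_cgroup_file("/cgroup/blkio/test1/blkio.weight_device", "8:16 300");
		/* weight 0 removes the per-device rule again */
		write_cgroup_file("/cgroup/blkio/test1/blkio.weight_device", "8:16 0");
		/* writing any integer to reset_stats clears the group's counters */
		write_cgroup_file("/cgroup/blkio/test1/blkio.reset_stats", "1");
		return 0;
	}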
diff --git a/block/Kconfig b/block/Kconfig
index f9e89f4d94bb..9be0b56eaee1 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,29 +77,6 @@ config BLK_DEV_INTEGRITY | |||
77 | T10/SCSI Data Integrity Field or the T13/ATA External Path | 77 | T10/SCSI Data Integrity Field or the T13/ATA External Path |
78 | Protection. If in doubt, say N. | 78 | Protection. If in doubt, say N. |
79 | 79 | ||
80 | config BLK_CGROUP | ||
81 | tristate "Block cgroup support" | ||
82 | depends on CGROUPS | ||
83 | depends on CFQ_GROUP_IOSCHED | ||
84 | default n | ||
85 | ---help--- | ||
86 | Generic block IO controller cgroup interface. This is the common | ||
87 | cgroup interface which should be used by various IO controlling | ||
88 | policies. | ||
89 | |||
90 | Currently, CFQ IO scheduler uses it to recognize task groups and | ||
91 | control disk bandwidth allocation (proportional time slice allocation) | ||
92 | to such task groups. | ||
93 | |||
94 | config DEBUG_BLK_CGROUP | ||
95 | bool | ||
96 | depends on BLK_CGROUP | ||
97 | default n | ||
98 | ---help--- | ||
99 | Enable some debugging help. Currently it stores the cgroup path | ||
100 | in the blk group which can be used by cfq for tracing various | ||
101 | group related activity. | ||
102 | |||
103 | endif # BLOCK | 80 | endif # BLOCK |
104 | 81 | ||
105 | config BLOCK_COMPAT | 82 | config BLOCK_COMPAT |
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index fc71cf071fb2..3199b76f795d 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,7 +23,8 @@ config IOSCHED_DEADLINE | |||
23 | 23 | ||
24 | config IOSCHED_CFQ | 24 | config IOSCHED_CFQ |
25 | tristate "CFQ I/O scheduler" | 25 | tristate "CFQ I/O scheduler" |
26 | select BLK_CGROUP if CFQ_GROUP_IOSCHED | 26 | # If BLK_CGROUP is a module, CFQ has to be built as module. |
27 | depends on (BLK_CGROUP=m && m) || !BLK_CGROUP || BLK_CGROUP=y | ||
27 | default y | 28 | default y |
28 | ---help--- | 29 | ---help--- |
29 | The CFQ I/O scheduler tries to distribute bandwidth equally | 30 | The CFQ I/O scheduler tries to distribute bandwidth equally |
@@ -33,22 +34,15 @@ config IOSCHED_CFQ | |||
33 | 34 | ||
34 | This is the default I/O scheduler. | 35 | This is the default I/O scheduler. |
35 | 36 | ||
37 | Note: If BLK_CGROUP=m, then CFQ can be built only as module. | ||
38 | |||
36 | config CFQ_GROUP_IOSCHED | 39 | config CFQ_GROUP_IOSCHED |
37 | bool "CFQ Group Scheduling support" | 40 | bool "CFQ Group Scheduling support" |
38 | depends on IOSCHED_CFQ && CGROUPS | 41 | depends on IOSCHED_CFQ && BLK_CGROUP |
39 | default n | 42 | default n |
40 | ---help--- | 43 | ---help--- |
41 | Enable group IO scheduling in CFQ. | 44 | Enable group IO scheduling in CFQ. |
42 | 45 | ||
43 | config DEBUG_CFQ_IOSCHED | ||
44 | bool "Debug CFQ Scheduling" | ||
45 | depends on CFQ_GROUP_IOSCHED | ||
46 | select DEBUG_BLK_CGROUP | ||
47 | default n | ||
48 | ---help--- | ||
49 | Enable CFQ IO scheduling debugging in CFQ. Currently it makes | ||
50 | blktrace output more verbose. | ||
51 | |||
52 | choice | 46 | choice |
53 | prompt "Default I/O scheduler" | 47 | prompt "Default I/O scheduler" |
54 | default DEFAULT_CFQ | 48 | default DEFAULT_CFQ |
diff --git a/block/Makefile b/block/Makefile
index cb2d515ebd6e..0bb499a739cd 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@ | |||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
9 | 9 | ||
10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o | 11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6d88544b677f..0d710c9d403b 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -286,26 +286,31 @@ static void bio_end_empty_barrier(struct bio *bio, int err) | |||
286 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | 286 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); |
287 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 287 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
288 | } | 288 | } |
289 | 289 | if (bio->bi_private) | |
290 | complete(bio->bi_private); | 290 | complete(bio->bi_private); |
291 | bio_put(bio); | ||
291 | } | 292 | } |
292 | 293 | ||
293 | /** | 294 | /** |
294 | * blkdev_issue_flush - queue a flush | 295 | * blkdev_issue_flush - queue a flush |
295 | * @bdev: blockdev to issue flush for | 296 | * @bdev: blockdev to issue flush for |
297 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
296 | * @error_sector: error sector | 298 | * @error_sector: error sector |
299 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
297 | * | 300 | * |
298 | * Description: | 301 | * Description: |
299 | * Issue a flush for the block device in question. Caller can supply | 302 | * Issue a flush for the block device in question. Caller can supply |
300 | * room for storing the error offset in case of a flush error, if they | 303 | * room for storing the error offset in case of a flush error, if they |
301 | * wish to. | 304 | * wish to. If the WAIT flag is not passed, the caller can only assume |
305 | * that the request was pushed to some internal queue for later handling. ||
302 | */ | 306 | */ |
303 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | 307 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, |
308 | sector_t *error_sector, unsigned long flags) | ||
304 | { | 309 | { |
305 | DECLARE_COMPLETION_ONSTACK(wait); | 310 | DECLARE_COMPLETION_ONSTACK(wait); |
306 | struct request_queue *q; | 311 | struct request_queue *q; |
307 | struct bio *bio; | 312 | struct bio *bio; |
308 | int ret; | 313 | int ret = 0; |
309 | 314 | ||
310 | if (bdev->bd_disk == NULL) | 315 | if (bdev->bd_disk == NULL) |
311 | return -ENXIO; | 316 | return -ENXIO; |
@@ -314,23 +319,25 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
314 | if (!q) | 319 | if (!q) |
315 | return -ENXIO; | 320 | return -ENXIO; |
316 | 321 | ||
317 | bio = bio_alloc(GFP_KERNEL, 0); | 322 | bio = bio_alloc(gfp_mask, 0); |
318 | bio->bi_end_io = bio_end_empty_barrier; | 323 | bio->bi_end_io = bio_end_empty_barrier; |
319 | bio->bi_private = &wait; | ||
320 | bio->bi_bdev = bdev; | 324 | bio->bi_bdev = bdev; |
321 | submit_bio(WRITE_BARRIER, bio); | 325 | if (test_bit(BLKDEV_WAIT, &flags)) |
322 | 326 | bio->bi_private = &wait; | |
323 | wait_for_completion(&wait); | ||
324 | 327 | ||
325 | /* | 328 | bio_get(bio); |
326 | * The driver must store the error location in ->bi_sector, if | 329 | submit_bio(WRITE_BARRIER, bio); |
327 | * it supports it. For non-stacked drivers, this should be copied | 330 | if (test_bit(BLKDEV_WAIT, &flags)) { |
328 | * from blk_rq_pos(rq). | 331 | wait_for_completion(&wait); |
329 | */ | 332 | /* |
330 | if (error_sector) | 333 | * The driver must store the error location in ->bi_sector, if |
331 | *error_sector = bio->bi_sector; | 334 | * it supports it. For non-stacked drivers, this should be |
335 | * copied from blk_rq_pos(rq). | ||
336 | */ | ||
337 | if (error_sector) | ||
338 | *error_sector = bio->bi_sector; | ||
339 | } | ||
332 | 340 | ||
333 | ret = 0; | ||
334 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 341 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
335 | ret = -EOPNOTSUPP; | 342 | ret = -EOPNOTSUPP; |
336 | else if (!bio_flagged(bio, BIO_UPTODATE)) | 343 | else if (!bio_flagged(bio, BIO_UPTODATE)) |
@@ -340,107 +347,3 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
340 | return ret; | 347 | return ret; |
341 | } | 348 | } |
342 | EXPORT_SYMBOL(blkdev_issue_flush); | 349 | EXPORT_SYMBOL(blkdev_issue_flush); |
343 | |||
344 | static void blkdev_discard_end_io(struct bio *bio, int err) | ||
345 | { | ||
346 | if (err) { | ||
347 | if (err == -EOPNOTSUPP) | ||
348 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
349 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
350 | } | ||
351 | |||
352 | if (bio->bi_private) | ||
353 | complete(bio->bi_private); | ||
354 | __free_page(bio_page(bio)); | ||
355 | |||
356 | bio_put(bio); | ||
357 | } | ||
358 | |||
359 | /** | ||
360 | * blkdev_issue_discard - queue a discard | ||
361 | * @bdev: blockdev to issue discard for | ||
362 | * @sector: start sector | ||
363 | * @nr_sects: number of sectors to discard | ||
364 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
365 | * @flags: DISCARD_FL_* flags to control behaviour | ||
366 | * | ||
367 | * Description: | ||
368 | * Issue a discard request for the sectors in question. | ||
369 | */ | ||
370 | int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
371 | sector_t nr_sects, gfp_t gfp_mask, int flags) | ||
372 | { | ||
373 | DECLARE_COMPLETION_ONSTACK(wait); | ||
374 | struct request_queue *q = bdev_get_queue(bdev); | ||
375 | int type = flags & DISCARD_FL_BARRIER ? | ||
376 | DISCARD_BARRIER : DISCARD_NOBARRIER; | ||
377 | struct bio *bio; | ||
378 | struct page *page; | ||
379 | int ret = 0; | ||
380 | |||
381 | if (!q) | ||
382 | return -ENXIO; | ||
383 | |||
384 | if (!blk_queue_discard(q)) | ||
385 | return -EOPNOTSUPP; | ||
386 | |||
387 | while (nr_sects && !ret) { | ||
388 | unsigned int sector_size = q->limits.logical_block_size; | ||
389 | unsigned int max_discard_sectors = | ||
390 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); | ||
391 | |||
392 | bio = bio_alloc(gfp_mask, 1); | ||
393 | if (!bio) | ||
394 | goto out; | ||
395 | bio->bi_sector = sector; | ||
396 | bio->bi_end_io = blkdev_discard_end_io; | ||
397 | bio->bi_bdev = bdev; | ||
398 | if (flags & DISCARD_FL_WAIT) | ||
399 | bio->bi_private = &wait; | ||
400 | |||
401 | /* | ||
402 | * Add a zeroed one-sector payload as that's what | ||
403 | * our current implementations need. If we'll ever need | ||
404 | * more the interface will need revisiting. | ||
405 | */ | ||
406 | page = alloc_page(gfp_mask | __GFP_ZERO); | ||
407 | if (!page) | ||
408 | goto out_free_bio; | ||
409 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
410 | goto out_free_page; | ||
411 | |||
412 | /* | ||
413 | * And override the bio size - the way discard works we | ||
414 | * touch many more blocks on disk than the actual payload | ||
415 | * length. | ||
416 | */ | ||
417 | if (nr_sects > max_discard_sectors) { | ||
418 | bio->bi_size = max_discard_sectors << 9; | ||
419 | nr_sects -= max_discard_sectors; | ||
420 | sector += max_discard_sectors; | ||
421 | } else { | ||
422 | bio->bi_size = nr_sects << 9; | ||
423 | nr_sects = 0; | ||
424 | } | ||
425 | |||
426 | bio_get(bio); | ||
427 | submit_bio(type, bio); | ||
428 | |||
429 | if (flags & DISCARD_FL_WAIT) | ||
430 | wait_for_completion(&wait); | ||
431 | |||
432 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
433 | ret = -EOPNOTSUPP; | ||
434 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
435 | ret = -EIO; | ||
436 | bio_put(bio); | ||
437 | } | ||
438 | return ret; | ||
439 | out_free_page: | ||
440 | __free_page(page); | ||
441 | out_free_bio: | ||
442 | bio_put(bio); | ||
443 | out: | ||
444 | return -ENOMEM; | ||
445 | } | ||
446 | EXPORT_SYMBOL(blkdev_issue_discard); | ||
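For callers, the user-visible change in blk-barrier.c is the new blkdev_issue_flush() signature: it now takes a gfp mask and a flags word, and it only sleeps for completion when the wait flag is set (blkdev_issue_discard() leaves this file; the Makefile hunk above adds blk-lib.o, where such helpers now live). A minimal, hypothetical caller could look like the sketch below; it is not part of the patch, and it assumes BLKDEV_IFL_WAIT is the BLKDEV_IFL_* flag corresponding to the BLKDEV_WAIT bit tested in the code.

	/* Hypothetical caller sketch, not part of the patch. */
	#include <linux/blkdev.h>
	#include <linux/gfp.h>

	static int example_flush_cache(struct block_device *bdev)
	{
		sector_t error_sector;
		int ret;

		/* Synchronous flush: wait for the empty barrier to complete. */
		ret = blkdev_issue_flush(bdev, GFP_KERNEL, &error_sector,
					 BLKDEV_IFL_WAIT);
		if (ret == -EOPNOTSUPP)
			ret = 0;	/* device has no flush; nothing to do */
		return ret;
	}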
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5fe03def34b2..d02bbf88de13 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -15,8 +15,12 @@ | |||
15 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <linux/blkdev.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include "blk-cgroup.h" | 20 | #include "blk-cgroup.h" |
21 | #include <linux/genhd.h> | ||
22 | |||
23 | #define MAX_KEY_LEN 100 | ||
20 | 24 | ||
21 | static DEFINE_SPINLOCK(blkio_list_lock); | 25 | static DEFINE_SPINLOCK(blkio_list_lock); |
22 | static LIST_HEAD(blkio_list); | 26 | static LIST_HEAD(blkio_list); |
@@ -49,6 +53,32 @@ struct cgroup_subsys blkio_subsys = { | |||
49 | }; | 53 | }; |
50 | EXPORT_SYMBOL_GPL(blkio_subsys); | 54 | EXPORT_SYMBOL_GPL(blkio_subsys); |
51 | 55 | ||
56 | static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, | ||
57 | struct blkio_policy_node *pn) | ||
58 | { | ||
59 | list_add(&pn->node, &blkcg->policy_list); | ||
60 | } | ||
61 | |||
62 | /* Must be called with blkcg->lock held */ | ||
63 | static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) | ||
64 | { | ||
65 | list_del(&pn->node); | ||
66 | } | ||
67 | |||
68 | /* Must be called with blkcg->lock held */ | ||
69 | static struct blkio_policy_node * | ||
70 | blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev) | ||
71 | { | ||
72 | struct blkio_policy_node *pn; | ||
73 | |||
74 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
75 | if (pn->dev == dev) | ||
76 | return pn; | ||
77 | } | ||
78 | |||
79 | return NULL; | ||
80 | } | ||
81 | |||
52 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | 82 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) |
53 | { | 83 | { |
54 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | 84 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), |
@@ -56,13 +86,259 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | |||
56 | } | 86 | } |
57 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); | 87 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); |
58 | 88 | ||
59 | void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 89 | /* |
60 | unsigned long time, unsigned long sectors) | 90 | * Add to the appropriate stat variable depending on the request type. |
91 | * This should be called with the blkg->stats_lock held. | ||
92 | */ | ||
93 | static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, | ||
94 | bool sync) | ||
95 | { | ||
96 | if (direction) | ||
97 | stat[BLKIO_STAT_WRITE] += add; | ||
98 | else | ||
99 | stat[BLKIO_STAT_READ] += add; | ||
100 | if (sync) | ||
101 | stat[BLKIO_STAT_SYNC] += add; | ||
102 | else | ||
103 | stat[BLKIO_STAT_ASYNC] += add; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Decrements the appropriate stat variable if non-zero depending on the | ||
108 | * request type. Panics on value being zero. | ||
109 | * This should be called with the blkg->stats_lock held. | ||
110 | */ | ||
111 | static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) | ||
112 | { | ||
113 | if (direction) { | ||
114 | BUG_ON(stat[BLKIO_STAT_WRITE] == 0); | ||
115 | stat[BLKIO_STAT_WRITE]--; | ||
116 | } else { | ||
117 | BUG_ON(stat[BLKIO_STAT_READ] == 0); | ||
118 | stat[BLKIO_STAT_READ]--; | ||
119 | } | ||
120 | if (sync) { | ||
121 | BUG_ON(stat[BLKIO_STAT_SYNC] == 0); | ||
122 | stat[BLKIO_STAT_SYNC]--; | ||
123 | } else { | ||
124 | BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); | ||
125 | stat[BLKIO_STAT_ASYNC]--; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
130 | /* This should be called with the blkg->stats_lock held. */ | ||
131 | static void blkio_set_start_group_wait_time(struct blkio_group *blkg, | ||
132 | struct blkio_group *curr_blkg) | ||
133 | { | ||
134 | if (blkio_blkg_waiting(&blkg->stats)) | ||
135 | return; | ||
136 | if (blkg == curr_blkg) | ||
137 | return; | ||
138 | blkg->stats.start_group_wait_time = sched_clock(); | ||
139 | blkio_mark_blkg_waiting(&blkg->stats); | ||
140 | } | ||
141 | |||
142 | /* This should be called with the blkg->stats_lock held. */ | ||
143 | static void blkio_update_group_wait_time(struct blkio_group_stats *stats) | ||
144 | { | ||
145 | unsigned long long now; | ||
146 | |||
147 | if (!blkio_blkg_waiting(stats)) | ||
148 | return; | ||
149 | |||
150 | now = sched_clock(); | ||
151 | if (time_after64(now, stats->start_group_wait_time)) | ||
152 | stats->group_wait_time += now - stats->start_group_wait_time; | ||
153 | blkio_clear_blkg_waiting(stats); | ||
154 | } | ||
155 | |||
156 | /* This should be called with the blkg->stats_lock held. */ | ||
157 | static void blkio_end_empty_time(struct blkio_group_stats *stats) | ||
158 | { | ||
159 | unsigned long long now; | ||
160 | |||
161 | if (!blkio_blkg_empty(stats)) | ||
162 | return; | ||
163 | |||
164 | now = sched_clock(); | ||
165 | if (time_after64(now, stats->start_empty_time)) | ||
166 | stats->empty_time += now - stats->start_empty_time; | ||
167 | blkio_clear_blkg_empty(stats); | ||
168 | } | ||
169 | |||
170 | void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) | ||
171 | { | ||
172 | unsigned long flags; | ||
173 | |||
174 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
175 | BUG_ON(blkio_blkg_idling(&blkg->stats)); | ||
176 | blkg->stats.start_idle_time = sched_clock(); | ||
177 | blkio_mark_blkg_idling(&blkg->stats); | ||
178 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); | ||
181 | |||
182 | void blkiocg_update_idle_time_stats(struct blkio_group *blkg) | ||
183 | { | ||
184 | unsigned long flags; | ||
185 | unsigned long long now; | ||
186 | struct blkio_group_stats *stats; | ||
187 | |||
188 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
189 | stats = &blkg->stats; | ||
190 | if (blkio_blkg_idling(stats)) { | ||
191 | now = sched_clock(); | ||
192 | if (time_after64(now, stats->start_idle_time)) | ||
193 | stats->idle_time += now - stats->start_idle_time; | ||
194 | blkio_clear_blkg_idling(stats); | ||
195 | } | ||
196 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
197 | } | ||
198 | EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); | ||
199 | |||
200 | void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) | ||
201 | { | ||
202 | unsigned long flags; | ||
203 | struct blkio_group_stats *stats; | ||
204 | |||
205 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
206 | stats = &blkg->stats; | ||
207 | stats->avg_queue_size_sum += | ||
208 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + | ||
209 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; | ||
210 | stats->avg_queue_size_samples++; | ||
211 | blkio_update_group_wait_time(stats); | ||
212 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); | ||
215 | |||
216 | void blkiocg_set_start_empty_time(struct blkio_group *blkg) | ||
217 | { | ||
218 | unsigned long flags; | ||
219 | struct blkio_group_stats *stats; | ||
220 | |||
221 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
222 | stats = &blkg->stats; | ||
223 | |||
224 | if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || | ||
225 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { | ||
226 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
227 | return; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * group is already marked empty. This can happen if cfqq got new | ||
232 | * request in parent group and moved to this group while being added | ||
233 | * to service tree. Just ignore the event and move on. | ||
234 | */ | ||
235 | if(blkio_blkg_empty(stats)) { | ||
236 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
237 | return; | ||
238 | } | ||
239 | |||
240 | stats->start_empty_time = sched_clock(); | ||
241 | blkio_mark_blkg_empty(stats); | ||
242 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
243 | } | ||
244 | EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); | ||
245 | |||
246 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, | ||
247 | unsigned long dequeue) | ||
248 | { | ||
249 | blkg->stats.dequeue += dequeue; | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); | ||
252 | #else | ||
253 | static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg, | ||
254 | struct blkio_group *curr_blkg) {} | ||
255 | static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {} | ||
256 | #endif | ||
257 | |||
258 | void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
259 | struct blkio_group *curr_blkg, bool direction, | ||
260 | bool sync) | ||
261 | { | ||
262 | unsigned long flags; | ||
263 | |||
264 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
265 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, | ||
266 | sync); | ||
267 | blkio_end_empty_time(&blkg->stats); | ||
268 | blkio_set_start_group_wait_time(blkg, curr_blkg); | ||
269 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
270 | } | ||
271 | EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); | ||
272 | |||
273 | void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
274 | bool direction, bool sync) | ||
275 | { | ||
276 | unsigned long flags; | ||
277 | |||
278 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
279 | blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], | ||
280 | direction, sync); | ||
281 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
282 | } | ||
283 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); | ||
284 | |||
285 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | ||
286 | { | ||
287 | unsigned long flags; | ||
288 | |||
289 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
290 | blkg->stats.time += time; | ||
291 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
292 | } | ||
293 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | ||
294 | |||
295 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, | ||
296 | uint64_t bytes, bool direction, bool sync) | ||
297 | { | ||
298 | struct blkio_group_stats *stats; | ||
299 | unsigned long flags; | ||
300 | |||
301 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
302 | stats = &blkg->stats; | ||
303 | stats->sectors += bytes >> 9; | ||
304 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction, | ||
305 | sync); | ||
306 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes, | ||
307 | direction, sync); | ||
308 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
309 | } | ||
310 | EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); | ||
311 | |||
312 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
313 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) | ||
314 | { | ||
315 | struct blkio_group_stats *stats; | ||
316 | unsigned long flags; | ||
317 | unsigned long long now = sched_clock(); | ||
318 | |||
319 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
320 | stats = &blkg->stats; | ||
321 | if (time_after64(now, io_start_time)) | ||
322 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], | ||
323 | now - io_start_time, direction, sync); | ||
324 | if (time_after64(io_start_time, start_time)) | ||
325 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], | ||
326 | io_start_time - start_time, direction, sync); | ||
327 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
328 | } | ||
329 | EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); | ||
330 | |||
331 | void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, | ||
332 | bool sync) | ||
61 | { | 333 | { |
62 | blkg->time += time; | 334 | unsigned long flags; |
63 | blkg->sectors += sectors; | 335 | |
336 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
337 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction, | ||
338 | sync); | ||
339 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
64 | } | 340 | } |
65 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats); | 341 | EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); |
66 | 342 | ||
67 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 343 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
68 | struct blkio_group *blkg, void *key, dev_t dev) | 344 | struct blkio_group *blkg, void *key, dev_t dev) |
@@ -70,14 +346,13 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |||
70 | unsigned long flags; | 346 | unsigned long flags; |
71 | 347 | ||
72 | spin_lock_irqsave(&blkcg->lock, flags); | 348 | spin_lock_irqsave(&blkcg->lock, flags); |
349 | spin_lock_init(&blkg->stats_lock); | ||
73 | rcu_assign_pointer(blkg->key, key); | 350 | rcu_assign_pointer(blkg->key, key); |
74 | blkg->blkcg_id = css_id(&blkcg->css); | 351 | blkg->blkcg_id = css_id(&blkcg->css); |
75 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); | 352 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
76 | spin_unlock_irqrestore(&blkcg->lock, flags); | 353 | spin_unlock_irqrestore(&blkcg->lock, flags); |
77 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
78 | /* Need to take css reference ? */ | 354 | /* Need to take css reference ? */ |
79 | cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); | 355 | cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); |
80 | #endif | ||
81 | blkg->dev = dev; | 356 | blkg->dev = dev; |
82 | } | 357 | } |
83 | EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); | 358 | EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); |
@@ -154,6 +429,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
154 | struct blkio_group *blkg; | 429 | struct blkio_group *blkg; |
155 | struct hlist_node *n; | 430 | struct hlist_node *n; |
156 | struct blkio_policy_type *blkiop; | 431 | struct blkio_policy_type *blkiop; |
432 | struct blkio_policy_node *pn; | ||
157 | 433 | ||
158 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) | 434 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) |
159 | return -EINVAL; | 435 | return -EINVAL; |
@@ -162,7 +438,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
162 | spin_lock(&blkio_list_lock); | 438 | spin_lock(&blkio_list_lock); |
163 | spin_lock_irq(&blkcg->lock); | 439 | spin_lock_irq(&blkcg->lock); |
164 | blkcg->weight = (unsigned int)val; | 440 | blkcg->weight = (unsigned int)val; |
441 | |||
165 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 442 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
443 | pn = blkio_policy_search_node(blkcg, blkg->dev); | ||
444 | |||
445 | if (pn) | ||
446 | continue; | ||
447 | |||
166 | list_for_each_entry(blkiop, &blkio_list, list) | 448 | list_for_each_entry(blkiop, &blkio_list, list) |
167 | blkiop->ops.blkio_update_group_weight_fn(blkg, | 449 | blkiop->ops.blkio_update_group_weight_fn(blkg, |
168 | blkcg->weight); | 450 | blkcg->weight); |
@@ -172,13 +454,154 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
172 | return 0; | 454 | return 0; |
173 | } | 455 | } |
174 | 456 | ||
175 | #define SHOW_FUNCTION_PER_GROUP(__VAR) \ | 457 | static int |
458 | blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) | ||
459 | { | ||
460 | struct blkio_cgroup *blkcg; | ||
461 | struct blkio_group *blkg; | ||
462 | struct blkio_group_stats *stats; | ||
463 | struct hlist_node *n; | ||
464 | uint64_t queued[BLKIO_STAT_TOTAL]; | ||
465 | int i; | ||
466 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
467 | bool idling, waiting, empty; | ||
468 | unsigned long long now = sched_clock(); | ||
469 | #endif | ||
470 | |||
471 | blkcg = cgroup_to_blkio_cgroup(cgroup); | ||
472 | spin_lock_irq(&blkcg->lock); | ||
473 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
474 | spin_lock(&blkg->stats_lock); | ||
475 | stats = &blkg->stats; | ||
476 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
477 | idling = blkio_blkg_idling(stats); | ||
478 | waiting = blkio_blkg_waiting(stats); | ||
479 | empty = blkio_blkg_empty(stats); | ||
480 | #endif | ||
481 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
482 | queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; | ||
483 | memset(stats, 0, sizeof(struct blkio_group_stats)); | ||
484 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
485 | stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; | ||
486 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
487 | if (idling) { | ||
488 | blkio_mark_blkg_idling(stats); | ||
489 | stats->start_idle_time = now; | ||
490 | } | ||
491 | if (waiting) { | ||
492 | blkio_mark_blkg_waiting(stats); | ||
493 | stats->start_group_wait_time = now; | ||
494 | } | ||
495 | if (empty) { | ||
496 | blkio_mark_blkg_empty(stats); | ||
497 | stats->start_empty_time = now; | ||
498 | } | ||
499 | #endif | ||
500 | spin_unlock(&blkg->stats_lock); | ||
501 | } | ||
502 | spin_unlock_irq(&blkcg->lock); | ||
503 | return 0; | ||
504 | } | ||
505 | |||
506 | static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, | ||
507 | int chars_left, bool diskname_only) | ||
508 | { | ||
509 | snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); | ||
510 | chars_left -= strlen(str); | ||
511 | if (chars_left <= 0) { | ||
512 | printk(KERN_WARNING | ||
513 | "Possibly incorrect cgroup stat display format"); | ||
514 | return; | ||
515 | } | ||
516 | if (diskname_only) | ||
517 | return; | ||
518 | switch (type) { | ||
519 | case BLKIO_STAT_READ: | ||
520 | strlcat(str, " Read", chars_left); | ||
521 | break; | ||
522 | case BLKIO_STAT_WRITE: | ||
523 | strlcat(str, " Write", chars_left); | ||
524 | break; | ||
525 | case BLKIO_STAT_SYNC: | ||
526 | strlcat(str, " Sync", chars_left); | ||
527 | break; | ||
528 | case BLKIO_STAT_ASYNC: | ||
529 | strlcat(str, " Async", chars_left); | ||
530 | break; | ||
531 | case BLKIO_STAT_TOTAL: | ||
532 | strlcat(str, " Total", chars_left); | ||
533 | break; | ||
534 | default: | ||
535 | strlcat(str, " Invalid", chars_left); | ||
536 | } | ||
537 | } | ||
538 | |||
539 | static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, | ||
540 | struct cgroup_map_cb *cb, dev_t dev) | ||
541 | { | ||
542 | blkio_get_key_name(0, dev, str, chars_left, true); | ||
543 | cb->fill(cb, str, val); | ||
544 | return val; | ||
545 | } | ||
546 | |||
547 | /* This should be called with blkg->stats_lock held */ | ||
548 | static uint64_t blkio_get_stat(struct blkio_group *blkg, | ||
549 | struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) | ||
550 | { | ||
551 | uint64_t disk_total; | ||
552 | char key_str[MAX_KEY_LEN]; | ||
553 | enum stat_sub_type sub_type; | ||
554 | |||
555 | if (type == BLKIO_STAT_TIME) | ||
556 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
557 | blkg->stats.time, cb, dev); | ||
558 | if (type == BLKIO_STAT_SECTORS) | ||
559 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
560 | blkg->stats.sectors, cb, dev); | ||
561 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
562 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | ||
563 | uint64_t sum = blkg->stats.avg_queue_size_sum; | ||
564 | uint64_t samples = blkg->stats.avg_queue_size_samples; | ||
565 | if (samples) | ||
566 | do_div(sum, samples); | ||
567 | else | ||
568 | sum = 0; | ||
569 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev); | ||
570 | } | ||
571 | if (type == BLKIO_STAT_GROUP_WAIT_TIME) | ||
572 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
573 | blkg->stats.group_wait_time, cb, dev); | ||
574 | if (type == BLKIO_STAT_IDLE_TIME) | ||
575 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
576 | blkg->stats.idle_time, cb, dev); | ||
577 | if (type == BLKIO_STAT_EMPTY_TIME) | ||
578 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
579 | blkg->stats.empty_time, cb, dev); | ||
580 | if (type == BLKIO_STAT_DEQUEUE) | ||
581 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
582 | blkg->stats.dequeue, cb, dev); | ||
583 | #endif | ||
584 | |||
585 | for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; | ||
586 | sub_type++) { | ||
587 | blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); | ||
588 | cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); | ||
589 | } | ||
590 | disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + | ||
591 | blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; | ||
592 | blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); | ||
593 | cb->fill(cb, key_str, disk_total); | ||
594 | return disk_total; | ||
595 | } | ||
596 | |||
597 | #define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \ | ||
176 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | 598 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ |
177 | struct cftype *cftype, struct seq_file *m) \ | 599 | struct cftype *cftype, struct cgroup_map_cb *cb) \ |
178 | { \ | 600 | { \ |
179 | struct blkio_cgroup *blkcg; \ | 601 | struct blkio_cgroup *blkcg; \ |
180 | struct blkio_group *blkg; \ | 602 | struct blkio_group *blkg; \ |
181 | struct hlist_node *n; \ | 603 | struct hlist_node *n; \ |
604 | uint64_t cgroup_total = 0; \ | ||
182 | \ | 605 | \ |
183 | if (!cgroup_lock_live_group(cgroup)) \ | 606 | if (!cgroup_lock_live_group(cgroup)) \ |
184 | return -ENODEV; \ | 607 | return -ENODEV; \ |
@@ -186,50 +609,295 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
186 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | 609 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ |
187 | rcu_read_lock(); \ | 610 | rcu_read_lock(); \ |
188 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | 611 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ |
189 | if (blkg->dev) \ | 612 | if (blkg->dev) { \ |
190 | seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \ | 613 | spin_lock_irq(&blkg->stats_lock); \ |
191 | MINOR(blkg->dev), blkg->__VAR); \ | 614 | cgroup_total += blkio_get_stat(blkg, cb, \ |
615 | blkg->dev, type); \ | ||
616 | spin_unlock_irq(&blkg->stats_lock); \ | ||
617 | } \ | ||
192 | } \ | 618 | } \ |
619 | if (show_total) \ | ||
620 | cb->fill(cb, "Total", cgroup_total); \ | ||
193 | rcu_read_unlock(); \ | 621 | rcu_read_unlock(); \ |
194 | cgroup_unlock(); \ | 622 | cgroup_unlock(); \ |
195 | return 0; \ | 623 | return 0; \ |
196 | } | 624 | } |
197 | 625 | ||
198 | SHOW_FUNCTION_PER_GROUP(time); | 626 | SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); |
199 | SHOW_FUNCTION_PER_GROUP(sectors); | 627 | SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); |
628 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); | ||
629 | SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); | ||
630 | SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); | ||
631 | SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); | ||
632 | SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1); | ||
633 | SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1); | ||
200 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 634 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
201 | SHOW_FUNCTION_PER_GROUP(dequeue); | 635 | SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); |
636 | SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0); | ||
637 | SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0); | ||
638 | SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0); | ||
639 | SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0); | ||
202 | #endif | 640 | #endif |
203 | #undef SHOW_FUNCTION_PER_GROUP | 641 | #undef SHOW_FUNCTION_PER_GROUP |
204 | 642 | ||
205 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 643 | static int blkio_check_dev_num(dev_t dev) |
206 | void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg, | ||
207 | unsigned long dequeue) | ||
208 | { | 644 | { |
209 | blkg->dequeue += dequeue; | 645 | int part = 0; |
646 | struct gendisk *disk; | ||
647 | |||
648 | disk = get_gendisk(dev, &part); | ||
649 | if (!disk || part) | ||
650 | return -ENODEV; | ||
651 | |||
652 | return 0; | ||
653 | } | ||
654 | |||
655 | static int blkio_policy_parse_and_set(char *buf, | ||
656 | struct blkio_policy_node *newpn) | ||
657 | { | ||
658 | char *s[4], *p, *major_s = NULL, *minor_s = NULL; | ||
659 | int ret; | ||
660 | unsigned long major, minor, temp; | ||
661 | int i = 0; | ||
662 | dev_t dev; | ||
663 | |||
664 | memset(s, 0, sizeof(s)); | ||
665 | |||
666 | while ((p = strsep(&buf, " ")) != NULL) { | ||
667 | if (!*p) | ||
668 | continue; | ||
669 | |||
670 | s[i++] = p; | ||
671 | |||
672 | /* Prevent inputting too many things */ ||
673 | if (i == 3) | ||
674 | break; | ||
675 | } | ||
676 | |||
677 | if (i != 2) | ||
678 | return -EINVAL; | ||
679 | |||
680 | p = strsep(&s[0], ":"); | ||
681 | if (p != NULL) | ||
682 | major_s = p; | ||
683 | else | ||
684 | return -EINVAL; | ||
685 | |||
686 | minor_s = s[0]; | ||
687 | if (!minor_s) | ||
688 | return -EINVAL; | ||
689 | |||
690 | ret = strict_strtoul(major_s, 10, &major); | ||
691 | if (ret) | ||
692 | return -EINVAL; | ||
693 | |||
694 | ret = strict_strtoul(minor_s, 10, &minor); | ||
695 | if (ret) | ||
696 | return -EINVAL; | ||
697 | |||
698 | dev = MKDEV(major, minor); | ||
699 | |||
700 | ret = blkio_check_dev_num(dev); | ||
701 | if (ret) | ||
702 | return ret; | ||
703 | |||
704 | newpn->dev = dev; | ||
705 | |||
706 | if (s[1] == NULL) | ||
707 | return -EINVAL; | ||
708 | |||
709 | ret = strict_strtoul(s[1], 10, &temp); | ||
710 | if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || | ||
711 | temp > BLKIO_WEIGHT_MAX) | ||
712 | return -EINVAL; | ||
713 | |||
714 | newpn->weight = temp; | ||
715 | |||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | ||
720 | dev_t dev) | ||
721 | { | ||
722 | struct blkio_policy_node *pn; | ||
723 | |||
724 | pn = blkio_policy_search_node(blkcg, dev); | ||
725 | if (pn) | ||
726 | return pn->weight; | ||
727 | else | ||
728 | return blkcg->weight; | ||
729 | } | ||
730 | EXPORT_SYMBOL_GPL(blkcg_get_weight); | ||
731 | |||
732 | |||
733 | static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, | ||
734 | const char *buffer) | ||
735 | { | ||
736 | int ret = 0; | ||
737 | char *buf; | ||
738 | struct blkio_policy_node *newpn, *pn; | ||
739 | struct blkio_cgroup *blkcg; | ||
740 | struct blkio_group *blkg; | ||
741 | int keep_newpn = 0; | ||
742 | struct hlist_node *n; | ||
743 | struct blkio_policy_type *blkiop; | ||
744 | |||
745 | buf = kstrdup(buffer, GFP_KERNEL); | ||
746 | if (!buf) | ||
747 | return -ENOMEM; | ||
748 | |||
749 | newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); | ||
750 | if (!newpn) { | ||
751 | ret = -ENOMEM; | ||
752 | goto free_buf; | ||
753 | } | ||
754 | |||
755 | ret = blkio_policy_parse_and_set(buf, newpn); | ||
756 | if (ret) | ||
757 | goto free_newpn; | ||
758 | |||
759 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
760 | |||
761 | spin_lock_irq(&blkcg->lock); | ||
762 | |||
763 | pn = blkio_policy_search_node(blkcg, newpn->dev); | ||
764 | if (!pn) { | ||
765 | if (newpn->weight != 0) { | ||
766 | blkio_policy_insert_node(blkcg, newpn); | ||
767 | keep_newpn = 1; | ||
768 | } | ||
769 | spin_unlock_irq(&blkcg->lock); | ||
770 | goto update_io_group; | ||
771 | } | ||
772 | |||
773 | if (newpn->weight == 0) { | ||
774 | /* weight == 0 means deleting a specific weight */ ||
775 | blkio_policy_delete_node(pn); | ||
776 | spin_unlock_irq(&blkcg->lock); | ||
777 | goto update_io_group; | ||
778 | } | ||
779 | spin_unlock_irq(&blkcg->lock); | ||
780 | |||
781 | pn->weight = newpn->weight; | ||
782 | |||
783 | update_io_group: | ||
784 | /* update weight for each cfqg */ | ||
785 | spin_lock(&blkio_list_lock); | ||
786 | spin_lock_irq(&blkcg->lock); | ||
787 | |||
788 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
789 | if (newpn->dev == blkg->dev) { | ||
790 | list_for_each_entry(blkiop, &blkio_list, list) | ||
791 | blkiop->ops.blkio_update_group_weight_fn(blkg, | ||
792 | newpn->weight ? | ||
793 | newpn->weight : | ||
794 | blkcg->weight); | ||
795 | } | ||
796 | } | ||
797 | |||
798 | spin_unlock_irq(&blkcg->lock); | ||
799 | spin_unlock(&blkio_list_lock); | ||
800 | |||
801 | free_newpn: | ||
802 | if (!keep_newpn) | ||
803 | kfree(newpn); | ||
804 | free_buf: | ||
805 | kfree(buf); | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, | ||
810 | struct seq_file *m) | ||
811 | { | ||
812 | struct blkio_cgroup *blkcg; | ||
813 | struct blkio_policy_node *pn; | ||
814 | |||
815 | seq_printf(m, "dev\tweight\n"); | ||
816 | |||
817 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
818 | if (list_empty(&blkcg->policy_list)) | ||
819 | goto out; | ||
820 | |||
821 | spin_lock_irq(&blkcg->lock); | ||
822 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
823 | seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), | ||
824 | MINOR(pn->dev), pn->weight); | ||
825 | } | ||
826 | spin_unlock_irq(&blkcg->lock); | ||
827 | |||
828 | out: | ||
829 | return 0; | ||
210 | } | 830 | } |
211 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats); | ||
212 | #endif | ||
213 | 831 | ||
214 | struct cftype blkio_files[] = { | 832 | struct cftype blkio_files[] = { |
215 | { | 833 | { |
834 | .name = "weight_device", | ||
835 | .read_seq_string = blkiocg_weight_device_read, | ||
836 | .write_string = blkiocg_weight_device_write, | ||
837 | .max_write_len = 256, | ||
838 | }, | ||
839 | { | ||
216 | .name = "weight", | 840 | .name = "weight", |
217 | .read_u64 = blkiocg_weight_read, | 841 | .read_u64 = blkiocg_weight_read, |
218 | .write_u64 = blkiocg_weight_write, | 842 | .write_u64 = blkiocg_weight_write, |
219 | }, | 843 | }, |
220 | { | 844 | { |
221 | .name = "time", | 845 | .name = "time", |
222 | .read_seq_string = blkiocg_time_read, | 846 | .read_map = blkiocg_time_read, |
223 | }, | 847 | }, |
224 | { | 848 | { |
225 | .name = "sectors", | 849 | .name = "sectors", |
226 | .read_seq_string = blkiocg_sectors_read, | 850 | .read_map = blkiocg_sectors_read, |
851 | }, | ||
852 | { | ||
853 | .name = "io_service_bytes", | ||
854 | .read_map = blkiocg_io_service_bytes_read, | ||
855 | }, | ||
856 | { | ||
857 | .name = "io_serviced", | ||
858 | .read_map = blkiocg_io_serviced_read, | ||
859 | }, | ||
860 | { | ||
861 | .name = "io_service_time", | ||
862 | .read_map = blkiocg_io_service_time_read, | ||
863 | }, | ||
864 | { | ||
865 | .name = "io_wait_time", | ||
866 | .read_map = blkiocg_io_wait_time_read, | ||
867 | }, | ||
868 | { | ||
869 | .name = "io_merged", | ||
870 | .read_map = blkiocg_io_merged_read, | ||
871 | }, | ||
872 | { | ||
873 | .name = "io_queued", | ||
874 | .read_map = blkiocg_io_queued_read, | ||
875 | }, | ||
876 | { | ||
877 | .name = "reset_stats", | ||
878 | .write_u64 = blkiocg_reset_stats, | ||
227 | }, | 879 | }, |
228 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 880 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
229 | { | 881 | { |
882 | .name = "avg_queue_size", | ||
883 | .read_map = blkiocg_avg_queue_size_read, | ||
884 | }, | ||
885 | { | ||
886 | .name = "group_wait_time", | ||
887 | .read_map = blkiocg_group_wait_time_read, | ||
888 | }, | ||
889 | { | ||
890 | .name = "idle_time", | ||
891 | .read_map = blkiocg_idle_time_read, | ||
892 | }, | ||
893 | { | ||
894 | .name = "empty_time", | ||
895 | .read_map = blkiocg_empty_time_read, | ||
896 | }, | ||
897 | { | ||
230 | .name = "dequeue", | 898 | .name = "dequeue", |
231 | .read_seq_string = blkiocg_dequeue_read, | 899 | .read_map = blkiocg_dequeue_read, |
232 | }, | 900 | }, |
233 | #endif | 901 | #endif |
234 | }; | 902 | }; |
235 | 903 | ||
@@ -246,6 +914,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |||
246 | struct blkio_group *blkg; | 914 | struct blkio_group *blkg; |
247 | void *key; | 915 | void *key; |
248 | struct blkio_policy_type *blkiop; | 916 | struct blkio_policy_type *blkiop; |
917 | struct blkio_policy_node *pn, *pntmp; | ||
249 | 918 | ||
250 | rcu_read_lock(); | 919 | rcu_read_lock(); |
251 | remove_entry: | 920 | remove_entry: |
@@ -276,7 +945,12 @@ remove_entry: | |||
276 | blkiop->ops.blkio_unlink_group_fn(key, blkg); | 945 | blkiop->ops.blkio_unlink_group_fn(key, blkg); |
277 | spin_unlock(&blkio_list_lock); | 946 | spin_unlock(&blkio_list_lock); |
278 | goto remove_entry; | 947 | goto remove_entry; |
948 | |||
279 | done: | 949 | done: |
950 | list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) { | ||
951 | blkio_policy_delete_node(pn); | ||
952 | kfree(pn); | ||
953 | } | ||
280 | free_css_id(&blkio_subsys, &blkcg->css); | 954 | free_css_id(&blkio_subsys, &blkcg->css); |
281 | rcu_read_unlock(); | 955 | rcu_read_unlock(); |
282 | if (blkcg != &blkio_root_cgroup) | 956 | if (blkcg != &blkio_root_cgroup) |
@@ -307,6 +981,7 @@ done: | |||
307 | spin_lock_init(&blkcg->lock); | 981 | spin_lock_init(&blkcg->lock); |
308 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 982 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
309 | 983 | ||
984 | INIT_LIST_HEAD(&blkcg->policy_list); | ||
310 | return &blkcg->css; | 985 | return &blkcg->css; |
311 | } | 986 | } |
312 | 987 | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8ccc20464dae..2b866ec1dcea 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,11 +23,84 @@ extern struct cgroup_subsys blkio_subsys; | |||
23 | #define blkio_subsys_id blkio_subsys.subsys_id | 23 | #define blkio_subsys_id blkio_subsys.subsys_id |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | enum stat_type { | ||
27 | /* Total time spent (in ns) between request dispatch to the driver and | ||
28 | * request completion for IOs done by this cgroup. This may not be ||
29 | * accurate when NCQ is turned on. */ | ||
30 | BLKIO_STAT_SERVICE_TIME = 0, | ||
31 | /* Total bytes transferred */ | ||
32 | BLKIO_STAT_SERVICE_BYTES, | ||
33 | /* Total IOs serviced, post merge */ | ||
34 | BLKIO_STAT_SERVICED, | ||
35 | /* Total time spent waiting in scheduler queue in ns */ | ||
36 | BLKIO_STAT_WAIT_TIME, | ||
37 | /* Number of IOs merged */ | ||
38 | BLKIO_STAT_MERGED, | ||
39 | /* Number of IOs queued up */ | ||
40 | BLKIO_STAT_QUEUED, | ||
41 | /* All the single valued stats go below this */ | ||
42 | BLKIO_STAT_TIME, | ||
43 | BLKIO_STAT_SECTORS, | ||
44 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
45 | BLKIO_STAT_AVG_QUEUE_SIZE, | ||
46 | BLKIO_STAT_IDLE_TIME, | ||
47 | BLKIO_STAT_EMPTY_TIME, | ||
48 | BLKIO_STAT_GROUP_WAIT_TIME, | ||
49 | BLKIO_STAT_DEQUEUE | ||
50 | #endif | ||
51 | }; | ||
52 | |||
53 | enum stat_sub_type { | ||
54 | BLKIO_STAT_READ = 0, | ||
55 | BLKIO_STAT_WRITE, | ||
56 | BLKIO_STAT_SYNC, | ||
57 | BLKIO_STAT_ASYNC, | ||
58 | BLKIO_STAT_TOTAL | ||
59 | }; | ||
60 | |||
61 | /* blkg state flags */ | ||
62 | enum blkg_state_flags { | ||
63 | BLKG_waiting = 0, | ||
64 | BLKG_idling, | ||
65 | BLKG_empty, | ||
66 | }; | ||
67 | |||
26 | struct blkio_cgroup { | 68 | struct blkio_cgroup { |
27 | struct cgroup_subsys_state css; | 69 | struct cgroup_subsys_state css; |
28 | unsigned int weight; | 70 | unsigned int weight; |
29 | spinlock_t lock; | 71 | spinlock_t lock; |
30 | struct hlist_head blkg_list; | 72 | struct hlist_head blkg_list; |
73 | struct list_head policy_list; /* list of blkio_policy_node */ | ||
74 | }; | ||
75 | |||
76 | struct blkio_group_stats { | ||
77 | /* total disk time and nr sectors dispatched by this group */ | ||
78 | uint64_t time; | ||
79 | uint64_t sectors; | ||
80 | uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; | ||
81 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
82 | /* Sum of number of IOs queued across all samples */ | ||
83 | uint64_t avg_queue_size_sum; | ||
84 | /* Count of samples taken for average */ | ||
85 | uint64_t avg_queue_size_samples; | ||
86 | /* How many times this group has been removed from service tree */ | ||
87 | unsigned long dequeue; | ||
88 | |||
89 | /* Total time spent waiting for it to be assigned a timeslice. */ | ||
90 | uint64_t group_wait_time; | ||
91 | uint64_t start_group_wait_time; | ||
92 | |||
93 | /* Time spent idling for this blkio_group */ | ||
94 | uint64_t idle_time; | ||
95 | uint64_t start_idle_time; | ||
96 | /* | ||
97 | * Total time during which this group has requests queued but does | ||
98 | * not contain the current active queue. | ||
99 | */ | ||
100 | uint64_t empty_time; | ||
101 | uint64_t start_empty_time; | ||
102 | uint16_t flags; | ||
103 | #endif | ||
31 | }; | 104 | }; |
32 | 105 | ||
33 | struct blkio_group { | 106 | struct blkio_group { |
@@ -35,20 +108,25 @@ struct blkio_group { | |||
35 | void *key; | 108 | void *key; |
36 | struct hlist_node blkcg_node; | 109 | struct hlist_node blkcg_node; |
37 | unsigned short blkcg_id; | 110 | unsigned short blkcg_id; |
38 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
39 | /* Store cgroup path */ | 111 | /* Store cgroup path */ |
40 | char path[128]; | 112 | char path[128]; |
41 | /* How many times this group has been removed from service tree */ | ||
42 | unsigned long dequeue; | ||
43 | #endif | ||
44 | /* The device MKDEV(major, minor), this group has been created for */ | 113 | /* The device MKDEV(major, minor), this group has been created for */ |
45 | dev_t dev; | 114 | dev_t dev; |
46 | 115 | ||
47 | /* total disk time and nr sectors dispatched by this group */ | 116 | /* Need to serialize the stats in the case of reset/update */ |
48 | unsigned long time; | 117 | spinlock_t stats_lock; |
49 | unsigned long sectors; | 118 | struct blkio_group_stats stats; |
50 | }; | 119 | }; |
51 | 120 | ||
121 | struct blkio_policy_node { | ||
122 | struct list_head node; | ||
123 | dev_t dev; | ||
124 | unsigned int weight; | ||
125 | }; | ||
126 | |||
127 | extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | ||
128 | dev_t dev); | ||
129 | |||
52 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); | 130 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); |
53 | typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, | 131 | typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, |
54 | unsigned int weight); | 132 | unsigned int weight); |
@@ -67,6 +145,11 @@ struct blkio_policy_type { | |||
67 | extern void blkio_policy_register(struct blkio_policy_type *); | 145 | extern void blkio_policy_register(struct blkio_policy_type *); |
68 | extern void blkio_policy_unregister(struct blkio_policy_type *); | 146 | extern void blkio_policy_unregister(struct blkio_policy_type *); |
69 | 147 | ||
148 | static inline char *blkg_path(struct blkio_group *blkg) | ||
149 | { | ||
150 | return blkg->path; | ||
151 | } | ||
152 | |||
70 | #else | 153 | #else |
71 | 154 | ||
72 | struct blkio_group { | 155 | struct blkio_group { |
@@ -78,6 +161,8 @@ struct blkio_policy_type { | |||
78 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } | 161 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } |
79 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } | 162 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } |
80 | 163 | ||
164 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | ||
165 | |||
81 | #endif | 166 | #endif |
82 | 167 | ||
83 | #define BLKIO_WEIGHT_MIN 100 | 168 | #define BLKIO_WEIGHT_MIN 100 |
@@ -85,16 +170,42 @@ static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } | |||
85 | #define BLKIO_WEIGHT_DEFAULT 500 | 170 | #define BLKIO_WEIGHT_DEFAULT 500 |
86 | 171 | ||
87 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 172 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
88 | static inline char *blkg_path(struct blkio_group *blkg) | 173 | void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg); |
89 | { | 174 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, |
90 | return blkg->path; | ||
91 | } | ||
92 | void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg, | ||
93 | unsigned long dequeue); | 175 | unsigned long dequeue); |
176 | void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg); | ||
177 | void blkiocg_update_idle_time_stats(struct blkio_group *blkg); | ||
178 | void blkiocg_set_start_empty_time(struct blkio_group *blkg); | ||
179 | |||
180 | #define BLKG_FLAG_FNS(name) \ | ||
181 | static inline void blkio_mark_blkg_##name( \ | ||
182 | struct blkio_group_stats *stats) \ | ||
183 | { \ | ||
184 | stats->flags |= (1 << BLKG_##name); \ | ||
185 | } \ | ||
186 | static inline void blkio_clear_blkg_##name( \ | ||
187 | struct blkio_group_stats *stats) \ | ||
188 | { \ | ||
189 | stats->flags &= ~(1 << BLKG_##name); \ | ||
190 | } \ | ||
191 | static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \ | ||
192 | { \ | ||
193 | return (stats->flags & (1 << BLKG_##name)) != 0; \ | ||
194 | } \ | ||
195 | |||
196 | BLKG_FLAG_FNS(waiting) | ||
197 | BLKG_FLAG_FNS(idling) | ||
198 | BLKG_FLAG_FNS(empty) | ||
199 | #undef BLKG_FLAG_FNS | ||
94 | #else | 200 | #else |
95 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | 201 | static inline void blkiocg_update_avg_queue_size_stats( |
96 | static inline void blkiocg_update_blkio_group_dequeue_stats( | 202 | struct blkio_group *blkg) {} |
97 | struct blkio_group *blkg, unsigned long dequeue) {} | 203 | static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg, |
204 | unsigned long dequeue) {} | ||
205 | static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) | ||
206 | {} | ||
207 | static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {} | ||
208 | static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} | ||
98 | #endif | 209 | #endif |
99 | 210 | ||
100 | #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | 211 | #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) |
@@ -105,26 +216,43 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |||
105 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | 216 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); |
106 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | 217 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, |
107 | void *key); | 218 | void *key); |
108 | void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 219 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
109 | unsigned long time, unsigned long sectors); | 220 | unsigned long time); |
221 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, | ||
222 | bool direction, bool sync); | ||
223 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
224 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync); | ||
225 | void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, | ||
226 | bool sync); | ||
227 | void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
228 | struct blkio_group *curr_blkg, bool direction, bool sync); | ||
229 | void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
230 | bool direction, bool sync); | ||
110 | #else | 231 | #else |
111 | struct cgroup; | 232 | struct cgroup; |
112 | static inline struct blkio_cgroup * | 233 | static inline struct blkio_cgroup * |
113 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } | 234 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } |
114 | 235 | ||
115 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 236 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
116 | struct blkio_group *blkg, void *key, dev_t dev) | 237 | struct blkio_group *blkg, void *key, dev_t dev) {} |
117 | { | ||
118 | } | ||
119 | 238 | ||
120 | static inline int | 239 | static inline int |
121 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | 240 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } |
122 | 241 | ||
123 | static inline struct blkio_group * | 242 | static inline struct blkio_group * |
124 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | 243 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } |
125 | static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 244 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
126 | unsigned long time, unsigned long sectors) | 245 | unsigned long time) {} |
127 | { | 246 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
128 | } | 247 | uint64_t bytes, bool direction, bool sync) {} |
248 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
249 | uint64_t start_time, uint64_t io_start_time, bool direction, | ||
250 | bool sync) {} | ||
251 | static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg, | ||
252 | bool direction, bool sync) {} | ||
253 | static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
254 | struct blkio_group *curr_blkg, bool direction, bool sync) {} | ||
255 | static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
256 | bool direction, bool sync) {} | ||
129 | #endif | 257 | #endif |
130 | #endif /* _BLK_CGROUP_H */ | 258 | #endif /* _BLK_CGROUP_H */ |
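For readers unfamiliar with the pattern, BLKG_FLAG_FNS() above is a straightforward stamp-out macro; the expansion of BLKG_FLAG_FNS(waiting), for example, is roughly the following (the idling and empty variants are identical apart from the name):

static inline void blkio_mark_blkg_waiting(struct blkio_group_stats *stats)
{
	stats->flags |= (1 << BLKG_waiting);
}

static inline void blkio_clear_blkg_waiting(struct blkio_group_stats *stats)
{
	stats->flags &= ~(1 << BLKG_waiting);
}

static inline int blkio_blkg_waiting(struct blkio_group_stats *stats)
{
	return (stats->flags & (1 << BLKG_waiting)) != 0;
}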
diff --git a/block/blk-core.c b/block/blk-core.c index 9fe174dc74d1..e9a5ae25db8c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |||
127 | rq->tag = -1; | 127 | rq->tag = -1; |
128 | rq->ref_count = 1; | 128 | rq->ref_count = 1; |
129 | rq->start_time = jiffies; | 129 | rq->start_time = jiffies; |
130 | set_start_time_ns(rq); | ||
130 | } | 131 | } |
131 | EXPORT_SYMBOL(blk_rq_init); | 132 | EXPORT_SYMBOL(blk_rq_init); |
132 | 133 | ||
@@ -450,6 +451,7 @@ void blk_cleanup_queue(struct request_queue *q) | |||
450 | */ | 451 | */ |
451 | blk_sync_queue(q); | 452 | blk_sync_queue(q); |
452 | 453 | ||
454 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | ||
453 | mutex_lock(&q->sysfs_lock); | 455 | mutex_lock(&q->sysfs_lock); |
454 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 456 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
455 | mutex_unlock(&q->sysfs_lock); | 457 | mutex_unlock(&q->sysfs_lock); |
@@ -510,6 +512,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
510 | return NULL; | 512 | return NULL; |
511 | } | 513 | } |
512 | 514 | ||
515 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | ||
516 | laptop_mode_timer_fn, (unsigned long) q); | ||
513 | init_timer(&q->unplug_timer); | 517 | init_timer(&q->unplug_timer); |
514 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 518 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
515 | INIT_LIST_HEAD(&q->timeout_list); | 519 | INIT_LIST_HEAD(&q->timeout_list); |
@@ -1198,6 +1202,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1198 | if (!blk_rq_cpu_valid(req)) | 1202 | if (!blk_rq_cpu_valid(req)) |
1199 | req->cpu = bio->bi_comp_cpu; | 1203 | req->cpu = bio->bi_comp_cpu; |
1200 | drive_stat_acct(req, 0); | 1204 | drive_stat_acct(req, 0); |
1205 | elv_bio_merged(q, req, bio); | ||
1201 | if (!attempt_back_merge(q, req)) | 1206 | if (!attempt_back_merge(q, req)) |
1202 | elv_merged_request(q, req, el_ret); | 1207 | elv_merged_request(q, req, el_ret); |
1203 | goto out; | 1208 | goto out; |
@@ -1231,6 +1236,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1231 | if (!blk_rq_cpu_valid(req)) | 1236 | if (!blk_rq_cpu_valid(req)) |
1232 | req->cpu = bio->bi_comp_cpu; | 1237 | req->cpu = bio->bi_comp_cpu; |
1233 | drive_stat_acct(req, 0); | 1238 | drive_stat_acct(req, 0); |
1239 | elv_bio_merged(q, req, bio); | ||
1234 | if (!attempt_front_merge(q, req)) | 1240 | if (!attempt_front_merge(q, req)) |
1235 | elv_merged_request(q, req, el_ret); | 1241 | elv_merged_request(q, req, el_ret); |
1236 | goto out; | 1242 | goto out; |
@@ -1855,8 +1861,10 @@ void blk_dequeue_request(struct request *rq) | |||
1855 | * and to it is freed is accounted as io that is in progress at | 1861 | * and to it is freed is accounted as io that is in progress at |
1856 | * the driver side. | 1862 | * the driver side. |
1857 | */ | 1863 | */ |
1858 | if (blk_account_rq(rq)) | 1864 | if (blk_account_rq(rq)) { |
1859 | q->in_flight[rq_is_sync(rq)]++; | 1865 | q->in_flight[rq_is_sync(rq)]++; |
1866 | set_io_start_time_ns(rq); | ||
1867 | } | ||
1860 | } | 1868 | } |
1861 | 1869 | ||
1862 | /** | 1870 | /** |
@@ -2098,7 +2106,7 @@ static void blk_finish_request(struct request *req, int error) | |||
2098 | BUG_ON(blk_queued_rq(req)); | 2106 | BUG_ON(blk_queued_rq(req)); |
2099 | 2107 | ||
2100 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 2108 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
2101 | laptop_io_completion(); | 2109 | laptop_io_completion(&req->q->backing_dev_info); |
2102 | 2110 | ||
2103 | blk_delete_timer(req); | 2111 | blk_delete_timer(req); |
2104 | 2112 | ||
@@ -2517,4 +2525,3 @@ int __init blk_dev_init(void) | |||
2517 | 2525 | ||
2518 | return 0; | 2526 | return 0; |
2519 | } | 2527 | } |
2520 | |||
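The two timestamps added here bracket a request's life: set_start_time_ns() stamps the request when it is initialised, set_io_start_time_ns() when it is handed to the driver. A sketch of how a completion path can convert them into the per-cgroup wait-time and service-time counters defined in blk-cgroup.h (illustrative only; in the patch the real accounting is done by blkiocg_update_completion_stats() in blk-cgroup.c):

/* Illustrative helper, not part of the patch. */
static void account_request_times(struct blkio_group_stats *stats,
				  uint64_t start_time_ns,
				  uint64_t io_start_time_ns,
				  uint64_t now_ns,
				  enum stat_sub_type dir)
{
	/* Time spent queued in the scheduler: init until driver dispatch. */
	if (io_start_time_ns > start_time_ns)
		stats->stat_arr[BLKIO_STAT_WAIT_TIME][dir] +=
			io_start_time_ns - start_time_ns;

	/* Time spent in the driver/hardware: dispatch until completion. */
	if (now_ns > io_start_time_ns)
		stats->stat_arr[BLKIO_STAT_SERVICE_TIME][dir] +=
			now_ns - io_start_time_ns;
}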
diff --git a/block/blk-lib.c b/block/blk-lib.c new file mode 100644 index 000000000000..d0216b9f22d4 --- /dev/null +++ b/block/blk-lib.c | |||
@@ -0,0 +1,233 @@ | |||
1 | /* | ||
2 | * Functions related to generic block device helpers | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/bio.h> | ||
7 | #include <linux/blkdev.h> | ||
8 | #include <linux/scatterlist.h> | ||
9 | |||
10 | #include "blk.h" | ||
11 | |||
12 | static void blkdev_discard_end_io(struct bio *bio, int err) | ||
13 | { | ||
14 | if (err) { | ||
15 | if (err == -EOPNOTSUPP) | ||
16 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
17 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
18 | } | ||
19 | |||
20 | if (bio->bi_private) | ||
21 | complete(bio->bi_private); | ||
22 | __free_page(bio_page(bio)); | ||
23 | |||
24 | bio_put(bio); | ||
25 | } | ||
26 | |||
27 | /** | ||
28 | * blkdev_issue_discard - queue a discard | ||
29 | * @bdev: blockdev to issue discard for | ||
30 | * @sector: start sector | ||
31 | * @nr_sects: number of sectors to discard | ||
32 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
33 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
34 | * | ||
35 | * Description: | ||
36 | * Issue a discard request for the sectors in question. | ||
37 | */ | ||
38 | int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
39 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) | ||
40 | { | ||
41 | DECLARE_COMPLETION_ONSTACK(wait); | ||
42 | struct request_queue *q = bdev_get_queue(bdev); | ||
43 | int type = flags & BLKDEV_IFL_BARRIER ? | ||
44 | DISCARD_BARRIER : DISCARD_NOBARRIER; | ||
45 | struct bio *bio; | ||
46 | struct page *page; | ||
47 | int ret = 0; | ||
48 | |||
49 | if (!q) | ||
50 | return -ENXIO; | ||
51 | |||
52 | if (!blk_queue_discard(q)) | ||
53 | return -EOPNOTSUPP; | ||
54 | |||
55 | while (nr_sects && !ret) { | ||
56 | unsigned int sector_size = q->limits.logical_block_size; | ||
57 | unsigned int max_discard_sectors = | ||
58 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); | ||
59 | |||
60 | bio = bio_alloc(gfp_mask, 1); | ||
61 | if (!bio) | ||
62 | goto out; | ||
63 | bio->bi_sector = sector; | ||
64 | bio->bi_end_io = blkdev_discard_end_io; | ||
65 | bio->bi_bdev = bdev; | ||
66 | if (flags & BLKDEV_IFL_WAIT) | ||
67 | bio->bi_private = &wait; | ||
68 | |||
69 | /* | ||
70 | * Add a zeroed one-sector payload as that's what | ||
71 | * our current implementations need. If we ever need | ||
72 | * more, the interface will need revisiting. | ||
73 | */ | ||
74 | page = alloc_page(gfp_mask | __GFP_ZERO); | ||
75 | if (!page) | ||
76 | goto out_free_bio; | ||
77 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
78 | goto out_free_page; | ||
79 | |||
80 | /* | ||
81 | * And override the bio size - the way discard works we | ||
82 | * touch many more blocks on disk than the actual payload | ||
83 | * length. | ||
84 | */ | ||
85 | if (nr_sects > max_discard_sectors) { | ||
86 | bio->bi_size = max_discard_sectors << 9; | ||
87 | nr_sects -= max_discard_sectors; | ||
88 | sector += max_discard_sectors; | ||
89 | } else { | ||
90 | bio->bi_size = nr_sects << 9; | ||
91 | nr_sects = 0; | ||
92 | } | ||
93 | |||
94 | bio_get(bio); | ||
95 | submit_bio(type, bio); | ||
96 | |||
97 | if (flags & BLKDEV_IFL_WAIT) | ||
98 | wait_for_completion(&wait); | ||
99 | |||
100 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
101 | ret = -EOPNOTSUPP; | ||
102 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
103 | ret = -EIO; | ||
104 | bio_put(bio); | ||
105 | } | ||
106 | return ret; | ||
107 | out_free_page: | ||
108 | __free_page(page); | ||
109 | out_free_bio: | ||
110 | bio_put(bio); | ||
111 | out: | ||
112 | return -ENOMEM; | ||
113 | } | ||
114 | EXPORT_SYMBOL(blkdev_issue_discard); | ||
115 | |||
116 | struct bio_batch | ||
117 | { | ||
118 | atomic_t done; | ||
119 | unsigned long flags; | ||
120 | struct completion *wait; | ||
121 | bio_end_io_t *end_io; | ||
122 | }; | ||
123 | |||
124 | static void bio_batch_end_io(struct bio *bio, int err) | ||
125 | { | ||
126 | struct bio_batch *bb = bio->bi_private; | ||
127 | |||
128 | if (err) { | ||
129 | if (err == -EOPNOTSUPP) | ||
130 | set_bit(BIO_EOPNOTSUPP, &bb->flags); | ||
131 | else | ||
132 | clear_bit(BIO_UPTODATE, &bb->flags); | ||
133 | } | ||
134 | if (bb) { | ||
135 | if (bb->end_io) | ||
136 | bb->end_io(bio, err); | ||
137 | atomic_inc(&bb->done); | ||
138 | complete(bb->wait); | ||
139 | } | ||
140 | bio_put(bio); | ||
141 | } | ||
142 | |||
143 | /** | ||
144 | * blkdev_issue_zeroout - generate a number of zero-filled write bios | ||
145 | * @bdev: blockdev to issue | ||
146 | * @sector: start sector | ||
147 | * @nr_sects: number of sectors to write | ||
148 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
149 | * @flags: BLKDEV_IFL_* flags to control behaviour | ||
150 | * | ||
151 | * Description: | ||
152 | * Generate and issue a number of bios with zero-filled pages. | ||
153 | * Send a barrier at the beginning and at the end if requested. This guarantees | ||
154 | * correct request ordering. An empty barrier allows us to avoid a post-queue flush. | ||
155 | */ | ||
156 | |||
157 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | ||
158 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) | ||
159 | { | ||
160 | int ret = 0; | ||
161 | struct bio *bio; | ||
162 | struct bio_batch bb; | ||
163 | unsigned int sz, issued = 0; | ||
164 | DECLARE_COMPLETION_ONSTACK(wait); | ||
165 | |||
166 | atomic_set(&bb.done, 0); | ||
167 | bb.flags = 1 << BIO_UPTODATE; | ||
168 | bb.wait = &wait; | ||
169 | bb.end_io = NULL; | ||
170 | |||
171 | if (flags & BLKDEV_IFL_BARRIER) { | ||
172 | /* issue async barrier before the data */ | ||
173 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); | ||
174 | if (ret) | ||
175 | return ret; | ||
176 | } | ||
177 | submit: | ||
178 | while (nr_sects != 0) { | ||
179 | bio = bio_alloc(gfp_mask, | ||
180 | min(nr_sects, (sector_t)BIO_MAX_PAGES)); | ||
181 | if (!bio) | ||
182 | break; | ||
183 | |||
184 | bio->bi_sector = sector; | ||
185 | bio->bi_bdev = bdev; | ||
186 | bio->bi_end_io = bio_batch_end_io; | ||
187 | if (flags & BLKDEV_IFL_WAIT) | ||
188 | bio->bi_private = &bb; | ||
189 | |||
190 | while (nr_sects != 0) { | ||
191 | sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); | ||
192 | if (sz == 0) | ||
193 | /* bio has maximum size possible */ | ||
194 | break; | ||
195 | ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0); | ||
196 | nr_sects -= ret >> 9; | ||
197 | sector += ret >> 9; | ||
198 | if (ret < (sz << 9)) | ||
199 | break; | ||
200 | } | ||
201 | issued++; | ||
202 | submit_bio(WRITE, bio); | ||
203 | } | ||
204 | /* | ||
205 | * When all data bios are in flight, send the final barrier if requested. | ||
206 | */ | ||
207 | if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) | ||
208 | ret = blkdev_issue_flush(bdev, gfp_mask, NULL, | ||
209 | flags & BLKDEV_IFL_WAIT); | ||
210 | |||
211 | |||
212 | if (flags & BLKDEV_IFL_WAIT) | ||
213 | /* Wait for bios in-flight */ | ||
214 | while ( issued != atomic_read(&bb.done)) | ||
215 | wait_for_completion(&wait); | ||
216 | |||
217 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | ||
218 | /* One of the bios in the batch completed with an error. */ | ||
219 | ret = -EIO; | ||
220 | |||
221 | if (ret) | ||
222 | goto out; | ||
223 | |||
224 | if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) { | ||
225 | ret = -EOPNOTSUPP; | ||
226 | goto out; | ||
227 | } | ||
228 | if (nr_sects != 0) | ||
229 | goto submit; | ||
230 | out: | ||
231 | return ret; | ||
232 | } | ||
233 | EXPORT_SYMBOL(blkdev_issue_zeroout); | ||
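The kernel-doc above describes the interface; for orientation, this is how a hypothetical caller could use the new BLKDEV_IFL_* flags, mirroring the file-system callers converted later in this series (sketch only, not code from the patch):

#include <linux/blkdev.h>

/* Hypothetical caller, shown only to illustrate the flag usage. */
static int zap_range(struct block_device *bdev, sector_t sector,
		     sector_t nr_sects)
{
	int ret;

	/* Discard synchronously, with barriers around the request. */
	ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
	if (ret != -EOPNOTSUPP)
		return ret;

	/* Device cannot discard: fall back to writing zeroes. */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				    BLKDEV_IFL_WAIT);
}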
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 838834be115b..0f3eb70f9ce1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4; | |||
55 | #define RQ_CIC(rq) \ | 55 | #define RQ_CIC(rq) \ |
56 | ((struct cfq_io_context *) (rq)->elevator_private) | 56 | ((struct cfq_io_context *) (rq)->elevator_private) |
57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) | 57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) |
58 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) | ||
58 | 59 | ||
59 | static struct kmem_cache *cfq_pool; | 60 | static struct kmem_cache *cfq_pool; |
60 | static struct kmem_cache *cfq_ioc_pool; | 61 | static struct kmem_cache *cfq_ioc_pool; |
@@ -143,8 +144,6 @@ struct cfq_queue { | |||
143 | struct cfq_queue *new_cfqq; | 144 | struct cfq_queue *new_cfqq; |
144 | struct cfq_group *cfqg; | 145 | struct cfq_group *cfqg; |
145 | struct cfq_group *orig_cfqg; | 146 | struct cfq_group *orig_cfqg; |
146 | /* Sectors dispatched in current dispatch round */ | ||
147 | unsigned long nr_sectors; | ||
148 | }; | 147 | }; |
149 | 148 | ||
150 | /* | 149 | /* |
@@ -346,7 +345,7 @@ CFQ_CFQQ_FNS(deep); | |||
346 | CFQ_CFQQ_FNS(wait_busy); | 345 | CFQ_CFQQ_FNS(wait_busy); |
347 | #undef CFQ_CFQQ_FNS | 346 | #undef CFQ_CFQQ_FNS |
348 | 347 | ||
349 | #ifdef CONFIG_DEBUG_CFQ_IOSCHED | 348 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
350 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 349 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
351 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ | 350 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ |
352 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ | 351 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ |
@@ -858,7 +857,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
858 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 857 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
859 | cfq_rb_erase(&cfqg->rb_node, st); | 858 | cfq_rb_erase(&cfqg->rb_node, st); |
860 | cfqg->saved_workload_slice = 0; | 859 | cfqg->saved_workload_slice = 0; |
861 | blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1); | 860 | blkiocg_update_dequeue_stats(&cfqg->blkg, 1); |
862 | } | 861 | } |
863 | 862 | ||
864 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | 863 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) |
@@ -884,8 +883,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | |||
884 | slice_used = cfqq->allocated_slice; | 883 | slice_used = cfqq->allocated_slice; |
885 | } | 884 | } |
886 | 885 | ||
887 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used, | 886 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used); |
888 | cfqq->nr_sectors); | ||
889 | return slice_used; | 887 | return slice_used; |
890 | } | 888 | } |
891 | 889 | ||
@@ -919,8 +917,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
919 | 917 | ||
920 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, | 918 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, |
921 | st->min_vdisktime); | 919 | st->min_vdisktime); |
922 | blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl, | 920 | blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); |
923 | cfqq->nr_sectors); | 921 | blkiocg_set_start_empty_time(&cfqg->blkg); |
924 | } | 922 | } |
925 | 923 | ||
926 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 924 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
@@ -961,7 +959,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
961 | if (!cfqg) | 959 | if (!cfqg) |
962 | goto done; | 960 | goto done; |
963 | 961 | ||
964 | cfqg->weight = blkcg->weight; | ||
965 | for_each_cfqg_st(cfqg, i, j, st) | 962 | for_each_cfqg_st(cfqg, i, j, st) |
966 | *st = CFQ_RB_ROOT; | 963 | *st = CFQ_RB_ROOT; |
967 | RB_CLEAR_NODE(&cfqg->rb_node); | 964 | RB_CLEAR_NODE(&cfqg->rb_node); |
@@ -978,6 +975,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
978 | sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); | 975 | sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); |
979 | blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, | 976 | blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, |
980 | MKDEV(major, minor)); | 977 | MKDEV(major, minor)); |
978 | cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); | ||
981 | 979 | ||
982 | /* Add group on cfqd list */ | 980 | /* Add group on cfqd list */ |
983 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); | 981 | hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); |
@@ -1004,6 +1002,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
1004 | return cfqg; | 1002 | return cfqg; |
1005 | } | 1003 | } |
1006 | 1004 | ||
1005 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
1006 | { | ||
1007 | atomic_inc(&cfqg->ref); | ||
1008 | return cfqg; | ||
1009 | } | ||
1010 | |||
1007 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | 1011 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) |
1008 | { | 1012 | { |
1009 | /* Currently, all async queues are mapped to root group */ | 1013 | /* Currently, all async queues are mapped to root group */ |
@@ -1087,6 +1091,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
1087 | { | 1091 | { |
1088 | return &cfqd->root_group; | 1092 | return &cfqd->root_group; |
1089 | } | 1093 | } |
1094 | |||
1095 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
1096 | { | ||
1097 | return cfqg; | ||
1098 | } | ||
1099 | |||
1090 | static inline void | 1100 | static inline void |
1091 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { | 1101 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { |
1092 | cfqq->cfqg = cfqg; | 1102 | cfqq->cfqg = cfqg; |
@@ -1389,7 +1399,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | |||
1389 | { | 1399 | { |
1390 | elv_rb_del(&cfqq->sort_list, rq); | 1400 | elv_rb_del(&cfqq->sort_list, rq); |
1391 | cfqq->queued[rq_is_sync(rq)]--; | 1401 | cfqq->queued[rq_is_sync(rq)]--; |
1402 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), | ||
1403 | rq_is_sync(rq)); | ||
1392 | cfq_add_rq_rb(rq); | 1404 | cfq_add_rq_rb(rq); |
1405 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, | ||
1406 | &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), | ||
1407 | rq_is_sync(rq)); | ||
1393 | } | 1408 | } |
1394 | 1409 | ||
1395 | static struct request * | 1410 | static struct request * |
@@ -1445,6 +1460,8 @@ static void cfq_remove_request(struct request *rq) | |||
1445 | cfq_del_rq_rb(rq); | 1460 | cfq_del_rq_rb(rq); |
1446 | 1461 | ||
1447 | cfqq->cfqd->rq_queued--; | 1462 | cfqq->cfqd->rq_queued--; |
1463 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), | ||
1464 | rq_is_sync(rq)); | ||
1448 | if (rq_is_meta(rq)) { | 1465 | if (rq_is_meta(rq)) { |
1449 | WARN_ON(!cfqq->meta_pending); | 1466 | WARN_ON(!cfqq->meta_pending); |
1450 | cfqq->meta_pending--; | 1467 | cfqq->meta_pending--; |
@@ -1476,6 +1493,13 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, | |||
1476 | } | 1493 | } |
1477 | } | 1494 | } |
1478 | 1495 | ||
1496 | static void cfq_bio_merged(struct request_queue *q, struct request *req, | ||
1497 | struct bio *bio) | ||
1498 | { | ||
1499 | blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio), | ||
1500 | cfq_bio_sync(bio)); | ||
1501 | } | ||
1502 | |||
1479 | static void | 1503 | static void |
1480 | cfq_merged_requests(struct request_queue *q, struct request *rq, | 1504 | cfq_merged_requests(struct request_queue *q, struct request *rq, |
1481 | struct request *next) | 1505 | struct request *next) |
@@ -1493,6 +1517,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, | |||
1493 | if (cfqq->next_rq == next) | 1517 | if (cfqq->next_rq == next) |
1494 | cfqq->next_rq = rq; | 1518 | cfqq->next_rq = rq; |
1495 | cfq_remove_request(next); | 1519 | cfq_remove_request(next); |
1520 | blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), | ||
1521 | rq_is_sync(next)); | ||
1496 | } | 1522 | } |
1497 | 1523 | ||
1498 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, | 1524 | static int cfq_allow_merge(struct request_queue *q, struct request *rq, |
@@ -1520,18 +1546,24 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
1520 | return cfqq == RQ_CFQQ(rq); | 1546 | return cfqq == RQ_CFQQ(rq); |
1521 | } | 1547 | } |
1522 | 1548 | ||
1549 | static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
1550 | { | ||
1551 | del_timer(&cfqd->idle_slice_timer); | ||
1552 | blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); | ||
1553 | } | ||
1554 | |||
1523 | static void __cfq_set_active_queue(struct cfq_data *cfqd, | 1555 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
1524 | struct cfq_queue *cfqq) | 1556 | struct cfq_queue *cfqq) |
1525 | { | 1557 | { |
1526 | if (cfqq) { | 1558 | if (cfqq) { |
1527 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", | 1559 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", |
1528 | cfqd->serving_prio, cfqd->serving_type); | 1560 | cfqd->serving_prio, cfqd->serving_type); |
1561 | blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg); | ||
1529 | cfqq->slice_start = 0; | 1562 | cfqq->slice_start = 0; |
1530 | cfqq->dispatch_start = jiffies; | 1563 | cfqq->dispatch_start = jiffies; |
1531 | cfqq->allocated_slice = 0; | 1564 | cfqq->allocated_slice = 0; |
1532 | cfqq->slice_end = 0; | 1565 | cfqq->slice_end = 0; |
1533 | cfqq->slice_dispatch = 0; | 1566 | cfqq->slice_dispatch = 0; |
1534 | cfqq->nr_sectors = 0; | ||
1535 | 1567 | ||
1536 | cfq_clear_cfqq_wait_request(cfqq); | 1568 | cfq_clear_cfqq_wait_request(cfqq); |
1537 | cfq_clear_cfqq_must_dispatch(cfqq); | 1569 | cfq_clear_cfqq_must_dispatch(cfqq); |
@@ -1539,7 +1571,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, | |||
1539 | cfq_clear_cfqq_fifo_expire(cfqq); | 1571 | cfq_clear_cfqq_fifo_expire(cfqq); |
1540 | cfq_mark_cfqq_slice_new(cfqq); | 1572 | cfq_mark_cfqq_slice_new(cfqq); |
1541 | 1573 | ||
1542 | del_timer(&cfqd->idle_slice_timer); | 1574 | cfq_del_timer(cfqd, cfqq); |
1543 | } | 1575 | } |
1544 | 1576 | ||
1545 | cfqd->active_queue = cfqq; | 1577 | cfqd->active_queue = cfqq; |
@@ -1555,7 +1587,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1555 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); | 1587 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); |
1556 | 1588 | ||
1557 | if (cfq_cfqq_wait_request(cfqq)) | 1589 | if (cfq_cfqq_wait_request(cfqq)) |
1558 | del_timer(&cfqd->idle_slice_timer); | 1590 | cfq_del_timer(cfqd, cfqq); |
1559 | 1591 | ||
1560 | cfq_clear_cfqq_wait_request(cfqq); | 1592 | cfq_clear_cfqq_wait_request(cfqq); |
1561 | cfq_clear_cfqq_wait_busy(cfqq); | 1593 | cfq_clear_cfqq_wait_busy(cfqq); |
@@ -1857,6 +1889,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
1857 | sl = cfqd->cfq_slice_idle; | 1889 | sl = cfqd->cfq_slice_idle; |
1858 | 1890 | ||
1859 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 1891 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); |
1892 | blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); | ||
1860 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); | 1893 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); |
1861 | } | 1894 | } |
1862 | 1895 | ||
@@ -1876,7 +1909,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
1876 | elv_dispatch_sort(q, rq); | 1909 | elv_dispatch_sort(q, rq); |
1877 | 1910 | ||
1878 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; | 1911 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; |
1879 | cfqq->nr_sectors += blk_rq_sectors(rq); | 1912 | blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), |
1913 | rq_data_dir(rq), rq_is_sync(rq)); | ||
1880 | } | 1914 | } |
1881 | 1915 | ||
1882 | /* | 1916 | /* |
@@ -3185,11 +3219,14 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3185 | if (cfq_cfqq_wait_request(cfqq)) { | 3219 | if (cfq_cfqq_wait_request(cfqq)) { |
3186 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || | 3220 | if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || |
3187 | cfqd->busy_queues > 1) { | 3221 | cfqd->busy_queues > 1) { |
3188 | del_timer(&cfqd->idle_slice_timer); | 3222 | cfq_del_timer(cfqd, cfqq); |
3189 | cfq_clear_cfqq_wait_request(cfqq); | 3223 | cfq_clear_cfqq_wait_request(cfqq); |
3190 | __blk_run_queue(cfqd->queue); | 3224 | __blk_run_queue(cfqd->queue); |
3191 | } else | 3225 | } else { |
3226 | blkiocg_update_idle_time_stats( | ||
3227 | &cfqq->cfqg->blkg); | ||
3192 | cfq_mark_cfqq_must_dispatch(cfqq); | 3228 | cfq_mark_cfqq_must_dispatch(cfqq); |
3229 | } | ||
3193 | } | 3230 | } |
3194 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { | 3231 | } else if (cfq_should_preempt(cfqd, cfqq, rq)) { |
3195 | /* | 3232 | /* |
@@ -3214,7 +3251,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
3214 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 3251 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
3215 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 3252 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
3216 | cfq_add_rq_rb(rq); | 3253 | cfq_add_rq_rb(rq); |
3217 | 3254 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, | |
3255 | &cfqd->serving_group->blkg, rq_data_dir(rq), | ||
3256 | rq_is_sync(rq)); | ||
3218 | cfq_rq_enqueued(cfqd, cfqq, rq); | 3257 | cfq_rq_enqueued(cfqd, cfqq, rq); |
3219 | } | 3258 | } |
3220 | 3259 | ||
@@ -3300,6 +3339,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
3300 | WARN_ON(!cfqq->dispatched); | 3339 | WARN_ON(!cfqq->dispatched); |
3301 | cfqd->rq_in_driver--; | 3340 | cfqd->rq_in_driver--; |
3302 | cfqq->dispatched--; | 3341 | cfqq->dispatched--; |
3342 | blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq), | ||
3343 | rq_io_start_time_ns(rq), rq_data_dir(rq), | ||
3344 | rq_is_sync(rq)); | ||
3303 | 3345 | ||
3304 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; | 3346 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; |
3305 | 3347 | ||
@@ -3440,6 +3482,10 @@ static void cfq_put_request(struct request *rq) | |||
3440 | rq->elevator_private = NULL; | 3482 | rq->elevator_private = NULL; |
3441 | rq->elevator_private2 = NULL; | 3483 | rq->elevator_private2 = NULL; |
3442 | 3484 | ||
3485 | /* Put down rq reference on cfqg */ | ||
3486 | cfq_put_cfqg(RQ_CFQG(rq)); | ||
3487 | rq->elevator_private3 = NULL; | ||
3488 | |||
3443 | cfq_put_queue(cfqq); | 3489 | cfq_put_queue(cfqq); |
3444 | } | 3490 | } |
3445 | } | 3491 | } |
@@ -3528,6 +3574,7 @@ new_queue: | |||
3528 | 3574 | ||
3529 | rq->elevator_private = cic; | 3575 | rq->elevator_private = cic; |
3530 | rq->elevator_private2 = cfqq; | 3576 | rq->elevator_private2 = cfqq; |
3577 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3531 | return 0; | 3578 | return 0; |
3532 | 3579 | ||
3533 | queue_fail: | 3580 | queue_fail: |
@@ -3870,6 +3917,7 @@ static struct elevator_type iosched_cfq = { | |||
3870 | .elevator_merged_fn = cfq_merged_request, | 3917 | .elevator_merged_fn = cfq_merged_request, |
3871 | .elevator_merge_req_fn = cfq_merged_requests, | 3918 | .elevator_merge_req_fn = cfq_merged_requests, |
3872 | .elevator_allow_merge_fn = cfq_allow_merge, | 3919 | .elevator_allow_merge_fn = cfq_allow_merge, |
3920 | .elevator_bio_merged_fn = cfq_bio_merged, | ||
3873 | .elevator_dispatch_fn = cfq_dispatch_requests, | 3921 | .elevator_dispatch_fn = cfq_dispatch_requests, |
3874 | .elevator_add_req_fn = cfq_insert_request, | 3922 | .elevator_add_req_fn = cfq_insert_request, |
3875 | .elevator_activate_req_fn = cfq_activate_request, | 3923 | .elevator_activate_req_fn = cfq_activate_request, |
diff --git a/block/elevator.c b/block/elevator.c index 76e3702d5381..5e734592bb40 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -539,6 +539,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
539 | q->last_merge = rq; | 539 | q->last_merge = rq; |
540 | } | 540 | } |
541 | 541 | ||
542 | void elv_bio_merged(struct request_queue *q, struct request *rq, | ||
543 | struct bio *bio) | ||
544 | { | ||
545 | struct elevator_queue *e = q->elevator; | ||
546 | |||
547 | if (e->ops->elevator_bio_merged_fn) | ||
548 | e->ops->elevator_bio_merged_fn(q, rq, bio); | ||
549 | } | ||
550 | |||
542 | void elv_requeue_request(struct request_queue *q, struct request *rq) | 551 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
543 | { | 552 | { |
544 | /* | 553 | /* |
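The new elv_bio_merged() hook notifies a scheduler when a bio is merged into an existing request, as opposed to two requests being merged. CFQ uses it above to bump the per-group merge counters; a minimal sketch of how a hypothetical scheduler ("foo", an assumption for illustration, not part of the patch) could wire up the same hook:

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/module.h>

/* Hypothetical scheduler, shown only to illustrate the new hook. */
static void foo_bio_merged(struct request_queue *q, struct request *rq,
			   struct bio *bio)
{
	/* Called from __make_request() via elv_bio_merged() once @bio
	 * has been merged into @rq. */
	pr_debug("bio merged into request at sector %llu\n",
		 (unsigned long long)blk_rq_pos(rq));
}

static struct elevator_type iosched_foo = {
	.ops = {
		.elevator_bio_merged_fn	= foo_bio_merged,
		/* ... the rest of the usual elevator ops ... */
	},
	.elevator_name	= "foo",
	.elevator_owner	= THIS_MODULE,
};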
diff --git a/block/genhd.c b/block/genhd.c index d13ba76a169c..154b5f80b3ab 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -596,6 +596,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) | |||
596 | 596 | ||
597 | return disk; | 597 | return disk; |
598 | } | 598 | } |
599 | EXPORT_SYMBOL(get_gendisk); | ||
599 | 600 | ||
600 | /** | 601 | /** |
601 | * bdget_disk - do bdget() by gendisk and partition number | 602 | * bdget_disk - do bdget() by gendisk and partition number |
diff --git a/block/ioctl.c b/block/ioctl.c index 8905d2a2a717..e8eb679f2f9b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -126,7 +126,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | |||
126 | if (start + len > (bdev->bd_inode->i_size >> 9)) | 126 | if (start + len > (bdev->bd_inode->i_size >> 9)) |
127 | return -EINVAL; | 127 | return -EINVAL; |
128 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, | 128 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, |
129 | DISCARD_FL_WAIT); | 129 | BLKDEV_IFL_WAIT); |
130 | } | 130 | } |
131 | 131 | ||
132 | static int put_ushort(unsigned long arg, unsigned short val) | 132 | static int put_ushort(unsigned long arg, unsigned short val) |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e5e86a781820..d6f1ae342b1d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -2251,7 +2251,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) | |||
2251 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) | 2251 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) |
2252 | return; | 2252 | return; |
2253 | 2253 | ||
2254 | r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); | 2254 | r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, |
2255 | BLKDEV_IFL_WAIT); | ||
2255 | if (r) { | 2256 | if (r) { |
2256 | set_bit(MD_NO_BARRIER, &mdev->flags); | 2257 | set_bit(MD_NO_BARRIER, &mdev->flags); |
2257 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); | 2258 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3f096e7959b4..c786023001d2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -946,7 +946,8 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d | |||
946 | int rv; | 946 | int rv; |
947 | 947 | ||
948 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { | 948 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { |
949 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL); | 949 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, |
950 | NULL, BLKDEV_IFL_WAIT); | ||
950 | if (rv) { | 951 | if (rv) { |
951 | dev_err(DEV, "local disk flush failed with status %d\n", rv); | 952 | dev_err(DEV, "local disk flush failed with status %d\n", rv); |
952 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 953 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 6dcee88c2e5d..55dcb7884f4d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -417,7 +417,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
417 | */ | 417 | */ |
418 | mutex_unlock(&bd_inode->i_mutex); | 418 | mutex_unlock(&bd_inode->i_mutex); |
419 | 419 | ||
420 | error = blkdev_issue_flush(bdev, NULL); | 420 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); |
421 | if (error == -EOPNOTSUPP) | 421 | if (error == -EOPNOTSUPP) |
422 | error = 0; | 422 | error = 0; |
423 | 423 | ||
@@ -668,41 +668,209 @@ void bd_forget(struct inode *inode) | |||
668 | iput(bdev->bd_inode); | 668 | iput(bdev->bd_inode); |
669 | } | 669 | } |
670 | 670 | ||
671 | int bd_claim(struct block_device *bdev, void *holder) | 671 | /** |
672 | * bd_may_claim - test whether a block device can be claimed | ||
673 | * @bdev: block device of interest | ||
674 | * @whole: whole block device containing @bdev, may equal @bdev | ||
675 | * @holder: holder trying to claim @bdev | ||
676 | * | ||
677 | * Test whether @bdev can be claimed by @holder. | ||
678 | * | ||
679 | * CONTEXT: | ||
680 | * spin_lock(&bdev_lock). | ||
681 | * | ||
682 | * RETURNS: | ||
683 | * %true if @bdev can be claimed, %false otherwise. | ||
684 | */ | ||
685 | static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, | ||
686 | void *holder) | ||
672 | { | 687 | { |
673 | int res; | ||
674 | spin_lock(&bdev_lock); | ||
675 | |||
676 | /* first decide result */ | ||
677 | if (bdev->bd_holder == holder) | 688 | if (bdev->bd_holder == holder) |
678 | res = 0; /* already a holder */ | 689 | return true; /* already a holder */ |
679 | else if (bdev->bd_holder != NULL) | 690 | else if (bdev->bd_holder != NULL) |
680 | res = -EBUSY; /* held by someone else */ | 691 | return false; /* held by someone else */ |
681 | else if (bdev->bd_contains == bdev) | 692 | else if (bdev->bd_contains == bdev) |
682 | res = 0; /* is a whole device which isn't held */ | 693 | return true; /* is a whole device which isn't held */ |
683 | 694 | ||
684 | else if (bdev->bd_contains->bd_holder == bd_claim) | 695 | else if (whole->bd_holder == bd_claim) |
685 | res = 0; /* is a partition of a device that is being partitioned */ | 696 | return true; /* is a partition of a device that is being partitioned */ |
686 | else if (bdev->bd_contains->bd_holder != NULL) | 697 | else if (whole->bd_holder != NULL) |
687 | res = -EBUSY; /* is a partition of a held device */ | 698 | return false; /* is a partition of a held device */ |
688 | else | 699 | else |
689 | res = 0; /* is a partition of an un-held device */ | 700 | return true; /* is a partition of an un-held device */ |
701 | } | ||
702 | |||
703 | /** | ||
704 | * bd_prepare_to_claim - prepare to claim a block device | ||
705 | * @bdev: block device of interest | ||
706 | * @whole: the whole device containing @bdev, may equal @bdev | ||
707 | * @holder: holder trying to claim @bdev | ||
708 | * | ||
709 | * Prepare to claim @bdev. This function fails if @bdev is already | ||
710 | * claimed by another holder and waits if another claiming is in | ||
711 | * progress. This function doesn't actually claim. On successful | ||
712 | * return, the caller has ownership of bd_claiming and bd_holder[s]. | ||
713 | * | ||
714 | * CONTEXT: | ||
715 | * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab | ||
716 | * it multiple times. | ||
717 | * | ||
718 | * RETURNS: | ||
719 | * 0 if @bdev can be claimed, -EBUSY otherwise. | ||
720 | */ | ||
721 | static int bd_prepare_to_claim(struct block_device *bdev, | ||
722 | struct block_device *whole, void *holder) | ||
723 | { | ||
724 | retry: | ||
725 | /* if someone else claimed, fail */ | ||
726 | if (!bd_may_claim(bdev, whole, holder)) | ||
727 | return -EBUSY; | ||
728 | |||
729 | /* if someone else is claiming, wait for it to finish */ | ||
730 | if (whole->bd_claiming && whole->bd_claiming != holder) { | ||
731 | wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); | ||
732 | DEFINE_WAIT(wait); | ||
733 | |||
734 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
735 | spin_unlock(&bdev_lock); | ||
736 | schedule(); | ||
737 | finish_wait(wq, &wait); | ||
738 | spin_lock(&bdev_lock); | ||
739 | goto retry; | ||
740 | } | ||
741 | |||
742 | /* yay, all mine */ | ||
743 | return 0; | ||
744 | } | ||
745 | |||
746 | /** | ||
747 | * bd_start_claiming - start claiming a block device | ||
748 | * @bdev: block device of interest | ||
749 | * @holder: holder trying to claim @bdev | ||
750 | * | ||
751 | * @bdev is about to be opened exclusively. Check whether @bdev can be opened | ||
752 | * exclusively and mark that an exclusive open is in progress. Each | ||
753 | * successful call to this function must be matched with a call to | ||
754 | * either bd_claim() or bd_abort_claiming(). If this function | ||
755 | * succeeds, the matching bd_claim() is guaranteed to succeed. | ||
756 | * | ||
757 | * CONTEXT: | ||
758 | * Might sleep. | ||
759 | * | ||
760 | * RETURNS: | ||
761 | * Pointer to the block device containing @bdev on success, ERR_PTR() | ||
762 | * value on failure. | ||
763 | */ | ||
764 | static struct block_device *bd_start_claiming(struct block_device *bdev, | ||
765 | void *holder) | ||
766 | { | ||
767 | struct gendisk *disk; | ||
768 | struct block_device *whole; | ||
769 | int partno, err; | ||
770 | |||
771 | might_sleep(); | ||
772 | |||
773 | /* | ||
774 | * @bdev might not have been initialized properly yet, look up | ||
775 | * and grab the outer block device the hard way. | ||
776 | */ | ||
777 | disk = get_gendisk(bdev->bd_dev, &partno); | ||
778 | if (!disk) | ||
779 | return ERR_PTR(-ENXIO); | ||
780 | |||
781 | whole = bdget_disk(disk, 0); | ||
782 | put_disk(disk); | ||
783 | if (!whole) | ||
784 | return ERR_PTR(-ENOMEM); | ||
785 | |||
786 | /* prepare to claim, if successful, mark claiming in progress */ | ||
787 | spin_lock(&bdev_lock); | ||
788 | |||
789 | err = bd_prepare_to_claim(bdev, whole, holder); | ||
790 | if (err == 0) { | ||
791 | whole->bd_claiming = holder; | ||
792 | spin_unlock(&bdev_lock); | ||
793 | return whole; | ||
794 | } else { | ||
795 | spin_unlock(&bdev_lock); | ||
796 | bdput(whole); | ||
797 | return ERR_PTR(err); | ||
798 | } | ||
799 | } | ||
690 | 800 | ||
691 | /* now impose change */ | 801 | /* releases bdev_lock */ |
692 | if (res==0) { | 802 | static void __bd_abort_claiming(struct block_device *whole, void *holder) |
803 | { | ||
804 | BUG_ON(whole->bd_claiming != holder); | ||
805 | whole->bd_claiming = NULL; | ||
806 | wake_up_bit(&whole->bd_claiming, 0); | ||
807 | |||
808 | spin_unlock(&bdev_lock); | ||
809 | bdput(whole); | ||
810 | } | ||
811 | |||
812 | /** | ||
813 | * bd_abort_claiming - abort claiming a block device | ||
814 | * @whole: whole block device returned by bd_start_claiming() | ||
815 | * @holder: holder trying to claim @bdev | ||
816 | * | ||
817 | * Abort a block device claiming started by bd_start_claiming(). Note that | ||
818 | * @whole is not the block device to be claimed but the whole device | ||
819 | * returned by bd_start_claiming(). | ||
820 | * | ||
821 | * CONTEXT: | ||
822 | * Grabs and releases bdev_lock. | ||
823 | */ | ||
824 | static void bd_abort_claiming(struct block_device *whole, void *holder) | ||
825 | { | ||
826 | spin_lock(&bdev_lock); | ||
827 | __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | ||
828 | } | ||
829 | |||
830 | /** | ||
831 | * bd_claim - claim a block device | ||
832 | * @bdev: block device to claim | ||
833 | * @holder: holder trying to claim @bdev | ||
834 | * | ||
835 | * Try to claim @bdev which must have been opened successfully. This | ||
836 | * function may be called with or without preceding | ||
837 | * bd_start_claiming(). In the former case, this function is always | ||
838 | * successful and terminates the claiming process. | ||
839 | * | ||
840 | * CONTEXT: | ||
841 | * Might sleep. | ||
842 | * | ||
843 | * RETURNS: | ||
844 | * 0 if successful, -EBUSY if @bdev is already claimed. | ||
845 | */ | ||
846 | int bd_claim(struct block_device *bdev, void *holder) | ||
847 | { | ||
848 | struct block_device *whole = bdev->bd_contains; | ||
849 | int res; | ||
850 | |||
851 | might_sleep(); | ||
852 | |||
853 | spin_lock(&bdev_lock); | ||
854 | |||
855 | res = bd_prepare_to_claim(bdev, whole, holder); | ||
856 | if (res == 0) { | ||
693 | /* note that for a whole device bd_holders | 857 | /* note that for a whole device bd_holders |
694 | * will be incremented twice, and bd_holder will | 858 | * will be incremented twice, and bd_holder will |
695 | * be set to bd_claim before being set to holder | 859 | * be set to bd_claim before being set to holder |
696 | */ | 860 | */ |
697 | bdev->bd_contains->bd_holders ++; | 861 | whole->bd_holders++; |
698 | bdev->bd_contains->bd_holder = bd_claim; | 862 | whole->bd_holder = bd_claim; |
699 | bdev->bd_holders++; | 863 | bdev->bd_holders++; |
700 | bdev->bd_holder = holder; | 864 | bdev->bd_holder = holder; |
701 | } | 865 | } |
702 | spin_unlock(&bdev_lock); | 866 | |
867 | if (whole->bd_claiming) | ||
868 | __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | ||
869 | else | ||
870 | spin_unlock(&bdev_lock); | ||
871 | |||
703 | return res; | 872 | return res; |
704 | } | 873 | } |
705 | |||
706 | EXPORT_SYMBOL(bd_claim); | 874 | EXPORT_SYMBOL(bd_claim); |
707 | 875 | ||
708 | void bd_release(struct block_device *bdev) | 876 | void bd_release(struct block_device *bdev) |
@@ -1316,6 +1484,7 @@ EXPORT_SYMBOL(blkdev_get); | |||
1316 | 1484 | ||
1317 | static int blkdev_open(struct inode * inode, struct file * filp) | 1485 | static int blkdev_open(struct inode * inode, struct file * filp) |
1318 | { | 1486 | { |
1487 | struct block_device *whole = NULL; | ||
1319 | struct block_device *bdev; | 1488 | struct block_device *bdev; |
1320 | int res; | 1489 | int res; |
1321 | 1490 | ||
@@ -1338,22 +1507,25 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1338 | if (bdev == NULL) | 1507 | if (bdev == NULL) |
1339 | return -ENOMEM; | 1508 | return -ENOMEM; |
1340 | 1509 | ||
1510 | if (filp->f_mode & FMODE_EXCL) { | ||
1511 | whole = bd_start_claiming(bdev, filp); | ||
1512 | if (IS_ERR(whole)) { | ||
1513 | bdput(bdev); | ||
1514 | return PTR_ERR(whole); | ||
1515 | } | ||
1516 | } | ||
1517 | |||
1341 | filp->f_mapping = bdev->bd_inode->i_mapping; | 1518 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1342 | 1519 | ||
1343 | res = blkdev_get(bdev, filp->f_mode); | 1520 | res = blkdev_get(bdev, filp->f_mode); |
1344 | if (res) | ||
1345 | return res; | ||
1346 | 1521 | ||
1347 | if (filp->f_mode & FMODE_EXCL) { | 1522 | if (whole) { |
1348 | res = bd_claim(bdev, filp); | 1523 | if (res == 0) |
1349 | if (res) | 1524 | BUG_ON(bd_claim(bdev, filp) != 0); |
1350 | goto out_blkdev_put; | 1525 | else |
1526 | bd_abort_claiming(whole, filp); | ||
1351 | } | 1527 | } |
1352 | 1528 | ||
1353 | return 0; | ||
1354 | |||
1355 | out_blkdev_put: | ||
1356 | blkdev_put(bdev, filp->f_mode); | ||
1357 | return res; | 1529 | return res; |
1358 | } | 1530 | } |
1359 | 1531 | ||
@@ -1564,27 +1736,34 @@ EXPORT_SYMBOL(lookup_bdev); | |||
1564 | */ | 1736 | */ |
1565 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) | 1737 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) |
1566 | { | 1738 | { |
1567 | struct block_device *bdev; | 1739 | struct block_device *bdev, *whole; |
1568 | int error = 0; | 1740 | int error; |
1569 | 1741 | ||
1570 | bdev = lookup_bdev(path); | 1742 | bdev = lookup_bdev(path); |
1571 | if (IS_ERR(bdev)) | 1743 | if (IS_ERR(bdev)) |
1572 | return bdev; | 1744 | return bdev; |
1573 | 1745 | ||
1746 | whole = bd_start_claiming(bdev, holder); | ||
1747 | if (IS_ERR(whole)) { | ||
1748 | bdput(bdev); | ||
1749 | return whole; | ||
1750 | } | ||
1751 | |||
1574 | error = blkdev_get(bdev, mode); | 1752 | error = blkdev_get(bdev, mode); |
1575 | if (error) | 1753 | if (error) |
1576 | return ERR_PTR(error); | 1754 | goto out_abort_claiming; |
1755 | |||
1577 | error = -EACCES; | 1756 | error = -EACCES; |
1578 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) | 1757 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) |
1579 | goto blkdev_put; | 1758 | goto out_blkdev_put; |
1580 | error = bd_claim(bdev, holder); | ||
1581 | if (error) | ||
1582 | goto blkdev_put; | ||
1583 | 1759 | ||
1760 | BUG_ON(bd_claim(bdev, holder) != 0); | ||
1584 | return bdev; | 1761 | return bdev; |
1585 | 1762 | ||
1586 | blkdev_put: | 1763 | out_blkdev_put: |
1587 | blkdev_put(bdev, mode); | 1764 | blkdev_put(bdev, mode); |
1765 | out_abort_claiming: | ||
1766 | bd_abort_claiming(whole, holder); | ||
1588 | return ERR_PTR(error); | 1767 | return ERR_PTR(error); |
1589 | } | 1768 | } |
1590 | 1769 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b34d32fdaaec..c6a4f459ad76 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev, | |||
1589 | u64 start, u64 len) | 1589 | u64 start, u64 len) |
1590 | { | 1590 | { |
1591 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1591 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1592 | DISCARD_FL_BARRIER); | 1592 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
1593 | } | 1593 | } |
1594 | 1594 | ||
1595 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1595 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 8209f266e9ad..9492f6003ef9 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -91,7 +91,8 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
91 | * storage | 91 | * storage |
92 | */ | 92 | */ |
93 | if (test_opt(inode->i_sb, BARRIER)) | 93 | if (test_opt(inode->i_sb, BARRIER)) |
94 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 94 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
95 | BLKDEV_IFL_WAIT); | ||
95 | out: | 96 | out: |
96 | return ret; | 97 | return ret; |
97 | } | 98 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0d0c3239c1cd..ef3d980e67cb 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
100 | if (ext4_should_writeback_data(inode) && | 100 | if (ext4_should_writeback_data(inode) && |
101 | (journal->j_fs_dev != journal->j_dev) && | 101 | (journal->j_fs_dev != journal->j_dev) && |
102 | (journal->j_flags & JBD2_BARRIER)) | 102 | (journal->j_flags & JBD2_BARRIER)) |
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 103 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
104 | NULL, BLKDEV_IFL_WAIT); | ||
104 | jbd2_log_wait_commit(journal, commit_tid); | 105 | jbd2_log_wait_commit(journal, commit_tid); |
105 | } else if (journal->j_flags & JBD2_BARRIER) | 106 | } else if (journal->j_flags & JBD2_BARRIER) |
106 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 107 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
108 | BLKDEV_IFL_WAIT); | ||
107 | return ret; | 109 | return ret; |
108 | } | 110 | } |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 503b842f3ba2..bf011dc63471 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
854 | if ((start + nr_sects) != blk) { | 854 | if ((start + nr_sects) != blk) { |
855 | rv = blkdev_issue_discard(bdev, start, | 855 | rv = blkdev_issue_discard(bdev, start, |
856 | nr_sects, GFP_NOFS, | 856 | nr_sects, GFP_NOFS, |
857 | DISCARD_FL_BARRIER); | 857 | BLKDEV_IFL_WAIT | |
858 | BLKDEV_IFL_BARRIER); | ||
858 | if (rv) | 859 | if (rv) |
859 | goto fail; | 860 | goto fail; |
860 | nr_sects = 0; | 861 | nr_sects = 0; |
@@ -869,7 +870,7 @@ start_new_extent: | |||
869 | } | 870 | } |
870 | if (nr_sects) { | 871 | if (nr_sects) { |
871 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, | 872 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, |
872 | DISCARD_FL_BARRIER); | 873 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
873 | if (rv) | 874 | if (rv) |
874 | goto fail; | 875 | goto fail; |
875 | } | 876 | } |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 30beb11ef928..076d1cc44f95 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
530 | */ | 530 | */ |
531 | if ((journal->j_fs_dev != journal->j_dev) && | 531 | if ((journal->j_fs_dev != journal->j_dev) && |
532 | (journal->j_flags & JBD2_BARRIER)) | 532 | (journal->j_flags & JBD2_BARRIER)) |
533 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 533 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
534 | BLKDEV_IFL_WAIT); | ||
534 | if (!(journal->j_flags & JBD2_ABORT)) | 535 | if (!(journal->j_flags & JBD2_ABORT)) |
535 | jbd2_journal_update_superblock(journal, 1); | 536 | jbd2_journal_update_superblock(journal, 1); |
536 | return 0; | 537 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 671da7fb7ffd..75716d3d2be0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -717,7 +717,8 @@ start_journal_io: | |||
717 | if (commit_transaction->t_flushed_data_blocks && | 717 | if (commit_transaction->t_flushed_data_blocks && |
718 | (journal->j_fs_dev != journal->j_dev) && | 718 | (journal->j_fs_dev != journal->j_dev) && |
719 | (journal->j_flags & JBD2_BARRIER)) | 719 | (journal->j_flags & JBD2_BARRIER)) |
720 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 720 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
721 | BLKDEV_IFL_WAIT); | ||
721 | 722 | ||
722 | /* Done it all: now write the commit record asynchronously. */ | 723 | /* Done it all: now write the commit record asynchronously. */ |
723 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 724 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -727,7 +728,8 @@ start_journal_io: | |||
727 | if (err) | 728 | if (err) |
728 | __jbd2_journal_abort_hard(journal); | 729 | __jbd2_journal_abort_hard(journal); |
729 | if (journal->j_flags & JBD2_BARRIER) | 730 | if (journal->j_flags & JBD2_BARRIER) |
730 | blkdev_issue_flush(journal->j_dev, NULL); | 731 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, |
732 | BLKDEV_IFL_WAIT); | ||
731 | } | 733 | } |
732 | 734 | ||
733 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 735 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 33871f7e4f01..7ffcf2b8b1f4 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -670,7 +670,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
670 | start * sects_per_block, | 670 | start * sects_per_block, |
671 | nblocks * sects_per_block, | 671 | nblocks * sects_per_block, |
672 | GFP_NOFS, | 672 | GFP_NOFS, |
673 | DISCARD_FL_BARRIER); | 673 | BLKDEV_IFL_BARRIER); |
674 | if (ret < 0) | 674 | if (ret < 0) |
675 | return ret; | 675 | return ret; |
676 | nblocks = 0; | 676 | nblocks = 0; |
@@ -680,7 +680,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
680 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 680 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
681 | start * sects_per_block, | 681 | start * sects_per_block, |
682 | nblocks * sects_per_block, | 682 | nblocks * sects_per_block, |
683 | GFP_NOFS, DISCARD_FL_BARRIER); | 683 | GFP_NOFS, BLKDEV_IFL_BARRIER); |
684 | return ret; | 684 | return ret; |
685 | } | 685 | } |
686 | 686 | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1d9c12714c5c..9977df9f3a54 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp, | |||
147 | barrier_done = reiserfs_commit_for_inode(inode); | 147 | barrier_done = reiserfs_commit_for_inode(inode); |
148 | reiserfs_write_unlock(inode->i_sb); | 148 | reiserfs_write_unlock(inode->i_sb); |
149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) |
150 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 150 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
151 | BLKDEV_IFL_WAIT); | ||
151 | if (barrier_done < 0) | 152 | if (barrier_done < 0) |
152 | return barrier_done; | 153 | return barrier_done; |
153 | return (err < 0) ? -EIO : 0; | 154 | return (err < 0) ? -EIO : 0; |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 52e06b487ced..2b177c778ba7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -725,7 +725,8 @@ void | |||
725 | xfs_blkdev_issue_flush( | 725 | xfs_blkdev_issue_flush( |
726 | xfs_buftarg_t *buftarg) | 726 | xfs_buftarg_t *buftarg) |
727 | { | 727 | { |
728 | blkdev_issue_flush(buftarg->bt_bdev, NULL); | 728 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, |
729 | BLKDEV_IFL_WAIT); | ||
729 | } | 730 | } |
730 | 731 | ||
731 | STATIC void | 732 | STATIC void |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index bd0e3c6f323f..7534979d83bd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/timer.h> | ||
17 | #include <linux/writeback.h> | 18 | #include <linux/writeback.h> |
18 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
19 | 20 | ||
@@ -88,6 +89,8 @@ struct backing_dev_info { | |||
88 | 89 | ||
89 | struct device *dev; | 90 | struct device *dev; |
90 | 91 | ||
92 | struct timer_list laptop_mode_wb_timer; | ||
93 | |||
91 | #ifdef CONFIG_DEBUG_FS | 94 | #ifdef CONFIG_DEBUG_FS |
92 | struct dentry *debug_dir; | 95 | struct dentry *debug_dir; |
93 | struct dentry *debug_stats; | 96 | struct dentry *debug_stats; |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6690e8bae7bb..3ac2bd2fc485 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -186,15 +186,19 @@ struct request { | |||
186 | }; | 186 | }; |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * two pointers are available for the IO schedulers, if they need | 189 | * Three pointers are available for the IO schedulers, if they need |
190 | * more they have to dynamically allocate it. | 190 | * more they have to dynamically allocate it. |
191 | */ | 191 | */ |
192 | void *elevator_private; | 192 | void *elevator_private; |
193 | void *elevator_private2; | 193 | void *elevator_private2; |
194 | void *elevator_private3; | ||
194 | 195 | ||
195 | struct gendisk *rq_disk; | 196 | struct gendisk *rq_disk; |
196 | unsigned long start_time; | 197 | unsigned long start_time; |
197 | 198 | #ifdef CONFIG_BLK_CGROUP | |
199 | unsigned long long start_time_ns; | ||
200 | unsigned long long io_start_time_ns; /* when passed to hardware */ | ||
201 | #endif | ||
198 | /* Number of scatter-gather DMA addr+len pairs after | 202 | /* Number of scatter-gather DMA addr+len pairs after |
199 | * physical address coalescing is performed. | 203 | * physical address coalescing is performed. |
200 | */ | 204 | */ |
@@ -994,20 +998,25 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | |||
994 | return NULL; | 998 | return NULL; |
995 | return bqt->tag_index[tag]; | 999 | return bqt->tag_index[tag]; |
996 | } | 1000 | } |
997 | 1001 | enum{ | |
998 | extern int blkdev_issue_flush(struct block_device *, sector_t *); | 1002 | BLKDEV_WAIT, /* wait for completion */ |
999 | #define DISCARD_FL_WAIT 0x01 /* wait for completion */ | 1003 | BLKDEV_BARRIER, /*issue request with barrier */ |
1000 | #define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */ | 1004 | }; |
1001 | extern int blkdev_issue_discard(struct block_device *, sector_t sector, | 1005 | #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) |
1002 | sector_t nr_sects, gfp_t, int flags); | 1006 | #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) |
1003 | 1007 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, | |
1008 | unsigned long); | ||
1009 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
1010 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | ||
1011 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | ||
1012 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | ||
1004 | static inline int sb_issue_discard(struct super_block *sb, | 1013 | static inline int sb_issue_discard(struct super_block *sb, |
1005 | sector_t block, sector_t nr_blocks) | 1014 | sector_t block, sector_t nr_blocks) |
1006 | { | 1015 | { |
1007 | block <<= (sb->s_blocksize_bits - 9); | 1016 | block <<= (sb->s_blocksize_bits - 9); |
1008 | nr_blocks <<= (sb->s_blocksize_bits - 9); | 1017 | nr_blocks <<= (sb->s_blocksize_bits - 9); |
1009 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, | 1018 | return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, |
1010 | DISCARD_FL_BARRIER); | 1019 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
1011 | } | 1020 | } |
1012 | 1021 | ||
1013 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); | 1022 | extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); |
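The filesystem conversions above all follow the same pattern against these new prototypes: pass an explicit gfp mask and a BLKDEV_IFL_* bitmask instead of the old DISCARD_FL_* flags or implicit waiting. A minimal sketch of a caller; example_trim_and_flush() is hypothetical and not taken from this patch.

  #include <linux/blkdev.h>

  /* Hypothetical helper: discard a range with a barrier and wait for
   * completion, then flush the device cache, also waiting. */
  static int example_trim_and_flush(struct block_device *bdev,
                                    sector_t sector, sector_t nr_sects)
  {
          int ret;

          ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
                                     BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
          if (ret)
                  return ret;

          /* third argument is the optional error-sector pointer */
          return blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
  }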
@@ -1196,6 +1205,39 @@ static inline void put_dev_sector(Sector p) | |||
1196 | struct work_struct; | 1205 | struct work_struct; |
1197 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1206 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
1198 | 1207 | ||
1208 | #ifdef CONFIG_BLK_CGROUP | ||
1209 | static inline void set_start_time_ns(struct request *req) | ||
1210 | { | ||
1211 | req->start_time_ns = sched_clock(); | ||
1212 | } | ||
1213 | |||
1214 | static inline void set_io_start_time_ns(struct request *req) | ||
1215 | { | ||
1216 | req->io_start_time_ns = sched_clock(); | ||
1217 | } | ||
1218 | |||
1219 | static inline uint64_t rq_start_time_ns(struct request *req) | ||
1220 | { | ||
1221 | return req->start_time_ns; | ||
1222 | } | ||
1223 | |||
1224 | static inline uint64_t rq_io_start_time_ns(struct request *req) | ||
1225 | { | ||
1226 | return req->io_start_time_ns; | ||
1227 | } | ||
1228 | #else | ||
1229 | static inline void set_start_time_ns(struct request *req) {} | ||
1230 | static inline void set_io_start_time_ns(struct request *req) {} | ||
1231 | static inline uint64_t rq_start_time_ns(struct request *req) | ||
1232 | { | ||
1233 | return 0; | ||
1234 | } | ||
1235 | static inline uint64_t rq_io_start_time_ns(struct request *req) | ||
1236 | { | ||
1237 | return 0; | ||
1238 | } | ||
1239 | #endif | ||
1240 | |||
1199 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 1241 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
1200 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 1242 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
1201 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ | 1243 | #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ |
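The two timestamps recorded in struct request are meant to be read back through the accessors above, which compile to constant 0 when CONFIG_BLK_CGROUP is not set, so consumers need no #ifdefs of their own. A minimal sketch of a completion-side reader; example_account_request() is hypothetical.

  #include <linux/kernel.h>
  #include <linux/blkdev.h>

  /* Hypothetical completion-side helper: report how long the request
   * sat in the scheduler before being handed to the hardware. */
  static void example_account_request(struct request *rq)
  {
          u64 queued_ns = rq_start_time_ns(rq);
          u64 issued_ns = rq_io_start_time_ns(rq);

          /* both are 0 when CONFIG_BLK_CGROUP=n, so guard the report */
          if (issued_ns)
                  pr_debug("rq %p waited %llu ns in the scheduler\n",
                           rq, (unsigned long long)(issued_ns - queued_ns));
  }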
diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1cb3372e65d8..2c958f4fce1e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h | |||
@@ -14,6 +14,9 @@ typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int | |||
14 | 14 | ||
15 | typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *); | 15 | typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *); |
16 | 16 | ||
17 | typedef void (elevator_bio_merged_fn) (struct request_queue *, | ||
18 | struct request *, struct bio *); | ||
19 | |||
17 | typedef int (elevator_dispatch_fn) (struct request_queue *, int); | 20 | typedef int (elevator_dispatch_fn) (struct request_queue *, int); |
18 | 21 | ||
19 | typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); | 22 | typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); |
@@ -36,6 +39,7 @@ struct elevator_ops | |||
36 | elevator_merged_fn *elevator_merged_fn; | 39 | elevator_merged_fn *elevator_merged_fn; |
37 | elevator_merge_req_fn *elevator_merge_req_fn; | 40 | elevator_merge_req_fn *elevator_merge_req_fn; |
38 | elevator_allow_merge_fn *elevator_allow_merge_fn; | 41 | elevator_allow_merge_fn *elevator_allow_merge_fn; |
42 | elevator_bio_merged_fn *elevator_bio_merged_fn; | ||
39 | 43 | ||
40 | elevator_dispatch_fn *elevator_dispatch_fn; | 44 | elevator_dispatch_fn *elevator_dispatch_fn; |
41 | elevator_add_req_fn *elevator_add_req_fn; | 45 | elevator_add_req_fn *elevator_add_req_fn; |
@@ -103,6 +107,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *); | |||
103 | extern void elv_merge_requests(struct request_queue *, struct request *, | 107 | extern void elv_merge_requests(struct request_queue *, struct request *, |
104 | struct request *); | 108 | struct request *); |
105 | extern void elv_merged_request(struct request_queue *, struct request *, int); | 109 | extern void elv_merged_request(struct request_queue *, struct request *, int); |
110 | extern void elv_bio_merged(struct request_queue *q, struct request *, | ||
111 | struct bio *); | ||
106 | extern void elv_requeue_request(struct request_queue *, struct request *); | 112 | extern void elv_requeue_request(struct request_queue *, struct request *); |
107 | extern int elv_queue_empty(struct request_queue *); | 113 | extern int elv_queue_empty(struct request_queue *); |
108 | extern struct request *elv_former_request(struct request_queue *, struct request *); | 114 | extern struct request *elv_former_request(struct request_queue *, struct request *); |
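An IO scheduler opts into the new notification by filling the extra ops slot; elv_bio_merged() then invokes it whenever a bio is folded into an existing request. A minimal sketch follows, with all example_* names hypothetical (this is not CFQ's actual implementation).

  #include <linux/module.h>
  #include <linux/blkdev.h>
  #include <linux/elevator.h>

  /* Hypothetical hook: called when @bio has been merged into @rq, so a
   * scheduler can account the merge to the group owning the request. */
  static void example_bio_merged(struct request_queue *q,
                                 struct request *rq, struct bio *bio)
  {
          /* update per-group merge statistics here */
  }

  static struct elevator_type example_iosched = {
          .ops = {
                  .elevator_bio_merged_fn = example_bio_merged,
                  /* mandatory dispatch/add/init hooks omitted for brevity */
          },
          .elevator_name  = "example",
          .elevator_owner = THIS_MODULE,
  };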
diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f35aea2f1f..f30970c97acf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -651,6 +651,7 @@ struct block_device { | |||
651 | int bd_openers; | 651 | int bd_openers; |
652 | struct mutex bd_mutex; /* open/close mutex */ | 652 | struct mutex bd_mutex; /* open/close mutex */ |
653 | struct list_head bd_inodes; | 653 | struct list_head bd_inodes; |
654 | void * bd_claiming; | ||
654 | void * bd_holder; | 655 | void * bd_holder; |
655 | int bd_holders; | 656 | int bd_holders; |
656 | #ifdef CONFIG_SYSFS | 657 | #ifdef CONFIG_SYSFS |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 36520ded3e06..eb38a2c645f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -96,8 +96,10 @@ static inline void inode_sync_wait(struct inode *inode) | |||
96 | /* | 96 | /* |
97 | * mm/page-writeback.c | 97 | * mm/page-writeback.c |
98 | */ | 98 | */ |
99 | void laptop_io_completion(void); | 99 | void laptop_io_completion(struct backing_dev_info *info); |
100 | void laptop_sync_completion(void); | 100 | void laptop_sync_completion(void); |
101 | void laptop_mode_sync(struct work_struct *work); | ||
102 | void laptop_mode_timer_fn(unsigned long data); | ||
101 | void throttle_vm_writeout(gfp_t gfp_mask); | 103 | void throttle_vm_writeout(gfp_t gfp_mask); |
102 | 104 | ||
103 | /* These are exported to sysctl. */ | 105 | /* These are exported to sysctl. */ |
diff --git a/init/Kconfig b/init/Kconfig index eb77e8ccde1c..087c14f3c595 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -612,6 +612,33 @@ config RT_GROUP_SCHED | |||
612 | 612 | ||
613 | endif #CGROUP_SCHED | 613 | endif #CGROUP_SCHED |
614 | 614 | ||
615 | config BLK_CGROUP | ||
616 | tristate "Block IO controller" | ||
617 | depends on CGROUPS && BLOCK | ||
618 | default n | ||
619 | ---help--- | ||
620 | Generic block IO controller cgroup interface. This is the common | ||
621 | cgroup interface which should be used by various IO controlling | ||
622 | policies. | ||
623 | |||
624 | Currently, the CFQ IO scheduler uses it to recognize task groups and | ||
625 | control disk bandwidth allocation (proportional time slice allocation) | ||
626 | to such task groups. | ||
627 | |||
628 | This option only enables the generic Block IO controller infrastructure. | ||
629 | The actual IO controlling logic in CFQ (CONFIG_CFQ_GROUP_IOSCHED=y) | ||
630 | must also be enabled for the controller to take effect. | ||
631 | |||
632 | See Documentation/cgroups/blkio-controller.txt for more information. | ||
633 | |||
634 | config DEBUG_BLK_CGROUP | ||
635 | bool "Enable Block IO controller debugging" | ||
636 | depends on BLK_CGROUP | ||
637 | default n | ||
638 | ---help--- | ||
639 | Enables some debugging help. Currently it exports additional stat | ||
640 | files in a cgroup, which can be useful for debugging. | ||
641 | |||
615 | endif # CGROUPS | 642 | endif # CGROUPS |
616 | 643 | ||
617 | config MM_OWNER | 644 | config MM_OWNER |
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 5b496132c28a..906a0f718cb3 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -41,6 +41,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
41 | return (unsigned long long)(jiffies - INITIAL_JIFFIES) | 41 | return (unsigned long long)(jiffies - INITIAL_JIFFIES) |
42 | * (NSEC_PER_SEC / HZ); | 42 | * (NSEC_PER_SEC / HZ); |
43 | } | 43 | } |
44 | EXPORT_SYMBOL_GPL(sched_clock); | ||
44 | 45 | ||
45 | static __read_mostly int sched_clock_running; | 46 | static __read_mostly int sched_clock_running; |
46 | 47 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0b19943ecf8b..d0f2b3765f8d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -683,10 +683,6 @@ void throttle_vm_writeout(gfp_t gfp_mask) | |||
683 | } | 683 | } |
684 | } | 684 | } |
685 | 685 | ||
686 | static void laptop_timer_fn(unsigned long unused); | ||
687 | |||
688 | static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); | ||
689 | |||
690 | /* | 686 | /* |
691 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs | 687 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs |
692 | */ | 688 | */ |
@@ -697,21 +693,19 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, | |||
697 | return 0; | 693 | return 0; |
698 | } | 694 | } |
699 | 695 | ||
700 | static void do_laptop_sync(struct work_struct *work) | 696 | void laptop_mode_timer_fn(unsigned long data) |
701 | { | 697 | { |
702 | wakeup_flusher_threads(0); | 698 | struct request_queue *q = (struct request_queue *)data; |
703 | kfree(work); | 699 | int nr_pages = global_page_state(NR_FILE_DIRTY) + |
704 | } | 700 | global_page_state(NR_UNSTABLE_NFS); |
705 | 701 | ||
706 | static void laptop_timer_fn(unsigned long unused) | 702 | /* |
707 | { | 703 | * We want to write everything out, not just down to the dirty |
708 | struct work_struct *work; | 704 | * threshold |
705 | */ | ||
709 | 706 | ||
710 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 707 | if (bdi_has_dirty_io(&q->backing_dev_info)) |
711 | if (work) { | 708 | bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); |
712 | INIT_WORK(work, do_laptop_sync); | ||
713 | schedule_work(work); | ||
714 | } | ||
715 | } | 709 | } |
716 | 710 | ||
717 | /* | 711 | /* |
@@ -719,9 +713,9 @@ static void laptop_timer_fn(unsigned long unused) | |||
719 | * of all dirty data a few seconds from now. If the flush is already scheduled | 713 | * of all dirty data a few seconds from now. If the flush is already scheduled |
720 | * then push it back - the user is still using the disk. | 714 | * then push it back - the user is still using the disk. |
721 | */ | 715 | */ |
722 | void laptop_io_completion(void) | 716 | void laptop_io_completion(struct backing_dev_info *info) |
723 | { | 717 | { |
724 | mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode); | 718 | mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); |
725 | } | 719 | } |
726 | 720 | ||
727 | /* | 721 | /* |
@@ -731,7 +725,14 @@ void laptop_io_completion(void) | |||
731 | */ | 725 | */ |
732 | void laptop_sync_completion(void) | 726 | void laptop_sync_completion(void) |
733 | { | 727 | { |
734 | del_timer(&laptop_mode_wb_timer); | 728 | struct backing_dev_info *bdi; |
729 | |||
730 | rcu_read_lock(); | ||
731 | |||
732 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) | ||
733 | del_timer(&bdi->laptop_mode_wb_timer); | ||
734 | |||
735 | rcu_read_unlock(); | ||
735 | } | 736 | } |
736 | 737 | ||
737 | /* | 738 | /* |
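With the single global timer gone, each device arms its own laptop mode timer embedded in its backing_dev_info, and IO completion pushes back only that device's writeback deadline. A minimal sketch of the block-layer side; the example_* helpers are hypothetical, and the real initialization in the block core is not shown in this section.

  #include <linux/blkdev.h>
  #include <linux/writeback.h>
  #include <linux/timer.h>

  /* Hypothetical queue-init helper: point the per-bdi timer at
   * laptop_mode_timer_fn with the owning queue as its argument. */
  static void example_setup_laptop_timer(struct request_queue *q)
  {
          setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                      laptop_mode_timer_fn, (unsigned long)q);
  }

  /* Hypothetical completion path: in laptop mode, defer writeback for
   * this device only, instead of the old global timer. */
  static void example_request_completed(struct request_queue *q)
  {
          if (laptop_mode)
                  laptop_io_completion(&q->backing_dev_info);
  }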
diff --git a/mm/swapfile.c b/mm/swapfile.c index 6cd0a8f90dc7..eb086e0f4dcc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -139,7 +139,8 @@ static int discard_swap(struct swap_info_struct *si) | |||
139 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); | 139 | nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); |
140 | if (nr_blocks) { | 140 | if (nr_blocks) { |
141 | err = blkdev_issue_discard(si->bdev, start_block, | 141 | err = blkdev_issue_discard(si->bdev, start_block, |
142 | nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER); | 142 | nr_blocks, GFP_KERNEL, |
143 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
143 | if (err) | 144 | if (err) |
144 | return err; | 145 | return err; |
145 | cond_resched(); | 146 | cond_resched(); |
@@ -150,7 +151,8 @@ static int discard_swap(struct swap_info_struct *si) | |||
150 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); | 151 | nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); |
151 | 152 | ||
152 | err = blkdev_issue_discard(si->bdev, start_block, | 153 | err = blkdev_issue_discard(si->bdev, start_block, |
153 | nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER); | 154 | nr_blocks, GFP_KERNEL, |
155 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
154 | if (err) | 156 | if (err) |
155 | break; | 157 | break; |
156 | 158 | ||
@@ -189,7 +191,8 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
189 | start_block <<= PAGE_SHIFT - 9; | 191 | start_block <<= PAGE_SHIFT - 9; |
190 | nr_blocks <<= PAGE_SHIFT - 9; | 192 | nr_blocks <<= PAGE_SHIFT - 9; |
191 | if (blkdev_issue_discard(si->bdev, start_block, | 193 | if (blkdev_issue_discard(si->bdev, start_block, |
192 | nr_blocks, GFP_NOIO, DISCARD_FL_BARRIER)) | 194 | nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT | |
195 | BLKDEV_IFL_BARRIER)) | ||
193 | break; | 196 | break; |
194 | } | 197 | } |
195 | 198 | ||