Diffstat (limited to 'block')

 -rw-r--r--  block/Kconfig        |    2
 -rw-r--r--  block/blk-cgroup.c   |    4
 -rw-r--r--  block/blk-core.c     |   40
 -rw-r--r--  block/blk-ioc.c      |    5
 -rw-r--r--  block/blk-map.c      |    5
 -rw-r--r--  block/blk-merge.c    |    9
 -rw-r--r--  block/blk-settings.c |   51
 -rw-r--r--  block/blk-sysfs.c    |    2
 -rw-r--r--  block/blk-throttle.c |   41
 -rw-r--r--  block/bsg.c          |    8
 -rw-r--r--  block/cfq-iosched.c  |  142
 -rw-r--r--  block/genhd.c        |  550
 -rw-r--r--  block/ioctl.c        |    5

13 files changed, 703 insertions, 161 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 6c9213ef15a1..60be1e0455da 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -2,7 +2,7 @@
 # Block layer core configuration
 #
 menuconfig BLOCK
-	bool "Enable the block layer" if EMBEDDED
+	bool "Enable the block layer" if EXPERT
 	default y
 	help
 	 Provide block layer support for the kernel.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 		goto done;
 	}
 
-	/* Currently we do not support hierarchy deeper than two level (0,1) */
-	if (parent != cgroup->top_cgroup)
-		return ERR_PTR(-EPERM);
-
 	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
 	if (!blkcg)
 		return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..2f4002f79a24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
 
 #include "blk.h"
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		return;
 
 	cpu = part_stat_lock();
-	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
-	if (!new_io)
+	if (!new_io) {
+		part = rq->part;
 		part_stat_inc(cpu, part, merges[rw]);
-	else {
+	} else {
+		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+		if (!hd_struct_try_get(part)) {
+			/*
+			 * The partition is already being removed,
+			 * the request will be accounted on the disk only
+			 *
+			 * We take a reference on disk->part0 although that
+			 * partition will never be deleted, so we can treat
+			 * it as any other partition.
+			 */
+			part = &rq->rq_disk->part0;
+			hd_struct_get(part);
+		}
 		part_round_stats(cpu, part);
 		part_inc_in_flight(part, rw);
+		rq->part = part;
 	}
 
 	part_stat_unlock();
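
The drive_stat_acct() hunk closes a race between I/O accounting and partition removal: the partition is now looked up once at submission, pinned with hd_struct_try_get(), cached in rq->part, and dropped at completion, with a fallback to the whole-disk part0 when the partition is already going away. A minimal user-space sketch of the try-get-with-fallback idiom it relies on (illustrative only, not the kernel's hd_struct API; real hd_struct refcounts are atomic):

    struct part {
            int ref;                /* 0 means teardown has begun */
    };

    /* Refuse to resurrect an object whose count already hit zero. */
    static int part_try_get(struct part *p)
    {
            if (p->ref == 0)
                    return 0;
            p->ref++;
            return 1;
    }

    /* Pin the mapped partition, or fall back to the always-present part0. */
    static struct part *pick_account_target(struct part *mapped, struct part *part0)
    {
            if (part_try_get(mapped))
                    return mapped;
            part0->ref++;           /* part0 is never deleted, a plain get is fine */
            return part0;
    }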
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->ref_count = 1;
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
+	rq->part = NULL;
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
-		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev,
-				    bio->bi_sector - p->start_sect);
+		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+				      bdev->bd_dev,
+				      bio->bi_sector - p->start_sect);
 	}
 }
 
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, old_sector);
+			trace_block_bio_remap(q, bio, old_dev, old_sector);
 
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+		part = req->part;
 		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
 		part_stat_unlock();
 	}
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+		part = req->part;
 
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
 		part_round_stats(cpu, part);
 		part_dec_in_flight(part, rw);
 
+		hd_struct_put(part);
 		part_stat_unlock();
 	}
 }
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
 			sizeof(((struct request *)0)->cmd_flags));
 
-	kblockd_workqueue = create_workqueue("kblockd");
+	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
+	kblockd_workqueue = alloc_workqueue("kblockd",
+					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
 
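
alloc_workqueue() supersedes create_workqueue() and lets the caller pass flags: WQ_MEM_RECLAIM gives the queue a rescuer thread so it can make forward progress under memory pressure, and WQ_HIGHPRI queues its work items at elevated priority, which matters here because kblockd drives unplugging. A driver wanting the same guarantees could do something like the following (my_wq and my_setup are hypothetical names):

    #include <linux/workqueue.h>

    static struct workqueue_struct *my_wq;      /* hypothetical example queue */

    static int my_setup(void)
    {
            /* max_active of 0 selects the default concurrency limit */
            my_wq = alloc_workqueue("my_wq", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
            return my_wq ? 0 : -ENOMEM;
    }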
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 3c7a339fe381..b791022beef3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
 	rcu_read_unlock();
 }
 
-/* Called by the exitting task */
+/* Called by the exiting task */
 void exit_io_context(struct task_struct *task)
 {
 	struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
 	task->io_context = NULL;
 	task_unlock(task);
 
-	if (atomic_dec_and_test(&ioc->nr_tasks)) {
+	if (atomic_dec_and_test(&ioc->nr_tasks))
 		cfq_exit(ioc);
 
-	}
 	put_io_context(ioc);
 }
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 5d5dbe47c228..e663ac2d8e68 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -201,12 +201,13 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr = (unsigned long)iov[i].iov_base;
 
+		if (!iov[i].iov_len)
+			return -EINVAL;
+
 		if (uaddr & queue_dma_alignment(q)) {
 			unaligned = 1;
 			break;
 		}
-		if (!iov[i].iov_len)
-			return -EINVAL;
 	}
 
 	if (unaligned || (q->dma_pad_mask & len) || map_data)
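
The reorder matters because the old loop could leave via the alignment `break` before the length check ever ran. A worked case (hypothetical values) showing an entry that is both misaligned and zero-length:

    iov[0] = { .iov_base = (void *)0x1001, .iov_len = 0 };

    /* old order: uaddr is unaligned -> unaligned = 1, break; the zero
     * length is never rejected and the request goes down the copy path.
     * new order: the iov_len test runs first and returns -EINVAL. */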
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77b7c26df6b5..ea85e20d5e94 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,7 +21,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 		return 0;
 
 	fbio = bio;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(blk_recount_segments);
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (!blk_queue_cluster(q))
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
@@ -123,7 +123,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 
 	/*
 	 * for each bio in rq
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+		part = req->part;
 
 		part_round_stats(cpu, part);
 		part_dec_in_flight(part, rq_data_dir(req));
 
+		hd_struct_put(part);
 		part_stat_unlock();
 	}
 }
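
These call sites switch from testing QUEUE_FLAG_CLUSTER to a helper; the helper itself lands in the include/linux/blkdev.h half of this series (not included in this section) and, per the blk-settings.c changes below that move clustering into struct queue_limits, presumably reduces to reading the new field:

    static inline unsigned int blk_queue_cluster(struct request_queue *q)
    {
            return q->limits.cluster;
    }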
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 701859fb9647..36c8c1f2af18 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -126,7 +126,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
-	lim->no_cluster = 0;
+	lim->cluster = 1;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
@@ -229,8 +229,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q:  the request queue for the device
+ * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
+ * @limits: the queue limits
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
 **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
 {
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
@@ -252,9 +252,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 		       __func__, max_hw_sectors);
 	}
 
-	q->limits.max_hw_sectors = max_hw_sectors;
-	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
-				      BLK_DEF_MAX_SECTORS);
+	limits->max_hw_sectors = max_hw_sectors;
+	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+				    BLK_DEF_MAX_SECTORS);
+}
+EXPORT_SYMBOL(blk_limits_max_hw_sectors);
+
+/**
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
+ * @max_hw_sectors:  max hardware sectors in the usual 512b unit
+ *
+ * Description:
+ *    See description for blk_limits_max_hw_sectors().
+ **/
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+	blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
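
Splitting the setter out lets code that holds only a struct queue_limits, with no request_queue yet, apply the same PAGE_CACHE_SIZE clamping and BLK_DEF_MAX_SECTORS soft cap; stacking drivers such as device-mapper, which assemble limits before a queue exists, are presumably the intended consumers. A hypothetical caller:

    /* Hypothetical stacking-driver snippet: size limits before a queue exists. */
    struct queue_limits limits;

    blk_set_default_limits(&limits);
    blk_limits_max_hw_sectors(&limits, 255);    /* also caps the soft limit */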
@@ -464,15 +478,6 @@ EXPORT_SYMBOL(blk_queue_io_opt);
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	blk_stack_limits(&t->limits, &b->limits, 0);
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(t->queue_lock, flags);
-		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
@@ -545,7 +550,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm(t->io_opt, b->io_opt);
 
-	t->no_cluster |= b->no_cluster;
+	t->cluster &= b->cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Physical block size a multiple of the logical block size? */
@@ -641,7 +646,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		       sector_t offset)
 {
 	struct request_queue *t = disk->queue;
-	struct request_queue *b = bdev_get_queue(bdev);
 
 	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
@@ -652,17 +656,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
 		       top, bottom);
 	}
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(t->queue_lock, flags);
-		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(disk_stack_limits);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 013457f47fdc..41fb69150b4d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -119,7 +119,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *
 
 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-	if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (blk_queue_cluster(q))
 		return queue_var_show(queue_max_segment_size(q), (page));
 
 	return queue_var_show(PAGE_CACHE_SIZE, (page));
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 56ad4531b412..381b09bb562b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -355,6 +355,12 @@ throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
 			tg->slice_end[rw], jiffies);
 }
 
+static inline void throtl_set_slice_end(struct throtl_data *td,
+		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+}
+
 static inline void throtl_extend_slice(struct throtl_data *td,
 		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
 {
@@ -391,6 +397,16 @@ throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
 	if (throtl_slice_used(td, tg, rw))
 		return;
 
+	/*
+	 * A bio has been dispatched. Also adjust slice_end. It might happen
+	 * that initially cgroup limit was very low resulting in high
+	 * slice_end, but later limit was bumped up and bio was dispatched
+	 * sooner, then we need to reduce slice_end. A high bogus slice_end
+	 * is bad because it does not allow new slice to start.
+	 */
+
+	throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);
+
 	time_elapsed = jiffies - tg->slice_start[rw];
 
 	nr_slices = time_elapsed / throtl_slice;
@@ -645,7 +661,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
 {
 	unsigned int nr_reads = 0, nr_writes = 0;
 	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
-	unsigned int max_nr_writes = throtl_grp_quantum - nr_reads;
+	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
 	struct bio *bio;
 
 	/* Try to dispatch 75% READS and 25% WRITES */
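
The max_nr_writes line was a genuine bug: nr_reads is still 0 at the point the initializers run, so the write budget came out as the whole quantum. Assuming the default throtl_grp_quantum of 8 defined in this file, the arithmetic works out as:

    /* max_nr_reads  = 8 * 3 / 4                = 6
     * old: max_nr_writes = 8 - nr_reads (0)    = 8  (reads + writes overcommit the quantum)
     * new: max_nr_writes = 8 - max_nr_reads    = 2  (6 + 2 == 8, the intended 75/25 split)
     */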
@@ -709,26 +725,21 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	/*
-	 * Make sure atomic_inc() effects from
-	 * throtl_update_blkio_group_read_bps(), group of functions are
-	 * visible.
-	 * Is this required or smp_mb__after_atomic_inc() was suffcient
-	 * after the atomic_inc().
-	 */
-	smp_rmb();
 	if (!atomic_read(&td->limits_changed))
 		return;
 
 	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
 
-	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		/*
-		 * Do I need an smp_rmb() here to make sure tg->limits_changed
-		 * update is visible. I am relying on smp_rmb() at the
-		 * beginning of function and not putting a new one here.
-		 */
+	/*
+	 * Make sure updates from throtl_update_blkio_group_read_bps() group
+	 * of functions to tg->limits_changed are visible. We do not
+	 * want update td->limits_changed to be visible but update to
+	 * tg->limits_changed not being visible yet on this cpu. Hence
+	 * the read barrier.
+	 */
+	smp_rmb();
 
+	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
 		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
 			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
 				" riops=%u wiops=%u", tg->bps[READ],
diff --git a/block/bsg.c b/block/bsg.c
index f20d6a789d48..0c8b64a16484 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -250,6 +250,14 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 	int ret, rw;
 	unsigned int dxfer_len;
 	void *dxferp = NULL;
+	struct bsg_class_device *bcd = &q->bsg_dev;
+
+	/* if the LLD has been removed then the bsg_unregister_queue will
+	 * eventually be called and the class_dev was freed, so we can no
+	 * longer use this request_queue. Return no such address.
+	 */
+	if (!bcd->class_dev)
+		return ERR_PTR(-ENXIO);
 
 	dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
 		hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4cd59b0d7c15..501ffdf0399c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
 	unsigned count;
 	unsigned total_weight;
 	u64 min_vdisktime;
-	struct rb_node *active;
 };
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
 			.count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
  */
 struct cfq_queue {
 	/* reference count */
-	atomic_t ref;
+	int ref;
 	/* various state flags, see below */
 	unsigned int flags;
 	/* parent cfq_data */
@@ -180,7 +179,6 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
-	bool on_st;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
@@ -209,7 +207,7 @@ struct cfq_group {
 	struct blkio_group blkg;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	struct hlist_node cfqd_node;
-	atomic_t ref;
+	int ref;
 #endif
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
 	u64 vdisktime = st->min_vdisktime;
 	struct cfq_group *cfqg;
 
-	if (st->active) {
-		cfqg = rb_entry_cfqg(st->active);
-		vdisktime = cfqg->vdisktime;
-	}
-
 	if (st->left) {
 		cfqg = rb_entry_cfqg(st->left);
 		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -605,8 +598,8 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	return cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
-static inline void
-cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static inline unsigned
+cfq_scaled_group_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
 	if (cfqd->cfq_latency) {
@@ -632,6 +625,14 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 					low_slice);
 		}
 	}
+	return slice;
+}
+
+static inline void
+cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	unsigned slice = cfq_scaled_group_slice(cfqd, cfqq);
+
 	cfqq->slice_start = jiffies;
 	cfqq->slice_end = jiffies + slice;
 	cfqq->allocated_slice = slice;
@@ -646,11 +647,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline bool cfq_slice_used(struct cfq_queue *cfqq)
 {
 	if (cfq_cfqq_slice_new(cfqq))
-		return 0;
+		return false;
 	if (time_before(jiffies, cfqq->slice_end))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
@@ -869,7 +870,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	struct rb_node *n;
 
 	cfqg->nr_cfqq++;
-	if (cfqg->on_st)
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		return;
 
 	/*
@@ -885,7 +886,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	cfqg->vdisktime = st->min_vdisktime;
 
 	__cfq_group_service_tree_add(st, cfqg);
-	cfqg->on_st = true;
 	st->total_weight += cfqg->weight;
 }
 
@@ -894,9 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	if (st->active == &cfqg->rb_node)
-		st->active = NULL;
-
 	BUG_ON(cfqg->nr_cfqq < 1);
 	cfqg->nr_cfqq--;
 
@@ -905,7 +902,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-	cfqg->on_st = false;
 	st->total_weight -= cfqg->weight;
 	if (!RB_EMPTY_NODE(&cfqg->rb_node))
 		cfq_rb_erase(&cfqg->rb_node, st);
@@ -1026,11 +1022,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	 * elevator which will be dropped by either elevator exit
 	 * or cgroup deletion path depending on who is exiting first.
 	 */
-	atomic_set(&cfqg->ref, 1);
+	cfqg->ref = 1;
 
 	/*
 	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initiliazed yet. Initialize this new group without major
+	 * not initialized yet. Initialize this new group without major
 	 * and minor info and this info will be filled in once a new thread
 	 * comes for IO. See code above.
 	 */
@@ -1071,7 +1067,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 
 static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
 {
-	atomic_inc(&cfqg->ref);
+	cfqg->ref++;
 	return cfqg;
 }
 
@@ -1083,7 +1079,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 
 	cfqq->cfqg = cfqg;
 	/* cfqq reference on cfqg */
-	atomic_inc(&cfqq->cfqg->ref);
+	cfqq->cfqg->ref++;
 }
 
 static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1091,11 +1087,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 	struct cfq_rb_root *st;
 	int i, j;
 
-	BUG_ON(atomic_read(&cfqg->ref) <= 0);
-	if (!atomic_dec_and_test(&cfqg->ref))
+	BUG_ON(cfqg->ref <= 0);
+	cfqg->ref--;
+	if (cfqg->ref)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
-		BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
 	kfree(cfqg);
 }
 
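
All of the atomic_t to int conversions in this file lean on one invariant: every cfqq/cfqg get and put already runs with the queue spinlock held, so the atomic operations were pure overhead. A toy user-space illustration of that rule (a pthread mutex standing in for q->queue_lock; not kernel code):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
            pthread_mutex_t lock;
            int ref;                /* protected by ->lock, no atomics needed */
    };

    static void obj_get(struct obj *o)
    {
            pthread_mutex_lock(&o->lock);
            o->ref++;
            pthread_mutex_unlock(&o->lock);
    }

    static void obj_put(struct obj *o)
    {
            int free_it;

            pthread_mutex_lock(&o->lock);
            free_it = (--o->ref == 0);
            pthread_mutex_unlock(&o->lock);
            if (free_it)
                    free(o);
    }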
@@ -1200,7 +1197,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfq_group_service_tree_del(cfqd, cfqq->cfqg);
 		cfqq->orig_cfqg = cfqq->cfqg;
 		cfqq->cfqg = &cfqd->root_group;
-		atomic_inc(&cfqd->root_group.ref);
+		cfqd->root_group.ref++;
 		group_changed = 1;
 	} else if (!cfqd->cfq_group_isolation
 		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@ -1672,8 +1669,11 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	/*
 	 * store what was left of this slice, if the queue idled/timed out
 	 */
-	if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
-		cfqq->slice_resid = cfqq->slice_end - jiffies;
+	if (timed_out) {
+		if (cfq_cfqq_slice_new(cfqq))
+			cfqq->slice_resid = cfq_scaled_group_slice(cfqd, cfqq);
+		else
+			cfqq->slice_resid = cfqq->slice_end - jiffies;
 		cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
 	}
 
@@ -1687,9 +1687,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfqq == cfqd->active_queue)
 		cfqd->active_queue = NULL;
 
-	if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
-		cfqd->grp_service_tree.active = NULL;
-
 	if (cfqd->active_cic) {
 		put_io_context(cfqd->active_cic->ioc);
 		cfqd->active_cic = NULL;
@@ -1901,10 +1898,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * in their service tree.
 	 */
 	if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
-		return 1;
+		return true;
 	cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
 			service_tree->count);
-	return 0;
+	return false;
 }
 
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2040,7 +2037,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq)
 	int process_refs, io_refs;
 
 	io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
-	process_refs = atomic_read(&cfqq->ref) - io_refs;
+	process_refs = cfqq->ref - io_refs;
 	BUG_ON(process_refs < 0);
 	return process_refs;
 }
@@ -2080,10 +2077,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
 	 */
 	if (new_process_refs >= process_refs) {
 		cfqq->new_cfqq = new_cfqq;
-		atomic_add(process_refs, &new_cfqq->ref);
+		new_cfqq->ref += process_refs;
 	} else {
 		new_cfqq->new_cfqq = cfqq;
-		atomic_add(new_process_refs, &cfqq->ref);
+		cfqq->ref += new_process_refs;
 	}
 }
 
@@ -2116,12 +2113,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	unsigned count;
 	struct cfq_rb_root *st;
 	unsigned group_slice;
-
-	if (!cfqg) {
-		cfqd->serving_prio = IDLE_WORKLOAD;
-		cfqd->workload_expires = jiffies + 1;
-		return;
-	}
+	enum wl_prio_t original_prio = cfqd->serving_prio;
 
 	/* Choose next priority. RT > BE > IDLE */
 	if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2126,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 	}
 
+	if (original_prio != cfqd->serving_prio)
+		goto new_workload;
+
 	/*
 	 * For RT and BE, we have to choose also the type
 	 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2143,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	if (count && !time_after(jiffies, cfqd->workload_expires))
 		return;
 
+new_workload:
 	/* otherwise select new workload type */
 	cfqd->serving_type =
 		cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2195,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
 	if (RB_EMPTY_ROOT(&st->rb))
 		return NULL;
 	cfqg = cfq_rb_first_group(st);
-	st->active = &cfqg->rb_node;
 	update_min_vdisktime(st);
 	return cfqg;
 }
@@ -2293,6 +2288,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto keep_queue;
 	}
 
+	/*
+	 * This is a deep seek queue, but the device is much faster than
+	 * the queue can deliver, don't idle
+	 **/
+	if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+	    (cfq_cfqq_slice_new(cfqq) ||
+	    (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+		cfq_clear_cfqq_deep(cfqq);
+		cfq_clear_cfqq_idle_window(cfqq);
+	}
+
 	if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
 		cfqq = NULL;
 		goto keep_queue;
@@ -2367,12 +2373,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
 {
 	/* the queue hasn't finished any request, can't estimate */
 	if (cfq_cfqq_slice_new(cfqq))
-		return 1;
+		return true;
 	if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
 		cfqq->slice_end))
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 
 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -2538,9 +2544,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	struct cfq_data *cfqd = cfqq->cfqd;
 	struct cfq_group *cfqg, *orig_cfqg;
 
-	BUG_ON(atomic_read(&cfqq->ref) <= 0);
+	BUG_ON(cfqq->ref <= 0);
 
-	if (!atomic_dec_and_test(&cfqq->ref))
+	cfqq->ref--;
+	if (cfqq->ref)
 		return;
 
 	cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@ -2843,7 +2850,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	RB_CLEAR_NODE(&cfqq->p_node);
 	INIT_LIST_HEAD(&cfqq->fifo);
 
-	atomic_set(&cfqq->ref, 0);
+	cfqq->ref = 0;
 	cfqq->cfqd = cfqd;
 
 	cfq_mark_cfqq_prio_changed(cfqq);
@@ -2979,11 +2986,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	 * pin the queue now that it's allocated, scheduler exit will prune it
 	 */
 	if (!is_sync && !(*async_cfqq)) {
-		atomic_inc(&cfqq->ref);
+		cfqq->ref++;
 		*async_cfqq = cfqq;
 	}
 
-	atomic_inc(&cfqq->ref);
+	cfqq->ref++;
 	return cfqq;
 }
 
@@ -3265,6 +3272,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
 		return true;
 
+	/* An idle queue should not be idle now for some reason */
+	if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+		return true;
+
 	if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
 		return false;
 
@@ -3284,10 +3295,19 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
  */
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
+	struct cfq_queue *old_cfqq = cfqd->active_queue;
+
 	cfq_log_cfqq(cfqd, cfqq, "preempt");
 	cfq_slice_expired(cfqd, 1);
 
 	/*
+	 * workload type is changed, don't save slice, otherwise preempt
+	 * doesn't happen
+	 */
+	if (cfqq_type(old_cfqq) != cfqq_type(cfqq))
+		cfqq->cfqg->saved_workload_slice = 0;
+
+	/*
 	 * Put the new queue at the front of the of the current list,
 	 * so we know that it will be selected next.
 	 */
@@ -3681,13 +3701,13 @@ new_queue:
 	}
 
 	cfqq->allocated[rw]++;
-	atomic_inc(&cfqq->ref);
-
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
+	cfqq->ref++;
 	rq->elevator_private = cic;
 	rq->elevator_private2 = cfqq;
 	rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
 	return 0;
 
 queue_fail:
@@ -3862,6 +3882,10 @@ static void *cfq_init_queue(struct request_queue *q)
 	if (!cfqd)
 		return NULL;
 
+	/*
+	 * Don't need take queue_lock in the routine, since we are
+	 * initializing the ioscheduler, and nobody is using cfqd
+	 */
 	cfqd->cic_index = i;
 
 	/* Init root service tree */
@@ -3881,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	 * Take a reference to root group which we never drop. This is just
 	 * to make sure that cfq_put_cfqg() does not try to kfree root group
 	 */
-	atomic_set(&cfqg->ref, 1);
+	cfqg->ref = 1;
 	rcu_read_lock();
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
@@ -3901,7 +3925,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	 * will not attempt to free it.
 	 */
 	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
-	atomic_inc(&cfqd->oom_cfqq.ref);
+	cfqd->oom_cfqq.ref++;
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
 	INIT_LIST_HEAD(&cfqd->cic_list);
diff --git a/block/genhd.c b/block/genhd.c index 5fa2b44a72ff..6a5b772aa201 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
19 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/log2.h> | ||
21 | 22 | ||
22 | #include "blk.h" | 23 | #include "blk.h" |
23 | 24 | ||
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr); | |||
35 | 36 | ||
36 | static struct device_type disk_type; | 37 | static struct device_type disk_type; |
37 | 38 | ||
39 | static void disk_add_events(struct gendisk *disk); | ||
40 | static void disk_del_events(struct gendisk *disk); | ||
41 | static void disk_release_events(struct gendisk *disk); | ||
42 | |||
38 | /** | 43 | /** |
39 | * disk_get_part - get partition | 44 | * disk_get_part - get partition |
40 | * @disk: disk to look partition from | 45 | * @disk: disk to look partition from |
@@ -239,7 +244,7 @@ static struct blk_major_name { | |||
239 | } *major_names[BLKDEV_MAJOR_HASH_SIZE]; | 244 | } *major_names[BLKDEV_MAJOR_HASH_SIZE]; |
240 | 245 | ||
241 | /* index in the above - for now: assume no multimajor ranges */ | 246 | /* index in the above - for now: assume no multimajor ranges */ |
242 | static inline int major_to_index(int major) | 247 | static inline int major_to_index(unsigned major) |
243 | { | 248 | { |
244 | return major % BLKDEV_MAJOR_HASH_SIZE; | 249 | return major % BLKDEV_MAJOR_HASH_SIZE; |
245 | } | 250 | } |
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data) | |||
502 | return 0; | 507 | return 0; |
503 | } | 508 | } |
504 | 509 | ||
510 | void register_disk(struct gendisk *disk) | ||
511 | { | ||
512 | struct device *ddev = disk_to_dev(disk); | ||
513 | struct block_device *bdev; | ||
514 | struct disk_part_iter piter; | ||
515 | struct hd_struct *part; | ||
516 | int err; | ||
517 | |||
518 | ddev->parent = disk->driverfs_dev; | ||
519 | |||
520 | dev_set_name(ddev, disk->disk_name); | ||
521 | |||
522 | /* delay uevents, until we scanned partition table */ | ||
523 | dev_set_uevent_suppress(ddev, 1); | ||
524 | |||
525 | if (device_add(ddev)) | ||
526 | return; | ||
527 | if (!sysfs_deprecated) { | ||
528 | err = sysfs_create_link(block_depr, &ddev->kobj, | ||
529 | kobject_name(&ddev->kobj)); | ||
530 | if (err) { | ||
531 | device_del(ddev); | ||
532 | return; | ||
533 | } | ||
534 | } | ||
535 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); | ||
536 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
537 | |||
538 | /* No minors to use for partitions */ | ||
539 | if (!disk_partitionable(disk)) | ||
540 | goto exit; | ||
541 | |||
542 | /* No such device (e.g., media were just removed) */ | ||
543 | if (!get_capacity(disk)) | ||
544 | goto exit; | ||
545 | |||
546 | bdev = bdget_disk(disk, 0); | ||
547 | if (!bdev) | ||
548 | goto exit; | ||
549 | |||
550 | bdev->bd_invalidated = 1; | ||
551 | err = blkdev_get(bdev, FMODE_READ, NULL); | ||
552 | if (err < 0) | ||
553 | goto exit; | ||
554 | blkdev_put(bdev, FMODE_READ); | ||
555 | |||
556 | exit: | ||
557 | /* announce disk after possible partitions are created */ | ||
558 | dev_set_uevent_suppress(ddev, 0); | ||
559 | kobject_uevent(&ddev->kobj, KOBJ_ADD); | ||
560 | |||
561 | /* announce possible partitions */ | ||
562 | disk_part_iter_init(&piter, disk, 0); | ||
563 | while ((part = disk_part_iter_next(&piter))) | ||
564 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); | ||
565 | disk_part_iter_exit(&piter); | ||
566 | } | ||
567 | |||
505 | /** | 568 | /** |
506 | * add_disk - add partitioning information to kernel list | 569 | * add_disk - add partitioning information to kernel list |
507 | * @disk: per-device partitioning information | 570 | * @disk: per-device partitioning information |
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk) | |||
551 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 614 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
552 | "bdi"); | 615 | "bdi"); |
553 | WARN_ON(retval); | 616 | WARN_ON(retval); |
554 | } | ||
555 | 617 | ||
618 | disk_add_events(disk); | ||
619 | } | ||
556 | EXPORT_SYMBOL(add_disk); | 620 | EXPORT_SYMBOL(add_disk); |
557 | EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ | ||
558 | 621 | ||
559 | void unlink_gendisk(struct gendisk *disk) | 622 | void del_gendisk(struct gendisk *disk) |
560 | { | 623 | { |
624 | struct disk_part_iter piter; | ||
625 | struct hd_struct *part; | ||
626 | |||
627 | disk_del_events(disk); | ||
628 | |||
629 | /* invalidate stuff */ | ||
630 | disk_part_iter_init(&piter, disk, | ||
631 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | ||
632 | while ((part = disk_part_iter_next(&piter))) { | ||
633 | invalidate_partition(disk, part->partno); | ||
634 | delete_partition(disk, part->partno); | ||
635 | } | ||
636 | disk_part_iter_exit(&piter); | ||
637 | |||
638 | invalidate_partition(disk, 0); | ||
639 | blk_free_devt(disk_to_dev(disk)->devt); | ||
640 | set_capacity(disk, 0); | ||
641 | disk->flags &= ~GENHD_FL_UP; | ||
642 | |||
561 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); | 643 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); |
562 | bdi_unregister(&disk->queue->backing_dev_info); | 644 | bdi_unregister(&disk->queue->backing_dev_info); |
563 | blk_unregister_queue(disk); | 645 | blk_unregister_queue(disk); |
564 | blk_unregister_region(disk_devt(disk), disk->minors); | 646 | blk_unregister_region(disk_devt(disk), disk->minors); |
647 | |||
648 | part_stat_set_all(&disk->part0, 0); | ||
649 | disk->part0.stamp = 0; | ||
650 | |||
651 | kobject_put(disk->part0.holder_dir); | ||
652 | kobject_put(disk->slave_dir); | ||
653 | disk->driverfs_dev = NULL; | ||
654 | if (!sysfs_deprecated) | ||
655 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); | ||
656 | device_del(disk_to_dev(disk)); | ||
565 | } | 657 | } |
658 | EXPORT_SYMBOL(del_gendisk); | ||
566 | 659 | ||
567 | /** | 660 | /** |
568 | * get_gendisk - get partitioning information for a given device | 661 | * get_gendisk - get partitioning information for a given device |
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos) | |||
735 | static void *p; | 828 | static void *p; |
736 | 829 | ||
737 | p = disk_seqf_start(seqf, pos); | 830 | p = disk_seqf_start(seqf, pos); |
738 | if (!IS_ERR(p) && p && !*pos) | 831 | if (!IS_ERR_OR_NULL(p) && !*pos) |
739 | seq_puts(seqf, "major minor #blocks name\n\n"); | 832 | seq_puts(seqf, "major minor #blocks name\n\n"); |
740 | return p; | 833 | return p; |
741 | } | 834 | } |
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev) | |||
1005 | { | 1098 | { |
1006 | struct gendisk *disk = dev_to_disk(dev); | 1099 | struct gendisk *disk = dev_to_disk(dev); |
1007 | 1100 | ||
1101 | disk_release_events(disk); | ||
1008 | kfree(disk->random); | 1102 | kfree(disk->random); |
1009 | disk_replace_part_tbl(disk, NULL); | 1103 | disk_replace_part_tbl(disk, NULL); |
1010 | free_part_stats(&disk->part0); | 1104 | free_part_stats(&disk->part0); |
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void) | |||
1110 | module_init(proc_genhd_init); | 1204 | module_init(proc_genhd_init); |
1111 | #endif /* CONFIG_PROC_FS */ | 1205 | #endif /* CONFIG_PROC_FS */ |
1112 | 1206 | ||
1113 | static void media_change_notify_thread(struct work_struct *work) | ||
1114 | { | ||
1115 | struct gendisk *gd = container_of(work, struct gendisk, async_notify); | ||
1116 | char event[] = "MEDIA_CHANGE=1"; | ||
1117 | char *envp[] = { event, NULL }; | ||
1118 | |||
1119 | /* | ||
1120 | * set enviroment vars to indicate which event this is for | ||
1121 | * so that user space will know to go check the media status. | ||
1122 | */ | ||
1123 | kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); | ||
1124 | put_device(gd->driverfs_dev); | ||
1125 | } | ||
1126 | |||
1127 | #if 0 | ||
1128 | void genhd_media_change_notify(struct gendisk *disk) | ||
1129 | { | ||
1130 | get_device(disk->driverfs_dev); | ||
1131 | schedule_work(&disk->async_notify); | ||
1132 | } | ||
1133 | EXPORT_SYMBOL_GPL(genhd_media_change_notify); | ||
1134 | #endif /* 0 */ | ||
1135 | |||
1136 | dev_t blk_lookup_devt(const char *name, int partno) | 1207 | dev_t blk_lookup_devt(const char *name, int partno) |
1137 | { | 1208 | { |
1138 | dev_t devt = MKDEV(0, 0); | 1209 | dev_t devt = MKDEV(0, 0); |
@@ -1193,13 +1264,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id) | |||
1193 | } | 1264 | } |
1194 | disk->part_tbl->part[0] = &disk->part0; | 1265 | disk->part_tbl->part[0] = &disk->part0; |
1195 | 1266 | ||
1267 | hd_ref_init(&disk->part0); | ||
1268 | |||
1196 | disk->minors = minors; | 1269 | disk->minors = minors; |
1197 | rand_initialize_disk(disk); | 1270 | rand_initialize_disk(disk); |
1198 | disk_to_dev(disk)->class = &block_class; | 1271 | disk_to_dev(disk)->class = &block_class; |
1199 | disk_to_dev(disk)->type = &disk_type; | 1272 | disk_to_dev(disk)->type = &disk_type; |
1200 | device_initialize(disk_to_dev(disk)); | 1273 | device_initialize(disk_to_dev(disk)); |
1201 | INIT_WORK(&disk->async_notify, | ||
1202 | media_change_notify_thread); | ||
1203 | } | 1274 | } |
1204 | return disk; | 1275 | return disk; |
1205 | } | 1276 | } |
@@ -1291,3 +1362,422 @@ int invalidate_partition(struct gendisk *disk, int partno) | |||
1291 | } | 1362 | } |
1292 | 1363 | ||
1293 | EXPORT_SYMBOL(invalidate_partition); | 1364 | EXPORT_SYMBOL(invalidate_partition); |
1365 | |||
1366 | /* | ||
1367 | * Disk events - monitor disk events like media change and eject request. | ||
1368 | */ | ||
1369 | struct disk_events { | ||
1370 | struct list_head node; /* all disk_event's */ | ||
1371 | struct gendisk *disk; /* the associated disk */ | ||
1372 | spinlock_t lock; | ||
1373 | |||
1374 | int block; /* event blocking depth */ | ||
1375 | unsigned int pending; /* events already sent out */ | ||
1376 | unsigned int clearing; /* events being cleared */ | ||
1377 | |||
1378 | long poll_msecs; /* interval, -1 for default */ | ||
1379 | struct delayed_work dwork; | ||
1380 | }; | ||
1381 | |||
1382 | static const char *disk_events_strs[] = { | ||
1383 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", | ||
1384 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", | ||
1385 | }; | ||
1386 | |||
1387 | static char *disk_uevents[] = { | ||
1388 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", | ||
1389 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", | ||
1390 | }; | ||
1391 | |||
1392 | /* list of all disk_events */ | ||
1393 | static DEFINE_MUTEX(disk_events_mutex); | ||
1394 | static LIST_HEAD(disk_events); | ||
1395 | |||
1396 | /* disable in-kernel polling by default */ | ||
1397 | static unsigned long disk_events_dfl_poll_msecs = 0; | ||
1398 | |||
1399 | static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | ||
1400 | { | ||
1401 | struct disk_events *ev = disk->ev; | ||
1402 | long intv_msecs = 0; | ||
1403 | |||
1404 | /* | ||
1405 | * If device-specific poll interval is set, always use it. If | ||
1406 | * the default is being used, poll iff there are events which | ||
1407 | * can't be monitored asynchronously. | ||
1408 | */ | ||
1409 | if (ev->poll_msecs >= 0) | ||
1410 | intv_msecs = ev->poll_msecs; | ||
1411 | else if (disk->events & ~disk->async_events) | ||
1412 | intv_msecs = disk_events_dfl_poll_msecs; | ||
1413 | |||
1414 | return msecs_to_jiffies(intv_msecs); | ||
1415 | } | ||
1416 | |||
1417 | static void __disk_block_events(struct gendisk *disk, bool sync) | ||
1418 | { | ||
1419 | struct disk_events *ev = disk->ev; | ||
1420 | unsigned long flags; | ||
1421 | bool cancel; | ||
1422 | |||
1423 | spin_lock_irqsave(&ev->lock, flags); | ||
1424 | cancel = !ev->block++; | ||
1425 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1426 | |||
1427 | if (cancel) { | ||
1428 | if (sync) | ||
1429 | cancel_delayed_work_sync(&disk->ev->dwork); | ||
1430 | else | ||
1431 | cancel_delayed_work(&disk->ev->dwork); | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | static void __disk_unblock_events(struct gendisk *disk, bool check_now) | ||
1436 | { | ||
1437 | struct disk_events *ev = disk->ev; | ||
1438 | unsigned long intv; | ||
1439 | unsigned long flags; | ||
1440 | |||
1441 | spin_lock_irqsave(&ev->lock, flags); | ||
1442 | |||
1443 | if (WARN_ON_ONCE(ev->block <= 0)) | ||
1444 | goto out_unlock; | ||
1445 | |||
1446 | if (--ev->block) | ||
1447 | goto out_unlock; | ||
1448 | |||
1449 | /* | ||
1450 | * Not exactly a latency-critical operation; set poll timer | ||
1451 | * slack to 25% and kick an event check. | ||
1452 | */ | ||
1453 | intv = disk_events_poll_jiffies(disk); | ||
1454 | set_timer_slack(&ev->dwork.timer, intv / 4); | ||
1455 | if (check_now) | ||
1456 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1457 | else if (intv) | ||
1458 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1459 | out_unlock: | ||
1460 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1461 | } | ||
1462 | |||
1463 | /** | ||
1464 | * disk_block_events - block and flush disk event checking | ||
1465 | * @disk: disk to block events for | ||
1466 | * | ||
1467 | * On return from this function, it is guaranteed that event checking | ||
1468 | * isn't in progress and won't happen until unblocked by | ||
1469 | * disk_unblock_events(). Events blocking is counted and the actual | ||
1470 | * unblocking happens after the matching number of unblocks are done. | ||
1471 | * | ||
1472 | * Note that this intentionally does not block event checking from | ||
1473 | * disk_clear_events(). | ||
1474 | * | ||
1475 | * CONTEXT: | ||
1476 | * Might sleep. | ||
1477 | */ | ||
1478 | void disk_block_events(struct gendisk *disk) | ||
1479 | { | ||
1480 | if (disk->ev) | ||
1481 | __disk_block_events(disk, true); | ||
1482 | } | ||
1483 | |||
1484 | /** | ||
1485 | * disk_unblock_events - unblock disk event checking | ||
1486 | * @disk: disk to unblock events for | ||
1487 | * | ||
1488 | * Undo disk_block_events(). When the block count reaches zero, it | ||
1489 | * starts events polling if configured. | ||
1490 | * | ||
1491 | * CONTEXT: | ||
1492 | * Don't care. Safe to call from irq context. | ||
1493 | */ | ||
1494 | void disk_unblock_events(struct gendisk *disk) | ||
1495 | { | ||
1496 | if (disk->ev) | ||
1497 | __disk_unblock_events(disk, true); | ||
1498 | } | ||
1499 | |||
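
For illustration, a minimal sketch of how a caller might pair the two entry points above; the function name and call site are hypothetical, not part of this patch:

	/*
	 * Suppress event checking around work that must not race with
	 * ->check_events().  Blocking nests, so this is safe even when
	 * the caller already runs under disk_block_events().
	 */
	static void example_quiesce_and_work(struct gendisk *disk)
	{
		disk_block_events(disk);	/* flushes any in-flight check */
		/* ... work that must not race with event checking ... */
		disk_unblock_events(disk);	/* restarts polling if configured */
	}

Both calls may nest freely because the blocking depth is counted in disk_events->block.
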
1500 | /** | ||
1501 | * disk_check_events - schedule immediate event checking | ||
1502 | * @disk: disk to check events for | ||
1503 | * | ||
1504 | * Schedule immediate event checking on @disk if not blocked. | ||
1505 | * | ||
1506 | * CONTEXT: | ||
1507 | * Don't care. Safe to call from irq context. | ||
1508 | */ | ||
1509 | void disk_check_events(struct gendisk *disk) | ||
1510 | { | ||
1511 | if (disk->ev) { | ||
1512 | __disk_block_events(disk, false); | ||
1513 | __disk_unblock_events(disk, true); | ||
1514 | } | ||
1515 | } | ||
1516 | EXPORT_SYMBOL_GPL(disk_check_events); | ||
1517 | |||
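
Since the kernel-doc notes disk_check_events() is safe from irq context, a driver with an "attention" interrupt could kick an immediate check straight from its handler. A hedged sketch, with the driver-private structure invented for illustration:

	#include <linux/interrupt.h>

	/* Hypothetical driver state; not part of this patch. */
	struct example_dev {
		struct gendisk *disk;
	};

	static irqreturn_t example_isr(int irq, void *data)
	{
		struct example_dev *edev = data;

		disk_check_events(edev->disk);	/* schedule an immediate check */
		return IRQ_HANDLED;
	}
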
1518 | /** | ||
1519 | * disk_clear_events - synchronously check, clear and return pending events | ||
1520 | * @disk: disk to fetch and clear events from | ||
1521 | * @mask: mask of events to be fetched and cleared | ||
1522 | * | ||
1523 | * Disk events are synchronously checked and pending events in @mask | ||
1524 | * are cleared and returned. This ignores the block count. | ||
1525 | * | ||
1526 | * CONTEXT: | ||
1527 | * Might sleep. | ||
1528 | */ | ||
1529 | unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | ||
1530 | { | ||
1531 | const struct block_device_operations *bdops = disk->fops; | ||
1532 | struct disk_events *ev = disk->ev; | ||
1533 | unsigned int pending; | ||
1534 | |||
1535 | if (!ev) { | ||
1536 | /* for drivers still using the old ->media_changed method */ | ||
1537 | if ((mask & DISK_EVENT_MEDIA_CHANGE) && | ||
1538 | bdops->media_changed && bdops->media_changed(disk)) | ||
1539 | return DISK_EVENT_MEDIA_CHANGE; | ||
1540 | return 0; | ||
1541 | } | ||
1542 | |||
1543 | /* tell the workfn about the events being cleared */ | ||
1544 | spin_lock_irq(&ev->lock); | ||
1545 | ev->clearing |= mask; | ||
1546 | spin_unlock_irq(&ev->lock); | ||
1547 | |||
1548 | /* unconditionally schedule event check and wait for it to finish */ | ||
1549 | __disk_block_events(disk, true); | ||
1550 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1551 | flush_delayed_work(&ev->dwork); | ||
1552 | __disk_unblock_events(disk, false); | ||
1553 | |||
1554 | /* then, fetch and clear pending events */ | ||
1555 | spin_lock_irq(&ev->lock); | ||
1556 | WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ | ||
1557 | pending = ev->pending & mask; | ||
1558 | ev->pending &= ~mask; | ||
1559 | spin_unlock_irq(&ev->lock); | ||
1560 | |||
1561 | return pending; | ||
1562 | } | ||
1563 | |||
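
As a usage illustration, a process-context caller (the kernel-doc says disk_clear_events() might sleep) could fetch-and-clear the media-change bit like this; the wrapping helper is hypothetical:

	/* Has the media changed since the last time we asked? */
	static bool example_media_changed(struct gendisk *disk)
	{
		return disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE) &
			DISK_EVENT_MEDIA_CHANGE;
	}
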
1564 | static void disk_events_workfn(struct work_struct *work) | ||
1565 | { | ||
1566 | struct delayed_work *dwork = to_delayed_work(work); | ||
1567 | struct disk_events *ev = container_of(dwork, struct disk_events, dwork); | ||
1568 | struct gendisk *disk = ev->disk; | ||
1569 | char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; | ||
1570 | unsigned int clearing = ev->clearing; | ||
1571 | unsigned int events; | ||
1572 | unsigned long intv; | ||
1573 | int nr_events = 0, i; | ||
1574 | |||
1575 | /* check events */ | ||
1576 | events = disk->fops->check_events(disk, clearing); | ||
1577 | |||
1578 | /* accumulate pending events and schedule next poll if necessary */ | ||
1579 | spin_lock_irq(&ev->lock); | ||
1580 | |||
1581 | events &= ~ev->pending; | ||
1582 | ev->pending |= events; | ||
1583 | ev->clearing &= ~clearing; | ||
1584 | |||
1585 | intv = disk_events_poll_jiffies(disk); | ||
1586 | if (!ev->block && intv) | ||
1587 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1588 | |||
1589 | spin_unlock_irq(&ev->lock); | ||
1590 | |||
1591 | /* tell userland about new events */ | ||
1592 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | ||
1593 | if (events & (1 << i)) | ||
1594 | envp[nr_events++] = disk_uevents[i]; | ||
1595 | |||
1596 | if (nr_events) | ||
1597 | kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); | ||
1598 | } | ||
1599 | |||
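
The workfn above is what drives the new ->check_events() block_device_operations callback. A driver-side sketch of such a callback, with the hardware queries invented for illustration:

	/* Hypothetical ->check_events() implementation polled by the workfn. */
	static unsigned int example_check_events(struct gendisk *disk,
						 unsigned int clearing)
	{
		struct example_dev *edev = disk->private_data;
		unsigned int events = 0;

		/* example_hw_*() are assumed hardware queries, not real APIs */
		if (example_hw_media_changed(edev))
			events |= DISK_EVENT_MEDIA_CHANGE;
		if (example_hw_eject_requested(edev))
			events |= DISK_EVENT_EJECT_REQUEST;

		return events;
	}

The workfn masks the result against what is already pending and raises uevents only for newly seen bits.
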
1600 | /* | ||
1601 | * A disk events enabled device has the following sysfs nodes under | ||
1602 | * its /sys/block/X/ directory. | ||
1603 | * | ||
1604 | * events : list of all supported events | ||
1605 | * events_async : list of events which can be detected w/o polling | ||
1606 | * events_poll_msecs : polling interval, 0: disable, -1: system default | ||
1607 | */ | ||
1608 | static ssize_t __disk_events_show(unsigned int events, char *buf) | ||
1609 | { | ||
1610 | const char *delim = ""; | ||
1611 | ssize_t pos = 0; | ||
1612 | int i; | ||
1613 | |||
1614 | for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) | ||
1615 | if (events & (1 << i)) { | ||
1616 | pos += sprintf(buf + pos, "%s%s", | ||
1617 | delim, disk_events_strs[i]); | ||
1618 | delim = " "; | ||
1619 | } | ||
1620 | if (pos) | ||
1621 | pos += sprintf(buf + pos, "\n"); | ||
1622 | return pos; | ||
1623 | } | ||
1624 | |||
1625 | static ssize_t disk_events_show(struct device *dev, | ||
1626 | struct device_attribute *attr, char *buf) | ||
1627 | { | ||
1628 | struct gendisk *disk = dev_to_disk(dev); | ||
1629 | |||
1630 | return __disk_events_show(disk->events, buf); | ||
1631 | } | ||
1632 | |||
1633 | static ssize_t disk_events_async_show(struct device *dev, | ||
1634 | struct device_attribute *attr, char *buf) | ||
1635 | { | ||
1636 | struct gendisk *disk = dev_to_disk(dev); | ||
1637 | |||
1638 | return __disk_events_show(disk->async_events, buf); | ||
1639 | } | ||
1640 | |||
1641 | static ssize_t disk_events_poll_msecs_show(struct device *dev, | ||
1642 | struct device_attribute *attr, | ||
1643 | char *buf) | ||
1644 | { | ||
1645 | struct gendisk *disk = dev_to_disk(dev); | ||
1646 | |||
1647 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | ||
1648 | } | ||
1649 | |||
1650 | static ssize_t disk_events_poll_msecs_store(struct device *dev, | ||
1651 | struct device_attribute *attr, | ||
1652 | const char *buf, size_t count) | ||
1653 | { | ||
1654 | struct gendisk *disk = dev_to_disk(dev); | ||
1655 | long intv; | ||
1656 | |||
1657 | if (!count || !sscanf(buf, "%ld", &intv)) | ||
1658 | return -EINVAL; | ||
1659 | |||
1660 | if (intv < 0 && intv != -1) | ||
1661 | return -EINVAL; | ||
1662 | |||
1663 | __disk_block_events(disk, true); | ||
1664 | disk->ev->poll_msecs = intv; | ||
1665 | __disk_unblock_events(disk, true); | ||
1666 | |||
1667 | return count; | ||
1668 | } | ||
1669 | |||
1670 | static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); | ||
1671 | static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); | ||
1672 | static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, | ||
1673 | disk_events_poll_msecs_show, | ||
1674 | disk_events_poll_msecs_store); | ||
1675 | |||
1676 | static const struct attribute *disk_events_attrs[] = { | ||
1677 | &dev_attr_events.attr, | ||
1678 | &dev_attr_events_async.attr, | ||
1679 | &dev_attr_events_poll_msecs.attr, | ||
1680 | NULL, | ||
1681 | }; | ||
1682 | |||
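
From userspace the three attributes are ordinary sysfs files; a small, hedged userspace sketch ("sr0" is only an example device name):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sr0/events_poll_msecs", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "2000\n");	/* poll every 2s; 0 disables, -1 = system default */
		fclose(f);
		return 0;
	}
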
1683 | /* | ||
1684 | * The default polling interval can be specified by the kernel | ||
1685 | * parameter block.events_dfl_poll_msecs which defaults to 0 | ||
1686 | * (disable). This can also be modified at runtime by writing to | ||
1687 | * /sys/module/block/events_dfl_poll_msecs. | ||
1688 | */ | ||
1689 | static int disk_events_set_dfl_poll_msecs(const char *val, | ||
1690 | const struct kernel_param *kp) | ||
1691 | { | ||
1692 | struct disk_events *ev; | ||
1693 | int ret; | ||
1694 | |||
1695 | ret = param_set_ulong(val, kp); | ||
1696 | if (ret < 0) | ||
1697 | return ret; | ||
1698 | |||
1699 | mutex_lock(&disk_events_mutex); | ||
1700 | |||
1701 | list_for_each_entry(ev, &disk_events, node) | ||
1702 | disk_check_events(ev->disk); | ||
1703 | |||
1704 | mutex_unlock(&disk_events_mutex); | ||
1705 | |||
1706 | return 0; | ||
1707 | } | ||
1708 | |||
1709 | static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { | ||
1710 | .set = disk_events_set_dfl_poll_msecs, | ||
1711 | .get = param_get_ulong, | ||
1712 | }; | ||
1713 | |||
1714 | #undef MODULE_PARAM_PREFIX | ||
1715 | #define MODULE_PARAM_PREFIX "block." | ||
1716 | |||
1717 | module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | ||
1718 | &disk_events_dfl_poll_msecs, 0644); | ||
1719 | |||
1720 | /* | ||
1721 | * disk_{add|del|release}_events - initialize and destroy disk_events. | ||
1722 | */ | ||
1723 | static void disk_add_events(struct gendisk *disk) | ||
1724 | { | ||
1725 | struct disk_events *ev; | ||
1726 | |||
1727 | if (!disk->fops->check_events || !(disk->events | disk->async_events)) | ||
1728 | return; | ||
1729 | |||
1730 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); | ||
1731 | if (!ev) { | ||
1732 | pr_warn("%s: failed to initialize events\n", disk->disk_name); | ||
1733 | return; | ||
1734 | } | ||
1735 | |||
1736 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, | ||
1737 | disk_events_attrs) < 0) { | ||
1738 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1739 | disk->disk_name); | ||
1740 | kfree(ev); | ||
1741 | return; | ||
1742 | } | ||
1743 | |||
1744 | disk->ev = ev; | ||
1745 | |||
1746 | INIT_LIST_HEAD(&ev->node); | ||
1747 | ev->disk = disk; | ||
1748 | spin_lock_init(&ev->lock); | ||
1749 | ev->block = 1; | ||
1750 | ev->poll_msecs = -1; | ||
1751 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | ||
1752 | |||
1753 | mutex_lock(&disk_events_mutex); | ||
1754 | list_add_tail(&ev->node, &disk_events); | ||
1755 | mutex_unlock(&disk_events_mutex); | ||
1756 | |||
1757 | /* | ||
1758 | * Block count is initialized to 1 and the following initial | ||
1759 | * unblock kicks it into action. | ||
1760 | */ | ||
1761 | __disk_unblock_events(disk, true); | ||
1762 | } | ||
1763 | |||
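
disk_add_events() only registers a disk that declares events and a ->check_events() callback, so driver setup would look roughly like this before the disk is registered (a sketch; the names are invented, and example_check_events() is the callback sketched earlier):

	static const struct block_device_operations example_fops = {
		.owner		= THIS_MODULE,
		.check_events	= example_check_events,
	};

	static void example_init_disk(struct gendisk *disk)
	{
		disk->fops   = &example_fops;
		disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
		/* disk registration is then expected to invoke disk_add_events() */
	}
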
1764 | static void disk_del_events(struct gendisk *disk) | ||
1765 | { | ||
1766 | if (!disk->ev) | ||
1767 | return; | ||
1768 | |||
1769 | __disk_block_events(disk, true); | ||
1770 | |||
1771 | mutex_lock(&disk_events_mutex); | ||
1772 | list_del_init(&disk->ev->node); | ||
1773 | mutex_unlock(&disk_events_mutex); | ||
1774 | |||
1775 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | ||
1776 | } | ||
1777 | |||
1778 | static void disk_release_events(struct gendisk *disk) | ||
1779 | { | ||
1780 | /* the block count should be 1 from disk_del_events() */ | ||
1781 | WARN_ON_ONCE(disk->ev && disk->ev->block != 1); | ||
1782 | kfree(disk->ev); | ||
1783 | } | ||
diff --git a/block/ioctl.c b/block/ioctl.c index a9a302eba01e..9049d460fa89 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
294 | return -EINVAL; | 294 | return -EINVAL; |
295 | if (get_user(n, (int __user *) arg)) | 295 | if (get_user(n, (int __user *) arg)) |
296 | return -EFAULT; | 296 | return -EFAULT; |
297 | if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) | 297 | if (!(mode & FMODE_EXCL) && |
298 | blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) | ||
298 | return -EBUSY; | 299 | return -EBUSY; |
299 | ret = set_blocksize(bdev, n); | 300 | ret = set_blocksize(bdev, n); |
300 | if (!(mode & FMODE_EXCL)) | 301 | if (!(mode & FMODE_EXCL)) |
301 | bd_release(bdev); | 302 | blkdev_put(bdev, mode | FMODE_EXCL); |
302 | return ret; | 303 | return ret; |
303 | case BLKPG: | 304 | case BLKPG: |
304 | ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); | 305 | ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); |