diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 13:45:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 13:45:01 -0500 |
commit | 275220f0fcff1adf28a717076e00f575edf05fda (patch) | |
tree | d249bccc80c64443dab211639050c4fb14332648 /block | |
parent | fe3c560b8a22cb28e54fe8950abef38e88d75831 (diff) | |
parent | 81c5e2ae33c4b19e53966b427e33646bf6811830 (diff) |
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits)
block: ensure that completion error gets properly traced
blktrace: add missing probe argument to block_bio_complete
block cfq: don't use atomic_t for cfq_group
block cfq: don't use atomic_t for cfq_queue
block: trace event block fix unassigned field
block: add internal hd part table references
block: fix accounting bug on cross partition merges
kref: add kref_test_and_get
bio-integrity: mark kintegrityd_wq highpri and CPU intensive
block: make kblockd_workqueue smarter
Revert "sd: implement sd_check_events()"
block: Clean up exit_io_context() source code.
Fix compile warnings due to missing removal of a 'ret' variable
fs/block: type signature of major_to_index(int) to major_to_index(unsigned)
block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p)
cfq-iosched: don't check cfqg in choose_service_tree()
fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors
cdrom: export cdrom_check_events()
sd: implement sd_check_events()
sr: implement sr_check_events()
...
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-cgroup.c | 4 | ||||
-rw-r--r-- | block/blk-core.c | 40 | ||||
-rw-r--r-- | block/blk-ioc.c | 5 | ||||
-rw-r--r-- | block/blk-merge.c | 3 | ||||
-rw-r--r-- | block/cfq-iosched.c | 112 | ||||
-rw-r--r-- | block/genhd.c | 550 | ||||
-rw-r--r-- | block/ioctl.c | 5 |
7 files changed, 614 insertions, 105 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b1febd0f6d2a..455768a3eb9e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |||
1452 | goto done; | 1452 | goto done; |
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | /* Currently we do not support hierarchy deeper than two level (0,1) */ | ||
1456 | if (parent != cgroup->top_cgroup) | ||
1457 | return ERR_PTR(-EPERM); | ||
1458 | |||
1459 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); | 1455 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); |
1460 | if (!blkcg) | 1456 | if (!blkcg) |
1461 | return ERR_PTR(-ENOMEM); | 1457 | return ERR_PTR(-ENOMEM); |
diff --git a/block/blk-core.c b/block/blk-core.c index 4ce953f1b390..2f4002f79a24 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -33,7 +33,7 @@ | |||
33 | 33 | ||
34 | #include "blk.h" | 34 | #include "blk.h" |
35 | 35 | ||
36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | 36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
39 | 39 | ||
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io) | |||
64 | return; | 64 | return; |
65 | 65 | ||
66 | cpu = part_stat_lock(); | 66 | cpu = part_stat_lock(); |
67 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | ||
68 | 67 | ||
69 | if (!new_io) | 68 | if (!new_io) { |
69 | part = rq->part; | ||
70 | part_stat_inc(cpu, part, merges[rw]); | 70 | part_stat_inc(cpu, part, merges[rw]); |
71 | else { | 71 | } else { |
72 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | ||
73 | if (!hd_struct_try_get(part)) { | ||
74 | /* | ||
75 | * The partition is already being removed, | ||
76 | * the request will be accounted on the disk only | ||
77 | * | ||
78 | * We take a reference on disk->part0 although that | ||
79 | * partition will never be deleted, so we can treat | ||
80 | * it as any other partition. | ||
81 | */ | ||
82 | part = &rq->rq_disk->part0; | ||
83 | hd_struct_get(part); | ||
84 | } | ||
72 | part_round_stats(cpu, part); | 85 | part_round_stats(cpu, part); |
73 | part_inc_in_flight(part, rw); | 86 | part_inc_in_flight(part, rw); |
87 | rq->part = part; | ||
74 | } | 88 | } |
75 | 89 | ||
76 | part_stat_unlock(); | 90 | part_stat_unlock(); |
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |||
128 | rq->ref_count = 1; | 142 | rq->ref_count = 1; |
129 | rq->start_time = jiffies; | 143 | rq->start_time = jiffies; |
130 | set_start_time_ns(rq); | 144 | set_start_time_ns(rq); |
145 | rq->part = NULL; | ||
131 | } | 146 | } |
132 | EXPORT_SYMBOL(blk_rq_init); | 147 | EXPORT_SYMBOL(blk_rq_init); |
133 | 148 | ||
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio) | |||
1329 | bio->bi_sector += p->start_sect; | 1344 | bio->bi_sector += p->start_sect; |
1330 | bio->bi_bdev = bdev->bd_contains; | 1345 | bio->bi_bdev = bdev->bd_contains; |
1331 | 1346 | ||
1332 | trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, | 1347 | trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, |
1333 | bdev->bd_dev, | 1348 | bdev->bd_dev, |
1334 | bio->bi_sector - p->start_sect); | 1349 | bio->bi_sector - p->start_sect); |
1335 | } | 1350 | } |
1336 | } | 1351 | } |
1337 | 1352 | ||
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio) | |||
1500 | goto end_io; | 1515 | goto end_io; |
1501 | 1516 | ||
1502 | if (old_sector != -1) | 1517 | if (old_sector != -1) |
1503 | trace_block_remap(q, bio, old_dev, old_sector); | 1518 | trace_block_bio_remap(q, bio, old_dev, old_sector); |
1504 | 1519 | ||
1505 | old_sector = bio->bi_sector; | 1520 | old_sector = bio->bi_sector; |
1506 | old_dev = bio->bi_bdev->bd_dev; | 1521 | old_dev = bio->bi_bdev->bd_dev; |
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) | |||
1776 | int cpu; | 1791 | int cpu; |
1777 | 1792 | ||
1778 | cpu = part_stat_lock(); | 1793 | cpu = part_stat_lock(); |
1779 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | 1794 | part = req->part; |
1780 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); | 1795 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); |
1781 | part_stat_unlock(); | 1796 | part_stat_unlock(); |
1782 | } | 1797 | } |
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req) | |||
1796 | int cpu; | 1811 | int cpu; |
1797 | 1812 | ||
1798 | cpu = part_stat_lock(); | 1813 | cpu = part_stat_lock(); |
1799 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | 1814 | part = req->part; |
1800 | 1815 | ||
1801 | part_stat_inc(cpu, part, ios[rw]); | 1816 | part_stat_inc(cpu, part, ios[rw]); |
1802 | part_stat_add(cpu, part, ticks[rw], duration); | 1817 | part_stat_add(cpu, part, ticks[rw], duration); |
1803 | part_round_stats(cpu, part); | 1818 | part_round_stats(cpu, part); |
1804 | part_dec_in_flight(part, rw); | 1819 | part_dec_in_flight(part, rw); |
1805 | 1820 | ||
1821 | hd_struct_put(part); | ||
1806 | part_stat_unlock(); | 1822 | part_stat_unlock(); |
1807 | } | 1823 | } |
1808 | } | 1824 | } |
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void) | |||
2606 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2622 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
2607 | sizeof(((struct request *)0)->cmd_flags)); | 2623 | sizeof(((struct request *)0)->cmd_flags)); |
2608 | 2624 | ||
2609 | kblockd_workqueue = create_workqueue("kblockd"); | 2625 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ |
2626 | kblockd_workqueue = alloc_workqueue("kblockd", | ||
2627 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); | ||
2610 | if (!kblockd_workqueue) | 2628 | if (!kblockd_workqueue) |
2611 | panic("Failed to create kblockd\n"); | 2629 | panic("Failed to create kblockd\n"); |
2612 | 2630 | ||
diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 3c7a339fe381..b791022beef3 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c | |||
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc) | |||
64 | rcu_read_unlock(); | 64 | rcu_read_unlock(); |
65 | } | 65 | } |
66 | 66 | ||
67 | /* Called by the exitting task */ | 67 | /* Called by the exiting task */ |
68 | void exit_io_context(struct task_struct *task) | 68 | void exit_io_context(struct task_struct *task) |
69 | { | 69 | { |
70 | struct io_context *ioc; | 70 | struct io_context *ioc; |
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task) | |||
74 | task->io_context = NULL; | 74 | task->io_context = NULL; |
75 | task_unlock(task); | 75 | task_unlock(task); |
76 | 76 | ||
77 | if (atomic_dec_and_test(&ioc->nr_tasks)) { | 77 | if (atomic_dec_and_test(&ioc->nr_tasks)) |
78 | cfq_exit(ioc); | 78 | cfq_exit(ioc); |
79 | 79 | ||
80 | } | ||
81 | put_io_context(ioc); | 80 | put_io_context(ioc); |
82 | } | 81 | } |
83 | 82 | ||
diff --git a/block/blk-merge.c b/block/blk-merge.c index 74bc4a768f32..ea85e20d5e94 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req) | |||
351 | int cpu; | 351 | int cpu; |
352 | 352 | ||
353 | cpu = part_stat_lock(); | 353 | cpu = part_stat_lock(); |
354 | part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); | 354 | part = req->part; |
355 | 355 | ||
356 | part_round_stats(cpu, part); | 356 | part_round_stats(cpu, part); |
357 | part_dec_in_flight(part, rq_data_dir(req)); | 357 | part_dec_in_flight(part, rq_data_dir(req)); |
358 | 358 | ||
359 | hd_struct_put(part); | ||
359 | part_stat_unlock(); | 360 | part_stat_unlock(); |
360 | } | 361 | } |
361 | } | 362 | } |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 78ee4b1d4e85..8427697c5437 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -87,7 +87,6 @@ struct cfq_rb_root { | |||
87 | unsigned count; | 87 | unsigned count; |
88 | unsigned total_weight; | 88 | unsigned total_weight; |
89 | u64 min_vdisktime; | 89 | u64 min_vdisktime; |
90 | struct rb_node *active; | ||
91 | }; | 90 | }; |
92 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ | 91 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ |
93 | .count = 0, .min_vdisktime = 0, } | 92 | .count = 0, .min_vdisktime = 0, } |
@@ -97,7 +96,7 @@ struct cfq_rb_root { | |||
97 | */ | 96 | */ |
98 | struct cfq_queue { | 97 | struct cfq_queue { |
99 | /* reference count */ | 98 | /* reference count */ |
100 | atomic_t ref; | 99 | int ref; |
101 | /* various state flags, see below */ | 100 | /* various state flags, see below */ |
102 | unsigned int flags; | 101 | unsigned int flags; |
103 | /* parent cfq_data */ | 102 | /* parent cfq_data */ |
@@ -180,7 +179,6 @@ struct cfq_group { | |||
180 | /* group service_tree key */ | 179 | /* group service_tree key */ |
181 | u64 vdisktime; | 180 | u64 vdisktime; |
182 | unsigned int weight; | 181 | unsigned int weight; |
183 | bool on_st; | ||
184 | 182 | ||
185 | /* number of cfqq currently on this group */ | 183 | /* number of cfqq currently on this group */ |
186 | int nr_cfqq; | 184 | int nr_cfqq; |
@@ -209,7 +207,7 @@ struct cfq_group { | |||
209 | struct blkio_group blkg; | 207 | struct blkio_group blkg; |
210 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 208 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
211 | struct hlist_node cfqd_node; | 209 | struct hlist_node cfqd_node; |
212 | atomic_t ref; | 210 | int ref; |
213 | #endif | 211 | #endif |
214 | /* number of requests that are on the dispatch list or inside driver */ | 212 | /* number of requests that are on the dispatch list or inside driver */ |
215 | int dispatched; | 213 | int dispatched; |
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st) | |||
563 | u64 vdisktime = st->min_vdisktime; | 561 | u64 vdisktime = st->min_vdisktime; |
564 | struct cfq_group *cfqg; | 562 | struct cfq_group *cfqg; |
565 | 563 | ||
566 | if (st->active) { | ||
567 | cfqg = rb_entry_cfqg(st->active); | ||
568 | vdisktime = cfqg->vdisktime; | ||
569 | } | ||
570 | |||
571 | if (st->left) { | 564 | if (st->left) { |
572 | cfqg = rb_entry_cfqg(st->left); | 565 | cfqg = rb_entry_cfqg(st->left); |
573 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); | 566 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); |
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
646 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) | 639 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) |
647 | { | 640 | { |
648 | if (cfq_cfqq_slice_new(cfqq)) | 641 | if (cfq_cfqq_slice_new(cfqq)) |
649 | return 0; | 642 | return false; |
650 | if (time_before(jiffies, cfqq->slice_end)) | 643 | if (time_before(jiffies, cfqq->slice_end)) |
651 | return 0; | 644 | return false; |
652 | 645 | ||
653 | return 1; | 646 | return true; |
654 | } | 647 | } |
655 | 648 | ||
656 | /* | 649 | /* |
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
869 | struct rb_node *n; | 862 | struct rb_node *n; |
870 | 863 | ||
871 | cfqg->nr_cfqq++; | 864 | cfqg->nr_cfqq++; |
872 | if (cfqg->on_st) | 865 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
873 | return; | 866 | return; |
874 | 867 | ||
875 | /* | 868 | /* |
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
885 | cfqg->vdisktime = st->min_vdisktime; | 878 | cfqg->vdisktime = st->min_vdisktime; |
886 | 879 | ||
887 | __cfq_group_service_tree_add(st, cfqg); | 880 | __cfq_group_service_tree_add(st, cfqg); |
888 | cfqg->on_st = true; | ||
889 | st->total_weight += cfqg->weight; | 881 | st->total_weight += cfqg->weight; |
890 | } | 882 | } |
891 | 883 | ||
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
894 | { | 886 | { |
895 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 887 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
896 | 888 | ||
897 | if (st->active == &cfqg->rb_node) | ||
898 | st->active = NULL; | ||
899 | |||
900 | BUG_ON(cfqg->nr_cfqq < 1); | 889 | BUG_ON(cfqg->nr_cfqq < 1); |
901 | cfqg->nr_cfqq--; | 890 | cfqg->nr_cfqq--; |
902 | 891 | ||
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
905 | return; | 894 | return; |
906 | 895 | ||
907 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); | 896 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
908 | cfqg->on_st = false; | ||
909 | st->total_weight -= cfqg->weight; | 897 | st->total_weight -= cfqg->weight; |
910 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 898 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
911 | cfq_rb_erase(&cfqg->rb_node, st); | 899 | cfq_rb_erase(&cfqg->rb_node, st); |
@@ -1026,7 +1014,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
1026 | * elevator which will be dropped by either elevator exit | 1014 | * elevator which will be dropped by either elevator exit |
1027 | * or cgroup deletion path depending on who is exiting first. | 1015 | * or cgroup deletion path depending on who is exiting first. |
1028 | */ | 1016 | */ |
1029 | atomic_set(&cfqg->ref, 1); | 1017 | cfqg->ref = 1; |
1030 | 1018 | ||
1031 | /* | 1019 | /* |
1032 | * Add group onto cgroup list. It might happen that bdi->dev is | 1020 | * Add group onto cgroup list. It might happen that bdi->dev is |
@@ -1071,7 +1059,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
1071 | 1059 | ||
1072 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | 1060 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) |
1073 | { | 1061 | { |
1074 | atomic_inc(&cfqg->ref); | 1062 | cfqg->ref++; |
1075 | return cfqg; | 1063 | return cfqg; |
1076 | } | 1064 | } |
1077 | 1065 | ||
@@ -1083,7 +1071,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | |||
1083 | 1071 | ||
1084 | cfqq->cfqg = cfqg; | 1072 | cfqq->cfqg = cfqg; |
1085 | /* cfqq reference on cfqg */ | 1073 | /* cfqq reference on cfqg */ |
1086 | atomic_inc(&cfqq->cfqg->ref); | 1074 | cfqq->cfqg->ref++; |
1087 | } | 1075 | } |
1088 | 1076 | ||
1089 | static void cfq_put_cfqg(struct cfq_group *cfqg) | 1077 | static void cfq_put_cfqg(struct cfq_group *cfqg) |
@@ -1091,11 +1079,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg) | |||
1091 | struct cfq_rb_root *st; | 1079 | struct cfq_rb_root *st; |
1092 | int i, j; | 1080 | int i, j; |
1093 | 1081 | ||
1094 | BUG_ON(atomic_read(&cfqg->ref) <= 0); | 1082 | BUG_ON(cfqg->ref <= 0); |
1095 | if (!atomic_dec_and_test(&cfqg->ref)) | 1083 | cfqg->ref--; |
1084 | if (cfqg->ref) | ||
1096 | return; | 1085 | return; |
1097 | for_each_cfqg_st(cfqg, i, j, st) | 1086 | for_each_cfqg_st(cfqg, i, j, st) |
1098 | BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); | 1087 | BUG_ON(!RB_EMPTY_ROOT(&st->rb)); |
1099 | kfree(cfqg); | 1088 | kfree(cfqg); |
1100 | } | 1089 | } |
1101 | 1090 | ||
@@ -1200,7 +1189,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1200 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | 1189 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); |
1201 | cfqq->orig_cfqg = cfqq->cfqg; | 1190 | cfqq->orig_cfqg = cfqq->cfqg; |
1202 | cfqq->cfqg = &cfqd->root_group; | 1191 | cfqq->cfqg = &cfqd->root_group; |
1203 | atomic_inc(&cfqd->root_group.ref); | 1192 | cfqd->root_group.ref++; |
1204 | group_changed = 1; | 1193 | group_changed = 1; |
1205 | } else if (!cfqd->cfq_group_isolation | 1194 | } else if (!cfqd->cfq_group_isolation |
1206 | && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { | 1195 | && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { |
@@ -1687,9 +1676,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1687 | if (cfqq == cfqd->active_queue) | 1676 | if (cfqq == cfqd->active_queue) |
1688 | cfqd->active_queue = NULL; | 1677 | cfqd->active_queue = NULL; |
1689 | 1678 | ||
1690 | if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active) | ||
1691 | cfqd->grp_service_tree.active = NULL; | ||
1692 | |||
1693 | if (cfqd->active_cic) { | 1679 | if (cfqd->active_cic) { |
1694 | put_io_context(cfqd->active_cic->ioc); | 1680 | put_io_context(cfqd->active_cic->ioc); |
1695 | cfqd->active_cic = NULL; | 1681 | cfqd->active_cic = NULL; |
@@ -1901,10 +1887,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1901 | * in their service tree. | 1887 | * in their service tree. |
1902 | */ | 1888 | */ |
1903 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) | 1889 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) |
1904 | return 1; | 1890 | return true; |
1905 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", | 1891 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", |
1906 | service_tree->count); | 1892 | service_tree->count); |
1907 | return 0; | 1893 | return false; |
1908 | } | 1894 | } |
1909 | 1895 | ||
1910 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) | 1896 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
@@ -2040,7 +2026,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq) | |||
2040 | int process_refs, io_refs; | 2026 | int process_refs, io_refs; |
2041 | 2027 | ||
2042 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; | 2028 | io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; |
2043 | process_refs = atomic_read(&cfqq->ref) - io_refs; | 2029 | process_refs = cfqq->ref - io_refs; |
2044 | BUG_ON(process_refs < 0); | 2030 | BUG_ON(process_refs < 0); |
2045 | return process_refs; | 2031 | return process_refs; |
2046 | } | 2032 | } |
@@ -2080,10 +2066,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | |||
2080 | */ | 2066 | */ |
2081 | if (new_process_refs >= process_refs) { | 2067 | if (new_process_refs >= process_refs) { |
2082 | cfqq->new_cfqq = new_cfqq; | 2068 | cfqq->new_cfqq = new_cfqq; |
2083 | atomic_add(process_refs, &new_cfqq->ref); | 2069 | new_cfqq->ref += process_refs; |
2084 | } else { | 2070 | } else { |
2085 | new_cfqq->new_cfqq = cfqq; | 2071 | new_cfqq->new_cfqq = cfqq; |
2086 | atomic_add(new_process_refs, &cfqq->ref); | 2072 | cfqq->ref += new_process_refs; |
2087 | } | 2073 | } |
2088 | } | 2074 | } |
2089 | 2075 | ||
@@ -2116,12 +2102,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2116 | unsigned count; | 2102 | unsigned count; |
2117 | struct cfq_rb_root *st; | 2103 | struct cfq_rb_root *st; |
2118 | unsigned group_slice; | 2104 | unsigned group_slice; |
2119 | 2105 | enum wl_prio_t original_prio = cfqd->serving_prio; | |
2120 | if (!cfqg) { | ||
2121 | cfqd->serving_prio = IDLE_WORKLOAD; | ||
2122 | cfqd->workload_expires = jiffies + 1; | ||
2123 | return; | ||
2124 | } | ||
2125 | 2106 | ||
2126 | /* Choose next priority. RT > BE > IDLE */ | 2107 | /* Choose next priority. RT > BE > IDLE */ |
2127 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) | 2108 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
@@ -2134,6 +2115,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2134 | return; | 2115 | return; |
2135 | } | 2116 | } |
2136 | 2117 | ||
2118 | if (original_prio != cfqd->serving_prio) | ||
2119 | goto new_workload; | ||
2120 | |||
2137 | /* | 2121 | /* |
2138 | * For RT and BE, we have to choose also the type | 2122 | * For RT and BE, we have to choose also the type |
2139 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload | 2123 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload |
@@ -2148,6 +2132,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2148 | if (count && !time_after(jiffies, cfqd->workload_expires)) | 2132 | if (count && !time_after(jiffies, cfqd->workload_expires)) |
2149 | return; | 2133 | return; |
2150 | 2134 | ||
2135 | new_workload: | ||
2151 | /* otherwise select new workload type */ | 2136 | /* otherwise select new workload type */ |
2152 | cfqd->serving_type = | 2137 | cfqd->serving_type = |
2153 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); | 2138 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); |
@@ -2199,7 +2184,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | |||
2199 | if (RB_EMPTY_ROOT(&st->rb)) | 2184 | if (RB_EMPTY_ROOT(&st->rb)) |
2200 | return NULL; | 2185 | return NULL; |
2201 | cfqg = cfq_rb_first_group(st); | 2186 | cfqg = cfq_rb_first_group(st); |
2202 | st->active = &cfqg->rb_node; | ||
2203 | update_min_vdisktime(st); | 2187 | update_min_vdisktime(st); |
2204 | return cfqg; | 2188 | return cfqg; |
2205 | } | 2189 | } |
@@ -2293,6 +2277,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
2293 | goto keep_queue; | 2277 | goto keep_queue; |
2294 | } | 2278 | } |
2295 | 2279 | ||
2280 | /* | ||
2281 | * This is a deep seek queue, but the device is much faster than | ||
2282 | * the queue can deliver, don't idle | ||
2283 | **/ | ||
2284 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && | ||
2285 | (cfq_cfqq_slice_new(cfqq) || | ||
2286 | (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { | ||
2287 | cfq_clear_cfqq_deep(cfqq); | ||
2288 | cfq_clear_cfqq_idle_window(cfqq); | ||
2289 | } | ||
2290 | |||
2296 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { | 2291 | if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { |
2297 | cfqq = NULL; | 2292 | cfqq = NULL; |
2298 | goto keep_queue; | 2293 | goto keep_queue; |
@@ -2367,12 +2362,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, | |||
2367 | { | 2362 | { |
2368 | /* the queue hasn't finished any request, can't estimate */ | 2363 | /* the queue hasn't finished any request, can't estimate */ |
2369 | if (cfq_cfqq_slice_new(cfqq)) | 2364 | if (cfq_cfqq_slice_new(cfqq)) |
2370 | return 1; | 2365 | return true; |
2371 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, | 2366 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, |
2372 | cfqq->slice_end)) | 2367 | cfqq->slice_end)) |
2373 | return 1; | 2368 | return true; |
2374 | 2369 | ||
2375 | return 0; | 2370 | return false; |
2376 | } | 2371 | } |
2377 | 2372 | ||
2378 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2373 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
@@ -2538,9 +2533,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2538 | struct cfq_data *cfqd = cfqq->cfqd; | 2533 | struct cfq_data *cfqd = cfqq->cfqd; |
2539 | struct cfq_group *cfqg, *orig_cfqg; | 2534 | struct cfq_group *cfqg, *orig_cfqg; |
2540 | 2535 | ||
2541 | BUG_ON(atomic_read(&cfqq->ref) <= 0); | 2536 | BUG_ON(cfqq->ref <= 0); |
2542 | 2537 | ||
2543 | if (!atomic_dec_and_test(&cfqq->ref)) | 2538 | cfqq->ref--; |
2539 | if (cfqq->ref) | ||
2544 | return; | 2540 | return; |
2545 | 2541 | ||
2546 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); | 2542 | cfq_log_cfqq(cfqd, cfqq, "put_queue"); |
@@ -2843,7 +2839,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2843 | RB_CLEAR_NODE(&cfqq->p_node); | 2839 | RB_CLEAR_NODE(&cfqq->p_node); |
2844 | INIT_LIST_HEAD(&cfqq->fifo); | 2840 | INIT_LIST_HEAD(&cfqq->fifo); |
2845 | 2841 | ||
2846 | atomic_set(&cfqq->ref, 0); | 2842 | cfqq->ref = 0; |
2847 | cfqq->cfqd = cfqd; | 2843 | cfqq->cfqd = cfqd; |
2848 | 2844 | ||
2849 | cfq_mark_cfqq_prio_changed(cfqq); | 2845 | cfq_mark_cfqq_prio_changed(cfqq); |
@@ -2979,11 +2975,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, | |||
2979 | * pin the queue now that it's allocated, scheduler exit will prune it | 2975 | * pin the queue now that it's allocated, scheduler exit will prune it |
2980 | */ | 2976 | */ |
2981 | if (!is_sync && !(*async_cfqq)) { | 2977 | if (!is_sync && !(*async_cfqq)) { |
2982 | atomic_inc(&cfqq->ref); | 2978 | cfqq->ref++; |
2983 | *async_cfqq = cfqq; | 2979 | *async_cfqq = cfqq; |
2984 | } | 2980 | } |
2985 | 2981 | ||
2986 | atomic_inc(&cfqq->ref); | 2982 | cfqq->ref++; |
2987 | return cfqq; | 2983 | return cfqq; |
2988 | } | 2984 | } |
2989 | 2985 | ||
@@ -3265,6 +3261,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3265 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) | 3261 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
3266 | return true; | 3262 | return true; |
3267 | 3263 | ||
3264 | /* An idle queue should not be idle now for some reason */ | ||
3265 | if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq)) | ||
3266 | return true; | ||
3267 | |||
3268 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) | 3268 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) |
3269 | return false; | 3269 | return false; |
3270 | 3270 | ||
@@ -3681,13 +3681,13 @@ new_queue: | |||
3681 | } | 3681 | } |
3682 | 3682 | ||
3683 | cfqq->allocated[rw]++; | 3683 | cfqq->allocated[rw]++; |
3684 | atomic_inc(&cfqq->ref); | 3684 | cfqq->ref++; |
3685 | |||
3686 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
3687 | |||
3688 | rq->elevator_private = cic; | 3685 | rq->elevator_private = cic; |
3689 | rq->elevator_private2 = cfqq; | 3686 | rq->elevator_private2 = cfqq; |
3690 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | 3687 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); |
3688 | |||
3689 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
3690 | |||
3691 | return 0; | 3691 | return 0; |
3692 | 3692 | ||
3693 | queue_fail: | 3693 | queue_fail: |
@@ -3862,6 +3862,10 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3862 | if (!cfqd) | 3862 | if (!cfqd) |
3863 | return NULL; | 3863 | return NULL; |
3864 | 3864 | ||
3865 | /* | ||
3866 | * Don't need take queue_lock in the routine, since we are | ||
3867 | * initializing the ioscheduler, and nobody is using cfqd | ||
3868 | */ | ||
3865 | cfqd->cic_index = i; | 3869 | cfqd->cic_index = i; |
3866 | 3870 | ||
3867 | /* Init root service tree */ | 3871 | /* Init root service tree */ |
@@ -3881,7 +3885,7 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3881 | * Take a reference to root group which we never drop. This is just | 3885 | * Take a reference to root group which we never drop. This is just |
3882 | * to make sure that cfq_put_cfqg() does not try to kfree root group | 3886 | * to make sure that cfq_put_cfqg() does not try to kfree root group |
3883 | */ | 3887 | */ |
3884 | atomic_set(&cfqg->ref, 1); | 3888 | cfqg->ref = 1; |
3885 | rcu_read_lock(); | 3889 | rcu_read_lock(); |
3886 | cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, | 3890 | cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, |
3887 | (void *)cfqd, 0); | 3891 | (void *)cfqd, 0); |
@@ -3901,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3901 | * will not attempt to free it. | 3905 | * will not attempt to free it. |
3902 | */ | 3906 | */ |
3903 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | 3907 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); |
3904 | atomic_inc(&cfqd->oom_cfqq.ref); | 3908 | cfqd->oom_cfqq.ref++; |
3905 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); | 3909 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); |
3906 | 3910 | ||
3907 | INIT_LIST_HEAD(&cfqd->cic_list); | 3911 | INIT_LIST_HEAD(&cfqd->cic_list); |
diff --git a/block/genhd.c b/block/genhd.c index 5fa2b44a72ff..6a5b772aa201 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
19 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/log2.h> | ||
21 | 22 | ||
22 | #include "blk.h" | 23 | #include "blk.h" |
23 | 24 | ||
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr); | |||
35 | 36 | ||
36 | static struct device_type disk_type; | 37 | static struct device_type disk_type; |
37 | 38 | ||
39 | static void disk_add_events(struct gendisk *disk); | ||
40 | static void disk_del_events(struct gendisk *disk); | ||
41 | static void disk_release_events(struct gendisk *disk); | ||
42 | |||
38 | /** | 43 | /** |
39 | * disk_get_part - get partition | 44 | * disk_get_part - get partition |
40 | * @disk: disk to look partition from | 45 | * @disk: disk to look partition from |
@@ -239,7 +244,7 @@ static struct blk_major_name { | |||
239 | } *major_names[BLKDEV_MAJOR_HASH_SIZE]; | 244 | } *major_names[BLKDEV_MAJOR_HASH_SIZE]; |
240 | 245 | ||
241 | /* index in the above - for now: assume no multimajor ranges */ | 246 | /* index in the above - for now: assume no multimajor ranges */ |
242 | static inline int major_to_index(int major) | 247 | static inline int major_to_index(unsigned major) |
243 | { | 248 | { |
244 | return major % BLKDEV_MAJOR_HASH_SIZE; | 249 | return major % BLKDEV_MAJOR_HASH_SIZE; |
245 | } | 250 | } |
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data) | |||
502 | return 0; | 507 | return 0; |
503 | } | 508 | } |
504 | 509 | ||
510 | void register_disk(struct gendisk *disk) | ||
511 | { | ||
512 | struct device *ddev = disk_to_dev(disk); | ||
513 | struct block_device *bdev; | ||
514 | struct disk_part_iter piter; | ||
515 | struct hd_struct *part; | ||
516 | int err; | ||
517 | |||
518 | ddev->parent = disk->driverfs_dev; | ||
519 | |||
520 | dev_set_name(ddev, disk->disk_name); | ||
521 | |||
522 | /* delay uevents, until we scanned partition table */ | ||
523 | dev_set_uevent_suppress(ddev, 1); | ||
524 | |||
525 | if (device_add(ddev)) | ||
526 | return; | ||
527 | if (!sysfs_deprecated) { | ||
528 | err = sysfs_create_link(block_depr, &ddev->kobj, | ||
529 | kobject_name(&ddev->kobj)); | ||
530 | if (err) { | ||
531 | device_del(ddev); | ||
532 | return; | ||
533 | } | ||
534 | } | ||
535 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); | ||
536 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
537 | |||
538 | /* No minors to use for partitions */ | ||
539 | if (!disk_partitionable(disk)) | ||
540 | goto exit; | ||
541 | |||
542 | /* No such device (e.g., media were just removed) */ | ||
543 | if (!get_capacity(disk)) | ||
544 | goto exit; | ||
545 | |||
546 | bdev = bdget_disk(disk, 0); | ||
547 | if (!bdev) | ||
548 | goto exit; | ||
549 | |||
550 | bdev->bd_invalidated = 1; | ||
551 | err = blkdev_get(bdev, FMODE_READ, NULL); | ||
552 | if (err < 0) | ||
553 | goto exit; | ||
554 | blkdev_put(bdev, FMODE_READ); | ||
555 | |||
556 | exit: | ||
557 | /* announce disk after possible partitions are created */ | ||
558 | dev_set_uevent_suppress(ddev, 0); | ||
559 | kobject_uevent(&ddev->kobj, KOBJ_ADD); | ||
560 | |||
561 | /* announce possible partitions */ | ||
562 | disk_part_iter_init(&piter, disk, 0); | ||
563 | while ((part = disk_part_iter_next(&piter))) | ||
564 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); | ||
565 | disk_part_iter_exit(&piter); | ||
566 | } | ||
567 | |||
505 | /** | 568 | /** |
506 | * add_disk - add partitioning information to kernel list | 569 | * add_disk - add partitioning information to kernel list |
507 | * @disk: per-device partitioning information | 570 | * @disk: per-device partitioning information |
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk) | |||
551 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 614 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
552 | "bdi"); | 615 | "bdi"); |
553 | WARN_ON(retval); | 616 | WARN_ON(retval); |
554 | } | ||
555 | 617 | ||
618 | disk_add_events(disk); | ||
619 | } | ||
556 | EXPORT_SYMBOL(add_disk); | 620 | EXPORT_SYMBOL(add_disk); |
557 | EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ | ||
558 | 621 | ||
559 | void unlink_gendisk(struct gendisk *disk) | 622 | void del_gendisk(struct gendisk *disk) |
560 | { | 623 | { |
624 | struct disk_part_iter piter; | ||
625 | struct hd_struct *part; | ||
626 | |||
627 | disk_del_events(disk); | ||
628 | |||
629 | /* invalidate stuff */ | ||
630 | disk_part_iter_init(&piter, disk, | ||
631 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | ||
632 | while ((part = disk_part_iter_next(&piter))) { | ||
633 | invalidate_partition(disk, part->partno); | ||
634 | delete_partition(disk, part->partno); | ||
635 | } | ||
636 | disk_part_iter_exit(&piter); | ||
637 | |||
638 | invalidate_partition(disk, 0); | ||
639 | blk_free_devt(disk_to_dev(disk)->devt); | ||
640 | set_capacity(disk, 0); | ||
641 | disk->flags &= ~GENHD_FL_UP; | ||
642 | |||
561 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); | 643 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); |
562 | bdi_unregister(&disk->queue->backing_dev_info); | 644 | bdi_unregister(&disk->queue->backing_dev_info); |
563 | blk_unregister_queue(disk); | 645 | blk_unregister_queue(disk); |
564 | blk_unregister_region(disk_devt(disk), disk->minors); | 646 | blk_unregister_region(disk_devt(disk), disk->minors); |
647 | |||
648 | part_stat_set_all(&disk->part0, 0); | ||
649 | disk->part0.stamp = 0; | ||
650 | |||
651 | kobject_put(disk->part0.holder_dir); | ||
652 | kobject_put(disk->slave_dir); | ||
653 | disk->driverfs_dev = NULL; | ||
654 | if (!sysfs_deprecated) | ||
655 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); | ||
656 | device_del(disk_to_dev(disk)); | ||
565 | } | 657 | } |
658 | EXPORT_SYMBOL(del_gendisk); | ||
566 | 659 | ||
567 | /** | 660 | /** |
568 | * get_gendisk - get partitioning information for a given device | 661 | * get_gendisk - get partitioning information for a given device |
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos) | |||
735 | static void *p; | 828 | static void *p; |
736 | 829 | ||
737 | p = disk_seqf_start(seqf, pos); | 830 | p = disk_seqf_start(seqf, pos); |
738 | if (!IS_ERR(p) && p && !*pos) | 831 | if (!IS_ERR_OR_NULL(p) && !*pos) |
739 | seq_puts(seqf, "major minor #blocks name\n\n"); | 832 | seq_puts(seqf, "major minor #blocks name\n\n"); |
740 | return p; | 833 | return p; |
741 | } | 834 | } |
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev) | |||
1005 | { | 1098 | { |
1006 | struct gendisk *disk = dev_to_disk(dev); | 1099 | struct gendisk *disk = dev_to_disk(dev); |
1007 | 1100 | ||
1101 | disk_release_events(disk); | ||
1008 | kfree(disk->random); | 1102 | kfree(disk->random); |
1009 | disk_replace_part_tbl(disk, NULL); | 1103 | disk_replace_part_tbl(disk, NULL); |
1010 | free_part_stats(&disk->part0); | 1104 | free_part_stats(&disk->part0); |
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void) | |||
1110 | module_init(proc_genhd_init); | 1204 | module_init(proc_genhd_init); |
1111 | #endif /* CONFIG_PROC_FS */ | 1205 | #endif /* CONFIG_PROC_FS */ |
1112 | 1206 | ||
1113 | static void media_change_notify_thread(struct work_struct *work) | ||
1114 | { | ||
1115 | struct gendisk *gd = container_of(work, struct gendisk, async_notify); | ||
1116 | char event[] = "MEDIA_CHANGE=1"; | ||
1117 | char *envp[] = { event, NULL }; | ||
1118 | |||
1119 | /* | ||
1120 | * set enviroment vars to indicate which event this is for | ||
1121 | * so that user space will know to go check the media status. | ||
1122 | */ | ||
1123 | kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); | ||
1124 | put_device(gd->driverfs_dev); | ||
1125 | } | ||
1126 | |||
1127 | #if 0 | ||
1128 | void genhd_media_change_notify(struct gendisk *disk) | ||
1129 | { | ||
1130 | get_device(disk->driverfs_dev); | ||
1131 | schedule_work(&disk->async_notify); | ||
1132 | } | ||
1133 | EXPORT_SYMBOL_GPL(genhd_media_change_notify); | ||
1134 | #endif /* 0 */ | ||
1135 | |||
1136 | dev_t blk_lookup_devt(const char *name, int partno) | 1207 | dev_t blk_lookup_devt(const char *name, int partno) |
1137 | { | 1208 | { |
1138 | dev_t devt = MKDEV(0, 0); | 1209 | dev_t devt = MKDEV(0, 0); |
@@ -1193,13 +1264,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id) | |||
1193 | } | 1264 | } |
1194 | disk->part_tbl->part[0] = &disk->part0; | 1265 | disk->part_tbl->part[0] = &disk->part0; |
1195 | 1266 | ||
1267 | hd_ref_init(&disk->part0); | ||
1268 | |||
1196 | disk->minors = minors; | 1269 | disk->minors = minors; |
1197 | rand_initialize_disk(disk); | 1270 | rand_initialize_disk(disk); |
1198 | disk_to_dev(disk)->class = &block_class; | 1271 | disk_to_dev(disk)->class = &block_class; |
1199 | disk_to_dev(disk)->type = &disk_type; | 1272 | disk_to_dev(disk)->type = &disk_type; |
1200 | device_initialize(disk_to_dev(disk)); | 1273 | device_initialize(disk_to_dev(disk)); |
1201 | INIT_WORK(&disk->async_notify, | ||
1202 | media_change_notify_thread); | ||
1203 | } | 1274 | } |
1204 | return disk; | 1275 | return disk; |
1205 | } | 1276 | } |
@@ -1291,3 +1362,422 @@ int invalidate_partition(struct gendisk *disk, int partno) | |||
1291 | } | 1362 | } |
1292 | 1363 | ||
1293 | EXPORT_SYMBOL(invalidate_partition); | 1364 | EXPORT_SYMBOL(invalidate_partition); |
1365 | |||
1366 | /* | ||
1367 | * Disk events - monitor disk events like media change and eject request. | ||
1368 | */ | ||
1369 | struct disk_events { | ||
1370 | struct list_head node; /* all disk_event's */ | ||
1371 | struct gendisk *disk; /* the associated disk */ | ||
1372 | spinlock_t lock; | ||
1373 | |||
1374 | int block; /* event blocking depth */ | ||
1375 | unsigned int pending; /* events already sent out */ | ||
1376 | unsigned int clearing; /* events being cleared */ | ||
1377 | |||
1378 | long poll_msecs; /* interval, -1 for default */ | ||
1379 | struct delayed_work dwork; | ||
1380 | }; | ||
1381 | |||
1382 | static const char *disk_events_strs[] = { | ||
1383 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", | ||
1384 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", | ||
1385 | }; | ||
1386 | |||
1387 | static char *disk_uevents[] = { | ||
1388 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", | ||
1389 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", | ||
1390 | }; | ||
1391 | |||
1392 | /* list of all disk_events */ | ||
1393 | static DEFINE_MUTEX(disk_events_mutex); | ||
1394 | static LIST_HEAD(disk_events); | ||
1395 | |||
1396 | /* disable in-kernel polling by default */ | ||
1397 | static unsigned long disk_events_dfl_poll_msecs = 0; | ||
1398 | |||
1399 | static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | ||
1400 | { | ||
1401 | struct disk_events *ev = disk->ev; | ||
1402 | long intv_msecs = 0; | ||
1403 | |||
1404 | /* | ||
1405 | * If device-specific poll interval is set, always use it. If | ||
1406 | * the default is being used, poll iff there are events which | ||
1407 | * can't be monitored asynchronously. | ||
1408 | */ | ||
1409 | if (ev->poll_msecs >= 0) | ||
1410 | intv_msecs = ev->poll_msecs; | ||
1411 | else if (disk->events & ~disk->async_events) | ||
1412 | intv_msecs = disk_events_dfl_poll_msecs; | ||
1413 | |||
1414 | return msecs_to_jiffies(intv_msecs); | ||
1415 | } | ||
1416 | |||
1417 | static void __disk_block_events(struct gendisk *disk, bool sync) | ||
1418 | { | ||
1419 | struct disk_events *ev = disk->ev; | ||
1420 | unsigned long flags; | ||
1421 | bool cancel; | ||
1422 | |||
1423 | spin_lock_irqsave(&ev->lock, flags); | ||
1424 | cancel = !ev->block++; | ||
1425 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1426 | |||
1427 | if (cancel) { | ||
1428 | if (sync) | ||
1429 | cancel_delayed_work_sync(&disk->ev->dwork); | ||
1430 | else | ||
1431 | cancel_delayed_work(&disk->ev->dwork); | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | static void __disk_unblock_events(struct gendisk *disk, bool check_now) | ||
1436 | { | ||
1437 | struct disk_events *ev = disk->ev; | ||
1438 | unsigned long intv; | ||
1439 | unsigned long flags; | ||
1440 | |||
1441 | spin_lock_irqsave(&ev->lock, flags); | ||
1442 | |||
1443 | if (WARN_ON_ONCE(ev->block <= 0)) | ||
1444 | goto out_unlock; | ||
1445 | |||
1446 | if (--ev->block) | ||
1447 | goto out_unlock; | ||
1448 | |||
1449 | /* | ||
1450 | * Not exactly a latency critical operation, set poll timer | ||
1451 | * slack to 25% and kick event check. | ||
1452 | */ | ||
1453 | intv = disk_events_poll_jiffies(disk); | ||
1454 | set_timer_slack(&ev->dwork.timer, intv / 4); | ||
1455 | if (check_now) | ||
1456 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1457 | else if (intv) | ||
1458 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1459 | out_unlock: | ||
1460 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1461 | } | ||
1462 | |||
1463 | /** | ||
1464 | * disk_block_events - block and flush disk event checking | ||
1465 | * @disk: disk to block events for | ||
1466 | * | ||
1467 | * On return from this function, it is guaranteed that event checking | ||
1468 | * isn't in progress and won't happen until unblocked by | ||
1469 | * disk_unblock_events(). Events blocking is counted and the actual | ||
1470 | * unblocking happens after the matching number of unblocks are done. | ||
1471 | * | ||
1472 | * Note that this intentionally does not block event checking from | ||
1473 | * disk_clear_events(). | ||
1474 | * | ||
1475 | * CONTEXT: | ||
1476 | * Might sleep. | ||
1477 | */ | ||
1478 | void disk_block_events(struct gendisk *disk) | ||
1479 | { | ||
1480 | if (disk->ev) | ||
1481 | __disk_block_events(disk, true); | ||
1482 | } | ||
1483 | |||
1484 | /** | ||
1485 | * disk_unblock_events - unblock disk event checking | ||
1486 | * @disk: disk to unblock events for | ||
1487 | * | ||
1488 | * Undo disk_block_events(). When the block count reaches zero, it | ||
1489 | * starts events polling if configured. | ||
1490 | * | ||
1491 | * CONTEXT: | ||
1492 | * Don't care. Safe to call from irq context. | ||
1493 | */ | ||
1494 | void disk_unblock_events(struct gendisk *disk) | ||
1495 | { | ||
1496 | if (disk->ev) | ||
1497 | __disk_unblock_events(disk, true); | ||
1498 | } | ||
1499 | |||
1500 | /** | ||
1501 | * disk_check_events - schedule immediate event checking | ||
1502 | * @disk: disk to check events for | ||
1503 | * | ||
1504 | * Schedule immediate event checking on @disk if not blocked. | ||
1505 | * | ||
1506 | * CONTEXT: | ||
1507 | * Don't care. Safe to call from irq context. | ||
1508 | */ | ||
1509 | void disk_check_events(struct gendisk *disk) | ||
1510 | { | ||
1511 | if (disk->ev) { | ||
1512 | __disk_block_events(disk, false); | ||
1513 | __disk_unblock_events(disk, true); | ||
1514 | } | ||
1515 | } | ||
1516 | EXPORT_SYMBOL_GPL(disk_check_events); | ||
1517 | |||
1518 | /** | ||
1519 | * disk_clear_events - synchronously check, clear and return pending events | ||
1520 | * @disk: disk to fetch and clear events from | ||
1521 | * @mask: mask of events to be fetched and cleared | ||
1522 | * | ||
1523 | * Disk events are synchronously checked and pending events in @mask | ||
1524 | * are cleared and returned. This ignores the block count. | ||
1525 | * | ||
1526 | * CONTEXT: | ||
1527 | * Might sleep. | ||
1528 | */ | ||
1529 | unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | ||
1530 | { | ||
1531 | const struct block_device_operations *bdops = disk->fops; | ||
1532 | struct disk_events *ev = disk->ev; | ||
1533 | unsigned int pending; | ||
1534 | |||
1535 | if (!ev) { | ||
1536 | /* for drivers still using the old ->media_changed method */ | ||
1537 | if ((mask & DISK_EVENT_MEDIA_CHANGE) && | ||
1538 | bdops->media_changed && bdops->media_changed(disk)) | ||
1539 | return DISK_EVENT_MEDIA_CHANGE; | ||
1540 | return 0; | ||
1541 | } | ||
1542 | |||
1543 | /* tell the workfn about the events being cleared */ | ||
1544 | spin_lock_irq(&ev->lock); | ||
1545 | ev->clearing |= mask; | ||
1546 | spin_unlock_irq(&ev->lock); | ||
1547 | |||
1548 | /* unconditionally schedule event check and wait for it to finish */ | ||
1549 | __disk_block_events(disk, true); | ||
1550 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1551 | flush_delayed_work(&ev->dwork); | ||
1552 | __disk_unblock_events(disk, false); | ||
1553 | |||
1554 | /* then, fetch and clear pending events */ | ||
1555 | spin_lock_irq(&ev->lock); | ||
1556 | WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ | ||
1557 | pending = ev->pending & mask; | ||
1558 | ev->pending &= ~mask; | ||
1559 | spin_unlock_irq(&ev->lock); | ||
1560 | |||
1561 | return pending; | ||
1562 | } | ||
1563 | |||
1564 | static void disk_events_workfn(struct work_struct *work) | ||
1565 | { | ||
1566 | struct delayed_work *dwork = to_delayed_work(work); | ||
1567 | struct disk_events *ev = container_of(dwork, struct disk_events, dwork); | ||
1568 | struct gendisk *disk = ev->disk; | ||
1569 | char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; | ||
1570 | unsigned int clearing = ev->clearing; | ||
1571 | unsigned int events; | ||
1572 | unsigned long intv; | ||
1573 | int nr_events = 0, i; | ||
1574 | |||
1575 | /* check events */ | ||
1576 | events = disk->fops->check_events(disk, clearing); | ||
1577 | |||
1578 | /* accumulate pending events and schedule next poll if necessary */ | ||
1579 | spin_lock_irq(&ev->lock); | ||
1580 | |||
1581 | events &= ~ev->pending; | ||
1582 | ev->pending |= events; | ||
1583 | ev->clearing &= ~clearing; | ||
1584 | |||
1585 | intv = disk_events_poll_jiffies(disk); | ||
1586 | if (!ev->block && intv) | ||
1587 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1588 | |||
1589 | spin_unlock_irq(&ev->lock); | ||
1590 | |||
1591 | /* tell userland about new events */ | ||
1592 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | ||
1593 | if (events & (1 << i)) | ||
1594 | envp[nr_events++] = disk_uevents[i]; | ||
1595 | |||
1596 | if (nr_events) | ||
1597 | kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); | ||
1598 | } | ||
1599 | |||
1600 | /* | ||
1601 | * A disk events enabled device has the following sysfs nodes under | ||
1602 | * its /sys/block/X/ directory. | ||
1603 | * | ||
1604 | * events : list of all supported events | ||
1605 | * events_async : list of events which can be detected w/o polling | ||
1606 | * events_poll_msecs : polling interval, 0: disable, -1: system default | ||
1607 | */ | ||
1608 | static ssize_t __disk_events_show(unsigned int events, char *buf) | ||
1609 | { | ||
1610 | const char *delim = ""; | ||
1611 | ssize_t pos = 0; | ||
1612 | int i; | ||
1613 | |||
1614 | for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) | ||
1615 | if (events & (1 << i)) { | ||
1616 | pos += sprintf(buf + pos, "%s%s", | ||
1617 | delim, disk_events_strs[i]); | ||
1618 | delim = " "; | ||
1619 | } | ||
1620 | if (pos) | ||
1621 | pos += sprintf(buf + pos, "\n"); | ||
1622 | return pos; | ||
1623 | } | ||
1624 | |||
1625 | static ssize_t disk_events_show(struct device *dev, | ||
1626 | struct device_attribute *attr, char *buf) | ||
1627 | { | ||
1628 | struct gendisk *disk = dev_to_disk(dev); | ||
1629 | |||
1630 | return __disk_events_show(disk->events, buf); | ||
1631 | } | ||
1632 | |||
1633 | static ssize_t disk_events_async_show(struct device *dev, | ||
1634 | struct device_attribute *attr, char *buf) | ||
1635 | { | ||
1636 | struct gendisk *disk = dev_to_disk(dev); | ||
1637 | |||
1638 | return __disk_events_show(disk->async_events, buf); | ||
1639 | } | ||
1640 | |||
1641 | static ssize_t disk_events_poll_msecs_show(struct device *dev, | ||
1642 | struct device_attribute *attr, | ||
1643 | char *buf) | ||
1644 | { | ||
1645 | struct gendisk *disk = dev_to_disk(dev); | ||
1646 | |||
1647 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | ||
1648 | } | ||
1649 | |||
1650 | static ssize_t disk_events_poll_msecs_store(struct device *dev, | ||
1651 | struct device_attribute *attr, | ||
1652 | const char *buf, size_t count) | ||
1653 | { | ||
1654 | struct gendisk *disk = dev_to_disk(dev); | ||
1655 | long intv; | ||
1656 | |||
1657 | if (!count || !sscanf(buf, "%ld", &intv)) | ||
1658 | return -EINVAL; | ||
1659 | |||
1660 | if (intv < 0 && intv != -1) | ||
1661 | return -EINVAL; | ||
1662 | |||
1663 | __disk_block_events(disk, true); | ||
1664 | disk->ev->poll_msecs = intv; | ||
1665 | __disk_unblock_events(disk, true); | ||
1666 | |||
1667 | return count; | ||
1668 | } | ||
1669 | |||
1670 | static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); | ||
1671 | static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); | ||
1672 | static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, | ||
1673 | disk_events_poll_msecs_show, | ||
1674 | disk_events_poll_msecs_store); | ||
1675 | |||
1676 | static const struct attribute *disk_events_attrs[] = { | ||
1677 | &dev_attr_events.attr, | ||
1678 | &dev_attr_events_async.attr, | ||
1679 | &dev_attr_events_poll_msecs.attr, | ||
1680 | NULL, | ||
1681 | }; | ||
1682 | |||
1683 | /* | ||
1684 | * The default polling interval can be specified by the kernel | ||
1685 | * parameter block.events_dfl_poll_msecs which defaults to 0 | ||
1686 | * (disable). This can also be modified runtime by writing to | ||
1687 | * /sys/module/block/parameters/events_dfl_poll_msecs. | ||
1688 | */ | ||
1689 | static int disk_events_set_dfl_poll_msecs(const char *val, | ||
1690 | const struct kernel_param *kp) | ||
1691 | { | ||
1692 | struct disk_events *ev; | ||
1693 | int ret; | ||
1694 | |||
1695 | ret = param_set_ulong(val, kp); | ||
1696 | if (ret < 0) | ||
1697 | return ret; | ||
1698 | |||
1699 | mutex_lock(&disk_events_mutex); | ||
1700 | |||
1701 | list_for_each_entry(ev, &disk_events, node) | ||
1702 | disk_check_events(ev->disk); | ||
1703 | |||
1704 | mutex_unlock(&disk_events_mutex); | ||
1705 | |||
1706 | return 0; | ||
1707 | } | ||
1708 | |||
1709 | static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { | ||
1710 | .set = disk_events_set_dfl_poll_msecs, | ||
1711 | .get = param_get_ulong, | ||
1712 | }; | ||
1713 | |||
1714 | #undef MODULE_PARAM_PREFIX | ||
1715 | #define MODULE_PARAM_PREFIX "block." | ||
1716 | |||
1717 | module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | ||
1718 | &disk_events_dfl_poll_msecs, 0644); | ||
1719 | |||
1720 | /* | ||
1721 | * disk_{add|del|release}_events - initialize and destroy disk_events. | ||
1722 | */ | ||
1723 | static void disk_add_events(struct gendisk *disk) | ||
1724 | { | ||
1725 | struct disk_events *ev; | ||
1726 | |||
1727 | if (!disk->fops->check_events || !(disk->events | disk->async_events)) | ||
1728 | return; | ||
1729 | |||
1730 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); | ||
1731 | if (!ev) { | ||
1732 | pr_warn("%s: failed to initialize events\n", disk->disk_name); | ||
1733 | return; | ||
1734 | } | ||
1735 | |||
1736 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, | ||
1737 | disk_events_attrs) < 0) { | ||
1738 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1739 | disk->disk_name); | ||
1740 | kfree(ev); | ||
1741 | return; | ||
1742 | } | ||
1743 | |||
1744 | disk->ev = ev; | ||
1745 | |||
1746 | INIT_LIST_HEAD(&ev->node); | ||
1747 | ev->disk = disk; | ||
1748 | spin_lock_init(&ev->lock); | ||
1749 | ev->block = 1; | ||
1750 | ev->poll_msecs = -1; | ||
1751 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | ||
1752 | |||
1753 | mutex_lock(&disk_events_mutex); | ||
1754 | list_add_tail(&ev->node, &disk_events); | ||
1755 | mutex_unlock(&disk_events_mutex); | ||
1756 | |||
1757 | /* | ||
1758 | * Block count is initialized to 1 and the following initial | ||
1759 | * unblock kicks it into action. | ||
1760 | */ | ||
1761 | __disk_unblock_events(disk, true); | ||
1762 | } | ||
1763 | |||
1764 | static void disk_del_events(struct gendisk *disk) | ||
1765 | { | ||
1766 | if (!disk->ev) | ||
1767 | return; | ||
1768 | |||
1769 | __disk_block_events(disk, true); | ||
1770 | |||
1771 | mutex_lock(&disk_events_mutex); | ||
1772 | list_del_init(&disk->ev->node); | ||
1773 | mutex_unlock(&disk_events_mutex); | ||
1774 | |||
1775 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | ||
1776 | } | ||
1777 | |||
1778 | static void disk_release_events(struct gendisk *disk) | ||
1779 | { | ||
1780 | /* the block count should be 1 from disk_del_events() */ | ||
1781 | WARN_ON_ONCE(disk->ev && disk->ev->block != 1); | ||
1782 | kfree(disk->ev); | ||
1783 | } | ||
diff --git a/block/ioctl.c b/block/ioctl.c index a9a302eba01e..9049d460fa89 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
294 | return -EINVAL; | 294 | return -EINVAL; |
295 | if (get_user(n, (int __user *) arg)) | 295 | if (get_user(n, (int __user *) arg)) |
296 | return -EFAULT; | 296 | return -EFAULT; |
297 | if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) | 297 | if (!(mode & FMODE_EXCL) && |
298 | blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) | ||
298 | return -EBUSY; | 299 | return -EBUSY; |
299 | ret = set_blocksize(bdev, n); | 300 | ret = set_blocksize(bdev, n); |
300 | if (!(mode & FMODE_EXCL)) | 301 | if (!(mode & FMODE_EXCL)) |
301 | bd_release(bdev); | 302 | blkdev_put(bdev, mode | FMODE_EXCL); |
302 | return ret; | 303 | return ret; |
303 | case BLKPG: | 304 | case BLKPG: |
304 | ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); | 305 | ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); |