path: root/block
author     Linus Torvalds <torvalds@linux-foundation.org>   2011-01-13 13:45:01 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-01-13 13:45:01 -0500
commit     275220f0fcff1adf28a717076e00f575edf05fda (patch)
tree       d249bccc80c64443dab211639050c4fb14332648 /block
parent     fe3c560b8a22cb28e54fe8950abef38e88d75831 (diff)
parent     81c5e2ae33c4b19e53966b427e33646bf6811830 (diff)
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits)
  block: ensure that completion error gets properly traced
  blktrace: add missing probe argument to block_bio_complete
  block cfq: don't use atomic_t for cfq_group
  block cfq: don't use atomic_t for cfq_queue
  block: trace event block fix unassigned field
  block: add internal hd part table references
  block: fix accounting bug on cross partition merges
  kref: add kref_test_and_get
  bio-integrity: mark kintegrityd_wq highpri and CPU intensive
  block: make kblockd_workqueue smarter
  Revert "sd: implement sd_check_events()"
  block: Clean up exit_io_context() source code.
  Fix compile warnings due to missing removal of a 'ret' variable
  fs/block: type signature of major_to_index(int) to major_to_index(unsigned)
  block: convert !IS_ERR(p) && p to !IS_ERR_OR_NULL(p)
  cfq-iosched: don't check cfqg in choose_service_tree()
  fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors
  cdrom: export cdrom_check_events()
  sd: implement sd_check_events()
  sr: implement sr_check_events()
  ...
Diffstat (limited to 'block')
-rw-r--r--   block/blk-cgroup.c      4
-rw-r--r--   block/blk-core.c       40
-rw-r--r--   block/blk-ioc.c         5
-rw-r--r--   block/blk-merge.c       3
-rw-r--r--   block/cfq-iosched.c   112
-rw-r--r--   block/genhd.c         550
-rw-r--r--   block/ioctl.c           5
7 files changed, 614 insertions(+), 105 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1452 goto done; 1452 goto done;
1453 } 1453 }
1454 1454
1455 /* Currently we do not support hierarchy deeper than two level (0,1) */
1456 if (parent != cgroup->top_cgroup)
1457 return ERR_PTR(-EPERM);
1458
1459 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 1455 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1460 if (!blkcg) 1456 if (!blkcg)
1461 return ERR_PTR(-ENOMEM); 1457 return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..2f4002f79a24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
33 33
34#include "blk.h" 34#include "blk.h"
35 35
36EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); 36EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
39 39
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
64 return; 64 return;
65 65
66 cpu = part_stat_lock(); 66 cpu = part_stat_lock();
67 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
68 67
69 if (!new_io) 68 if (!new_io) {
69 part = rq->part;
70 part_stat_inc(cpu, part, merges[rw]); 70 part_stat_inc(cpu, part, merges[rw]);
71 else { 71 } else {
72 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
73 if (!hd_struct_try_get(part)) {
74 /*
75 * The partition is already being removed,
76 * the request will be accounted on the disk only
77 *
78 * We take a reference on disk->part0 although that
79 * partition will never be deleted, so we can treat
80 * it as any other partition.
81 */
82 part = &rq->rq_disk->part0;
83 hd_struct_get(part);
84 }
72 part_round_stats(cpu, part); 85 part_round_stats(cpu, part);
73 part_inc_in_flight(part, rw); 86 part_inc_in_flight(part, rw);
87 rq->part = part;
74 } 88 }
75 89
76 part_stat_unlock(); 90 part_stat_unlock();
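
Taken together, the hunk above pins the partition a new request is accounted to, so the completion and merge paths further down can use rq->part instead of re-mapping the sector. A hedged sketch of just the pinning pattern (the helper name is illustrative; hd_struct_try_get()/hd_struct_get()/hd_struct_put() are the reference helpers added by this series):

/* Sketch: pin the partition an in-flight request is charged to. */
static struct hd_struct *example_pin_request_part(struct request *rq)
{
	struct hd_struct *part;

	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
	if (!hd_struct_try_get(part)) {
		/*
		 * The partition is being deleted; fall back to part0,
		 * which is never removed and can be treated like any
		 * other partition.
		 */
		part = &rq->rq_disk->part0;
		hd_struct_get(part);
	}
	rq->part = part;	/* dropped via hd_struct_put() at completion */
	return part;
}

The matching hd_struct_put() calls appear below in blk_account_io_done() and in blk_account_io_merge() in blk-merge.c.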
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
128 rq->ref_count = 1; 142 rq->ref_count = 1;
129 rq->start_time = jiffies; 143 rq->start_time = jiffies;
130 set_start_time_ns(rq); 144 set_start_time_ns(rq);
145 rq->part = NULL;
131} 146}
132EXPORT_SYMBOL(blk_rq_init); 147EXPORT_SYMBOL(blk_rq_init);
133 148
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
1329 bio->bi_sector += p->start_sect; 1344 bio->bi_sector += p->start_sect;
1330 bio->bi_bdev = bdev->bd_contains; 1345 bio->bi_bdev = bdev->bd_contains;
1331 1346
1332 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1347 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1333 bdev->bd_dev, 1348 bdev->bd_dev,
1334 bio->bi_sector - p->start_sect); 1349 bio->bi_sector - p->start_sect);
1335 } 1350 }
1336} 1351}
1337 1352
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
1500 goto end_io; 1515 goto end_io;
1501 1516
1502 if (old_sector != -1) 1517 if (old_sector != -1)
1503 trace_block_remap(q, bio, old_dev, old_sector); 1518 trace_block_bio_remap(q, bio, old_dev, old_sector);
1504 1519
1505 old_sector = bio->bi_sector; 1520 old_sector = bio->bi_sector;
1506 old_dev = bio->bi_bdev->bd_dev; 1521 old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1776 int cpu; 1791 int cpu;
1777 1792
1778 cpu = part_stat_lock(); 1793 cpu = part_stat_lock();
1779 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1794 part = req->part;
1780 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1795 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1781 part_stat_unlock(); 1796 part_stat_unlock();
1782 } 1797 }
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
1796 int cpu; 1811 int cpu;
1797 1812
1798 cpu = part_stat_lock(); 1813 cpu = part_stat_lock();
1799 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1814 part = req->part;
1800 1815
1801 part_stat_inc(cpu, part, ios[rw]); 1816 part_stat_inc(cpu, part, ios[rw]);
1802 part_stat_add(cpu, part, ticks[rw], duration); 1817 part_stat_add(cpu, part, ticks[rw], duration);
1803 part_round_stats(cpu, part); 1818 part_round_stats(cpu, part);
1804 part_dec_in_flight(part, rw); 1819 part_dec_in_flight(part, rw);
1805 1820
1821 hd_struct_put(part);
1806 part_stat_unlock(); 1822 part_stat_unlock();
1807 } 1823 }
1808} 1824}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
2606 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2622 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2607 sizeof(((struct request *)0)->cmd_flags)); 2623 sizeof(((struct request *)0)->cmd_flags));
2608 2624
2609 kblockd_workqueue = create_workqueue("kblockd"); 2625 /* used for unplugging and affects IO latency/throughput - HIGHPRI */
2626 kblockd_workqueue = alloc_workqueue("kblockd",
2627 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2610 if (!kblockd_workqueue) 2628 if (!kblockd_workqueue)
2611 panic("Failed to create kblockd\n"); 2629 panic("Failed to create kblockd\n");
2612 2630
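
For context, the queue allocated above is consumed through the existing kblockd helper; the sketch below mirrors kblockd_schedule_work() as it already exists in blk-core.c and is shown only to illustrate where the WQ_MEM_RECLAIM | WQ_HIGHPRI flags take effect:

/*
 * Unplug and completion work is queued here, so making the workqueue
 * high priority and memory-reclaim safe directly affects I/O latency.
 */
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
{
	return queue_work(kblockd_workqueue, work);
}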
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 3c7a339fe381..b791022beef3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
64 rcu_read_unlock(); 64 rcu_read_unlock();
65} 65}
66 66
67/* Called by the exitting task */ 67/* Called by the exiting task */
68void exit_io_context(struct task_struct *task) 68void exit_io_context(struct task_struct *task)
69{ 69{
70 struct io_context *ioc; 70 struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
74 task->io_context = NULL; 74 task->io_context = NULL;
75 task_unlock(task); 75 task_unlock(task);
76 76
77 if (atomic_dec_and_test(&ioc->nr_tasks)) { 77 if (atomic_dec_and_test(&ioc->nr_tasks))
78 cfq_exit(ioc); 78 cfq_exit(ioc);
79 79
80 }
81 put_io_context(ioc); 80 put_io_context(ioc);
82} 81}
83 82
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 74bc4a768f32..ea85e20d5e94 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
351 int cpu; 351 int cpu;
352 352
353 cpu = part_stat_lock(); 353 cpu = part_stat_lock();
354 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 354 part = req->part;
355 355
356 part_round_stats(cpu, part); 356 part_round_stats(cpu, part);
357 part_dec_in_flight(part, rq_data_dir(req)); 357 part_dec_in_flight(part, rq_data_dir(req));
358 358
359 hd_struct_put(part);
359 part_stat_unlock(); 360 part_stat_unlock();
360 } 361 }
361} 362}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 78ee4b1d4e85..8427697c5437 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct rb_node *active;
91}; 90};
92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
93 .count = 0, .min_vdisktime = 0, } 92 .count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
97 */ 96 */
98struct cfq_queue { 97struct cfq_queue {
99 /* reference count */ 98 /* reference count */
100 atomic_t ref; 99 int ref;
101 /* various state flags, see below */ 100 /* various state flags, see below */
102 unsigned int flags; 101 unsigned int flags;
103 /* parent cfq_data */ 102 /* parent cfq_data */
@@ -180,7 +179,6 @@ struct cfq_group {
180 /* group service_tree key */ 179 /* group service_tree key */
181 u64 vdisktime; 180 u64 vdisktime;
182 unsigned int weight; 181 unsigned int weight;
183 bool on_st;
184 182
185 /* number of cfqq currently on this group */ 183 /* number of cfqq currently on this group */
186 int nr_cfqq; 184 int nr_cfqq;
@@ -209,7 +207,7 @@ struct cfq_group {
209 struct blkio_group blkg; 207 struct blkio_group blkg;
210#ifdef CONFIG_CFQ_GROUP_IOSCHED 208#ifdef CONFIG_CFQ_GROUP_IOSCHED
211 struct hlist_node cfqd_node; 209 struct hlist_node cfqd_node;
212 atomic_t ref; 210 int ref;
213#endif 211#endif
214 /* number of requests that are on the dispatch list or inside driver */ 212 /* number of requests that are on the dispatch list or inside driver */
215 int dispatched; 213 int dispatched;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
563 u64 vdisktime = st->min_vdisktime; 561 u64 vdisktime = st->min_vdisktime;
564 struct cfq_group *cfqg; 562 struct cfq_group *cfqg;
565 563
566 if (st->active) {
567 cfqg = rb_entry_cfqg(st->active);
568 vdisktime = cfqg->vdisktime;
569 }
570
571 if (st->left) { 564 if (st->left) {
572 cfqg = rb_entry_cfqg(st->left); 565 cfqg = rb_entry_cfqg(st->left);
573 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); 566 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
646static inline bool cfq_slice_used(struct cfq_queue *cfqq) 639static inline bool cfq_slice_used(struct cfq_queue *cfqq)
647{ 640{
648 if (cfq_cfqq_slice_new(cfqq)) 641 if (cfq_cfqq_slice_new(cfqq))
649 return 0; 642 return false;
650 if (time_before(jiffies, cfqq->slice_end)) 643 if (time_before(jiffies, cfqq->slice_end))
651 return 0; 644 return false;
652 645
653 return 1; 646 return true;
654} 647}
655 648
656/* 649/*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
869 struct rb_node *n; 862 struct rb_node *n;
870 863
871 cfqg->nr_cfqq++; 864 cfqg->nr_cfqq++;
872 if (cfqg->on_st) 865 if (!RB_EMPTY_NODE(&cfqg->rb_node))
873 return; 866 return;
874 867
875 /* 868 /*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
885 cfqg->vdisktime = st->min_vdisktime; 878 cfqg->vdisktime = st->min_vdisktime;
886 879
887 __cfq_group_service_tree_add(st, cfqg); 880 __cfq_group_service_tree_add(st, cfqg);
888 cfqg->on_st = true;
889 st->total_weight += cfqg->weight; 881 st->total_weight += cfqg->weight;
890} 882}
891 883
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
894{ 886{
895 struct cfq_rb_root *st = &cfqd->grp_service_tree; 887 struct cfq_rb_root *st = &cfqd->grp_service_tree;
896 888
897 if (st->active == &cfqg->rb_node)
898 st->active = NULL;
899
900 BUG_ON(cfqg->nr_cfqq < 1); 889 BUG_ON(cfqg->nr_cfqq < 1);
901 cfqg->nr_cfqq--; 890 cfqg->nr_cfqq--;
902 891
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
905 return; 894 return;
906 895
907 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); 896 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
908 cfqg->on_st = false;
909 st->total_weight -= cfqg->weight; 897 st->total_weight -= cfqg->weight;
910 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 898 if (!RB_EMPTY_NODE(&cfqg->rb_node))
911 cfq_rb_erase(&cfqg->rb_node, st); 899 cfq_rb_erase(&cfqg->rb_node, st);
@@ -1026,7 +1014,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
1026 * elevator which will be dropped by either elevator exit 1014 * elevator which will be dropped by either elevator exit
1027 * or cgroup deletion path depending on who is exiting first. 1015 * or cgroup deletion path depending on who is exiting first.
1028 */ 1016 */
1029 atomic_set(&cfqg->ref, 1); 1017 cfqg->ref = 1;
1030 1018
1031 /* 1019 /*
1032 * Add group onto cgroup list. It might happen that bdi->dev is 1020 * Add group onto cgroup list. It might happen that bdi->dev is
@@ -1071,7 +1059,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1071 1059
1072static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) 1060static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1073{ 1061{
1074 atomic_inc(&cfqg->ref); 1062 cfqg->ref++;
1075 return cfqg; 1063 return cfqg;
1076} 1064}
1077 1065
@@ -1083,7 +1071,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1083 1071
1084 cfqq->cfqg = cfqg; 1072 cfqq->cfqg = cfqg;
1085 /* cfqq reference on cfqg */ 1073 /* cfqq reference on cfqg */
1086 atomic_inc(&cfqq->cfqg->ref); 1074 cfqq->cfqg->ref++;
1087} 1075}
1088 1076
1089static void cfq_put_cfqg(struct cfq_group *cfqg) 1077static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1091,11 +1079,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
1091 struct cfq_rb_root *st; 1079 struct cfq_rb_root *st;
1092 int i, j; 1080 int i, j;
1093 1081
1094 BUG_ON(atomic_read(&cfqg->ref) <= 0); 1082 BUG_ON(cfqg->ref <= 0);
1095 if (!atomic_dec_and_test(&cfqg->ref)) 1083 cfqg->ref--;
1084 if (cfqg->ref)
1096 return; 1085 return;
1097 for_each_cfqg_st(cfqg, i, j, st) 1086 for_each_cfqg_st(cfqg, i, j, st)
1098 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); 1087 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
1099 kfree(cfqg); 1088 kfree(cfqg);
1100} 1089}
1101 1090
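
The atomic_t -> int conversions here (and for cfqq->ref below) rely on every reference-count manipulation already happening under the queue lock, so the atomics only added cost. A hedged sketch of the resulting get/put pattern (bodies abbreviated; the real cfq_put_cfqg() also verifies that the group's service trees are empty before freeing):

/* Callers hold q->queue_lock, so a plain int reference count suffices. */
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
	cfqg->ref++;
	return cfqg;
}

static void cfq_put_cfqg(struct cfq_group *cfqg)
{
	BUG_ON(cfqg->ref <= 0);
	cfqg->ref--;
	if (cfqg->ref)
		return;
	kfree(cfqg);	/* last reference gone */
}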
@@ -1200,7 +1189,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1200 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1189 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1201 cfqq->orig_cfqg = cfqq->cfqg; 1190 cfqq->orig_cfqg = cfqq->cfqg;
1202 cfqq->cfqg = &cfqd->root_group; 1191 cfqq->cfqg = &cfqd->root_group;
1203 atomic_inc(&cfqd->root_group.ref); 1192 cfqd->root_group.ref++;
1204 group_changed = 1; 1193 group_changed = 1;
1205 } else if (!cfqd->cfq_group_isolation 1194 } else if (!cfqd->cfq_group_isolation
1206 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { 1195 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@ -1687,9 +1676,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1687 if (cfqq == cfqd->active_queue) 1676 if (cfqq == cfqd->active_queue)
1688 cfqd->active_queue = NULL; 1677 cfqd->active_queue = NULL;
1689 1678
1690 if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
1691 cfqd->grp_service_tree.active = NULL;
1692
1693 if (cfqd->active_cic) { 1679 if (cfqd->active_cic) {
1694 put_io_context(cfqd->active_cic->ioc); 1680 put_io_context(cfqd->active_cic->ioc);
1695 cfqd->active_cic = NULL; 1681 cfqd->active_cic = NULL;
@@ -1901,10 +1887,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1901 * in their service tree. 1887 * in their service tree.
1902 */ 1888 */
1903 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1889 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1904 return 1; 1890 return true;
1905 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1891 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1906 service_tree->count); 1892 service_tree->count);
1907 return 0; 1893 return false;
1908} 1894}
1909 1895
1910static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1896static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2040,7 +2026,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq)
2040 int process_refs, io_refs; 2026 int process_refs, io_refs;
2041 2027
2042 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; 2028 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
2043 process_refs = atomic_read(&cfqq->ref) - io_refs; 2029 process_refs = cfqq->ref - io_refs;
2044 BUG_ON(process_refs < 0); 2030 BUG_ON(process_refs < 0);
2045 return process_refs; 2031 return process_refs;
2046} 2032}
@@ -2080,10 +2066,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
2080 */ 2066 */
2081 if (new_process_refs >= process_refs) { 2067 if (new_process_refs >= process_refs) {
2082 cfqq->new_cfqq = new_cfqq; 2068 cfqq->new_cfqq = new_cfqq;
2083 atomic_add(process_refs, &new_cfqq->ref); 2069 new_cfqq->ref += process_refs;
2084 } else { 2070 } else {
2085 new_cfqq->new_cfqq = cfqq; 2071 new_cfqq->new_cfqq = cfqq;
2086 atomic_add(new_process_refs, &cfqq->ref); 2072 cfqq->ref += new_process_refs;
2087 } 2073 }
2088} 2074}
2089 2075
@@ -2116,12 +2102,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2116 unsigned count; 2102 unsigned count;
2117 struct cfq_rb_root *st; 2103 struct cfq_rb_root *st;
2118 unsigned group_slice; 2104 unsigned group_slice;
2119 2105 enum wl_prio_t original_prio = cfqd->serving_prio;
2120 if (!cfqg) {
2121 cfqd->serving_prio = IDLE_WORKLOAD;
2122 cfqd->workload_expires = jiffies + 1;
2123 return;
2124 }
2125 2106
2126 /* Choose next priority. RT > BE > IDLE */ 2107 /* Choose next priority. RT > BE > IDLE */
2127 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 2108 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2115,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2134 return; 2115 return;
2135 } 2116 }
2136 2117
2118 if (original_prio != cfqd->serving_prio)
2119 goto new_workload;
2120
2137 /* 2121 /*
2138 * For RT and BE, we have to choose also the type 2122 * For RT and BE, we have to choose also the type
2139 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload 2123 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2132,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2148 if (count && !time_after(jiffies, cfqd->workload_expires)) 2132 if (count && !time_after(jiffies, cfqd->workload_expires))
2149 return; 2133 return;
2150 2134
2135new_workload:
2151 /* otherwise select new workload type */ 2136 /* otherwise select new workload type */
2152 cfqd->serving_type = 2137 cfqd->serving_type =
2153 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); 2138 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2184,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2199 if (RB_EMPTY_ROOT(&st->rb)) 2184 if (RB_EMPTY_ROOT(&st->rb))
2200 return NULL; 2185 return NULL;
2201 cfqg = cfq_rb_first_group(st); 2186 cfqg = cfq_rb_first_group(st);
2202 st->active = &cfqg->rb_node;
2203 update_min_vdisktime(st); 2187 update_min_vdisktime(st);
2204 return cfqg; 2188 return cfqg;
2205} 2189}
@@ -2293,6 +2277,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2293 goto keep_queue; 2277 goto keep_queue;
2294 } 2278 }
2295 2279
2280 /*
2281 * This is a deep seek queue, but the device is much faster than
2282 * the queue can deliver, don't idle
2283 **/
2284 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
2285 (cfq_cfqq_slice_new(cfqq) ||
2286 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
2287 cfq_clear_cfqq_deep(cfqq);
2288 cfq_clear_cfqq_idle_window(cfqq);
2289 }
2290
2296 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2291 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2297 cfqq = NULL; 2292 cfqq = NULL;
2298 goto keep_queue; 2293 goto keep_queue;
@@ -2367,12 +2362,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2367{ 2362{
2368 /* the queue hasn't finished any request, can't estimate */ 2363 /* the queue hasn't finished any request, can't estimate */
2369 if (cfq_cfqq_slice_new(cfqq)) 2364 if (cfq_cfqq_slice_new(cfqq))
2370 return 1; 2365 return true;
2371 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 2366 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2372 cfqq->slice_end)) 2367 cfqq->slice_end))
2373 return 1; 2368 return true;
2374 2369
2375 return 0; 2370 return false;
2376} 2371}
2377 2372
2378static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2373static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -2538,9 +2533,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
2538 struct cfq_data *cfqd = cfqq->cfqd; 2533 struct cfq_data *cfqd = cfqq->cfqd;
2539 struct cfq_group *cfqg, *orig_cfqg; 2534 struct cfq_group *cfqg, *orig_cfqg;
2540 2535
2541 BUG_ON(atomic_read(&cfqq->ref) <= 0); 2536 BUG_ON(cfqq->ref <= 0);
2542 2537
2543 if (!atomic_dec_and_test(&cfqq->ref)) 2538 cfqq->ref--;
2539 if (cfqq->ref)
2544 return; 2540 return;
2545 2541
2546 cfq_log_cfqq(cfqd, cfqq, "put_queue"); 2542 cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@ -2843,7 +2839,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2843 RB_CLEAR_NODE(&cfqq->p_node); 2839 RB_CLEAR_NODE(&cfqq->p_node);
2844 INIT_LIST_HEAD(&cfqq->fifo); 2840 INIT_LIST_HEAD(&cfqq->fifo);
2845 2841
2846 atomic_set(&cfqq->ref, 0); 2842 cfqq->ref = 0;
2847 cfqq->cfqd = cfqd; 2843 cfqq->cfqd = cfqd;
2848 2844
2849 cfq_mark_cfqq_prio_changed(cfqq); 2845 cfq_mark_cfqq_prio_changed(cfqq);
@@ -2979,11 +2975,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
2979 * pin the queue now that it's allocated, scheduler exit will prune it 2975 * pin the queue now that it's allocated, scheduler exit will prune it
2980 */ 2976 */
2981 if (!is_sync && !(*async_cfqq)) { 2977 if (!is_sync && !(*async_cfqq)) {
2982 atomic_inc(&cfqq->ref); 2978 cfqq->ref++;
2983 *async_cfqq = cfqq; 2979 *async_cfqq = cfqq;
2984 } 2980 }
2985 2981
2986 atomic_inc(&cfqq->ref); 2982 cfqq->ref++;
2987 return cfqq; 2983 return cfqq;
2988} 2984}
2989 2985
@@ -3265,6 +3261,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3265 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3261 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3266 return true; 3262 return true;
3267 3263
3264 /* An idle queue should not be idle now for some reason */
3265 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
3266 return true;
3267
3268 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 3268 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3269 return false; 3269 return false;
3270 3270
@@ -3681,13 +3681,13 @@ new_queue:
3681 } 3681 }
3682 3682
3683 cfqq->allocated[rw]++; 3683 cfqq->allocated[rw]++;
3684 atomic_inc(&cfqq->ref); 3684 cfqq->ref++;
3685
3686 spin_unlock_irqrestore(q->queue_lock, flags);
3687
3688 rq->elevator_private = cic; 3685 rq->elevator_private = cic;
3689 rq->elevator_private2 = cfqq; 3686 rq->elevator_private2 = cfqq;
3690 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); 3687 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
3688
3689 spin_unlock_irqrestore(q->queue_lock, flags);
3690
3691 return 0; 3691 return 0;
3692 3692
3693queue_fail: 3693queue_fail:
@@ -3862,6 +3862,10 @@ static void *cfq_init_queue(struct request_queue *q)
3862 if (!cfqd) 3862 if (!cfqd)
3863 return NULL; 3863 return NULL;
3864 3864
3865 /*
3866 * Don't need take queue_lock in the routine, since we are
3867 * initializing the ioscheduler, and nobody is using cfqd
3868 */
3865 cfqd->cic_index = i; 3869 cfqd->cic_index = i;
3866 3870
3867 /* Init root service tree */ 3871 /* Init root service tree */
@@ -3881,7 +3885,7 @@ static void *cfq_init_queue(struct request_queue *q)
3881 * Take a reference to root group which we never drop. This is just 3885 * Take a reference to root group which we never drop. This is just
3882 * to make sure that cfq_put_cfqg() does not try to kfree root group 3886 * to make sure that cfq_put_cfqg() does not try to kfree root group
3883 */ 3887 */
3884 atomic_set(&cfqg->ref, 1); 3888 cfqg->ref = 1;
3885 rcu_read_lock(); 3889 rcu_read_lock();
3886 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, 3890 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
3887 (void *)cfqd, 0); 3891 (void *)cfqd, 0);
@@ -3901,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q)
3901 * will not attempt to free it. 3905 * will not attempt to free it.
3902 */ 3906 */
3903 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); 3907 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
3904 atomic_inc(&cfqd->oom_cfqq.ref); 3908 cfqd->oom_cfqq.ref++;
3905 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); 3909 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
3906 3910
3907 INIT_LIST_HEAD(&cfqd->cic_list); 3911 INIT_LIST_HEAD(&cfqd->cic_list);
diff --git a/block/genhd.c b/block/genhd.c
index 5fa2b44a72ff..6a5b772aa201 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,7 @@
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19#include <linux/mutex.h> 19#include <linux/mutex.h>
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/log2.h>
21 22
22#include "blk.h" 23#include "blk.h"
23 24
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
35 36
36static struct device_type disk_type; 37static struct device_type disk_type;
37 38
39static void disk_add_events(struct gendisk *disk);
40static void disk_del_events(struct gendisk *disk);
41static void disk_release_events(struct gendisk *disk);
42
38/** 43/**
39 * disk_get_part - get partition 44 * disk_get_part - get partition
40 * @disk: disk to look partition from 45 * @disk: disk to look partition from
@@ -239,7 +244,7 @@ static struct blk_major_name {
239} *major_names[BLKDEV_MAJOR_HASH_SIZE]; 244} *major_names[BLKDEV_MAJOR_HASH_SIZE];
240 245
241/* index in the above - for now: assume no multimajor ranges */ 246/* index in the above - for now: assume no multimajor ranges */
242static inline int major_to_index(int major) 247static inline int major_to_index(unsigned major)
243{ 248{
244 return major % BLKDEV_MAJOR_HASH_SIZE; 249 return major % BLKDEV_MAJOR_HASH_SIZE;
245} 250}
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data)
502 return 0; 507 return 0;
503} 508}
504 509
510void register_disk(struct gendisk *disk)
511{
512 struct device *ddev = disk_to_dev(disk);
513 struct block_device *bdev;
514 struct disk_part_iter piter;
515 struct hd_struct *part;
516 int err;
517
518 ddev->parent = disk->driverfs_dev;
519
520 dev_set_name(ddev, disk->disk_name);
521
522 /* delay uevents, until we scanned partition table */
523 dev_set_uevent_suppress(ddev, 1);
524
525 if (device_add(ddev))
526 return;
527 if (!sysfs_deprecated) {
528 err = sysfs_create_link(block_depr, &ddev->kobj,
529 kobject_name(&ddev->kobj));
530 if (err) {
531 device_del(ddev);
532 return;
533 }
534 }
535 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
536 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
537
538 /* No minors to use for partitions */
539 if (!disk_partitionable(disk))
540 goto exit;
541
542 /* No such device (e.g., media were just removed) */
543 if (!get_capacity(disk))
544 goto exit;
545
546 bdev = bdget_disk(disk, 0);
547 if (!bdev)
548 goto exit;
549
550 bdev->bd_invalidated = 1;
551 err = blkdev_get(bdev, FMODE_READ, NULL);
552 if (err < 0)
553 goto exit;
554 blkdev_put(bdev, FMODE_READ);
555
556exit:
557 /* announce disk after possible partitions are created */
558 dev_set_uevent_suppress(ddev, 0);
559 kobject_uevent(&ddev->kobj, KOBJ_ADD);
560
561 /* announce possible partitions */
562 disk_part_iter_init(&piter, disk, 0);
563 while ((part = disk_part_iter_next(&piter)))
564 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
565 disk_part_iter_exit(&piter);
566}
567
505/** 568/**
506 * add_disk - add partitioning information to kernel list 569 * add_disk - add partitioning information to kernel list
507 * @disk: per-device partitioning information 570 * @disk: per-device partitioning information
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk)
551 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 614 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
552 "bdi"); 615 "bdi");
553 WARN_ON(retval); 616 WARN_ON(retval);
554}
555 617
618 disk_add_events(disk);
619}
556EXPORT_SYMBOL(add_disk); 620EXPORT_SYMBOL(add_disk);
557EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
558 621
559void unlink_gendisk(struct gendisk *disk) 622void del_gendisk(struct gendisk *disk)
560{ 623{
624 struct disk_part_iter piter;
625 struct hd_struct *part;
626
627 disk_del_events(disk);
628
629 /* invalidate stuff */
630 disk_part_iter_init(&piter, disk,
631 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
632 while ((part = disk_part_iter_next(&piter))) {
633 invalidate_partition(disk, part->partno);
634 delete_partition(disk, part->partno);
635 }
636 disk_part_iter_exit(&piter);
637
638 invalidate_partition(disk, 0);
639 blk_free_devt(disk_to_dev(disk)->devt);
640 set_capacity(disk, 0);
641 disk->flags &= ~GENHD_FL_UP;
642
561 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 643 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
562 bdi_unregister(&disk->queue->backing_dev_info); 644 bdi_unregister(&disk->queue->backing_dev_info);
563 blk_unregister_queue(disk); 645 blk_unregister_queue(disk);
564 blk_unregister_region(disk_devt(disk), disk->minors); 646 blk_unregister_region(disk_devt(disk), disk->minors);
647
648 part_stat_set_all(&disk->part0, 0);
649 disk->part0.stamp = 0;
650
651 kobject_put(disk->part0.holder_dir);
652 kobject_put(disk->slave_dir);
653 disk->driverfs_dev = NULL;
654 if (!sysfs_deprecated)
655 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
656 device_del(disk_to_dev(disk));
565} 657}
658EXPORT_SYMBOL(del_gendisk);
566 659
567/** 660/**
568 * get_gendisk - get partitioning information for a given device 661 * get_gendisk - get partitioning information for a given device
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
735 static void *p; 828 static void *p;
736 829
737 p = disk_seqf_start(seqf, pos); 830 p = disk_seqf_start(seqf, pos);
738 if (!IS_ERR(p) && p && !*pos) 831 if (!IS_ERR_OR_NULL(p) && !*pos)
739 seq_puts(seqf, "major minor #blocks name\n\n"); 832 seq_puts(seqf, "major minor #blocks name\n\n");
740 return p; 833 return p;
741} 834}
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev)
1005{ 1098{
1006 struct gendisk *disk = dev_to_disk(dev); 1099 struct gendisk *disk = dev_to_disk(dev);
1007 1100
1101 disk_release_events(disk);
1008 kfree(disk->random); 1102 kfree(disk->random);
1009 disk_replace_part_tbl(disk, NULL); 1103 disk_replace_part_tbl(disk, NULL);
1010 free_part_stats(&disk->part0); 1104 free_part_stats(&disk->part0);
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void)
1110module_init(proc_genhd_init); 1204module_init(proc_genhd_init);
1111#endif /* CONFIG_PROC_FS */ 1205#endif /* CONFIG_PROC_FS */
1112 1206
1113static void media_change_notify_thread(struct work_struct *work)
1114{
1115 struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1116 char event[] = "MEDIA_CHANGE=1";
1117 char *envp[] = { event, NULL };
1118
1119 /*
1120 * set enviroment vars to indicate which event this is for
1121 * so that user space will know to go check the media status.
1122 */
1123 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1124 put_device(gd->driverfs_dev);
1125}
1126
1127#if 0
1128void genhd_media_change_notify(struct gendisk *disk)
1129{
1130 get_device(disk->driverfs_dev);
1131 schedule_work(&disk->async_notify);
1132}
1133EXPORT_SYMBOL_GPL(genhd_media_change_notify);
1134#endif /* 0 */
1135
1136dev_t blk_lookup_devt(const char *name, int partno) 1207dev_t blk_lookup_devt(const char *name, int partno)
1137{ 1208{
1138 dev_t devt = MKDEV(0, 0); 1209 dev_t devt = MKDEV(0, 0);
@@ -1193,13 +1264,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
1193 } 1264 }
1194 disk->part_tbl->part[0] = &disk->part0; 1265 disk->part_tbl->part[0] = &disk->part0;
1195 1266
1267 hd_ref_init(&disk->part0);
1268
1196 disk->minors = minors; 1269 disk->minors = minors;
1197 rand_initialize_disk(disk); 1270 rand_initialize_disk(disk);
1198 disk_to_dev(disk)->class = &block_class; 1271 disk_to_dev(disk)->class = &block_class;
1199 disk_to_dev(disk)->type = &disk_type; 1272 disk_to_dev(disk)->type = &disk_type;
1200 device_initialize(disk_to_dev(disk)); 1273 device_initialize(disk_to_dev(disk));
1201 INIT_WORK(&disk->async_notify,
1202 media_change_notify_thread);
1203 } 1274 }
1204 return disk; 1275 return disk;
1205} 1276}
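
With del_gendisk() above now performing the full teardown (events, partitions, sysfs) and alloc_disk_node() initializing the part0 reference, the driver-visible lifecycle remains the usual alloc/add/del sequence. A hedged sketch for a hypothetical driver (request queue, fops and capacity setup are omitted for brevity but are required in real code):

#include <linux/blkdev.h>
#include <linux/genhd.h>

static int example_major;		/* hypothetical, from register_blkdev() */
static struct gendisk *example_disk;

static int example_probe(void)
{
	example_disk = alloc_disk(16);	/* minor numbers for partitions */
	if (!example_disk)
		return -ENOMEM;

	example_disk->major = example_major;
	example_disk->first_minor = 0;
	strcpy(example_disk->disk_name, "exampled");
	/* ->fops, ->queue and set_capacity() go here */
	add_disk(example_disk);		/* registers sysfs nodes, partitions, events */
	return 0;
}

static void example_remove(void)
{
	del_gendisk(example_disk);	/* undoes add_disk(): events, partitions, sysfs */
	put_disk(example_disk);
}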
@@ -1291,3 +1362,422 @@ int invalidate_partition(struct gendisk *disk, int partno)
1291} 1362}
1292 1363
1293EXPORT_SYMBOL(invalidate_partition); 1364EXPORT_SYMBOL(invalidate_partition);
1365
1366/*
1367 * Disk events - monitor disk events like media change and eject request.
1368 */
1369struct disk_events {
1370 struct list_head node; /* all disk_event's */
1371 struct gendisk *disk; /* the associated disk */
1372 spinlock_t lock;
1373
1374 int block; /* event blocking depth */
1375 unsigned int pending; /* events already sent out */
1376 unsigned int clearing; /* events being cleared */
1377
1378 long poll_msecs; /* interval, -1 for default */
1379 struct delayed_work dwork;
1380};
1381
1382static const char *disk_events_strs[] = {
1383 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
1384 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
1385};
1386
1387static char *disk_uevents[] = {
1388 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
1389 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
1390};
1391
1392/* list of all disk_events */
1393static DEFINE_MUTEX(disk_events_mutex);
1394static LIST_HEAD(disk_events);
1395
1396/* disable in-kernel polling by default */
1397static unsigned long disk_events_dfl_poll_msecs = 0;
1398
1399static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
1400{
1401 struct disk_events *ev = disk->ev;
1402 long intv_msecs = 0;
1403
1404 /*
1405 * If device-specific poll interval is set, always use it. If
1406 * the default is being used, poll iff there are events which
1407 * can't be monitored asynchronously.
1408 */
1409 if (ev->poll_msecs >= 0)
1410 intv_msecs = ev->poll_msecs;
1411 else if (disk->events & ~disk->async_events)
1412 intv_msecs = disk_events_dfl_poll_msecs;
1413
1414 return msecs_to_jiffies(intv_msecs);
1415}
1416
1417static void __disk_block_events(struct gendisk *disk, bool sync)
1418{
1419 struct disk_events *ev = disk->ev;
1420 unsigned long flags;
1421 bool cancel;
1422
1423 spin_lock_irqsave(&ev->lock, flags);
1424 cancel = !ev->block++;
1425 spin_unlock_irqrestore(&ev->lock, flags);
1426
1427 if (cancel) {
1428 if (sync)
1429 cancel_delayed_work_sync(&disk->ev->dwork);
1430 else
1431 cancel_delayed_work(&disk->ev->dwork);
1432 }
1433}
1434
1435static void __disk_unblock_events(struct gendisk *disk, bool check_now)
1436{
1437 struct disk_events *ev = disk->ev;
1438 unsigned long intv;
1439 unsigned long flags;
1440
1441 spin_lock_irqsave(&ev->lock, flags);
1442
1443 if (WARN_ON_ONCE(ev->block <= 0))
1444 goto out_unlock;
1445
1446 if (--ev->block)
1447 goto out_unlock;
1448
1449 /*
1450 * Not exactly a latency critical operation, set poll timer
1451 * slack to 25% and kick event check.
1452 */
1453 intv = disk_events_poll_jiffies(disk);
1454 set_timer_slack(&ev->dwork.timer, intv / 4);
1455 if (check_now)
1456 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1457 else if (intv)
1458 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1459out_unlock:
1460 spin_unlock_irqrestore(&ev->lock, flags);
1461}
1462
1463/**
1464 * disk_block_events - block and flush disk event checking
1465 * @disk: disk to block events for
1466 *
1467 * On return from this function, it is guaranteed that event checking
1468 * isn't in progress and won't happen until unblocked by
1469 * disk_unblock_events(). Events blocking is counted and the actual
1470 * unblocking happens after the matching number of unblocks are done.
1471 *
1472 * Note that this intentionally does not block event checking from
1473 * disk_clear_events().
1474 *
1475 * CONTEXT:
1476 * Might sleep.
1477 */
1478void disk_block_events(struct gendisk *disk)
1479{
1480 if (disk->ev)
1481 __disk_block_events(disk, true);
1482}
1483
1484/**
1485 * disk_unblock_events - unblock disk event checking
1486 * @disk: disk to unblock events for
1487 *
1488 * Undo disk_block_events(). When the block count reaches zero, it
1489 * starts events polling if configured.
1490 *
1491 * CONTEXT:
1492 * Don't care. Safe to call from irq context.
1493 */
1494void disk_unblock_events(struct gendisk *disk)
1495{
1496 if (disk->ev)
1497 __disk_unblock_events(disk, true);
1498}
1499
1500/**
1501 * disk_check_events - schedule immediate event checking
1502 * @disk: disk to check events for
1503 *
1504 * Schedule immediate event checking on @disk if not blocked.
1505 *
1506 * CONTEXT:
1507 * Don't care. Safe to call from irq context.
1508 */
1509void disk_check_events(struct gendisk *disk)
1510{
1511 if (disk->ev) {
1512 __disk_block_events(disk, false);
1513 __disk_unblock_events(disk, true);
1514 }
1515}
1516EXPORT_SYMBOL_GPL(disk_check_events);
1517
1518/**
1519 * disk_clear_events - synchronously check, clear and return pending events
1520 * @disk: disk to fetch and clear events from
1521 * @mask: mask of events to be fetched and clearted
1522 *
1523 * Disk events are synchronously checked and pending events in @mask
1524 * are cleared and returned. This ignores the block count.
1525 *
1526 * CONTEXT:
1527 * Might sleep.
1528 */
1529unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1530{
1531 const struct block_device_operations *bdops = disk->fops;
1532 struct disk_events *ev = disk->ev;
1533 unsigned int pending;
1534
1535 if (!ev) {
1536 /* for drivers still using the old ->media_changed method */
1537 if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
1538 bdops->media_changed && bdops->media_changed(disk))
1539 return DISK_EVENT_MEDIA_CHANGE;
1540 return 0;
1541 }
1542
1543 /* tell the workfn about the events being cleared */
1544 spin_lock_irq(&ev->lock);
1545 ev->clearing |= mask;
1546 spin_unlock_irq(&ev->lock);
1547
1548 /* uncondtionally schedule event check and wait for it to finish */
1549 __disk_block_events(disk, true);
1550 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1551 flush_delayed_work(&ev->dwork);
1552 __disk_unblock_events(disk, false);
1553
1554 /* then, fetch and clear pending events */
1555 spin_lock_irq(&ev->lock);
1556 WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
1557 pending = ev->pending & mask;
1558 ev->pending &= ~mask;
1559 spin_unlock_irq(&ev->lock);
1560
1561 return pending;
1562}
1563
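
disk_clear_events() is the synchronous consumer-side entry point. A hedged usage sketch (the caller is hypothetical; the mask is one of the events listed in disk_uevents above):

/* Hypothetical caller: synchronously test-and-clear a media change. */
static bool example_media_changed(struct gendisk *disk)
{
	return disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE) &
	       DISK_EVENT_MEDIA_CHANGE;
}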
1564static void disk_events_workfn(struct work_struct *work)
1565{
1566 struct delayed_work *dwork = to_delayed_work(work);
1567 struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1568 struct gendisk *disk = ev->disk;
1569 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1570 unsigned int clearing = ev->clearing;
1571 unsigned int events;
1572 unsigned long intv;
1573 int nr_events = 0, i;
1574
1575 /* check events */
1576 events = disk->fops->check_events(disk, clearing);
1577
1578 /* accumulate pending events and schedule next poll if necessary */
1579 spin_lock_irq(&ev->lock);
1580
1581 events &= ~ev->pending;
1582 ev->pending |= events;
1583 ev->clearing &= ~clearing;
1584
1585 intv = disk_events_poll_jiffies(disk);
1586 if (!ev->block && intv)
1587 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1588
1589 spin_unlock_irq(&ev->lock);
1590
1591 /* tell userland about new events */
1592 for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
1593 if (events & (1 << i))
1594 envp[nr_events++] = disk_uevents[i];
1595
1596 if (nr_events)
1597 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
1598}
1599
1600/*
1601 * A disk events enabled device has the following sysfs nodes under
1602 * its /sys/block/X/ directory.
1603 *
1604 * events : list of all supported events
1605 * events_async : list of events which can be detected w/o polling
1606 * events_poll_msecs : polling interval, 0: disable, -1: system default
1607 */
1608static ssize_t __disk_events_show(unsigned int events, char *buf)
1609{
1610 const char *delim = "";
1611 ssize_t pos = 0;
1612 int i;
1613
1614 for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
1615 if (events & (1 << i)) {
1616 pos += sprintf(buf + pos, "%s%s",
1617 delim, disk_events_strs[i]);
1618 delim = " ";
1619 }
1620 if (pos)
1621 pos += sprintf(buf + pos, "\n");
1622 return pos;
1623}
1624
1625static ssize_t disk_events_show(struct device *dev,
1626 struct device_attribute *attr, char *buf)
1627{
1628 struct gendisk *disk = dev_to_disk(dev);
1629
1630 return __disk_events_show(disk->events, buf);
1631}
1632
1633static ssize_t disk_events_async_show(struct device *dev,
1634 struct device_attribute *attr, char *buf)
1635{
1636 struct gendisk *disk = dev_to_disk(dev);
1637
1638 return __disk_events_show(disk->async_events, buf);
1639}
1640
1641static ssize_t disk_events_poll_msecs_show(struct device *dev,
1642 struct device_attribute *attr,
1643 char *buf)
1644{
1645 struct gendisk *disk = dev_to_disk(dev);
1646
1647 return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
1648}
1649
1650static ssize_t disk_events_poll_msecs_store(struct device *dev,
1651 struct device_attribute *attr,
1652 const char *buf, size_t count)
1653{
1654 struct gendisk *disk = dev_to_disk(dev);
1655 long intv;
1656
1657 if (!count || !sscanf(buf, "%ld", &intv))
1658 return -EINVAL;
1659
1660 if (intv < 0 && intv != -1)
1661 return -EINVAL;
1662
1663 __disk_block_events(disk, true);
1664 disk->ev->poll_msecs = intv;
1665 __disk_unblock_events(disk, true);
1666
1667 return count;
1668}
1669
1670static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
1671static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
1672static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
1673 disk_events_poll_msecs_show,
1674 disk_events_poll_msecs_store);
1675
1676static const struct attribute *disk_events_attrs[] = {
1677 &dev_attr_events.attr,
1678 &dev_attr_events_async.attr,
1679 &dev_attr_events_poll_msecs.attr,
1680 NULL,
1681};
1682
1683/*
1684 * The default polling interval can be specified by the kernel
1685 * parameter block.events_dfl_poll_msecs which defaults to 0
1686 * (disable). This can also be modified runtime by writing to
1687 * /sys/module/block/events_dfl_poll_msecs.
1688 */
1689static int disk_events_set_dfl_poll_msecs(const char *val,
1690 const struct kernel_param *kp)
1691{
1692 struct disk_events *ev;
1693 int ret;
1694
1695 ret = param_set_ulong(val, kp);
1696 if (ret < 0)
1697 return ret;
1698
1699 mutex_lock(&disk_events_mutex);
1700
1701 list_for_each_entry(ev, &disk_events, node)
1702 disk_check_events(ev->disk);
1703
1704 mutex_unlock(&disk_events_mutex);
1705
1706 return 0;
1707}
1708
1709static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
1710 .set = disk_events_set_dfl_poll_msecs,
1711 .get = param_get_ulong,
1712};
1713
1714#undef MODULE_PARAM_PREFIX
1715#define MODULE_PARAM_PREFIX "block."
1716
1717module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
1718 &disk_events_dfl_poll_msecs, 0644);
1719
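
For example (value illustrative), default in-kernel polling could be enabled at boot with:

	block.events_dfl_poll_msecs=2000

while a per-disk events_poll_msecs setting, as described above, still takes precedence over this default.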
1720/*
1721 * disk_{add|del|release}_events - initialize and destroy disk_events.
1722 */
1723static void disk_add_events(struct gendisk *disk)
1724{
1725 struct disk_events *ev;
1726
1727 if (!disk->fops->check_events || !(disk->events | disk->async_events))
1728 return;
1729
1730 ev = kzalloc(sizeof(*ev), GFP_KERNEL);
1731 if (!ev) {
1732 pr_warn("%s: failed to initialize events\n", disk->disk_name);
1733 return;
1734 }
1735
1736 if (sysfs_create_files(&disk_to_dev(disk)->kobj,
1737 disk_events_attrs) < 0) {
1738 pr_warn("%s: failed to create sysfs files for events\n",
1739 disk->disk_name);
1740 kfree(ev);
1741 return;
1742 }
1743
1744 disk->ev = ev;
1745
1746 INIT_LIST_HEAD(&ev->node);
1747 ev->disk = disk;
1748 spin_lock_init(&ev->lock);
1749 ev->block = 1;
1750 ev->poll_msecs = -1;
1751 INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
1752
1753 mutex_lock(&disk_events_mutex);
1754 list_add_tail(&ev->node, &disk_events);
1755 mutex_unlock(&disk_events_mutex);
1756
1757 /*
1758 * Block count is initialized to 1 and the following initial
1759 * unblock kicks it into action.
1760 */
1761 __disk_unblock_events(disk, true);
1762}
1763
1764static void disk_del_events(struct gendisk *disk)
1765{
1766 if (!disk->ev)
1767 return;
1768
1769 __disk_block_events(disk, true);
1770
1771 mutex_lock(&disk_events_mutex);
1772 list_del_init(&disk->ev->node);
1773 mutex_unlock(&disk_events_mutex);
1774
1775 sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
1776}
1777
1778static void disk_release_events(struct gendisk *disk)
1779{
1780 /* the block count should be 1 from disk_del_events() */
1781 WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
1782 kfree(disk->ev);
1783}
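
The provider side of this infrastructure is a driver that fills in disk->events (and optionally disk->async_events) and supplies a ->check_events() method in its block_device_operations; the cdrom/sr/sd commits in this merge do exactly that. A hedged sketch of a minimal provider (driver name and media-test helper are hypothetical):

#include <linux/blkdev.h>
#include <linux/genhd.h>

static unsigned int example_check_events(struct gendisk *disk,
					 unsigned int clearing)
{
	/* report only events actually detected since the last check */
	if (example_media_just_changed(disk))	/* hypothetical helper */
		return DISK_EVENT_MEDIA_CHANGE;
	return 0;
}

static const struct block_device_operations example_fops = {
	.owner		= THIS_MODULE,
	.check_events	= example_check_events,
	/* .open, .release, ... */
};

static void example_setup_disk(struct gendisk *disk)
{
	disk->fops = &example_fops;
	disk->events = DISK_EVENT_MEDIA_CHANGE;	/* supported, may be polled */
	/* disk->async_events would list events reported without polling */
}

disk_add_events() then picks these up from add_disk(), and disk_events_workfn() above turns any returned events into KOBJ_CHANGE uevents.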
diff --git a/block/ioctl.c b/block/ioctl.c
index a9a302eba01e..9049d460fa89 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
294 return -EINVAL; 294 return -EINVAL;
295 if (get_user(n, (int __user *) arg)) 295 if (get_user(n, (int __user *) arg))
296 return -EFAULT; 296 return -EFAULT;
297 if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) 297 if (!(mode & FMODE_EXCL) &&
298 blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
298 return -EBUSY; 299 return -EBUSY;
299 ret = set_blocksize(bdev, n); 300 ret = set_blocksize(bdev, n);
300 if (!(mode & FMODE_EXCL)) 301 if (!(mode & FMODE_EXCL))
301 bd_release(bdev); 302 blkdev_put(bdev, mode | FMODE_EXCL);
302 return ret; 303 return ret;
303 case BLKPG: 304 case BLKPG:
304 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); 305 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
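
The BLKBSZSET hunk above replaces the old bd_claim()/bd_release() pair with an exclusive blkdev_get()/blkdev_put(), in line with the reworked claiming interface used elsewhere in this series. A hedged, consolidated restatement of the new ioctl path (permission checks from the surrounding code omitted; the function name is illustrative):

static int example_blkbszset(struct block_device *bdev, fmode_t mode,
			     int __user *argp)
{
	int ret, n;

	if (get_user(n, argp))
		return -EFAULT;

	/* claim the device exclusively unless the caller already did */
	if (!(mode & FMODE_EXCL) &&
	    blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
		return -EBUSY;

	ret = set_blocksize(bdev, n);

	if (!(mode & FMODE_EXCL))
		blkdev_put(bdev, mode | FMODE_EXCL);
	return ret;
}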