aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 13:45:01 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 13:45:01 -0500
commit275220f0fcff1adf28a717076e00f575edf05fda (patch)
treed249bccc80c64443dab211639050c4fb14332648
parentfe3c560b8a22cb28e54fe8950abef38e88d75831 (diff)
parent81c5e2ae33c4b19e53966b427e33646bf6811830 (diff)
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits) block: ensure that completion error gets properly traced blktrace: add missing probe argument to block_bio_complete block cfq: don't use atomic_t for cfq_group block cfq: don't use atomic_t for cfq_queue block: trace event block fix unassigned field block: add internal hd part table references block: fix accounting bug on cross partition merges kref: add kref_test_and_get bio-integrity: mark kintegrityd_wq highpri and CPU intensive block: make kblockd_workqueue smarter Revert "sd: implement sd_check_events()" block: Clean up exit_io_context() source code. Fix compile warnings due to missing removal of a 'ret' variable fs/block: type signature of major_to_index(int) to major_to_index(unsigned) block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p) cfq-iosched: don't check cfqg in choose_service_tree() fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors cdrom: export cdrom_check_events() sd: implement sd_check_events() sr: implement sr_check_events() ...
-rw-r--r--Documentation/cgroups/blkio-controller.txt27
-rw-r--r--block/blk-cgroup.c4
-rw-r--r--block/blk-core.c40
-rw-r--r--block/blk-ioc.c5
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/cfq-iosched.c112
-rw-r--r--block/genhd.c550
-rw-r--r--block/ioctl.c5
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c7
-rw-r--r--drivers/block/drbd/drbd_nl.c103
-rw-r--r--drivers/block/loop.c6
-rw-r--r--drivers/block/pktcdvd.c22
-rw-r--r--drivers/cdrom/cdrom.c56
-rw-r--r--drivers/char/raw.c14
-rw-r--r--drivers/md/dm-table.c20
-rw-r--r--drivers/md/dm.c6
-rw-r--r--drivers/md/md.c16
-rw-r--r--drivers/mtd/devices/block2mtd.c10
-rw-r--r--drivers/s390/block/dasd_genhd.c2
-rw-r--r--drivers/scsi/scsi_lib.c13
-rw-r--r--drivers/scsi/sd.c10
-rw-r--r--drivers/scsi/sr.c174
-rw-r--r--drivers/scsi/sr.h3
-rw-r--r--drivers/scsi/sr_ioctl.c2
-rw-r--r--drivers/usb/gadget/storage_common.c7
-rw-r--r--fs/bio-integrity.c7
-rw-r--r--fs/block_dev.c741
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/jfs/jfs_logmgr.c17
-rw-r--r--fs/logfs/dev_bdev.c7
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/partitions/check.c106
-rw-r--r--fs/reiserfs/journal.c21
-rw-r--r--fs/splice.c43
-rw-r--r--fs/super.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/cdrom.h6
-rw-r--r--include/linux/fs.h26
-rw-r--r--include/linux/genhd.h45
-rw-r--r--include/scsi/scsi.h1
-rw-r--r--include/trace/events/block.h12
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/trace/blktrace.c37
-rw-r--r--mm/swapfile.c7
53 files changed, 1323 insertions, 1085 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index d6da611f8f63..4ed7b5ceeed2 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy
89 89
90 Limits for writes can be put using blkio.write_bps_device file. 90 Limits for writes can be put using blkio.write_bps_device file.
91 91
92Hierarchical Cgroups
93====================
94- Currently none of the IO control policies support hierarchical groups. But
95 the cgroup interface does allow creation of hierarchical cgroups, and internally
96 the IO policies treat them as a flat hierarchy.
97
98 So this patch will allow creation of a cgroup hierarchy, but at the backend
99 everything will be treated as flat. So suppose somebody created a hierarchy
100 as follows:
101
102 root
103 / \
104 test1 test2
105 |
106 test3
107
108 CFQ and throttling will practically treat all groups as being at the same level.
109
110 pivot
111 / | \ \
112 root test1 test2 test3
113
114 Down the line we can implement hierarchical accounting/control support
115 and also introduce a new cgroup file "use_hierarchy" which will control
116 whether cgroup hierarchy is viewed as flat or hierarchical by the policy.
117 This is also how the memory controller has implemented it.
118
92Various user visible config options 119Various user visible config options
93=================================== 120===================================
94CONFIG_BLK_CGROUP 121CONFIG_BLK_CGROUP
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1452 goto done; 1452 goto done;
1453 } 1453 }
1454 1454
1455 /* Currently we do not support hierarchy deeper than two level (0,1) */
1456 if (parent != cgroup->top_cgroup)
1457 return ERR_PTR(-EPERM);
1458
1459 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 1455 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1460 if (!blkcg) 1456 if (!blkcg)
1461 return ERR_PTR(-ENOMEM); 1457 return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..2f4002f79a24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
33 33
34#include "blk.h" 34#include "blk.h"
35 35
36EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); 36EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
39 39
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
64 return; 64 return;
65 65
66 cpu = part_stat_lock(); 66 cpu = part_stat_lock();
67 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
68 67
69 if (!new_io) 68 if (!new_io) {
69 part = rq->part;
70 part_stat_inc(cpu, part, merges[rw]); 70 part_stat_inc(cpu, part, merges[rw]);
71 else { 71 } else {
72 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
73 if (!hd_struct_try_get(part)) {
74 /*
75 * The partition is already being removed,
76 * the request will be accounted on the disk only
77 *
78 * We take a reference on disk->part0 although that
79 * partition will never be deleted, so we can treat
80 * it as any other partition.
81 */
82 part = &rq->rq_disk->part0;
83 hd_struct_get(part);
84 }
72 part_round_stats(cpu, part); 85 part_round_stats(cpu, part);
73 part_inc_in_flight(part, rw); 86 part_inc_in_flight(part, rw);
87 rq->part = part;
74 } 88 }
75 89
76 part_stat_unlock(); 90 part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
128 rq->ref_count = 1; 142 rq->ref_count = 1;
129 rq->start_time = jiffies; 143 rq->start_time = jiffies;
130 set_start_time_ns(rq); 144 set_start_time_ns(rq);
145 rq->part = NULL;
131} 146}
132EXPORT_SYMBOL(blk_rq_init); 147EXPORT_SYMBOL(blk_rq_init);
133 148
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
1329 bio->bi_sector += p->start_sect; 1344 bio->bi_sector += p->start_sect;
1330 bio->bi_bdev = bdev->bd_contains; 1345 bio->bi_bdev = bdev->bd_contains;
1331 1346
1332 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1347 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1333 bdev->bd_dev, 1348 bdev->bd_dev,
1334 bio->bi_sector - p->start_sect); 1349 bio->bi_sector - p->start_sect);
1335 } 1350 }
1336} 1351}
1337 1352
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
1500 goto end_io; 1515 goto end_io;
1501 1516
1502 if (old_sector != -1) 1517 if (old_sector != -1)
1503 trace_block_remap(q, bio, old_dev, old_sector); 1518 trace_block_bio_remap(q, bio, old_dev, old_sector);
1504 1519
1505 old_sector = bio->bi_sector; 1520 old_sector = bio->bi_sector;
1506 old_dev = bio->bi_bdev->bd_dev; 1521 old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1776 int cpu; 1791 int cpu;
1777 1792
1778 cpu = part_stat_lock(); 1793 cpu = part_stat_lock();
1779 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1794 part = req->part;
1780 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1795 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1781 part_stat_unlock(); 1796 part_stat_unlock();
1782 } 1797 }
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
1796 int cpu; 1811 int cpu;
1797 1812
1798 cpu = part_stat_lock(); 1813 cpu = part_stat_lock();
1799 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1814 part = req->part;
1800 1815
1801 part_stat_inc(cpu, part, ios[rw]); 1816 part_stat_inc(cpu, part, ios[rw]);
1802 part_stat_add(cpu, part, ticks[rw], duration); 1817 part_stat_add(cpu, part, ticks[rw], duration);
1803 part_round_stats(cpu, part); 1818 part_round_stats(cpu, part);
1804 part_dec_in_flight(part, rw); 1819 part_dec_in_flight(part, rw);
1805 1820
1821 hd_struct_put(part);
1806 part_stat_unlock(); 1822 part_stat_unlock();
1807 } 1823 }
1808} 1824}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
2606 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2622 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2607 sizeof(((struct request *)0)->cmd_flags)); 2623 sizeof(((struct request *)0)->cmd_flags));
2608 2624
2609 kblockd_workqueue = create_workqueue("kblockd"); 2625 /* used for unplugging and affects IO latency/throughput - HIGHPRI */
2626 kblockd_workqueue = alloc_workqueue("kblockd",
2627 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2610 if (!kblockd_workqueue) 2628 if (!kblockd_workqueue)
2611 panic("Failed to create kblockd\n"); 2629 panic("Failed to create kblockd\n");
2612 2630
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 3c7a339fe381..b791022beef3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
64 rcu_read_unlock(); 64 rcu_read_unlock();
65} 65}
66 66
67/* Called by the exitting task */ 67/* Called by the exiting task */
68void exit_io_context(struct task_struct *task) 68void exit_io_context(struct task_struct *task)
69{ 69{
70 struct io_context *ioc; 70 struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
74 task->io_context = NULL; 74 task->io_context = NULL;
75 task_unlock(task); 75 task_unlock(task);
76 76
77 if (atomic_dec_and_test(&ioc->nr_tasks)) { 77 if (atomic_dec_and_test(&ioc->nr_tasks))
78 cfq_exit(ioc); 78 cfq_exit(ioc);
79 79
80 }
81 put_io_context(ioc); 80 put_io_context(ioc);
82} 81}
83 82
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 74bc4a768f32..ea85e20d5e94 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
351 int cpu; 351 int cpu;
352 352
353 cpu = part_stat_lock(); 353 cpu = part_stat_lock();
354 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 354 part = req->part;
355 355
356 part_round_stats(cpu, part); 356 part_round_stats(cpu, part);
357 part_dec_in_flight(part, rq_data_dir(req)); 357 part_dec_in_flight(part, rq_data_dir(req));
358 358
359 hd_struct_put(part);
359 part_stat_unlock(); 360 part_stat_unlock();
360 } 361 }
361} 362}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 78ee4b1d4e85..8427697c5437 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct rb_node *active;
91}; 90};
92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
93 .count = 0, .min_vdisktime = 0, } 92 .count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
97 */ 96 */
98struct cfq_queue { 97struct cfq_queue {
99 /* reference count */ 98 /* reference count */
100 atomic_t ref; 99 int ref;
101 /* various state flags, see below */ 100 /* various state flags, see below */
102 unsigned int flags; 101 unsigned int flags;
103 /* parent cfq_data */ 102 /* parent cfq_data */
@@ -180,7 +179,6 @@ struct cfq_group {
180 /* group service_tree key */ 179 /* group service_tree key */
181 u64 vdisktime; 180 u64 vdisktime;
182 unsigned int weight; 181 unsigned int weight;
183 bool on_st;
184 182
185 /* number of cfqq currently on this group */ 183 /* number of cfqq currently on this group */
186 int nr_cfqq; 184 int nr_cfqq;
@@ -209,7 +207,7 @@ struct cfq_group {
209 struct blkio_group blkg; 207 struct blkio_group blkg;
210#ifdef CONFIG_CFQ_GROUP_IOSCHED 208#ifdef CONFIG_CFQ_GROUP_IOSCHED
211 struct hlist_node cfqd_node; 209 struct hlist_node cfqd_node;
212 atomic_t ref; 210 int ref;
213#endif 211#endif
214 /* number of requests that are on the dispatch list or inside driver */ 212 /* number of requests that are on the dispatch list or inside driver */
215 int dispatched; 213 int dispatched;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
563 u64 vdisktime = st->min_vdisktime; 561 u64 vdisktime = st->min_vdisktime;
564 struct cfq_group *cfqg; 562 struct cfq_group *cfqg;
565 563
566 if (st->active) {
567 cfqg = rb_entry_cfqg(st->active);
568 vdisktime = cfqg->vdisktime;
569 }
570
571 if (st->left) { 564 if (st->left) {
572 cfqg = rb_entry_cfqg(st->left); 565 cfqg = rb_entry_cfqg(st->left);
573 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); 566 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
646static inline bool cfq_slice_used(struct cfq_queue *cfqq) 639static inline bool cfq_slice_used(struct cfq_queue *cfqq)
647{ 640{
648 if (cfq_cfqq_slice_new(cfqq)) 641 if (cfq_cfqq_slice_new(cfqq))
649 return 0; 642 return false;
650 if (time_before(jiffies, cfqq->slice_end)) 643 if (time_before(jiffies, cfqq->slice_end))
651 return 0; 644 return false;
652 645
653 return 1; 646 return true;
654} 647}
655 648
656/* 649/*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
869 struct rb_node *n; 862 struct rb_node *n;
870 863
871 cfqg->nr_cfqq++; 864 cfqg->nr_cfqq++;
872 if (cfqg->on_st) 865 if (!RB_EMPTY_NODE(&cfqg->rb_node))
873 return; 866 return;
874 867
875 /* 868 /*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
885 cfqg->vdisktime = st->min_vdisktime; 878 cfqg->vdisktime = st->min_vdisktime;
886 879
887 __cfq_group_service_tree_add(st, cfqg); 880 __cfq_group_service_tree_add(st, cfqg);
888 cfqg->on_st = true;
889 st->total_weight += cfqg->weight; 881 st->total_weight += cfqg->weight;
890} 882}
891 883
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
894{ 886{
895 struct cfq_rb_root *st = &cfqd->grp_service_tree; 887 struct cfq_rb_root *st = &cfqd->grp_service_tree;
896 888
897 if (st->active == &cfqg->rb_node)
898 st->active = NULL;
899
900 BUG_ON(cfqg->nr_cfqq < 1); 889 BUG_ON(cfqg->nr_cfqq < 1);
901 cfqg->nr_cfqq--; 890 cfqg->nr_cfqq--;
902 891
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
905 return; 894 return;
906 895
907 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); 896 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
908 cfqg->on_st = false;
909 st->total_weight -= cfqg->weight; 897 st->total_weight -= cfqg->weight;
910 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 898 if (!RB_EMPTY_NODE(&cfqg->rb_node))
911 cfq_rb_erase(&cfqg->rb_node, st); 899 cfq_rb_erase(&cfqg->rb_node, st);
@@ -1026,7 +1014,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
1026 * elevator which will be dropped by either elevator exit 1014 * elevator which will be dropped by either elevator exit
1027 * or cgroup deletion path depending on who is exiting first. 1015 * or cgroup deletion path depending on who is exiting first.
1028 */ 1016 */
1029 atomic_set(&cfqg->ref, 1); 1017 cfqg->ref = 1;
1030 1018
1031 /* 1019 /*
1032 * Add group onto cgroup list. It might happen that bdi->dev is 1020 * Add group onto cgroup list. It might happen that bdi->dev is
@@ -1071,7 +1059,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1071 1059
1072static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) 1060static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1073{ 1061{
1074 atomic_inc(&cfqg->ref); 1062 cfqg->ref++;
1075 return cfqg; 1063 return cfqg;
1076} 1064}
1077 1065
@@ -1083,7 +1071,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1083 1071
1084 cfqq->cfqg = cfqg; 1072 cfqq->cfqg = cfqg;
1085 /* cfqq reference on cfqg */ 1073 /* cfqq reference on cfqg */
1086 atomic_inc(&cfqq->cfqg->ref); 1074 cfqq->cfqg->ref++;
1087} 1075}
1088 1076
1089static void cfq_put_cfqg(struct cfq_group *cfqg) 1077static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1091,11 +1079,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
1091 struct cfq_rb_root *st; 1079 struct cfq_rb_root *st;
1092 int i, j; 1080 int i, j;
1093 1081
1094 BUG_ON(atomic_read(&cfqg->ref) <= 0); 1082 BUG_ON(cfqg->ref <= 0);
1095 if (!atomic_dec_and_test(&cfqg->ref)) 1083 cfqg->ref--;
1084 if (cfqg->ref)
1096 return; 1085 return;
1097 for_each_cfqg_st(cfqg, i, j, st) 1086 for_each_cfqg_st(cfqg, i, j, st)
1098 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); 1087 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
1099 kfree(cfqg); 1088 kfree(cfqg);
1100} 1089}
1101 1090
@@ -1200,7 +1189,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1200 cfq_group_service_tree_del(cfqd, cfqq->cfqg); 1189 cfq_group_service_tree_del(cfqd, cfqq->cfqg);
1201 cfqq->orig_cfqg = cfqq->cfqg; 1190 cfqq->orig_cfqg = cfqq->cfqg;
1202 cfqq->cfqg = &cfqd->root_group; 1191 cfqq->cfqg = &cfqd->root_group;
1203 atomic_inc(&cfqd->root_group.ref); 1192 cfqd->root_group.ref++;
1204 group_changed = 1; 1193 group_changed = 1;
1205 } else if (!cfqd->cfq_group_isolation 1194 } else if (!cfqd->cfq_group_isolation
1206 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { 1195 && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@ -1687,9 +1676,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1687 if (cfqq == cfqd->active_queue) 1676 if (cfqq == cfqd->active_queue)
1688 cfqd->active_queue = NULL; 1677 cfqd->active_queue = NULL;
1689 1678
1690 if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
1691 cfqd->grp_service_tree.active = NULL;
1692
1693 if (cfqd->active_cic) { 1679 if (cfqd->active_cic) {
1694 put_io_context(cfqd->active_cic->ioc); 1680 put_io_context(cfqd->active_cic->ioc);
1695 cfqd->active_cic = NULL; 1681 cfqd->active_cic = NULL;
@@ -1901,10 +1887,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1901 * in their service tree. 1887 * in their service tree.
1902 */ 1888 */
1903 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1889 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1904 return 1; 1890 return true;
1905 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1891 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1906 service_tree->count); 1892 service_tree->count);
1907 return 0; 1893 return false;
1908} 1894}
1909 1895
1910static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1896static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2040,7 +2026,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq)
2040 int process_refs, io_refs; 2026 int process_refs, io_refs;
2041 2027
2042 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE]; 2028 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
2043 process_refs = atomic_read(&cfqq->ref) - io_refs; 2029 process_refs = cfqq->ref - io_refs;
2044 BUG_ON(process_refs < 0); 2030 BUG_ON(process_refs < 0);
2045 return process_refs; 2031 return process_refs;
2046} 2032}
@@ -2080,10 +2066,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
2080 */ 2066 */
2081 if (new_process_refs >= process_refs) { 2067 if (new_process_refs >= process_refs) {
2082 cfqq->new_cfqq = new_cfqq; 2068 cfqq->new_cfqq = new_cfqq;
2083 atomic_add(process_refs, &new_cfqq->ref); 2069 new_cfqq->ref += process_refs;
2084 } else { 2070 } else {
2085 new_cfqq->new_cfqq = cfqq; 2071 new_cfqq->new_cfqq = cfqq;
2086 atomic_add(new_process_refs, &cfqq->ref); 2072 cfqq->ref += new_process_refs;
2087 } 2073 }
2088} 2074}
2089 2075
@@ -2116,12 +2102,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2116 unsigned count; 2102 unsigned count;
2117 struct cfq_rb_root *st; 2103 struct cfq_rb_root *st;
2118 unsigned group_slice; 2104 unsigned group_slice;
2119 2105 enum wl_prio_t original_prio = cfqd->serving_prio;
2120 if (!cfqg) {
2121 cfqd->serving_prio = IDLE_WORKLOAD;
2122 cfqd->workload_expires = jiffies + 1;
2123 return;
2124 }
2125 2106
2126 /* Choose next priority. RT > BE > IDLE */ 2107 /* Choose next priority. RT > BE > IDLE */
2127 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 2108 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2115,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2134 return; 2115 return;
2135 } 2116 }
2136 2117
2118 if (original_prio != cfqd->serving_prio)
2119 goto new_workload;
2120
2137 /* 2121 /*
2138 * For RT and BE, we have to choose also the type 2122 * For RT and BE, we have to choose also the type
2139 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload 2123 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2132,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2148 if (count && !time_after(jiffies, cfqd->workload_expires)) 2132 if (count && !time_after(jiffies, cfqd->workload_expires))
2149 return; 2133 return;
2150 2134
2135new_workload:
2151 /* otherwise select new workload type */ 2136 /* otherwise select new workload type */
2152 cfqd->serving_type = 2137 cfqd->serving_type =
2153 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); 2138 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2184,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2199 if (RB_EMPTY_ROOT(&st->rb)) 2184 if (RB_EMPTY_ROOT(&st->rb))
2200 return NULL; 2185 return NULL;
2201 cfqg = cfq_rb_first_group(st); 2186 cfqg = cfq_rb_first_group(st);
2202 st->active = &cfqg->rb_node;
2203 update_min_vdisktime(st); 2187 update_min_vdisktime(st);
2204 return cfqg; 2188 return cfqg;
2205} 2189}
@@ -2293,6 +2277,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2293 goto keep_queue; 2277 goto keep_queue;
2294 } 2278 }
2295 2279
2280 /*
2281 * This is a deep seek queue, but the device is much faster than
2282 * the queue can deliver, don't idle
2283 **/
2284 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
2285 (cfq_cfqq_slice_new(cfqq) ||
2286 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
2287 cfq_clear_cfqq_deep(cfqq);
2288 cfq_clear_cfqq_idle_window(cfqq);
2289 }
2290
2296 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2291 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2297 cfqq = NULL; 2292 cfqq = NULL;
2298 goto keep_queue; 2293 goto keep_queue;
@@ -2367,12 +2362,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2367{ 2362{
2368 /* the queue hasn't finished any request, can't estimate */ 2363 /* the queue hasn't finished any request, can't estimate */
2369 if (cfq_cfqq_slice_new(cfqq)) 2364 if (cfq_cfqq_slice_new(cfqq))
2370 return 1; 2365 return true;
2371 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 2366 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2372 cfqq->slice_end)) 2367 cfqq->slice_end))
2373 return 1; 2368 return true;
2374 2369
2375 return 0; 2370 return false;
2376} 2371}
2377 2372
2378static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2373static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -2538,9 +2533,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
2538 struct cfq_data *cfqd = cfqq->cfqd; 2533 struct cfq_data *cfqd = cfqq->cfqd;
2539 struct cfq_group *cfqg, *orig_cfqg; 2534 struct cfq_group *cfqg, *orig_cfqg;
2540 2535
2541 BUG_ON(atomic_read(&cfqq->ref) <= 0); 2536 BUG_ON(cfqq->ref <= 0);
2542 2537
2543 if (!atomic_dec_and_test(&cfqq->ref)) 2538 cfqq->ref--;
2539 if (cfqq->ref)
2544 return; 2540 return;
2545 2541
2546 cfq_log_cfqq(cfqd, cfqq, "put_queue"); 2542 cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@ -2843,7 +2839,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2843 RB_CLEAR_NODE(&cfqq->p_node); 2839 RB_CLEAR_NODE(&cfqq->p_node);
2844 INIT_LIST_HEAD(&cfqq->fifo); 2840 INIT_LIST_HEAD(&cfqq->fifo);
2845 2841
2846 atomic_set(&cfqq->ref, 0); 2842 cfqq->ref = 0;
2847 cfqq->cfqd = cfqd; 2843 cfqq->cfqd = cfqd;
2848 2844
2849 cfq_mark_cfqq_prio_changed(cfqq); 2845 cfq_mark_cfqq_prio_changed(cfqq);
@@ -2979,11 +2975,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
2979 * pin the queue now that it's allocated, scheduler exit will prune it 2975 * pin the queue now that it's allocated, scheduler exit will prune it
2980 */ 2976 */
2981 if (!is_sync && !(*async_cfqq)) { 2977 if (!is_sync && !(*async_cfqq)) {
2982 atomic_inc(&cfqq->ref); 2978 cfqq->ref++;
2983 *async_cfqq = cfqq; 2979 *async_cfqq = cfqq;
2984 } 2980 }
2985 2981
2986 atomic_inc(&cfqq->ref); 2982 cfqq->ref++;
2987 return cfqq; 2983 return cfqq;
2988} 2984}
2989 2985
@@ -3265,6 +3261,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3265 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3261 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3266 return true; 3262 return true;
3267 3263
3264 /* An idle queue should not be idle now for some reason */
3265 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
3266 return true;
3267
3268 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 3268 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3269 return false; 3269 return false;
3270 3270
@@ -3681,13 +3681,13 @@ new_queue:
3681 } 3681 }
3682 3682
3683 cfqq->allocated[rw]++; 3683 cfqq->allocated[rw]++;
3684 atomic_inc(&cfqq->ref); 3684 cfqq->ref++;
3685
3686 spin_unlock_irqrestore(q->queue_lock, flags);
3687
3688 rq->elevator_private = cic; 3685 rq->elevator_private = cic;
3689 rq->elevator_private2 = cfqq; 3686 rq->elevator_private2 = cfqq;
3690 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); 3687 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
3688
3689 spin_unlock_irqrestore(q->queue_lock, flags);
3690
3691 return 0; 3691 return 0;
3692 3692
3693queue_fail: 3693queue_fail:
@@ -3862,6 +3862,10 @@ static void *cfq_init_queue(struct request_queue *q)
3862 if (!cfqd) 3862 if (!cfqd)
3863 return NULL; 3863 return NULL;
3864 3864
3865 /*
3866 * No need to take queue_lock in this routine, since we are
3867 * initializing the ioscheduler, and nobody is using cfqd yet
3868 */
3865 cfqd->cic_index = i; 3869 cfqd->cic_index = i;
3866 3870
3867 /* Init root service tree */ 3871 /* Init root service tree */
@@ -3881,7 +3885,7 @@ static void *cfq_init_queue(struct request_queue *q)
3881 * Take a reference to root group which we never drop. This is just 3885 * Take a reference to root group which we never drop. This is just
3882 * to make sure that cfq_put_cfqg() does not try to kfree root group 3886 * to make sure that cfq_put_cfqg() does not try to kfree root group
3883 */ 3887 */
3884 atomic_set(&cfqg->ref, 1); 3888 cfqg->ref = 1;
3885 rcu_read_lock(); 3889 rcu_read_lock();
3886 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, 3890 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
3887 (void *)cfqd, 0); 3891 (void *)cfqd, 0);
@@ -3901,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q)
3901 * will not attempt to free it. 3905 * will not attempt to free it.
3902 */ 3906 */
3903 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); 3907 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
3904 atomic_inc(&cfqd->oom_cfqq.ref); 3908 cfqd->oom_cfqq.ref++;
3905 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); 3909 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
3906 3910
3907 INIT_LIST_HEAD(&cfqd->cic_list); 3911 INIT_LIST_HEAD(&cfqd->cic_list);
diff --git a/block/genhd.c b/block/genhd.c
index 5fa2b44a72ff..6a5b772aa201 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,7 @@
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19#include <linux/mutex.h> 19#include <linux/mutex.h>
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/log2.h>
21 22
22#include "blk.h" 23#include "blk.h"
23 24
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
35 36
36static struct device_type disk_type; 37static struct device_type disk_type;
37 38
39static void disk_add_events(struct gendisk *disk);
40static void disk_del_events(struct gendisk *disk);
41static void disk_release_events(struct gendisk *disk);
42
38/** 43/**
39 * disk_get_part - get partition 44 * disk_get_part - get partition
40 * @disk: disk to look partition from 45 * @disk: disk to look partition from
@@ -239,7 +244,7 @@ static struct blk_major_name {
239} *major_names[BLKDEV_MAJOR_HASH_SIZE]; 244} *major_names[BLKDEV_MAJOR_HASH_SIZE];
240 245
241/* index in the above - for now: assume no multimajor ranges */ 246/* index in the above - for now: assume no multimajor ranges */
242static inline int major_to_index(int major) 247static inline int major_to_index(unsigned major)
243{ 248{
244 return major % BLKDEV_MAJOR_HASH_SIZE; 249 return major % BLKDEV_MAJOR_HASH_SIZE;
245} 250}
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data)
502 return 0; 507 return 0;
503} 508}
504 509
510void register_disk(struct gendisk *disk)
511{
512 struct device *ddev = disk_to_dev(disk);
513 struct block_device *bdev;
514 struct disk_part_iter piter;
515 struct hd_struct *part;
516 int err;
517
518 ddev->parent = disk->driverfs_dev;
519
520 dev_set_name(ddev, disk->disk_name);
521
522 /* delay uevents, until we scanned partition table */
523 dev_set_uevent_suppress(ddev, 1);
524
525 if (device_add(ddev))
526 return;
527 if (!sysfs_deprecated) {
528 err = sysfs_create_link(block_depr, &ddev->kobj,
529 kobject_name(&ddev->kobj));
530 if (err) {
531 device_del(ddev);
532 return;
533 }
534 }
535 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
536 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
537
538 /* No minors to use for partitions */
539 if (!disk_partitionable(disk))
540 goto exit;
541
542 /* No such device (e.g., media were just removed) */
543 if (!get_capacity(disk))
544 goto exit;
545
546 bdev = bdget_disk(disk, 0);
547 if (!bdev)
548 goto exit;
549
550 bdev->bd_invalidated = 1;
551 err = blkdev_get(bdev, FMODE_READ, NULL);
552 if (err < 0)
553 goto exit;
554 blkdev_put(bdev, FMODE_READ);
555
556exit:
557 /* announce disk after possible partitions are created */
558 dev_set_uevent_suppress(ddev, 0);
559 kobject_uevent(&ddev->kobj, KOBJ_ADD);
560
561 /* announce possible partitions */
562 disk_part_iter_init(&piter, disk, 0);
563 while ((part = disk_part_iter_next(&piter)))
564 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
565 disk_part_iter_exit(&piter);
566}
567
505/** 568/**
506 * add_disk - add partitioning information to kernel list 569 * add_disk - add partitioning information to kernel list
507 * @disk: per-device partitioning information 570 * @disk: per-device partitioning information
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk)
551 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 614 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
552 "bdi"); 615 "bdi");
553 WARN_ON(retval); 616 WARN_ON(retval);
554}
555 617
618 disk_add_events(disk);
619}
556EXPORT_SYMBOL(add_disk); 620EXPORT_SYMBOL(add_disk);
557EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
558 621
559void unlink_gendisk(struct gendisk *disk) 622void del_gendisk(struct gendisk *disk)
560{ 623{
624 struct disk_part_iter piter;
625 struct hd_struct *part;
626
627 disk_del_events(disk);
628
629 /* invalidate stuff */
630 disk_part_iter_init(&piter, disk,
631 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
632 while ((part = disk_part_iter_next(&piter))) {
633 invalidate_partition(disk, part->partno);
634 delete_partition(disk, part->partno);
635 }
636 disk_part_iter_exit(&piter);
637
638 invalidate_partition(disk, 0);
639 blk_free_devt(disk_to_dev(disk)->devt);
640 set_capacity(disk, 0);
641 disk->flags &= ~GENHD_FL_UP;
642
561 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 643 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
562 bdi_unregister(&disk->queue->backing_dev_info); 644 bdi_unregister(&disk->queue->backing_dev_info);
563 blk_unregister_queue(disk); 645 blk_unregister_queue(disk);
564 blk_unregister_region(disk_devt(disk), disk->minors); 646 blk_unregister_region(disk_devt(disk), disk->minors);
647
648 part_stat_set_all(&disk->part0, 0);
649 disk->part0.stamp = 0;
650
651 kobject_put(disk->part0.holder_dir);
652 kobject_put(disk->slave_dir);
653 disk->driverfs_dev = NULL;
654 if (!sysfs_deprecated)
655 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
656 device_del(disk_to_dev(disk));
565} 657}
658EXPORT_SYMBOL(del_gendisk);
566 659
567/** 660/**
568 * get_gendisk - get partitioning information for a given device 661 * get_gendisk - get partitioning information for a given device
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
735 static void *p; 828 static void *p;
736 829
737 p = disk_seqf_start(seqf, pos); 830 p = disk_seqf_start(seqf, pos);
738 if (!IS_ERR(p) && p && !*pos) 831 if (!IS_ERR_OR_NULL(p) && !*pos)
739 seq_puts(seqf, "major minor #blocks name\n\n"); 832 seq_puts(seqf, "major minor #blocks name\n\n");
740 return p; 833 return p;
741} 834}
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev)
1005{ 1098{
1006 struct gendisk *disk = dev_to_disk(dev); 1099 struct gendisk *disk = dev_to_disk(dev);
1007 1100
1101 disk_release_events(disk);
1008 kfree(disk->random); 1102 kfree(disk->random);
1009 disk_replace_part_tbl(disk, NULL); 1103 disk_replace_part_tbl(disk, NULL);
1010 free_part_stats(&disk->part0); 1104 free_part_stats(&disk->part0);
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void)
1110module_init(proc_genhd_init); 1204module_init(proc_genhd_init);
1111#endif /* CONFIG_PROC_FS */ 1205#endif /* CONFIG_PROC_FS */
1112 1206
1113static void media_change_notify_thread(struct work_struct *work)
1114{
1115 struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1116 char event[] = "MEDIA_CHANGE=1";
1117 char *envp[] = { event, NULL };
1118
1119 /*
1120 * set enviroment vars to indicate which event this is for
1121 * so that user space will know to go check the media status.
1122 */
1123 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1124 put_device(gd->driverfs_dev);
1125}
1126
1127#if 0
1128void genhd_media_change_notify(struct gendisk *disk)
1129{
1130 get_device(disk->driverfs_dev);
1131 schedule_work(&disk->async_notify);
1132}
1133EXPORT_SYMBOL_GPL(genhd_media_change_notify);
1134#endif /* 0 */
1135
1136dev_t blk_lookup_devt(const char *name, int partno) 1207dev_t blk_lookup_devt(const char *name, int partno)
1137{ 1208{
1138 dev_t devt = MKDEV(0, 0); 1209 dev_t devt = MKDEV(0, 0);
@@ -1193,13 +1264,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
1193 } 1264 }
1194 disk->part_tbl->part[0] = &disk->part0; 1265 disk->part_tbl->part[0] = &disk->part0;
1195 1266
1267 hd_ref_init(&disk->part0);
1268
1196 disk->minors = minors; 1269 disk->minors = minors;
1197 rand_initialize_disk(disk); 1270 rand_initialize_disk(disk);
1198 disk_to_dev(disk)->class = &block_class; 1271 disk_to_dev(disk)->class = &block_class;
1199 disk_to_dev(disk)->type = &disk_type; 1272 disk_to_dev(disk)->type = &disk_type;
1200 device_initialize(disk_to_dev(disk)); 1273 device_initialize(disk_to_dev(disk));
1201 INIT_WORK(&disk->async_notify,
1202 media_change_notify_thread);
1203 } 1274 }
1204 return disk; 1275 return disk;
1205} 1276}
@@ -1291,3 +1362,422 @@ int invalidate_partition(struct gendisk *disk, int partno)
1291} 1362}
1292 1363
1293EXPORT_SYMBOL(invalidate_partition); 1364EXPORT_SYMBOL(invalidate_partition);
1365
1366/*
1367 * Disk events - monitor disk events like media change and eject request.
1368 */
1369struct disk_events {
1370 struct list_head node; /* all disk_event's */
1371 struct gendisk *disk; /* the associated disk */
1372 spinlock_t lock;
1373
1374 int block; /* event blocking depth */
1375 unsigned int pending; /* events already sent out */
1376 unsigned int clearing; /* events being cleared */
1377
1378 long poll_msecs; /* interval, -1 for default */
1379 struct delayed_work dwork;
1380};
1381
1382static const char *disk_events_strs[] = {
1383 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
1384 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
1385};
1386
1387static char *disk_uevents[] = {
1388 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
1389 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
1390};
1391
1392/* list of all disk_events */
1393static DEFINE_MUTEX(disk_events_mutex);
1394static LIST_HEAD(disk_events);
1395
1396/* disable in-kernel polling by default */
1397static unsigned long disk_events_dfl_poll_msecs = 0;
1398
1399static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
1400{
1401 struct disk_events *ev = disk->ev;
1402 long intv_msecs = 0;
1403
1404 /*
1405 * If device-specific poll interval is set, always use it. If
1406 * the default is being used, poll iff there are events which
1407 * can't be monitored asynchronously.
1408 */
1409 if (ev->poll_msecs >= 0)
1410 intv_msecs = ev->poll_msecs;
1411 else if (disk->events & ~disk->async_events)
1412 intv_msecs = disk_events_dfl_poll_msecs;
1413
1414 return msecs_to_jiffies(intv_msecs);
1415}
1416
1417static void __disk_block_events(struct gendisk *disk, bool sync)
1418{
1419 struct disk_events *ev = disk->ev;
1420 unsigned long flags;
1421 bool cancel;
1422
1423 spin_lock_irqsave(&ev->lock, flags);
1424 cancel = !ev->block++;
1425 spin_unlock_irqrestore(&ev->lock, flags);
1426
1427 if (cancel) {
1428 if (sync)
1429 cancel_delayed_work_sync(&disk->ev->dwork);
1430 else
1431 cancel_delayed_work(&disk->ev->dwork);
1432 }
1433}
1434
1435static void __disk_unblock_events(struct gendisk *disk, bool check_now)
1436{
1437 struct disk_events *ev = disk->ev;
1438 unsigned long intv;
1439 unsigned long flags;
1440
1441 spin_lock_irqsave(&ev->lock, flags);
1442
1443 if (WARN_ON_ONCE(ev->block <= 0))
1444 goto out_unlock;
1445
1446 if (--ev->block)
1447 goto out_unlock;
1448
1449 /*
1450 * Not exactly a latency critical operation, set poll timer
1451 * slack to 25% and kick event check.
1452 */
1453 intv = disk_events_poll_jiffies(disk);
1454 set_timer_slack(&ev->dwork.timer, intv / 4);
1455 if (check_now)
1456 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1457 else if (intv)
1458 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1459out_unlock:
1460 spin_unlock_irqrestore(&ev->lock, flags);
1461}
1462
1463/**
1464 * disk_block_events - block and flush disk event checking
1465 * @disk: disk to block events for
1466 *
1467 * On return from this function, it is guaranteed that event checking
1468 * isn't in progress and won't happen until unblocked by
1469 * disk_unblock_events(). Events blocking is counted and the actual
1470 * unblocking happens after the matching number of unblocks are done.
1471 *
1472 * Note that this intentionally does not block event checking from
1473 * disk_clear_events().
1474 *
1475 * CONTEXT:
1476 * Might sleep.
1477 */
1478void disk_block_events(struct gendisk *disk)
1479{
1480 if (disk->ev)
1481 __disk_block_events(disk, true);
1482}
1483
1484/**
1485 * disk_unblock_events - unblock disk event checking
1486 * @disk: disk to unblock events for
1487 *
1488 * Undo disk_block_events(). When the block count reaches zero, it
1489 * starts events polling if configured.
1490 *
1491 * CONTEXT:
1492 * Don't care. Safe to call from irq context.
1493 */
1494void disk_unblock_events(struct gendisk *disk)
1495{
1496 if (disk->ev)
1497 __disk_unblock_events(disk, true);
1498}
1499
1500/**
1501 * disk_check_events - schedule immediate event checking
1502 * @disk: disk to check events for
1503 *
1504 * Schedule immediate event checking on @disk if not blocked.
1505 *
1506 * CONTEXT:
1507 * Don't care. Safe to call from irq context.
1508 */
1509void disk_check_events(struct gendisk *disk)
1510{
1511 if (disk->ev) {
1512 __disk_block_events(disk, false);
1513 __disk_unblock_events(disk, true);
1514 }
1515}
1516EXPORT_SYMBOL_GPL(disk_check_events);
1517
1518/**
1519 * disk_clear_events - synchronously check, clear and return pending events
1520 * @disk: disk to fetch and clear events from
1521 * @mask: mask of events to be fetched and clearted
1522 *
1523 * Disk events are synchronously checked and pending events in @mask
1524 * are cleared and returned. This ignores the block count.
1525 *
1526 * CONTEXT:
1527 * Might sleep.
1528 */
1529unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1530{
1531 const struct block_device_operations *bdops = disk->fops;
1532 struct disk_events *ev = disk->ev;
1533 unsigned int pending;
1534
1535 if (!ev) {
1536 /* for drivers still using the old ->media_changed method */
1537 if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
1538 bdops->media_changed && bdops->media_changed(disk))
1539 return DISK_EVENT_MEDIA_CHANGE;
1540 return 0;
1541 }
1542
1543 /* tell the workfn about the events being cleared */
1544 spin_lock_irq(&ev->lock);
1545 ev->clearing |= mask;
1546 spin_unlock_irq(&ev->lock);
1547
1548 /* uncondtionally schedule event check and wait for it to finish */
1549 __disk_block_events(disk, true);
1550 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1551 flush_delayed_work(&ev->dwork);
1552 __disk_unblock_events(disk, false);
1553
1554 /* then, fetch and clear pending events */
1555 spin_lock_irq(&ev->lock);
1556 WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
1557 pending = ev->pending & mask;
1558 ev->pending &= ~mask;
1559 spin_unlock_irq(&ev->lock);
1560
1561 return pending;
1562}
1563
1564static void disk_events_workfn(struct work_struct *work)
1565{
1566 struct delayed_work *dwork = to_delayed_work(work);
1567 struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1568 struct gendisk *disk = ev->disk;
1569 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1570 unsigned int clearing = ev->clearing;
1571 unsigned int events;
1572 unsigned long intv;
1573 int nr_events = 0, i;
1574
1575 /* check events */
1576 events = disk->fops->check_events(disk, clearing);
1577
1578 /* accumulate pending events and schedule next poll if necessary */
1579 spin_lock_irq(&ev->lock);
1580
1581 events &= ~ev->pending;
1582 ev->pending |= events;
1583 ev->clearing &= ~clearing;
1584
1585 intv = disk_events_poll_jiffies(disk);
1586 if (!ev->block && intv)
1587 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1588
1589 spin_unlock_irq(&ev->lock);
1590
1591 /* tell userland about new events */
1592 for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
1593 if (events & (1 << i))
1594 envp[nr_events++] = disk_uevents[i];
1595
1596 if (nr_events)
1597 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
1598}
1599
1600/*
1601 * A disk events enabled device has the following sysfs nodes under
1602 * its /sys/block/X/ directory.
1603 *
1604 * events : list of all supported events
1605 * events_async : list of events which can be detected w/o polling
1606 * events_poll_msecs : polling interval, 0: disable, -1: system default
1607 */
1608static ssize_t __disk_events_show(unsigned int events, char *buf)
1609{
1610 const char *delim = "";
1611 ssize_t pos = 0;
1612 int i;
1613
1614 for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
1615 if (events & (1 << i)) {
1616 pos += sprintf(buf + pos, "%s%s",
1617 delim, disk_events_strs[i]);
1618 delim = " ";
1619 }
1620 if (pos)
1621 pos += sprintf(buf + pos, "\n");
1622 return pos;
1623}
1624
1625static ssize_t disk_events_show(struct device *dev,
1626 struct device_attribute *attr, char *buf)
1627{
1628 struct gendisk *disk = dev_to_disk(dev);
1629
1630 return __disk_events_show(disk->events, buf);
1631}
1632
1633static ssize_t disk_events_async_show(struct device *dev,
1634 struct device_attribute *attr, char *buf)
1635{
1636 struct gendisk *disk = dev_to_disk(dev);
1637
1638 return __disk_events_show(disk->async_events, buf);
1639}
1640
1641static ssize_t disk_events_poll_msecs_show(struct device *dev,
1642 struct device_attribute *attr,
1643 char *buf)
1644{
1645 struct gendisk *disk = dev_to_disk(dev);
1646
1647 return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
1648}
1649
1650static ssize_t disk_events_poll_msecs_store(struct device *dev,
1651 struct device_attribute *attr,
1652 const char *buf, size_t count)
1653{
1654 struct gendisk *disk = dev_to_disk(dev);
1655 long intv;
1656
1657 if (!count || !sscanf(buf, "%ld", &intv))
1658 return -EINVAL;
1659
1660 if (intv < 0 && intv != -1)
1661 return -EINVAL;
1662
1663 __disk_block_events(disk, true);
1664 disk->ev->poll_msecs = intv;
1665 __disk_unblock_events(disk, true);
1666
1667 return count;
1668}
1669
1670static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
1671static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
1672static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
1673 disk_events_poll_msecs_show,
1674 disk_events_poll_msecs_store);
1675
1676static const struct attribute *disk_events_attrs[] = {
1677 &dev_attr_events.attr,
1678 &dev_attr_events_async.attr,
1679 &dev_attr_events_poll_msecs.attr,
1680 NULL,
1681};
1682
1683/*
1684 * The default polling interval can be specified by the kernel
1685 * parameter block.events_dfl_poll_msecs which defaults to 0
1686 * (disable). This can also be modified runtime by writing to
1687 * /sys/module/block/events_dfl_poll_msecs.
1688 */
1689static int disk_events_set_dfl_poll_msecs(const char *val,
1690 const struct kernel_param *kp)
1691{
1692 struct disk_events *ev;
1693 int ret;
1694
1695 ret = param_set_ulong(val, kp);
1696 if (ret < 0)
1697 return ret;
1698
1699 mutex_lock(&disk_events_mutex);
1700
1701 list_for_each_entry(ev, &disk_events, node)
1702 disk_check_events(ev->disk);
1703
1704 mutex_unlock(&disk_events_mutex);
1705
1706 return 0;
1707}
1708
1709static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
1710 .set = disk_events_set_dfl_poll_msecs,
1711 .get = param_get_ulong,
1712};
1713
1714#undef MODULE_PARAM_PREFIX
1715#define MODULE_PARAM_PREFIX "block."
1716
1717module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
1718 &disk_events_dfl_poll_msecs, 0644);
1719
1720/*
1721 * disk_{add|del|release}_events - initialize and destroy disk_events.
1722 */
1723static void disk_add_events(struct gendisk *disk)
1724{
1725 struct disk_events *ev;
1726
1727 if (!disk->fops->check_events || !(disk->events | disk->async_events))
1728 return;
1729
1730 ev = kzalloc(sizeof(*ev), GFP_KERNEL);
1731 if (!ev) {
1732 pr_warn("%s: failed to initialize events\n", disk->disk_name);
1733 return;
1734 }
1735
1736 if (sysfs_create_files(&disk_to_dev(disk)->kobj,
1737 disk_events_attrs) < 0) {
1738 pr_warn("%s: failed to create sysfs files for events\n",
1739 disk->disk_name);
1740 kfree(ev);
1741 return;
1742 }
1743
1744 disk->ev = ev;
1745
1746 INIT_LIST_HEAD(&ev->node);
1747 ev->disk = disk;
1748 spin_lock_init(&ev->lock);
1749 ev->block = 1;
1750 ev->poll_msecs = -1;
1751 INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
1752
1753 mutex_lock(&disk_events_mutex);
1754 list_add_tail(&ev->node, &disk_events);
1755 mutex_unlock(&disk_events_mutex);
1756
1757 /*
1758 * Block count is initialized to 1 and the following initial
1759 * unblock kicks it into action.
1760 */
1761 __disk_unblock_events(disk, true);
1762}
1763
1764static void disk_del_events(struct gendisk *disk)
1765{
1766 if (!disk->ev)
1767 return;
1768
1769 __disk_block_events(disk, true);
1770
1771 mutex_lock(&disk_events_mutex);
1772 list_del_init(&disk->ev->node);
1773 mutex_unlock(&disk_events_mutex);
1774
1775 sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
1776}
1777
1778static void disk_release_events(struct gendisk *disk)
1779{
1780 /* the block count should be 1 from disk_del_events() */
1781 WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
1782 kfree(disk->ev);
1783}
diff --git a/block/ioctl.c b/block/ioctl.c
index a9a302eba01e..9049d460fa89 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
294 return -EINVAL; 294 return -EINVAL;
295 if (get_user(n, (int __user *) arg)) 295 if (get_user(n, (int __user *) arg))
296 return -EFAULT; 296 return -EFAULT;
297 if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) 297 if (!(mode & FMODE_EXCL) &&
298 blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
298 return -EBUSY; 299 return -EBUSY;
299 ret = set_blocksize(bdev, n); 300 ret = set_blocksize(bdev, n);
300 if (!(mode & FMODE_EXCL)) 301 if (!(mode & FMODE_EXCL))
301 bd_release(bdev); 302 blkdev_put(bdev, mode | FMODE_EXCL);
302 return ret; 303 return ret;
303 case BLKPG: 304 case BLKPG:
304 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); 305 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1ea1a34e78b2..3803a0348937 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -911,8 +911,6 @@ struct drbd_md {
911struct drbd_backing_dev { 911struct drbd_backing_dev {
912 struct block_device *backing_bdev; 912 struct block_device *backing_bdev;
913 struct block_device *md_bdev; 913 struct block_device *md_bdev;
914 struct file *lo_file;
915 struct file *md_file;
916 struct drbd_md md; 914 struct drbd_md md;
917 struct disk_conf dc; /* The user provided config... */ 915 struct disk_conf dc; /* The user provided config... */
918 sector_t known_size; /* last known size of that backing device */ 916 sector_t known_size; /* last known size of that backing device */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6be5401d0e88..29cd0dc9fe4f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3372,11 +3372,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
3372 if (ldev == NULL) 3372 if (ldev == NULL)
3373 return; 3373 return;
3374 3374
3375 bd_release(ldev->backing_bdev); 3375 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3376 bd_release(ldev->md_bdev); 3376 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3377
3378 fput(ldev->lo_file);
3379 fput(ldev->md_file);
3380 3377
3381 kfree(ldev); 3378 kfree(ldev);
3382} 3379}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 29e5c70e4e26..8cbfaa687d72 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
855 sector_t max_possible_sectors; 855 sector_t max_possible_sectors;
856 sector_t min_md_device_sectors; 856 sector_t min_md_device_sectors;
857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ 857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
858 struct inode *inode, *inode2; 858 struct block_device *bdev;
859 struct lru_cache *resync_lru = NULL; 859 struct lru_cache *resync_lru = NULL;
860 union drbd_state ns, os; 860 union drbd_state ns, os;
861 unsigned int max_seg_s; 861 unsigned int max_seg_s;
@@ -907,46 +907,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
907 } 907 }
908 } 908 }
909 909
910 nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); 910 bdev = blkdev_get_by_path(nbc->dc.backing_dev,
911 if (IS_ERR(nbc->lo_file)) { 911 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
912 if (IS_ERR(bdev)) {
912 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, 913 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
913 PTR_ERR(nbc->lo_file)); 914 PTR_ERR(bdev));
914 nbc->lo_file = NULL;
915 retcode = ERR_OPEN_DISK; 915 retcode = ERR_OPEN_DISK;
916 goto fail; 916 goto fail;
917 } 917 }
918 nbc->backing_bdev = bdev;
918 919
919 inode = nbc->lo_file->f_dentry->d_inode; 920 /*
920 921 * meta_dev_idx >= 0: external fixed size, possibly multiple
921 if (!S_ISBLK(inode->i_mode)) { 922 * drbd sharing one meta device. TODO in that case, paranoia
922 retcode = ERR_DISK_NOT_BDEV; 923 * check that [md_bdev, meta_dev_idx] is not yet used by some
923 goto fail; 924 * other drbd minor! (if you use drbd.conf + drbdadm, that
924 } 925 * should check it for you already; but if you don't, or
925 926 * someone fooled it, we need to double check here)
926 nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); 927 */
927 if (IS_ERR(nbc->md_file)) { 928 bdev = blkdev_get_by_path(nbc->dc.meta_dev,
929 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
930 (nbc->dc.meta_dev_idx < 0) ?
931 (void *)mdev : (void *)drbd_m_holder);
932 if (IS_ERR(bdev)) {
928 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, 933 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
929 PTR_ERR(nbc->md_file)); 934 PTR_ERR(bdev));
930 nbc->md_file = NULL;
931 retcode = ERR_OPEN_MD_DISK; 935 retcode = ERR_OPEN_MD_DISK;
932 goto fail; 936 goto fail;
933 } 937 }
938 nbc->md_bdev = bdev;
934 939
935 inode2 = nbc->md_file->f_dentry->d_inode; 940 if ((nbc->backing_bdev == nbc->md_bdev) !=
936 941 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
937 if (!S_ISBLK(inode2->i_mode)) { 942 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
938 retcode = ERR_MD_NOT_BDEV; 943 retcode = ERR_MD_IDX_INVALID;
939 goto fail;
940 }
941
942 nbc->backing_bdev = inode->i_bdev;
943 if (bd_claim(nbc->backing_bdev, mdev)) {
944 printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
945 nbc->backing_bdev, mdev,
946 nbc->backing_bdev->bd_holder,
947 nbc->backing_bdev->bd_contains->bd_holder,
948 nbc->backing_bdev->bd_holders);
949 retcode = ERR_BDCLAIM_DISK;
950 goto fail; 944 goto fail;
951 } 945 }
952 946
@@ -955,28 +949,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
955 offsetof(struct bm_extent, lce)); 949 offsetof(struct bm_extent, lce));
956 if (!resync_lru) { 950 if (!resync_lru) {
957 retcode = ERR_NOMEM; 951 retcode = ERR_NOMEM;
958 goto release_bdev_fail; 952 goto fail;
959 }
960
961 /* meta_dev_idx >= 0: external fixed size,
962 * possibly multiple drbd sharing one meta device.
963 * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
964 * not yet used by some other drbd minor!
965 * (if you use drbd.conf + drbdadm,
966 * that should check it for you already; but if you don't, or someone
967 * fooled it, we need to double check here) */
968 nbc->md_bdev = inode2->i_bdev;
969 if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
970 : (void *) drbd_m_holder)) {
971 retcode = ERR_BDCLAIM_MD_DISK;
972 goto release_bdev_fail;
973 }
974
975 if ((nbc->backing_bdev == nbc->md_bdev) !=
976 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
977 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
978 retcode = ERR_MD_IDX_INVALID;
979 goto release_bdev2_fail;
980 } 953 }
981 954
982 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ 955 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
@@ -987,7 +960,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
987 (unsigned long long) drbd_get_max_capacity(nbc), 960 (unsigned long long) drbd_get_max_capacity(nbc),
988 (unsigned long long) nbc->dc.disk_size); 961 (unsigned long long) nbc->dc.disk_size);
989 retcode = ERR_DISK_TO_SMALL; 962 retcode = ERR_DISK_TO_SMALL;
990 goto release_bdev2_fail; 963 goto fail;
991 } 964 }
992 965
993 if (nbc->dc.meta_dev_idx < 0) { 966 if (nbc->dc.meta_dev_idx < 0) {
@@ -1004,7 +977,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1004 dev_warn(DEV, "refusing attach: md-device too small, " 977 dev_warn(DEV, "refusing attach: md-device too small, "
1005 "at least %llu sectors needed for this meta-disk type\n", 978 "at least %llu sectors needed for this meta-disk type\n",
1006 (unsigned long long) min_md_device_sectors); 979 (unsigned long long) min_md_device_sectors);
1007 goto release_bdev2_fail; 980 goto fail;
1008 } 981 }
1009 982
1010 /* Make sure the new disk is big enough 983 /* Make sure the new disk is big enough
@@ -1012,7 +985,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1012 if (drbd_get_max_capacity(nbc) < 985 if (drbd_get_max_capacity(nbc) <
1013 drbd_get_capacity(mdev->this_bdev)) { 986 drbd_get_capacity(mdev->this_bdev)) {
1014 retcode = ERR_DISK_TO_SMALL; 987 retcode = ERR_DISK_TO_SMALL;
1015 goto release_bdev2_fail; 988 goto fail;
1016 } 989 }
1017 990
1018 nbc->known_size = drbd_get_capacity(nbc->backing_bdev); 991 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
@@ -1035,7 +1008,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1035 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); 1008 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1036 drbd_resume_io(mdev); 1009 drbd_resume_io(mdev);
1037 if (retcode < SS_SUCCESS) 1010 if (retcode < SS_SUCCESS)
1038 goto release_bdev2_fail; 1011 goto fail;
1039 1012
1040 if (!get_ldev_if_state(mdev, D_ATTACHING)) 1013 if (!get_ldev_if_state(mdev, D_ATTACHING))
1041 goto force_diskless; 1014 goto force_diskless;
@@ -1269,18 +1242,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1269 force_diskless: 1242 force_diskless:
1270 drbd_force_state(mdev, NS(disk, D_FAILED)); 1243 drbd_force_state(mdev, NS(disk, D_FAILED));
1271 drbd_md_sync(mdev); 1244 drbd_md_sync(mdev);
1272 release_bdev2_fail:
1273 if (nbc)
1274 bd_release(nbc->md_bdev);
1275 release_bdev_fail:
1276 if (nbc)
1277 bd_release(nbc->backing_bdev);
1278 fail: 1245 fail:
1279 if (nbc) { 1246 if (nbc) {
1280 if (nbc->lo_file) 1247 if (nbc->backing_bdev)
1281 fput(nbc->lo_file); 1248 blkdev_put(nbc->backing_bdev,
1282 if (nbc->md_file) 1249 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1283 fput(nbc->md_file); 1250 if (nbc->md_bdev)
1251 blkdev_put(nbc->md_bdev,
1252 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1284 kfree(nbc); 1253 kfree(nbc);
1285 } 1254 }
1286 lc_destroy(resync_lru); 1255 lc_destroy(resync_lru);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7ea0bea2f7e3..44e18c073c44 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -395,11 +395,7 @@ lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
395 struct loop_device *lo = p->lo; 395 struct loop_device *lo = p->lo;
396 struct page *page = buf->page; 396 struct page *page = buf->page;
397 sector_t IV; 397 sector_t IV;
398 int size, ret; 398 int size;
399
400 ret = buf->ops->confirm(pipe, buf);
401 if (unlikely(ret))
402 return ret;
403 399
404 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 400 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
405 (buf->offset >> 9); 401 (buf->offset >> 9);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 19b3568e9326..77d70eebb6b2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2296,15 +2296,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2296 * so bdget() can't fail. 2296 * so bdget() can't fail.
2297 */ 2297 */
2298 bdget(pd->bdev->bd_dev); 2298 bdget(pd->bdev->bd_dev);
2299 if ((ret = blkdev_get(pd->bdev, FMODE_READ))) 2299 if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd)))
2300 goto out; 2300 goto out;
2301 2301
2302 if ((ret = bd_claim(pd->bdev, pd)))
2303 goto out_putdev;
2304
2305 if ((ret = pkt_get_last_written(pd, &lba))) { 2302 if ((ret = pkt_get_last_written(pd, &lba))) {
2306 printk(DRIVER_NAME": pkt_get_last_written failed\n"); 2303 printk(DRIVER_NAME": pkt_get_last_written failed\n");
2307 goto out_unclaim; 2304 goto out_putdev;
2308 } 2305 }
2309 2306
2310 set_capacity(pd->disk, lba << 2); 2307 set_capacity(pd->disk, lba << 2);
@@ -2314,7 +2311,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2314 q = bdev_get_queue(pd->bdev); 2311 q = bdev_get_queue(pd->bdev);
2315 if (write) { 2312 if (write) {
2316 if ((ret = pkt_open_write(pd))) 2313 if ((ret = pkt_open_write(pd)))
2317 goto out_unclaim; 2314 goto out_putdev;
2318 /* 2315 /*
2319 * Some CDRW drives can not handle writes larger than one packet, 2316 * Some CDRW drives can not handle writes larger than one packet,
2320 * even if the size is a multiple of the packet size. 2317 * even if the size is a multiple of the packet size.
@@ -2329,23 +2326,21 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2329 } 2326 }
2330 2327
2331 if ((ret = pkt_set_segment_merging(pd, q))) 2328 if ((ret = pkt_set_segment_merging(pd, q)))
2332 goto out_unclaim; 2329 goto out_putdev;
2333 2330
2334 if (write) { 2331 if (write) {
2335 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { 2332 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
2336 printk(DRIVER_NAME": not enough memory for buffers\n"); 2333 printk(DRIVER_NAME": not enough memory for buffers\n");
2337 ret = -ENOMEM; 2334 ret = -ENOMEM;
2338 goto out_unclaim; 2335 goto out_putdev;
2339 } 2336 }
2340 printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); 2337 printk(DRIVER_NAME": %lukB available on disc\n", lba << 1);
2341 } 2338 }
2342 2339
2343 return 0; 2340 return 0;
2344 2341
2345out_unclaim:
2346 bd_release(pd->bdev);
2347out_putdev: 2342out_putdev:
2348 blkdev_put(pd->bdev, FMODE_READ); 2343 blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
2349out: 2344out:
2350 return ret; 2345 return ret;
2351} 2346}
@@ -2362,8 +2357,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
2362 pkt_lock_door(pd, 0); 2357 pkt_lock_door(pd, 0);
2363 2358
2364 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); 2359 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
2365 bd_release(pd->bdev); 2360 blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
2366 blkdev_put(pd->bdev, FMODE_READ);
2367 2361
2368 pkt_shrink_pktlist(pd); 2362 pkt_shrink_pktlist(pd);
2369} 2363}
@@ -2733,7 +2727,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2733 bdev = bdget(dev); 2727 bdev = bdget(dev);
2734 if (!bdev) 2728 if (!bdev)
2735 return -ENOMEM; 2729 return -ENOMEM;
2736 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); 2730 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
2737 if (ret) 2731 if (ret)
2738 return ret; 2732 return ret;
2739 2733
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index af13c62dc473..14033a36bcd0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1348,7 +1348,10 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
1348 if (!CDROM_CAN(CDC_SELECT_DISC)) 1348 if (!CDROM_CAN(CDC_SELECT_DISC))
1349 return -EDRIVE_CANT_DO_THIS; 1349 return -EDRIVE_CANT_DO_THIS;
1350 1350
1351 (void) cdi->ops->media_changed(cdi, slot); 1351 if (cdi->ops->check_events)
1352 cdi->ops->check_events(cdi, 0, slot);
1353 else
1354 cdi->ops->media_changed(cdi, slot);
1352 1355
1353 if (slot == CDSL_NONE) { 1356 if (slot == CDSL_NONE) {
1354 /* set media changed bits, on both queues */ 1357 /* set media changed bits, on both queues */
@@ -1392,6 +1395,42 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
1392 return slot; 1395 return slot;
1393} 1396}
1394 1397
1398/*
1399 * As cdrom implements an extra ioctl consumer for media changed
1400 * event, it needs to buffer ->check_events() output, such that event
1401 * is not lost for both the usual VFS and ioctl paths.
1402 * cdi->{vfs|ioctl}_events are used to buffer pending events for each
1403 * path.
1404 *
1405 * XXX: Locking is non-existent. cdi->ops->check_events() can be
1406 * called in parallel and buffering fields are accessed without any
1407 * exclusion. The original media_changed code had the same problem.
1408 * It might be better to simply deprecate CDROM_MEDIA_CHANGED ioctl
1409 * and remove this cruft altogether. It doesn't have much usefulness
1410 * at this point.
1411 */
1412static void cdrom_update_events(struct cdrom_device_info *cdi,
1413 unsigned int clearing)
1414{
1415 unsigned int events;
1416
1417 events = cdi->ops->check_events(cdi, clearing, CDSL_CURRENT);
1418 cdi->vfs_events |= events;
1419 cdi->ioctl_events |= events;
1420}
1421
1422unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
1423 unsigned int clearing)
1424{
1425 unsigned int events;
1426
1427 cdrom_update_events(cdi, clearing);
1428 events = cdi->vfs_events;
1429 cdi->vfs_events = 0;
1430 return events;
1431}
1432EXPORT_SYMBOL(cdrom_check_events);
1433
1395/* We want to make media_changed accessible to the user through an 1434/* We want to make media_changed accessible to the user through an
1396 * ioctl. The main problem now is that we must double-buffer the 1435 * ioctl. The main problem now is that we must double-buffer the
1397 * low-level implementation, to assure that the VFS and the user both 1436 * low-level implementation, to assure that the VFS and the user both
@@ -1403,15 +1442,26 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
1403{ 1442{
1404 unsigned int mask = (1 << (queue & 1)); 1443 unsigned int mask = (1 << (queue & 1));
1405 int ret = !!(cdi->mc_flags & mask); 1444 int ret = !!(cdi->mc_flags & mask);
1445 bool changed;
1406 1446
1407 if (!CDROM_CAN(CDC_MEDIA_CHANGED)) 1447 if (!CDROM_CAN(CDC_MEDIA_CHANGED))
1408 return ret; 1448 return ret;
1449
1409 /* changed since last call? */ 1450 /* changed since last call? */
1410 if (cdi->ops->media_changed(cdi, CDSL_CURRENT)) { 1451 if (cdi->ops->check_events) {
1452 BUG_ON(!queue); /* shouldn't be called from VFS path */
1453 cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
1454 changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
1455 cdi->ioctl_events = 0;
1456 } else
1457 changed = cdi->ops->media_changed(cdi, CDSL_CURRENT);
1458
1459 if (changed) {
1411 cdi->mc_flags = 0x3; /* set bit on both queues */ 1460 cdi->mc_flags = 0x3; /* set bit on both queues */
1412 ret |= 1; 1461 ret |= 1;
1413 cdi->media_written = 0; 1462 cdi->media_written = 0;
1414 } 1463 }
1464
1415 cdi->mc_flags &= ~mask; /* clear bit */ 1465 cdi->mc_flags &= ~mask; /* clear bit */
1416 return ret; 1466 return ret;
1417} 1467}
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index bfe25ea9766b..b4b9d5a47885 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -65,15 +65,12 @@ static int raw_open(struct inode *inode, struct file *filp)
65 if (!bdev) 65 if (!bdev)
66 goto out; 66 goto out;
67 igrab(bdev->bd_inode); 67 igrab(bdev->bd_inode);
68 err = blkdev_get(bdev, filp->f_mode); 68 err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open);
69 if (err) 69 if (err)
70 goto out; 70 goto out;
71 err = bd_claim(bdev, raw_open);
72 if (err)
73 goto out1;
74 err = set_blocksize(bdev, bdev_logical_block_size(bdev)); 71 err = set_blocksize(bdev, bdev_logical_block_size(bdev));
75 if (err) 72 if (err)
76 goto out2; 73 goto out1;
77 filp->f_flags |= O_DIRECT; 74 filp->f_flags |= O_DIRECT;
78 filp->f_mapping = bdev->bd_inode->i_mapping; 75 filp->f_mapping = bdev->bd_inode->i_mapping;
79 if (++raw_devices[minor].inuse == 1) 76 if (++raw_devices[minor].inuse == 1)
@@ -83,10 +80,8 @@ static int raw_open(struct inode *inode, struct file *filp)
83 mutex_unlock(&raw_mutex); 80 mutex_unlock(&raw_mutex);
84 return 0; 81 return 0;
85 82
86out2:
87 bd_release(bdev);
88out1: 83out1:
89 blkdev_put(bdev, filp->f_mode); 84 blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
90out: 85out:
91 mutex_unlock(&raw_mutex); 86 mutex_unlock(&raw_mutex);
92 return err; 87 return err;
@@ -110,8 +105,7 @@ static int raw_release(struct inode *inode, struct file *filp)
110 } 105 }
111 mutex_unlock(&raw_mutex); 106 mutex_unlock(&raw_mutex);
112 107
113 bd_release(bdev); 108 blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
114 blkdev_put(bdev, filp->f_mode);
115 return 0; 109 return 0;
116} 110}
117 111
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4d705cea0f8c..985c20a4f30e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -325,15 +325,18 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev,
325 325
326 BUG_ON(d->dm_dev.bdev); 326 BUG_ON(d->dm_dev.bdev);
327 327
328 bdev = open_by_devnum(dev, d->dm_dev.mode); 328 bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
329 if (IS_ERR(bdev)) 329 if (IS_ERR(bdev))
330 return PTR_ERR(bdev); 330 return PTR_ERR(bdev);
331 r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); 331
332 if (r) 332 r = bd_link_disk_holder(bdev, dm_disk(md));
333 blkdev_put(bdev, d->dm_dev.mode); 333 if (r) {
334 else 334 blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
335 d->dm_dev.bdev = bdev; 335 return r;
336 return r; 336 }
337
338 d->dm_dev.bdev = bdev;
339 return 0;
337} 340}
338 341
339/* 342/*
@@ -344,8 +347,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
344 if (!d->dm_dev.bdev) 347 if (!d->dm_dev.bdev)
345 return; 348 return;
346 349
347 bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); 350 blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
348 blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
349 d->dm_dev.bdev = NULL; 351 d->dm_dev.bdev = NULL;
350} 352}
351 353
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7cb1352f7e7a..f48a2f359ac4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -630,7 +630,7 @@ static void dec_pending(struct dm_io *io, int error)
630 queue_io(md, bio); 630 queue_io(md, bio);
631 } else { 631 } else {
632 /* done with normal IO or empty flush */ 632 /* done with normal IO or empty flush */
633 trace_block_bio_complete(md->queue, bio); 633 trace_block_bio_complete(md->queue, bio, io_error);
634 bio_endio(bio, io_error); 634 bio_endio(bio, io_error);
635 } 635 }
636 } 636 }
@@ -990,8 +990,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
990 if (r == DM_MAPIO_REMAPPED) { 990 if (r == DM_MAPIO_REMAPPED) {
991 /* the bio has been remapped so dispatch it */ 991 /* the bio has been remapped so dispatch it */
992 992
993 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, 993 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
994 tio->io->bio->bi_bdev->bd_dev, sector); 994 tio->io->bio->bi_bdev->bd_dev, sector);
995 995
996 generic_make_request(clone); 996 generic_make_request(clone);
997 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 997 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 175c424f201f..7fc090ac9e28 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1879,7 +1879,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1879 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); 1879 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
1880 1880
1881 list_add_rcu(&rdev->same_set, &mddev->disks); 1881 list_add_rcu(&rdev->same_set, &mddev->disks);
1882 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1882 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
1883 1883
1884 /* May as well allow recovery to be retried once */ 1884 /* May as well allow recovery to be retried once */
1885 mddev->recovery_disabled = 0; 1885 mddev->recovery_disabled = 0;
@@ -1906,7 +1906,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1906 MD_BUG(); 1906 MD_BUG();
1907 return; 1907 return;
1908 } 1908 }
1909 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1910 list_del_rcu(&rdev->same_set); 1909 list_del_rcu(&rdev->same_set);
1911 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); 1910 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1912 rdev->mddev = NULL; 1911 rdev->mddev = NULL;
@@ -1934,19 +1933,13 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1934 struct block_device *bdev; 1933 struct block_device *bdev;
1935 char b[BDEVNAME_SIZE]; 1934 char b[BDEVNAME_SIZE];
1936 1935
1937 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 1936 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1937 shared ? (mdk_rdev_t *)lock_rdev : rdev);
1938 if (IS_ERR(bdev)) { 1938 if (IS_ERR(bdev)) {
1939 printk(KERN_ERR "md: could not open %s.\n", 1939 printk(KERN_ERR "md: could not open %s.\n",
1940 __bdevname(dev, b)); 1940 __bdevname(dev, b));
1941 return PTR_ERR(bdev); 1941 return PTR_ERR(bdev);
1942 } 1942 }
1943 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1944 if (err) {
1945 printk(KERN_ERR "md: could not bd_claim %s.\n",
1946 bdevname(bdev, b));
1947 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1948 return err;
1949 }
1950 if (!shared) 1943 if (!shared)
1951 set_bit(AllReserved, &rdev->flags); 1944 set_bit(AllReserved, &rdev->flags);
1952 rdev->bdev = bdev; 1945 rdev->bdev = bdev;
@@ -1959,8 +1952,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
1959 rdev->bdev = NULL; 1952 rdev->bdev = NULL;
1960 if (!bdev) 1953 if (!bdev)
1961 MD_BUG(); 1954 MD_BUG();
1962 bd_release(bdev); 1955 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1963 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1964} 1956}
1965 1957
1966void md_autodetect_dev(dev_t dev); 1958void md_autodetect_dev(dev_t dev);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 2cf0cc6a4189..f29a6f9df6e7 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
224 if (dev->blkdev) { 224 if (dev->blkdev) {
225 invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 225 invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping,
226 0, -1); 226 0, -1);
227 close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); 227 blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
228 } 228 }
229 229
230 kfree(dev); 230 kfree(dev);
@@ -234,6 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
234/* FIXME: ensure that mtd->size % erase_size == 0 */ 234/* FIXME: ensure that mtd->size % erase_size == 0 */
235static struct block2mtd_dev *add_device(char *devname, int erase_size) 235static struct block2mtd_dev *add_device(char *devname, int erase_size)
236{ 236{
237 const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
237 struct block_device *bdev; 238 struct block_device *bdev;
238 struct block2mtd_dev *dev; 239 struct block2mtd_dev *dev;
239 char *name; 240 char *name;
@@ -246,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
246 return NULL; 247 return NULL;
247 248
248 /* Get a handle on the device */ 249 /* Get a handle on the device */
249 bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); 250 bdev = blkdev_get_by_path(devname, mode, dev);
250#ifndef MODULE 251#ifndef MODULE
251 if (IS_ERR(bdev)) { 252 if (IS_ERR(bdev)) {
252 253
@@ -254,9 +255,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
254 to resolve the device name by other means. */ 255 to resolve the device name by other means. */
255 256
256 dev_t devt = name_to_dev_t(devname); 257 dev_t devt = name_to_dev_t(devname);
257 if (devt) { 258 if (devt)
258 bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ); 259 bdev = blkdev_get_by_dev(devt, mode, dev);
259 }
260 } 260 }
261#endif 261#endif
262 262
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 30a1ca3d08b7..5505bc07e1e7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -103,7 +103,7 @@ int dasd_scan_partitions(struct dasd_block *block)
103 struct block_device *bdev; 103 struct block_device *bdev;
104 104
105 bdev = bdget_disk(block->gdp, 0); 105 bdev = bdget_disk(block->gdp, 0);
106 if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) 106 if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0)
107 return -ENODEV; 107 return -ENODEV;
108 /* 108 /*
109 * See fs/partition/check.c:register_disk,rescan_partitions 109 * See fs/partition/check.c:register_disk,rescan_partitions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 501f67bef719..9045c52abd25 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1977,8 +1977,7 @@ EXPORT_SYMBOL(scsi_mode_sense);
1977 * in. 1977 * in.
1978 * 1978 *
1979 * Returns zero if unsuccessful or an error if TUR failed. For 1979 * Returns zero if unsuccessful or an error if TUR failed. For
1980 * removable media, a return of NOT_READY or UNIT_ATTENTION is 1980 * removable media, UNIT_ATTENTION sets ->changed flag.
1981 * translated to success, with the ->changed flag updated.
1982 **/ 1981 **/
1983int 1982int
1984scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, 1983scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
@@ -2005,16 +2004,6 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
2005 } while (scsi_sense_valid(sshdr) && 2004 } while (scsi_sense_valid(sshdr) &&
2006 sshdr->sense_key == UNIT_ATTENTION && --retries); 2005 sshdr->sense_key == UNIT_ATTENTION && --retries);
2007 2006
2008 if (!sshdr)
2009 /* could not allocate sense buffer, so can't process it */
2010 return result;
2011
2012 if (sdev->removable && scsi_sense_valid(sshdr) &&
2013 (sshdr->sense_key == UNIT_ATTENTION ||
2014 sshdr->sense_key == NOT_READY)) {
2015 sdev->changed = 1;
2016 result = 0;
2017 }
2018 if (!sshdr_external) 2007 if (!sshdr_external)
2019 kfree(sshdr); 2008 kfree(sshdr);
2020 return result; 2009 return result;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 365024b0c407..b65e65aa07eb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1043,15 +1043,7 @@ static int sd_media_changed(struct gendisk *disk)
1043 sshdr); 1043 sshdr);
1044 } 1044 }
1045 1045
1046 /* 1046 if (retval) {
1047 * Unable to test, unit probably not ready. This usually
1048 * means there is no disc in the drive. Mark as changed,
1049 * and we will figure it out later once the drive is
1050 * available again.
1051 */
1052 if (retval || (scsi_sense_valid(sshdr) &&
1053 /* 0x3a is medium not present */
1054 sshdr->asc == 0x3a)) {
1055 set_media_not_present(sdkp); 1047 set_media_not_present(sdkp);
1056 goto out; 1048 goto out;
1057 } 1049 }
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d7b383c96d5d..be6baf8ad704 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -104,14 +104,15 @@ static void sr_release(struct cdrom_device_info *);
104static void get_sectorsize(struct scsi_cd *); 104static void get_sectorsize(struct scsi_cd *);
105static void get_capabilities(struct scsi_cd *); 105static void get_capabilities(struct scsi_cd *);
106 106
107static int sr_media_change(struct cdrom_device_info *, int); 107static unsigned int sr_check_events(struct cdrom_device_info *cdi,
108 unsigned int clearing, int slot);
108static int sr_packet(struct cdrom_device_info *, struct packet_command *); 109static int sr_packet(struct cdrom_device_info *, struct packet_command *);
109 110
110static struct cdrom_device_ops sr_dops = { 111static struct cdrom_device_ops sr_dops = {
111 .open = sr_open, 112 .open = sr_open,
112 .release = sr_release, 113 .release = sr_release,
113 .drive_status = sr_drive_status, 114 .drive_status = sr_drive_status,
114 .media_changed = sr_media_change, 115 .check_events = sr_check_events,
115 .tray_move = sr_tray_move, 116 .tray_move = sr_tray_move,
116 .lock_door = sr_lock_door, 117 .lock_door = sr_lock_door,
117 .select_speed = sr_select_speed, 118 .select_speed = sr_select_speed,
@@ -165,90 +166,96 @@ static void scsi_cd_put(struct scsi_cd *cd)
165 mutex_unlock(&sr_ref_mutex); 166 mutex_unlock(&sr_ref_mutex);
166} 167}
167 168
168/* identical to scsi_test_unit_ready except that it doesn't 169static unsigned int sr_get_events(struct scsi_device *sdev)
169 * eat the NOT_READY returns for removable media */
170int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr)
171{ 170{
172 int retries = MAX_RETRIES; 171 u8 buf[8];
173 int the_result; 172 u8 cmd[] = { GET_EVENT_STATUS_NOTIFICATION,
174 u8 cmd[] = {TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 173 1, /* polled */
174 0, 0, /* reserved */
175 1 << 4, /* notification class: media */
176 0, 0, /* reserved */
177 0, sizeof(buf), /* allocation length */
178 0, /* control */
179 };
180 struct event_header *eh = (void *)buf;
181 struct media_event_desc *med = (void *)(buf + 4);
182 struct scsi_sense_hdr sshdr;
183 int result;
175 184
176 /* issue TEST_UNIT_READY until the initial startup UNIT_ATTENTION 185 result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, sizeof(buf),
177 * conditions are gone, or a timeout happens 186 &sshdr, SR_TIMEOUT, MAX_RETRIES, NULL);
178 */ 187 if (scsi_sense_valid(&sshdr) && sshdr.sense_key == UNIT_ATTENTION)
179 do { 188 return DISK_EVENT_MEDIA_CHANGE;
180 the_result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 189
181 0, sshdr, SR_TIMEOUT, 190 if (result || be16_to_cpu(eh->data_len) < sizeof(*med))
182 retries--, NULL); 191 return 0;
183 if (scsi_sense_valid(sshdr) && 192
184 sshdr->sense_key == UNIT_ATTENTION) 193 if (eh->nea || eh->notification_class != 0x4)
185 sdev->changed = 1; 194 return 0;
186 195
187 } while (retries > 0 && 196 if (med->media_event_code == 1)
188 (!scsi_status_is_good(the_result) || 197 return DISK_EVENT_EJECT_REQUEST;
189 (scsi_sense_valid(sshdr) && 198 else if (med->media_event_code == 2)
190 sshdr->sense_key == UNIT_ATTENTION))); 199 return DISK_EVENT_MEDIA_CHANGE;
191 return the_result; 200 return 0;
192} 201}
193 202
194/* 203/*
195 * This function checks to see if the media has been changed in the 204 * This function checks to see if the media has been changed or eject
196 * CDROM drive. It is possible that we have already sensed a change, 205 * button has been pressed. It is possible that we have already
197 * or the drive may have sensed one and not yet reported it. We must 206 * sensed a change, or the drive may have sensed one and not yet
198 * be ready for either case. This function always reports the current 207 * reported it. The past events are accumulated in sdev->changed and
199 * value of the changed bit. If flag is 0, then the changed bit is reset. 208 * returned together with the current state.
200 * This function could be done as an ioctl, but we would need to have
201 * an inode for that to work, and we do not always have one.
202 */ 209 */
203 210static unsigned int sr_check_events(struct cdrom_device_info *cdi,
204static int sr_media_change(struct cdrom_device_info *cdi, int slot) 211 unsigned int clearing, int slot)
205{ 212{
206 struct scsi_cd *cd = cdi->handle; 213 struct scsi_cd *cd = cdi->handle;
207 int retval; 214 bool last_present;
208 struct scsi_sense_hdr *sshdr; 215 struct scsi_sense_hdr sshdr;
216 unsigned int events;
217 int ret;
209 218
210 if (CDSL_CURRENT != slot) { 219 /* no changer support */
211 /* no changer support */ 220 if (CDSL_CURRENT != slot)
212 return -EINVAL; 221 return 0;
213 }
214 222
215 sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); 223 events = sr_get_events(cd->device);
216 retval = sr_test_unit_ready(cd->device, sshdr); 224 /*
217 if (retval || (scsi_sense_valid(sshdr) && 225 * GET_EVENT_STATUS_NOTIFICATION is enough unless MEDIA_CHANGE
218 /* 0x3a is medium not present */ 226 * is being cleared. Note that there are devices which hang
219 sshdr->asc == 0x3a)) { 227 * if asked to execute TUR repeatedly.
220 /* Media not present or unable to test, unit probably not 228 */
221 * ready. This usually means there is no disc in the drive. 229 if (!(clearing & DISK_EVENT_MEDIA_CHANGE))
222 * Mark as changed, and we will figure it out later once 230 goto skip_tur;
223 * the drive is available again. 231
224 */ 232 /* let's see whether the media is there with TUR */
225 cd->device->changed = 1; 233 last_present = cd->media_present;
226 /* This will force a flush, if called from check_disk_change */ 234 ret = scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
227 retval = 1; 235
228 goto out; 236 /*
229 }; 237 * Media is considered to be present if TUR succeeds or fails with
238 * sense data indicating something other than media-not-present
239 * (ASC 0x3a).
240 */
241 cd->media_present = scsi_status_is_good(ret) ||
242 (scsi_sense_valid(&sshdr) && sshdr.asc != 0x3a);
230 243
231 retval = cd->device->changed; 244 if (last_present != cd->media_present)
232 cd->device->changed = 0; 245 events |= DISK_EVENT_MEDIA_CHANGE;
233 /* If the disk changed, the capacity will now be different, 246skip_tur:
234 * so we force a re-read of this information */ 247 if (cd->device->changed) {
235 if (retval) { 248 events |= DISK_EVENT_MEDIA_CHANGE;
236 /* check multisession offset etc */ 249 cd->device->changed = 0;
237 sr_cd_check(cdi);
238 get_sectorsize(cd);
239 } 250 }
240 251
241out: 252 /* for backward compatibility */
242 /* Notify userspace, that media has changed. */ 253 if (events & DISK_EVENT_MEDIA_CHANGE)
243 if (retval != cd->previous_state)
244 sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE, 254 sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE,
245 GFP_KERNEL); 255 GFP_KERNEL);
246 cd->previous_state = retval; 256 return events;
247 kfree(sshdr);
248
249 return retval;
250} 257}
251 258
252/* 259/*
253 * sr_done is the interrupt routine for the device driver. 260 * sr_done is the interrupt routine for the device driver.
254 * 261 *
@@ -533,10 +540,25 @@ out:
533 return ret; 540 return ret;
534} 541}
535 542
536static int sr_block_media_changed(struct gendisk *disk) 543static unsigned int sr_block_check_events(struct gendisk *disk,
544 unsigned int clearing)
537{ 545{
538 struct scsi_cd *cd = scsi_cd(disk); 546 struct scsi_cd *cd = scsi_cd(disk);
539 return cdrom_media_changed(&cd->cdi); 547 return cdrom_check_events(&cd->cdi, clearing);
548}
549
550static int sr_block_revalidate_disk(struct gendisk *disk)
551{
552 struct scsi_cd *cd = scsi_cd(disk);
553 struct scsi_sense_hdr sshdr;
554
555 /* if the unit is not ready, nothing more to do */
556 if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
557 return 0;
558
559 sr_cd_check(&cd->cdi);
560 get_sectorsize(cd);
561 return 0;
540} 562}
541 563
542static const struct block_device_operations sr_bdops = 564static const struct block_device_operations sr_bdops =
@@ -545,7 +567,8 @@ static const struct block_device_operations sr_bdops =
545 .open = sr_block_open, 567 .open = sr_block_open,
546 .release = sr_block_release, 568 .release = sr_block_release,
547 .ioctl = sr_block_ioctl, 569 .ioctl = sr_block_ioctl,
548 .media_changed = sr_block_media_changed, 570 .check_events = sr_block_check_events,
571 .revalidate_disk = sr_block_revalidate_disk,
549 /* 572 /*
550 * No compat_ioctl for now because sr_block_ioctl never 573 * No compat_ioctl for now because sr_block_ioctl never
551 * seems to pass arbitary ioctls down to host drivers. 574 * seems to pass arbitary ioctls down to host drivers.
@@ -618,6 +641,7 @@ static int sr_probe(struct device *dev)
618 sprintf(disk->disk_name, "sr%d", minor); 641 sprintf(disk->disk_name, "sr%d", minor);
619 disk->fops = &sr_bdops; 642 disk->fops = &sr_bdops;
620 disk->flags = GENHD_FL_CD; 643 disk->flags = GENHD_FL_CD;
644 disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
621 645
622 blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); 646 blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
623 647
@@ -627,7 +651,7 @@ static int sr_probe(struct device *dev)
627 cd->disk = disk; 651 cd->disk = disk;
628 cd->capacity = 0x1fffff; 652 cd->capacity = 0x1fffff;
629 cd->device->changed = 1; /* force recheck CD type */ 653 cd->device->changed = 1; /* force recheck CD type */
630 cd->previous_state = 1; 654 cd->media_present = 1;
631 cd->use = 1; 655 cd->use = 1;
632 cd->readcd_known = 0; 656 cd->readcd_known = 0;
633 cd->readcd_cdda = 0; 657 cd->readcd_cdda = 0;
@@ -780,7 +804,7 @@ static void get_capabilities(struct scsi_cd *cd)
780 } 804 }
781 805
782 /* eat unit attentions */ 806 /* eat unit attentions */
783 sr_test_unit_ready(cd->device, &sshdr); 807 scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
784 808
785 /* ask for mode page 0x2a */ 809 /* ask for mode page 0x2a */
786 rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, 810 rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 1e144dfdbd4b..e036f1dc83c8 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -40,7 +40,7 @@ typedef struct scsi_cd {
40 unsigned xa_flag:1; /* CD has XA sectors ? */ 40 unsigned xa_flag:1; /* CD has XA sectors ? */
41 unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */ 41 unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */
42 unsigned readcd_cdda:1; /* reading audio data using READ_CD */ 42 unsigned readcd_cdda:1; /* reading audio data using READ_CD */
43 unsigned previous_state:1; /* media has changed */ 43 unsigned media_present:1; /* media is present */
44 struct cdrom_device_info cdi; 44 struct cdrom_device_info cdi;
45 /* We hold gendisk and scsi_device references on probe and use 45 /* We hold gendisk and scsi_device references on probe and use
46 * the refs on this kref to decide when to release them */ 46 * the refs on this kref to decide when to release them */
@@ -61,7 +61,6 @@ int sr_select_speed(struct cdrom_device_info *cdi, int speed);
61int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *); 61int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *);
62 62
63int sr_is_xa(Scsi_CD *); 63int sr_is_xa(Scsi_CD *);
64int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr);
65 64
66/* sr_vendor.c */ 65/* sr_vendor.c */
67void sr_vendor_init(Scsi_CD *); 66void sr_vendor_init(Scsi_CD *);
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 3cd8ffbad577..8be30554119b 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -307,7 +307,7 @@ int sr_drive_status(struct cdrom_device_info *cdi, int slot)
307 /* we have no changer support */ 307 /* we have no changer support */
308 return -EINVAL; 308 return -EINVAL;
309 } 309 }
310 if (0 == sr_test_unit_ready(cd->device, &sshdr)) 310 if (!scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
311 return CDS_DISC_OK; 311 return CDS_DISC_OK;
312 312
313 /* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */ 313 /* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 3b513bafaf2a..b015561fd602 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -543,7 +543,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
543 ro = curlun->initially_ro; 543 ro = curlun->initially_ro;
544 if (!ro) { 544 if (!ro) {
545 filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); 545 filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
546 if (-EROFS == PTR_ERR(filp)) 546 if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES)
547 ro = 1; 547 ro = 1;
548 } 548 }
549 if (ro) 549 if (ro)
@@ -558,10 +558,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
558 558
559 if (filp->f_path.dentry) 559 if (filp->f_path.dentry)
560 inode = filp->f_path.dentry->d_inode; 560 inode = filp->f_path.dentry->d_inode;
561 if (inode && S_ISBLK(inode->i_mode)) { 561 if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
562 if (bdev_read_only(inode->i_bdev))
563 ro = 1;
564 } else if (!inode || !S_ISREG(inode->i_mode)) {
565 LINFO(curlun, "invalid file type: %s\n", filename); 562 LINFO(curlun, "invalid file type: %s\n", filename);
566 goto out; 563 goto out;
567 } 564 }
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 4d0ff5ee27b8..e49cce234c65 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -782,7 +782,12 @@ void __init bio_integrity_init(void)
782{ 782{
783 unsigned int i; 783 unsigned int i;
784 784
785 kintegrityd_wq = create_workqueue("kintegrityd"); 785 /*
786 * kintegrityd won't block much but may burn a lot of CPU cycles.
787 * Make it highpri CPU intensive wq with max concurrency of 1.
788 */
789 kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
790 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
786 if (!kintegrityd_wq) 791 if (!kintegrityd_wq)
787 panic("Failed to create kintegrityd\n"); 792 panic("Failed to create kintegrityd\n");
788 793
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 88da70355aa3..fe3f59c14a02 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -432,9 +432,6 @@ static void init_once(void *foo)
432 mutex_init(&bdev->bd_mutex); 432 mutex_init(&bdev->bd_mutex);
433 INIT_LIST_HEAD(&bdev->bd_inodes); 433 INIT_LIST_HEAD(&bdev->bd_inodes);
434 INIT_LIST_HEAD(&bdev->bd_list); 434 INIT_LIST_HEAD(&bdev->bd_list);
435#ifdef CONFIG_SYSFS
436 INIT_LIST_HEAD(&bdev->bd_holder_list);
437#endif
438 inode_init_once(&ei->vfs_inode); 435 inode_init_once(&ei->vfs_inode);
439 /* Initialize mutex for freeze. */ 436 /* Initialize mutex for freeze. */
440 mutex_init(&bdev->bd_fsfreeze_mutex); 437 mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -669,7 +666,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
669 else if (bdev->bd_contains == bdev) 666 else if (bdev->bd_contains == bdev)
670 return true; /* is a whole device which isn't held */ 667 return true; /* is a whole device which isn't held */
671 668
672 else if (whole->bd_holder == bd_claim) 669 else if (whole->bd_holder == bd_may_claim)
673 return true; /* is a partition of a device that is being partitioned */ 670 return true; /* is a partition of a device that is being partitioned */
674 else if (whole->bd_holder != NULL) 671 else if (whole->bd_holder != NULL)
675 return false; /* is a partition of a held device */ 672 return false; /* is a partition of a held device */
@@ -781,439 +778,87 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
781 } 778 }
782} 779}
783 780
784/* releases bdev_lock */
785static void __bd_abort_claiming(struct block_device *whole, void *holder)
786{
787 BUG_ON(whole->bd_claiming != holder);
788 whole->bd_claiming = NULL;
789 wake_up_bit(&whole->bd_claiming, 0);
790
791 spin_unlock(&bdev_lock);
792 bdput(whole);
793}
794
795/**
796 * bd_abort_claiming - abort claiming a block device
797 * @whole: whole block device returned by bd_start_claiming()
798 * @holder: holder trying to claim @bdev
799 *
800 * Abort a claiming block started by bd_start_claiming(). Note that
801 * @whole is not the block device to be claimed but the whole device
802 * returned by bd_start_claiming().
803 *
804 * CONTEXT:
805 * Grabs and releases bdev_lock.
806 */
807static void bd_abort_claiming(struct block_device *whole, void *holder)
808{
809 spin_lock(&bdev_lock);
810 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
811}
812
813/* increment holders when we have a legitimate claim. requires bdev_lock */
814static void __bd_claim(struct block_device *bdev, struct block_device *whole,
815 void *holder)
816{
817 /* note that for a whole device bd_holders
818 * will be incremented twice, and bd_holder will
819 * be set to bd_claim before being set to holder
820 */
821 whole->bd_holders++;
822 whole->bd_holder = bd_claim;
823 bdev->bd_holders++;
824 bdev->bd_holder = holder;
825}
826
827/**
828 * bd_finish_claiming - finish claiming a block device
829 * @bdev: block device of interest (passed to bd_start_claiming())
830 * @whole: whole block device returned by bd_start_claiming()
831 * @holder: holder trying to claim @bdev
832 *
833 * Finish a claiming block started by bd_start_claiming().
834 *
835 * CONTEXT:
836 * Grabs and releases bdev_lock.
837 */
838static void bd_finish_claiming(struct block_device *bdev,
839 struct block_device *whole, void *holder)
840{
841 spin_lock(&bdev_lock);
842 BUG_ON(!bd_may_claim(bdev, whole, holder));
843 __bd_claim(bdev, whole, holder);
844 __bd_abort_claiming(whole, holder); /* not actually an abort */
845}
846
847/**
848 * bd_claim - claim a block device
849 * @bdev: block device to claim
850 * @holder: holder trying to claim @bdev
851 *
852 * Try to claim @bdev which must have been opened successfully.
853 *
854 * CONTEXT:
855 * Might sleep.
856 *
857 * RETURNS:
858 * 0 if successful, -EBUSY if @bdev is already claimed.
859 */
860int bd_claim(struct block_device *bdev, void *holder)
861{
862 struct block_device *whole = bdev->bd_contains;
863 int res;
864
865 might_sleep();
866
867 spin_lock(&bdev_lock);
868 res = bd_prepare_to_claim(bdev, whole, holder);
869 if (res == 0)
870 __bd_claim(bdev, whole, holder);
871 spin_unlock(&bdev_lock);
872
873 return res;
874}
875EXPORT_SYMBOL(bd_claim);
876
877void bd_release(struct block_device *bdev)
878{
879 spin_lock(&bdev_lock);
880 if (!--bdev->bd_contains->bd_holders)
881 bdev->bd_contains->bd_holder = NULL;
882 if (!--bdev->bd_holders)
883 bdev->bd_holder = NULL;
884 spin_unlock(&bdev_lock);
885}
886
887EXPORT_SYMBOL(bd_release);
888
889#ifdef CONFIG_SYSFS 781#ifdef CONFIG_SYSFS
890/*
891 * Functions for bd_claim_by_kobject / bd_release_from_kobject
892 *
893 * If a kobject is passed to bd_claim_by_kobject()
894 * and the kobject has a parent directory,
895 * following symlinks are created:
896 * o from the kobject to the claimed bdev
897 * o from "holders" directory of the bdev to the parent of the kobject
898 * bd_release_from_kobject() removes these symlinks.
899 *
900 * Example:
901 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
902 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
903 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
904 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
905 */
906
907static int add_symlink(struct kobject *from, struct kobject *to) 782static int add_symlink(struct kobject *from, struct kobject *to)
908{ 783{
909 if (!from || !to)
910 return 0;
911 return sysfs_create_link(from, to, kobject_name(to)); 784 return sysfs_create_link(from, to, kobject_name(to));
912} 785}
913 786
914static void del_symlink(struct kobject *from, struct kobject *to) 787static void del_symlink(struct kobject *from, struct kobject *to)
915{ 788{
916 if (!from || !to)
917 return;
918 sysfs_remove_link(from, kobject_name(to)); 789 sysfs_remove_link(from, kobject_name(to));
919} 790}
920 791
921/*
922 * 'struct bd_holder' contains pointers to kobjects symlinked by
923 * bd_claim_by_kobject.
924 * It's connected to bd_holder_list which is protected by bdev->bd_sem.
925 */
926struct bd_holder {
927 struct list_head list; /* chain of holders of the bdev */
928 int count; /* references from the holder */
929 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
930 struct kobject *hdev; /* e.g. "/block/dm-0" */
931 struct kobject *hdir; /* e.g. "/block/sda/holders" */
932 struct kobject *sdev; /* e.g. "/block/sda" */
933};
934
935/*
936 * Get references of related kobjects at once.
937 * Returns 1 on success. 0 on failure.
938 *
939 * Should call bd_holder_release_dirs() after successful use.
940 */
941static int bd_holder_grab_dirs(struct block_device *bdev,
942 struct bd_holder *bo)
943{
944 if (!bdev || !bo)
945 return 0;
946
947 bo->sdir = kobject_get(bo->sdir);
948 if (!bo->sdir)
949 return 0;
950
951 bo->hdev = kobject_get(bo->sdir->parent);
952 if (!bo->hdev)
953 goto fail_put_sdir;
954
955 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
956 if (!bo->sdev)
957 goto fail_put_hdev;
958
959 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
960 if (!bo->hdir)
961 goto fail_put_sdev;
962
963 return 1;
964
965fail_put_sdev:
966 kobject_put(bo->sdev);
967fail_put_hdev:
968 kobject_put(bo->hdev);
969fail_put_sdir:
970 kobject_put(bo->sdir);
971
972 return 0;
973}
974
975/* Put references of related kobjects at once. */
976static void bd_holder_release_dirs(struct bd_holder *bo)
977{
978 kobject_put(bo->hdir);
979 kobject_put(bo->sdev);
980 kobject_put(bo->hdev);
981 kobject_put(bo->sdir);
982}
983
984static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
985{
986 struct bd_holder *bo;
987
988 bo = kzalloc(sizeof(*bo), GFP_KERNEL);
989 if (!bo)
990 return NULL;
991
992 bo->count = 1;
993 bo->sdir = kobj;
994
995 return bo;
996}
997
998static void free_bd_holder(struct bd_holder *bo)
999{
1000 kfree(bo);
1001}
1002
1003/** 792/**
1004 * find_bd_holder - find matching struct bd_holder from the block device 793 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
794 * @bdev: the claimed slave bdev
795 * @disk: the holding disk
1005 * 796 *
1006 * @bdev: struct block device to be searched 797 * This functions creates the following sysfs symlinks.
1007 * @bo: target struct bd_holder
1008 *
1009 * Returns matching entry with @bo in @bdev->bd_holder_list.
1010 * If found, increment the reference count and return the pointer.
1011 * If not found, returns NULL.
1012 */
1013static struct bd_holder *find_bd_holder(struct block_device *bdev,
1014 struct bd_holder *bo)
1015{
1016 struct bd_holder *tmp;
1017
1018 list_for_each_entry(tmp, &bdev->bd_holder_list, list)
1019 if (tmp->sdir == bo->sdir) {
1020 tmp->count++;
1021 return tmp;
1022 }
1023
1024 return NULL;
1025}
1026
1027/**
1028 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
1029 *
1030 * @bdev: block device to be bd_claimed
1031 * @bo: preallocated and initialized by alloc_bd_holder()
1032 *
1033 * Add @bo to @bdev->bd_holder_list, create symlinks.
1034 *
1035 * Returns 0 if symlinks are created.
1036 * Returns -ve if something fails.
1037 */
1038static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
1039{
1040 int err;
1041
1042 if (!bo)
1043 return -EINVAL;
1044
1045 if (!bd_holder_grab_dirs(bdev, bo))
1046 return -EBUSY;
1047
1048 err = add_symlink(bo->sdir, bo->sdev);
1049 if (err)
1050 return err;
1051
1052 err = add_symlink(bo->hdir, bo->hdev);
1053 if (err) {
1054 del_symlink(bo->sdir, bo->sdev);
1055 return err;
1056 }
1057
1058 list_add_tail(&bo->list, &bdev->bd_holder_list);
1059 return 0;
1060}
1061
1062/**
1063 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
1064 * 798 *
1065 * @bdev: block device to be bd_claimed 799 * - from "slaves" directory of the holder @disk to the claimed @bdev
1066 * @kobj: holder's kobject 800 * - from "holders" directory of the @bdev to the holder @disk
1067 * 801 *
1068 * If there is matching entry with @kobj in @bdev->bd_holder_list 802 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
1069 * and no other bd_claim() from the same kobject, 803 * passed to bd_link_disk_holder(), then:
1070 * remove the struct bd_holder from the list, delete symlinks for it.
1071 * 804 *
1072 * Returns a pointer to the struct bd_holder when it's removed from the list 805 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
1073 * and ready to be freed. 806 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
1074 * Returns NULL if matching claim isn't found or there is other bd_claim()
1075 * by the same kobject.
1076 */
1077static struct bd_holder *del_bd_holder(struct block_device *bdev,
1078 struct kobject *kobj)
1079{
1080 struct bd_holder *bo;
1081
1082 list_for_each_entry(bo, &bdev->bd_holder_list, list) {
1083 if (bo->sdir == kobj) {
1084 bo->count--;
1085 BUG_ON(bo->count < 0);
1086 if (!bo->count) {
1087 list_del(&bo->list);
1088 del_symlink(bo->sdir, bo->sdev);
1089 del_symlink(bo->hdir, bo->hdev);
1090 bd_holder_release_dirs(bo);
1091 return bo;
1092 }
1093 break;
1094 }
1095 }
1096
1097 return NULL;
1098}
1099
1100/**
1101 * bd_claim_by_kobject - bd_claim() with additional kobject signature
1102 * 807 *
1103 * @bdev: block device to be claimed 808 * The caller must have claimed @bdev before calling this function and
1104 * @holder: holder's signature 809 * ensure that both @bdev and @disk are valid during the creation and
1105 * @kobj: holder's kobject 810 * lifetime of these symlinks.
1106 * 811 *
1107 * Do bd_claim() and if it succeeds, create sysfs symlinks between 812 * CONTEXT:
1108 * the bdev and the holder's kobject. 813 * Might sleep.
1109 * Use bd_release_from_kobject() when relesing the claimed bdev.
1110 * 814 *
1111 * Returns 0 on success. (same as bd_claim()) 815 * RETURNS:
1112 * Returns errno on failure. 816 * 0 on success, -errno on failure.
1113 */ 817 */
1114static int bd_claim_by_kobject(struct block_device *bdev, void *holder, 818int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1115 struct kobject *kobj)
1116{ 819{
1117 int err; 820 int ret = 0;
1118 struct bd_holder *bo, *found;
1119
1120 if (!kobj)
1121 return -EINVAL;
1122
1123 bo = alloc_bd_holder(kobj);
1124 if (!bo)
1125 return -ENOMEM;
1126 821
1127 mutex_lock(&bdev->bd_mutex); 822 mutex_lock(&bdev->bd_mutex);
1128 823
1129 err = bd_claim(bdev, holder); 824 WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
1130 if (err)
1131 goto fail;
1132 825
1133 found = find_bd_holder(bdev, bo); 826 /* FIXME: remove the following once add_disk() handles errors */
1134 if (found) 827 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1135 goto fail; 828 goto out_unlock;
1136 829
1137 err = add_bd_holder(bdev, bo); 830 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1138 if (err) 831 if (ret)
1139 bd_release(bdev); 832 goto out_unlock;
1140 else
1141 bo = NULL;
1142fail:
1143 mutex_unlock(&bdev->bd_mutex);
1144 free_bd_holder(bo);
1145 return err;
1146}
1147 833
1148/** 834 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1149 * bd_release_from_kobject - bd_release() with additional kobject signature 835 if (ret) {
1150 * 836 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1151 * @bdev: block device to be released 837 goto out_unlock;
1152 * @kobj: holder's kobject 838 }
1153 *
1154 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
1155 */
1156static void bd_release_from_kobject(struct block_device *bdev,
1157 struct kobject *kobj)
1158{
1159 if (!kobj)
1160 return;
1161 839
1162 mutex_lock(&bdev->bd_mutex); 840 bdev->bd_holder_disk = disk;
1163 bd_release(bdev); 841out_unlock:
1164 free_bd_holder(del_bd_holder(bdev, kobj));
1165 mutex_unlock(&bdev->bd_mutex); 842 mutex_unlock(&bdev->bd_mutex);
843 return ret;
1166} 844}
845EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1167 846
1168/** 847static void bd_unlink_disk_holder(struct block_device *bdev)
1169 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
1170 *
1171 * @bdev: block device to be claimed
1172 * @holder: holder's signature
1173 * @disk: holder's gendisk
1174 *
1175 * Call bd_claim_by_kobject() with getting @disk->slave_dir.
1176 */
1177int bd_claim_by_disk(struct block_device *bdev, void *holder,
1178 struct gendisk *disk)
1179{ 848{
1180 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); 849 struct gendisk *disk = bdev->bd_holder_disk;
1181}
1182EXPORT_SYMBOL_GPL(bd_claim_by_disk);
1183 850
1184/** 851 bdev->bd_holder_disk = NULL;
1185 * bd_release_from_disk - wrapper function for bd_release_from_kobject() 852 if (!disk)
1186 * 853 return;
1187 * @bdev: block device to be claimed
1188 * @disk: holder's gendisk
1189 *
1190 * Call bd_release_from_kobject() and put @disk->slave_dir.
1191 */
1192void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
1193{
1194 bd_release_from_kobject(bdev, disk->slave_dir);
1195 kobject_put(disk->slave_dir);
1196}
1197EXPORT_SYMBOL_GPL(bd_release_from_disk);
1198#endif
1199 854
1200/* 855 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1201 * Tries to open block device by device number. Use it ONLY if you 856 del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1202 * really do not have anything better - i.e. when you are behind a
1203 * truly sucky interface and all you are given is a device number. _Never_
1204 * to be used for internal purposes. If you ever need it - reconsider
1205 * your API.
1206 */
1207struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
1208{
1209 struct block_device *bdev = bdget(dev);
1210 int err = -ENOMEM;
1211 if (bdev)
1212 err = blkdev_get(bdev, mode);
1213 return err ? ERR_PTR(err) : bdev;
1214} 857}
1215 858#else
1216EXPORT_SYMBOL(open_by_devnum); 859static inline void bd_unlink_disk_holder(struct block_device *bdev)
860{ }
861#endif
1217 862
1218/** 863/**
1219 * flush_disk - invalidates all buffer-cache entries on a disk 864 * flush_disk - invalidates all buffer-cache entries on a disk
@@ -1309,10 +954,11 @@ int check_disk_change(struct block_device *bdev)
1309{ 954{
1310 struct gendisk *disk = bdev->bd_disk; 955 struct gendisk *disk = bdev->bd_disk;
1311 const struct block_device_operations *bdops = disk->fops; 956 const struct block_device_operations *bdops = disk->fops;
957 unsigned int events;
1312 958
1313 if (!bdops->media_changed) 959 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1314 return 0; 960 DISK_EVENT_EJECT_REQUEST);
1315 if (!bdops->media_changed(bdev->bd_disk)) 961 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1316 return 0; 962 return 0;
1317 963
1318 flush_disk(bdev); 964 flush_disk(bdev);
@@ -1475,17 +1121,171 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1475 return ret; 1121 return ret;
1476} 1122}
1477 1123
1478int blkdev_get(struct block_device *bdev, fmode_t mode) 1124/**
1125 * blkdev_get - open a block device
1126 * @bdev: block_device to open
1127 * @mode: FMODE_* mask
1128 * @holder: exclusive holder identifier
1129 *
1130 * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
1131 * open with exclusive access. Specifying %FMODE_EXCL with %NULL
1132 * @holder is invalid. Exclusive opens may nest for the same @holder.
1133 *
1134 * On success, the reference count of @bdev is unchanged. On failure,
1135 * @bdev is put.
1136 *
1137 * CONTEXT:
1138 * Might sleep.
1139 *
1140 * RETURNS:
1141 * 0 on success, -errno on failure.
1142 */
1143int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1479{ 1144{
1480 return __blkdev_get(bdev, mode, 0); 1145 struct block_device *whole = NULL;
1146 int res;
1147
1148 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1149
1150 if ((mode & FMODE_EXCL) && holder) {
1151 whole = bd_start_claiming(bdev, holder);
1152 if (IS_ERR(whole)) {
1153 bdput(bdev);
1154 return PTR_ERR(whole);
1155 }
1156 }
1157
1158 res = __blkdev_get(bdev, mode, 0);
1159
1160 /* __blkdev_get() may alter read only status, check it afterwards */
1161 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1162 __blkdev_put(bdev, mode, 0);
1163 res = -EACCES;
1164 }
1165
1166 if (whole) {
1167 /* finish claiming */
1168 mutex_lock(&bdev->bd_mutex);
1169 spin_lock(&bdev_lock);
1170
1171 if (!res) {
1172 BUG_ON(!bd_may_claim(bdev, whole, holder));
1173 /*
1174 * Note that for a whole device bd_holders
1175 * will be incremented twice, and bd_holder
1176 * will be set to bd_may_claim before being
1177 * set to holder
1178 */
1179 whole->bd_holders++;
1180 whole->bd_holder = bd_may_claim;
1181 bdev->bd_holders++;
1182 bdev->bd_holder = holder;
1183 }
1184
1185 /* tell others that we're done */
1186 BUG_ON(whole->bd_claiming != holder);
1187 whole->bd_claiming = NULL;
1188 wake_up_bit(&whole->bd_claiming, 0);
1189
1190 spin_unlock(&bdev_lock);
1191
1192 /*
1193 * Block event polling for write claims. Any write
1194 * holder makes the write_holder state stick until all
1195 * are released. This is good enough and tracking
1196 * individual writeable reference is too fragile given
1197 * the way @mode is used in blkdev_get/put().
1198 */
1199 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
1200 bdev->bd_write_holder = true;
1201 disk_block_events(bdev->bd_disk);
1202 }
1203
1204 mutex_unlock(&bdev->bd_mutex);
1205 bdput(whole);
1206 }
1207
1208 return res;
1481} 1209}
1482EXPORT_SYMBOL(blkdev_get); 1210EXPORT_SYMBOL(blkdev_get);
1483 1211
1212/**
1213 * blkdev_get_by_path - open a block device by name
1214 * @path: path to the block device to open
1215 * @mode: FMODE_* mask
1216 * @holder: exclusive holder identifier
1217 *
1218 * Open the blockdevice described by the device file at @path. @mode
1219 * and @holder are identical to blkdev_get().
1220 *
1221 * On success, the returned block_device has reference count of one.
1222 *
1223 * CONTEXT:
1224 * Might sleep.
1225 *
1226 * RETURNS:
1227 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
1228 */
1229struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1230 void *holder)
1231{
1232 struct block_device *bdev;
1233 int err;
1234
1235 bdev = lookup_bdev(path);
1236 if (IS_ERR(bdev))
1237 return bdev;
1238
1239 err = blkdev_get(bdev, mode, holder);
1240 if (err)
1241 return ERR_PTR(err);
1242
1243 return bdev;
1244}
1245EXPORT_SYMBOL(blkdev_get_by_path);
1246
1247/**
1248 * blkdev_get_by_dev - open a block device by device number
1249 * @dev: device number of block device to open
1250 * @mode: FMODE_* mask
1251 * @holder: exclusive holder identifier
1252 *
1253 * Open the blockdevice described by device number @dev. @mode and
1254 * @holder are identical to blkdev_get().
1255 *
1256 * Use it ONLY if you really do not have anything better - i.e. when
1257 * you are behind a truly sucky interface and all you are given is a
1258 * device number. _Never_ to be used for internal purposes. If you
1259 * ever need it - reconsider your API.
1260 *
1261 * On success, the returned block_device has reference count of one.
1262 *
1263 * CONTEXT:
1264 * Might sleep.
1265 *
1266 * RETURNS:
1267 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
1268 */
1269struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1270{
1271 struct block_device *bdev;
1272 int err;
1273
1274 bdev = bdget(dev);
1275 if (!bdev)
1276 return ERR_PTR(-ENOMEM);
1277
1278 err = blkdev_get(bdev, mode, holder);
1279 if (err)
1280 return ERR_PTR(err);
1281
1282 return bdev;
1283}
1284EXPORT_SYMBOL(blkdev_get_by_dev);
1285
1484static int blkdev_open(struct inode * inode, struct file * filp) 1286static int blkdev_open(struct inode * inode, struct file * filp)
1485{ 1287{
1486 struct block_device *whole = NULL;
1487 struct block_device *bdev; 1288 struct block_device *bdev;
1488 int res;
1489 1289
1490 /* 1290 /*
1491 * Preserve backwards compatibility and allow large file access 1291 * Preserve backwards compatibility and allow large file access
@@ -1506,26 +1306,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1506 if (bdev == NULL) 1306 if (bdev == NULL)
1507 return -ENOMEM; 1307 return -ENOMEM;
1508 1308
1509 if (filp->f_mode & FMODE_EXCL) {
1510 whole = bd_start_claiming(bdev, filp);
1511 if (IS_ERR(whole)) {
1512 bdput(bdev);
1513 return PTR_ERR(whole);
1514 }
1515 }
1516
1517 filp->f_mapping = bdev->bd_inode->i_mapping; 1309 filp->f_mapping = bdev->bd_inode->i_mapping;
1518 1310
1519 res = blkdev_get(bdev, filp->f_mode); 1311 return blkdev_get(bdev, filp->f_mode, filp);
1520
1521 if (whole) {
1522 if (res == 0)
1523 bd_finish_claiming(bdev, whole, filp);
1524 else
1525 bd_abort_claiming(whole, filp);
1526 }
1527
1528 return res;
1529} 1312}
1530 1313
1531static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1314static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1539,6 +1322,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1539 bdev->bd_part_count--; 1322 bdev->bd_part_count--;
1540 1323
1541 if (!--bdev->bd_openers) { 1324 if (!--bdev->bd_openers) {
1325 WARN_ON_ONCE(bdev->bd_holders);
1542 sync_blockdev(bdev); 1326 sync_blockdev(bdev);
1543 kill_bdev(bdev); 1327 kill_bdev(bdev);
1544 } 1328 }
@@ -1569,6 +1353,45 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1569 1353
1570int blkdev_put(struct block_device *bdev, fmode_t mode) 1354int blkdev_put(struct block_device *bdev, fmode_t mode)
1571{ 1355{
1356 if (mode & FMODE_EXCL) {
1357 bool bdev_free;
1358
1359 /*
1360 * Release a claim on the device. The holder fields
1361 * are protected with bdev_lock. bd_mutex is to
1362 * synchronize disk_holder unlinking.
1363 */
1364 mutex_lock(&bdev->bd_mutex);
1365 spin_lock(&bdev_lock);
1366
1367 WARN_ON_ONCE(--bdev->bd_holders < 0);
1368 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1369
1370 /* bd_contains might point to self, check in a separate step */
1371 if ((bdev_free = !bdev->bd_holders))
1372 bdev->bd_holder = NULL;
1373 if (!bdev->bd_contains->bd_holders)
1374 bdev->bd_contains->bd_holder = NULL;
1375
1376 spin_unlock(&bdev_lock);
1377
1378 /*
1379 * If this was the last claim, remove holder link and
1380 * unblock evpoll if it was a write holder.
1381 */
1382 if (bdev_free) {
1383 bd_unlink_disk_holder(bdev);
1384 if (bdev->bd_write_holder) {
1385 disk_unblock_events(bdev->bd_disk);
1386 bdev->bd_write_holder = false;
1387 } else
1388 disk_check_events(bdev->bd_disk);
1389 }
1390
1391 mutex_unlock(&bdev->bd_mutex);
1392 } else
1393 disk_check_events(bdev->bd_disk);
1394
1572 return __blkdev_put(bdev, mode, 0); 1395 return __blkdev_put(bdev, mode, 0);
1573} 1396}
1574EXPORT_SYMBOL(blkdev_put); 1397EXPORT_SYMBOL(blkdev_put);
@@ -1576,8 +1399,7 @@ EXPORT_SYMBOL(blkdev_put);
1576static int blkdev_close(struct inode * inode, struct file * filp) 1399static int blkdev_close(struct inode * inode, struct file * filp)
1577{ 1400{
1578 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1401 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1579 if (bdev->bd_holder == filp) 1402
1580 bd_release(bdev);
1581 return blkdev_put(bdev, filp->f_mode); 1403 return blkdev_put(bdev, filp->f_mode);
1582} 1404}
1583 1405
@@ -1722,67 +1544,6 @@ fail:
1722} 1544}
1723EXPORT_SYMBOL(lookup_bdev); 1545EXPORT_SYMBOL(lookup_bdev);
1724 1546
1725/**
1726 * open_bdev_exclusive - open a block device by name and set it up for use
1727 *
1728 * @path: special file representing the block device
1729 * @mode: FMODE_... combination to pass be used
1730 * @holder: owner for exclusion
1731 *
1732 * Open the blockdevice described by the special file at @path, claim it
1733 * for the @holder.
1734 */
1735struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1736{
1737 struct block_device *bdev, *whole;
1738 int error;
1739
1740 bdev = lookup_bdev(path);
1741 if (IS_ERR(bdev))
1742 return bdev;
1743
1744 whole = bd_start_claiming(bdev, holder);
1745 if (IS_ERR(whole)) {
1746 bdput(bdev);
1747 return whole;
1748 }
1749
1750 error = blkdev_get(bdev, mode);
1751 if (error)
1752 goto out_abort_claiming;
1753
1754 error = -EACCES;
1755 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1756 goto out_blkdev_put;
1757
1758 bd_finish_claiming(bdev, whole, holder);
1759 return bdev;
1760
1761out_blkdev_put:
1762 blkdev_put(bdev, mode);
1763out_abort_claiming:
1764 bd_abort_claiming(whole, holder);
1765 return ERR_PTR(error);
1766}
1767
1768EXPORT_SYMBOL(open_bdev_exclusive);
1769
1770/**
1771 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1772 *
1773 * @bdev: blockdevice to close
1774 * @mode: mode, must match that used to open.
1775 *
1776 * This is the counterpart to open_bdev_exclusive().
1777 */
1778void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1779{
1780 bd_release(bdev);
1781 blkdev_put(bdev, mode);
1782}
1783
1784EXPORT_SYMBOL(close_bdev_exclusive);
1785
1786int __invalidate_device(struct block_device *bdev) 1547int __invalidate_device(struct block_device *bdev)
1787{ 1548{
1788 struct super_block *sb = get_super(bdev); 1549 struct super_block *sb = get_super(bdev);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6b9884507837..1718e1a5c320 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -493,7 +493,7 @@ again:
493 continue; 493 continue;
494 494
495 if (device->bdev) { 495 if (device->bdev) {
496 close_bdev_exclusive(device->bdev, device->mode); 496 blkdev_put(device->bdev, device->mode);
497 device->bdev = NULL; 497 device->bdev = NULL;
498 fs_devices->open_devices--; 498 fs_devices->open_devices--;
499 } 499 }
@@ -527,7 +527,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
527 527
528 list_for_each_entry(device, &fs_devices->devices, dev_list) { 528 list_for_each_entry(device, &fs_devices->devices, dev_list) {
529 if (device->bdev) { 529 if (device->bdev) {
530 close_bdev_exclusive(device->bdev, device->mode); 530 blkdev_put(device->bdev, device->mode);
531 fs_devices->open_devices--; 531 fs_devices->open_devices--;
532 } 532 }
533 if (device->writeable) { 533 if (device->writeable) {
@@ -584,13 +584,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
584 int seeding = 1; 584 int seeding = 1;
585 int ret = 0; 585 int ret = 0;
586 586
587 flags |= FMODE_EXCL;
588
587 list_for_each_entry(device, head, dev_list) { 589 list_for_each_entry(device, head, dev_list) {
588 if (device->bdev) 590 if (device->bdev)
589 continue; 591 continue;
590 if (!device->name) 592 if (!device->name)
591 continue; 593 continue;
592 594
593 bdev = open_bdev_exclusive(device->name, flags, holder); 595 bdev = blkdev_get_by_path(device->name, flags, holder);
594 if (IS_ERR(bdev)) { 596 if (IS_ERR(bdev)) {
595 printk(KERN_INFO "open %s failed\n", device->name); 597 printk(KERN_INFO "open %s failed\n", device->name);
596 goto error; 598 goto error;
@@ -642,7 +644,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
642error_brelse: 644error_brelse:
643 brelse(bh); 645 brelse(bh);
644error_close: 646error_close:
645 close_bdev_exclusive(bdev, FMODE_READ); 647 blkdev_put(bdev, flags);
646error: 648error:
647 continue; 649 continue;
648 } 650 }
@@ -688,7 +690,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
688 690
689 mutex_lock(&uuid_mutex); 691 mutex_lock(&uuid_mutex);
690 692
691 bdev = open_bdev_exclusive(path, flags, holder); 693 flags |= FMODE_EXCL;
694 bdev = blkdev_get_by_path(path, flags, holder);
692 695
693 if (IS_ERR(bdev)) { 696 if (IS_ERR(bdev)) {
694 ret = PTR_ERR(bdev); 697 ret = PTR_ERR(bdev);
@@ -720,7 +723,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
720 723
721 brelse(bh); 724 brelse(bh);
722error_close: 725error_close:
723 close_bdev_exclusive(bdev, flags); 726 blkdev_put(bdev, flags);
724error: 727error:
725 mutex_unlock(&uuid_mutex); 728 mutex_unlock(&uuid_mutex);
726 return ret; 729 return ret;
@@ -1183,8 +1186,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1183 goto out; 1186 goto out;
1184 } 1187 }
1185 } else { 1188 } else {
1186 bdev = open_bdev_exclusive(device_path, FMODE_READ, 1189 bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
1187 root->fs_info->bdev_holder); 1190 root->fs_info->bdev_holder);
1188 if (IS_ERR(bdev)) { 1191 if (IS_ERR(bdev)) {
1189 ret = PTR_ERR(bdev); 1192 ret = PTR_ERR(bdev);
1190 goto out; 1193 goto out;
@@ -1251,7 +1254,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1251 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1254 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1252 1255
1253 if (device->bdev) { 1256 if (device->bdev) {
1254 close_bdev_exclusive(device->bdev, device->mode); 1257 blkdev_put(device->bdev, device->mode);
1255 device->bdev = NULL; 1258 device->bdev = NULL;
1256 device->fs_devices->open_devices--; 1259 device->fs_devices->open_devices--;
1257 } 1260 }
@@ -1294,7 +1297,7 @@ error_brelse:
1294 brelse(bh); 1297 brelse(bh);
1295error_close: 1298error_close:
1296 if (bdev) 1299 if (bdev)
1297 close_bdev_exclusive(bdev, FMODE_READ); 1300 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
1298out: 1301out:
1299 mutex_unlock(&root->fs_info->volume_mutex); 1302 mutex_unlock(&root->fs_info->volume_mutex);
1300 mutex_unlock(&uuid_mutex); 1303 mutex_unlock(&uuid_mutex);
@@ -1446,7 +1449,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1446 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1449 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1447 return -EINVAL; 1450 return -EINVAL;
1448 1451
1449 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1452 bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
1453 root->fs_info->bdev_holder);
1450 if (IS_ERR(bdev)) 1454 if (IS_ERR(bdev))
1451 return PTR_ERR(bdev); 1455 return PTR_ERR(bdev);
1452 1456
@@ -1572,7 +1576,7 @@ out:
1572 mutex_unlock(&root->fs_info->volume_mutex); 1576 mutex_unlock(&root->fs_info->volume_mutex);
1573 return ret; 1577 return ret;
1574error: 1578error:
1575 close_bdev_exclusive(bdev, 0); 1579 blkdev_put(bdev, FMODE_EXCL);
1576 if (seeding_dev) { 1580 if (seeding_dev) {
1577 mutex_unlock(&uuid_mutex); 1581 mutex_unlock(&uuid_mutex);
1578 up_write(&sb->s_umount); 1582 up_write(&sb->s_umount);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2740db49eb04..1be781079450 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -50,7 +50,7 @@ struct btrfs_device {
50 50
51 struct block_device *bdev; 51 struct block_device *bdev;
52 52
53 /* the mode sent to open_bdev_exclusive */ 53 /* the mode sent to blkdev_get */
54 fmode_t mode; 54 fmode_t mode;
55 55
56 char *name; 56 char *name;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 6e99b9ddd4e9..dca9e5e0f73b 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -59,7 +59,7 @@ static struct char_device_struct {
59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
60 60
61/* index in the above */ 61/* index in the above */
62static inline int major_to_index(int major) 62static inline int major_to_index(unsigned major)
63{ 63{
64 return major % CHRDEV_MAJOR_HASH_SIZE; 64 return major % CHRDEV_MAJOR_HASH_SIZE;
65} 65}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b7d0554631e4..7aa767d4f06f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -364,7 +364,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
364 struct block_device *bdev; 364 struct block_device *bdev;
365 char b[BDEVNAME_SIZE]; 365 char b[BDEVNAME_SIZE];
366 366
367 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 367 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
368 if (IS_ERR(bdev)) 368 if (IS_ERR(bdev))
369 goto fail; 369 goto fail;
370 return bdev; 370 return bdev;
@@ -381,8 +381,7 @@ fail:
381 */ 381 */
382static int ext3_blkdev_put(struct block_device *bdev) 382static int ext3_blkdev_put(struct block_device *bdev)
383{ 383{
384 bd_release(bdev); 384 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
385 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
386} 385}
387 386
388static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 387static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@ -2162,13 +2161,6 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2162 if (bdev == NULL) 2161 if (bdev == NULL)
2163 return NULL; 2162 return NULL;
2164 2163
2165 if (bd_claim(bdev, sb)) {
2166 ext3_msg(sb, KERN_ERR,
2167 "error: failed to claim external journal device");
2168 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2169 return NULL;
2170 }
2171
2172 blocksize = sb->s_blocksize; 2164 blocksize = sb->s_blocksize;
2173 hblock = bdev_logical_block_size(bdev); 2165 hblock = bdev_logical_block_size(bdev);
2174 if (blocksize < hblock) { 2166 if (blocksize < hblock) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 29c80f6d8b27..cb10a06775e4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -657,7 +657,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
657 struct block_device *bdev; 657 struct block_device *bdev;
658 char b[BDEVNAME_SIZE]; 658 char b[BDEVNAME_SIZE];
659 659
660 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 660 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
661 if (IS_ERR(bdev)) 661 if (IS_ERR(bdev))
662 goto fail; 662 goto fail;
663 return bdev; 663 return bdev;
@@ -673,8 +673,7 @@ fail:
673 */ 673 */
674static int ext4_blkdev_put(struct block_device *bdev) 674static int ext4_blkdev_put(struct block_device *bdev)
675{ 675{
676 bd_release(bdev); 676 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
677 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
678} 677}
679 678
680static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 679static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -3778,13 +3777,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
3778 if (bdev == NULL) 3777 if (bdev == NULL)
3779 return NULL; 3778 return NULL;
3780 3779
3781 if (bd_claim(bdev, sb)) {
3782 ext4_msg(sb, KERN_ERR,
3783 "failed to claim external journal device");
3784 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
3785 return NULL;
3786 }
3787
3788 blocksize = sb->s_blocksize; 3780 blocksize = sb->s_blocksize;
3789 hblock = bdev_logical_block_size(bdev); 3781 hblock = bdev_logical_block_size(bdev);
3790 if (blocksize < hblock) { 3782 if (blocksize < hblock) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 693f4470a2df..777927ce6f79 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1268,7 +1268,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1268{ 1268{
1269 struct block_device *bdev; 1269 struct block_device *bdev;
1270 struct super_block *s; 1270 struct super_block *s;
1271 fmode_t mode = FMODE_READ; 1271 fmode_t mode = FMODE_READ | FMODE_EXCL;
1272 int error; 1272 int error;
1273 struct gfs2_args args; 1273 struct gfs2_args args;
1274 struct gfs2_sbd *sdp; 1274 struct gfs2_sbd *sdp;
@@ -1276,7 +1276,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1276 if (!(flags & MS_RDONLY)) 1276 if (!(flags & MS_RDONLY))
1277 mode |= FMODE_WRITE; 1277 mode |= FMODE_WRITE;
1278 1278
1279 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1279 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1280 if (IS_ERR(bdev)) 1280 if (IS_ERR(bdev))
1281 return ERR_CAST(bdev); 1281 return ERR_CAST(bdev);
1282 1282
@@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1298 goto error_bdev; 1298 goto error_bdev;
1299 1299
1300 if (s->s_root) 1300 if (s->s_root)
1301 close_bdev_exclusive(bdev, mode); 1301 blkdev_put(bdev, mode);
1302 1302
1303 memset(&args, 0, sizeof(args)); 1303 memset(&args, 0, sizeof(args));
1304 args.ar_quota = GFS2_QUOTA_DEFAULT; 1304 args.ar_quota = GFS2_QUOTA_DEFAULT;
@@ -1342,7 +1342,7 @@ error_super:
1342 deactivate_locked_super(s); 1342 deactivate_locked_super(s);
1343 return ERR_PTR(error); 1343 return ERR_PTR(error);
1344error_bdev: 1344error_bdev:
1345 close_bdev_exclusive(bdev, mode); 1345 blkdev_put(bdev, mode);
1346 return ERR_PTR(error); 1346 return ERR_PTR(error);
1347} 1347}
1348 1348
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index e1b8493b9aaa..278e3fb40b71 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1120,16 +1120,13 @@ int lmLogOpen(struct super_block *sb)
1120 * file systems to log may have n-to-1 relationship; 1120 * file systems to log may have n-to-1 relationship;
1121 */ 1121 */
1122 1122
1123 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); 1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1124 log);
1124 if (IS_ERR(bdev)) { 1125 if (IS_ERR(bdev)) {
1125 rc = -PTR_ERR(bdev); 1126 rc = -PTR_ERR(bdev);
1126 goto free; 1127 goto free;
1127 } 1128 }
1128 1129
1129 if ((rc = bd_claim(bdev, log))) {
1130 goto close;
1131 }
1132
1133 log->bdev = bdev; 1130 log->bdev = bdev;
1134 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1131 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1135 1132
@@ -1137,7 +1134,7 @@ int lmLogOpen(struct super_block *sb)
1137 * initialize log: 1134 * initialize log:
1138 */ 1135 */
1139 if ((rc = lmLogInit(log))) 1136 if ((rc = lmLogInit(log)))
1140 goto unclaim; 1137 goto close;
1141 1138
1142 list_add(&log->journal_list, &jfs_external_logs); 1139 list_add(&log->journal_list, &jfs_external_logs);
1143 1140
@@ -1163,11 +1160,8 @@ journal_found:
1163 list_del(&log->journal_list); 1160 list_del(&log->journal_list);
1164 lbmLogShutdown(log); 1161 lbmLogShutdown(log);
1165 1162
1166 unclaim:
1167 bd_release(bdev);
1168
1169 close: /* close external log device */ 1163 close: /* close external log device */
1170 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 1164 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1171 1165
1172 free: /* free log descriptor */ 1166 free: /* free log descriptor */
1173 mutex_unlock(&jfs_log_mutex); 1167 mutex_unlock(&jfs_log_mutex);
@@ -1512,8 +1506,7 @@ int lmLogClose(struct super_block *sb)
1512 bdev = log->bdev; 1506 bdev = log->bdev;
1513 rc = lmLogShutdown(log); 1507 rc = lmLogShutdown(log);
1514 1508
1515 bd_release(bdev); 1509 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1516 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1517 1510
1518 kfree(log); 1511 kfree(log);
1519 1512
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 92ca6fbe09bd..723bc5bca09a 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -300,7 +300,7 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
300 300
301static void bdev_put_device(struct logfs_super *s) 301static void bdev_put_device(struct logfs_super *s)
302{ 302{
303 close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE); 303 blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -325,13 +325,14 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
325{ 325{
326 struct block_device *bdev; 326 struct block_device *bdev;
327 327
328 bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); 328 bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
329 type);
329 if (IS_ERR(bdev)) 330 if (IS_ERR(bdev))
330 return PTR_ERR(bdev); 331 return PTR_ERR(bdev);
331 332
332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 333 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
333 int mtdnr = MINOR(bdev->bd_dev); 334 int mtdnr = MINOR(bdev->bd_dev);
334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 335 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
335 return logfs_get_sb_mtd(p, mtdnr); 336 return logfs_get_sb_mtd(p, mtdnr);
336 } 337 }
337 338
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 3a359023c9f7..230b79fbf005 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -845,11 +845,6 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
845 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 845 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
846 struct page *page = buf->page; 846 struct page *page = buf->page;
847 size_t size; 847 size_t size;
848 int ret;
849
850 ret = buf->ops->confirm(pipe, buf);
851 if (unlikely(ret))
852 return ret;
853 848
854 size = sd->len; 849 size = sd->len;
855 850
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 70dfdd532b83..0994f6a76c07 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1163,14 +1163,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1163{ 1163{
1164 struct nilfs_super_data sd; 1164 struct nilfs_super_data sd;
1165 struct super_block *s; 1165 struct super_block *s;
1166 fmode_t mode = FMODE_READ; 1166 fmode_t mode = FMODE_READ | FMODE_EXCL;
1167 struct dentry *root_dentry; 1167 struct dentry *root_dentry;
1168 int err, s_new = false; 1168 int err, s_new = false;
1169 1169
1170 if (!(flags & MS_RDONLY)) 1170 if (!(flags & MS_RDONLY))
1171 mode |= FMODE_WRITE; 1171 mode |= FMODE_WRITE;
1172 1172
1173 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1173 sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1174 if (IS_ERR(sd.bdev)) 1174 if (IS_ERR(sd.bdev))
1175 return ERR_CAST(sd.bdev); 1175 return ERR_CAST(sd.bdev);
1176 1176
@@ -1249,7 +1249,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1249 } 1249 }
1250 1250
1251 if (!s_new) 1251 if (!s_new)
1252 close_bdev_exclusive(sd.bdev, mode); 1252 blkdev_put(sd.bdev, mode);
1253 1253
1254 return root_dentry; 1254 return root_dentry;
1255 1255
@@ -1258,7 +1258,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1258 1258
1259 failed: 1259 failed:
1260 if (!s_new) 1260 if (!s_new)
1261 close_bdev_exclusive(sd.bdev, mode); 1261 blkdev_put(sd.bdev, mode);
1262 return ERR_PTR(err); 1262 return ERR_PTR(err);
1263} 1263}
1264 1264
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a6cc05302e9f..b108e863d8f6 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1729,7 +1729,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1729 goto out; 1729 goto out;
1730 1730
1731 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1731 reg->hr_bdev = I_BDEV(filp->f_mapping->host);
1732 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); 1732 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1733 if (ret) { 1733 if (ret) {
1734 reg->hr_bdev = NULL; 1734 reg->hr_bdev = NULL;
1735 goto out; 1735 goto out;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0a8b0ad0c7e2..9c21119512b9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,6 +237,13 @@ ssize_t part_size_show(struct device *dev,
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238} 238}
239 239
240ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf)
242{
243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245}
246
240ssize_t part_alignment_offset_show(struct device *dev, 247ssize_t part_alignment_offset_show(struct device *dev,
241 struct device_attribute *attr, char *buf) 248 struct device_attribute *attr, char *buf)
242{ 249{
@@ -312,6 +319,7 @@ ssize_t part_fail_store(struct device *dev,
312static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); 319static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
313static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 320static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
314static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 321static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
322static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
315static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 323static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
316static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, 324static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
317 NULL); 325 NULL);
@@ -326,6 +334,7 @@ static struct attribute *part_attrs[] = {
326 &dev_attr_partition.attr, 334 &dev_attr_partition.attr,
327 &dev_attr_start.attr, 335 &dev_attr_start.attr,
328 &dev_attr_size.attr, 336 &dev_attr_size.attr,
337 &dev_attr_ro.attr,
329 &dev_attr_alignment_offset.attr, 338 &dev_attr_alignment_offset.attr,
330 &dev_attr_discard_alignment.attr, 339 &dev_attr_discard_alignment.attr,
331 &dev_attr_stat.attr, 340 &dev_attr_stat.attr,
@@ -372,6 +381,11 @@ static void delete_partition_rcu_cb(struct rcu_head *head)
372 put_device(part_to_dev(part)); 381 put_device(part_to_dev(part));
373} 382}
374 383
384void __delete_partition(struct hd_struct *part)
385{
386 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
387}
388
375void delete_partition(struct gendisk *disk, int partno) 389void delete_partition(struct gendisk *disk, int partno)
376{ 390{
377 struct disk_part_tbl *ptbl = disk->part_tbl; 391 struct disk_part_tbl *ptbl = disk->part_tbl;
@@ -390,7 +404,7 @@ void delete_partition(struct gendisk *disk, int partno)
390 kobject_put(part->holder_dir); 404 kobject_put(part->holder_dir);
391 device_del(part_to_dev(part)); 405 device_del(part_to_dev(part));
392 406
393 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 407 hd_struct_put(part);
394} 408}
395 409
396static ssize_t whole_disk_show(struct device *dev, 410static ssize_t whole_disk_show(struct device *dev,
@@ -489,6 +503,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
489 if (!dev_get_uevent_suppress(ddev)) 503 if (!dev_get_uevent_suppress(ddev))
490 kobject_uevent(&pdev->kobj, KOBJ_ADD); 504 kobject_uevent(&pdev->kobj, KOBJ_ADD);
491 505
506 hd_ref_init(p);
492 return p; 507 return p;
493 508
494out_free_info: 509out_free_info:
@@ -507,65 +522,6 @@ out_put:
507 return ERR_PTR(err); 522 return ERR_PTR(err);
508} 523}
509 524
510/* Not exported, helper to add_disk(). */
511void register_disk(struct gendisk *disk)
512{
513 struct device *ddev = disk_to_dev(disk);
514 struct block_device *bdev;
515 struct disk_part_iter piter;
516 struct hd_struct *part;
517 int err;
518
519 ddev->parent = disk->driverfs_dev;
520
521 dev_set_name(ddev, disk->disk_name);
522
523 /* delay uevents, until we scanned partition table */
524 dev_set_uevent_suppress(ddev, 1);
525
526 if (device_add(ddev))
527 return;
528 if (!sysfs_deprecated) {
529 err = sysfs_create_link(block_depr, &ddev->kobj,
530 kobject_name(&ddev->kobj));
531 if (err) {
532 device_del(ddev);
533 return;
534 }
535 }
536 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
537 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
538
539 /* No minors to use for partitions */
540 if (!disk_partitionable(disk))
541 goto exit;
542
543 /* No such device (e.g., media were just removed) */
544 if (!get_capacity(disk))
545 goto exit;
546
547 bdev = bdget_disk(disk, 0);
548 if (!bdev)
549 goto exit;
550
551 bdev->bd_invalidated = 1;
552 err = blkdev_get(bdev, FMODE_READ);
553 if (err < 0)
554 goto exit;
555 blkdev_put(bdev, FMODE_READ);
556
557exit:
558 /* announce disk after possible partitions are created */
559 dev_set_uevent_suppress(ddev, 0);
560 kobject_uevent(&ddev->kobj, KOBJ_ADD);
561
562 /* announce possible partitions */
563 disk_part_iter_init(&piter, disk, 0);
564 while ((part = disk_part_iter_next(&piter)))
565 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
566 disk_part_iter_exit(&piter);
567}
568
569static bool disk_unlock_native_capacity(struct gendisk *disk) 525static bool disk_unlock_native_capacity(struct gendisk *disk)
570{ 526{
571 const struct block_device_operations *bdops = disk->fops; 527 const struct block_device_operations *bdops = disk->fops;
@@ -728,33 +684,3 @@ fail:
728} 684}
729 685
730EXPORT_SYMBOL(read_dev_sector); 686EXPORT_SYMBOL(read_dev_sector);
731
732void del_gendisk(struct gendisk *disk)
733{
734 struct disk_part_iter piter;
735 struct hd_struct *part;
736
737 /* invalidate stuff */
738 disk_part_iter_init(&piter, disk,
739 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
740 while ((part = disk_part_iter_next(&piter))) {
741 invalidate_partition(disk, part->partno);
742 delete_partition(disk, part->partno);
743 }
744 disk_part_iter_exit(&piter);
745
746 invalidate_partition(disk, 0);
747 blk_free_devt(disk_to_dev(disk)->devt);
748 set_capacity(disk, 0);
749 disk->flags &= ~GENHD_FL_UP;
750 unlink_gendisk(disk);
751 part_stat_set_all(&disk->part0, 0);
752 disk->part0.stamp = 0;
753
754 kobject_put(disk->part0.holder_dir);
755 kobject_put(disk->slave_dir);
756 disk->driverfs_dev = NULL;
757 if (!sysfs_deprecated)
758 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
759 device_del(disk_to_dev(disk));
760}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index d31bce1a9f90..3eea859e6990 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2551,8 +2551,6 @@ static int release_journal_dev(struct super_block *super,
2551 result = 0; 2551 result = 0;
2552 2552
2553 if (journal->j_dev_bd != NULL) { 2553 if (journal->j_dev_bd != NULL) {
2554 if (journal->j_dev_bd->bd_dev != super->s_dev)
2555 bd_release(journal->j_dev_bd);
2556 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); 2554 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
2557 journal->j_dev_bd = NULL; 2555 journal->j_dev_bd = NULL;
2558 } 2556 }
@@ -2570,7 +2568,7 @@ static int journal_init_dev(struct super_block *super,
2570{ 2568{
2571 int result; 2569 int result;
2572 dev_t jdev; 2570 dev_t jdev;
2573 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; 2571 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
2574 char b[BDEVNAME_SIZE]; 2572 char b[BDEVNAME_SIZE];
2575 2573
2576 result = 0; 2574 result = 0;
@@ -2584,7 +2582,10 @@ static int journal_init_dev(struct super_block *super,
2584 2582
2585 /* there is no "jdev" option and journal is on separate device */ 2583 /* there is no "jdev" option and journal is on separate device */
2586 if ((!jdev_name || !jdev_name[0])) { 2584 if ((!jdev_name || !jdev_name[0])) {
2587 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2585 if (jdev == super->s_dev)
2586 blkdev_mode &= ~FMODE_EXCL;
2587 journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
2588 journal);
2588 journal->j_dev_mode = blkdev_mode; 2589 journal->j_dev_mode = blkdev_mode;
2589 if (IS_ERR(journal->j_dev_bd)) { 2590 if (IS_ERR(journal->j_dev_bd)) {
2590 result = PTR_ERR(journal->j_dev_bd); 2591 result = PTR_ERR(journal->j_dev_bd);
@@ -2593,22 +2594,14 @@ static int journal_init_dev(struct super_block *super,
2593 "cannot init journal device '%s': %i", 2594 "cannot init journal device '%s': %i",
2594 __bdevname(jdev, b), result); 2595 __bdevname(jdev, b), result);
2595 return result; 2596 return result;
2596 } else if (jdev != super->s_dev) { 2597 } else if (jdev != super->s_dev)
2597 result = bd_claim(journal->j_dev_bd, journal);
2598 if (result) {
2599 blkdev_put(journal->j_dev_bd, blkdev_mode);
2600 return result;
2601 }
2602
2603 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2598 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2604 }
2605 2599
2606 return 0; 2600 return 0;
2607 } 2601 }
2608 2602
2609 journal->j_dev_mode = blkdev_mode; 2603 journal->j_dev_mode = blkdev_mode;
2610 journal->j_dev_bd = open_bdev_exclusive(jdev_name, 2604 journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
2611 blkdev_mode, journal);
2612 if (IS_ERR(journal->j_dev_bd)) { 2605 if (IS_ERR(journal->j_dev_bd)) {
2613 result = PTR_ERR(journal->j_dev_bd); 2606 result = PTR_ERR(journal->j_dev_bd);
2614 journal->j_dev_bd = NULL; 2607 journal->j_dev_bd = NULL;
diff --git a/fs/splice.c b/fs/splice.c
index ce2f02579e35..50a5d978da16 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -682,19 +682,14 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
682{ 682{
683 struct file *file = sd->u.file; 683 struct file *file = sd->u.file;
684 loff_t pos = sd->pos; 684 loff_t pos = sd->pos;
685 int ret, more; 685 int more;
686
687 ret = buf->ops->confirm(pipe, buf);
688 if (!ret) {
689 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
690 if (file->f_op && file->f_op->sendpage)
691 ret = file->f_op->sendpage(file, buf->page, buf->offset,
692 sd->len, &pos, more);
693 else
694 ret = -EINVAL;
695 }
696 686
697 return ret; 687 if (!likely(file->f_op && file->f_op->sendpage))
688 return -EINVAL;
689
690 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
691 return file->f_op->sendpage(file, buf->page, buf->offset,
692 sd->len, &pos, more);
698} 693}
699 694
700/* 695/*
@@ -727,13 +722,6 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
727 void *fsdata; 722 void *fsdata;
728 int ret; 723 int ret;
729 724
730 /*
731 * make sure the data in this buffer is uptodate
732 */
733 ret = buf->ops->confirm(pipe, buf);
734 if (unlikely(ret))
735 return ret;
736
737 offset = sd->pos & ~PAGE_CACHE_MASK; 725 offset = sd->pos & ~PAGE_CACHE_MASK;
738 726
739 this_len = sd->len; 727 this_len = sd->len;
@@ -805,12 +793,17 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
805 if (sd->len > sd->total_len) 793 if (sd->len > sd->total_len)
806 sd->len = sd->total_len; 794 sd->len = sd->total_len;
807 795
808 ret = actor(pipe, buf, sd); 796 ret = buf->ops->confirm(pipe, buf);
809 if (ret <= 0) { 797 if (unlikely(ret)) {
810 if (ret == -ENODATA) 798 if (ret == -ENODATA)
811 ret = 0; 799 ret = 0;
812 return ret; 800 return ret;
813 } 801 }
802
803 ret = actor(pipe, buf, sd);
804 if (ret <= 0)
805 return ret;
806
814 buf->offset += ret; 807 buf->offset += ret;
815 buf->len -= ret; 808 buf->len -= ret;
816 809
@@ -1044,10 +1037,6 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1044 int ret; 1037 int ret;
1045 void *data; 1038 void *data;
1046 1039
1047 ret = buf->ops->confirm(pipe, buf);
1048 if (ret)
1049 return ret;
1050
1051 data = buf->ops->map(pipe, buf, 0); 1040 data = buf->ops->map(pipe, buf, 0);
1052 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); 1041 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
1053 buf->ops->unmap(pipe, buf, data); 1042 buf->ops->unmap(pipe, buf, data);
@@ -1495,10 +1484,6 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1495 char *src; 1484 char *src;
1496 int ret; 1485 int ret;
1497 1486
1498 ret = buf->ops->confirm(pipe, buf);
1499 if (unlikely(ret))
1500 return ret;
1501
1502 /* 1487 /*
1503 * See if we can use the atomic maps, by prefaulting in the 1488 * See if we can use the atomic maps, by prefaulting in the
1504 * pages and doing an atomic copy 1489 * pages and doing an atomic copy
diff --git a/fs/super.c b/fs/super.c
index 823e061faa87..4f6a3571a634 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -767,13 +767,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
767{ 767{
768 struct block_device *bdev; 768 struct block_device *bdev;
769 struct super_block *s; 769 struct super_block *s;
770 fmode_t mode = FMODE_READ; 770 fmode_t mode = FMODE_READ | FMODE_EXCL;
771 int error = 0; 771 int error = 0;
772 772
773 if (!(flags & MS_RDONLY)) 773 if (!(flags & MS_RDONLY))
774 mode |= FMODE_WRITE; 774 mode |= FMODE_WRITE;
775 775
776 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 776 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
777 if (IS_ERR(bdev)) 777 if (IS_ERR(bdev))
778 return ERR_CAST(bdev); 778 return ERR_CAST(bdev);
779 779
@@ -802,13 +802,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
802 802
803 /* 803 /*
804 * s_umount nests inside bd_mutex during 804 * s_umount nests inside bd_mutex during
805 * __invalidate_device(). close_bdev_exclusive() 805 * __invalidate_device(). blkdev_put() acquires
806 * acquires bd_mutex and can't be called under 806 * bd_mutex and can't be called under s_umount. Drop
807 * s_umount. Drop s_umount temporarily. This is safe 807 * s_umount temporarily. This is safe as we're
808 * as we're holding an active reference. 808 * holding an active reference.
809 */ 809 */
810 up_write(&s->s_umount); 810 up_write(&s->s_umount);
811 close_bdev_exclusive(bdev, mode); 811 blkdev_put(bdev, mode);
812 down_write(&s->s_umount); 812 down_write(&s->s_umount);
813 } else { 813 } else {
814 char b[BDEVNAME_SIZE]; 814 char b[BDEVNAME_SIZE];
@@ -832,7 +832,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
832error_s: 832error_s:
833 error = PTR_ERR(s); 833 error = PTR_ERR(s);
834error_bdev: 834error_bdev:
835 close_bdev_exclusive(bdev, mode); 835 blkdev_put(bdev, mode);
836error: 836error:
837 return ERR_PTR(error); 837 return ERR_PTR(error);
838} 838}
@@ -863,7 +863,8 @@ void kill_block_super(struct super_block *sb)
863 bdev->bd_super = NULL; 863 bdev->bd_super = NULL;
864 generic_shutdown_super(sb); 864 generic_shutdown_super(sb);
865 sync_blockdev(bdev); 865 sync_blockdev(bdev);
866 close_bdev_exclusive(bdev, mode); 866 WARN_ON_ONCE(!(mode & FMODE_EXCL));
867 blkdev_put(bdev, mode | FMODE_EXCL);
867} 868}
868 869
869EXPORT_SYMBOL(kill_block_super); 870EXPORT_SYMBOL(kill_block_super);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a10f6416e563..bd07f7339366 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -606,7 +606,8 @@ xfs_blkdev_get(
606{ 606{
607 int error = 0; 607 int error = 0;
608 608
609 *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); 609 *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
610 mp);
610 if (IS_ERR(*bdevp)) { 611 if (IS_ERR(*bdevp)) {
611 error = PTR_ERR(*bdevp); 612 error = PTR_ERR(*bdevp);
612 printk("XFS: Invalid device [%s], error=%d\n", name, error); 613 printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -620,7 +621,7 @@ xfs_blkdev_put(
620 struct block_device *bdev) 621 struct block_device *bdev)
621{ 622{
622 if (bdev) 623 if (bdev)
623 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 624 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
624} 625}
625 626
626/* 627/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 36ab42c9bb99..4d18ff34670a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -115,6 +115,7 @@ struct request {
115 void *elevator_private3; 115 void *elevator_private3;
116 116
117 struct gendisk *rq_disk; 117 struct gendisk *rq_disk;
118 struct hd_struct *part;
118 unsigned long start_time; 119 unsigned long start_time;
119#ifdef CONFIG_BLK_CGROUP 120#ifdef CONFIG_BLK_CGROUP
120 unsigned long long start_time_ns; 121 unsigned long long start_time_ns;
@@ -646,7 +647,6 @@ static inline void rq_flush_dcache_pages(struct request *rq)
646 647
647extern int blk_register_queue(struct gendisk *disk); 648extern int blk_register_queue(struct gendisk *disk);
648extern void blk_unregister_queue(struct gendisk *disk); 649extern void blk_unregister_queue(struct gendisk *disk);
649extern void register_disk(struct gendisk *dev);
650extern void generic_make_request(struct bio *bio); 650extern void generic_make_request(struct bio *bio);
651extern void blk_rq_init(struct request_queue *q, struct request *rq); 651extern void blk_rq_init(struct request_queue *q, struct request *rq);
652extern void blk_put_request(struct request *); 652extern void blk_put_request(struct request *);
@@ -1256,6 +1256,9 @@ struct block_device_operations {
1256 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1256 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1257 int (*direct_access) (struct block_device *, sector_t, 1257 int (*direct_access) (struct block_device *, sector_t,
1258 void **, unsigned long *); 1258 void **, unsigned long *);
1259 unsigned int (*check_events) (struct gendisk *disk,
1260 unsigned int clearing);
1261 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
1259 int (*media_changed) (struct gendisk *); 1262 int (*media_changed) (struct gendisk *);
1260 void (*unlock_native_capacity) (struct gendisk *); 1263 void (*unlock_native_capacity) (struct gendisk *);
1261 int (*revalidate_disk) (struct gendisk *); 1264 int (*revalidate_disk) (struct gendisk *);
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 78e904796622..35eae4b67503 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -946,6 +946,8 @@ struct cdrom_device_info {
946/* device-related storage */ 946/* device-related storage */
947 unsigned int options : 30; /* options flags */ 947 unsigned int options : 30; /* options flags */
948 unsigned mc_flags : 2; /* media change buffer flags */ 948 unsigned mc_flags : 2; /* media change buffer flags */
949 unsigned int vfs_events; /* cached events for vfs path */
950 unsigned int ioctl_events; /* cached events for ioctl path */
949 int use_count; /* number of times device opened */ 951 int use_count; /* number of times device opened */
950 char name[20]; /* name of the device type */ 952 char name[20]; /* name of the device type */
951/* per-device flags */ 953/* per-device flags */
@@ -965,6 +967,8 @@ struct cdrom_device_ops {
965 int (*open) (struct cdrom_device_info *, int); 967 int (*open) (struct cdrom_device_info *, int);
966 void (*release) (struct cdrom_device_info *); 968 void (*release) (struct cdrom_device_info *);
967 int (*drive_status) (struct cdrom_device_info *, int); 969 int (*drive_status) (struct cdrom_device_info *, int);
970 unsigned int (*check_events) (struct cdrom_device_info *cdi,
971 unsigned int clearing, int slot);
968 int (*media_changed) (struct cdrom_device_info *, int); 972 int (*media_changed) (struct cdrom_device_info *, int);
969 int (*tray_move) (struct cdrom_device_info *, int); 973 int (*tray_move) (struct cdrom_device_info *, int);
970 int (*lock_door) (struct cdrom_device_info *, int); 974 int (*lock_door) (struct cdrom_device_info *, int);
@@ -993,6 +997,8 @@ extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
993extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode); 997extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode);
994extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, 998extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
995 fmode_t mode, unsigned int cmd, unsigned long arg); 999 fmode_t mode, unsigned int cmd, unsigned long arg);
1000extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
1001 unsigned int clearing);
996extern int cdrom_media_changed(struct cdrom_device_info *); 1002extern int cdrom_media_changed(struct cdrom_device_info *);
997 1003
998extern int register_cdrom(struct cdrom_device_info *cdi); 1004extern int register_cdrom(struct cdrom_device_info *cdi);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c0701288d204..3984f2358d1f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -664,8 +664,9 @@ struct block_device {
664 void * bd_claiming; 664 void * bd_claiming;
665 void * bd_holder; 665 void * bd_holder;
666 int bd_holders; 666 int bd_holders;
667 bool bd_write_holder;
667#ifdef CONFIG_SYSFS 668#ifdef CONFIG_SYSFS
668 struct list_head bd_holder_list; 669 struct gendisk * bd_holder_disk; /* for sysfs slave linking */
669#endif 670#endif
670 struct block_device * bd_contains; 671 struct block_device * bd_contains;
671 unsigned bd_block_size; 672 unsigned bd_block_size;
@@ -2019,7 +2020,6 @@ extern struct block_device *bdgrab(struct block_device *bdev);
2019extern void bd_set_size(struct block_device *, loff_t size); 2020extern void bd_set_size(struct block_device *, loff_t size);
2020extern void bd_forget(struct inode *inode); 2021extern void bd_forget(struct inode *inode);
2021extern void bdput(struct block_device *); 2022extern void bdput(struct block_device *);
2022extern struct block_device *open_by_devnum(dev_t, fmode_t);
2023extern void invalidate_bdev(struct block_device *); 2023extern void invalidate_bdev(struct block_device *);
2024extern int sync_blockdev(struct block_device *bdev); 2024extern int sync_blockdev(struct block_device *bdev);
2025extern struct super_block *freeze_bdev(struct block_device *); 2025extern struct super_block *freeze_bdev(struct block_device *);
@@ -2050,16 +2050,20 @@ extern const struct file_operations def_fifo_fops;
2050extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 2050extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
2051extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 2051extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
2052extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 2052extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
2053extern int blkdev_get(struct block_device *, fmode_t); 2053extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
2054extern int blkdev_put(struct block_device *, fmode_t); 2054extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
2055extern int bd_claim(struct block_device *, void *); 2055 void *holder);
2056extern void bd_release(struct block_device *); 2056extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
2057 void *holder);
2058extern int blkdev_put(struct block_device *bdev, fmode_t mode);
2057#ifdef CONFIG_SYSFS 2059#ifdef CONFIG_SYSFS
2058extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); 2060extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
2059extern void bd_release_from_disk(struct block_device *, struct gendisk *);
2060#else 2061#else
2061#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 2062static inline int bd_link_disk_holder(struct block_device *bdev,
2062#define bd_release_from_disk(bdev, disk) bd_release(bdev) 2063 struct gendisk *disk)
2064{
2065 return 0;
2066}
2063#endif 2067#endif
2064#endif 2068#endif
2065 2069
@@ -2095,8 +2099,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
2095extern const char *__bdevname(dev_t, char *buffer); 2099extern const char *__bdevname(dev_t, char *buffer);
2096extern const char *bdevname(struct block_device *bdev, char *buffer); 2100extern const char *bdevname(struct block_device *bdev, char *buffer);
2097extern struct block_device *lookup_bdev(const char *); 2101extern struct block_device *lookup_bdev(const char *);
2098extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
2099extern void close_bdev_exclusive(struct block_device *, fmode_t);
2100extern void blkdev_show(struct seq_file *,off_t); 2102extern void blkdev_show(struct seq_file *,off_t);
2101 2103
2102#else 2104#else
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7a7b9c1644e4..c0d5f6945c1e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -115,6 +115,7 @@ struct hd_struct {
115#else 115#else
116 struct disk_stats dkstats; 116 struct disk_stats dkstats;
117#endif 117#endif
118 atomic_t ref;
118 struct rcu_head rcu_head; 119 struct rcu_head rcu_head;
119}; 120};
120 121
@@ -127,6 +128,11 @@ struct hd_struct {
127#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ 128#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
128#define GENHD_FL_NATIVE_CAPACITY 128 129#define GENHD_FL_NATIVE_CAPACITY 128
129 130
131enum {
132 DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
133 DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
134};
135
130#define BLK_SCSI_MAX_CMDS (256) 136#define BLK_SCSI_MAX_CMDS (256)
131#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 137#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
132 138
@@ -143,6 +149,8 @@ struct disk_part_tbl {
143 struct hd_struct __rcu *part[]; 149 struct hd_struct __rcu *part[];
144}; 150};
145 151
152struct disk_events;
153
146struct gendisk { 154struct gendisk {
147 /* major, first_minor and minors are input parameters only, 155 /* major, first_minor and minors are input parameters only,
148 * don't use directly. Use disk_devt() and disk_max_parts(). 156 * don't use directly. Use disk_devt() and disk_max_parts().
@@ -154,6 +162,10 @@ struct gendisk {
154 162
155 char disk_name[DISK_NAME_LEN]; /* name of major driver */ 163 char disk_name[DISK_NAME_LEN]; /* name of major driver */
156 char *(*devnode)(struct gendisk *gd, mode_t *mode); 164 char *(*devnode)(struct gendisk *gd, mode_t *mode);
165
166 unsigned int events; /* supported events */
167 unsigned int async_events; /* async events, subset of all */
168
157 /* Array of pointers to partitions indexed by partno. 169 /* Array of pointers to partitions indexed by partno.
158 * Protected with matching bdev lock but stat and other 170 * Protected with matching bdev lock but stat and other
159 * non-critical accesses use RCU. Always access through 171 * non-critical accesses use RCU. Always access through
@@ -171,9 +183,8 @@ struct gendisk {
171 struct kobject *slave_dir; 183 struct kobject *slave_dir;
172 184
173 struct timer_rand_state *random; 185 struct timer_rand_state *random;
174
175 atomic_t sync_io; /* RAID */ 186 atomic_t sync_io; /* RAID */
176 struct work_struct async_notify; 187 struct disk_events *ev;
177#ifdef CONFIG_BLK_DEV_INTEGRITY 188#ifdef CONFIG_BLK_DEV_INTEGRITY
178 struct blk_integrity *integrity; 189 struct blk_integrity *integrity;
179#endif 190#endif
@@ -395,7 +406,6 @@ extern void part_round_stats(int cpu, struct hd_struct *part);
395/* block/genhd.c */ 406/* block/genhd.c */
396extern void add_disk(struct gendisk *disk); 407extern void add_disk(struct gendisk *disk);
397extern void del_gendisk(struct gendisk *gp); 408extern void del_gendisk(struct gendisk *gp);
398extern void unlink_gendisk(struct gendisk *gp);
399extern struct gendisk *get_gendisk(dev_t dev, int *partno); 409extern struct gendisk *get_gendisk(dev_t dev, int *partno);
400extern struct block_device *bdget_disk(struct gendisk *disk, int partno); 410extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
401 411
@@ -407,6 +417,11 @@ static inline int get_disk_ro(struct gendisk *disk)
407 return disk->part0.policy; 417 return disk->part0.policy;
408} 418}
409 419
420extern void disk_block_events(struct gendisk *disk);
421extern void disk_unblock_events(struct gendisk *disk);
422extern void disk_check_events(struct gendisk *disk);
423extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
424
410/* drivers/char/random.c */ 425/* drivers/char/random.c */
411extern void add_disk_randomness(struct gendisk *disk); 426extern void add_disk_randomness(struct gendisk *disk);
412extern void rand_initialize_disk(struct gendisk *disk); 427extern void rand_initialize_disk(struct gendisk *disk);
@@ -583,6 +598,7 @@ extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
583 sector_t len, int flags, 598 sector_t len, int flags,
584 struct partition_meta_info 599 struct partition_meta_info
585 *info); 600 *info);
601extern void __delete_partition(struct hd_struct *);
586extern void delete_partition(struct gendisk *, int); 602extern void delete_partition(struct gendisk *, int);
587extern void printk_all_partitions(void); 603extern void printk_all_partitions(void);
588 604
@@ -611,6 +627,29 @@ extern ssize_t part_fail_store(struct device *dev,
611 const char *buf, size_t count); 627 const char *buf, size_t count);
612#endif /* CONFIG_FAIL_MAKE_REQUEST */ 628#endif /* CONFIG_FAIL_MAKE_REQUEST */
613 629
630static inline void hd_ref_init(struct hd_struct *part)
631{
632 atomic_set(&part->ref, 1);
633 smp_mb();
634}
635
636static inline void hd_struct_get(struct hd_struct *part)
637{
638 atomic_inc(&part->ref);
639 smp_mb__after_atomic_inc();
640}
641
642static inline int hd_struct_try_get(struct hd_struct *part)
643{
644 return atomic_inc_not_zero(&part->ref);
645}
646
647static inline void hd_struct_put(struct hd_struct *part)
648{
649 if (atomic_dec_and_test(&part->ref))
650 __delete_partition(part);
651}
652
614#else /* CONFIG_BLOCK */ 653#else /* CONFIG_BLOCK */
615 654
616static inline void printk_all_partitions(void) { } 655static inline void printk_all_partitions(void) { }
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 1651fef18831..648d23358038 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -104,6 +104,7 @@ struct scsi_cmnd;
104#define UNMAP 0x42 104#define UNMAP 0x42
105#define READ_TOC 0x43 105#define READ_TOC 0x43
106#define READ_HEADER 0x44 106#define READ_HEADER 0x44
107#define GET_EVENT_STATUS_NOTIFICATION 0x4a
107#define LOG_SELECT 0x4c 108#define LOG_SELECT 0x4c
108#define LOG_SENSE 0x4d 109#define LOG_SENSE 0x4d
109#define XDWRITEREAD_10 0x53 110#define XDWRITEREAD_10 0x53
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d8ce278515c3..aba421d68f6f 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -206,15 +206,16 @@ TRACE_EVENT(block_bio_bounce,
206 * block_bio_complete - completed all work on the block operation 206 * block_bio_complete - completed all work on the block operation
207 * @q: queue holding the block operation 207 * @q: queue holding the block operation
208 * @bio: block operation completed 208 * @bio: block operation completed
209 * @error: io error value
209 * 210 *
210 * This tracepoint indicates there is no further work to do on this 211 * This tracepoint indicates there is no further work to do on this
211 * block IO operation @bio. 212 * block IO operation @bio.
212 */ 213 */
213TRACE_EVENT(block_bio_complete, 214TRACE_EVENT(block_bio_complete,
214 215
215 TP_PROTO(struct request_queue *q, struct bio *bio), 216 TP_PROTO(struct request_queue *q, struct bio *bio, int error),
216 217
217 TP_ARGS(q, bio), 218 TP_ARGS(q, bio, error),
218 219
219 TP_STRUCT__entry( 220 TP_STRUCT__entry(
220 __field( dev_t, dev ) 221 __field( dev_t, dev )
@@ -228,6 +229,7 @@ TRACE_EVENT(block_bio_complete,
228 __entry->dev = bio->bi_bdev->bd_dev; 229 __entry->dev = bio->bi_bdev->bd_dev;
229 __entry->sector = bio->bi_sector; 230 __entry->sector = bio->bi_sector;
230 __entry->nr_sector = bio->bi_size >> 9; 231 __entry->nr_sector = bio->bi_size >> 9;
232 __entry->error = error;
231 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 233 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
232 ), 234 ),
233 235
@@ -486,16 +488,16 @@ TRACE_EVENT(block_split,
486); 488);
487 489
488/** 490/**
489 * block_remap - map request for a partition to the raw device 491 * block_bio_remap - map request for a logical device to the raw device
490 * @q: queue holding the operation 492 * @q: queue holding the operation
491 * @bio: revised operation 493 * @bio: revised operation
492 * @dev: device for the operation 494 * @dev: device for the operation
493 * @from: original sector for the operation 495 * @from: original sector for the operation
494 * 496 *
495 * An operation for a partition on a block device has been mapped to the 497 * An operation for a logical device has been mapped to the
496 * raw block device. 498 * raw block device.
497 */ 499 */
498TRACE_EVENT(block_remap, 500TRACE_EVENT(block_bio_remap,
499 501
500 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 502 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
501 sector_t from), 503 sector_t from),
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 69425889bd40..7c97c3a0eee3 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void)
224 return res; 224 return res;
225 225
226 root_swap = res; 226 root_swap = res;
227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE); 227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
228 if (res) 228 if (res)
229 return res; 229 return res;
230 230
@@ -930,7 +930,8 @@ int swsusp_check(void)
930{ 930{
931 int error; 931 int error;
932 932
933 hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 933 hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
934 FMODE_READ, NULL);
934 if (!IS_ERR(hib_resume_bdev)) { 935 if (!IS_ERR(hib_resume_bdev)) {
935 set_blocksize(hib_resume_bdev, PAGE_SIZE); 936 set_blocksize(hib_resume_bdev, PAGE_SIZE);
936 clear_page(swsusp_header); 937 clear_page(swsusp_header);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7b8ec0281548..153562d0b93c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -758,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore,
758 * @q: queue the io is for 758 * @q: queue the io is for
759 * @bio: the source bio 759 * @bio: the source bio
760 * @what: the action 760 * @what: the action
761 * @error: error, if any
761 * 762 *
762 * Description: 763 * Description:
763 * Records an action against a bio. Will log the bio offset + size. 764 * Records an action against a bio. Will log the bio offset + size.
764 * 765 *
765 **/ 766 **/
766static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, 767static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
767 u32 what) 768 u32 what, int error)
768{ 769{
769 struct blk_trace *bt = q->blk_trace; 770 struct blk_trace *bt = q->blk_trace;
770 771
771 if (likely(!bt)) 772 if (likely(!bt))
772 return; 773 return;
773 774
775 if (!error && !bio_flagged(bio, BIO_UPTODATE))
776 error = EIO;
777
774 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
775 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 779 error, 0, NULL);
776} 780}
777 781
778static void blk_add_trace_bio_bounce(void *ignore, 782static void blk_add_trace_bio_bounce(void *ignore,
779 struct request_queue *q, struct bio *bio) 783 struct request_queue *q, struct bio *bio)
780{ 784{
781 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
782} 786}
783 787
784static void blk_add_trace_bio_complete(void *ignore, 788static void blk_add_trace_bio_complete(void *ignore,
785 struct request_queue *q, struct bio *bio) 789 struct request_queue *q, struct bio *bio,
790 int error)
786{ 791{
787 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
788} 793}
789 794
790static void blk_add_trace_bio_backmerge(void *ignore, 795static void blk_add_trace_bio_backmerge(void *ignore,
791 struct request_queue *q, 796 struct request_queue *q,
792 struct bio *bio) 797 struct bio *bio)
793{ 798{
794 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
795} 800}
796 801
797static void blk_add_trace_bio_frontmerge(void *ignore, 802static void blk_add_trace_bio_frontmerge(void *ignore,
798 struct request_queue *q, 803 struct request_queue *q,
799 struct bio *bio) 804 struct bio *bio)
800{ 805{
801 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
802} 807}
803 808
804static void blk_add_trace_bio_queue(void *ignore, 809static void blk_add_trace_bio_queue(void *ignore,
805 struct request_queue *q, struct bio *bio) 810 struct request_queue *q, struct bio *bio)
806{ 811{
807 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
808} 813}
809 814
810static void blk_add_trace_getrq(void *ignore, 815static void blk_add_trace_getrq(void *ignore,
@@ -812,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore,
812 struct bio *bio, int rw) 817 struct bio *bio, int rw)
813{ 818{
814 if (bio) 819 if (bio)
815 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
816 else { 821 else {
817 struct blk_trace *bt = q->blk_trace; 822 struct blk_trace *bt = q->blk_trace;
818 823
@@ -827,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore,
827 struct bio *bio, int rw) 832 struct bio *bio, int rw)
828{ 833{
829 if (bio) 834 if (bio)
830 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
831 else { 836 else {
832 struct blk_trace *bt = q->blk_trace; 837 struct blk_trace *bt = q->blk_trace;
833 838
@@ -887,7 +892,7 @@ static void blk_add_trace_split(void *ignore,
887} 892}
888 893
889/** 894/**
890 * blk_add_trace_remap - Add a trace for a remap operation 895 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
891 * @ignore: trace callback data parameter (not used) 896 * @ignore: trace callback data parameter (not used)
892 * @q: queue the io is for 897 * @q: queue the io is for
893 * @bio: the source bio 898 * @bio: the source bio
@@ -899,9 +904,9 @@ static void blk_add_trace_split(void *ignore,
899 * it spans a stripe (or similar). Add a trace for that action. 904 * it spans a stripe (or similar). Add a trace for that action.
900 * 905 *
901 **/ 906 **/
902static void blk_add_trace_remap(void *ignore, 907static void blk_add_trace_bio_remap(void *ignore,
903 struct request_queue *q, struct bio *bio, 908 struct request_queue *q, struct bio *bio,
904 dev_t dev, sector_t from) 909 dev_t dev, sector_t from)
905{ 910{
906 struct blk_trace *bt = q->blk_trace; 911 struct blk_trace *bt = q->blk_trace;
907 struct blk_io_trace_remap r; 912 struct blk_io_trace_remap r;
@@ -1016,7 +1021,7 @@ static void blk_register_tracepoints(void)
1016 WARN_ON(ret); 1021 WARN_ON(ret);
1017 ret = register_trace_block_split(blk_add_trace_split, NULL); 1022 ret = register_trace_block_split(blk_add_trace_split, NULL);
1018 WARN_ON(ret); 1023 WARN_ON(ret);
1019 ret = register_trace_block_remap(blk_add_trace_remap, NULL); 1024 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1020 WARN_ON(ret); 1025 WARN_ON(ret);
1021 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1026 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1022 WARN_ON(ret); 1027 WARN_ON(ret);
@@ -1025,7 +1030,7 @@ static void blk_register_tracepoints(void)
1025static void blk_unregister_tracepoints(void) 1030static void blk_unregister_tracepoints(void)
1026{ 1031{
1027 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1032 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1028 unregister_trace_block_remap(blk_add_trace_remap, NULL); 1033 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1029 unregister_trace_block_split(blk_add_trace_split, NULL); 1034 unregister_trace_block_split(blk_add_trace_split, NULL);
1030 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1035 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
1031 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); 1036 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 67ddaaf98c74..b6adcfbf6f48 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1677,7 +1677,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1677 if (S_ISBLK(inode->i_mode)) { 1677 if (S_ISBLK(inode->i_mode)) {
1678 struct block_device *bdev = I_BDEV(inode); 1678 struct block_device *bdev = I_BDEV(inode);
1679 set_blocksize(bdev, p->old_block_size); 1679 set_blocksize(bdev, p->old_block_size);
1680 bd_release(bdev); 1680 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1681 } else { 1681 } else {
1682 mutex_lock(&inode->i_mutex); 1682 mutex_lock(&inode->i_mutex);
1683 inode->i_flags &= ~S_SWAPFILE; 1683 inode->i_flags &= ~S_SWAPFILE;
@@ -1939,7 +1939,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1939 error = -EINVAL; 1939 error = -EINVAL;
1940 if (S_ISBLK(inode->i_mode)) { 1940 if (S_ISBLK(inode->i_mode)) {
1941 bdev = I_BDEV(inode); 1941 bdev = I_BDEV(inode);
1942 error = bd_claim(bdev, sys_swapon); 1942 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1943 sys_swapon);
1943 if (error < 0) { 1944 if (error < 0) {
1944 bdev = NULL; 1945 bdev = NULL;
1945 error = -EINVAL; 1946 error = -EINVAL;
@@ -2136,7 +2137,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2136bad_swap: 2137bad_swap:
2137 if (bdev) { 2138 if (bdev) {
2138 set_blocksize(bdev, p->old_block_size); 2139 set_blocksize(bdev, p->old_block_size);
2139 bd_release(bdev); 2140 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2140 } 2141 }
2141 destroy_swap_extents(p); 2142 destroy_swap_extents(p);
2142 swap_cgroup_swapoff(type); 2143 swap_cgroup_swapoff(type);