author     James Bottomley <James.Bottomley@suse.de>  2010-12-23 00:29:40 -0500
committer  James Bottomley <James.Bottomley@suse.de>  2010-12-23 00:29:40 -0500
commit     ce82ba80477c2cf3576f1e42c4c377e9995633a7 (patch)
tree       952eb80032c38e8a104015c3497bff2d09857af5
parent     5f7bb3a439ce51ae8b92ca1dc93b91712224b69a (diff)
parent     047b7bdab951b76d19b44388e35cf579ef91e448 (diff)
Merge branch 'block'
-rw-r--r--  Documentation/cgroups/blkio-controller.txt | 27
-rw-r--r--  block/blk-cgroup.c | 4
-rw-r--r--  block/blk-core.c | 10
-rw-r--r--  block/cfq-iosched.c | 62
-rw-r--r--  block/genhd.c | 548
-rw-r--r--  block/ioctl.c | 5
-rw-r--r--  drivers/block/drbd/drbd_int.h | 2
-rw-r--r--  drivers/block/drbd/drbd_main.c | 7
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 103
-rw-r--r--  drivers/block/loop.c | 6
-rw-r--r--  drivers/block/pktcdvd.c | 22
-rw-r--r--  drivers/cdrom/cdrom.c | 56
-rw-r--r--  drivers/char/raw.c | 14
-rw-r--r--  drivers/md/dm-table.c | 20
-rw-r--r--  drivers/md/dm.c | 4
-rw-r--r--  drivers/md/md.c | 16
-rw-r--r--  drivers/mtd/devices/block2mtd.c | 10
-rw-r--r--  drivers/s390/block/dasd_genhd.c | 2
-rw-r--r--  drivers/scsi/scsi_lib.c | 13
-rw-r--r--  drivers/scsi/sd.c | 10
-rw-r--r--  drivers/scsi/sr.c | 174
-rw-r--r--  drivers/scsi/sr.h | 3
-rw-r--r--  drivers/scsi/sr_ioctl.c | 2
-rw-r--r--  drivers/usb/gadget/storage_common.c | 7
-rw-r--r--  fs/block_dev.c | 741
-rw-r--r--  fs/btrfs/volumes.c | 28
-rw-r--r--  fs/btrfs/volumes.h | 2
-rw-r--r--  fs/char_dev.c | 2
-rw-r--r--  fs/ext3/super.c | 12
-rw-r--r--  fs/ext4/super.c | 12
-rw-r--r--  fs/gfs2/ops_fstype.c | 8
-rw-r--r--  fs/jfs/jfs_logmgr.c | 17
-rw-r--r--  fs/logfs/dev_bdev.c | 7
-rw-r--r--  fs/nfsd/vfs.c | 5
-rw-r--r--  fs/nilfs2/super.c | 8
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 2
-rw-r--r--  fs/partitions/check.c | 98
-rw-r--r--  fs/reiserfs/journal.c | 21
-rw-r--r--  fs/splice.c | 43
-rw-r--r--  fs/super.c | 19
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 5
-rw-r--r--  include/linux/blkdev.h | 4
-rw-r--r--  include/linux/cdrom.h | 6
-rw-r--r--  include/linux/fs.h | 26
-rw-r--r--  include/linux/genhd.h | 20
-rw-r--r--  include/scsi/scsi.h | 1
-rw-r--r--  include/trace/events/block.h | 6
-rw-r--r--  kernel/power/swap.c | 5
-rw-r--r--  kernel/trace/blktrace.c | 12
-rw-r--r--  mm/swapfile.c | 7
50 files changed, 1206 insertions, 1038 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index d6da611f8f63..4ed7b5ceeed2 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy
89 89
90 Limits for writes can be put using blkio.write_bps_device file. 90 Limits for writes can be put using blkio.write_bps_device file.
91 91
92Hierarchical Cgroups
93====================
94- Currently none of the IO control policies supports hierarchical groups. But
95 the cgroup interface does allow creation of hierarchical cgroups and internally
96 the IO policies treat them as a flat hierarchy.
97
98 So this patch will allow creation of a cgroup hierarchy, but at the backend
99 everything will be treated as flat. So if somebody creates a hierarchy
100 as follows,
101
102 root
103 / \
104 test1 test2
105 |
106 test3
107
108 CFQ and throttling will practically treat all groups at the same level.
109
110 pivot
111 / | \ \
112 root test1 test2 test3
113
114 Down the line we can implement hierarchical accounting/control support
115 and also introduce a new cgroup file "use_hierarchy" which will control
116 whether the cgroup hierarchy is viewed as flat or hierarchical by the policy.
117 This is how the memory controller has implemented it as well.
118
92Various user visible config options 119Various user visible config options
93=================================== 120===================================
94CONFIG_BLK_CGROUP 121CONFIG_BLK_CGROUP
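
The flat treatment described above can be observed from user space with nothing more than mkdir(2) against the blkio cgroup mount. A minimal sketch, assuming the controller is mounted at /cgroup/blkio (the mount point is an assumption; adjust for the local setup):

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(void)
{
	/* the test1/test2/test3 hierarchy from the figure above */
	const char *dirs[] = {
		"/cgroup/blkio/test1",
		"/cgroup/blkio/test2",
		"/cgroup/blkio/test1/test3",
	};
	unsigned int i;

	for (i = 0; i < sizeof(dirs) / sizeof(dirs[0]); i++)
		if (mkdir(dirs[i], 0755) && errno != EEXIST)
			perror(dirs[i]);

	/*
	 * The directories are created, but CFQ and the throttling policy
	 * schedule test1, test2 and test3 as siblings of root (the "pivot"
	 * picture above) until hierarchical support is implemented.
	 */
	return 0;
}
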
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1452 goto done; 1452 goto done;
1453 } 1453 }
1454 1454
1455 /* Currently we do not support hierarchy deeper than two level (0,1) */
1456 if (parent != cgroup->top_cgroup)
1457 return ERR_PTR(-EPERM);
1458
1459 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 1455 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1460 if (!blkcg) 1456 if (!blkcg)
1461 return ERR_PTR(-ENOMEM); 1457 return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..151070541e21 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
33 33
34#include "blk.h" 34#include "blk.h"
35 35
36EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); 36EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
39 39
@@ -1329,9 +1329,9 @@ static inline void blk_partition_remap(struct bio *bio)
1329 bio->bi_sector += p->start_sect; 1329 bio->bi_sector += p->start_sect;
1330 bio->bi_bdev = bdev->bd_contains; 1330 bio->bi_bdev = bdev->bd_contains;
1331 1331
1332 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1332 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1333 bdev->bd_dev, 1333 bdev->bd_dev,
1334 bio->bi_sector - p->start_sect); 1334 bio->bi_sector - p->start_sect);
1335 } 1335 }
1336} 1336}
1337 1337
@@ -1500,7 +1500,7 @@ static inline void __generic_make_request(struct bio *bio)
1500 goto end_io; 1500 goto end_io;
1501 1501
1502 if (old_sector != -1) 1502 if (old_sector != -1)
1503 trace_block_remap(q, bio, old_dev, old_sector); 1503 trace_block_bio_remap(q, bio, old_dev, old_sector);
1504 1504
1505 old_sector = bio->bi_sector; 1505 old_sector = bio->bi_sector;
1506 old_dev = bio->bi_bdev->bd_dev; 1506 old_dev = bio->bi_bdev->bd_dev;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4cd59b0d7c15..c19d015ac5a5 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct rb_node *active;
91}; 90};
92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
93 .count = 0, .min_vdisktime = 0, } 92 .count = 0, .min_vdisktime = 0, }
@@ -180,7 +179,6 @@ struct cfq_group {
180 /* group service_tree key */ 179 /* group service_tree key */
181 u64 vdisktime; 180 u64 vdisktime;
182 unsigned int weight; 181 unsigned int weight;
183 bool on_st;
184 182
185 /* number of cfqq currently on this group */ 183 /* number of cfqq currently on this group */
186 int nr_cfqq; 184 int nr_cfqq;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
563 u64 vdisktime = st->min_vdisktime; 561 u64 vdisktime = st->min_vdisktime;
564 struct cfq_group *cfqg; 562 struct cfq_group *cfqg;
565 563
566 if (st->active) {
567 cfqg = rb_entry_cfqg(st->active);
568 vdisktime = cfqg->vdisktime;
569 }
570
571 if (st->left) { 564 if (st->left) {
572 cfqg = rb_entry_cfqg(st->left); 565 cfqg = rb_entry_cfqg(st->left);
573 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); 566 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
646static inline bool cfq_slice_used(struct cfq_queue *cfqq) 639static inline bool cfq_slice_used(struct cfq_queue *cfqq)
647{ 640{
648 if (cfq_cfqq_slice_new(cfqq)) 641 if (cfq_cfqq_slice_new(cfqq))
649 return 0; 642 return false;
650 if (time_before(jiffies, cfqq->slice_end)) 643 if (time_before(jiffies, cfqq->slice_end))
651 return 0; 644 return false;
652 645
653 return 1; 646 return true;
654} 647}
655 648
656/* 649/*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
869 struct rb_node *n; 862 struct rb_node *n;
870 863
871 cfqg->nr_cfqq++; 864 cfqg->nr_cfqq++;
872 if (cfqg->on_st) 865 if (!RB_EMPTY_NODE(&cfqg->rb_node))
873 return; 866 return;
874 867
875 /* 868 /*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
885 cfqg->vdisktime = st->min_vdisktime; 878 cfqg->vdisktime = st->min_vdisktime;
886 879
887 __cfq_group_service_tree_add(st, cfqg); 880 __cfq_group_service_tree_add(st, cfqg);
888 cfqg->on_st = true;
889 st->total_weight += cfqg->weight; 881 st->total_weight += cfqg->weight;
890} 882}
891 883
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
894{ 886{
895 struct cfq_rb_root *st = &cfqd->grp_service_tree; 887 struct cfq_rb_root *st = &cfqd->grp_service_tree;
896 888
897 if (st->active == &cfqg->rb_node)
898 st->active = NULL;
899
900 BUG_ON(cfqg->nr_cfqq < 1); 889 BUG_ON(cfqg->nr_cfqq < 1);
901 cfqg->nr_cfqq--; 890 cfqg->nr_cfqq--;
902 891
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
905 return; 894 return;
906 895
907 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); 896 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
908 cfqg->on_st = false;
909 st->total_weight -= cfqg->weight; 897 st->total_weight -= cfqg->weight;
910 if (!RB_EMPTY_NODE(&cfqg->rb_node)) 898 if (!RB_EMPTY_NODE(&cfqg->rb_node))
911 cfq_rb_erase(&cfqg->rb_node, st); 899 cfq_rb_erase(&cfqg->rb_node, st);
@@ -1095,7 +1083,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
1095 if (!atomic_dec_and_test(&cfqg->ref)) 1083 if (!atomic_dec_and_test(&cfqg->ref))
1096 return; 1084 return;
1097 for_each_cfqg_st(cfqg, i, j, st) 1085 for_each_cfqg_st(cfqg, i, j, st)
1098 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL); 1086 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
1099 kfree(cfqg); 1087 kfree(cfqg);
1100} 1088}
1101 1089
@@ -1687,9 +1675,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1687 if (cfqq == cfqd->active_queue) 1675 if (cfqq == cfqd->active_queue)
1688 cfqd->active_queue = NULL; 1676 cfqd->active_queue = NULL;
1689 1677
1690 if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
1691 cfqd->grp_service_tree.active = NULL;
1692
1693 if (cfqd->active_cic) { 1678 if (cfqd->active_cic) {
1694 put_io_context(cfqd->active_cic->ioc); 1679 put_io_context(cfqd->active_cic->ioc);
1695 cfqd->active_cic = NULL; 1680 cfqd->active_cic = NULL;
@@ -1901,10 +1886,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1901 * in their service tree. 1886 * in their service tree.
1902 */ 1887 */
1903 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1888 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
1904 return 1; 1889 return true;
1905 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1890 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1906 service_tree->count); 1891 service_tree->count);
1907 return 0; 1892 return false;
1908} 1893}
1909 1894
1910static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1895static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2116,12 +2101,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2116 unsigned count; 2101 unsigned count;
2117 struct cfq_rb_root *st; 2102 struct cfq_rb_root *st;
2118 unsigned group_slice; 2103 unsigned group_slice;
2119 2104 enum wl_prio_t original_prio = cfqd->serving_prio;
2120 if (!cfqg) {
2121 cfqd->serving_prio = IDLE_WORKLOAD;
2122 cfqd->workload_expires = jiffies + 1;
2123 return;
2124 }
2125 2105
2126 /* Choose next priority. RT > BE > IDLE */ 2106 /* Choose next priority. RT > BE > IDLE */
2127 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 2107 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2114,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2134 return; 2114 return;
2135 } 2115 }
2136 2116
2117 if (original_prio != cfqd->serving_prio)
2118 goto new_workload;
2119
2137 /* 2120 /*
2138 * For RT and BE, we have to choose also the type 2121 * For RT and BE, we have to choose also the type
2139 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload 2122 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2131,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2148 if (count && !time_after(jiffies, cfqd->workload_expires)) 2131 if (count && !time_after(jiffies, cfqd->workload_expires))
2149 return; 2132 return;
2150 2133
2134new_workload:
2151 /* otherwise select new workload type */ 2135 /* otherwise select new workload type */
2152 cfqd->serving_type = 2136 cfqd->serving_type =
2153 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); 2137 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2183,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2199 if (RB_EMPTY_ROOT(&st->rb)) 2183 if (RB_EMPTY_ROOT(&st->rb))
2200 return NULL; 2184 return NULL;
2201 cfqg = cfq_rb_first_group(st); 2185 cfqg = cfq_rb_first_group(st);
2202 st->active = &cfqg->rb_node;
2203 update_min_vdisktime(st); 2186 update_min_vdisktime(st);
2204 return cfqg; 2187 return cfqg;
2205} 2188}
@@ -2293,6 +2276,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2293 goto keep_queue; 2276 goto keep_queue;
2294 } 2277 }
2295 2278
2279 /*
2280 * This is a deep seek queue, but the device is much faster than
2281 * the queue can deliver, don't idle
2282 **/
2283 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
2284 (cfq_cfqq_slice_new(cfqq) ||
2285 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
2286 cfq_clear_cfqq_deep(cfqq);
2287 cfq_clear_cfqq_idle_window(cfqq);
2288 }
2289
2296 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { 2290 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2297 cfqq = NULL; 2291 cfqq = NULL;
2298 goto keep_queue; 2292 goto keep_queue;
@@ -2367,12 +2361,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2367{ 2361{
2368 /* the queue hasn't finished any request, can't estimate */ 2362 /* the queue hasn't finished any request, can't estimate */
2369 if (cfq_cfqq_slice_new(cfqq)) 2363 if (cfq_cfqq_slice_new(cfqq))
2370 return 1; 2364 return true;
2371 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 2365 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2372 cfqq->slice_end)) 2366 cfqq->slice_end))
2373 return 1; 2367 return true;
2374 2368
2375 return 0; 2369 return false;
2376} 2370}
2377 2371
2378static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2372static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -3265,6 +3259,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3265 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3259 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3266 return true; 3260 return true;
3267 3261
3262 /* An idle queue should not be idle now for some reason */
3263 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
3264 return true;
3265
3268 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) 3266 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3269 return false; 3267 return false;
3270 3268
diff --git a/block/genhd.c b/block/genhd.c
index 5fa2b44a72ff..743317381632 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,7 @@
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19#include <linux/mutex.h> 19#include <linux/mutex.h>
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/log2.h>
21 22
22#include "blk.h" 23#include "blk.h"
23 24
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
35 36
36static struct device_type disk_type; 37static struct device_type disk_type;
37 38
39static void disk_add_events(struct gendisk *disk);
40static void disk_del_events(struct gendisk *disk);
41static void disk_release_events(struct gendisk *disk);
42
38/** 43/**
39 * disk_get_part - get partition 44 * disk_get_part - get partition
40 * @disk: disk to look partition from 45 * @disk: disk to look partition from
@@ -239,7 +244,7 @@ static struct blk_major_name {
239} *major_names[BLKDEV_MAJOR_HASH_SIZE]; 244} *major_names[BLKDEV_MAJOR_HASH_SIZE];
240 245
241/* index in the above - for now: assume no multimajor ranges */ 246/* index in the above - for now: assume no multimajor ranges */
242static inline int major_to_index(int major) 247static inline int major_to_index(unsigned major)
243{ 248{
244 return major % BLKDEV_MAJOR_HASH_SIZE; 249 return major % BLKDEV_MAJOR_HASH_SIZE;
245} 250}
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data)
502 return 0; 507 return 0;
503} 508}
504 509
510void register_disk(struct gendisk *disk)
511{
512 struct device *ddev = disk_to_dev(disk);
513 struct block_device *bdev;
514 struct disk_part_iter piter;
515 struct hd_struct *part;
516 int err;
517
518 ddev->parent = disk->driverfs_dev;
519
520 dev_set_name(ddev, disk->disk_name);
521
522 /* delay uevents, until we scanned partition table */
523 dev_set_uevent_suppress(ddev, 1);
524
525 if (device_add(ddev))
526 return;
527 if (!sysfs_deprecated) {
528 err = sysfs_create_link(block_depr, &ddev->kobj,
529 kobject_name(&ddev->kobj));
530 if (err) {
531 device_del(ddev);
532 return;
533 }
534 }
535 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
536 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
537
538 /* No minors to use for partitions */
539 if (!disk_partitionable(disk))
540 goto exit;
541
542 /* No such device (e.g., media were just removed) */
543 if (!get_capacity(disk))
544 goto exit;
545
546 bdev = bdget_disk(disk, 0);
547 if (!bdev)
548 goto exit;
549
550 bdev->bd_invalidated = 1;
551 err = blkdev_get(bdev, FMODE_READ, NULL);
552 if (err < 0)
553 goto exit;
554 blkdev_put(bdev, FMODE_READ);
555
556exit:
557 /* announce disk after possible partitions are created */
558 dev_set_uevent_suppress(ddev, 0);
559 kobject_uevent(&ddev->kobj, KOBJ_ADD);
560
561 /* announce possible partitions */
562 disk_part_iter_init(&piter, disk, 0);
563 while ((part = disk_part_iter_next(&piter)))
564 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
565 disk_part_iter_exit(&piter);
566}
567
505/** 568/**
506 * add_disk - add partitioning information to kernel list 569 * add_disk - add partitioning information to kernel list
507 * @disk: per-device partitioning information 570 * @disk: per-device partitioning information
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk)
551 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 614 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
552 "bdi"); 615 "bdi");
553 WARN_ON(retval); 616 WARN_ON(retval);
554}
555 617
618 disk_add_events(disk);
619}
556EXPORT_SYMBOL(add_disk); 620EXPORT_SYMBOL(add_disk);
557EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
558 621
559void unlink_gendisk(struct gendisk *disk) 622void del_gendisk(struct gendisk *disk)
560{ 623{
624 struct disk_part_iter piter;
625 struct hd_struct *part;
626
627 disk_del_events(disk);
628
629 /* invalidate stuff */
630 disk_part_iter_init(&piter, disk,
631 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
632 while ((part = disk_part_iter_next(&piter))) {
633 invalidate_partition(disk, part->partno);
634 delete_partition(disk, part->partno);
635 }
636 disk_part_iter_exit(&piter);
637
638 invalidate_partition(disk, 0);
639 blk_free_devt(disk_to_dev(disk)->devt);
640 set_capacity(disk, 0);
641 disk->flags &= ~GENHD_FL_UP;
642
561 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 643 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
562 bdi_unregister(&disk->queue->backing_dev_info); 644 bdi_unregister(&disk->queue->backing_dev_info);
563 blk_unregister_queue(disk); 645 blk_unregister_queue(disk);
564 blk_unregister_region(disk_devt(disk), disk->minors); 646 blk_unregister_region(disk_devt(disk), disk->minors);
647
648 part_stat_set_all(&disk->part0, 0);
649 disk->part0.stamp = 0;
650
651 kobject_put(disk->part0.holder_dir);
652 kobject_put(disk->slave_dir);
653 disk->driverfs_dev = NULL;
654 if (!sysfs_deprecated)
655 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
656 device_del(disk_to_dev(disk));
565} 657}
658EXPORT_SYMBOL(del_gendisk);
566 659
567/** 660/**
568 * get_gendisk - get partitioning information for a given device 661 * get_gendisk - get partitioning information for a given device
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
735 static void *p; 828 static void *p;
736 829
737 p = disk_seqf_start(seqf, pos); 830 p = disk_seqf_start(seqf, pos);
738 if (!IS_ERR(p) && p && !*pos) 831 if (!IS_ERR_OR_NULL(p) && !*pos)
739 seq_puts(seqf, "major minor #blocks name\n\n"); 832 seq_puts(seqf, "major minor #blocks name\n\n");
740 return p; 833 return p;
741} 834}
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev)
1005{ 1098{
1006 struct gendisk *disk = dev_to_disk(dev); 1099 struct gendisk *disk = dev_to_disk(dev);
1007 1100
1101 disk_release_events(disk);
1008 kfree(disk->random); 1102 kfree(disk->random);
1009 disk_replace_part_tbl(disk, NULL); 1103 disk_replace_part_tbl(disk, NULL);
1010 free_part_stats(&disk->part0); 1104 free_part_stats(&disk->part0);
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void)
1110module_init(proc_genhd_init); 1204module_init(proc_genhd_init);
1111#endif /* CONFIG_PROC_FS */ 1205#endif /* CONFIG_PROC_FS */
1112 1206
1113static void media_change_notify_thread(struct work_struct *work)
1114{
1115 struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1116 char event[] = "MEDIA_CHANGE=1";
1117 char *envp[] = { event, NULL };
1118
1119 /*
1120 * set enviroment vars to indicate which event this is for
1121 * so that user space will know to go check the media status.
1122 */
1123 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1124 put_device(gd->driverfs_dev);
1125}
1126
1127#if 0
1128void genhd_media_change_notify(struct gendisk *disk)
1129{
1130 get_device(disk->driverfs_dev);
1131 schedule_work(&disk->async_notify);
1132}
1133EXPORT_SYMBOL_GPL(genhd_media_change_notify);
1134#endif /* 0 */
1135
1136dev_t blk_lookup_devt(const char *name, int partno) 1207dev_t blk_lookup_devt(const char *name, int partno)
1137{ 1208{
1138 dev_t devt = MKDEV(0, 0); 1209 dev_t devt = MKDEV(0, 0);
@@ -1198,8 +1269,6 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
1198 disk_to_dev(disk)->class = &block_class; 1269 disk_to_dev(disk)->class = &block_class;
1199 disk_to_dev(disk)->type = &disk_type; 1270 disk_to_dev(disk)->type = &disk_type;
1200 device_initialize(disk_to_dev(disk)); 1271 device_initialize(disk_to_dev(disk));
1201 INIT_WORK(&disk->async_notify,
1202 media_change_notify_thread);
1203 } 1272 }
1204 return disk; 1273 return disk;
1205} 1274}
@@ -1291,3 +1360,422 @@ int invalidate_partition(struct gendisk *disk, int partno)
1291} 1360}
1292 1361
1293EXPORT_SYMBOL(invalidate_partition); 1362EXPORT_SYMBOL(invalidate_partition);
1363
1364/*
1365 * Disk events - monitor disk events like media change and eject request.
1366 */
1367struct disk_events {
1368 struct list_head node; /* all disk_event's */
1369 struct gendisk *disk; /* the associated disk */
1370 spinlock_t lock;
1371
1372 int block; /* event blocking depth */
1373 unsigned int pending; /* events already sent out */
1374 unsigned int clearing; /* events being cleared */
1375
1376 long poll_msecs; /* interval, -1 for default */
1377 struct delayed_work dwork;
1378};
1379
1380static const char *disk_events_strs[] = {
1381 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
1382 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
1383};
1384
1385static char *disk_uevents[] = {
1386 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
1387 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
1388};
1389
1390/* list of all disk_events */
1391static DEFINE_MUTEX(disk_events_mutex);
1392static LIST_HEAD(disk_events);
1393
1394/* disable in-kernel polling by default */
1395static unsigned long disk_events_dfl_poll_msecs = 0;
1396
1397static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
1398{
1399 struct disk_events *ev = disk->ev;
1400 long intv_msecs = 0;
1401
1402 /*
1403 * If device-specific poll interval is set, always use it. If
1404 * the default is being used, poll iff there are events which
1405 * can't be monitored asynchronously.
1406 */
1407 if (ev->poll_msecs >= 0)
1408 intv_msecs = ev->poll_msecs;
1409 else if (disk->events & ~disk->async_events)
1410 intv_msecs = disk_events_dfl_poll_msecs;
1411
1412 return msecs_to_jiffies(intv_msecs);
1413}
1414
1415static void __disk_block_events(struct gendisk *disk, bool sync)
1416{
1417 struct disk_events *ev = disk->ev;
1418 unsigned long flags;
1419 bool cancel;
1420
1421 spin_lock_irqsave(&ev->lock, flags);
1422 cancel = !ev->block++;
1423 spin_unlock_irqrestore(&ev->lock, flags);
1424
1425 if (cancel) {
1426 if (sync)
1427 cancel_delayed_work_sync(&disk->ev->dwork);
1428 else
1429 cancel_delayed_work(&disk->ev->dwork);
1430 }
1431}
1432
1433static void __disk_unblock_events(struct gendisk *disk, bool check_now)
1434{
1435 struct disk_events *ev = disk->ev;
1436 unsigned long intv;
1437 unsigned long flags;
1438
1439 spin_lock_irqsave(&ev->lock, flags);
1440
1441 if (WARN_ON_ONCE(ev->block <= 0))
1442 goto out_unlock;
1443
1444 if (--ev->block)
1445 goto out_unlock;
1446
1447 /*
1448 * Not exactly a latency critical operation, set poll timer
1449 * slack to 25% and kick event check.
1450 */
1451 intv = disk_events_poll_jiffies(disk);
1452 set_timer_slack(&ev->dwork.timer, intv / 4);
1453 if (check_now)
1454 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1455 else if (intv)
1456 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1457out_unlock:
1458 spin_unlock_irqrestore(&ev->lock, flags);
1459}
1460
1461/**
1462 * disk_block_events - block and flush disk event checking
1463 * @disk: disk to block events for
1464 *
1465 * On return from this function, it is guaranteed that event checking
1466 * isn't in progress and won't happen until unblocked by
1467 * disk_unblock_events(). Events blocking is counted and the actual
1468 * unblocking happens after the matching number of unblocks are done.
1469 *
1470 * Note that this intentionally does not block event checking from
1471 * disk_clear_events().
1472 *
1473 * CONTEXT:
1474 * Might sleep.
1475 */
1476void disk_block_events(struct gendisk *disk)
1477{
1478 if (disk->ev)
1479 __disk_block_events(disk, true);
1480}
1481
1482/**
1483 * disk_unblock_events - unblock disk event checking
1484 * @disk: disk to unblock events for
1485 *
1486 * Undo disk_block_events(). When the block count reaches zero, it
1487 * starts events polling if configured.
1488 *
1489 * CONTEXT:
1490 * Don't care. Safe to call from irq context.
1491 */
1492void disk_unblock_events(struct gendisk *disk)
1493{
1494 if (disk->ev)
1495 __disk_unblock_events(disk, true);
1496}
1497
1498/**
1499 * disk_check_events - schedule immediate event checking
1500 * @disk: disk to check events for
1501 *
1502 * Schedule immediate event checking on @disk if not blocked.
1503 *
1504 * CONTEXT:
1505 * Don't care. Safe to call from irq context.
1506 */
1507void disk_check_events(struct gendisk *disk)
1508{
1509 if (disk->ev) {
1510 __disk_block_events(disk, false);
1511 __disk_unblock_events(disk, true);
1512 }
1513}
1514EXPORT_SYMBOL_GPL(disk_check_events);
1515
1516/**
1517 * disk_clear_events - synchronously check, clear and return pending events
1518 * @disk: disk to fetch and clear events from
 1519 * @mask: mask of events to be fetched and cleared
1520 *
1521 * Disk events are synchronously checked and pending events in @mask
1522 * are cleared and returned. This ignores the block count.
1523 *
1524 * CONTEXT:
1525 * Might sleep.
1526 */
1527unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1528{
1529 const struct block_device_operations *bdops = disk->fops;
1530 struct disk_events *ev = disk->ev;
1531 unsigned int pending;
1532
1533 if (!ev) {
1534 /* for drivers still using the old ->media_changed method */
1535 if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
1536 bdops->media_changed && bdops->media_changed(disk))
1537 return DISK_EVENT_MEDIA_CHANGE;
1538 return 0;
1539 }
1540
1541 /* tell the workfn about the events being cleared */
1542 spin_lock_irq(&ev->lock);
1543 ev->clearing |= mask;
1544 spin_unlock_irq(&ev->lock);
1545
 1546 /* unconditionally schedule event check and wait for it to finish */
1547 __disk_block_events(disk, true);
1548 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1549 flush_delayed_work(&ev->dwork);
1550 __disk_unblock_events(disk, false);
1551
1552 /* then, fetch and clear pending events */
1553 spin_lock_irq(&ev->lock);
1554 WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
1555 pending = ev->pending & mask;
1556 ev->pending &= ~mask;
1557 spin_unlock_irq(&ev->lock);
1558
1559 return pending;
1560}
1561
1562static void disk_events_workfn(struct work_struct *work)
1563{
1564 struct delayed_work *dwork = to_delayed_work(work);
1565 struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1566 struct gendisk *disk = ev->disk;
1567 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1568 unsigned int clearing = ev->clearing;
1569 unsigned int events;
1570 unsigned long intv;
1571 int nr_events = 0, i;
1572
1573 /* check events */
1574 events = disk->fops->check_events(disk, clearing);
1575
1576 /* accumulate pending events and schedule next poll if necessary */
1577 spin_lock_irq(&ev->lock);
1578
1579 events &= ~ev->pending;
1580 ev->pending |= events;
1581 ev->clearing &= ~clearing;
1582
1583 intv = disk_events_poll_jiffies(disk);
1584 if (!ev->block && intv)
1585 queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1586
1587 spin_unlock_irq(&ev->lock);
1588
1589 /* tell userland about new events */
1590 for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
1591 if (events & (1 << i))
1592 envp[nr_events++] = disk_uevents[i];
1593
1594 if (nr_events)
1595 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
1596}
1597
1598/*
1599 * A disk events enabled device has the following sysfs nodes under
1600 * its /sys/block/X/ directory.
1601 *
1602 * events : list of all supported events
1603 * events_async : list of events which can be detected w/o polling
1604 * events_poll_msecs : polling interval, 0: disable, -1: system default
1605 */
1606static ssize_t __disk_events_show(unsigned int events, char *buf)
1607{
1608 const char *delim = "";
1609 ssize_t pos = 0;
1610 int i;
1611
1612 for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
1613 if (events & (1 << i)) {
1614 pos += sprintf(buf + pos, "%s%s",
1615 delim, disk_events_strs[i]);
1616 delim = " ";
1617 }
1618 if (pos)
1619 pos += sprintf(buf + pos, "\n");
1620 return pos;
1621}
1622
1623static ssize_t disk_events_show(struct device *dev,
1624 struct device_attribute *attr, char *buf)
1625{
1626 struct gendisk *disk = dev_to_disk(dev);
1627
1628 return __disk_events_show(disk->events, buf);
1629}
1630
1631static ssize_t disk_events_async_show(struct device *dev,
1632 struct device_attribute *attr, char *buf)
1633{
1634 struct gendisk *disk = dev_to_disk(dev);
1635
1636 return __disk_events_show(disk->async_events, buf);
1637}
1638
1639static ssize_t disk_events_poll_msecs_show(struct device *dev,
1640 struct device_attribute *attr,
1641 char *buf)
1642{
1643 struct gendisk *disk = dev_to_disk(dev);
1644
1645 return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
1646}
1647
1648static ssize_t disk_events_poll_msecs_store(struct device *dev,
1649 struct device_attribute *attr,
1650 const char *buf, size_t count)
1651{
1652 struct gendisk *disk = dev_to_disk(dev);
1653 long intv;
1654
1655 if (!count || !sscanf(buf, "%ld", &intv))
1656 return -EINVAL;
1657
1658 if (intv < 0 && intv != -1)
1659 return -EINVAL;
1660
1661 __disk_block_events(disk, true);
1662 disk->ev->poll_msecs = intv;
1663 __disk_unblock_events(disk, true);
1664
1665 return count;
1666}
1667
1668static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
1669static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
1670static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
1671 disk_events_poll_msecs_show,
1672 disk_events_poll_msecs_store);
1673
1674static const struct attribute *disk_events_attrs[] = {
1675 &dev_attr_events.attr,
1676 &dev_attr_events_async.attr,
1677 &dev_attr_events_poll_msecs.attr,
1678 NULL,
1679};
1680
1681/*
1682 * The default polling interval can be specified by the kernel
1683 * parameter block.events_dfl_poll_msecs which defaults to 0
1684 * (disable). This can also be modified runtime by writing to
1685 * /sys/module/block/events_dfl_poll_msecs.
1686 */
1687static int disk_events_set_dfl_poll_msecs(const char *val,
1688 const struct kernel_param *kp)
1689{
1690 struct disk_events *ev;
1691 int ret;
1692
1693 ret = param_set_ulong(val, kp);
1694 if (ret < 0)
1695 return ret;
1696
1697 mutex_lock(&disk_events_mutex);
1698
1699 list_for_each_entry(ev, &disk_events, node)
1700 disk_check_events(ev->disk);
1701
1702 mutex_unlock(&disk_events_mutex);
1703
1704 return 0;
1705}
1706
1707static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
1708 .set = disk_events_set_dfl_poll_msecs,
1709 .get = param_get_ulong,
1710};
1711
1712#undef MODULE_PARAM_PREFIX
1713#define MODULE_PARAM_PREFIX "block."
1714
1715module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
1716 &disk_events_dfl_poll_msecs, 0644);
1717
1718/*
1719 * disk_{add|del|release}_events - initialize and destroy disk_events.
1720 */
1721static void disk_add_events(struct gendisk *disk)
1722{
1723 struct disk_events *ev;
1724
1725 if (!disk->fops->check_events || !(disk->events | disk->async_events))
1726 return;
1727
1728 ev = kzalloc(sizeof(*ev), GFP_KERNEL);
1729 if (!ev) {
1730 pr_warn("%s: failed to initialize events\n", disk->disk_name);
1731 return;
1732 }
1733
1734 if (sysfs_create_files(&disk_to_dev(disk)->kobj,
1735 disk_events_attrs) < 0) {
1736 pr_warn("%s: failed to create sysfs files for events\n",
1737 disk->disk_name);
1738 kfree(ev);
1739 return;
1740 }
1741
1742 disk->ev = ev;
1743
1744 INIT_LIST_HEAD(&ev->node);
1745 ev->disk = disk;
1746 spin_lock_init(&ev->lock);
1747 ev->block = 1;
1748 ev->poll_msecs = -1;
1749 INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
1750
1751 mutex_lock(&disk_events_mutex);
1752 list_add_tail(&ev->node, &disk_events);
1753 mutex_unlock(&disk_events_mutex);
1754
1755 /*
1756 * Block count is initialized to 1 and the following initial
1757 * unblock kicks it into action.
1758 */
1759 __disk_unblock_events(disk, true);
1760}
1761
1762static void disk_del_events(struct gendisk *disk)
1763{
1764 if (!disk->ev)
1765 return;
1766
1767 __disk_block_events(disk, true);
1768
1769 mutex_lock(&disk_events_mutex);
1770 list_del_init(&disk->ev->node);
1771 mutex_unlock(&disk_events_mutex);
1772
1773 sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
1774}
1775
1776static void disk_release_events(struct gendisk *disk)
1777{
1778 /* the block count should be 1 from disk_del_events() */
1779 WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
1780 kfree(disk->ev);
1781}
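
For context, a driver opts into the machinery above through the ->check_events() operation and the disk->events mask that disk_add_events() and disk_events_poll_jiffies() consume. A rough sketch of the shape of such a conversion; my_media_changed() and my_probe_tail() are hypothetical placeholders, not part of this merge:

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/module.h>

static bool my_media_changed(struct gendisk *disk);	/* hypothetical hw query */

static unsigned int my_check_events(struct gendisk *disk, unsigned int clearing)
{
	/* poll the hardware; only report events advertised in disk->events */
	if (my_media_changed(disk))
		return DISK_EVENT_MEDIA_CHANGE;
	return 0;
}

static const struct block_device_operations my_fops = {
	.owner		= THIS_MODULE,
	.check_events	= my_check_events,
};

static void my_probe_tail(struct gendisk *gd)
{
	gd->fops = &my_fops;
	gd->events = DISK_EVENT_MEDIA_CHANGE;	/* events we can report */
	add_disk(gd);				/* runs disk_add_events() */
}
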
diff --git a/block/ioctl.c b/block/ioctl.c
index a9a302eba01e..9049d460fa89 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
294 return -EINVAL; 294 return -EINVAL;
295 if (get_user(n, (int __user *) arg)) 295 if (get_user(n, (int __user *) arg))
296 return -EFAULT; 296 return -EFAULT;
297 if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) 297 if (!(mode & FMODE_EXCL) &&
298 blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
298 return -EBUSY; 299 return -EBUSY;
299 ret = set_blocksize(bdev, n); 300 ret = set_blocksize(bdev, n);
300 if (!(mode & FMODE_EXCL)) 301 if (!(mode & FMODE_EXCL))
301 bd_release(bdev); 302 blkdev_put(bdev, mode | FMODE_EXCL);
302 return ret; 303 return ret;
303 case BLKPG: 304 case BLKPG:
304 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); 305 ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
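
The BLKBSZSET conversion above is one instance of the pattern used throughout this merge: exclusive access is now requested by passing FMODE_EXCL and a holder cookie to the blkdev_get() family, and dropped by blkdev_put() with the same mode, instead of a separate bd_claim()/bd_release() pair. A hedged sketch of the idiom as it appears elsewhere in this series (the caller, path and holder are illustrative):

#include <linux/blkdev.h>
#include <linux/err.h>
#include <linux/fs.h>

static int my_use_disk_exclusively(const char *path, void *holder)
{
	struct block_device *bdev;

	/* open and claim in a single step; @holder identifies the claimant */
	bdev = blkdev_get_by_path(path, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				  holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* ... exclusive use of the device ... */

	/* FMODE_EXCL must be passed again so the claim is dropped as well */
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
	return 0;
}
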
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1ea1a34e78b2..3803a0348937 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -911,8 +911,6 @@ struct drbd_md {
911struct drbd_backing_dev { 911struct drbd_backing_dev {
912 struct block_device *backing_bdev; 912 struct block_device *backing_bdev;
913 struct block_device *md_bdev; 913 struct block_device *md_bdev;
914 struct file *lo_file;
915 struct file *md_file;
916 struct drbd_md md; 914 struct drbd_md md;
917 struct disk_conf dc; /* The user provided config... */ 915 struct disk_conf dc; /* The user provided config... */
918 sector_t known_size; /* last known size of that backing device */ 916 sector_t known_size; /* last known size of that backing device */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6be5401d0e88..29cd0dc9fe4f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3372,11 +3372,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
3372 if (ldev == NULL) 3372 if (ldev == NULL)
3373 return; 3373 return;
3374 3374
3375 bd_release(ldev->backing_bdev); 3375 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3376 bd_release(ldev->md_bdev); 3376 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3377
3378 fput(ldev->lo_file);
3379 fput(ldev->md_file);
3380 3377
3381 kfree(ldev); 3378 kfree(ldev);
3382} 3379}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 29e5c70e4e26..8cbfaa687d72 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
855 sector_t max_possible_sectors; 855 sector_t max_possible_sectors;
856 sector_t min_md_device_sectors; 856 sector_t min_md_device_sectors;
857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ 857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
858 struct inode *inode, *inode2; 858 struct block_device *bdev;
859 struct lru_cache *resync_lru = NULL; 859 struct lru_cache *resync_lru = NULL;
860 union drbd_state ns, os; 860 union drbd_state ns, os;
861 unsigned int max_seg_s; 861 unsigned int max_seg_s;
@@ -907,46 +907,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
907 } 907 }
908 } 908 }
909 909
910 nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); 910 bdev = blkdev_get_by_path(nbc->dc.backing_dev,
911 if (IS_ERR(nbc->lo_file)) { 911 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
912 if (IS_ERR(bdev)) {
912 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, 913 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
913 PTR_ERR(nbc->lo_file)); 914 PTR_ERR(bdev));
914 nbc->lo_file = NULL;
915 retcode = ERR_OPEN_DISK; 915 retcode = ERR_OPEN_DISK;
916 goto fail; 916 goto fail;
917 } 917 }
918 nbc->backing_bdev = bdev;
918 919
919 inode = nbc->lo_file->f_dentry->d_inode; 920 /*
920 921 * meta_dev_idx >= 0: external fixed size, possibly multiple
921 if (!S_ISBLK(inode->i_mode)) { 922 * drbd sharing one meta device. TODO in that case, paranoia
922 retcode = ERR_DISK_NOT_BDEV; 923 * check that [md_bdev, meta_dev_idx] is not yet used by some
923 goto fail; 924 * other drbd minor! (if you use drbd.conf + drbdadm, that
924 } 925 * should check it for you already; but if you don't, or
925 926 * someone fooled it, we need to double check here)
926 nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); 927 */
927 if (IS_ERR(nbc->md_file)) { 928 bdev = blkdev_get_by_path(nbc->dc.meta_dev,
929 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
930 (nbc->dc.meta_dev_idx < 0) ?
931 (void *)mdev : (void *)drbd_m_holder);
932 if (IS_ERR(bdev)) {
928 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, 933 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
929 PTR_ERR(nbc->md_file)); 934 PTR_ERR(bdev));
930 nbc->md_file = NULL;
931 retcode = ERR_OPEN_MD_DISK; 935 retcode = ERR_OPEN_MD_DISK;
932 goto fail; 936 goto fail;
933 } 937 }
938 nbc->md_bdev = bdev;
934 939
935 inode2 = nbc->md_file->f_dentry->d_inode; 940 if ((nbc->backing_bdev == nbc->md_bdev) !=
936 941 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
937 if (!S_ISBLK(inode2->i_mode)) { 942 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
938 retcode = ERR_MD_NOT_BDEV; 943 retcode = ERR_MD_IDX_INVALID;
939 goto fail;
940 }
941
942 nbc->backing_bdev = inode->i_bdev;
943 if (bd_claim(nbc->backing_bdev, mdev)) {
944 printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
945 nbc->backing_bdev, mdev,
946 nbc->backing_bdev->bd_holder,
947 nbc->backing_bdev->bd_contains->bd_holder,
948 nbc->backing_bdev->bd_holders);
949 retcode = ERR_BDCLAIM_DISK;
950 goto fail; 944 goto fail;
951 } 945 }
952 946
@@ -955,28 +949,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
955 offsetof(struct bm_extent, lce)); 949 offsetof(struct bm_extent, lce));
956 if (!resync_lru) { 950 if (!resync_lru) {
957 retcode = ERR_NOMEM; 951 retcode = ERR_NOMEM;
958 goto release_bdev_fail; 952 goto fail;
959 }
960
961 /* meta_dev_idx >= 0: external fixed size,
962 * possibly multiple drbd sharing one meta device.
963 * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
964 * not yet used by some other drbd minor!
965 * (if you use drbd.conf + drbdadm,
966 * that should check it for you already; but if you don't, or someone
967 * fooled it, we need to double check here) */
968 nbc->md_bdev = inode2->i_bdev;
969 if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
970 : (void *) drbd_m_holder)) {
971 retcode = ERR_BDCLAIM_MD_DISK;
972 goto release_bdev_fail;
973 }
974
975 if ((nbc->backing_bdev == nbc->md_bdev) !=
976 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
977 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
978 retcode = ERR_MD_IDX_INVALID;
979 goto release_bdev2_fail;
980 } 953 }
981 954
982 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ 955 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
@@ -987,7 +960,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
987 (unsigned long long) drbd_get_max_capacity(nbc), 960 (unsigned long long) drbd_get_max_capacity(nbc),
988 (unsigned long long) nbc->dc.disk_size); 961 (unsigned long long) nbc->dc.disk_size);
989 retcode = ERR_DISK_TO_SMALL; 962 retcode = ERR_DISK_TO_SMALL;
990 goto release_bdev2_fail; 963 goto fail;
991 } 964 }
992 965
993 if (nbc->dc.meta_dev_idx < 0) { 966 if (nbc->dc.meta_dev_idx < 0) {
@@ -1004,7 +977,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1004 dev_warn(DEV, "refusing attach: md-device too small, " 977 dev_warn(DEV, "refusing attach: md-device too small, "
1005 "at least %llu sectors needed for this meta-disk type\n", 978 "at least %llu sectors needed for this meta-disk type\n",
1006 (unsigned long long) min_md_device_sectors); 979 (unsigned long long) min_md_device_sectors);
1007 goto release_bdev2_fail; 980 goto fail;
1008 } 981 }
1009 982
1010 /* Make sure the new disk is big enough 983 /* Make sure the new disk is big enough
@@ -1012,7 +985,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1012 if (drbd_get_max_capacity(nbc) < 985 if (drbd_get_max_capacity(nbc) <
1013 drbd_get_capacity(mdev->this_bdev)) { 986 drbd_get_capacity(mdev->this_bdev)) {
1014 retcode = ERR_DISK_TO_SMALL; 987 retcode = ERR_DISK_TO_SMALL;
1015 goto release_bdev2_fail; 988 goto fail;
1016 } 989 }
1017 990
1018 nbc->known_size = drbd_get_capacity(nbc->backing_bdev); 991 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
@@ -1035,7 +1008,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1035 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); 1008 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1036 drbd_resume_io(mdev); 1009 drbd_resume_io(mdev);
1037 if (retcode < SS_SUCCESS) 1010 if (retcode < SS_SUCCESS)
1038 goto release_bdev2_fail; 1011 goto fail;
1039 1012
1040 if (!get_ldev_if_state(mdev, D_ATTACHING)) 1013 if (!get_ldev_if_state(mdev, D_ATTACHING))
1041 goto force_diskless; 1014 goto force_diskless;
@@ -1269,18 +1242,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1269 force_diskless: 1242 force_diskless:
1270 drbd_force_state(mdev, NS(disk, D_FAILED)); 1243 drbd_force_state(mdev, NS(disk, D_FAILED));
1271 drbd_md_sync(mdev); 1244 drbd_md_sync(mdev);
1272 release_bdev2_fail:
1273 if (nbc)
1274 bd_release(nbc->md_bdev);
1275 release_bdev_fail:
1276 if (nbc)
1277 bd_release(nbc->backing_bdev);
1278 fail: 1245 fail:
1279 if (nbc) { 1246 if (nbc) {
1280 if (nbc->lo_file) 1247 if (nbc->backing_bdev)
1281 fput(nbc->lo_file); 1248 blkdev_put(nbc->backing_bdev,
1282 if (nbc->md_file) 1249 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1283 fput(nbc->md_file); 1250 if (nbc->md_bdev)
1251 blkdev_put(nbc->md_bdev,
1252 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1284 kfree(nbc); 1253 kfree(nbc);
1285 } 1254 }
1286 lc_destroy(resync_lru); 1255 lc_destroy(resync_lru);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7ea0bea2f7e3..44e18c073c44 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -395,11 +395,7 @@ lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
395 struct loop_device *lo = p->lo; 395 struct loop_device *lo = p->lo;
396 struct page *page = buf->page; 396 struct page *page = buf->page;
397 sector_t IV; 397 sector_t IV;
398 int size, ret; 398 int size;
399
400 ret = buf->ops->confirm(pipe, buf);
401 if (unlikely(ret))
402 return ret;
403 399
404 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 400 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
405 (buf->offset >> 9); 401 (buf->offset >> 9);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 19b3568e9326..77d70eebb6b2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2296,15 +2296,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2296 * so bdget() can't fail. 2296 * so bdget() can't fail.
2297 */ 2297 */
2298 bdget(pd->bdev->bd_dev); 2298 bdget(pd->bdev->bd_dev);
2299 if ((ret = blkdev_get(pd->bdev, FMODE_READ))) 2299 if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd)))
2300 goto out; 2300 goto out;
2301 2301
2302 if ((ret = bd_claim(pd->bdev, pd)))
2303 goto out_putdev;
2304
2305 if ((ret = pkt_get_last_written(pd, &lba))) { 2302 if ((ret = pkt_get_last_written(pd, &lba))) {
2306 printk(DRIVER_NAME": pkt_get_last_written failed\n"); 2303 printk(DRIVER_NAME": pkt_get_last_written failed\n");
2307 goto out_unclaim; 2304 goto out_putdev;
2308 } 2305 }
2309 2306
2310 set_capacity(pd->disk, lba << 2); 2307 set_capacity(pd->disk, lba << 2);
@@ -2314,7 +2311,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2314 q = bdev_get_queue(pd->bdev); 2311 q = bdev_get_queue(pd->bdev);
2315 if (write) { 2312 if (write) {
2316 if ((ret = pkt_open_write(pd))) 2313 if ((ret = pkt_open_write(pd)))
2317 goto out_unclaim; 2314 goto out_putdev;
2318 /* 2315 /*
2319 * Some CDRW drives can not handle writes larger than one packet, 2316 * Some CDRW drives can not handle writes larger than one packet,
2320 * even if the size is a multiple of the packet size. 2317 * even if the size is a multiple of the packet size.
@@ -2329,23 +2326,21 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2329 } 2326 }
2330 2327
2331 if ((ret = pkt_set_segment_merging(pd, q))) 2328 if ((ret = pkt_set_segment_merging(pd, q)))
2332 goto out_unclaim; 2329 goto out_putdev;
2333 2330
2334 if (write) { 2331 if (write) {
2335 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { 2332 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
2336 printk(DRIVER_NAME": not enough memory for buffers\n"); 2333 printk(DRIVER_NAME": not enough memory for buffers\n");
2337 ret = -ENOMEM; 2334 ret = -ENOMEM;
2338 goto out_unclaim; 2335 goto out_putdev;
2339 } 2336 }
2340 printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); 2337 printk(DRIVER_NAME": %lukB available on disc\n", lba << 1);
2341 } 2338 }
2342 2339
2343 return 0; 2340 return 0;
2344 2341
2345out_unclaim:
2346 bd_release(pd->bdev);
2347out_putdev: 2342out_putdev:
2348 blkdev_put(pd->bdev, FMODE_READ); 2343 blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
2349out: 2344out:
2350 return ret; 2345 return ret;
2351} 2346}
@@ -2362,8 +2357,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
2362 pkt_lock_door(pd, 0); 2357 pkt_lock_door(pd, 0);
2363 2358
2364 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); 2359 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
2365 bd_release(pd->bdev); 2360 blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
2366 blkdev_put(pd->bdev, FMODE_READ);
2367 2361
2368 pkt_shrink_pktlist(pd); 2362 pkt_shrink_pktlist(pd);
2369} 2363}
@@ -2733,7 +2727,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2733 bdev = bdget(dev); 2727 bdev = bdget(dev);
2734 if (!bdev) 2728 if (!bdev)
2735 return -ENOMEM; 2729 return -ENOMEM;
2736 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); 2730 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
2737 if (ret) 2731 if (ret)
2738 return ret; 2732 return ret;
2739 2733
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index af13c62dc473..14033a36bcd0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1348,7 +1348,10 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
1348 if (!CDROM_CAN(CDC_SELECT_DISC)) 1348 if (!CDROM_CAN(CDC_SELECT_DISC))
1349 return -EDRIVE_CANT_DO_THIS; 1349 return -EDRIVE_CANT_DO_THIS;
1350 1350
1351 (void) cdi->ops->media_changed(cdi, slot); 1351 if (cdi->ops->check_events)
1352 cdi->ops->check_events(cdi, 0, slot);
1353 else
1354 cdi->ops->media_changed(cdi, slot);
1352 1355
1353 if (slot == CDSL_NONE) { 1356 if (slot == CDSL_NONE) {
1354 /* set media changed bits, on both queues */ 1357 /* set media changed bits, on both queues */
@@ -1392,6 +1395,42 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
1392 return slot; 1395 return slot;
1393} 1396}
1394 1397
1398/*
1399 * As cdrom implements an extra ioctl consumer for media changed
1400 * event, it needs to buffer ->check_events() output, such that event
1401 * is not lost for both the usual VFS and ioctl paths.
1402 * cdi->{vfs|ioctl}_events are used to buffer pending events for each
1403 * path.
1404 *
1405 * XXX: Locking is non-existent. cdi->ops->check_events() can be
1406 * called in parallel and buffering fields are accessed without any
1407 * exclusion. The original media_changed code had the same problem.
1408 * It might be better to simply deprecate CDROM_MEDIA_CHANGED ioctl
1409 * and remove this cruft altogether. It doesn't have much usefulness
1410 * at this point.
1411 */
1412static void cdrom_update_events(struct cdrom_device_info *cdi,
1413 unsigned int clearing)
1414{
1415 unsigned int events;
1416
1417 events = cdi->ops->check_events(cdi, clearing, CDSL_CURRENT);
1418 cdi->vfs_events |= events;
1419 cdi->ioctl_events |= events;
1420}
1421
1422unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
1423 unsigned int clearing)
1424{
1425 unsigned int events;
1426
1427 cdrom_update_events(cdi, clearing);
1428 events = cdi->vfs_events;
1429 cdi->vfs_events = 0;
1430 return events;
1431}
1432EXPORT_SYMBOL(cdrom_check_events);
1433
1395/* We want to make media_changed accessible to the user through an 1434/* We want to make media_changed accessible to the user through an
1396 * ioctl. The main problem now is that we must double-buffer the 1435 * ioctl. The main problem now is that we must double-buffer the
1397 * low-level implementation, to assure that the VFS and the user both 1436 * low-level implementation, to assure that the VFS and the user both
@@ -1403,15 +1442,26 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
1403{ 1442{
1404 unsigned int mask = (1 << (queue & 1)); 1443 unsigned int mask = (1 << (queue & 1));
1405 int ret = !!(cdi->mc_flags & mask); 1444 int ret = !!(cdi->mc_flags & mask);
1445 bool changed;
1406 1446
1407 if (!CDROM_CAN(CDC_MEDIA_CHANGED)) 1447 if (!CDROM_CAN(CDC_MEDIA_CHANGED))
1408 return ret; 1448 return ret;
1449
1409 /* changed since last call? */ 1450 /* changed since last call? */
1410 if (cdi->ops->media_changed(cdi, CDSL_CURRENT)) { 1451 if (cdi->ops->check_events) {
1452 BUG_ON(!queue); /* shouldn't be called from VFS path */
1453 cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
1454 changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
1455 cdi->ioctl_events = 0;
1456 } else
1457 changed = cdi->ops->media_changed(cdi, CDSL_CURRENT);
1458
1459 if (changed) {
1411 cdi->mc_flags = 0x3; /* set bit on both queues */ 1460 cdi->mc_flags = 0x3; /* set bit on both queues */
1412 ret |= 1; 1461 ret |= 1;
1413 cdi->media_written = 0; 1462 cdi->media_written = 0;
1414 } 1463 }
1464
1415 cdi->mc_flags &= ~mask; /* clear bit */ 1465 cdi->mc_flags &= ~mask; /* clear bit */
1416 return ret; 1466 return ret;
1417} 1467}
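
Since the cdrom core now prefers ->check_events() over ->media_changed() whenever a driver provides it (see cdrom_select_disc() and media_changed() above), a host driver converts roughly as follows. This is a sketch only; my_drive_media_changed() is a hypothetical hardware query and the remaining cdrom_device_ops members are omitted:

#include <linux/cdrom.h>
#include <linux/genhd.h>

static bool my_drive_media_changed(struct cdrom_device_info *cdi);

static unsigned int my_cdrom_check_events(struct cdrom_device_info *cdi,
					  unsigned int clearing, int slot)
{
	if (slot != CDSL_CURRENT)
		return 0;	/* changer slots not handled in this sketch */

	return my_drive_media_changed(cdi) ? DISK_EVENT_MEDIA_CHANGE : 0;
}

static struct cdrom_device_ops my_cdrom_ops = {
	/* open/release/drive_status etc. unchanged and omitted here */
	.check_events	= my_cdrom_check_events,
	/* ->media_changed is ignored once ->check_events is set, see above */
};
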
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index bfe25ea9766b..b4b9d5a47885 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -65,15 +65,12 @@ static int raw_open(struct inode *inode, struct file *filp)
65 if (!bdev) 65 if (!bdev)
66 goto out; 66 goto out;
67 igrab(bdev->bd_inode); 67 igrab(bdev->bd_inode);
68 err = blkdev_get(bdev, filp->f_mode); 68 err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open);
69 if (err) 69 if (err)
70 goto out; 70 goto out;
71 err = bd_claim(bdev, raw_open);
72 if (err)
73 goto out1;
74 err = set_blocksize(bdev, bdev_logical_block_size(bdev)); 71 err = set_blocksize(bdev, bdev_logical_block_size(bdev));
75 if (err) 72 if (err)
76 goto out2; 73 goto out1;
77 filp->f_flags |= O_DIRECT; 74 filp->f_flags |= O_DIRECT;
78 filp->f_mapping = bdev->bd_inode->i_mapping; 75 filp->f_mapping = bdev->bd_inode->i_mapping;
79 if (++raw_devices[minor].inuse == 1) 76 if (++raw_devices[minor].inuse == 1)
@@ -83,10 +80,8 @@ static int raw_open(struct inode *inode, struct file *filp)
83 mutex_unlock(&raw_mutex); 80 mutex_unlock(&raw_mutex);
84 return 0; 81 return 0;
85 82
86out2:
87 bd_release(bdev);
88out1: 83out1:
89 blkdev_put(bdev, filp->f_mode); 84 blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
90out: 85out:
91 mutex_unlock(&raw_mutex); 86 mutex_unlock(&raw_mutex);
92 return err; 87 return err;
@@ -110,8 +105,7 @@ static int raw_release(struct inode *inode, struct file *filp)
110 } 105 }
111 mutex_unlock(&raw_mutex); 106 mutex_unlock(&raw_mutex);
112 107
113 bd_release(bdev); 108 blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
114 blkdev_put(bdev, filp->f_mode);
115 return 0; 109 return 0;
116} 110}
117 111
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 90267f8d64ee..67150c32986c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -325,15 +325,18 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev,
325 325
326 BUG_ON(d->dm_dev.bdev); 326 BUG_ON(d->dm_dev.bdev);
327 327
328 bdev = open_by_devnum(dev, d->dm_dev.mode); 328 bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
329 if (IS_ERR(bdev)) 329 if (IS_ERR(bdev))
330 return PTR_ERR(bdev); 330 return PTR_ERR(bdev);
331 r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); 331
332 if (r) 332 r = bd_link_disk_holder(bdev, dm_disk(md));
333 blkdev_put(bdev, d->dm_dev.mode); 333 if (r) {
334 else 334 blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
335 d->dm_dev.bdev = bdev; 335 return r;
336 return r; 336 }
337
338 d->dm_dev.bdev = bdev;
339 return 0;
337} 340}
338 341
339/* 342/*
@@ -344,8 +347,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
344 if (!d->dm_dev.bdev) 347 if (!d->dm_dev.bdev)
345 return; 348 return;
346 349
347 bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); 350 blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
348 blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
349 d->dm_dev.bdev = NULL; 351 d->dm_dev.bdev = NULL;
350} 352}
351 353
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7cb1352f7e7a..0a2b5516bc21 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,8 +990,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
990 if (r == DM_MAPIO_REMAPPED) { 990 if (r == DM_MAPIO_REMAPPED) {
991 /* the bio has been remapped so dispatch it */ 991 /* the bio has been remapped so dispatch it */
992 992
993 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, 993 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
994 tio->io->bio->bi_bdev->bd_dev, sector); 994 tio->io->bio->bi_bdev->bd_dev, sector);
995 995
996 generic_make_request(clone); 996 generic_make_request(clone);
997 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 997 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e71c5fa527f5..982fc347ebe4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1879,7 +1879,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1879 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); 1879 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
1880 1880
1881 list_add_rcu(&rdev->same_set, &mddev->disks); 1881 list_add_rcu(&rdev->same_set, &mddev->disks);
1882 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1882 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
1883 1883
1884 /* May as well allow recovery to be retried once */ 1884 /* May as well allow recovery to be retried once */
1885 mddev->recovery_disabled = 0; 1885 mddev->recovery_disabled = 0;
@@ -1906,7 +1906,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1906 MD_BUG(); 1906 MD_BUG();
1907 return; 1907 return;
1908 } 1908 }
1909 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1910 list_del_rcu(&rdev->same_set); 1909 list_del_rcu(&rdev->same_set);
1911 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); 1910 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1912 rdev->mddev = NULL; 1911 rdev->mddev = NULL;
@@ -1934,19 +1933,13 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1934 struct block_device *bdev; 1933 struct block_device *bdev;
1935 char b[BDEVNAME_SIZE]; 1934 char b[BDEVNAME_SIZE];
1936 1935
1937 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 1936 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1937 shared ? (mdk_rdev_t *)lock_rdev : rdev);
1938 if (IS_ERR(bdev)) { 1938 if (IS_ERR(bdev)) {
1939 printk(KERN_ERR "md: could not open %s.\n", 1939 printk(KERN_ERR "md: could not open %s.\n",
1940 __bdevname(dev, b)); 1940 __bdevname(dev, b));
1941 return PTR_ERR(bdev); 1941 return PTR_ERR(bdev);
1942 } 1942 }
1943 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1944 if (err) {
1945 printk(KERN_ERR "md: could not bd_claim %s.\n",
1946 bdevname(bdev, b));
1947 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1948 return err;
1949 }
1950 if (!shared) 1943 if (!shared)
1951 set_bit(AllReserved, &rdev->flags); 1944 set_bit(AllReserved, &rdev->flags);
1952 rdev->bdev = bdev; 1945 rdev->bdev = bdev;
@@ -1959,8 +1952,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
1959 rdev->bdev = NULL; 1952 rdev->bdev = NULL;
1960 if (!bdev) 1953 if (!bdev)
1961 MD_BUG(); 1954 MD_BUG();
1962 bd_release(bdev); 1955 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1963 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1964} 1956}
1965 1957
1966void md_autodetect_dev(dev_t dev); 1958void md_autodetect_dev(dev_t dev);
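md's lock_rdev() conversion also shows the holder semantics: exclusive claims with the same holder cookie may coexist, so passing one common cookie for all "shared" rdevs lets several of them claim the same device. A small sketch of that idea, with illustrative names only.

#include <linux/fs.h>
#include <linux/err.h>

/* Sketch: a common cookie shared by all "shared" users, versus a
 * per-owner cookie for truly exclusive use (md uses the address of
 * lock_rdev itself as the shared cookie). */
static int example_shared_cookie;

static struct block_device *example_lock_dev(dev_t dev, bool shared, void *owner)
{
	return blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				 shared ? (void *)&example_shared_cookie : owner);
}
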
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 2cf0cc6a4189..f29a6f9df6e7 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
224 if (dev->blkdev) { 224 if (dev->blkdev) {
225 invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 225 invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping,
226 0, -1); 226 0, -1);
227 close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); 227 blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
228 } 228 }
229 229
230 kfree(dev); 230 kfree(dev);
@@ -234,6 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
234/* FIXME: ensure that mtd->size % erase_size == 0 */ 234/* FIXME: ensure that mtd->size % erase_size == 0 */
235static struct block2mtd_dev *add_device(char *devname, int erase_size) 235static struct block2mtd_dev *add_device(char *devname, int erase_size)
236{ 236{
237 const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
237 struct block_device *bdev; 238 struct block_device *bdev;
238 struct block2mtd_dev *dev; 239 struct block2mtd_dev *dev;
239 char *name; 240 char *name;
@@ -246,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
246 return NULL; 247 return NULL;
247 248
248 /* Get a handle on the device */ 249 /* Get a handle on the device */
249 bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); 250 bdev = blkdev_get_by_path(devname, mode, dev);
250#ifndef MODULE 251#ifndef MODULE
251 if (IS_ERR(bdev)) { 252 if (IS_ERR(bdev)) {
252 253
@@ -254,9 +255,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
254 to resolve the device name by other means. */ 255 to resolve the device name by other means. */
255 256
256 dev_t devt = name_to_dev_t(devname); 257 dev_t devt = name_to_dev_t(devname);
257 if (devt) { 258 if (devt)
258 bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ); 259 bdev = blkdev_get_by_dev(devt, mode, dev);
259 }
260 } 260 }
261#endif 261#endif
262 262
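block2mtd now resolves the device by path first and, in the built-in (non-modular) case shown above, falls back to a raw device number when the path cannot be looked up yet during early boot. A control-flow sketch of that fallback, assuming the same context in which name_to_dev_t() is available to block2mtd; the helper name is illustrative.

#include <linux/fs.h>
#include <linux/err.h>

/* Sketch: open by path, fall back to a dev_t lookup when the path does
 * not resolve (e.g. before udev has created the node). */
static struct block_device *example_open_by_name(char *devname, fmode_t mode,
						 void *holder)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_path(devname, mode, holder);
#ifndef MODULE
	if (IS_ERR(bdev)) {
		dev_t devt = name_to_dev_t(devname);

		if (devt)
			bdev = blkdev_get_by_dev(devt, mode, holder);
	}
#endif
	return bdev;
}
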
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 30a1ca3d08b7..5505bc07e1e7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -103,7 +103,7 @@ int dasd_scan_partitions(struct dasd_block *block)
103 struct block_device *bdev; 103 struct block_device *bdev;
104 104
105 bdev = bdget_disk(block->gdp, 0); 105 bdev = bdget_disk(block->gdp, 0);
106 if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) 106 if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0)
107 return -ENODEV; 107 return -ENODEV;
108 /* 108 /*
109 * See fs/partition/check.c:register_disk,rescan_partitions 109 * See fs/partition/check.c:register_disk,rescan_partitions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0ed7a66cdded..b702b446d151 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1978,8 +1978,7 @@ EXPORT_SYMBOL(scsi_mode_sense);
1978 * in. 1978 * in.
1979 * 1979 *
1980 * Returns zero if unsuccessful or an error if TUR failed. For 1980 * Returns zero if unsuccessful or an error if TUR failed. For
1981 * removable media, a return of NOT_READY or UNIT_ATTENTION is 1981 * removable media, UNIT_ATTENTION sets ->changed flag.
1982 * translated to success, with the ->changed flag updated.
1983 **/ 1982 **/
1984int 1983int
1985scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, 1984scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
@@ -2006,16 +2005,6 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
2006 } while (scsi_sense_valid(sshdr) && 2005 } while (scsi_sense_valid(sshdr) &&
2007 sshdr->sense_key == UNIT_ATTENTION && --retries); 2006 sshdr->sense_key == UNIT_ATTENTION && --retries);
2008 2007
2009 if (!sshdr)
2010 /* could not allocate sense buffer, so can't process it */
2011 return result;
2012
2013 if (sdev->removable && scsi_sense_valid(sshdr) &&
2014 (sshdr->sense_key == UNIT_ATTENTION ||
2015 sshdr->sense_key == NOT_READY)) {
2016 sdev->changed = 1;
2017 result = 0;
2018 }
2019 if (!sshdr_external) 2008 if (!sshdr_external)
2020 kfree(sshdr); 2009 kfree(sshdr);
2021 return result; 2010 return result;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 365024b0c407..b65e65aa07eb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1043,15 +1043,7 @@ static int sd_media_changed(struct gendisk *disk)
1043 sshdr); 1043 sshdr);
1044 } 1044 }
1045 1045
1046 /* 1046 if (retval) {
1047 * Unable to test, unit probably not ready. This usually
1048 * means there is no disc in the drive. Mark as changed,
1049 * and we will figure it out later once the drive is
1050 * available again.
1051 */
1052 if (retval || (scsi_sense_valid(sshdr) &&
1053 /* 0x3a is medium not present */
1054 sshdr->asc == 0x3a)) {
1055 set_media_not_present(sdkp); 1047 set_media_not_present(sdkp);
1056 goto out; 1048 goto out;
1057 } 1049 }
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d7b383c96d5d..be6baf8ad704 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -104,14 +104,15 @@ static void sr_release(struct cdrom_device_info *);
104static void get_sectorsize(struct scsi_cd *); 104static void get_sectorsize(struct scsi_cd *);
105static void get_capabilities(struct scsi_cd *); 105static void get_capabilities(struct scsi_cd *);
106 106
107static int sr_media_change(struct cdrom_device_info *, int); 107static unsigned int sr_check_events(struct cdrom_device_info *cdi,
108 unsigned int clearing, int slot);
108static int sr_packet(struct cdrom_device_info *, struct packet_command *); 109static int sr_packet(struct cdrom_device_info *, struct packet_command *);
109 110
110static struct cdrom_device_ops sr_dops = { 111static struct cdrom_device_ops sr_dops = {
111 .open = sr_open, 112 .open = sr_open,
112 .release = sr_release, 113 .release = sr_release,
113 .drive_status = sr_drive_status, 114 .drive_status = sr_drive_status,
114 .media_changed = sr_media_change, 115 .check_events = sr_check_events,
115 .tray_move = sr_tray_move, 116 .tray_move = sr_tray_move,
116 .lock_door = sr_lock_door, 117 .lock_door = sr_lock_door,
117 .select_speed = sr_select_speed, 118 .select_speed = sr_select_speed,
@@ -165,90 +166,96 @@ static void scsi_cd_put(struct scsi_cd *cd)
165 mutex_unlock(&sr_ref_mutex); 166 mutex_unlock(&sr_ref_mutex);
166} 167}
167 168
168/* identical to scsi_test_unit_ready except that it doesn't 169static unsigned int sr_get_events(struct scsi_device *sdev)
169 * eat the NOT_READY returns for removable media */
170int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr)
171{ 170{
172 int retries = MAX_RETRIES; 171 u8 buf[8];
173 int the_result; 172 u8 cmd[] = { GET_EVENT_STATUS_NOTIFICATION,
174 u8 cmd[] = {TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 173 1, /* polled */
174 0, 0, /* reserved */
175 1 << 4, /* notification class: media */
176 0, 0, /* reserved */
177 0, sizeof(buf), /* allocation length */
178 0, /* control */
179 };
180 struct event_header *eh = (void *)buf;
181 struct media_event_desc *med = (void *)(buf + 4);
182 struct scsi_sense_hdr sshdr;
183 int result;
175 184
176 /* issue TEST_UNIT_READY until the initial startup UNIT_ATTENTION 185 result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, sizeof(buf),
177 * conditions are gone, or a timeout happens 186 &sshdr, SR_TIMEOUT, MAX_RETRIES, NULL);
178 */ 187 if (scsi_sense_valid(&sshdr) && sshdr.sense_key == UNIT_ATTENTION)
179 do { 188 return DISK_EVENT_MEDIA_CHANGE;
180 the_result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 189
181 0, sshdr, SR_TIMEOUT, 190 if (result || be16_to_cpu(eh->data_len) < sizeof(*med))
182 retries--, NULL); 191 return 0;
183 if (scsi_sense_valid(sshdr) && 192
184 sshdr->sense_key == UNIT_ATTENTION) 193 if (eh->nea || eh->notification_class != 0x4)
185 sdev->changed = 1; 194 return 0;
186 195
187 } while (retries > 0 && 196 if (med->media_event_code == 1)
188 (!scsi_status_is_good(the_result) || 197 return DISK_EVENT_EJECT_REQUEST;
189 (scsi_sense_valid(sshdr) && 198 else if (med->media_event_code == 2)
190 sshdr->sense_key == UNIT_ATTENTION))); 199 return DISK_EVENT_MEDIA_CHANGE;
191 return the_result; 200 return 0;
192} 201}
193 202
194/* 203/*
195 * This function checks to see if the media has been changed in the 204 * This function checks to see if the media has been changed or eject
196 * CDROM drive. It is possible that we have already sensed a change, 205 * button has been pressed. It is possible that we have already
197 * or the drive may have sensed one and not yet reported it. We must 206 * sensed a change, or the drive may have sensed one and not yet
198 * be ready for either case. This function always reports the current 207 * reported it. The past events are accumulated in sdev->changed and
199 * value of the changed bit. If flag is 0, then the changed bit is reset. 208 * returned together with the current state.
200 * This function could be done as an ioctl, but we would need to have
201 * an inode for that to work, and we do not always have one.
202 */ 209 */
203 210static unsigned int sr_check_events(struct cdrom_device_info *cdi,
204static int sr_media_change(struct cdrom_device_info *cdi, int slot) 211 unsigned int clearing, int slot)
205{ 212{
206 struct scsi_cd *cd = cdi->handle; 213 struct scsi_cd *cd = cdi->handle;
207 int retval; 214 bool last_present;
208 struct scsi_sense_hdr *sshdr; 215 struct scsi_sense_hdr sshdr;
216 unsigned int events;
217 int ret;
209 218
210 if (CDSL_CURRENT != slot) { 219 /* no changer support */
211 /* no changer support */ 220 if (CDSL_CURRENT != slot)
212 return -EINVAL; 221 return 0;
213 }
214 222
215 sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); 223 events = sr_get_events(cd->device);
216 retval = sr_test_unit_ready(cd->device, sshdr); 224 /*
217 if (retval || (scsi_sense_valid(sshdr) && 225 * GET_EVENT_STATUS_NOTIFICATION is enough unless MEDIA_CHANGE
218 /* 0x3a is medium not present */ 226 * is being cleared. Note that there are devices which hang
219 sshdr->asc == 0x3a)) { 227 * if asked to execute TUR repeatedly.
220 /* Media not present or unable to test, unit probably not 228 */
221 * ready. This usually means there is no disc in the drive. 229 if (!(clearing & DISK_EVENT_MEDIA_CHANGE))
222 * Mark as changed, and we will figure it out later once 230 goto skip_tur;
223 * the drive is available again. 231
224 */ 232 /* let's see whether the media is there with TUR */
225 cd->device->changed = 1; 233 last_present = cd->media_present;
226 /* This will force a flush, if called from check_disk_change */ 234 ret = scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
227 retval = 1; 235
228 goto out; 236 /*
229 }; 237 * Media is considered to be present if TUR succeeds or fails with
238 * sense data indicating something other than media-not-present
239 * (ASC 0x3a).
240 */
241 cd->media_present = scsi_status_is_good(ret) ||
242 (scsi_sense_valid(&sshdr) && sshdr.asc != 0x3a);
230 243
231 retval = cd->device->changed; 244 if (last_present != cd->media_present)
232 cd->device->changed = 0; 245 events |= DISK_EVENT_MEDIA_CHANGE;
233 /* If the disk changed, the capacity will now be different, 246skip_tur:
234 * so we force a re-read of this information */ 247 if (cd->device->changed) {
235 if (retval) { 248 events |= DISK_EVENT_MEDIA_CHANGE;
236 /* check multisession offset etc */ 249 cd->device->changed = 0;
237 sr_cd_check(cdi);
238 get_sectorsize(cd);
239 } 250 }
240 251
241out: 252 /* for backward compatibility */
242 /* Notify userspace, that media has changed. */ 253 if (events & DISK_EVENT_MEDIA_CHANGE)
243 if (retval != cd->previous_state)
244 sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE, 254 sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE,
245 GFP_KERNEL); 255 GFP_KERNEL);
246 cd->previous_state = retval; 256 return events;
247 kfree(sshdr);
248
249 return retval;
250} 257}
251 258
252/* 259/*
253 * sr_done is the interrupt routine for the device driver. 260 * sr_done is the interrupt routine for the device driver.
254 * 261 *
@@ -533,10 +540,25 @@ out:
533 return ret; 540 return ret;
534} 541}
535 542
536static int sr_block_media_changed(struct gendisk *disk) 543static unsigned int sr_block_check_events(struct gendisk *disk,
544 unsigned int clearing)
537{ 545{
538 struct scsi_cd *cd = scsi_cd(disk); 546 struct scsi_cd *cd = scsi_cd(disk);
539 return cdrom_media_changed(&cd->cdi); 547 return cdrom_check_events(&cd->cdi, clearing);
548}
549
550static int sr_block_revalidate_disk(struct gendisk *disk)
551{
552 struct scsi_cd *cd = scsi_cd(disk);
553 struct scsi_sense_hdr sshdr;
554
555 /* if the unit is not ready, nothing more to do */
556 if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
557 return 0;
558
559 sr_cd_check(&cd->cdi);
560 get_sectorsize(cd);
561 return 0;
540} 562}
541 563
542static const struct block_device_operations sr_bdops = 564static const struct block_device_operations sr_bdops =
@@ -545,7 +567,8 @@ static const struct block_device_operations sr_bdops =
545 .open = sr_block_open, 567 .open = sr_block_open,
546 .release = sr_block_release, 568 .release = sr_block_release,
547 .ioctl = sr_block_ioctl, 569 .ioctl = sr_block_ioctl,
548 .media_changed = sr_block_media_changed, 570 .check_events = sr_block_check_events,
571 .revalidate_disk = sr_block_revalidate_disk,
549 /* 572 /*
550 * No compat_ioctl for now because sr_block_ioctl never 573 * No compat_ioctl for now because sr_block_ioctl never
551 * seems to pass arbitary ioctls down to host drivers. 574 * seems to pass arbitary ioctls down to host drivers.
@@ -618,6 +641,7 @@ static int sr_probe(struct device *dev)
618 sprintf(disk->disk_name, "sr%d", minor); 641 sprintf(disk->disk_name, "sr%d", minor);
619 disk->fops = &sr_bdops; 642 disk->fops = &sr_bdops;
620 disk->flags = GENHD_FL_CD; 643 disk->flags = GENHD_FL_CD;
644 disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
621 645
622 blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); 646 blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
623 647
@@ -627,7 +651,7 @@ static int sr_probe(struct device *dev)
627 cd->disk = disk; 651 cd->disk = disk;
628 cd->capacity = 0x1fffff; 652 cd->capacity = 0x1fffff;
629 cd->device->changed = 1; /* force recheck CD type */ 653 cd->device->changed = 1; /* force recheck CD type */
630 cd->previous_state = 1; 654 cd->media_present = 1;
631 cd->use = 1; 655 cd->use = 1;
632 cd->readcd_known = 0; 656 cd->readcd_known = 0;
633 cd->readcd_cdda = 0; 657 cd->readcd_cdda = 0;
@@ -780,7 +804,7 @@ static void get_capabilities(struct scsi_cd *cd)
780 } 804 }
781 805
782 /* eat unit attentions */ 806 /* eat unit attentions */
783 sr_test_unit_ready(cd->device, &sshdr); 807 scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
784 808
785 /* ask for mode page 0x2a */ 809 /* ask for mode page 0x2a */
786 rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, 810 rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
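The sr changes above replace ->media_changed with the event-polling model: the driver advertises the events it can generate in disk->events at probe time and reports pending events from ->check_events(), which receives the mask the block layer is about to clear. A minimal sketch of that contract; the example_* names and the stub helpers are illustrative, a real driver queries its hardware (as sr does with GET_EVENT_STATUS_NOTIFICATION and TUR).

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>

/* Stand-ins for real hardware queries. */
static bool example_media_changed(struct gendisk *disk)   { return false; }
static bool example_eject_requested(struct gendisk *disk) { return false; }

/* Sketch: report pending media events instead of a media_changed() flag. */
static unsigned int example_check_events(struct gendisk *disk,
					 unsigned int clearing)
{
	unsigned int events = 0;

	if (example_media_changed(disk))
		events |= DISK_EVENT_MEDIA_CHANGE;
	if (example_eject_requested(disk))
		events |= DISK_EVENT_EJECT_REQUEST;
	return events;
}

static const struct block_device_operations example_fops = {
	.check_events	= example_check_events,
};

/* At probe time the driver also sets:
 *	disk->fops   = &example_fops;
 *	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 */
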
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 1e144dfdbd4b..e036f1dc83c8 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -40,7 +40,7 @@ typedef struct scsi_cd {
40 unsigned xa_flag:1; /* CD has XA sectors ? */ 40 unsigned xa_flag:1; /* CD has XA sectors ? */
41 unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */ 41 unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */
42 unsigned readcd_cdda:1; /* reading audio data using READ_CD */ 42 unsigned readcd_cdda:1; /* reading audio data using READ_CD */
43 unsigned previous_state:1; /* media has changed */ 43 unsigned media_present:1; /* media is present */
44 struct cdrom_device_info cdi; 44 struct cdrom_device_info cdi;
45 /* We hold gendisk and scsi_device references on probe and use 45 /* We hold gendisk and scsi_device references on probe and use
46 * the refs on this kref to decide when to release them */ 46 * the refs on this kref to decide when to release them */
@@ -61,7 +61,6 @@ int sr_select_speed(struct cdrom_device_info *cdi, int speed);
61int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *); 61int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *);
62 62
63int sr_is_xa(Scsi_CD *); 63int sr_is_xa(Scsi_CD *);
64int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr);
65 64
66/* sr_vendor.c */ 65/* sr_vendor.c */
67void sr_vendor_init(Scsi_CD *); 66void sr_vendor_init(Scsi_CD *);
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 3cd8ffbad577..8be30554119b 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -307,7 +307,7 @@ int sr_drive_status(struct cdrom_device_info *cdi, int slot)
307 /* we have no changer support */ 307 /* we have no changer support */
308 return -EINVAL; 308 return -EINVAL;
309 } 309 }
310 if (0 == sr_test_unit_ready(cd->device, &sshdr)) 310 if (!scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
311 return CDS_DISC_OK; 311 return CDS_DISC_OK;
312 312
313 /* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */ 313 /* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 3b513bafaf2a..b015561fd602 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -543,7 +543,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
543 ro = curlun->initially_ro; 543 ro = curlun->initially_ro;
544 if (!ro) { 544 if (!ro) {
545 filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); 545 filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
546 if (-EROFS == PTR_ERR(filp)) 546 if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES)
547 ro = 1; 547 ro = 1;
548 } 548 }
549 if (ro) 549 if (ro)
@@ -558,10 +558,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
558 558
559 if (filp->f_path.dentry) 559 if (filp->f_path.dentry)
560 inode = filp->f_path.dentry->d_inode; 560 inode = filp->f_path.dentry->d_inode;
561 if (inode && S_ISBLK(inode->i_mode)) { 561 if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
562 if (bdev_read_only(inode->i_bdev))
563 ro = 1;
564 } else if (!inode || !S_ISREG(inode->i_mode)) {
565 LINFO(curlun, "invalid file type: %s\n", filename); 562 LINFO(curlun, "invalid file type: %s\n", filename);
566 goto out; 563 goto out;
567 } 564 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd689..dab021db6c0b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -425,9 +425,6 @@ static void init_once(void *foo)
425 mutex_init(&bdev->bd_mutex); 425 mutex_init(&bdev->bd_mutex);
426 INIT_LIST_HEAD(&bdev->bd_inodes); 426 INIT_LIST_HEAD(&bdev->bd_inodes);
427 INIT_LIST_HEAD(&bdev->bd_list); 427 INIT_LIST_HEAD(&bdev->bd_list);
428#ifdef CONFIG_SYSFS
429 INIT_LIST_HEAD(&bdev->bd_holder_list);
430#endif
431 inode_init_once(&ei->vfs_inode); 428 inode_init_once(&ei->vfs_inode);
432 /* Initialize mutex for freeze. */ 429 /* Initialize mutex for freeze. */
433 mutex_init(&bdev->bd_fsfreeze_mutex); 430 mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -662,7 +659,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
662 else if (bdev->bd_contains == bdev) 659 else if (bdev->bd_contains == bdev)
663 return true; /* is a whole device which isn't held */ 660 return true; /* is a whole device which isn't held */
664 661
665 else if (whole->bd_holder == bd_claim) 662 else if (whole->bd_holder == bd_may_claim)
666 return true; /* is a partition of a device that is being partitioned */ 663 return true; /* is a partition of a device that is being partitioned */
667 else if (whole->bd_holder != NULL) 664 else if (whole->bd_holder != NULL)
668 return false; /* is a partition of a held device */ 665 return false; /* is a partition of a held device */
@@ -774,439 +771,87 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
774 } 771 }
775} 772}
776 773
777/* releases bdev_lock */
778static void __bd_abort_claiming(struct block_device *whole, void *holder)
779{
780 BUG_ON(whole->bd_claiming != holder);
781 whole->bd_claiming = NULL;
782 wake_up_bit(&whole->bd_claiming, 0);
783
784 spin_unlock(&bdev_lock);
785 bdput(whole);
786}
787
788/**
789 * bd_abort_claiming - abort claiming a block device
790 * @whole: whole block device returned by bd_start_claiming()
791 * @holder: holder trying to claim @bdev
792 *
793 * Abort a claiming block started by bd_start_claiming(). Note that
794 * @whole is not the block device to be claimed but the whole device
795 * returned by bd_start_claiming().
796 *
797 * CONTEXT:
798 * Grabs and releases bdev_lock.
799 */
800static void bd_abort_claiming(struct block_device *whole, void *holder)
801{
802 spin_lock(&bdev_lock);
803 __bd_abort_claiming(whole, holder); /* releases bdev_lock */
804}
805
806/* increment holders when we have a legitimate claim. requires bdev_lock */
807static void __bd_claim(struct block_device *bdev, struct block_device *whole,
808 void *holder)
809{
810 /* note that for a whole device bd_holders
811 * will be incremented twice, and bd_holder will
812 * be set to bd_claim before being set to holder
813 */
814 whole->bd_holders++;
815 whole->bd_holder = bd_claim;
816 bdev->bd_holders++;
817 bdev->bd_holder = holder;
818}
819
820/**
821 * bd_finish_claiming - finish claiming a block device
822 * @bdev: block device of interest (passed to bd_start_claiming())
823 * @whole: whole block device returned by bd_start_claiming()
824 * @holder: holder trying to claim @bdev
825 *
826 * Finish a claiming block started by bd_start_claiming().
827 *
828 * CONTEXT:
829 * Grabs and releases bdev_lock.
830 */
831static void bd_finish_claiming(struct block_device *bdev,
832 struct block_device *whole, void *holder)
833{
834 spin_lock(&bdev_lock);
835 BUG_ON(!bd_may_claim(bdev, whole, holder));
836 __bd_claim(bdev, whole, holder);
837 __bd_abort_claiming(whole, holder); /* not actually an abort */
838}
839
840/**
841 * bd_claim - claim a block device
842 * @bdev: block device to claim
843 * @holder: holder trying to claim @bdev
844 *
845 * Try to claim @bdev which must have been opened successfully.
846 *
847 * CONTEXT:
848 * Might sleep.
849 *
850 * RETURNS:
851 * 0 if successful, -EBUSY if @bdev is already claimed.
852 */
853int bd_claim(struct block_device *bdev, void *holder)
854{
855 struct block_device *whole = bdev->bd_contains;
856 int res;
857
858 might_sleep();
859
860 spin_lock(&bdev_lock);
861 res = bd_prepare_to_claim(bdev, whole, holder);
862 if (res == 0)
863 __bd_claim(bdev, whole, holder);
864 spin_unlock(&bdev_lock);
865
866 return res;
867}
868EXPORT_SYMBOL(bd_claim);
869
870void bd_release(struct block_device *bdev)
871{
872 spin_lock(&bdev_lock);
873 if (!--bdev->bd_contains->bd_holders)
874 bdev->bd_contains->bd_holder = NULL;
875 if (!--bdev->bd_holders)
876 bdev->bd_holder = NULL;
877 spin_unlock(&bdev_lock);
878}
879
880EXPORT_SYMBOL(bd_release);
881
882#ifdef CONFIG_SYSFS 774#ifdef CONFIG_SYSFS
883/*
884 * Functions for bd_claim_by_kobject / bd_release_from_kobject
885 *
886 * If a kobject is passed to bd_claim_by_kobject()
887 * and the kobject has a parent directory,
888 * following symlinks are created:
889 * o from the kobject to the claimed bdev
890 * o from "holders" directory of the bdev to the parent of the kobject
891 * bd_release_from_kobject() removes these symlinks.
892 *
893 * Example:
894 * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
895 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
896 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
897 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
898 */
899
900static int add_symlink(struct kobject *from, struct kobject *to) 775static int add_symlink(struct kobject *from, struct kobject *to)
901{ 776{
902 if (!from || !to)
903 return 0;
904 return sysfs_create_link(from, to, kobject_name(to)); 777 return sysfs_create_link(from, to, kobject_name(to));
905} 778}
906 779
907static void del_symlink(struct kobject *from, struct kobject *to) 780static void del_symlink(struct kobject *from, struct kobject *to)
908{ 781{
909 if (!from || !to)
910 return;
911 sysfs_remove_link(from, kobject_name(to)); 782 sysfs_remove_link(from, kobject_name(to));
912} 783}
913 784
914/*
915 * 'struct bd_holder' contains pointers to kobjects symlinked by
916 * bd_claim_by_kobject.
917 * It's connected to bd_holder_list which is protected by bdev->bd_sem.
918 */
919struct bd_holder {
920 struct list_head list; /* chain of holders of the bdev */
921 int count; /* references from the holder */
922 struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
923 struct kobject *hdev; /* e.g. "/block/dm-0" */
924 struct kobject *hdir; /* e.g. "/block/sda/holders" */
925 struct kobject *sdev; /* e.g. "/block/sda" */
926};
927
928/*
929 * Get references of related kobjects at once.
930 * Returns 1 on success. 0 on failure.
931 *
932 * Should call bd_holder_release_dirs() after successful use.
933 */
934static int bd_holder_grab_dirs(struct block_device *bdev,
935 struct bd_holder *bo)
936{
937 if (!bdev || !bo)
938 return 0;
939
940 bo->sdir = kobject_get(bo->sdir);
941 if (!bo->sdir)
942 return 0;
943
944 bo->hdev = kobject_get(bo->sdir->parent);
945 if (!bo->hdev)
946 goto fail_put_sdir;
947
948 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
949 if (!bo->sdev)
950 goto fail_put_hdev;
951
952 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
953 if (!bo->hdir)
954 goto fail_put_sdev;
955
956 return 1;
957
958fail_put_sdev:
959 kobject_put(bo->sdev);
960fail_put_hdev:
961 kobject_put(bo->hdev);
962fail_put_sdir:
963 kobject_put(bo->sdir);
964
965 return 0;
966}
967
968/* Put references of related kobjects at once. */
969static void bd_holder_release_dirs(struct bd_holder *bo)
970{
971 kobject_put(bo->hdir);
972 kobject_put(bo->sdev);
973 kobject_put(bo->hdev);
974 kobject_put(bo->sdir);
975}
976
977static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
978{
979 struct bd_holder *bo;
980
981 bo = kzalloc(sizeof(*bo), GFP_KERNEL);
982 if (!bo)
983 return NULL;
984
985 bo->count = 1;
986 bo->sdir = kobj;
987
988 return bo;
989}
990
991static void free_bd_holder(struct bd_holder *bo)
992{
993 kfree(bo);
994}
995
996/** 785/**
997 * find_bd_holder - find matching struct bd_holder from the block device 786 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
787 * @bdev: the claimed slave bdev
788 * @disk: the holding disk
998 * 789 *
999 * @bdev: struct block device to be searched 790 * This functions creates the following sysfs symlinks.
1000 * @bo: target struct bd_holder
1001 *
1002 * Returns matching entry with @bo in @bdev->bd_holder_list.
1003 * If found, increment the reference count and return the pointer.
1004 * If not found, returns NULL.
1005 */
1006static struct bd_holder *find_bd_holder(struct block_device *bdev,
1007 struct bd_holder *bo)
1008{
1009 struct bd_holder *tmp;
1010
1011 list_for_each_entry(tmp, &bdev->bd_holder_list, list)
1012 if (tmp->sdir == bo->sdir) {
1013 tmp->count++;
1014 return tmp;
1015 }
1016
1017 return NULL;
1018}
1019
1020/**
1021 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
1022 *
1023 * @bdev: block device to be bd_claimed
1024 * @bo: preallocated and initialized by alloc_bd_holder()
1025 *
1026 * Add @bo to @bdev->bd_holder_list, create symlinks.
1027 *
1028 * Returns 0 if symlinks are created.
1029 * Returns -ve if something fails.
1030 */
1031static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
1032{
1033 int err;
1034
1035 if (!bo)
1036 return -EINVAL;
1037
1038 if (!bd_holder_grab_dirs(bdev, bo))
1039 return -EBUSY;
1040
1041 err = add_symlink(bo->sdir, bo->sdev);
1042 if (err)
1043 return err;
1044
1045 err = add_symlink(bo->hdir, bo->hdev);
1046 if (err) {
1047 del_symlink(bo->sdir, bo->sdev);
1048 return err;
1049 }
1050
1051 list_add_tail(&bo->list, &bdev->bd_holder_list);
1052 return 0;
1053}
1054
1055/**
1056 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
1057 * 791 *
1058 * @bdev: block device to be bd_claimed 792 * - from "slaves" directory of the holder @disk to the claimed @bdev
1059 * @kobj: holder's kobject 793 * - from "holders" directory of the @bdev to the holder @disk
1060 * 794 *
1061 * If there is matching entry with @kobj in @bdev->bd_holder_list 795 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
1062 * and no other bd_claim() from the same kobject, 796 * passed to bd_link_disk_holder(), then:
1063 * remove the struct bd_holder from the list, delete symlinks for it.
1064 * 797 *
1065 * Returns a pointer to the struct bd_holder when it's removed from the list 798 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
1066 * and ready to be freed. 799 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
1067 * Returns NULL if matching claim isn't found or there is other bd_claim()
1068 * by the same kobject.
1069 */
1070static struct bd_holder *del_bd_holder(struct block_device *bdev,
1071 struct kobject *kobj)
1072{
1073 struct bd_holder *bo;
1074
1075 list_for_each_entry(bo, &bdev->bd_holder_list, list) {
1076 if (bo->sdir == kobj) {
1077 bo->count--;
1078 BUG_ON(bo->count < 0);
1079 if (!bo->count) {
1080 list_del(&bo->list);
1081 del_symlink(bo->sdir, bo->sdev);
1082 del_symlink(bo->hdir, bo->hdev);
1083 bd_holder_release_dirs(bo);
1084 return bo;
1085 }
1086 break;
1087 }
1088 }
1089
1090 return NULL;
1091}
1092
1093/**
1094 * bd_claim_by_kobject - bd_claim() with additional kobject signature
1095 * 800 *
1096 * @bdev: block device to be claimed 801 * The caller must have claimed @bdev before calling this function and
1097 * @holder: holder's signature 802 * ensure that both @bdev and @disk are valid during the creation and
1098 * @kobj: holder's kobject 803 * lifetime of these symlinks.
1099 * 804 *
1100 * Do bd_claim() and if it succeeds, create sysfs symlinks between 805 * CONTEXT:
1101 * the bdev and the holder's kobject. 806 * Might sleep.
1102 * Use bd_release_from_kobject() when relesing the claimed bdev.
1103 * 807 *
1104 * Returns 0 on success. (same as bd_claim()) 808 * RETURNS:
1105 * Returns errno on failure. 809 * 0 on success, -errno on failure.
1106 */ 810 */
1107static int bd_claim_by_kobject(struct block_device *bdev, void *holder, 811int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1108 struct kobject *kobj)
1109{ 812{
1110 int err; 813 int ret = 0;
1111 struct bd_holder *bo, *found;
1112
1113 if (!kobj)
1114 return -EINVAL;
1115
1116 bo = alloc_bd_holder(kobj);
1117 if (!bo)
1118 return -ENOMEM;
1119 814
1120 mutex_lock(&bdev->bd_mutex); 815 mutex_lock(&bdev->bd_mutex);
1121 816
1122 err = bd_claim(bdev, holder); 817 WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
1123 if (err)
1124 goto fail;
1125 818
1126 found = find_bd_holder(bdev, bo); 819 /* FIXME: remove the following once add_disk() handles errors */
1127 if (found) 820 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1128 goto fail; 821 goto out_unlock;
1129 822
1130 err = add_bd_holder(bdev, bo); 823 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1131 if (err) 824 if (ret)
1132 bd_release(bdev); 825 goto out_unlock;
1133 else
1134 bo = NULL;
1135fail:
1136 mutex_unlock(&bdev->bd_mutex);
1137 free_bd_holder(bo);
1138 return err;
1139}
1140 826
1141/** 827 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1142 * bd_release_from_kobject - bd_release() with additional kobject signature 828 if (ret) {
1143 * 829 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1144 * @bdev: block device to be released 830 goto out_unlock;
1145 * @kobj: holder's kobject 831 }
1146 *
1147 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
1148 */
1149static void bd_release_from_kobject(struct block_device *bdev,
1150 struct kobject *kobj)
1151{
1152 if (!kobj)
1153 return;
1154 832
1155 mutex_lock(&bdev->bd_mutex); 833 bdev->bd_holder_disk = disk;
1156 bd_release(bdev); 834out_unlock:
1157 free_bd_holder(del_bd_holder(bdev, kobj));
1158 mutex_unlock(&bdev->bd_mutex); 835 mutex_unlock(&bdev->bd_mutex);
836 return ret;
1159} 837}
838EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1160 839
1161/** 840static void bd_unlink_disk_holder(struct block_device *bdev)
1162 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
1163 *
1164 * @bdev: block device to be claimed
1165 * @holder: holder's signature
1166 * @disk: holder's gendisk
1167 *
1168 * Call bd_claim_by_kobject() with getting @disk->slave_dir.
1169 */
1170int bd_claim_by_disk(struct block_device *bdev, void *holder,
1171 struct gendisk *disk)
1172{ 841{
1173 return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir)); 842 struct gendisk *disk = bdev->bd_holder_disk;
1174}
1175EXPORT_SYMBOL_GPL(bd_claim_by_disk);
1176 843
1177/** 844 bdev->bd_holder_disk = NULL;
1178 * bd_release_from_disk - wrapper function for bd_release_from_kobject() 845 if (!disk)
1179 * 846 return;
1180 * @bdev: block device to be claimed
1181 * @disk: holder's gendisk
1182 *
1183 * Call bd_release_from_kobject() and put @disk->slave_dir.
1184 */
1185void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
1186{
1187 bd_release_from_kobject(bdev, disk->slave_dir);
1188 kobject_put(disk->slave_dir);
1189}
1190EXPORT_SYMBOL_GPL(bd_release_from_disk);
1191#endif
1192 847
1193/* 848 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1194 * Tries to open block device by device number. Use it ONLY if you 849 del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1195 * really do not have anything better - i.e. when you are behind a
1196 * truly sucky interface and all you are given is a device number. _Never_
1197 * to be used for internal purposes. If you ever need it - reconsider
1198 * your API.
1199 */
1200struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
1201{
1202 struct block_device *bdev = bdget(dev);
1203 int err = -ENOMEM;
1204 if (bdev)
1205 err = blkdev_get(bdev, mode);
1206 return err ? ERR_PTR(err) : bdev;
1207} 850}
1208 851#else
1209EXPORT_SYMBOL(open_by_devnum); 852static inline void bd_unlink_disk_holder(struct block_device *bdev)
853{ }
854#endif
1210 855
1211/** 856/**
1212 * flush_disk - invalidates all buffer-cache entries on a disk 857 * flush_disk - invalidates all buffer-cache entries on a disk
@@ -1302,10 +947,11 @@ int check_disk_change(struct block_device *bdev)
1302{ 947{
1303 struct gendisk *disk = bdev->bd_disk; 948 struct gendisk *disk = bdev->bd_disk;
1304 const struct block_device_operations *bdops = disk->fops; 949 const struct block_device_operations *bdops = disk->fops;
950 unsigned int events;
1305 951
1306 if (!bdops->media_changed) 952 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1307 return 0; 953 DISK_EVENT_EJECT_REQUEST);
1308 if (!bdops->media_changed(bdev->bd_disk)) 954 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1309 return 0; 955 return 0;
1310 956
1311 flush_disk(bdev); 957 flush_disk(bdev);
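check_disk_change() above now consumes pending events from the disk-events machinery rather than calling ->media_changed(). A caller that only needs a yes/no answer can be sketched the same way the new body works; the name below is illustrative.

#include <linux/fs.h>
#include <linux/genhd.h>

/* Sketch: test-and-clear pending media events for a block device. */
static bool example_media_was_replaced(struct block_device *bdev)
{
	unsigned int events;

	events = disk_clear_events(bdev->bd_disk,
				   DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	return events & DISK_EVENT_MEDIA_CHANGE;
}
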
@@ -1468,17 +1114,171 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1468 return ret; 1114 return ret;
1469} 1115}
1470 1116
1471int blkdev_get(struct block_device *bdev, fmode_t mode) 1117/**
1118 * blkdev_get - open a block device
1119 * @bdev: block_device to open
1120 * @mode: FMODE_* mask
1121 * @holder: exclusive holder identifier
1122 *
1123 * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
1124 * open with exclusive access. Specifying %FMODE_EXCL with %NULL
1125 * @holder is invalid. Exclusive opens may nest for the same @holder.
1126 *
1127 * On success, the reference count of @bdev is unchanged. On failure,
1128 * @bdev is put.
1129 *
1130 * CONTEXT:
1131 * Might sleep.
1132 *
1133 * RETURNS:
1134 * 0 on success, -errno on failure.
1135 */
1136int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1472{ 1137{
1473 return __blkdev_get(bdev, mode, 0); 1138 struct block_device *whole = NULL;
1139 int res;
1140
1141 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1142
1143 if ((mode & FMODE_EXCL) && holder) {
1144 whole = bd_start_claiming(bdev, holder);
1145 if (IS_ERR(whole)) {
1146 bdput(bdev);
1147 return PTR_ERR(whole);
1148 }
1149 }
1150
1151 res = __blkdev_get(bdev, mode, 0);
1152
1153 /* __blkdev_get() may alter read only status, check it afterwards */
1154 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1155 __blkdev_put(bdev, mode, 0);
1156 res = -EACCES;
1157 }
1158
1159 if (whole) {
1160 /* finish claiming */
1161 mutex_lock(&bdev->bd_mutex);
1162 spin_lock(&bdev_lock);
1163
1164 if (!res) {
1165 BUG_ON(!bd_may_claim(bdev, whole, holder));
1166 /*
1167 * Note that for a whole device bd_holders
1168 * will be incremented twice, and bd_holder
1169 * will be set to bd_may_claim before being
1170 * set to holder
1171 */
1172 whole->bd_holders++;
1173 whole->bd_holder = bd_may_claim;
1174 bdev->bd_holders++;
1175 bdev->bd_holder = holder;
1176 }
1177
1178 /* tell others that we're done */
1179 BUG_ON(whole->bd_claiming != holder);
1180 whole->bd_claiming = NULL;
1181 wake_up_bit(&whole->bd_claiming, 0);
1182
1183 spin_unlock(&bdev_lock);
1184
1185 /*
1186 * Block event polling for write claims. Any write
1187 * holder makes the write_holder state stick until all
1188 * are released. This is good enough and tracking
1189 * individual writeable reference is too fragile given
1190 * the way @mode is used in blkdev_get/put().
1191 */
1192 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
1193 bdev->bd_write_holder = true;
1194 disk_block_events(bdev->bd_disk);
1195 }
1196
1197 mutex_unlock(&bdev->bd_mutex);
1198 bdput(whole);
1199 }
1200
1201 return res;
1474} 1202}
1475EXPORT_SYMBOL(blkdev_get); 1203EXPORT_SYMBOL(blkdev_get);
1476 1204
1205/**
1206 * blkdev_get_by_path - open a block device by name
1207 * @path: path to the block device to open
1208 * @mode: FMODE_* mask
1209 * @holder: exclusive holder identifier
1210 *
1211 * Open the blockdevice described by the device file at @path. @mode
1212 * and @holder are identical to blkdev_get().
1213 *
1214 * On success, the returned block_device has reference count of one.
1215 *
1216 * CONTEXT:
1217 * Might sleep.
1218 *
1219 * RETURNS:
1220 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
1221 */
1222struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1223 void *holder)
1224{
1225 struct block_device *bdev;
1226 int err;
1227
1228 bdev = lookup_bdev(path);
1229 if (IS_ERR(bdev))
1230 return bdev;
1231
1232 err = blkdev_get(bdev, mode, holder);
1233 if (err)
1234 return ERR_PTR(err);
1235
1236 return bdev;
1237}
1238EXPORT_SYMBOL(blkdev_get_by_path);
1239
1240/**
1241 * blkdev_get_by_dev - open a block device by device number
1242 * @dev: device number of block device to open
1243 * @mode: FMODE_* mask
1244 * @holder: exclusive holder identifier
1245 *
1246 * Open the blockdevice described by device number @dev. @mode and
1247 * @holder are identical to blkdev_get().
1248 *
1249 * Use it ONLY if you really do not have anything better - i.e. when
1250 * you are behind a truly sucky interface and all you are given is a
1251 * device number. _Never_ to be used for internal purposes. If you
1252 * ever need it - reconsider your API.
1253 *
1254 * On success, the returned block_device has reference count of one.
1255 *
1256 * CONTEXT:
1257 * Might sleep.
1258 *
1259 * RETURNS:
1260 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
1261 */
1262struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1263{
1264 struct block_device *bdev;
1265 int err;
1266
1267 bdev = bdget(dev);
1268 if (!bdev)
1269 return ERR_PTR(-ENOMEM);
1270
1271 err = blkdev_get(bdev, mode, holder);
1272 if (err)
1273 return ERR_PTR(err);
1274
1275 return bdev;
1276}
1277EXPORT_SYMBOL(blkdev_get_by_dev);
1278
1477static int blkdev_open(struct inode * inode, struct file * filp) 1279static int blkdev_open(struct inode * inode, struct file * filp)
1478{ 1280{
1479 struct block_device *whole = NULL;
1480 struct block_device *bdev; 1281 struct block_device *bdev;
1481 int res;
1482 1282
1483 /* 1283 /*
1484 * Preserve backwards compatibility and allow large file access 1284 * Preserve backwards compatibility and allow large file access
@@ -1499,26 +1299,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1499 if (bdev == NULL) 1299 if (bdev == NULL)
1500 return -ENOMEM; 1300 return -ENOMEM;
1501 1301
1502 if (filp->f_mode & FMODE_EXCL) {
1503 whole = bd_start_claiming(bdev, filp);
1504 if (IS_ERR(whole)) {
1505 bdput(bdev);
1506 return PTR_ERR(whole);
1507 }
1508 }
1509
1510 filp->f_mapping = bdev->bd_inode->i_mapping; 1302 filp->f_mapping = bdev->bd_inode->i_mapping;
1511 1303
1512 res = blkdev_get(bdev, filp->f_mode); 1304 return blkdev_get(bdev, filp->f_mode, filp);
1513
1514 if (whole) {
1515 if (res == 0)
1516 bd_finish_claiming(bdev, whole, filp);
1517 else
1518 bd_abort_claiming(whole, filp);
1519 }
1520
1521 return res;
1522} 1305}
1523 1306
1524static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1307static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1532,6 +1315,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1532 bdev->bd_part_count--; 1315 bdev->bd_part_count--;
1533 1316
1534 if (!--bdev->bd_openers) { 1317 if (!--bdev->bd_openers) {
1318 WARN_ON_ONCE(bdev->bd_holders);
1535 sync_blockdev(bdev); 1319 sync_blockdev(bdev);
1536 kill_bdev(bdev); 1320 kill_bdev(bdev);
1537 } 1321 }
@@ -1562,6 +1346,45 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1562 1346
1563int blkdev_put(struct block_device *bdev, fmode_t mode) 1347int blkdev_put(struct block_device *bdev, fmode_t mode)
1564{ 1348{
1349 if (mode & FMODE_EXCL) {
1350 bool bdev_free;
1351
1352 /*
1353 * Release a claim on the device. The holder fields
1354 * are protected with bdev_lock. bd_mutex is to
1355 * synchronize disk_holder unlinking.
1356 */
1357 mutex_lock(&bdev->bd_mutex);
1358 spin_lock(&bdev_lock);
1359
1360 WARN_ON_ONCE(--bdev->bd_holders < 0);
1361 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1362
1363 /* bd_contains might point to self, check in a separate step */
1364 if ((bdev_free = !bdev->bd_holders))
1365 bdev->bd_holder = NULL;
1366 if (!bdev->bd_contains->bd_holders)
1367 bdev->bd_contains->bd_holder = NULL;
1368
1369 spin_unlock(&bdev_lock);
1370
1371 /*
1372 * If this was the last claim, remove holder link and
1373 * unblock evpoll if it was a write holder.
1374 */
1375 if (bdev_free) {
1376 bd_unlink_disk_holder(bdev);
1377 if (bdev->bd_write_holder) {
1378 disk_unblock_events(bdev->bd_disk);
1379 bdev->bd_write_holder = false;
1380 } else
1381 disk_check_events(bdev->bd_disk);
1382 }
1383
1384 mutex_unlock(&bdev->bd_mutex);
1385 } else
1386 disk_check_events(bdev->bd_disk);
1387
1565 return __blkdev_put(bdev, mode, 0); 1388 return __blkdev_put(bdev, mode, 0);
1566} 1389}
1567EXPORT_SYMBOL(blkdev_put); 1390EXPORT_SYMBOL(blkdev_put);
@@ -1569,8 +1392,7 @@ EXPORT_SYMBOL(blkdev_put);
1569static int blkdev_close(struct inode * inode, struct file * filp) 1392static int blkdev_close(struct inode * inode, struct file * filp)
1570{ 1393{
1571 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1394 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1572 if (bdev->bd_holder == filp) 1395
1573 bd_release(bdev);
1574 return blkdev_put(bdev, filp->f_mode); 1396 return blkdev_put(bdev, filp->f_mode);
1575} 1397}
1576 1398
@@ -1715,67 +1537,6 @@ fail:
1715} 1537}
1716EXPORT_SYMBOL(lookup_bdev); 1538EXPORT_SYMBOL(lookup_bdev);
1717 1539
1718/**
1719 * open_bdev_exclusive - open a block device by name and set it up for use
1720 *
1721 * @path: special file representing the block device
1722 * @mode: FMODE_... combination to pass be used
1723 * @holder: owner for exclusion
1724 *
1725 * Open the blockdevice described by the special file at @path, claim it
1726 * for the @holder.
1727 */
1728struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1729{
1730 struct block_device *bdev, *whole;
1731 int error;
1732
1733 bdev = lookup_bdev(path);
1734 if (IS_ERR(bdev))
1735 return bdev;
1736
1737 whole = bd_start_claiming(bdev, holder);
1738 if (IS_ERR(whole)) {
1739 bdput(bdev);
1740 return whole;
1741 }
1742
1743 error = blkdev_get(bdev, mode);
1744 if (error)
1745 goto out_abort_claiming;
1746
1747 error = -EACCES;
1748 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1749 goto out_blkdev_put;
1750
1751 bd_finish_claiming(bdev, whole, holder);
1752 return bdev;
1753
1754out_blkdev_put:
1755 blkdev_put(bdev, mode);
1756out_abort_claiming:
1757 bd_abort_claiming(whole, holder);
1758 return ERR_PTR(error);
1759}
1760
1761EXPORT_SYMBOL(open_bdev_exclusive);
1762
1763/**
1764 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1765 *
1766 * @bdev: blockdevice to close
1767 * @mode: mode, must match that used to open.
1768 *
1769 * This is the counterpart to open_bdev_exclusive().
1770 */
1771void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1772{
1773 bd_release(bdev);
1774 blkdev_put(bdev, mode);
1775}
1776
1777EXPORT_SYMBOL(close_bdev_exclusive);
1778
1779int __invalidate_device(struct block_device *bdev) 1540int __invalidate_device(struct block_device *bdev)
1780{ 1541{
1781 struct super_block *sb = get_super(bdev); 1542 struct super_block *sb = get_super(bdev);
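The removed open_bdev_exclusive()/close_bdev_exclusive() helpers collapse into direct calls: converted callers such as btrfs and gfs2 now do roughly the equivalent of the wrappers below, which are illustrative only and not part of the patch.

#include <linux/fs.h>

static struct block_device *example_open_excl(const char *path, fmode_t mode,
					      void *holder)
{
	/* the exclusive claim is now just a mode bit plus a holder cookie */
	return blkdev_get_by_path(path, mode | FMODE_EXCL, holder);
}

static void example_close_excl(struct block_device *bdev, fmode_t mode)
{
	blkdev_put(bdev, mode | FMODE_EXCL);
}
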
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6b9884507837..1718e1a5c320 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -493,7 +493,7 @@ again:
493 continue; 493 continue;
494 494
495 if (device->bdev) { 495 if (device->bdev) {
496 close_bdev_exclusive(device->bdev, device->mode); 496 blkdev_put(device->bdev, device->mode);
497 device->bdev = NULL; 497 device->bdev = NULL;
498 fs_devices->open_devices--; 498 fs_devices->open_devices--;
499 } 499 }
@@ -527,7 +527,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
527 527
528 list_for_each_entry(device, &fs_devices->devices, dev_list) { 528 list_for_each_entry(device, &fs_devices->devices, dev_list) {
529 if (device->bdev) { 529 if (device->bdev) {
530 close_bdev_exclusive(device->bdev, device->mode); 530 blkdev_put(device->bdev, device->mode);
531 fs_devices->open_devices--; 531 fs_devices->open_devices--;
532 } 532 }
533 if (device->writeable) { 533 if (device->writeable) {
@@ -584,13 +584,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
584 int seeding = 1; 584 int seeding = 1;
585 int ret = 0; 585 int ret = 0;
586 586
587 flags |= FMODE_EXCL;
588
587 list_for_each_entry(device, head, dev_list) { 589 list_for_each_entry(device, head, dev_list) {
588 if (device->bdev) 590 if (device->bdev)
589 continue; 591 continue;
590 if (!device->name) 592 if (!device->name)
591 continue; 593 continue;
592 594
593 bdev = open_bdev_exclusive(device->name, flags, holder); 595 bdev = blkdev_get_by_path(device->name, flags, holder);
594 if (IS_ERR(bdev)) { 596 if (IS_ERR(bdev)) {
595 printk(KERN_INFO "open %s failed\n", device->name); 597 printk(KERN_INFO "open %s failed\n", device->name);
596 goto error; 598 goto error;
@@ -642,7 +644,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
642error_brelse: 644error_brelse:
643 brelse(bh); 645 brelse(bh);
644error_close: 646error_close:
645 close_bdev_exclusive(bdev, FMODE_READ); 647 blkdev_put(bdev, flags);
646error: 648error:
647 continue; 649 continue;
648 } 650 }
@@ -688,7 +690,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
688 690
689 mutex_lock(&uuid_mutex); 691 mutex_lock(&uuid_mutex);
690 692
691 bdev = open_bdev_exclusive(path, flags, holder); 693 flags |= FMODE_EXCL;
694 bdev = blkdev_get_by_path(path, flags, holder);
692 695
693 if (IS_ERR(bdev)) { 696 if (IS_ERR(bdev)) {
694 ret = PTR_ERR(bdev); 697 ret = PTR_ERR(bdev);
@@ -720,7 +723,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
720 723
721 brelse(bh); 724 brelse(bh);
722error_close: 725error_close:
723 close_bdev_exclusive(bdev, flags); 726 blkdev_put(bdev, flags);
724error: 727error:
725 mutex_unlock(&uuid_mutex); 728 mutex_unlock(&uuid_mutex);
726 return ret; 729 return ret;
@@ -1183,8 +1186,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1183 goto out; 1186 goto out;
1184 } 1187 }
1185 } else { 1188 } else {
1186 bdev = open_bdev_exclusive(device_path, FMODE_READ, 1189 bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
1187 root->fs_info->bdev_holder); 1190 root->fs_info->bdev_holder);
1188 if (IS_ERR(bdev)) { 1191 if (IS_ERR(bdev)) {
1189 ret = PTR_ERR(bdev); 1192 ret = PTR_ERR(bdev);
1190 goto out; 1193 goto out;
@@ -1251,7 +1254,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1251 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1254 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1252 1255
1253 if (device->bdev) { 1256 if (device->bdev) {
1254 close_bdev_exclusive(device->bdev, device->mode); 1257 blkdev_put(device->bdev, device->mode);
1255 device->bdev = NULL; 1258 device->bdev = NULL;
1256 device->fs_devices->open_devices--; 1259 device->fs_devices->open_devices--;
1257 } 1260 }
@@ -1294,7 +1297,7 @@ error_brelse:
1294 brelse(bh); 1297 brelse(bh);
1295error_close: 1298error_close:
1296 if (bdev) 1299 if (bdev)
1297 close_bdev_exclusive(bdev, FMODE_READ); 1300 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
1298out: 1301out:
1299 mutex_unlock(&root->fs_info->volume_mutex); 1302 mutex_unlock(&root->fs_info->volume_mutex);
1300 mutex_unlock(&uuid_mutex); 1303 mutex_unlock(&uuid_mutex);
@@ -1446,7 +1449,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1446 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1449 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1447 return -EINVAL; 1450 return -EINVAL;
1448 1451
1449 bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); 1452 bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
1453 root->fs_info->bdev_holder);
1450 if (IS_ERR(bdev)) 1454 if (IS_ERR(bdev))
1451 return PTR_ERR(bdev); 1455 return PTR_ERR(bdev);
1452 1456
@@ -1572,7 +1576,7 @@ out:
1572 mutex_unlock(&root->fs_info->volume_mutex); 1576 mutex_unlock(&root->fs_info->volume_mutex);
1573 return ret; 1577 return ret;
1574error: 1578error:
1575 close_bdev_exclusive(bdev, 0); 1579 blkdev_put(bdev, FMODE_EXCL);
1576 if (seeding_dev) { 1580 if (seeding_dev) {
1577 mutex_unlock(&uuid_mutex); 1581 mutex_unlock(&uuid_mutex);
1578 up_write(&sb->s_umount); 1582 up_write(&sb->s_umount);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2740db49eb04..1be781079450 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -50,7 +50,7 @@ struct btrfs_device {
50 50
51 struct block_device *bdev; 51 struct block_device *bdev;
52 52
53 /* the mode sent to open_bdev_exclusive */ 53 /* the mode sent to blkdev_get */
54 fmode_t mode; 54 fmode_t mode;
55 55
56 char *name; 56 char *name;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index e5b9df993b93..143f0207c7eb 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -59,7 +59,7 @@ static struct char_device_struct {
59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
60 60
61/* index in the above */ 61/* index in the above */
62static inline int major_to_index(int major) 62static inline int major_to_index(unsigned major)
63{ 63{
64 return major % CHRDEV_MAJOR_HASH_SIZE; 64 return major % CHRDEV_MAJOR_HASH_SIZE;
65} 65}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f0..0fad7c0449fb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -346,7 +346,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
346 struct block_device *bdev; 346 struct block_device *bdev;
347 char b[BDEVNAME_SIZE]; 347 char b[BDEVNAME_SIZE];
348 348
349 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 349 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
350 if (IS_ERR(bdev)) 350 if (IS_ERR(bdev))
351 goto fail; 351 goto fail;
352 return bdev; 352 return bdev;
@@ -363,8 +363,7 @@ fail:
363 */ 363 */
364static int ext3_blkdev_put(struct block_device *bdev) 364static int ext3_blkdev_put(struct block_device *bdev)
365{ 365{
366 bd_release(bdev); 366 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
367 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
368} 367}
369 368
370static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 369static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@ -2135,13 +2134,6 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2135 if (bdev == NULL) 2134 if (bdev == NULL)
2136 return NULL; 2135 return NULL;
2137 2136
2138 if (bd_claim(bdev, sb)) {
2139 ext3_msg(sb, KERN_ERR,
2140 "error: failed to claim external journal device");
2141 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2142 return NULL;
2143 }
2144
2145 blocksize = sb->s_blocksize; 2137 blocksize = sb->s_blocksize;
2146 hblock = bdev_logical_block_size(bdev); 2138 hblock = bdev_logical_block_size(bdev);
2147 if (blocksize < hblock) { 2139 if (blocksize < hblock) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb15c9c0be74..c93bd1e651f5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -647,7 +647,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
647 struct block_device *bdev; 647 struct block_device *bdev;
648 char b[BDEVNAME_SIZE]; 648 char b[BDEVNAME_SIZE];
649 649
650 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 650 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
651 if (IS_ERR(bdev)) 651 if (IS_ERR(bdev))
652 goto fail; 652 goto fail;
653 return bdev; 653 return bdev;
@@ -663,8 +663,7 @@ fail:
663 */ 663 */
664static int ext4_blkdev_put(struct block_device *bdev) 664static int ext4_blkdev_put(struct block_device *bdev)
665{ 665{
666 bd_release(bdev); 666 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
667 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
668} 667}
669 668
670static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 669static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -3765,13 +3764,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
3765 if (bdev == NULL) 3764 if (bdev == NULL)
3766 return NULL; 3765 return NULL;
3767 3766
3768 if (bd_claim(bdev, sb)) {
3769 ext4_msg(sb, KERN_ERR,
3770 "failed to claim external journal device");
3771 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
3772 return NULL;
3773 }
3774
3775 blocksize = sb->s_blocksize; 3767 blocksize = sb->s_blocksize;
3776 hblock = bdev_logical_block_size(bdev); 3768 hblock = bdev_logical_block_size(bdev);
3777 if (blocksize < hblock) { 3769 if (blocksize < hblock) {
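The ext3 and ext4 hunks above illustrate the conversion pattern used throughout this series: the two-step open_by_devnum() + bd_claim() sequence becomes a single blkdev_get_by_dev() call with FMODE_EXCL and a holder cookie, and the former bd_release() goes away because blkdev_put() called with FMODE_EXCL drops the claim itself. A minimal sketch of the resulting pairing, assuming a hypothetical filesystem with an external journal device (the example_* names are invented for illustration):

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Open and claim an external journal device in one step; the
 * superblock doubles as the holder cookie, as in the hunks above. */
static struct block_device *example_get_journal_bdev(struct super_block *sb,
						     dev_t jdev)
{
	/* Callers check the result with IS_ERR()/PTR_ERR(). */
	return blkdev_get_by_dev(jdev,
				 FMODE_READ | FMODE_WRITE | FMODE_EXCL, sb);
}

static void example_put_journal_bdev(struct block_device *bdev)
{
	/* FMODE_EXCL in the mode drops the claim; no separate bd_release(). */
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}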
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..bc56ccf98ffd 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1268,7 +1268,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1268{ 1268{
1269 struct block_device *bdev; 1269 struct block_device *bdev;
1270 struct super_block *s; 1270 struct super_block *s;
1271 fmode_t mode = FMODE_READ; 1271 fmode_t mode = FMODE_READ | FMODE_EXCL;
1272 int error; 1272 int error;
1273 struct gfs2_args args; 1273 struct gfs2_args args;
1274 struct gfs2_sbd *sdp; 1274 struct gfs2_sbd *sdp;
@@ -1276,7 +1276,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1276 if (!(flags & MS_RDONLY)) 1276 if (!(flags & MS_RDONLY))
1277 mode |= FMODE_WRITE; 1277 mode |= FMODE_WRITE;
1278 1278
1279 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1279 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1280 if (IS_ERR(bdev)) 1280 if (IS_ERR(bdev))
1281 return ERR_CAST(bdev); 1281 return ERR_CAST(bdev);
1282 1282
@@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1298 goto error_bdev; 1298 goto error_bdev;
1299 1299
1300 if (s->s_root) 1300 if (s->s_root)
1301 close_bdev_exclusive(bdev, mode); 1301 blkdev_put(bdev, mode);
1302 1302
1303 memset(&args, 0, sizeof(args)); 1303 memset(&args, 0, sizeof(args));
1304 args.ar_quota = GFS2_QUOTA_DEFAULT; 1304 args.ar_quota = GFS2_QUOTA_DEFAULT;
@@ -1342,7 +1342,7 @@ error_super:
1342 deactivate_locked_super(s); 1342 deactivate_locked_super(s);
1343 return ERR_PTR(error); 1343 return ERR_PTR(error);
1344error_bdev: 1344error_bdev:
1345 close_bdev_exclusive(bdev, mode); 1345 blkdev_put(bdev, mode);
1346 return ERR_PTR(error); 1346 return ERR_PTR(error);
1347} 1347}
1348 1348
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index e1b8493b9aaa..278e3fb40b71 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1120,16 +1120,13 @@ int lmLogOpen(struct super_block *sb)
1120 * file systems to log may have n-to-1 relationship; 1120 * file systems to log may have n-to-1 relationship;
1121 */ 1121 */
1122 1122
1123 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); 1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1124 log);
1124 if (IS_ERR(bdev)) { 1125 if (IS_ERR(bdev)) {
1125 rc = -PTR_ERR(bdev); 1126 rc = -PTR_ERR(bdev);
1126 goto free; 1127 goto free;
1127 } 1128 }
1128 1129
1129 if ((rc = bd_claim(bdev, log))) {
1130 goto close;
1131 }
1132
1133 log->bdev = bdev; 1130 log->bdev = bdev;
1134 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1131 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1135 1132
@@ -1137,7 +1134,7 @@ int lmLogOpen(struct super_block *sb)
1137 * initialize log: 1134 * initialize log:
1138 */ 1135 */
1139 if ((rc = lmLogInit(log))) 1136 if ((rc = lmLogInit(log)))
1140 goto unclaim; 1137 goto close;
1141 1138
1142 list_add(&log->journal_list, &jfs_external_logs); 1139 list_add(&log->journal_list, &jfs_external_logs);
1143 1140
@@ -1163,11 +1160,8 @@ journal_found:
1163 list_del(&log->journal_list); 1160 list_del(&log->journal_list);
1164 lbmLogShutdown(log); 1161 lbmLogShutdown(log);
1165 1162
1166 unclaim:
1167 bd_release(bdev);
1168
1169 close: /* close external log device */ 1163 close: /* close external log device */
1170 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 1164 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1171 1165
1172 free: /* free log descriptor */ 1166 free: /* free log descriptor */
1173 mutex_unlock(&jfs_log_mutex); 1167 mutex_unlock(&jfs_log_mutex);
@@ -1512,8 +1506,7 @@ int lmLogClose(struct super_block *sb)
1512 bdev = log->bdev; 1506 bdev = log->bdev;
1513 rc = lmLogShutdown(log); 1507 rc = lmLogShutdown(log);
1514 1508
1515 bd_release(bdev); 1509 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1516 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1517 1510
1518 kfree(log); 1511 kfree(log);
1519 1512
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 92ca6fbe09bd..723bc5bca09a 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -300,7 +300,7 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
300 300
301static void bdev_put_device(struct logfs_super *s) 301static void bdev_put_device(struct logfs_super *s)
302{ 302{
303 close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE); 303 blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -325,13 +325,14 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
325{ 325{
326 struct block_device *bdev; 326 struct block_device *bdev;
327 327
328 bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); 328 bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
329 type);
329 if (IS_ERR(bdev)) 330 if (IS_ERR(bdev))
330 return PTR_ERR(bdev); 331 return PTR_ERR(bdev);
331 332
332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 333 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
333 int mtdnr = MINOR(bdev->bd_dev); 334 int mtdnr = MINOR(bdev->bd_dev);
334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 335 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
335 return logfs_get_sb_mtd(p, mtdnr); 336 return logfs_get_sb_mtd(p, mtdnr);
336 } 337 }
337 338
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff04..106ed482f119 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -845,11 +845,6 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
845 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 845 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
846 struct page *page = buf->page; 846 struct page *page = buf->page;
847 size_t size; 847 size_t size;
848 int ret;
849
850 ret = buf->ops->confirm(pipe, buf);
851 if (unlikely(ret))
852 return ret;
853 848
854 size = sd->len; 849 size = sd->len;
855 850
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41ec9d3..0030640e2d72 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1147,14 +1147,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1147{ 1147{
1148 struct nilfs_super_data sd; 1148 struct nilfs_super_data sd;
1149 struct super_block *s; 1149 struct super_block *s;
1150 fmode_t mode = FMODE_READ; 1150 fmode_t mode = FMODE_READ | FMODE_EXCL;
1151 struct dentry *root_dentry; 1151 struct dentry *root_dentry;
1152 int err, s_new = false; 1152 int err, s_new = false;
1153 1153
1154 if (!(flags & MS_RDONLY)) 1154 if (!(flags & MS_RDONLY))
1155 mode |= FMODE_WRITE; 1155 mode |= FMODE_WRITE;
1156 1156
1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1157 sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1158 if (IS_ERR(sd.bdev)) 1158 if (IS_ERR(sd.bdev))
1159 return ERR_CAST(sd.bdev); 1159 return ERR_CAST(sd.bdev);
1160 1160
@@ -1233,7 +1233,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1233 } 1233 }
1234 1234
1235 if (!s_new) 1235 if (!s_new)
1236 close_bdev_exclusive(sd.bdev, mode); 1236 blkdev_put(sd.bdev, mode);
1237 1237
1238 return root_dentry; 1238 return root_dentry;
1239 1239
@@ -1242,7 +1242,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1242 1242
1243 failed: 1243 failed:
1244 if (!s_new) 1244 if (!s_new)
1245 close_bdev_exclusive(sd.bdev, mode); 1245 blkdev_put(sd.bdev, mode);
1246 return ERR_PTR(err); 1246 return ERR_PTR(err);
1247} 1247}
1248 1248
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9f26ac9be2a4..ec0fdc3ebe97 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1674,7 +1674,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1674 goto out; 1674 goto out;
1675 1675
1676 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1676 reg->hr_bdev = I_BDEV(filp->f_mapping->host);
1677 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); 1677 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1678 if (ret) { 1678 if (ret) {
1679 reg->hr_bdev = NULL; 1679 reg->hr_bdev = NULL;
1680 goto out; 1680 goto out;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0a8b0ad0c7e2..9a48d65d9855 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,6 +237,13 @@ ssize_t part_size_show(struct device *dev,
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238} 238}
239 239
240ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf)
242{
243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245}
246
240ssize_t part_alignment_offset_show(struct device *dev, 247ssize_t part_alignment_offset_show(struct device *dev,
241 struct device_attribute *attr, char *buf) 248 struct device_attribute *attr, char *buf)
242{ 249{
@@ -312,6 +319,7 @@ ssize_t part_fail_store(struct device *dev,
312static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); 319static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
313static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 320static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
314static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 321static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
322static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
315static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 323static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
316static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, 324static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
317 NULL); 325 NULL);
@@ -326,6 +334,7 @@ static struct attribute *part_attrs[] = {
326 &dev_attr_partition.attr, 334 &dev_attr_partition.attr,
327 &dev_attr_start.attr, 335 &dev_attr_start.attr,
328 &dev_attr_size.attr, 336 &dev_attr_size.attr,
337 &dev_attr_ro.attr,
329 &dev_attr_alignment_offset.attr, 338 &dev_attr_alignment_offset.attr,
330 &dev_attr_discard_alignment.attr, 339 &dev_attr_discard_alignment.attr,
331 &dev_attr_stat.attr, 340 &dev_attr_stat.attr,
@@ -507,65 +516,6 @@ out_put:
507 return ERR_PTR(err); 516 return ERR_PTR(err);
508} 517}
509 518
510/* Not exported, helper to add_disk(). */
511void register_disk(struct gendisk *disk)
512{
513 struct device *ddev = disk_to_dev(disk);
514 struct block_device *bdev;
515 struct disk_part_iter piter;
516 struct hd_struct *part;
517 int err;
518
519 ddev->parent = disk->driverfs_dev;
520
521 dev_set_name(ddev, disk->disk_name);
522
523 /* delay uevents, until we scanned partition table */
524 dev_set_uevent_suppress(ddev, 1);
525
526 if (device_add(ddev))
527 return;
528 if (!sysfs_deprecated) {
529 err = sysfs_create_link(block_depr, &ddev->kobj,
530 kobject_name(&ddev->kobj));
531 if (err) {
532 device_del(ddev);
533 return;
534 }
535 }
536 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
537 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
538
539 /* No minors to use for partitions */
540 if (!disk_partitionable(disk))
541 goto exit;
542
543 /* No such device (e.g., media were just removed) */
544 if (!get_capacity(disk))
545 goto exit;
546
547 bdev = bdget_disk(disk, 0);
548 if (!bdev)
549 goto exit;
550
551 bdev->bd_invalidated = 1;
552 err = blkdev_get(bdev, FMODE_READ);
553 if (err < 0)
554 goto exit;
555 blkdev_put(bdev, FMODE_READ);
556
557exit:
558 /* announce disk after possible partitions are created */
559 dev_set_uevent_suppress(ddev, 0);
560 kobject_uevent(&ddev->kobj, KOBJ_ADD);
561
562 /* announce possible partitions */
563 disk_part_iter_init(&piter, disk, 0);
564 while ((part = disk_part_iter_next(&piter)))
565 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
566 disk_part_iter_exit(&piter);
567}
568
569static bool disk_unlock_native_capacity(struct gendisk *disk) 519static bool disk_unlock_native_capacity(struct gendisk *disk)
570{ 520{
571 const struct block_device_operations *bdops = disk->fops; 521 const struct block_device_operations *bdops = disk->fops;
@@ -728,33 +678,3 @@ fail:
728} 678}
729 679
730EXPORT_SYMBOL(read_dev_sector); 680EXPORT_SYMBOL(read_dev_sector);
731
732void del_gendisk(struct gendisk *disk)
733{
734 struct disk_part_iter piter;
735 struct hd_struct *part;
736
737 /* invalidate stuff */
738 disk_part_iter_init(&piter, disk,
739 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
740 while ((part = disk_part_iter_next(&piter))) {
741 invalidate_partition(disk, part->partno);
742 delete_partition(disk, part->partno);
743 }
744 disk_part_iter_exit(&piter);
745
746 invalidate_partition(disk, 0);
747 blk_free_devt(disk_to_dev(disk)->devt);
748 set_capacity(disk, 0);
749 disk->flags &= ~GENHD_FL_UP;
750 unlink_gendisk(disk);
751 part_stat_set_all(&disk->part0, 0);
752 disk->part0.stamp = 0;
753
754 kobject_put(disk->part0.holder_dir);
755 kobject_put(disk->slave_dir);
756 disk->driverfs_dev = NULL;
757 if (!sysfs_deprecated)
758 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
759 device_del(disk_to_dev(disk));
760}
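Besides moving register_disk() and del_gendisk() out of check.c, the hunk above adds a read-only "ro" attribute to each partition's sysfs directory, reporting 1 when the partition policy is read-only, matching the attribute already exposed for whole disks. A small userspace sketch that reads it, assuming an existing /sys/block/sda/sda1/ro path (the device name is only an example):

#include <stdio.h>

int main(void)
{
	/* Example path; substitute the partition of interest. */
	FILE *f = fopen("/sys/block/sda/sda1/ro", "r");
	int ro;

	if (!f) {
		perror("open ro attribute");
		return 1;
	}
	if (fscanf(f, "%d", &ro) != 1)
		ro = -1;
	fclose(f);
	printf("ro = %d\n", ro);
	return 0;
}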
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index d31bce1a9f90..3eea859e6990 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2551,8 +2551,6 @@ static int release_journal_dev(struct super_block *super,
2551 result = 0; 2551 result = 0;
2552 2552
2553 if (journal->j_dev_bd != NULL) { 2553 if (journal->j_dev_bd != NULL) {
2554 if (journal->j_dev_bd->bd_dev != super->s_dev)
2555 bd_release(journal->j_dev_bd);
2556 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); 2554 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
2557 journal->j_dev_bd = NULL; 2555 journal->j_dev_bd = NULL;
2558 } 2556 }
@@ -2570,7 +2568,7 @@ static int journal_init_dev(struct super_block *super,
2570{ 2568{
2571 int result; 2569 int result;
2572 dev_t jdev; 2570 dev_t jdev;
2573 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; 2571 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
2574 char b[BDEVNAME_SIZE]; 2572 char b[BDEVNAME_SIZE];
2575 2573
2576 result = 0; 2574 result = 0;
@@ -2584,7 +2582,10 @@ static int journal_init_dev(struct super_block *super,
2584 2582
2585 /* there is no "jdev" option and journal is on separate device */ 2583 /* there is no "jdev" option and journal is on separate device */
2586 if ((!jdev_name || !jdev_name[0])) { 2584 if ((!jdev_name || !jdev_name[0])) {
2587 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2585 if (jdev == super->s_dev)
2586 blkdev_mode &= ~FMODE_EXCL;
2587 journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
2588 journal);
2588 journal->j_dev_mode = blkdev_mode; 2589 journal->j_dev_mode = blkdev_mode;
2589 if (IS_ERR(journal->j_dev_bd)) { 2590 if (IS_ERR(journal->j_dev_bd)) {
2590 result = PTR_ERR(journal->j_dev_bd); 2591 result = PTR_ERR(journal->j_dev_bd);
@@ -2593,22 +2594,14 @@ static int journal_init_dev(struct super_block *super,
2593 "cannot init journal device '%s': %i", 2594 "cannot init journal device '%s': %i",
2594 __bdevname(jdev, b), result); 2595 __bdevname(jdev, b), result);
2595 return result; 2596 return result;
2596 } else if (jdev != super->s_dev) { 2597 } else if (jdev != super->s_dev)
2597 result = bd_claim(journal->j_dev_bd, journal);
2598 if (result) {
2599 blkdev_put(journal->j_dev_bd, blkdev_mode);
2600 return result;
2601 }
2602
2603 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2598 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2604 }
2605 2599
2606 return 0; 2600 return 0;
2607 } 2601 }
2608 2602
2609 journal->j_dev_mode = blkdev_mode; 2603 journal->j_dev_mode = blkdev_mode;
2610 journal->j_dev_bd = open_bdev_exclusive(jdev_name, 2604 journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
2611 blkdev_mode, journal);
2612 if (IS_ERR(journal->j_dev_bd)) { 2605 if (IS_ERR(journal->j_dev_bd)) {
2613 result = PTR_ERR(journal->j_dev_bd); 2606 result = PTR_ERR(journal->j_dev_bd);
2614 journal->j_dev_bd = NULL; 2607 journal->j_dev_bd = NULL;
diff --git a/fs/splice.c b/fs/splice.c
index ce2f02579e35..50a5d978da16 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -682,19 +682,14 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
682{ 682{
683 struct file *file = sd->u.file; 683 struct file *file = sd->u.file;
684 loff_t pos = sd->pos; 684 loff_t pos = sd->pos;
685 int ret, more; 685 int more;
686
687 ret = buf->ops->confirm(pipe, buf);
688 if (!ret) {
689 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
690 if (file->f_op && file->f_op->sendpage)
691 ret = file->f_op->sendpage(file, buf->page, buf->offset,
692 sd->len, &pos, more);
693 else
694 ret = -EINVAL;
695 }
696 686
697 return ret; 687 if (!likely(file->f_op && file->f_op->sendpage))
688 return -EINVAL;
689
690 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
691 return file->f_op->sendpage(file, buf->page, buf->offset,
692 sd->len, &pos, more);
698} 693}
699 694
700/* 695/*
@@ -727,13 +722,6 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
727 void *fsdata; 722 void *fsdata;
728 int ret; 723 int ret;
729 724
730 /*
731 * make sure the data in this buffer is uptodate
732 */
733 ret = buf->ops->confirm(pipe, buf);
734 if (unlikely(ret))
735 return ret;
736
737 offset = sd->pos & ~PAGE_CACHE_MASK; 725 offset = sd->pos & ~PAGE_CACHE_MASK;
738 726
739 this_len = sd->len; 727 this_len = sd->len;
@@ -805,12 +793,17 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
805 if (sd->len > sd->total_len) 793 if (sd->len > sd->total_len)
806 sd->len = sd->total_len; 794 sd->len = sd->total_len;
807 795
808 ret = actor(pipe, buf, sd); 796 ret = buf->ops->confirm(pipe, buf);
809 if (ret <= 0) { 797 if (unlikely(ret)) {
810 if (ret == -ENODATA) 798 if (ret == -ENODATA)
811 ret = 0; 799 ret = 0;
812 return ret; 800 return ret;
813 } 801 }
802
803 ret = actor(pipe, buf, sd);
804 if (ret <= 0)
805 return ret;
806
814 buf->offset += ret; 807 buf->offset += ret;
815 buf->len -= ret; 808 buf->len -= ret;
816 809
@@ -1044,10 +1037,6 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1044 int ret; 1037 int ret;
1045 void *data; 1038 void *data;
1046 1039
1047 ret = buf->ops->confirm(pipe, buf);
1048 if (ret)
1049 return ret;
1050
1051 data = buf->ops->map(pipe, buf, 0); 1040 data = buf->ops->map(pipe, buf, 0);
1052 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); 1041 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
1053 buf->ops->unmap(pipe, buf, data); 1042 buf->ops->unmap(pipe, buf, data);
@@ -1495,10 +1484,6 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1495 char *src; 1484 char *src;
1496 int ret; 1485 int ret;
1497 1486
1498 ret = buf->ops->confirm(pipe, buf);
1499 if (unlikely(ret))
1500 return ret;
1501
1502 /* 1487 /*
1503 * See if we can use the atomic maps, by prefaulting in the 1488 * See if we can use the atomic maps, by prefaulting in the
1504 * pages and doing an atomic copy 1489 * pages and doing an atomic copy
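The splice.c changes above hoist the buf->ops->confirm() call out of the individual actors and into splice_from_pipe_feed(), which now runs it once before invoking the actor and treats -ENODATA as success. After this change an actor can assume the buffer contents are already up to date. A minimal sketch of such an actor, with example_* names invented for illustration:

#include <linux/fs.h>
#include <linux/pipe_fs_i.h>
#include <linux/splice.h>

/* Hypothetical splice actor: consume the buffer without copying it.
 * splice_from_pipe_feed() has already run buf->ops->confirm(), so no
 * confirm() call is needed here anymore. */
static int example_consume_actor(struct pipe_inode_info *pipe,
				 struct pipe_buffer *buf,
				 struct splice_desc *sd)
{
	/* sd->len has already been clamped by the caller; returning it
	 * tells the feed loop how many bytes were consumed. */
	return sd->len;
}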
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9a..5d9a4497849a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -766,13 +766,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
766{ 766{
767 struct block_device *bdev; 767 struct block_device *bdev;
768 struct super_block *s; 768 struct super_block *s;
769 fmode_t mode = FMODE_READ; 769 fmode_t mode = FMODE_READ | FMODE_EXCL;
770 int error = 0; 770 int error = 0;
771 771
772 if (!(flags & MS_RDONLY)) 772 if (!(flags & MS_RDONLY))
773 mode |= FMODE_WRITE; 773 mode |= FMODE_WRITE;
774 774
775 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 775 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
776 if (IS_ERR(bdev)) 776 if (IS_ERR(bdev))
777 return ERR_CAST(bdev); 777 return ERR_CAST(bdev);
778 778
@@ -801,13 +801,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
801 801
802 /* 802 /*
803 * s_umount nests inside bd_mutex during 803 * s_umount nests inside bd_mutex during
804 * __invalidate_device(). close_bdev_exclusive() 804 * __invalidate_device(). blkdev_put() acquires
805 * acquires bd_mutex and can't be called under 805 * bd_mutex and can't be called under s_umount. Drop
806 * s_umount. Drop s_umount temporarily. This is safe 806 * s_umount temporarily. This is safe as we're
807 * as we're holding an active reference. 807 * holding an active reference.
808 */ 808 */
809 up_write(&s->s_umount); 809 up_write(&s->s_umount);
810 close_bdev_exclusive(bdev, mode); 810 blkdev_put(bdev, mode);
811 down_write(&s->s_umount); 811 down_write(&s->s_umount);
812 } else { 812 } else {
813 char b[BDEVNAME_SIZE]; 813 char b[BDEVNAME_SIZE];
@@ -831,7 +831,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
831error_s: 831error_s:
832 error = PTR_ERR(s); 832 error = PTR_ERR(s);
833error_bdev: 833error_bdev:
834 close_bdev_exclusive(bdev, mode); 834 blkdev_put(bdev, mode);
835error: 835error:
836 return ERR_PTR(error); 836 return ERR_PTR(error);
837} 837}
@@ -862,7 +862,8 @@ void kill_block_super(struct super_block *sb)
862 bdev->bd_super = NULL; 862 bdev->bd_super = NULL;
863 generic_shutdown_super(sb); 863 generic_shutdown_super(sb);
864 sync_blockdev(bdev); 864 sync_blockdev(bdev);
865 close_bdev_exclusive(bdev, mode); 865 WARN_ON_ONCE(!(mode & FMODE_EXCL));
866 blkdev_put(bdev, mode | FMODE_EXCL);
866} 867}
867 868
868EXPORT_SYMBOL(kill_block_super); 869EXPORT_SYMBOL(kill_block_super);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 064f964d4f3c..2d2ce7f651a7 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -606,7 +606,8 @@ xfs_blkdev_get(
606{ 606{
607 int error = 0; 607 int error = 0;
608 608
609 *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); 609 *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
610 mp);
610 if (IS_ERR(*bdevp)) { 611 if (IS_ERR(*bdevp)) {
611 error = PTR_ERR(*bdevp); 612 error = PTR_ERR(*bdevp);
612 printk("XFS: Invalid device [%s], error=%d\n", name, error); 613 printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -620,7 +621,7 @@ xfs_blkdev_put(
620 struct block_device *bdev) 621 struct block_device *bdev)
621{ 622{
622 if (bdev) 623 if (bdev)
623 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 624 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
624} 625}
625 626
626/* 627/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aae86fd10c4f..05667e6989f1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -643,7 +643,6 @@ static inline void rq_flush_dcache_pages(struct request *rq)
643 643
644extern int blk_register_queue(struct gendisk *disk); 644extern int blk_register_queue(struct gendisk *disk);
645extern void blk_unregister_queue(struct gendisk *disk); 645extern void blk_unregister_queue(struct gendisk *disk);
646extern void register_disk(struct gendisk *dev);
647extern void generic_make_request(struct bio *bio); 646extern void generic_make_request(struct bio *bio);
648extern void blk_rq_init(struct request_queue *q, struct request *rq); 647extern void blk_rq_init(struct request_queue *q, struct request *rq);
649extern void blk_put_request(struct request *); 648extern void blk_put_request(struct request *);
@@ -1252,6 +1251,9 @@ struct block_device_operations {
1252 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1251 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1253 int (*direct_access) (struct block_device *, sector_t, 1252 int (*direct_access) (struct block_device *, sector_t,
1254 void **, unsigned long *); 1253 void **, unsigned long *);
1254 unsigned int (*check_events) (struct gendisk *disk,
1255 unsigned int clearing);
1256 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
1255 int (*media_changed) (struct gendisk *); 1257 int (*media_changed) (struct gendisk *);
1256 void (*unlock_native_capacity) (struct gendisk *); 1258 void (*unlock_native_capacity) (struct gendisk *);
1257 int (*revalidate_disk) (struct gendisk *); 1259 int (*revalidate_disk) (struct gendisk *);
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 78e904796622..35eae4b67503 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -946,6 +946,8 @@ struct cdrom_device_info {
946/* device-related storage */ 946/* device-related storage */
947 unsigned int options : 30; /* options flags */ 947 unsigned int options : 30; /* options flags */
948 unsigned mc_flags : 2; /* media change buffer flags */ 948 unsigned mc_flags : 2; /* media change buffer flags */
949 unsigned int vfs_events; /* cached events for vfs path */
950 unsigned int ioctl_events; /* cached events for ioctl path */
949 int use_count; /* number of times device opened */ 951 int use_count; /* number of times device opened */
950 char name[20]; /* name of the device type */ 952 char name[20]; /* name of the device type */
951/* per-device flags */ 953/* per-device flags */
@@ -965,6 +967,8 @@ struct cdrom_device_ops {
965 int (*open) (struct cdrom_device_info *, int); 967 int (*open) (struct cdrom_device_info *, int);
966 void (*release) (struct cdrom_device_info *); 968 void (*release) (struct cdrom_device_info *);
967 int (*drive_status) (struct cdrom_device_info *, int); 969 int (*drive_status) (struct cdrom_device_info *, int);
970 unsigned int (*check_events) (struct cdrom_device_info *cdi,
971 unsigned int clearing, int slot);
968 int (*media_changed) (struct cdrom_device_info *, int); 972 int (*media_changed) (struct cdrom_device_info *, int);
969 int (*tray_move) (struct cdrom_device_info *, int); 973 int (*tray_move) (struct cdrom_device_info *, int);
970 int (*lock_door) (struct cdrom_device_info *, int); 974 int (*lock_door) (struct cdrom_device_info *, int);
@@ -993,6 +997,8 @@ extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
993extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode); 997extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode);
994extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, 998extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
995 fmode_t mode, unsigned int cmd, unsigned long arg); 999 fmode_t mode, unsigned int cmd, unsigned long arg);
1000extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
1001 unsigned int clearing);
996extern int cdrom_media_changed(struct cdrom_device_info *); 1002extern int cdrom_media_changed(struct cdrom_device_info *);
997 1003
998extern int register_cdrom(struct cdrom_device_info *cdi); 1004extern int register_cdrom(struct cdrom_device_info *cdi);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 090f0eacde29..34209283eab6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -663,8 +663,9 @@ struct block_device {
663 void * bd_claiming; 663 void * bd_claiming;
664 void * bd_holder; 664 void * bd_holder;
665 int bd_holders; 665 int bd_holders;
666 bool bd_write_holder;
666#ifdef CONFIG_SYSFS 667#ifdef CONFIG_SYSFS
667 struct list_head bd_holder_list; 668 struct gendisk * bd_holder_disk; /* for sysfs slave linking */
668#endif 669#endif
669 struct block_device * bd_contains; 670 struct block_device * bd_contains;
670 unsigned bd_block_size; 671 unsigned bd_block_size;
@@ -2006,7 +2007,6 @@ extern struct block_device *bdgrab(struct block_device *bdev);
2006extern void bd_set_size(struct block_device *, loff_t size); 2007extern void bd_set_size(struct block_device *, loff_t size);
2007extern void bd_forget(struct inode *inode); 2008extern void bd_forget(struct inode *inode);
2008extern void bdput(struct block_device *); 2009extern void bdput(struct block_device *);
2009extern struct block_device *open_by_devnum(dev_t, fmode_t);
2010extern void invalidate_bdev(struct block_device *); 2010extern void invalidate_bdev(struct block_device *);
2011extern int sync_blockdev(struct block_device *bdev); 2011extern int sync_blockdev(struct block_device *bdev);
2012extern struct super_block *freeze_bdev(struct block_device *); 2012extern struct super_block *freeze_bdev(struct block_device *);
@@ -2037,16 +2037,20 @@ extern const struct file_operations def_fifo_fops;
2037extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 2037extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
2038extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 2038extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
2039extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 2039extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
2040extern int blkdev_get(struct block_device *, fmode_t); 2040extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
2041extern int blkdev_put(struct block_device *, fmode_t); 2041extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
2042extern int bd_claim(struct block_device *, void *); 2042 void *holder);
2043extern void bd_release(struct block_device *); 2043extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
2044 void *holder);
2045extern int blkdev_put(struct block_device *bdev, fmode_t mode);
2044#ifdef CONFIG_SYSFS 2046#ifdef CONFIG_SYSFS
2045extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); 2047extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
2046extern void bd_release_from_disk(struct block_device *, struct gendisk *);
2047#else 2048#else
2048#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 2049static inline int bd_link_disk_holder(struct block_device *bdev,
2049#define bd_release_from_disk(bdev, disk) bd_release(bdev) 2050 struct gendisk *disk)
2051{
2052 return 0;
2053}
2050#endif 2054#endif
2051#endif 2055#endif
2052 2056
@@ -2082,8 +2086,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
2082extern const char *__bdevname(dev_t, char *buffer); 2086extern const char *__bdevname(dev_t, char *buffer);
2083extern const char *bdevname(struct block_device *bdev, char *buffer); 2087extern const char *bdevname(struct block_device *bdev, char *buffer);
2084extern struct block_device *lookup_bdev(const char *); 2088extern struct block_device *lookup_bdev(const char *);
2085extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
2086extern void close_bdev_exclusive(struct block_device *, fmode_t);
2087extern void blkdev_show(struct seq_file *,off_t); 2089extern void blkdev_show(struct seq_file *,off_t);
2088 2090
2089#else 2091#else
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7a7b9c1644e4..13893aa2ac9d 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -127,6 +127,11 @@ struct hd_struct {
127#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ 127#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
128#define GENHD_FL_NATIVE_CAPACITY 128 128#define GENHD_FL_NATIVE_CAPACITY 128
129 129
130enum {
131 DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
132 DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
133};
134
130#define BLK_SCSI_MAX_CMDS (256) 135#define BLK_SCSI_MAX_CMDS (256)
131#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 136#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
132 137
@@ -143,6 +148,8 @@ struct disk_part_tbl {
143 struct hd_struct __rcu *part[]; 148 struct hd_struct __rcu *part[];
144}; 149};
145 150
151struct disk_events;
152
146struct gendisk { 153struct gendisk {
147 /* major, first_minor and minors are input parameters only, 154 /* major, first_minor and minors are input parameters only,
148 * don't use directly. Use disk_devt() and disk_max_parts(). 155 * don't use directly. Use disk_devt() and disk_max_parts().
@@ -154,6 +161,10 @@ struct gendisk {
154 161
155 char disk_name[DISK_NAME_LEN]; /* name of major driver */ 162 char disk_name[DISK_NAME_LEN]; /* name of major driver */
156 char *(*devnode)(struct gendisk *gd, mode_t *mode); 163 char *(*devnode)(struct gendisk *gd, mode_t *mode);
164
165 unsigned int events; /* supported events */
166 unsigned int async_events; /* async events, subset of all */
167
157 /* Array of pointers to partitions indexed by partno. 168 /* Array of pointers to partitions indexed by partno.
158 * Protected with matching bdev lock but stat and other 169 * Protected with matching bdev lock but stat and other
159 * non-critical accesses use RCU. Always access through 170 * non-critical accesses use RCU. Always access through
@@ -171,9 +182,8 @@ struct gendisk {
171 struct kobject *slave_dir; 182 struct kobject *slave_dir;
172 183
173 struct timer_rand_state *random; 184 struct timer_rand_state *random;
174
175 atomic_t sync_io; /* RAID */ 185 atomic_t sync_io; /* RAID */
176 struct work_struct async_notify; 186 struct disk_events *ev;
177#ifdef CONFIG_BLK_DEV_INTEGRITY 187#ifdef CONFIG_BLK_DEV_INTEGRITY
178 struct blk_integrity *integrity; 188 struct blk_integrity *integrity;
179#endif 189#endif
@@ -395,7 +405,6 @@ extern void part_round_stats(int cpu, struct hd_struct *part);
395/* block/genhd.c */ 405/* block/genhd.c */
396extern void add_disk(struct gendisk *disk); 406extern void add_disk(struct gendisk *disk);
397extern void del_gendisk(struct gendisk *gp); 407extern void del_gendisk(struct gendisk *gp);
398extern void unlink_gendisk(struct gendisk *gp);
399extern struct gendisk *get_gendisk(dev_t dev, int *partno); 408extern struct gendisk *get_gendisk(dev_t dev, int *partno);
400extern struct block_device *bdget_disk(struct gendisk *disk, int partno); 409extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
401 410
@@ -407,6 +416,11 @@ static inline int get_disk_ro(struct gendisk *disk)
407 return disk->part0.policy; 416 return disk->part0.policy;
408} 417}
409 418
419extern void disk_block_events(struct gendisk *disk);
420extern void disk_unblock_events(struct gendisk *disk);
421extern void disk_check_events(struct gendisk *disk);
422extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
423
410/* drivers/char/random.c */ 424/* drivers/char/random.c */
411extern void add_disk_randomness(struct gendisk *disk); 425extern void add_disk_randomness(struct gendisk *disk);
412extern void rand_initialize_disk(struct gendisk *disk); 426extern void rand_initialize_disk(struct gendisk *disk);
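Together with the new ->check_events() hook added to struct block_device_operations above, these genhd.h additions form a polled media-event interface: a driver advertises the events it supports in gendisk->events and reports the ones currently pending from check_events(), while ->media_changed() remains only for compatibility. A rough sketch of how a removable-media driver might wire this up, assuming hypothetical example_* helpers:

#include <linux/module.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

/* Stub standing in for the driver's real media-change tracking. */
static bool example_media_just_changed(struct gendisk *disk)
{
	return false;
}

/* Report pending events; 'clearing' names the events the caller is
 * about to consume, so per-event state could be reset here. */
static unsigned int example_check_events(struct gendisk *disk,
					 unsigned int clearing)
{
	unsigned int events = 0;

	if (example_media_just_changed(disk))
		events |= DISK_EVENT_MEDIA_CHANGE;
	return events;
}

static const struct block_device_operations example_fops = {
	.owner		= THIS_MODULE,
	.check_events	= example_check_events,
};

/* At probe time the driver would advertise the supported events:
 *	disk->fops   = &example_fops;
 *	disk->events = DISK_EVENT_MEDIA_CHANGE;
 *	add_disk(disk);
 */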
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 1651fef18831..648d23358038 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -104,6 +104,7 @@ struct scsi_cmnd;
104#define UNMAP 0x42 104#define UNMAP 0x42
105#define READ_TOC 0x43 105#define READ_TOC 0x43
106#define READ_HEADER 0x44 106#define READ_HEADER 0x44
107#define GET_EVENT_STATUS_NOTIFICATION 0x4a
107#define LOG_SELECT 0x4c 108#define LOG_SELECT 0x4c
108#define LOG_SENSE 0x4d 109#define LOG_SENSE 0x4d
109#define XDWRITEREAD_10 0x53 110#define XDWRITEREAD_10 0x53
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d8ce278515c3..b56c65dc105d 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -486,16 +486,16 @@ TRACE_EVENT(block_split,
486); 486);
487 487
488/** 488/**
489 * block_remap - map request for a partition to the raw device 489 * block_bio_remap - map request for a logical device to the raw device
490 * @q: queue holding the operation 490 * @q: queue holding the operation
491 * @bio: revised operation 491 * @bio: revised operation
492 * @dev: device for the operation 492 * @dev: device for the operation
493 * @from: original sector for the operation 493 * @from: original sector for the operation
494 * 494 *
495 * An operation for a partition on a block device has been mapped to the 495 * An operation for a logical device has been mapped to the
496 * raw block device. 496 * raw block device.
497 */ 497 */
498TRACE_EVENT(block_remap, 498TRACE_EVENT(block_bio_remap,
499 499
500 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 500 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
501 sector_t from), 501 sector_t from),
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index baf667bb2794..2d6f6e109120 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void)
224 return res; 224 return res;
225 225
226 root_swap = res; 226 root_swap = res;
227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE); 227 res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
228 if (res) 228 if (res)
229 return res; 229 return res;
230 230
@@ -930,7 +930,8 @@ int swsusp_check(void)
930{ 930{
931 int error; 931 int error;
932 932
933 hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 933 hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
934 FMODE_READ, NULL);
934 if (!IS_ERR(hib_resume_bdev)) { 935 if (!IS_ERR(hib_resume_bdev)) {
935 set_blocksize(hib_resume_bdev, PAGE_SIZE); 936 set_blocksize(hib_resume_bdev, PAGE_SIZE);
936 clear_page(swsusp_header); 937 clear_page(swsusp_header);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7b8ec0281548..2b8e2ee7c0ef 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -887,7 +887,7 @@ static void blk_add_trace_split(void *ignore,
887} 887}
888 888
889/** 889/**
890 * blk_add_trace_remap - Add a trace for a remap operation 890 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
891 * @ignore: trace callback data parameter (not used) 891 * @ignore: trace callback data parameter (not used)
892 * @q: queue the io is for 892 * @q: queue the io is for
893 * @bio: the source bio 893 * @bio: the source bio
@@ -899,9 +899,9 @@ static void blk_add_trace_split(void *ignore,
899 * it spans a stripe (or similar). Add a trace for that action. 899 * it spans a stripe (or similar). Add a trace for that action.
900 * 900 *
901 **/ 901 **/
902static void blk_add_trace_remap(void *ignore, 902static void blk_add_trace_bio_remap(void *ignore,
903 struct request_queue *q, struct bio *bio, 903 struct request_queue *q, struct bio *bio,
904 dev_t dev, sector_t from) 904 dev_t dev, sector_t from)
905{ 905{
906 struct blk_trace *bt = q->blk_trace; 906 struct blk_trace *bt = q->blk_trace;
907 struct blk_io_trace_remap r; 907 struct blk_io_trace_remap r;
@@ -1016,7 +1016,7 @@ static void blk_register_tracepoints(void)
1016 WARN_ON(ret); 1016 WARN_ON(ret);
1017 ret = register_trace_block_split(blk_add_trace_split, NULL); 1017 ret = register_trace_block_split(blk_add_trace_split, NULL);
1018 WARN_ON(ret); 1018 WARN_ON(ret);
1019 ret = register_trace_block_remap(blk_add_trace_remap, NULL); 1019 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1020 WARN_ON(ret); 1020 WARN_ON(ret);
1021 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1021 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1022 WARN_ON(ret); 1022 WARN_ON(ret);
@@ -1025,7 +1025,7 @@ static void blk_register_tracepoints(void)
1025static void blk_unregister_tracepoints(void) 1025static void blk_unregister_tracepoints(void)
1026{ 1026{
1027 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1027 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1028 unregister_trace_block_remap(blk_add_trace_remap, NULL); 1028 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1029 unregister_trace_block_split(blk_add_trace_split, NULL); 1029 unregister_trace_block_split(blk_add_trace_split, NULL);
1030 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1030 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
1031 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); 1031 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 67ddaaf98c74..b6adcfbf6f48 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1677,7 +1677,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1677 if (S_ISBLK(inode->i_mode)) { 1677 if (S_ISBLK(inode->i_mode)) {
1678 struct block_device *bdev = I_BDEV(inode); 1678 struct block_device *bdev = I_BDEV(inode);
1679 set_blocksize(bdev, p->old_block_size); 1679 set_blocksize(bdev, p->old_block_size);
1680 bd_release(bdev); 1680 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1681 } else { 1681 } else {
1682 mutex_lock(&inode->i_mutex); 1682 mutex_lock(&inode->i_mutex);
1683 inode->i_flags &= ~S_SWAPFILE; 1683 inode->i_flags &= ~S_SWAPFILE;
@@ -1939,7 +1939,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1939 error = -EINVAL; 1939 error = -EINVAL;
1940 if (S_ISBLK(inode->i_mode)) { 1940 if (S_ISBLK(inode->i_mode)) {
1941 bdev = I_BDEV(inode); 1941 bdev = I_BDEV(inode);
1942 error = bd_claim(bdev, sys_swapon); 1942 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1943 sys_swapon);
1943 if (error < 0) { 1944 if (error < 0) {
1944 bdev = NULL; 1945 bdev = NULL;
1945 error = -EINVAL; 1946 error = -EINVAL;
@@ -2136,7 +2137,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2136bad_swap: 2137bad_swap:
2137 if (bdev) { 2138 if (bdev) {
2138 set_blocksize(bdev, p->old_block_size); 2139 set_blocksize(bdev, p->old_block_size);
2139 bd_release(bdev); 2140 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2140 } 2141 }
2141 destroy_swap_extents(p); 2142 destroy_swap_extents(p);
2142 swap_cgroup_swapoff(type); 2143 swap_cgroup_swapoff(type);
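The swapfile.c hunks show the remaining variant of the conversion, where the caller already holds a struct block_device (obtained here via I_BDEV()) rather than a path or dev_t: bd_claim()/bd_release() become blkdev_get()/blkdev_put() with FMODE_EXCL and a holder, taking a reference and claiming the device in one step. A minimal sketch of that pairing, with example_* names invented for illustration:

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Claim an already-looked-up block device exclusively; the holder
 * cookie identifies the owner of the claim, as sys_swapon does above. */
static int example_claim_bdev(struct block_device *bdev, void *holder)
{
	return blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
			  holder);
}

static void example_release_bdev(struct block_device *bdev)
{
	/* Passing FMODE_EXCL again drops the exclusive claim along with
	 * the reference taken by blkdev_get(). */
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}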