diff options
author | Tejun Heo <tj@kernel.org> | 2010-12-08 14:57:37 -0500 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-12-16 11:53:38 -0500 |
commit | 77ea887e433ad8389d416826936c110fa7910f80 (patch) | |
tree | ac9d32aabcebf5a465acae2066b12c9335b5ca6f /include | |
parent | d2bf1b6723ed0eab378363649d15b7893bf14e91 (diff) |
implement in-kernel gendisk events handling
Currently, media presence polling for removable block devices is done
from userland. There are several issues with this.
* Polling is done by periodically opening the device. For SCSI
devices, the command sequence generated by such action involves a
few different commands including TEST_UNIT_READY. This behavior,
while perfectly legal, is different from Windows which only issues
a single command, GET_EVENT_STATUS_NOTIFICATION. Unfortunately, some
ATAPI devices lock up after being periodically queried with such
command sequences.
* There is no reliable and unintrusive way for a userland program to
tell whether the target device is safe for media presence polling.
For example, polling for media presence during an on-going burning
session can make it fail. The polling program can avoid this by
opening the device with O_EXCL but then it risks making a valid
exclusive user of the device fail w/ -EBUSY.
* Userland polling is unnecessarily heavy and in-kernel implementation
is lighter and better coordinated (workqueue, timer slack).
This patch implements a framework for in-kernel disk event handling,
which includes media presence polling.
* bdops->check_events() is added, which supersedes ->media_changed().
It should check whether there are any pending events and return them if so.
Currently, two events are defined - DISK_EVENT_MEDIA_CHANGE and
DISK_EVENT_EJECT_REQUEST. ->check_events() is guaranteed not to be
called in parallel.
* gendisk->events and ->async_events are added. These should be
initialized by block driver before passing the device to add_disk().
The former contains the mask of all supported events and the latter
the mask of all events which the device can report without polling.
/sys/block/*/events[_async] export these to userland.
* Kernel parameter block.events_dfl_poll_msecs controls the system
polling interval (default is 0 which means disable) and
/sys/block/*/events_poll_msecs controls polling intervals for
individual devices (default is -1 meaning use system setting). Note
that if a device can report all supported events asynchronously and
its polling interval isn't explicitly set, the device won't be
polled regardless of the system polling interval.
* If a device is opened exclusively with write access, event checking
is automatically disabled until all write exclusive accesses are
released.
* There are event 'clearing' events. For example, both of the currently
defined events are cleared after the device has been successfully
opened. This information is passed to ->check_events() callback
using @clearing argument as a hint.
* Event checking is always performed from system_nrt_wq and timer
slack is set to 25% for polling.
* Nothing changes for drivers which implement ->media_changed() but
not ->check_events(). Going forward, all drivers will be converted
to ->check_events() and ->media_changed() will be dropped.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blkdev.h | 3 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | include/linux/genhd.h | 18 |
3 files changed, 21 insertions, 1 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83031bcf8366..05667e6989f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -1251,6 +1251,9 @@ struct block_device_operations { | |||
1251 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1251 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1252 | int (*direct_access) (struct block_device *, sector_t, | 1252 | int (*direct_access) (struct block_device *, sector_t, |
1253 | void **, unsigned long *); | 1253 | void **, unsigned long *); |
1254 | unsigned int (*check_events) (struct gendisk *disk, | ||
1255 | unsigned int clearing); | ||
1256 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | ||
1254 | int (*media_changed) (struct gendisk *); | 1257 | int (*media_changed) (struct gendisk *); |
1255 | void (*unlock_native_capacity) (struct gendisk *); | 1258 | void (*unlock_native_capacity) (struct gendisk *); |
1256 | int (*revalidate_disk) (struct gendisk *); | 1259 | int (*revalidate_disk) (struct gendisk *); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f48501563917..997d22efdef0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -662,6 +662,7 @@ struct block_device { | |||
662 | void * bd_claiming; | 662 | void * bd_claiming; |
663 | void * bd_holder; | 663 | void * bd_holder; |
664 | int bd_holders; | 664 | int bd_holders; |
665 | bool bd_write_holder; | ||
665 | #ifdef CONFIG_SYSFS | 666 | #ifdef CONFIG_SYSFS |
666 | struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ | 667 | struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ |
667 | #endif | 668 | #endif |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 56e17ed24816..13893aa2ac9d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -127,6 +127,11 @@ struct hd_struct { | |||
127 | #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ | 127 | #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ |
128 | #define GENHD_FL_NATIVE_CAPACITY 128 | 128 | #define GENHD_FL_NATIVE_CAPACITY 128 |
129 | 129 | ||
130 | enum { | ||
131 | DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ | ||
132 | DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ | ||
133 | }; | ||
134 | |||
130 | #define BLK_SCSI_MAX_CMDS (256) | 135 | #define BLK_SCSI_MAX_CMDS (256) |
131 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 136 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
132 | 137 | ||
@@ -143,6 +148,8 @@ struct disk_part_tbl { | |||
143 | struct hd_struct __rcu *part[]; | 148 | struct hd_struct __rcu *part[]; |
144 | }; | 149 | }; |
145 | 150 | ||
151 | struct disk_events; | ||
152 | |||
146 | struct gendisk { | 153 | struct gendisk { |
147 | /* major, first_minor and minors are input parameters only, | 154 | /* major, first_minor and minors are input parameters only, |
148 | * don't use directly. Use disk_devt() and disk_max_parts(). | 155 | * don't use directly. Use disk_devt() and disk_max_parts(). |
@@ -154,6 +161,10 @@ struct gendisk { | |||
154 | 161 | ||
155 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ | 162 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ |
156 | char *(*devnode)(struct gendisk *gd, mode_t *mode); | 163 | char *(*devnode)(struct gendisk *gd, mode_t *mode); |
164 | |||
165 | unsigned int events; /* supported events */ | ||
166 | unsigned int async_events; /* async events, subset of all */ | ||
167 | |||
157 | /* Array of pointers to partitions indexed by partno. | 168 | /* Array of pointers to partitions indexed by partno. |
158 | * Protected with matching bdev lock but stat and other | 169 | * Protected with matching bdev lock but stat and other |
159 | * non-critical accesses use RCU. Always access through | 170 | * non-critical accesses use RCU. Always access through |
@@ -171,8 +182,8 @@ struct gendisk { | |||
171 | struct kobject *slave_dir; | 182 | struct kobject *slave_dir; |
172 | 183 | ||
173 | struct timer_rand_state *random; | 184 | struct timer_rand_state *random; |
174 | |||
175 | atomic_t sync_io; /* RAID */ | 185 | atomic_t sync_io; /* RAID */ |
186 | struct disk_events *ev; | ||
176 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 187 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
177 | struct blk_integrity *integrity; | 188 | struct blk_integrity *integrity; |
178 | #endif | 189 | #endif |
@@ -405,6 +416,11 @@ static inline int get_disk_ro(struct gendisk *disk) | |||
405 | return disk->part0.policy; | 416 | return disk->part0.policy; |
406 | } | 417 | } |
407 | 418 | ||
419 | extern void disk_block_events(struct gendisk *disk); | ||
420 | extern void disk_unblock_events(struct gendisk *disk); | ||
421 | extern void disk_check_events(struct gendisk *disk); | ||
422 | extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); | ||
423 | |||
408 | /* drivers/char/random.c */ | 424 | /* drivers/char/random.c */ |
409 | extern void add_disk_randomness(struct gendisk *disk); | 425 | extern void add_disk_randomness(struct gendisk *disk); |
410 | extern void rand_initialize_disk(struct gendisk *disk); | 426 | extern void rand_initialize_disk(struct gendisk *disk); |