author | Tejun Heo <tj@kernel.org> | 2010-12-08 14:57:37 -0500 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-12-16 11:53:38 -0500 |
commit | 77ea887e433ad8389d416826936c110fa7910f80 (patch) | |
tree | ac9d32aabcebf5a465acae2066b12c9335b5ca6f | |
parent | d2bf1b6723ed0eab378363649d15b7893bf14e91 (diff) |
implement in-kernel gendisk events handling
Currently, media presence polling for removable block devices is done
from userland. There are several issues with this.
* Polling is done by periodically opening the device. For SCSI
devices, the command sequence generated by such action involves a
few different commands including TEST_UNIT_READY. This behavior,
while perfectly legal, is different from Windows, which only issues
a single command, GET_EVENT_STATUS_NOTIFICATION. Unfortunately, some
ATAPI devices lock up after being periodically queried with such
command sequences.
* There is no reliable and unintrusive way for a userland program to
tell whether the target device is safe for media presence polling.
For example, polling for media presence during an ongoing burning
session can make it fail. The polling program can avoid this by
opening the device with O_EXCL, but then it risks making a valid
exclusive user of the device fail w/ -EBUSY.
* Userland polling is unnecessarily heavy and an in-kernel
implementation is lighter and better coordinated (workqueue, timer slack).
This patch implements a framework for in-kernel disk event handling,
which includes media presence polling.
* bdops->check_events() is added, which supersedes ->media_changed().
It should check whether there are any pending events and return the
mask of those it found (see the driver-side sketch after this list).
Currently, two events are defined - DISK_EVENT_MEDIA_CHANGE and
DISK_EVENT_EJECT_REQUEST. ->check_events() is guaranteed not to be
called in parallel.
* gendisk->events and ->async_events are added. These should be
initialized by the block driver before passing the device to add_disk().
The former contains the mask of all supported events and the latter
the mask of all events which the device can report without polling.
/sys/block/*/events[_async] export these to userland.
* The kernel parameter block.events_dfl_poll_msecs controls the system
polling interval (default is 0, which means polling is disabled) and
/sys/block/*/events_poll_msecs controls the polling interval for
individual devices (default is -1, meaning use the system setting). Note
that if a device can report all supported events asynchronously and
its polling interval isn't explicitly set, the device won't be
polled regardless of the system polling interval.
* If a device is opened exclusively with write access, event checking
is automatically disabled until all exclusive write accesses are
released.
* Certain operations clear events. For example, both currently
defined events are cleared after the device has been successfully
opened. This information is passed to the ->check_events() callback
via the @clearing argument as a hint.
* Event checking is always performed from system_nrt_wq and timer
slack is set to 25% for polling.
* Nothing changes for drivers which implement ->media_changed() but
not ->check_events(). Going forward, all drivers will be converted
to ->check_events() and ->media_changed() will be dropped.
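As an illustration of the driver-side contract described above, here is a
minimal sketch of how a removable-media driver might hook into the
framework. The struct foo_device, the foo_* helpers, foo_fops and
foo_register_disk() are hypothetical stand-ins for driver-specific code;
only ->check_events(), disk->events, disk->async_events and the
DISK_EVENT_* flags come from this patch.

  #include <linux/module.h>
  #include <linux/blkdev.h>
  #include <linux/genhd.h>

  /* hypothetical per-device state; a real driver has its own */
  struct foo_device {
  	struct gendisk	*disk;
  	/* ... hardware specific fields ... */
  };

  /* hypothetical hardware queries, stand-ins for real driver code */
  static bool foo_media_changed(struct foo_device *fd)   { return false; }
  static bool foo_eject_requested(struct foo_device *fd) { return false; }

  static unsigned int foo_check_events(struct gendisk *disk,
  				     unsigned int clearing)
  {
  	struct foo_device *fd = disk->private_data;
  	unsigned int events = 0;

  	/* @clearing hints which events the caller is about to consume */
  	if (foo_media_changed(fd))
  		events |= DISK_EVENT_MEDIA_CHANGE;
  	if (foo_eject_requested(fd))
  		events |= DISK_EVENT_EJECT_REQUEST;

  	return events;
  }

  static const struct block_device_operations foo_fops = {
  	.owner		= THIS_MODULE,
  	.check_events	= foo_check_events,	/* supersedes ->media_changed() */
  };

  static void foo_register_disk(struct foo_device *fd)
  {
  	struct gendisk *disk = fd->disk;

  	disk->fops = &foo_fops;
  	disk->private_data = fd;
  	/* all events this device can generate ... */
  	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
  	/* ... none of which it can report without being polled */
  	disk->async_events = 0;

  	add_disk(disk);		/* disk_add_events() runs from add_disk() */
  }

With events set like this and async_events left at zero, such a disk is
polled only if an interval is configured, either globally (e.g. booting
with block.events_dfl_poll_msecs=2000) or per device via
/sys/block/<disk>/events_poll_msecs, and reading /sys/block/<disk>/events
lists "media_change eject_request".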
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | block/genhd.c | 429 | ||||
-rw-r--r-- | fs/block_dev.c | 41 | ||||
-rw-r--r-- | include/linux/blkdev.h | 3 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | include/linux/genhd.h | 18 |
5 files changed, 484 insertions, 8 deletions
diff --git a/block/genhd.c b/block/genhd.c index 2e5e4c0a1133..5465a824d489 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
19 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/log2.h> | ||
21 | 22 | ||
22 | #include "blk.h" | 23 | #include "blk.h" |
23 | 24 | ||
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr); | |||
35 | 36 | ||
36 | static struct device_type disk_type; | 37 | static struct device_type disk_type; |
37 | 38 | ||
39 | static void disk_add_events(struct gendisk *disk); | ||
40 | static void disk_del_events(struct gendisk *disk); | ||
41 | static void disk_release_events(struct gendisk *disk); | ||
42 | |||
38 | /** | 43 | /** |
39 | * disk_get_part - get partition | 44 | * disk_get_part - get partition |
40 | * @disk: disk to look partition from | 45 | * @disk: disk to look partition from |
@@ -609,6 +614,8 @@ void add_disk(struct gendisk *disk) | |||
609 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 614 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
610 | "bdi"); | 615 | "bdi"); |
611 | WARN_ON(retval); | 616 | WARN_ON(retval); |
617 | |||
618 | disk_add_events(disk); | ||
612 | } | 619 | } |
613 | EXPORT_SYMBOL(add_disk); | 620 | EXPORT_SYMBOL(add_disk); |
614 | 621 | ||
@@ -617,6 +624,8 @@ void del_gendisk(struct gendisk *disk) | |||
617 | struct disk_part_iter piter; | 624 | struct disk_part_iter piter; |
618 | struct hd_struct *part; | 625 | struct hd_struct *part; |
619 | 626 | ||
627 | disk_del_events(disk); | ||
628 | |||
620 | /* invalidate stuff */ | 629 | /* invalidate stuff */ |
621 | disk_part_iter_init(&piter, disk, | 630 | disk_part_iter_init(&piter, disk, |
622 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | 631 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
@@ -1089,6 +1098,7 @@ static void disk_release(struct device *dev) | |||
1089 | { | 1098 | { |
1090 | struct gendisk *disk = dev_to_disk(dev); | 1099 | struct gendisk *disk = dev_to_disk(dev); |
1091 | 1100 | ||
1101 | disk_release_events(disk); | ||
1092 | kfree(disk->random); | 1102 | kfree(disk->random); |
1093 | disk_replace_part_tbl(disk, NULL); | 1103 | disk_replace_part_tbl(disk, NULL); |
1094 | free_part_stats(&disk->part0); | 1104 | free_part_stats(&disk->part0); |
@@ -1350,3 +1360,422 @@ int invalidate_partition(struct gendisk *disk, int partno) | |||
1350 | } | 1360 | } |
1351 | 1361 | ||
1352 | EXPORT_SYMBOL(invalidate_partition); | 1362 | EXPORT_SYMBOL(invalidate_partition); |
1363 | |||
1364 | /* | ||
1365 | * Disk events - monitor disk events like media change and eject request. | ||
1366 | */ | ||
1367 | struct disk_events { | ||
1368 | struct list_head node; /* all disk_event's */ | ||
1369 | struct gendisk *disk; /* the associated disk */ | ||
1370 | spinlock_t lock; | ||
1371 | |||
1372 | int block; /* event blocking depth */ | ||
1373 | unsigned int pending; /* events already sent out */ | ||
1374 | unsigned int clearing; /* events being cleared */ | ||
1375 | |||
1376 | long poll_msecs; /* interval, -1 for default */ | ||
1377 | struct delayed_work dwork; | ||
1378 | }; | ||
1379 | |||
1380 | static const char *disk_events_strs[] = { | ||
1381 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", | ||
1382 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", | ||
1383 | }; | ||
1384 | |||
1385 | static char *disk_uevents[] = { | ||
1386 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", | ||
1387 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", | ||
1388 | }; | ||
1389 | |||
1390 | /* list of all disk_events */ | ||
1391 | static DEFINE_MUTEX(disk_events_mutex); | ||
1392 | static LIST_HEAD(disk_events); | ||
1393 | |||
1394 | /* disable in-kernel polling by default */ | ||
1395 | static unsigned long disk_events_dfl_poll_msecs = 0; | ||
1396 | |||
1397 | static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | ||
1398 | { | ||
1399 | struct disk_events *ev = disk->ev; | ||
1400 | long intv_msecs = 0; | ||
1401 | |||
1402 | /* | ||
1403 | * If device-specific poll interval is set, always use it. If | ||
1404 | * the default is being used, poll iff there are events which | ||
1405 | * can't be monitored asynchronously. | ||
1406 | */ | ||
1407 | if (ev->poll_msecs >= 0) | ||
1408 | intv_msecs = ev->poll_msecs; | ||
1409 | else if (disk->events & ~disk->async_events) | ||
1410 | intv_msecs = disk_events_dfl_poll_msecs; | ||
1411 | |||
1412 | return msecs_to_jiffies(intv_msecs); | ||
1413 | } | ||
1414 | |||
1415 | static void __disk_block_events(struct gendisk *disk, bool sync) | ||
1416 | { | ||
1417 | struct disk_events *ev = disk->ev; | ||
1418 | unsigned long flags; | ||
1419 | bool cancel; | ||
1420 | |||
1421 | spin_lock_irqsave(&ev->lock, flags); | ||
1422 | cancel = !ev->block++; | ||
1423 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1424 | |||
1425 | if (cancel) { | ||
1426 | if (sync) | ||
1427 | cancel_delayed_work_sync(&disk->ev->dwork); | ||
1428 | else | ||
1429 | cancel_delayed_work(&disk->ev->dwork); | ||
1430 | } | ||
1431 | } | ||
1432 | |||
1433 | static void __disk_unblock_events(struct gendisk *disk, bool check_now) | ||
1434 | { | ||
1435 | struct disk_events *ev = disk->ev; | ||
1436 | unsigned long intv; | ||
1437 | unsigned long flags; | ||
1438 | |||
1439 | spin_lock_irqsave(&ev->lock, flags); | ||
1440 | |||
1441 | if (WARN_ON_ONCE(ev->block <= 0)) | ||
1442 | goto out_unlock; | ||
1443 | |||
1444 | if (--ev->block) | ||
1445 | goto out_unlock; | ||
1446 | |||
1447 | /* | ||
1448 | * Not exactly a latency critical operation, set poll timer | ||
1449 | * slack to 25% and kick event check. | ||
1450 | */ | ||
1451 | intv = disk_events_poll_jiffies(disk); | ||
1452 | set_timer_slack(&ev->dwork.timer, intv / 4); | ||
1453 | if (check_now) | ||
1454 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1455 | else if (intv) | ||
1456 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1457 | out_unlock: | ||
1458 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1459 | } | ||
1460 | |||
1461 | /** | ||
1462 | * disk_block_events - block and flush disk event checking | ||
1463 | * @disk: disk to block events for | ||
1464 | * | ||
1465 | * On return from this function, it is guaranteed that event checking | ||
1466 | * isn't in progress and won't happen until unblocked by | ||
1467 | * disk_unblock_events(). Events blocking is counted and the actual | ||
1468 | * unblocking happens after the matching number of unblocks are done. | ||
1469 | * | ||
1470 | * Note that this intentionally does not block event checking from | ||
1471 | * disk_clear_events(). | ||
1472 | * | ||
1473 | * CONTEXT: | ||
1474 | * Might sleep. | ||
1475 | */ | ||
1476 | void disk_block_events(struct gendisk *disk) | ||
1477 | { | ||
1478 | if (disk->ev) | ||
1479 | __disk_block_events(disk, true); | ||
1480 | } | ||
1481 | |||
1482 | /** | ||
1483 | * disk_unblock_events - unblock disk event checking | ||
1484 | * @disk: disk to unblock events for | ||
1485 | * | ||
1486 | * Undo disk_block_events(). When the block count reaches zero, it | ||
1487 | * starts events polling if configured. | ||
1488 | * | ||
1489 | * CONTEXT: | ||
1490 | * Don't care. Safe to call from irq context. | ||
1491 | */ | ||
1492 | void disk_unblock_events(struct gendisk *disk) | ||
1493 | { | ||
1494 | if (disk->ev) | ||
1495 | __disk_unblock_events(disk, true); | ||
1496 | } | ||
1497 | |||
1498 | /** | ||
1499 | * disk_check_events - schedule immediate event checking | ||
1500 | * @disk: disk to check events for | ||
1501 | * | ||
1502 | * Schedule immediate event checking on @disk if not blocked. | ||
1503 | * | ||
1504 | * CONTEXT: | ||
1505 | * Don't care. Safe to call from irq context. | ||
1506 | */ | ||
1507 | void disk_check_events(struct gendisk *disk) | ||
1508 | { | ||
1509 | if (disk->ev) { | ||
1510 | __disk_block_events(disk, false); | ||
1511 | __disk_unblock_events(disk, true); | ||
1512 | } | ||
1513 | } | ||
1514 | EXPORT_SYMBOL_GPL(disk_check_events); | ||
1515 | |||
1516 | /** | ||
1517 | * disk_clear_events - synchronously check, clear and return pending events | ||
1518 | * @disk: disk to fetch and clear events from | ||
1519 | * @mask: mask of events to be fetched and cleared | ||
1520 | * | ||
1521 | * Disk events are synchronously checked and pending events in @mask | ||
1522 | * are cleared and returned. This ignores the block count. | ||
1523 | * | ||
1524 | * CONTEXT: | ||
1525 | * Might sleep. | ||
1526 | */ | ||
1527 | unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | ||
1528 | { | ||
1529 | const struct block_device_operations *bdops = disk->fops; | ||
1530 | struct disk_events *ev = disk->ev; | ||
1531 | unsigned int pending; | ||
1532 | |||
1533 | if (!ev) { | ||
1534 | /* for drivers still using the old ->media_changed method */ | ||
1535 | if ((mask & DISK_EVENT_MEDIA_CHANGE) && | ||
1536 | bdops->media_changed && bdops->media_changed(disk)) | ||
1537 | return DISK_EVENT_MEDIA_CHANGE; | ||
1538 | return 0; | ||
1539 | } | ||
1540 | |||
1541 | /* tell the workfn about the events being cleared */ | ||
1542 | spin_lock_irq(&ev->lock); | ||
1543 | ev->clearing |= mask; | ||
1544 | spin_unlock_irq(&ev->lock); | ||
1545 | |||
1546 | /* unconditionally schedule event check and wait for it to finish */ | ||
1547 | __disk_block_events(disk, true); | ||
1548 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1549 | flush_delayed_work(&ev->dwork); | ||
1550 | __disk_unblock_events(disk, false); | ||
1551 | |||
1552 | /* then, fetch and clear pending events */ | ||
1553 | spin_lock_irq(&ev->lock); | ||
1554 | WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ | ||
1555 | pending = ev->pending & mask; | ||
1556 | ev->pending &= ~mask; | ||
1557 | spin_unlock_irq(&ev->lock); | ||
1558 | |||
1559 | return pending; | ||
1560 | } | ||
1561 | |||
1562 | static void disk_events_workfn(struct work_struct *work) | ||
1563 | { | ||
1564 | struct delayed_work *dwork = to_delayed_work(work); | ||
1565 | struct disk_events *ev = container_of(dwork, struct disk_events, dwork); | ||
1566 | struct gendisk *disk = ev->disk; | ||
1567 | char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; | ||
1568 | unsigned int clearing = ev->clearing; | ||
1569 | unsigned int events; | ||
1570 | unsigned long intv; | ||
1571 | int nr_events = 0, i; | ||
1572 | |||
1573 | /* check events */ | ||
1574 | events = disk->fops->check_events(disk, clearing); | ||
1575 | |||
1576 | /* accumulate pending events and schedule next poll if necessary */ | ||
1577 | spin_lock_irq(&ev->lock); | ||
1578 | |||
1579 | events &= ~ev->pending; | ||
1580 | ev->pending |= events; | ||
1581 | ev->clearing &= ~clearing; | ||
1582 | |||
1583 | intv = disk_events_poll_jiffies(disk); | ||
1584 | if (!ev->block && intv) | ||
1585 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1586 | |||
1587 | spin_unlock_irq(&ev->lock); | ||
1588 | |||
1589 | /* tell userland about new events */ | ||
1590 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | ||
1591 | if (events & (1 << i)) | ||
1592 | envp[nr_events++] = disk_uevents[i]; | ||
1593 | |||
1594 | if (nr_events) | ||
1595 | kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); | ||
1596 | } | ||
1597 | |||
1598 | /* | ||
1599 | * A disk events enabled device has the following sysfs nodes under | ||
1600 | * its /sys/block/X/ directory. | ||
1601 | * | ||
1602 | * events : list of all supported events | ||
1603 | * events_async : list of events which can be detected w/o polling | ||
1604 | * events_poll_msecs : polling interval, 0: disable, -1: system default | ||
1605 | */ | ||
1606 | static ssize_t __disk_events_show(unsigned int events, char *buf) | ||
1607 | { | ||
1608 | const char *delim = ""; | ||
1609 | ssize_t pos = 0; | ||
1610 | int i; | ||
1611 | |||
1612 | for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) | ||
1613 | if (events & (1 << i)) { | ||
1614 | pos += sprintf(buf + pos, "%s%s", | ||
1615 | delim, disk_events_strs[i]); | ||
1616 | delim = " "; | ||
1617 | } | ||
1618 | if (pos) | ||
1619 | pos += sprintf(buf + pos, "\n"); | ||
1620 | return pos; | ||
1621 | } | ||
1622 | |||
1623 | static ssize_t disk_events_show(struct device *dev, | ||
1624 | struct device_attribute *attr, char *buf) | ||
1625 | { | ||
1626 | struct gendisk *disk = dev_to_disk(dev); | ||
1627 | |||
1628 | return __disk_events_show(disk->events, buf); | ||
1629 | } | ||
1630 | |||
1631 | static ssize_t disk_events_async_show(struct device *dev, | ||
1632 | struct device_attribute *attr, char *buf) | ||
1633 | { | ||
1634 | struct gendisk *disk = dev_to_disk(dev); | ||
1635 | |||
1636 | return __disk_events_show(disk->async_events, buf); | ||
1637 | } | ||
1638 | |||
1639 | static ssize_t disk_events_poll_msecs_show(struct device *dev, | ||
1640 | struct device_attribute *attr, | ||
1641 | char *buf) | ||
1642 | { | ||
1643 | struct gendisk *disk = dev_to_disk(dev); | ||
1644 | |||
1645 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | ||
1646 | } | ||
1647 | |||
1648 | static ssize_t disk_events_poll_msecs_store(struct device *dev, | ||
1649 | struct device_attribute *attr, | ||
1650 | const char *buf, size_t count) | ||
1651 | { | ||
1652 | struct gendisk *disk = dev_to_disk(dev); | ||
1653 | long intv; | ||
1654 | |||
1655 | if (!count || !sscanf(buf, "%ld", &intv)) | ||
1656 | return -EINVAL; | ||
1657 | |||
1658 | if (intv < 0 && intv != -1) | ||
1659 | return -EINVAL; | ||
1660 | |||
1661 | __disk_block_events(disk, true); | ||
1662 | disk->ev->poll_msecs = intv; | ||
1663 | __disk_unblock_events(disk, true); | ||
1664 | |||
1665 | return count; | ||
1666 | } | ||
1667 | |||
1668 | static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); | ||
1669 | static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); | ||
1670 | static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, | ||
1671 | disk_events_poll_msecs_show, | ||
1672 | disk_events_poll_msecs_store); | ||
1673 | |||
1674 | static const struct attribute *disk_events_attrs[] = { | ||
1675 | &dev_attr_events.attr, | ||
1676 | &dev_attr_events_async.attr, | ||
1677 | &dev_attr_events_poll_msecs.attr, | ||
1678 | NULL, | ||
1679 | }; | ||
1680 | |||
1681 | /* | ||
1682 | * The default polling interval can be specified by the kernel | ||
1683 | * parameter block.events_dfl_poll_msecs which defaults to 0 | ||
1684 | * (disable). This can also be modified runtime by writing to | ||
1685 | * /sys/module/block/events_dfl_poll_msecs. | ||
1686 | */ | ||
1687 | static int disk_events_set_dfl_poll_msecs(const char *val, | ||
1688 | const struct kernel_param *kp) | ||
1689 | { | ||
1690 | struct disk_events *ev; | ||
1691 | int ret; | ||
1692 | |||
1693 | ret = param_set_ulong(val, kp); | ||
1694 | if (ret < 0) | ||
1695 | return ret; | ||
1696 | |||
1697 | mutex_lock(&disk_events_mutex); | ||
1698 | |||
1699 | list_for_each_entry(ev, &disk_events, node) | ||
1700 | disk_check_events(ev->disk); | ||
1701 | |||
1702 | mutex_unlock(&disk_events_mutex); | ||
1703 | |||
1704 | return 0; | ||
1705 | } | ||
1706 | |||
1707 | static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { | ||
1708 | .set = disk_events_set_dfl_poll_msecs, | ||
1709 | .get = param_get_ulong, | ||
1710 | }; | ||
1711 | |||
1712 | #undef MODULE_PARAM_PREFIX | ||
1713 | #define MODULE_PARAM_PREFIX "block." | ||
1714 | |||
1715 | module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | ||
1716 | &disk_events_dfl_poll_msecs, 0644); | ||
1717 | |||
1718 | /* | ||
1719 | * disk_{add|del|release}_events - initialize and destroy disk_events. | ||
1720 | */ | ||
1721 | static void disk_add_events(struct gendisk *disk) | ||
1722 | { | ||
1723 | struct disk_events *ev; | ||
1724 | |||
1725 | if (!disk->fops->check_events || !(disk->events | disk->async_events)) | ||
1726 | return; | ||
1727 | |||
1728 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); | ||
1729 | if (!ev) { | ||
1730 | pr_warn("%s: failed to initialize events\n", disk->disk_name); | ||
1731 | return; | ||
1732 | } | ||
1733 | |||
1734 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, | ||
1735 | disk_events_attrs) < 0) { | ||
1736 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1737 | disk->disk_name); | ||
1738 | kfree(ev); | ||
1739 | return; | ||
1740 | } | ||
1741 | |||
1742 | disk->ev = ev; | ||
1743 | |||
1744 | INIT_LIST_HEAD(&ev->node); | ||
1745 | ev->disk = disk; | ||
1746 | spin_lock_init(&ev->lock); | ||
1747 | ev->block = 1; | ||
1748 | ev->poll_msecs = -1; | ||
1749 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | ||
1750 | |||
1751 | mutex_lock(&disk_events_mutex); | ||
1752 | list_add_tail(&ev->node, &disk_events); | ||
1753 | mutex_unlock(&disk_events_mutex); | ||
1754 | |||
1755 | /* | ||
1756 | * Block count is initialized to 1 and the following initial | ||
1757 | * unblock kicks it into action. | ||
1758 | */ | ||
1759 | __disk_unblock_events(disk, true); | ||
1760 | } | ||
1761 | |||
1762 | static void disk_del_events(struct gendisk *disk) | ||
1763 | { | ||
1764 | if (!disk->ev) | ||
1765 | return; | ||
1766 | |||
1767 | __disk_block_events(disk, true); | ||
1768 | |||
1769 | mutex_lock(&disk_events_mutex); | ||
1770 | list_del_init(&disk->ev->node); | ||
1771 | mutex_unlock(&disk_events_mutex); | ||
1772 | |||
1773 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | ||
1774 | } | ||
1775 | |||
1776 | static void disk_release_events(struct gendisk *disk) | ||
1777 | { | ||
1778 | /* the block count should be 1 from disk_del_events() */ | ||
1779 | WARN_ON_ONCE(disk->ev && disk->ev->block != 1); | ||
1780 | kfree(disk->ev); | ||
1781 | } | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index c1c1b8c3fb99..6017389711ee 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -948,10 +948,11 @@ int check_disk_change(struct block_device *bdev) | |||
948 | { | 948 | { |
949 | struct gendisk *disk = bdev->bd_disk; | 949 | struct gendisk *disk = bdev->bd_disk; |
950 | const struct block_device_operations *bdops = disk->fops; | 950 | const struct block_device_operations *bdops = disk->fops; |
951 | unsigned int events; | ||
951 | 952 | ||
952 | if (!bdops->media_changed) | 953 | events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | |
953 | return 0; | 954 | DISK_EVENT_EJECT_REQUEST); |
954 | if (!bdops->media_changed(bdev->bd_disk)) | 955 | if (!(events & DISK_EVENT_MEDIA_CHANGE)) |
955 | return 0; | 956 | return 0; |
956 | 957 | ||
957 | flush_disk(bdev); | 958 | flush_disk(bdev); |
@@ -1158,9 +1159,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1158 | 1159 | ||
1159 | if (whole) { | 1160 | if (whole) { |
1160 | /* finish claiming */ | 1161 | /* finish claiming */ |
1162 | mutex_lock(&bdev->bd_mutex); | ||
1161 | spin_lock(&bdev_lock); | 1163 | spin_lock(&bdev_lock); |
1162 | 1164 | ||
1163 | if (res == 0) { | 1165 | if (!res) { |
1164 | BUG_ON(!bd_may_claim(bdev, whole, holder)); | 1166 | BUG_ON(!bd_may_claim(bdev, whole, holder)); |
1165 | /* | 1167 | /* |
1166 | * Note that for a whole device bd_holders | 1168 | * Note that for a whole device bd_holders |
@@ -1180,6 +1182,20 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1180 | wake_up_bit(&whole->bd_claiming, 0); | 1182 | wake_up_bit(&whole->bd_claiming, 0); |
1181 | 1183 | ||
1182 | spin_unlock(&bdev_lock); | 1184 | spin_unlock(&bdev_lock); |
1185 | |||
1186 | /* | ||
1187 | * Block event polling for write claims. Any write | ||
1188 | * holder makes the write_holder state stick until all | ||
1189 | * are released. This is good enough and tracking | ||
1190 | * individual writeable reference is too fragile given | ||
1191 | * the way @mode is used in blkdev_get/put(). | ||
1192 | */ | ||
1193 | if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | ||
1194 | bdev->bd_write_holder = true; | ||
1195 | disk_block_events(bdev->bd_disk); | ||
1196 | } | ||
1197 | |||
1198 | mutex_unlock(&bdev->bd_mutex); | ||
1183 | bdput(whole); | 1199 | bdput(whole); |
1184 | } | 1200 | } |
1185 | 1201 | ||
@@ -1353,12 +1369,23 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) | |||
1353 | 1369 | ||
1354 | spin_unlock(&bdev_lock); | 1370 | spin_unlock(&bdev_lock); |
1355 | 1371 | ||
1356 | /* if this was the last claim, holder link should go too */ | 1372 | /* |
1357 | if (bdev_free) | 1373 | * If this was the last claim, remove holder link and |
1374 | * unblock evpoll if it was a write holder. | ||
1375 | */ | ||
1376 | if (bdev_free) { | ||
1358 | bd_unlink_disk_holder(bdev); | 1377 | bd_unlink_disk_holder(bdev); |
1378 | if (bdev->bd_write_holder) { | ||
1379 | disk_unblock_events(bdev->bd_disk); | ||
1380 | bdev->bd_write_holder = false; | ||
1381 | } else | ||
1382 | disk_check_events(bdev->bd_disk); | ||
1383 | } | ||
1359 | 1384 | ||
1360 | mutex_unlock(&bdev->bd_mutex); | 1385 | mutex_unlock(&bdev->bd_mutex); |
1361 | } | 1386 | } else |
1387 | disk_check_events(bdev->bd_disk); | ||
1388 | |||
1362 | return __blkdev_put(bdev, mode, 0); | 1389 | return __blkdev_put(bdev, mode, 0); |
1363 | } | 1390 | } |
1364 | EXPORT_SYMBOL(blkdev_put); | 1391 | EXPORT_SYMBOL(blkdev_put); |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83031bcf8366..05667e6989f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -1251,6 +1251,9 @@ struct block_device_operations { | |||
1251 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); | 1251 | int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); |
1252 | int (*direct_access) (struct block_device *, sector_t, | 1252 | int (*direct_access) (struct block_device *, sector_t, |
1253 | void **, unsigned long *); | 1253 | void **, unsigned long *); |
1254 | unsigned int (*check_events) (struct gendisk *disk, | ||
1255 | unsigned int clearing); | ||
1256 | /* ->media_changed() is DEPRECATED, use ->check_events() instead */ | ||
1254 | int (*media_changed) (struct gendisk *); | 1257 | int (*media_changed) (struct gendisk *); |
1255 | void (*unlock_native_capacity) (struct gendisk *); | 1258 | void (*unlock_native_capacity) (struct gendisk *); |
1256 | int (*revalidate_disk) (struct gendisk *); | 1259 | int (*revalidate_disk) (struct gendisk *); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f48501563917..997d22efdef0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -662,6 +662,7 @@ struct block_device { | |||
662 | void * bd_claiming; | 662 | void * bd_claiming; |
663 | void * bd_holder; | 663 | void * bd_holder; |
664 | int bd_holders; | 664 | int bd_holders; |
665 | bool bd_write_holder; | ||
665 | #ifdef CONFIG_SYSFS | 666 | #ifdef CONFIG_SYSFS |
666 | struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ | 667 | struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ |
667 | #endif | 668 | #endif |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 56e17ed24816..13893aa2ac9d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -127,6 +127,11 @@ struct hd_struct { | |||
127 | #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ | 127 | #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ |
128 | #define GENHD_FL_NATIVE_CAPACITY 128 | 128 | #define GENHD_FL_NATIVE_CAPACITY 128 |
129 | 129 | ||
130 | enum { | ||
131 | DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ | ||
132 | DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ | ||
133 | }; | ||
134 | |||
130 | #define BLK_SCSI_MAX_CMDS (256) | 135 | #define BLK_SCSI_MAX_CMDS (256) |
131 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) | 136 | #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) |
132 | 137 | ||
@@ -143,6 +148,8 @@ struct disk_part_tbl { | |||
143 | struct hd_struct __rcu *part[]; | 148 | struct hd_struct __rcu *part[]; |
144 | }; | 149 | }; |
145 | 150 | ||
151 | struct disk_events; | ||
152 | |||
146 | struct gendisk { | 153 | struct gendisk { |
147 | /* major, first_minor and minors are input parameters only, | 154 | /* major, first_minor and minors are input parameters only, |
148 | * don't use directly. Use disk_devt() and disk_max_parts(). | 155 | * don't use directly. Use disk_devt() and disk_max_parts(). |
@@ -154,6 +161,10 @@ struct gendisk { | |||
154 | 161 | ||
155 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ | 162 | char disk_name[DISK_NAME_LEN]; /* name of major driver */ |
156 | char *(*devnode)(struct gendisk *gd, mode_t *mode); | 163 | char *(*devnode)(struct gendisk *gd, mode_t *mode); |
164 | |||
165 | unsigned int events; /* supported events */ | ||
166 | unsigned int async_events; /* async events, subset of all */ | ||
167 | |||
157 | /* Array of pointers to partitions indexed by partno. | 168 | /* Array of pointers to partitions indexed by partno. |
158 | * Protected with matching bdev lock but stat and other | 169 | * Protected with matching bdev lock but stat and other |
159 | * non-critical accesses use RCU. Always access through | 170 | * non-critical accesses use RCU. Always access through |
@@ -171,8 +182,8 @@ struct gendisk { | |||
171 | struct kobject *slave_dir; | 182 | struct kobject *slave_dir; |
172 | 183 | ||
173 | struct timer_rand_state *random; | 184 | struct timer_rand_state *random; |
174 | |||
175 | atomic_t sync_io; /* RAID */ | 185 | atomic_t sync_io; /* RAID */ |
186 | struct disk_events *ev; | ||
176 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 187 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
177 | struct blk_integrity *integrity; | 188 | struct blk_integrity *integrity; |
178 | #endif | 189 | #endif |
@@ -405,6 +416,11 @@ static inline int get_disk_ro(struct gendisk *disk) | |||
405 | return disk->part0.policy; | 416 | return disk->part0.policy; |
406 | } | 417 | } |
407 | 418 | ||
419 | extern void disk_block_events(struct gendisk *disk); | ||
420 | extern void disk_unblock_events(struct gendisk *disk); | ||
421 | extern void disk_check_events(struct gendisk *disk); | ||
422 | extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); | ||
423 | |||
408 | /* drivers/char/random.c */ | 424 | /* drivers/char/random.c */ |
409 | extern void add_disk_randomness(struct gendisk *disk); | 425 | extern void add_disk_randomness(struct gendisk *disk); |
410 | extern void rand_initialize_disk(struct gendisk *disk); | 426 | extern void rand_initialize_disk(struct gendisk *disk); |