diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/genhd.c | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/block/genhd.c b/block/genhd.c index 2e5e4c0a1133..5465a824d489 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
19 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/log2.h> | ||
21 | 22 | ||
22 | #include "blk.h" | 23 | #include "blk.h" |
23 | 24 | ||
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr); | |||
35 | 36 | ||
36 | static struct device_type disk_type; | 37 | static struct device_type disk_type; |
37 | 38 | ||
39 | static void disk_add_events(struct gendisk *disk); | ||
40 | static void disk_del_events(struct gendisk *disk); | ||
41 | static void disk_release_events(struct gendisk *disk); | ||
42 | |||
38 | /** | 43 | /** |
39 | * disk_get_part - get partition | 44 | * disk_get_part - get partition |
40 | * @disk: disk to look partition from | 45 | * @disk: disk to look partition from |
@@ -609,6 +614,8 @@ void add_disk(struct gendisk *disk) | |||
609 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 614 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
610 | "bdi"); | 615 | "bdi"); |
611 | WARN_ON(retval); | 616 | WARN_ON(retval); |
617 | |||
618 | disk_add_events(disk); | ||
612 | } | 619 | } |
613 | EXPORT_SYMBOL(add_disk); | 620 | EXPORT_SYMBOL(add_disk); |
614 | 621 | ||
@@ -617,6 +624,8 @@ void del_gendisk(struct gendisk *disk) | |||
617 | struct disk_part_iter piter; | 624 | struct disk_part_iter piter; |
618 | struct hd_struct *part; | 625 | struct hd_struct *part; |
619 | 626 | ||
627 | disk_del_events(disk); | ||
628 | |||
620 | /* invalidate stuff */ | 629 | /* invalidate stuff */ |
621 | disk_part_iter_init(&piter, disk, | 630 | disk_part_iter_init(&piter, disk, |
622 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); | 631 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
@@ -1089,6 +1098,7 @@ static void disk_release(struct device *dev) | |||
1089 | { | 1098 | { |
1090 | struct gendisk *disk = dev_to_disk(dev); | 1099 | struct gendisk *disk = dev_to_disk(dev); |
1091 | 1100 | ||
1101 | disk_release_events(disk); | ||
1092 | kfree(disk->random); | 1102 | kfree(disk->random); |
1093 | disk_replace_part_tbl(disk, NULL); | 1103 | disk_replace_part_tbl(disk, NULL); |
1094 | free_part_stats(&disk->part0); | 1104 | free_part_stats(&disk->part0); |
@@ -1350,3 +1360,422 @@ int invalidate_partition(struct gendisk *disk, int partno) | |||
1350 | } | 1360 | } |
1351 | 1361 | ||
1352 | EXPORT_SYMBOL(invalidate_partition); | 1362 | EXPORT_SYMBOL(invalidate_partition); |
1363 | |||
1364 | /* | ||
1365 | * Disk events - monitor disk events like media change and eject request. | ||
1366 | */ | ||
1367 | struct disk_events { | ||
1368 | struct list_head node; /* all disk_event's */ | ||
1369 | struct gendisk *disk; /* the associated disk */ | ||
1370 | spinlock_t lock; | ||
1371 | |||
1372 | int block; /* event blocking depth */ | ||
1373 | unsigned int pending; /* events already sent out */ | ||
1374 | unsigned int clearing; /* events being cleared */ | ||
1375 | |||
1376 | long poll_msecs; /* interval, -1 for default */ | ||
1377 | struct delayed_work dwork; | ||
1378 | }; | ||
1379 | |||
1380 | static const char *disk_events_strs[] = { | ||
1381 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", | ||
1382 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", | ||
1383 | }; | ||
1384 | |||
1385 | static char *disk_uevents[] = { | ||
1386 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", | ||
1387 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", | ||
1388 | }; | ||
1389 | |||
1390 | /* list of all disk_events */ | ||
1391 | static DEFINE_MUTEX(disk_events_mutex); | ||
1392 | static LIST_HEAD(disk_events); | ||
1393 | |||
1394 | /* disable in-kernel polling by default */ | ||
1395 | static unsigned long disk_events_dfl_poll_msecs = 0; | ||
1396 | |||
1397 | static unsigned long disk_events_poll_jiffies(struct gendisk *disk) | ||
1398 | { | ||
1399 | struct disk_events *ev = disk->ev; | ||
1400 | long intv_msecs = 0; | ||
1401 | |||
1402 | /* | ||
1403 | * If device-specific poll interval is set, always use it. If | ||
1404 | * the default is being used, poll iff there are events which | ||
1405 | * can't be monitored asynchronously. | ||
1406 | */ | ||
1407 | if (ev->poll_msecs >= 0) | ||
1408 | intv_msecs = ev->poll_msecs; | ||
1409 | else if (disk->events & ~disk->async_events) | ||
1410 | intv_msecs = disk_events_dfl_poll_msecs; | ||
1411 | |||
1412 | return msecs_to_jiffies(intv_msecs); | ||
1413 | } | ||
1414 | |||
1415 | static void __disk_block_events(struct gendisk *disk, bool sync) | ||
1416 | { | ||
1417 | struct disk_events *ev = disk->ev; | ||
1418 | unsigned long flags; | ||
1419 | bool cancel; | ||
1420 | |||
1421 | spin_lock_irqsave(&ev->lock, flags); | ||
1422 | cancel = !ev->block++; | ||
1423 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1424 | |||
1425 | if (cancel) { | ||
1426 | if (sync) | ||
1427 | cancel_delayed_work_sync(&disk->ev->dwork); | ||
1428 | else | ||
1429 | cancel_delayed_work(&disk->ev->dwork); | ||
1430 | } | ||
1431 | } | ||
1432 | |||
1433 | static void __disk_unblock_events(struct gendisk *disk, bool check_now) | ||
1434 | { | ||
1435 | struct disk_events *ev = disk->ev; | ||
1436 | unsigned long intv; | ||
1437 | unsigned long flags; | ||
1438 | |||
1439 | spin_lock_irqsave(&ev->lock, flags); | ||
1440 | |||
1441 | if (WARN_ON_ONCE(ev->block <= 0)) | ||
1442 | goto out_unlock; | ||
1443 | |||
1444 | if (--ev->block) | ||
1445 | goto out_unlock; | ||
1446 | |||
1447 | /* | ||
1448 | * Not exactly a latency critical operation, set poll timer | ||
1449 | * slack to 25% and kick event check. | ||
1450 | */ | ||
1451 | intv = disk_events_poll_jiffies(disk); | ||
1452 | set_timer_slack(&ev->dwork.timer, intv / 4); | ||
1453 | if (check_now) | ||
1454 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1455 | else if (intv) | ||
1456 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1457 | out_unlock: | ||
1458 | spin_unlock_irqrestore(&ev->lock, flags); | ||
1459 | } | ||
1460 | |||
1461 | /** | ||
1462 | * disk_block_events - block and flush disk event checking | ||
1463 | * @disk: disk to block events for | ||
1464 | * | ||
1465 | * On return from this function, it is guaranteed that event checking | ||
1466 | * isn't in progress and won't happen until unblocked by | ||
1467 | * disk_unblock_events(). Events blocking is counted and the actual | ||
1468 | * unblocking happens after the matching number of unblocks are done. | ||
1469 | * | ||
1470 | * Note that this intentionally does not block event checking from | ||
1471 | * disk_clear_events(). | ||
1472 | * | ||
1473 | * CONTEXT: | ||
1474 | * Might sleep. | ||
1475 | */ | ||
1476 | void disk_block_events(struct gendisk *disk) | ||
1477 | { | ||
1478 | if (disk->ev) | ||
1479 | __disk_block_events(disk, true); | ||
1480 | } | ||
1481 | |||
1482 | /** | ||
1483 | * disk_unblock_events - unblock disk event checking | ||
1484 | * @disk: disk to unblock events for | ||
1485 | * | ||
1486 | * Undo disk_block_events(). When the block count reaches zero, it | ||
1487 | * starts events polling if configured. | ||
1488 | * | ||
1489 | * CONTEXT: | ||
1490 | * Don't care. Safe to call from irq context. | ||
1491 | */ | ||
1492 | void disk_unblock_events(struct gendisk *disk) | ||
1493 | { | ||
1494 | if (disk->ev) | ||
1495 | __disk_unblock_events(disk, true); | ||
1496 | } | ||
1497 | |||
1498 | /** | ||
1499 | * disk_check_events - schedule immediate event checking | ||
1500 | * @disk: disk to check events for | ||
1501 | * | ||
1502 | * Schedule immediate event checking on @disk if not blocked. | ||
1503 | * | ||
1504 | * CONTEXT: | ||
1505 | * Don't care. Safe to call from irq context. | ||
1506 | */ | ||
1507 | void disk_check_events(struct gendisk *disk) | ||
1508 | { | ||
1509 | if (disk->ev) { | ||
1510 | __disk_block_events(disk, false); | ||
1511 | __disk_unblock_events(disk, true); | ||
1512 | } | ||
1513 | } | ||
1514 | EXPORT_SYMBOL_GPL(disk_check_events); | ||
1515 | |||
1516 | /** | ||
1517 | * disk_clear_events - synchronously check, clear and return pending events | ||
1518 | * @disk: disk to fetch and clear events from | ||
1519 | * @mask: mask of events to be fetched and clearted | ||
1520 | * | ||
1521 | * Disk events are synchronously checked and pending events in @mask | ||
1522 | * are cleared and returned. This ignores the block count. | ||
1523 | * | ||
1524 | * CONTEXT: | ||
1525 | * Might sleep. | ||
1526 | */ | ||
1527 | unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | ||
1528 | { | ||
1529 | const struct block_device_operations *bdops = disk->fops; | ||
1530 | struct disk_events *ev = disk->ev; | ||
1531 | unsigned int pending; | ||
1532 | |||
1533 | if (!ev) { | ||
1534 | /* for drivers still using the old ->media_changed method */ | ||
1535 | if ((mask & DISK_EVENT_MEDIA_CHANGE) && | ||
1536 | bdops->media_changed && bdops->media_changed(disk)) | ||
1537 | return DISK_EVENT_MEDIA_CHANGE; | ||
1538 | return 0; | ||
1539 | } | ||
1540 | |||
1541 | /* tell the workfn about the events being cleared */ | ||
1542 | spin_lock_irq(&ev->lock); | ||
1543 | ev->clearing |= mask; | ||
1544 | spin_unlock_irq(&ev->lock); | ||
1545 | |||
1546 | /* uncondtionally schedule event check and wait for it to finish */ | ||
1547 | __disk_block_events(disk, true); | ||
1548 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | ||
1549 | flush_delayed_work(&ev->dwork); | ||
1550 | __disk_unblock_events(disk, false); | ||
1551 | |||
1552 | /* then, fetch and clear pending events */ | ||
1553 | spin_lock_irq(&ev->lock); | ||
1554 | WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ | ||
1555 | pending = ev->pending & mask; | ||
1556 | ev->pending &= ~mask; | ||
1557 | spin_unlock_irq(&ev->lock); | ||
1558 | |||
1559 | return pending; | ||
1560 | } | ||
1561 | |||
1562 | static void disk_events_workfn(struct work_struct *work) | ||
1563 | { | ||
1564 | struct delayed_work *dwork = to_delayed_work(work); | ||
1565 | struct disk_events *ev = container_of(dwork, struct disk_events, dwork); | ||
1566 | struct gendisk *disk = ev->disk; | ||
1567 | char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; | ||
1568 | unsigned int clearing = ev->clearing; | ||
1569 | unsigned int events; | ||
1570 | unsigned long intv; | ||
1571 | int nr_events = 0, i; | ||
1572 | |||
1573 | /* check events */ | ||
1574 | events = disk->fops->check_events(disk, clearing); | ||
1575 | |||
1576 | /* accumulate pending events and schedule next poll if necessary */ | ||
1577 | spin_lock_irq(&ev->lock); | ||
1578 | |||
1579 | events &= ~ev->pending; | ||
1580 | ev->pending |= events; | ||
1581 | ev->clearing &= ~clearing; | ||
1582 | |||
1583 | intv = disk_events_poll_jiffies(disk); | ||
1584 | if (!ev->block && intv) | ||
1585 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | ||
1586 | |||
1587 | spin_unlock_irq(&ev->lock); | ||
1588 | |||
1589 | /* tell userland about new events */ | ||
1590 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | ||
1591 | if (events & (1 << i)) | ||
1592 | envp[nr_events++] = disk_uevents[i]; | ||
1593 | |||
1594 | if (nr_events) | ||
1595 | kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); | ||
1596 | } | ||
1597 | |||
1598 | /* | ||
1599 | * A disk events enabled device has the following sysfs nodes under | ||
1600 | * its /sys/block/X/ directory. | ||
1601 | * | ||
1602 | * events : list of all supported events | ||
1603 | * events_async : list of events which can be detected w/o polling | ||
1604 | * events_poll_msecs : polling interval, 0: disable, -1: system default | ||
1605 | */ | ||
1606 | static ssize_t __disk_events_show(unsigned int events, char *buf) | ||
1607 | { | ||
1608 | const char *delim = ""; | ||
1609 | ssize_t pos = 0; | ||
1610 | int i; | ||
1611 | |||
1612 | for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) | ||
1613 | if (events & (1 << i)) { | ||
1614 | pos += sprintf(buf + pos, "%s%s", | ||
1615 | delim, disk_events_strs[i]); | ||
1616 | delim = " "; | ||
1617 | } | ||
1618 | if (pos) | ||
1619 | pos += sprintf(buf + pos, "\n"); | ||
1620 | return pos; | ||
1621 | } | ||
1622 | |||
1623 | static ssize_t disk_events_show(struct device *dev, | ||
1624 | struct device_attribute *attr, char *buf) | ||
1625 | { | ||
1626 | struct gendisk *disk = dev_to_disk(dev); | ||
1627 | |||
1628 | return __disk_events_show(disk->events, buf); | ||
1629 | } | ||
1630 | |||
1631 | static ssize_t disk_events_async_show(struct device *dev, | ||
1632 | struct device_attribute *attr, char *buf) | ||
1633 | { | ||
1634 | struct gendisk *disk = dev_to_disk(dev); | ||
1635 | |||
1636 | return __disk_events_show(disk->async_events, buf); | ||
1637 | } | ||
1638 | |||
1639 | static ssize_t disk_events_poll_msecs_show(struct device *dev, | ||
1640 | struct device_attribute *attr, | ||
1641 | char *buf) | ||
1642 | { | ||
1643 | struct gendisk *disk = dev_to_disk(dev); | ||
1644 | |||
1645 | return sprintf(buf, "%ld\n", disk->ev->poll_msecs); | ||
1646 | } | ||
1647 | |||
1648 | static ssize_t disk_events_poll_msecs_store(struct device *dev, | ||
1649 | struct device_attribute *attr, | ||
1650 | const char *buf, size_t count) | ||
1651 | { | ||
1652 | struct gendisk *disk = dev_to_disk(dev); | ||
1653 | long intv; | ||
1654 | |||
1655 | if (!count || !sscanf(buf, "%ld", &intv)) | ||
1656 | return -EINVAL; | ||
1657 | |||
1658 | if (intv < 0 && intv != -1) | ||
1659 | return -EINVAL; | ||
1660 | |||
1661 | __disk_block_events(disk, true); | ||
1662 | disk->ev->poll_msecs = intv; | ||
1663 | __disk_unblock_events(disk, true); | ||
1664 | |||
1665 | return count; | ||
1666 | } | ||
1667 | |||
1668 | static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); | ||
1669 | static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); | ||
1670 | static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, | ||
1671 | disk_events_poll_msecs_show, | ||
1672 | disk_events_poll_msecs_store); | ||
1673 | |||
1674 | static const struct attribute *disk_events_attrs[] = { | ||
1675 | &dev_attr_events.attr, | ||
1676 | &dev_attr_events_async.attr, | ||
1677 | &dev_attr_events_poll_msecs.attr, | ||
1678 | NULL, | ||
1679 | }; | ||
1680 | |||
1681 | /* | ||
1682 | * The default polling interval can be specified by the kernel | ||
1683 | * parameter block.events_dfl_poll_msecs which defaults to 0 | ||
1684 | * (disable). This can also be modified runtime by writing to | ||
1685 | * /sys/module/block/events_dfl_poll_msecs. | ||
1686 | */ | ||
1687 | static int disk_events_set_dfl_poll_msecs(const char *val, | ||
1688 | const struct kernel_param *kp) | ||
1689 | { | ||
1690 | struct disk_events *ev; | ||
1691 | int ret; | ||
1692 | |||
1693 | ret = param_set_ulong(val, kp); | ||
1694 | if (ret < 0) | ||
1695 | return ret; | ||
1696 | |||
1697 | mutex_lock(&disk_events_mutex); | ||
1698 | |||
1699 | list_for_each_entry(ev, &disk_events, node) | ||
1700 | disk_check_events(ev->disk); | ||
1701 | |||
1702 | mutex_unlock(&disk_events_mutex); | ||
1703 | |||
1704 | return 0; | ||
1705 | } | ||
1706 | |||
1707 | static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { | ||
1708 | .set = disk_events_set_dfl_poll_msecs, | ||
1709 | .get = param_get_ulong, | ||
1710 | }; | ||
1711 | |||
1712 | #undef MODULE_PARAM_PREFIX | ||
1713 | #define MODULE_PARAM_PREFIX "block." | ||
1714 | |||
1715 | module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | ||
1716 | &disk_events_dfl_poll_msecs, 0644); | ||
1717 | |||
1718 | /* | ||
1719 | * disk_{add|del|release}_events - initialize and destroy disk_events. | ||
1720 | */ | ||
1721 | static void disk_add_events(struct gendisk *disk) | ||
1722 | { | ||
1723 | struct disk_events *ev; | ||
1724 | |||
1725 | if (!disk->fops->check_events || !(disk->events | disk->async_events)) | ||
1726 | return; | ||
1727 | |||
1728 | ev = kzalloc(sizeof(*ev), GFP_KERNEL); | ||
1729 | if (!ev) { | ||
1730 | pr_warn("%s: failed to initialize events\n", disk->disk_name); | ||
1731 | return; | ||
1732 | } | ||
1733 | |||
1734 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, | ||
1735 | disk_events_attrs) < 0) { | ||
1736 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1737 | disk->disk_name); | ||
1738 | kfree(ev); | ||
1739 | return; | ||
1740 | } | ||
1741 | |||
1742 | disk->ev = ev; | ||
1743 | |||
1744 | INIT_LIST_HEAD(&ev->node); | ||
1745 | ev->disk = disk; | ||
1746 | spin_lock_init(&ev->lock); | ||
1747 | ev->block = 1; | ||
1748 | ev->poll_msecs = -1; | ||
1749 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | ||
1750 | |||
1751 | mutex_lock(&disk_events_mutex); | ||
1752 | list_add_tail(&ev->node, &disk_events); | ||
1753 | mutex_unlock(&disk_events_mutex); | ||
1754 | |||
1755 | /* | ||
1756 | * Block count is initialized to 1 and the following initial | ||
1757 | * unblock kicks it into action. | ||
1758 | */ | ||
1759 | __disk_unblock_events(disk, true); | ||
1760 | } | ||
1761 | |||
1762 | static void disk_del_events(struct gendisk *disk) | ||
1763 | { | ||
1764 | if (!disk->ev) | ||
1765 | return; | ||
1766 | |||
1767 | __disk_block_events(disk, true); | ||
1768 | |||
1769 | mutex_lock(&disk_events_mutex); | ||
1770 | list_del_init(&disk->ev->node); | ||
1771 | mutex_unlock(&disk_events_mutex); | ||
1772 | |||
1773 | sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); | ||
1774 | } | ||
1775 | |||
1776 | static void disk_release_events(struct gendisk *disk) | ||
1777 | { | ||
1778 | /* the block count should be 1 from disk_del_events() */ | ||
1779 | WARN_ON_ONCE(disk->ev && disk->ev->block != 1); | ||
1780 | kfree(disk->ev); | ||
1781 | } | ||