aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Stefan Richter <stefanr@s5r6.in-berlin.de> 2008-08-19 15:30:17 -0400
committer: Stefan Richter <stefanr@s5r6.in-berlin.de> 2008-10-15 16:21:09 -0400
commit: fc392fe83176cefbab99f9d12e6e27395aa2b5d0 (patch)
tree: 070fe832dff6cea485333b2572efb4c8b716a410
parent: 11305c3eda233d3aff52d755a2d6c1706c509962 (diff)
ieee1394: survive a few seconds connection loss
There are situations when nodes vanish from the bus and come back quickly
thereafter:
  - when certain bus-powered hubs are plugged in,
  - when certain disk enclosures are switched from self-power to bus power
    or vice versa and break the daisy chain during the transition,
  - when the user unplugs a cable and quickly plugs it back in, e.g. to
    reorder a daisy chain (works on Mac OS X if done quickly enough),
  - when certain hubs temporarily malfunction during high bus traffic.

The ieee1394 driver's nodemgr already contained a function to set vanished
nodes aside into "limbo"; i.e. they wouldn't actually be deleted right away.
(In fact, only unloading the driver or writing into an obscure sysfs
attribute would delete them eventually.) If nodes reappeared later, they
would be resurrected out of limbo.

Moving nodes into and out of limbo was accompanied by calls to the
.suspend() and .resume() driver methods of the drivers which were bound to
the respective node's unit directories. Not only is this somewhat strange,
given that these driver methods are intended for power management, but the
sbp2 driver in particular also does not implement .suspend() and .resume().
Hence sbp2 would be disconnected from devices in situations as listed above.

We now:
  - leave drivers bound when nodes go into limbo,
  - call the drivers' .update() when nodes come out of limbo,
  - automatically delete in-limbo nodes 3 seconds after the last bus reset
    and bus rescan.
  - Because of the automatic removal, the now obsolete bus attribute
    /sys/bus/ieee1394/destroy_node is removed.

This especially lets sbp2 survive brief disconnections. You can, for
example, yank a disk's cable and plug it back in while reading the
respective disk with dd, and dd will happily continue as if nothing
happened.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
-rw-r--r-- drivers/ieee1394/nodemgr.c 147
-rw-r--r-- drivers/ieee1394/nodemgr.h 2
2 files changed, 51 insertions, 98 deletions
diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c
index b9d3f46c2b06..2376b729e876 100644
--- a/drivers/ieee1394/nodemgr.c
+++ b/drivers/ieee1394/nodemgr.c
@@ -154,9 +154,6 @@ struct host_info {
154 154
155static int nodemgr_bus_match(struct device * dev, struct device_driver * drv); 155static int nodemgr_bus_match(struct device * dev, struct device_driver * drv);
156static int nodemgr_uevent(struct device *dev, struct kobj_uevent_env *env); 156static int nodemgr_uevent(struct device *dev, struct kobj_uevent_env *env);
157static void nodemgr_reactivate_ne(struct node_entry *ne);
158static void nodemgr_remove_ne(struct node_entry *ne);
159static struct node_entry *find_entry_by_guid(u64 guid);
160 157
161struct bus_type ieee1394_bus_type = { 158struct bus_type ieee1394_bus_type = {
162 .name = "ieee1394", 159 .name = "ieee1394",
@@ -385,27 +382,6 @@ static ssize_t fw_get_ignore_driver(struct device *dev, struct device_attribute
385static DEVICE_ATTR(ignore_driver, S_IWUSR | S_IRUGO, fw_get_ignore_driver, fw_set_ignore_driver); 382static DEVICE_ATTR(ignore_driver, S_IWUSR | S_IRUGO, fw_get_ignore_driver, fw_set_ignore_driver);
386 383
387 384
388static ssize_t fw_set_destroy_node(struct bus_type *bus, const char *buf, size_t count)
389{
390 struct node_entry *ne;
391 u64 guid = (u64)simple_strtoull(buf, NULL, 16);
392
393 ne = find_entry_by_guid(guid);
394
395 if (ne == NULL || !ne->in_limbo)
396 return -EINVAL;
397
398 nodemgr_remove_ne(ne);
399
400 return count;
401}
402static ssize_t fw_get_destroy_node(struct bus_type *bus, char *buf)
403{
404 return sprintf(buf, "You can destroy in_limbo nodes by writing their GUID to this file\n");
405}
406static BUS_ATTR(destroy_node, S_IWUSR | S_IRUGO, fw_get_destroy_node, fw_set_destroy_node);
407
408
409static ssize_t fw_set_rescan(struct bus_type *bus, const char *buf, 385static ssize_t fw_set_rescan(struct bus_type *bus, const char *buf,
410 size_t count) 386 size_t count)
411{ 387{
@@ -442,7 +418,6 @@ static BUS_ATTR(ignore_drivers, S_IWUSR | S_IRUGO, fw_get_ignore_drivers, fw_set
442 418
443 419
444struct bus_attribute *const fw_bus_attrs[] = { 420struct bus_attribute *const fw_bus_attrs[] = {
445 &bus_attr_destroy_node,
446 &bus_attr_rescan, 421 &bus_attr_rescan,
447 &bus_attr_ignore_drivers, 422 &bus_attr_ignore_drivers,
448 NULL 423 NULL
@@ -1300,14 +1275,19 @@ static void nodemgr_update_node(struct node_entry *ne, struct csr1212_csr *csr,
1300 csr1212_destroy_csr(csr); 1275 csr1212_destroy_csr(csr);
1301 } 1276 }
1302 1277
1303 if (ne->in_limbo)
1304 nodemgr_reactivate_ne(ne);
1305
1306 /* Mark the node current */ 1278 /* Mark the node current */
1307 ne->generation = generation; 1279 ne->generation = generation;
1308}
1309 1280
1281 if (ne->in_limbo) {
1282 device_remove_file(&ne->device, &dev_attr_ne_in_limbo);
1283 ne->in_limbo = false;
1310 1284
1285 HPSB_DEBUG("Node reactivated: "
1286 "ID:BUS[" NODE_BUS_FMT "] GUID[%016Lx]",
1287 NODE_BUS_ARGS(ne->host, ne->nodeid),
1288 (unsigned long long)ne->guid);
1289 }
1290}
1311 1291
1312static void nodemgr_node_scan_one(struct hpsb_host *host, 1292static void nodemgr_node_scan_one(struct hpsb_host *host,
1313 nodeid_t nodeid, int generation) 1293 nodeid_t nodeid, int generation)
@@ -1392,75 +1372,14 @@ static void nodemgr_node_scan(struct hpsb_host *host, int generation)
1392 } 1372 }
1393} 1373}
1394 1374
1395static int pause_ne(struct device *dev, void *data)
1396{
1397 struct unit_directory *ud;
1398 struct device_driver *drv;
1399 struct node_entry *ne = data;
1400 int error;
1401
1402 ud = container_of(dev, struct unit_directory, unit_dev);
1403 if (ud->ne == ne) {
1404 drv = get_driver(ud->device.driver);
1405 if (drv) {
1406 error = 1; /* release if suspend is not implemented */
1407 if (drv->suspend) {
1408 down(&ud->device.sem);
1409 error = drv->suspend(&ud->device, PMSG_SUSPEND);
1410 up(&ud->device.sem);
1411 }
1412 if (error)
1413 device_release_driver(&ud->device);
1414 put_driver(drv);
1415 }
1416 }
1417
1418 return 0;
1419}
1420
1421static void nodemgr_pause_ne(struct node_entry *ne) 1375static void nodemgr_pause_ne(struct node_entry *ne)
1422{ 1376{
1423 HPSB_DEBUG("Node suspended: ID:BUS[" NODE_BUS_FMT "] GUID[%016Lx]", 1377 HPSB_DEBUG("Node paused: ID:BUS[" NODE_BUS_FMT "] GUID[%016Lx]",
1424 NODE_BUS_ARGS(ne->host, ne->nodeid), 1378 NODE_BUS_ARGS(ne->host, ne->nodeid),
1425 (unsigned long long)ne->guid); 1379 (unsigned long long)ne->guid);
1426 1380
1427 ne->in_limbo = 1; 1381 ne->in_limbo = true;
1428 WARN_ON(device_create_file(&ne->device, &dev_attr_ne_in_limbo)); 1382 WARN_ON(device_create_file(&ne->device, &dev_attr_ne_in_limbo));
1429
1430 class_for_each_device(&nodemgr_ud_class, NULL, ne, pause_ne);
1431}
1432
1433static int reactivate_ne(struct device *dev, void *data)
1434{
1435 struct unit_directory *ud;
1436 struct device_driver *drv;
1437 struct node_entry *ne = data;
1438
1439 ud = container_of(dev, struct unit_directory, unit_dev);
1440 if (ud->ne == ne) {
1441 drv = get_driver(ud->device.driver);
1442 if (drv) {
1443 if (drv->resume) {
1444 down(&ud->device.sem);
1445 drv->resume(&ud->device);
1446 up(&ud->device.sem);
1447 }
1448 put_driver(drv);
1449 }
1450 }
1451
1452 return 0;
1453}
1454
1455static void nodemgr_reactivate_ne(struct node_entry *ne)
1456{
1457 ne->in_limbo = 0;
1458 device_remove_file(&ne->device, &dev_attr_ne_in_limbo);
1459
1460 class_for_each_device(&nodemgr_ud_class, NULL, ne, reactivate_ne);
1461 HPSB_DEBUG("Node resumed: ID:BUS[" NODE_BUS_FMT "] GUID[%016Lx]",
1462 NODE_BUS_ARGS(ne->host, ne->nodeid),
1463 (unsigned long long)ne->guid);
1464} 1383}
1465 1384
1466static int update_pdrv(struct device *dev, void *data) 1385static int update_pdrv(struct device *dev, void *data)
@@ -1497,7 +1416,6 @@ static void nodemgr_update_pdrv(struct node_entry *ne)
1497 class_for_each_device(&nodemgr_ud_class, NULL, ne, update_pdrv); 1416 class_for_each_device(&nodemgr_ud_class, NULL, ne, update_pdrv);
1498} 1417}
1499 1418
1500
1501/* Write the BROADCAST_CHANNEL as per IEEE1394a 8.3.2.3.11 and 8.4.2.3. This 1419/* Write the BROADCAST_CHANNEL as per IEEE1394a 8.3.2.3.11 and 8.4.2.3. This
1502 * seems like an optional service but in the end it is practically mandatory 1420 * seems like an optional service but in the end it is practically mandatory
1503 * as a consequence of these clauses. 1421 * as a consequence of these clauses.
@@ -1574,7 +1492,7 @@ static int node_probe(struct device *dev, void *data)
1574 return 0; 1492 return 0;
1575} 1493}
1576 1494
1577static void nodemgr_node_probe(struct hpsb_host *host, int generation) 1495static int nodemgr_node_probe(struct hpsb_host *host, int generation)
1578{ 1496{
1579 struct node_probe_parameter p; 1497 struct node_probe_parameter p;
1580 1498
@@ -1595,11 +1513,11 @@ static void nodemgr_node_probe(struct hpsb_host *host, int generation)
1595 */ 1513 */
1596 p.probe_now = false; 1514 p.probe_now = false;
1597 if (class_for_each_device(&nodemgr_ne_class, NULL, &p, node_probe) != 0) 1515 if (class_for_each_device(&nodemgr_ne_class, NULL, &p, node_probe) != 0)
1598 return; 1516 return 0;
1599 1517
1600 p.probe_now = true; 1518 p.probe_now = true;
1601 if (class_for_each_device(&nodemgr_ne_class, NULL, &p, node_probe) != 0) 1519 if (class_for_each_device(&nodemgr_ne_class, NULL, &p, node_probe) != 0)
1602 return; 1520 return 0;
1603 /* 1521 /*
1604 * Now let's tell the bus to rescan our devices. This may seem 1522 * Now let's tell the bus to rescan our devices. This may seem
1605 * like overhead, but the driver-model core will only scan a 1523 * like overhead, but the driver-model core will only scan a
@@ -1611,6 +1529,27 @@ static void nodemgr_node_probe(struct hpsb_host *host, int generation)
1611 */ 1529 */
1612 if (bus_rescan_devices(&ieee1394_bus_type) != 0) 1530 if (bus_rescan_devices(&ieee1394_bus_type) != 0)
1613 HPSB_DEBUG("bus_rescan_devices had an error"); 1531 HPSB_DEBUG("bus_rescan_devices had an error");
1532
1533 return 1;
1534}
1535
1536static int remove_nodes_in_limbo(struct device *dev, void *data)
1537{
1538 struct node_entry *ne;
1539
1540 if (dev->bus != &ieee1394_bus_type)
1541 return 0;
1542
1543 ne = container_of(dev, struct node_entry, device);
1544 if (ne->in_limbo)
1545 nodemgr_remove_ne(ne);
1546
1547 return 0;
1548}
1549
1550static void nodemgr_remove_nodes_in_limbo(struct hpsb_host *host)
1551{
1552 device_for_each_child(&host->device, NULL, remove_nodes_in_limbo);
1614} 1553}
1615 1554
1616static int nodemgr_send_resume_packet(struct hpsb_host *host) 1555static int nodemgr_send_resume_packet(struct hpsb_host *host)
@@ -1781,10 +1720,24 @@ static int nodemgr_host_thread(void *data)
1781 1720
1782 /* This actually does the full probe, with sysfs 1721 /* This actually does the full probe, with sysfs
1783 * registration. */ 1722 * registration. */
1784 nodemgr_node_probe(host, generation); 1723 if (!nodemgr_node_probe(host, generation))
1724 continue;
1785 1725
1786 /* Update some of our sysfs symlinks */ 1726 /* Update some of our sysfs symlinks */
1787 nodemgr_update_host_dev_links(host); 1727 nodemgr_update_host_dev_links(host);
1728
1729 /* Sleep 3 seconds */
1730 for (i = 3000/200; i; i--) {
1731 msleep_interruptible(200);
1732 if (kthread_should_stop())
1733 goto exit;
1734
1735 if (generation != get_hpsb_generation(host))
1736 break;
1737 }
1738 /* Remove nodes which are gone, unless a bus reset happened */
1739 if (!i)
1740 nodemgr_remove_nodes_in_limbo(host);
1788 } 1741 }
1789exit: 1742exit:
1790 HPSB_VERBOSE("NodeMgr: Exiting thread"); 1743 HPSB_VERBOSE("NodeMgr: Exiting thread");
diff --git a/drivers/ieee1394/nodemgr.h b/drivers/ieee1394/nodemgr.h
index 6eb26465a84c..4f287a3561ba 100644
--- a/drivers/ieee1394/nodemgr.h
+++ b/drivers/ieee1394/nodemgr.h
@@ -110,7 +110,7 @@ struct node_entry {
110 struct device node_dev; 110 struct device node_dev;
111 111
112 /* Means this node is not attached anymore */ 112 /* Means this node is not attached anymore */
113 int in_limbo; 113 bool in_limbo;
114 114
115 struct csr1212_csr *csr; 115 struct csr1212_csr *csr;
116}; 116};