aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vishal Verma <vishal.l.verma@intel.com> 2015-12-24 21:21:43 -0500
committer Dan Williams <dan.j.williams@intel.com> 2016-01-09 11:39:03 -0500
commit 0caeef63e6d2f866d85bb507bf63e0ce8ec91cef (patch)
tree dbd09f34ab455ca2dfc8c246ff7d19d17edd1de7
parent d26f73f083ed6fbea7fd3fdbacb527b7f3e75ac0 (diff)
libnvdimm: Add a poison list and export badblocks
During region creation, perform Address Range Scrubs (ARS) for the SPA (System Physical Address) ranges to retrieve known poison locations from firmware. Add a new data structure, 'nd_poison', which is used as a list in nvdimm_bus to store these poison locations.

When creating a pmem namespace, if there is any known poison associated with its physical address space, convert the poison ranges to bad sectors that are exposed using the badblocks interface.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- drivers/acpi/nfit.c 203
-rw-r--r-- drivers/nvdimm/core.c 187
-rw-r--r-- drivers/nvdimm/nd-core.h 3
-rw-r--r-- drivers/nvdimm/nd.h 6
-rw-r--r-- drivers/nvdimm/pmem.c 6
-rw-r--r-- include/linux/libnvdimm.h 1
6 files changed, 406 insertions, 0 deletions
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index e7ed39bab97d..e1dbc8da09b7 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/ndctl.h> 17#include <linux/ndctl.h>
18#include <linux/delay.h>
18#include <linux/list.h> 19#include <linux/list.h>
19#include <linux/acpi.h> 20#include <linux/acpi.h>
20#include <linux/sort.h> 21#include <linux/sort.h>
@@ -1473,6 +1474,201 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
1473 /* devm will free nfit_blk */ 1474 /* devm will free nfit_blk */
1474} 1475}
1475 1476
1477static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc,
1478 struct nd_cmd_ars_cap *cmd, u64 addr, u64 length)
1479{
1480 cmd->address = addr;
1481 cmd->length = length;
1482
1483 return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
1484 sizeof(*cmd));
1485}
1486
1487static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
1488 struct nd_cmd_ars_start *cmd, u64 addr, u64 length)
1489{
1490 int rc;
1491
1492 cmd->address = addr;
1493 cmd->length = length;
1494 cmd->type = ND_ARS_PERSISTENT;
1495
1496 while (1) {
1497 rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd,
1498 sizeof(*cmd));
1499 if (rc)
1500 return rc;
1501 switch (cmd->status) {
1502 case 0:
1503 return 0;
1504 case 1:
1505 /* ARS unsupported, but we should never get here */
1506 return 0;
1507 case 2:
1508 return -EINVAL;
1509 case 3:
1510 /* ARS is in progress */
1511 msleep(1000);
1512 break;
1513 default:
1514 return -ENXIO;
1515 }
1516 }
1517}
1518
1519static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
1520 struct nd_cmd_ars_status *cmd)
1521{
1522 int rc;
1523
1524 while (1) {
1525 rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
1526 sizeof(*cmd));
1527 if (rc || cmd->status & 0xffff)
1528 return -ENXIO;
1529
1530 /* Check extended status (Upper two bytes) */
1531 switch (cmd->status >> 16) {
1532 case 0:
1533 return 0;
1534 case 1:
1535 /* ARS is in progress */
1536 msleep(1000);
1537 break;
1538 case 2:
1539 /* No ARS performed for the current boot */
1540 return 0;
1541 default:
1542 return -ENXIO;
1543 }
1544 }
1545}
1546
1547static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
1548 struct nd_cmd_ars_status *ars_status, u64 start)
1549{
1550 int rc;
1551 u32 i;
1552
1553 /*
1554 * The address field returned by ars_status should be either
1555 * less than or equal to the address we last started ARS for.
1556 * The (start, length) returned by ars_status should also have
1557 * non-zero overlap with the range we started ARS for.
1558 * If this is not the case, bail.
1559 */
1560 if (ars_status->address > start ||
1561 (ars_status->address + ars_status->length < start))
1562 return -ENXIO;
1563
1564 for (i = 0; i < ars_status->num_records; i++) {
1565 rc = nvdimm_bus_add_poison(nvdimm_bus,
1566 ars_status->records[i].err_address,
1567 ars_status->records[i].length);
1568 if (rc)
1569 return rc;
1570 }
1571
1572 return 0;
1573}
1574
1575static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
1576 struct nd_region_desc *ndr_desc)
1577{
1578 struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1579 struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
1580 struct nd_cmd_ars_status *ars_status = NULL;
1581 struct nd_cmd_ars_start *ars_start = NULL;
1582 struct nd_cmd_ars_cap *ars_cap = NULL;
1583 u64 start, len, cur, remaining;
1584 int rc;
1585
1586 ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
1587 if (!ars_cap)
1588 return -ENOMEM;
1589
1590 start = ndr_desc->res->start;
1591 len = ndr_desc->res->end - ndr_desc->res->start + 1;
1592
1593 rc = ars_get_cap(nd_desc, ars_cap, start, len);
1594 if (rc)
1595 goto out;
1596
1597 /*
1598 * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
1599 * extended status is not set, skip this but continue initialization
1600 */
1601 if ((ars_cap->status & 0xffff) ||
1602 !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
1603 dev_warn(acpi_desc->dev,
1604 "ARS unsupported (status: 0x%x), won't create an error list\n",
1605 ars_cap->status);
1606 goto out;
1607 }
1608
1609 /*
1610 * Check if a full-range ARS has been run. If so, use those results
1611 * without having to start a new ARS.
1612 */
1613 ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
1614 GFP_KERNEL);
1615 if (!ars_status) {
1616 rc = -ENOMEM;
1617 goto out;
1618 }
1619
1620 rc = ars_get_status(nd_desc, ars_status);
1621 if (rc)
1622 goto out;
1623
1624 if (ars_status->address <= start &&
1625 (ars_status->address + ars_status->length >= start + len)) {
1626 rc = ars_status_process_records(nvdimm_bus, ars_status, start);
1627 goto out;
1628 }
1629
1630 /*
1631 * ARS_STATUS can overflow if the number of poison entries found is
1632 * greater than the maximum buffer size (ars_cap->max_ars_out)
1633 * To detect overflow, check if the length field of ars_status
1634 * is less than the length we supplied. If so, process the
1635 * error entries we got, adjust the start point, and start again
1636 */
1637 ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL);
1638 if (!ars_start)
1639 return -ENOMEM;
1640
1641 cur = start;
1642 remaining = len;
1643 do {
1644 u64 done, end;
1645
1646 rc = ars_do_start(nd_desc, ars_start, cur, remaining);
1647 if (rc)
1648 goto out;
1649
1650 rc = ars_get_status(nd_desc, ars_status);
1651 if (rc)
1652 goto out;
1653
1654 rc = ars_status_process_records(nvdimm_bus, ars_status, cur);
1655 if (rc)
1656 goto out;
1657
1658 end = min(cur + remaining,
1659 ars_status->address + ars_status->length);
1660 done = end - cur;
1661 cur += done;
1662 remaining -= done;
1663 } while (remaining);
1664
1665 out:
1666 kfree(ars_cap);
1667 kfree(ars_start);
1668 kfree(ars_status);
1669 return rc;
1670}
1671
1476static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, 1672static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
1477 struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, 1673 struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
1478 struct acpi_nfit_memory_map *memdev, 1674 struct acpi_nfit_memory_map *memdev,
@@ -1585,6 +1781,13 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
1585 1781
1586 nvdimm_bus = acpi_desc->nvdimm_bus; 1782 nvdimm_bus = acpi_desc->nvdimm_bus;
1587 if (nfit_spa_type(spa) == NFIT_SPA_PM) { 1783 if (nfit_spa_type(spa) == NFIT_SPA_PM) {
1784 rc = acpi_nfit_find_poison(acpi_desc, ndr_desc);
1785 if (rc) {
1786 dev_err(acpi_desc->dev,
1787 "error while performing ARS to find poison: %d\n",
1788 rc);
1789 return rc;
1790 }
1588 if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) 1791 if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
1589 return -ENOMEM; 1792 return -ENOMEM;
1590 } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { 1793 } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 82c49bb87055..21003b7f0b38 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -325,6 +325,7 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
325 if (!nvdimm_bus) 325 if (!nvdimm_bus)
326 return NULL; 326 return NULL;
327 INIT_LIST_HEAD(&nvdimm_bus->list); 327 INIT_LIST_HEAD(&nvdimm_bus->list);
328 INIT_LIST_HEAD(&nvdimm_bus->poison_list);
328 init_waitqueue_head(&nvdimm_bus->probe_wait); 329 init_waitqueue_head(&nvdimm_bus->probe_wait);
329 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 330 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
330 mutex_init(&nvdimm_bus->reconfig_mutex); 331 mutex_init(&nvdimm_bus->reconfig_mutex);
@@ -359,6 +360,191 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
359} 360}
360EXPORT_SYMBOL_GPL(__nvdimm_bus_register); 361EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
361 362
363/**
364 * __add_badblock_range() - Convert a physical address range to bad sectors
365 * @disk: the disk associated with the namespace
366 * @ns_offset: namespace offset where the error range begins (in bytes)
367 * @len: number of bytes of poison to be added
368 *
369 * This assumes that the range provided with (ns_offset, len) is within
370 * the bounds of physical addresses for this namespace, i.e. lies in the
371 * interval [ns_start, ns_start + ns_size)
372 */
373static int __add_badblock_range(struct gendisk *disk, u64 ns_offset, u64 len)
374{
375 unsigned int sector_size = queue_logical_block_size(disk->queue);
376 sector_t start_sector;
377 u64 num_sectors;
378 u32 rem;
379 int rc;
380
381 start_sector = div_u64(ns_offset, sector_size);
382 num_sectors = div_u64_rem(len, sector_size, &rem);
383 if (rem)
384 num_sectors++;
385
386 if (!disk->bb) {
387 rc = disk_alloc_badblocks(disk);
388 if (rc)
389 return rc;
390 }
391
392 if (unlikely(num_sectors > (u64)INT_MAX)) {
393 u64 remaining = num_sectors;
394 sector_t s = start_sector;
395
396 while (remaining) {
397 int done = min_t(u64, remaining, INT_MAX);
398
399 rc = disk_set_badblocks(disk, s, done);
400 if (rc)
401 return rc;
402 remaining -= done;
403 s += done;
404 }
405 return 0;
406 } else
407 return disk_set_badblocks(disk, start_sector, num_sectors);
408}
409
410/**
411 * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
412 * @disk: the gendisk associated with the namespace where badblocks
413 * will be stored
414 * @offset: offset at the start of the namespace before 'sector 0'
415 * @ndns: the namespace containing poison ranges
416 *
417 * The poison list generated during NFIT initialization may contain multiple,
418 * possibly overlapping ranges in the SPA (System Physical Address) space.
419 * Compare each of these ranges to the namespace currently being initialized,
420 * and add badblocks to the gendisk for all matching sub-ranges
421 *
422 * Return:
423 * 0 - Success
424 */
425int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
426 struct nd_namespace_common *ndns)
427{
428 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
429 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
430 struct nvdimm_bus *nvdimm_bus;
431 struct list_head *poison_list;
432 u64 ns_start, ns_end, ns_size;
433 struct nd_poison *pl;
434 int rc;
435
436 ns_size = nvdimm_namespace_capacity(ndns) - offset;
437 ns_start = nsio->res.start + offset;
438 ns_end = nsio->res.end;
439
440 nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
441 poison_list = &nvdimm_bus->poison_list;
442 if (list_empty(poison_list))
443 return 0;
444
445 list_for_each_entry(pl, poison_list, list) {
446 u64 pl_end = pl->start + pl->length - 1;
447
448 /* Discard intervals with no intersection */
449 if (pl_end < ns_start)
450 continue;
451 if (pl->start > ns_end)
452 continue;
453 /* Deal with any overlap after start of the namespace */
454 if (pl->start >= ns_start) {
455 u64 start = pl->start;
456 u64 len;
457
458 if (pl_end <= ns_end)
459 len = pl->length;
460 else
461 len = ns_start + ns_size - pl->start;
462
463 rc = __add_badblock_range(disk, start - ns_start, len);
464 if (rc)
465 return rc;
466 dev_info(&nvdimm_bus->dev,
467 "Found a poison range (0x%llx, 0x%llx)\n",
468 start, len);
469 continue;
470 }
471 /* Deal with overlap for poison starting before the namespace */
472 if (pl->start < ns_start) {
473 u64 len;
474
475 if (pl_end < ns_end)
476 len = pl->start + pl->length - ns_start;
477 else
478 len = ns_size;
479
480 rc = __add_badblock_range(disk, 0, len);
481 if (rc)
482 return rc;
483 dev_info(&nvdimm_bus->dev,
484 "Found a poison range (0x%llx, 0x%llx)\n",
485 pl->start, len);
486 }
487 }
488
489 return 0;
490}
491EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison);
492
493static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
494{
495 struct nd_poison *pl;
496
497 pl = kzalloc(sizeof(*pl), GFP_KERNEL);
498 if (!pl)
499 return -ENOMEM;
500
501 pl->start = addr;
502 pl->length = length;
503 list_add_tail(&pl->list, &nvdimm_bus->poison_list);
504
505 return 0;
506}
507
508int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
509{
510 struct nd_poison *pl;
511
512 if (list_empty(&nvdimm_bus->poison_list))
513 return __add_poison(nvdimm_bus, addr, length);
514
515 /*
516 * There is a chance this is a duplicate, check for those first.
517 * This will be the common case as ARS_STATUS returns all known
518 * errors in the SPA space, and we can't query it per region
519 */
520 list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
521 if (pl->start == addr) {
522 /* If length has changed, update this list entry */
523 if (pl->length != length)
524 pl->length = length;
525 return 0;
526 }
527
528 /*
529 * If not a duplicate or a simple length update, add the entry as is,
530 * as any overlapping ranges will get resolved when the list is consumed
531 * and converted to badblocks
532 */
533 return __add_poison(nvdimm_bus, addr, length);
534}
535EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
536
537static void free_poison_list(struct list_head *poison_list)
538{
539 struct nd_poison *pl, *next;
540
541 list_for_each_entry_safe(pl, next, poison_list, list) {
542 list_del(&pl->list);
543 kfree(pl);
544 }
545 list_del_init(poison_list);
546}
547
362static int child_unregister(struct device *dev, void *data) 548static int child_unregister(struct device *dev, void *data)
363{ 549{
364 /* 550 /*
@@ -385,6 +571,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
385 571
386 nd_synchronize(); 572 nd_synchronize();
387 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 573 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
574 free_poison_list(&nvdimm_bus->poison_list);
388 nvdimm_bus_destroy_ndctl(nvdimm_bus); 575 nvdimm_bus_destroy_ndctl(nvdimm_bus);
389 576
390 device_unregister(&nvdimm_bus->dev); 577 device_unregister(&nvdimm_bus->dev);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 159aed532042..d3b7ea78df96 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -30,6 +30,7 @@ struct nvdimm_bus {
30 struct list_head list; 30 struct list_head list;
31 struct device dev; 31 struct device dev;
32 int id, probe_active; 32 int id, probe_active;
33 struct list_head poison_list;
33 struct mutex reconfig_mutex; 34 struct mutex reconfig_mutex;
34}; 35};
35 36
@@ -89,4 +90,6 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
89ssize_t nd_namespace_store(struct device *dev, 90ssize_t nd_namespace_store(struct device *dev,
90 struct nd_namespace_common **_ndns, const char *buf, 91 struct nd_namespace_common **_ndns, const char *buf,
91 size_t len); 92 size_t len);
93int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
94 struct nd_namespace_common *ndns);
92#endif /* __ND_CORE_H__ */ 95#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 417e521d299c..ba91fcd5818d 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -38,6 +38,12 @@ enum {
38#endif 38#endif
39}; 39};
40 40
41struct nd_poison {
42 u64 start;
43 u64 length;
44 struct list_head list;
45};
46
41struct nvdimm_drvdata { 47struct nvdimm_drvdata {
42 struct device *dev; 48 struct device *dev;
43 int nsindex_size; 49 int nsindex_size;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee79893d2f5..5b95043443a3 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -27,6 +27,7 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/pmem.h> 28#include <linux/pmem.h>
29#include <linux/nd.h> 29#include <linux/nd.h>
30#include "nd-core.h"
30#include "pfn.h" 31#include "pfn.h"
31#include "nd.h" 32#include "nd.h"
32 33
@@ -168,6 +169,7 @@ static int pmem_attach_disk(struct device *dev,
168{ 169{
169 int nid = dev_to_node(dev); 170 int nid = dev_to_node(dev);
170 struct gendisk *disk; 171 struct gendisk *disk;
172 int ret;
171 173
172 pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid); 174 pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
173 if (!pmem->pmem_queue) 175 if (!pmem->pmem_queue)
@@ -196,6 +198,10 @@ static int pmem_attach_disk(struct device *dev,
196 set_capacity(disk, (pmem->size - pmem->data_offset) / 512); 198 set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
197 pmem->pmem_disk = disk; 199 pmem->pmem_disk = disk;
198 200
201 ret = nvdimm_namespace_add_poison(disk, pmem->data_offset, ndns);
202 if (ret)
203 return ret;
204
199 add_disk(disk); 205 add_disk(disk);
200 revalidate_disk(disk); 206 revalidate_disk(disk);
201 207
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 3f021dc5da8c..bed40dff0e86 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -116,6 +116,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
116 116
117} 117}
118 118
119int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
119struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 120struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
120 struct nvdimm_bus_descriptor *nfit_desc, struct module *module); 121 struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
121#define nvdimm_bus_register(parent, desc) \ 122#define nvdimm_bus_register(parent, desc) \