aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com>, 2019-05-01 00:51:21 -0400
committer: Dan Williams <dan.j.williams@intel.com>, 2019-05-01 00:51:21 -0400
commit: c4703ce11c23423d4b46e3d59aef7979814fd608 (patch)
tree: 1d128060b3c634032fd2a52dfc5a94471f1e03fa
parent: 92f6f2d7f5c844faebf5b47d4a8f15de519b48c2 (diff)
libnvdimm/namespace: Fix label tracking error
Users have reported intermittent occurrences of DIMM initialization
failures due to duplicate allocations of address capacity detected in
the labels, or errors of the form below; both have the same root cause.

    nd namespace1.4: failed to track label: 0
    WARNING: CPU: 17 PID: 1381 at drivers/nvdimm/label.c:863
    RIP: 0010:__pmem_label_update+0x56c/0x590 [libnvdimm]
    Call Trace:
     ? nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm]
     nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm]
     uuid_store+0x17e/0x190 [libnvdimm]
     kernfs_fop_write+0xf0/0x1a0
     vfs_write+0xb7/0x1b0
     ksys_write+0x57/0xd0
     do_syscall_64+0x60/0x210

Unfortunately, those reports were typically with a busy parallel
namespace creation / destruction loop, making it difficult to see the
components of the bug. However, Jane provided a simple reproducer using
the work-in-progress sub-section implementation.

When ndctl is reconfiguring a namespace it may take an existing defunct /
disabled namespace and reconfigure it with a new uuid and other
parameters. Critically, namespace_update_uuid() takes existing address
resources and renames them for the new namespace to use / reconfigure as
it sees fit. The bug is that this rename only happens in the resource
tracking tree. Existing labels with the old uuid are not reaped, leading
to a scenario where multiple active labels reference the same span of
address range.

Teach namespace_update_uuid() to flag any references to the old uuid for
reaping at the next label update attempt.

Cc: <stable@vger.kernel.org>
Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation")
Link: https://github.com/pmem/ndctl/issues/91
Reported-by: Jane Chu <jane.chu@oracle.com>
Reported-by: Jeff Moyer <jmoyer@redhat.com>
Reported-by: Erwin Tsaur <erwin.tsaur@oracle.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--  drivers/nvdimm/label.c           29
-rw-r--r--  drivers/nvdimm/namespace_devs.c  15
-rw-r--r--  drivers/nvdimm/nd.h               4
3 files changed, 35 insertions, 13 deletions
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index f3d753d3169c..2030805aa216 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -756,6 +756,17 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
756 return &guid_null; 756 return &guid_null;
757} 757}
758 758
759static void reap_victim(struct nd_mapping *nd_mapping,
760 struct nd_label_ent *victim)
761{
762 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
763 u32 slot = to_slot(ndd, victim->label);
764
765 dev_dbg(ndd->dev, "free: %d\n", slot);
766 nd_label_free_slot(ndd, slot);
767 victim->label = NULL;
768}
769
759static int __pmem_label_update(struct nd_region *nd_region, 770static int __pmem_label_update(struct nd_region *nd_region,
760 struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, 771 struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
761 int pos, unsigned long flags) 772 int pos, unsigned long flags)
@@ -763,9 +774,9 @@ static int __pmem_label_update(struct nd_region *nd_region,
763 struct nd_namespace_common *ndns = &nspm->nsio.common; 774 struct nd_namespace_common *ndns = &nspm->nsio.common;
764 struct nd_interleave_set *nd_set = nd_region->nd_set; 775 struct nd_interleave_set *nd_set = nd_region->nd_set;
765 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 776 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
766 struct nd_label_ent *label_ent, *victim = NULL;
767 struct nd_namespace_label *nd_label; 777 struct nd_namespace_label *nd_label;
768 struct nd_namespace_index *nsindex; 778 struct nd_namespace_index *nsindex;
779 struct nd_label_ent *label_ent;
769 struct nd_label_id label_id; 780 struct nd_label_id label_id;
770 struct resource *res; 781 struct resource *res;
771 unsigned long *free; 782 unsigned long *free;
@@ -834,18 +845,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
834 list_for_each_entry(label_ent, &nd_mapping->labels, list) { 845 list_for_each_entry(label_ent, &nd_mapping->labels, list) {
835 if (!label_ent->label) 846 if (!label_ent->label)
836 continue; 847 continue;
837 if (memcmp(nspm->uuid, label_ent->label->uuid, 848 if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
838 NSLABEL_UUID_LEN) != 0) 849 || memcmp(nspm->uuid, label_ent->label->uuid,
839 continue; 850 NSLABEL_UUID_LEN) == 0)
840 victim = label_ent; 851 reap_victim(nd_mapping, label_ent);
841 list_move_tail(&victim->list, &nd_mapping->labels);
842 break;
843 }
844 if (victim) {
845 dev_dbg(ndd->dev, "free: %d\n", slot);
846 slot = to_slot(ndd, victim->label);
847 nd_label_free_slot(ndd, slot);
848 victim->label = NULL;
849 } 852 }
850 853
851 /* update index */ 854 /* update index */
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index f293556cbbf6..d0214644e334 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1247,12 +1247,27 @@ static int namespace_update_uuid(struct nd_region *nd_region,
1247 for (i = 0; i < nd_region->ndr_mappings; i++) { 1247 for (i = 0; i < nd_region->ndr_mappings; i++) {
1248 struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 1248 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1249 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 1249 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
1250 struct nd_label_ent *label_ent;
1250 struct resource *res; 1251 struct resource *res;
1251 1252
1252 for_each_dpa_resource(ndd, res) 1253 for_each_dpa_resource(ndd, res)
1253 if (strcmp(res->name, old_label_id.id) == 0) 1254 if (strcmp(res->name, old_label_id.id) == 0)
1254 sprintf((void *) res->name, "%s", 1255 sprintf((void *) res->name, "%s",
1255 new_label_id.id); 1256 new_label_id.id);
1257
1258 mutex_lock(&nd_mapping->lock);
1259 list_for_each_entry(label_ent, &nd_mapping->labels, list) {
1260 struct nd_namespace_label *nd_label = label_ent->label;
1261 struct nd_label_id label_id;
1262
1263 if (!nd_label)
1264 continue;
1265 nd_label_gen_id(&label_id, nd_label->uuid,
1266 __le32_to_cpu(nd_label->flags));
1267 if (strcmp(old_label_id.id, label_id.id) == 0)
1268 set_bit(ND_LABEL_REAP, &label_ent->flags);
1269 }
1270 mutex_unlock(&nd_mapping->lock);
1256 } 1271 }
1257 kfree(*old_uuid); 1272 kfree(*old_uuid);
1258 out: 1273 out:
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index a5ac3b240293..191d62af0e51 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -113,8 +113,12 @@ struct nd_percpu_lane {
113 spinlock_t lock; 113 spinlock_t lock;
114}; 114};
115 115
116enum nd_label_flags {
117 ND_LABEL_REAP,
118};
116struct nd_label_ent { 119struct nd_label_ent {
117 struct list_head list; 120 struct list_head list;
121 unsigned long flags;
118 struct nd_namespace_label *label; 122 struct nd_namespace_label *label;
119}; 123};
120 124