author    Linus Torvalds <torvalds@linux-foundation.org>  2015-06-29 13:34:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-06-29 13:34:42 -0400
commit    88793e5c774ec69351ef6b5200bb59f532e41bca (patch)
tree      54c4be61777ea53fde892b71e795322c5227d16e /drivers
parent    1bc5e157ed2b4f5b206155fc772d860158acd201 (diff)
parent    61031952f4c89dba1065f7a5b9419badb112554c (diff)
Merge tag 'libnvdimm-for-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm
Pull libnvdimm subsystem from Dan Williams:
 "The libnvdimm sub-system introduces, in addition to the libnvdimm-core, 4 drivers / enabling modules:

  NFIT:
   Instantiates an "nvdimm bus" with the core and registers memory
   devices (NVDIMMs) enumerated by the ACPI 6.0 NFIT (NVDIMM Firmware
   Interface Table).  After registering NVDIMMs the NFIT driver then
   registers "region" devices.  A libnvdimm-region defines an access
   mode and the boundaries of persistent memory media.  A region may
   span multiple NVDIMMs that are interleaved by the hardware memory
   controller.  In turn, a libnvdimm-region can be carved into a
   "namespace" device and bound to the PMEM or BLK driver which will
   attach a Linux block device (disk) interface to the memory.

  PMEM:
   Initially merged in v4.1, this driver for contiguous spans of
   persistent memory address ranges is re-worked to drive
   PMEM-namespaces emitted by the libnvdimm-core.  In this update the
   PMEM driver, on x86, gains the ability to assert that writes to
   persistent memory have been flushed all the way through the caches
   and buffers in the platform to persistent media.  See
   memcpy_to_pmem() and wmb_pmem().

  BLK:
   This new driver enables access to persistent memory media through
   "Block Data Windows" as defined by the NFIT.  The primary
   difference of this driver from PMEM is that only a small window of
   persistent memory is mapped into system address space at any given
   point in time.  Per-NVDIMM windows are reprogrammed at run time,
   per-I/O, to access different portions of the media.  BLK-mode, by
   definition, does not support DAX.

  BTT:
   This is a library, optionally consumed by either PMEM or BLK, that
   converts a byte-accessible namespace into a disk with atomic sector
   update semantics (prevents sector tearing on crash or power loss).
   The sinister aspect of sector tearing is that most applications do
   not know they have an atomic-sector dependency.  Today's disks
   rarely tear sectors, and if they do one almost certainly gets a CRC
   error on access.  NVDIMMs will always tear, and always silently.
   Until an application is audited to be robust in the presence of
   sector tearing, use of the BTT is recommended.

 Thanks to: Ross Zwisler, Jeff Moyer, Vishal Verma, Christoph Hellwig,
 Ingo Molnar, Neil Brown, Boaz Harrosh, Robert Elliott, Matthew Wilcox,
 Andy Rudoff, Linda Knippers, Toshi Kani, Nicholas Moulin, Rafael
 Wysocki, and Bob Moore"

* tag 'libnvdimm-for-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm: (33 commits)
  arch, x86: pmem api for ensuring durability of persistent memory updates
  libnvdimm: Add sysfs numa_node to NVDIMM devices
  libnvdimm: Set numa_node to NVDIMM devices
  acpi: Add acpi_map_pxm_to_online_node()
  libnvdimm, nfit: handle unarmed dimms, mark namespaces read-only
  pmem: flag pmem block devices as non-rotational
  libnvdimm: enable iostat
  pmem: make_request cleanups
  libnvdimm, pmem: fix up max_hw_sectors
  libnvdimm, blk: add support for blk integrity
  libnvdimm, btt: add support for blk integrity
  fs/block_dev.c: skip rw_page if bdev has integrity
  libnvdimm: Non-Volatile Devices
  tools/testing/nvdimm: libnvdimm unit test infrastructure
  libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory
  nd_btt: atomic sector updates
  libnvdimm: infrastructure for btt devices
  libnvdimm: write blk label set
  libnvdimm: write pmem label set
  libnvdimm: blk labels and namespace instantiation
  ...
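As an illustration of the durability contract described for PMEM above, a persistent-memory consumer pairs memcpy_to_pmem() with wmb_pmem() so that data is pushed past the CPU caches and platform buffers before it is considered durable. The sketch below is illustrative only and is not code from this series; the function and buffer names are invented, and it assumes a __pmem mapping obtained elsewhere (e.g. from the pmem driver).

#include <linux/pmem.h>

/*
 * Illustrative sketch: make one record durable in persistent memory.
 * 'dst' is assumed to be a __pmem-annotated mapping provided by the
 * pmem driver; 'src' is an ordinary DRAM buffer.
 */
static void persist_record(void __pmem *dst, const void *src, size_t len)
{
	/* copy, using non-temporal stores where the architecture supports it */
	memcpy_to_pmem(dst, src, len);

	/* drain platform write buffers so the copy is durable on media */
	wmb_pmem();
}

Without the wmb_pmem() step the copy may still be pending in caches or platform buffers; this copy-then-fence pattern is what the "arch, x86: pmem api" commit in this series provides.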
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig                                            |    2
-rw-r--r--  drivers/Makefile                                           |    1
-rw-r--r--  drivers/acpi/Kconfig                                       |   26
-rw-r--r--  drivers/acpi/Makefile                                      |    1
-rw-r--r--  drivers/acpi/nfit.c                                        | 1587
-rw-r--r--  drivers/acpi/nfit.h                                        |  158
-rw-r--r--  drivers/acpi/numa.c                                        |   50
-rw-r--r--  drivers/block/Kconfig                                      |   12
-rw-r--r--  drivers/block/Makefile                                     |    1
-rw-r--r--  drivers/nvdimm/Kconfig                                     |   68
-rw-r--r--  drivers/nvdimm/Makefile                                    |   20
-rw-r--r--  drivers/nvdimm/blk.c                                       |  384
-rw-r--r--  drivers/nvdimm/btt.c                                       | 1479
-rw-r--r--  drivers/nvdimm/btt.h                                       |  185
-rw-r--r--  drivers/nvdimm/btt_devs.c                                  |  425
-rw-r--r--  drivers/nvdimm/bus.c                                       |  730
-rw-r--r--  drivers/nvdimm/core.c                                      |  465
-rw-r--r--  drivers/nvdimm/dimm.c                                      |  102
-rw-r--r--  drivers/nvdimm/dimm_devs.c                                 |  551
-rw-r--r--  drivers/nvdimm/label.c                                     |  927
-rw-r--r--  drivers/nvdimm/label.h                                     |  141
-rw-r--r--  drivers/nvdimm/namespace_devs.c                            | 1870
-rw-r--r--  drivers/nvdimm/nd-core.h                                   |   83
-rw-r--r--  drivers/nvdimm/nd.h                                        |  220
-rw-r--r--  drivers/nvdimm/pmem.c (renamed from drivers/block/pmem.c)  |  227
-rw-r--r--  drivers/nvdimm/region.c                                    |  114
-rw-r--r--  drivers/nvdimm/region_devs.c                               |  787
27 files changed, 10506 insertions, 110 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index c0cc96bab9e7..6e973b8e3a3b 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -182,4 +182,6 @@ source "drivers/thunderbolt/Kconfig"
 
 source "drivers/android/Kconfig"
 
+source "drivers/nvdimm/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 9a02fb7c5106..b64b49f6e01b 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
 
 obj-$(CONFIG_PARPORT)		+= parport/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
+obj-$(CONFIG_LIBNVDIMM)	+= nvdimm/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 35da507411a0..f15db002be8e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -386,6 +386,32 @@ config ACPI_REDUCED_HARDWARE_ONLY
 
 	  If you are unsure what to do, do not enable this option.
 
+config ACPI_NFIT
+	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	select LIBNVDIMM
+	help
+	  Infrastructure to probe ACPI 6 compliant platforms for
+	  NVDIMMs (NFIT) and register a libnvdimm device tree.  In
+	  addition to storage devices this also enables libnvdimm to pass
+	  ACPI._DSM messages for platform/dimm configuration.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+	bool "NFIT DSM debug"
+	depends on ACPI_NFIT
+	depends on DYNAMIC_DEBUG
+	default n
+	help
+	  Enabling this option causes the nfit driver to dump the
+	  input and output buffers of _DSM operations on the ACPI0012
+	  device and its children.  This can be very verbose, so leave
+	  it disabled unless you are debugging a hardware / firmware
+	  issue.
+
 source "drivers/acpi/apei/Kconfig"
 
 config ACPI_EXTLOG
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 73d840bef455..8321430d7f24 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)	+= processor.o
 obj-y				+= container.o
 obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
+obj-$(CONFIG_ACPI_NFIT)	+= nfit.o
 obj-y				+= acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
new file mode 100644
index 000000000000..2161fa178c8d
--- /dev/null
+++ b/drivers/acpi/nfit.c
@@ -0,0 +1,1587 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/list_sort.h>
14#include <linux/libnvdimm.h>
15#include <linux/module.h>
16#include <linux/mutex.h>
17#include <linux/ndctl.h>
18#include <linux/list.h>
19#include <linux/acpi.h>
20#include <linux/sort.h>
21#include <linux/io.h>
22#include "nfit.h"
23
24/*
25 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
26 * irrelevant.
27 */
28#include <asm-generic/io-64-nonatomic-hi-lo.h>
29
30static bool force_enable_dimms;
31module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
32MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
33
34static u8 nfit_uuid[NFIT_UUID_MAX][16];
35
36const u8 *to_nfit_uuid(enum nfit_uuids id)
37{
38 return nfit_uuid[id];
39}
40EXPORT_SYMBOL(to_nfit_uuid);
41
42static struct acpi_nfit_desc *to_acpi_nfit_desc(
43 struct nvdimm_bus_descriptor *nd_desc)
44{
45 return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
46}
47
48static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
49{
50 struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
51
52 /*
53 * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
54 * acpi_device.
55 */
56 if (!nd_desc->provider_name
57 || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
58 return NULL;
59
60 return to_acpi_device(acpi_desc->dev);
61}
62
63static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
64 struct nvdimm *nvdimm, unsigned int cmd, void *buf,
65 unsigned int buf_len)
66{
67 struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
68 const struct nd_cmd_desc *desc = NULL;
69 union acpi_object in_obj, in_buf, *out_obj;
70 struct device *dev = acpi_desc->dev;
71 const char *cmd_name, *dimm_name;
72 unsigned long dsm_mask;
73 acpi_handle handle;
74 const u8 *uuid;
75 u32 offset;
76 int rc, i;
77
78 if (nvdimm) {
79 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
80 struct acpi_device *adev = nfit_mem->adev;
81
82 if (!adev)
83 return -ENOTTY;
84 dimm_name = nvdimm_name(nvdimm);
85 cmd_name = nvdimm_cmd_name(cmd);
86 dsm_mask = nfit_mem->dsm_mask;
87 desc = nd_cmd_dimm_desc(cmd);
88 uuid = to_nfit_uuid(NFIT_DEV_DIMM);
89 handle = adev->handle;
90 } else {
91 struct acpi_device *adev = to_acpi_dev(acpi_desc);
92
93 cmd_name = nvdimm_bus_cmd_name(cmd);
94 dsm_mask = nd_desc->dsm_mask;
95 desc = nd_cmd_bus_desc(cmd);
96 uuid = to_nfit_uuid(NFIT_DEV_BUS);
97 handle = adev->handle;
98 dimm_name = "bus";
99 }
100
101 if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
102 return -ENOTTY;
103
104 if (!test_bit(cmd, &dsm_mask))
105 return -ENOTTY;
106
107 in_obj.type = ACPI_TYPE_PACKAGE;
108 in_obj.package.count = 1;
109 in_obj.package.elements = &in_buf;
110 in_buf.type = ACPI_TYPE_BUFFER;
111 in_buf.buffer.pointer = buf;
112 in_buf.buffer.length = 0;
113
114 /* libnvdimm has already validated the input envelope */
115 for (i = 0; i < desc->in_num; i++)
116 in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
117 i, buf);
118
119 if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
120 dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__,
121 dimm_name, cmd_name, in_buf.buffer.length);
122 print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
123 4, in_buf.buffer.pointer, min_t(u32, 128,
124 in_buf.buffer.length), true);
125 }
126
127 out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj);
128 if (!out_obj) {
129 dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
130 cmd_name);
131 return -EINVAL;
132 }
133
134 if (out_obj->package.type != ACPI_TYPE_BUFFER) {
135 dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
136 __func__, dimm_name, cmd_name, out_obj->type);
137 rc = -EINVAL;
138 goto out;
139 }
140
141 if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
142 dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
143 dimm_name, cmd_name, out_obj->buffer.length);
144 print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
145 4, out_obj->buffer.pointer, min_t(u32, 128,
146 out_obj->buffer.length), true);
147 }
148
149 for (i = 0, offset = 0; i < desc->out_num; i++) {
150 u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
151 (u32 *) out_obj->buffer.pointer);
152
153 if (offset + out_size > out_obj->buffer.length) {
154 dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
155 __func__, dimm_name, cmd_name, i);
156 break;
157 }
158
159 if (in_buf.buffer.length + offset + out_size > buf_len) {
160 dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
161 __func__, dimm_name, cmd_name, i);
162 rc = -ENXIO;
163 goto out;
164 }
165 memcpy(buf + in_buf.buffer.length + offset,
166 out_obj->buffer.pointer + offset, out_size);
167 offset += out_size;
168 }
169 if (offset + in_buf.buffer.length < buf_len) {
170 if (i >= 1) {
171 /*
172 * status valid, return the number of bytes left
173 * unfilled in the output buffer
174 */
175 rc = buf_len - offset - in_buf.buffer.length;
176 } else {
177 dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
178 __func__, dimm_name, cmd_name, buf_len,
179 offset);
180 rc = -ENXIO;
181 }
182 } else
183 rc = 0;
184
185 out:
186 ACPI_FREE(out_obj);
187
188 return rc;
189}
190
191static const char *spa_type_name(u16 type)
192{
193 static const char *to_name[] = {
194 [NFIT_SPA_VOLATILE] = "volatile",
195 [NFIT_SPA_PM] = "pmem",
196 [NFIT_SPA_DCR] = "dimm-control-region",
197 [NFIT_SPA_BDW] = "block-data-window",
198 [NFIT_SPA_VDISK] = "volatile-disk",
199 [NFIT_SPA_VCD] = "volatile-cd",
200 [NFIT_SPA_PDISK] = "persistent-disk",
201 [NFIT_SPA_PCD] = "persistent-cd",
202
203 };
204
205 if (type > NFIT_SPA_PCD)
206 return "unknown";
207
208 return to_name[type];
209}
210
211static int nfit_spa_type(struct acpi_nfit_system_address *spa)
212{
213 int i;
214
215 for (i = 0; i < NFIT_UUID_MAX; i++)
216 if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
217 return i;
218 return -1;
219}
220
221static bool add_spa(struct acpi_nfit_desc *acpi_desc,
222 struct acpi_nfit_system_address *spa)
223{
224 struct device *dev = acpi_desc->dev;
225 struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa),
226 GFP_KERNEL);
227
228 if (!nfit_spa)
229 return false;
230 INIT_LIST_HEAD(&nfit_spa->list);
231 nfit_spa->spa = spa;
232 list_add_tail(&nfit_spa->list, &acpi_desc->spas);
233 dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
234 spa->range_index,
235 spa_type_name(nfit_spa_type(spa)));
236 return true;
237}
238
239static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
240 struct acpi_nfit_memory_map *memdev)
241{
242 struct device *dev = acpi_desc->dev;
243 struct nfit_memdev *nfit_memdev = devm_kzalloc(dev,
244 sizeof(*nfit_memdev), GFP_KERNEL);
245
246 if (!nfit_memdev)
247 return false;
248 INIT_LIST_HEAD(&nfit_memdev->list);
249 nfit_memdev->memdev = memdev;
250 list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
251 dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
252 __func__, memdev->device_handle, memdev->range_index,
253 memdev->region_index);
254 return true;
255}
256
257static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
258 struct acpi_nfit_control_region *dcr)
259{
260 struct device *dev = acpi_desc->dev;
261 struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr),
262 GFP_KERNEL);
263
264 if (!nfit_dcr)
265 return false;
266 INIT_LIST_HEAD(&nfit_dcr->list);
267 nfit_dcr->dcr = dcr;
268 list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
269 dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
270 dcr->region_index, dcr->windows);
271 return true;
272}
273
274static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
275 struct acpi_nfit_data_region *bdw)
276{
277 struct device *dev = acpi_desc->dev;
278 struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw),
279 GFP_KERNEL);
280
281 if (!nfit_bdw)
282 return false;
283 INIT_LIST_HEAD(&nfit_bdw->list);
284 nfit_bdw->bdw = bdw;
285 list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
286 dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
287 bdw->region_index, bdw->windows);
288 return true;
289}
290
291static bool add_idt(struct acpi_nfit_desc *acpi_desc,
292 struct acpi_nfit_interleave *idt)
293{
294 struct device *dev = acpi_desc->dev;
295 struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt),
296 GFP_KERNEL);
297
298 if (!nfit_idt)
299 return false;
300 INIT_LIST_HEAD(&nfit_idt->list);
301 nfit_idt->idt = idt;
302 list_add_tail(&nfit_idt->list, &acpi_desc->idts);
303 dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
304 idt->interleave_index, idt->line_count);
305 return true;
306}
307
308static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
309 const void *end)
310{
311 struct device *dev = acpi_desc->dev;
312 struct acpi_nfit_header *hdr;
313 void *err = ERR_PTR(-ENOMEM);
314
315 if (table >= end)
316 return NULL;
317
318 hdr = table;
319 switch (hdr->type) {
320 case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
321 if (!add_spa(acpi_desc, table))
322 return err;
323 break;
324 case ACPI_NFIT_TYPE_MEMORY_MAP:
325 if (!add_memdev(acpi_desc, table))
326 return err;
327 break;
328 case ACPI_NFIT_TYPE_CONTROL_REGION:
329 if (!add_dcr(acpi_desc, table))
330 return err;
331 break;
332 case ACPI_NFIT_TYPE_DATA_REGION:
333 if (!add_bdw(acpi_desc, table))
334 return err;
335 break;
336 case ACPI_NFIT_TYPE_INTERLEAVE:
337 if (!add_idt(acpi_desc, table))
338 return err;
339 break;
340 case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
341 dev_dbg(dev, "%s: flush\n", __func__);
342 break;
343 case ACPI_NFIT_TYPE_SMBIOS:
344 dev_dbg(dev, "%s: smbios\n", __func__);
345 break;
346 default:
347 dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
348 break;
349 }
350
351 return table + hdr->length;
352}
353
354static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
355 struct nfit_mem *nfit_mem)
356{
357 u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
358 u16 dcr = nfit_mem->dcr->region_index;
359 struct nfit_spa *nfit_spa;
360
361 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
362 u16 range_index = nfit_spa->spa->range_index;
363 int type = nfit_spa_type(nfit_spa->spa);
364 struct nfit_memdev *nfit_memdev;
365
366 if (type != NFIT_SPA_BDW)
367 continue;
368
369 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
370 if (nfit_memdev->memdev->range_index != range_index)
371 continue;
372 if (nfit_memdev->memdev->device_handle != device_handle)
373 continue;
374 if (nfit_memdev->memdev->region_index != dcr)
375 continue;
376
377 nfit_mem->spa_bdw = nfit_spa->spa;
378 return;
379 }
380 }
381
382 dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
383 nfit_mem->spa_dcr->range_index);
384 nfit_mem->bdw = NULL;
385}
386
387static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
388 struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
389{
390 u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
391 struct nfit_memdev *nfit_memdev;
392 struct nfit_dcr *nfit_dcr;
393 struct nfit_bdw *nfit_bdw;
394 struct nfit_idt *nfit_idt;
395 u16 idt_idx, range_index;
396
397 list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
398 if (nfit_dcr->dcr->region_index != dcr)
399 continue;
400 nfit_mem->dcr = nfit_dcr->dcr;
401 break;
402 }
403
404 if (!nfit_mem->dcr) {
405 dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
406 spa->range_index, __to_nfit_memdev(nfit_mem)
407 ? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
408 return -ENODEV;
409 }
410
411 /*
412 * We've found enough to create an nvdimm, optionally
413 * find an associated BDW
414 */
415 list_add(&nfit_mem->list, &acpi_desc->dimms);
416
417 list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
418 if (nfit_bdw->bdw->region_index != dcr)
419 continue;
420 nfit_mem->bdw = nfit_bdw->bdw;
421 break;
422 }
423
424 if (!nfit_mem->bdw)
425 return 0;
426
427 nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
428
429 if (!nfit_mem->spa_bdw)
430 return 0;
431
432 range_index = nfit_mem->spa_bdw->range_index;
433 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
434 if (nfit_memdev->memdev->range_index != range_index ||
435 nfit_memdev->memdev->region_index != dcr)
436 continue;
437 nfit_mem->memdev_bdw = nfit_memdev->memdev;
438 idt_idx = nfit_memdev->memdev->interleave_index;
439 list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
440 if (nfit_idt->idt->interleave_index != idt_idx)
441 continue;
442 nfit_mem->idt_bdw = nfit_idt->idt;
443 break;
444 }
445 break;
446 }
447
448 return 0;
449}
450
451static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
452 struct acpi_nfit_system_address *spa)
453{
454 struct nfit_mem *nfit_mem, *found;
455 struct nfit_memdev *nfit_memdev;
456 int type = nfit_spa_type(spa);
457 u16 dcr;
458
459 switch (type) {
460 case NFIT_SPA_DCR:
461 case NFIT_SPA_PM:
462 break;
463 default:
464 return 0;
465 }
466
467 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
468 int rc;
469
470 if (nfit_memdev->memdev->range_index != spa->range_index)
471 continue;
472 found = NULL;
473 dcr = nfit_memdev->memdev->region_index;
474 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
475 if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
476 found = nfit_mem;
477 break;
478 }
479
480 if (found)
481 nfit_mem = found;
482 else {
483 nfit_mem = devm_kzalloc(acpi_desc->dev,
484 sizeof(*nfit_mem), GFP_KERNEL);
485 if (!nfit_mem)
486 return -ENOMEM;
487 INIT_LIST_HEAD(&nfit_mem->list);
488 }
489
490 if (type == NFIT_SPA_DCR) {
491 struct nfit_idt *nfit_idt;
492 u16 idt_idx;
493
494 /* multiple dimms may share a SPA when interleaved */
495 nfit_mem->spa_dcr = spa;
496 nfit_mem->memdev_dcr = nfit_memdev->memdev;
497 idt_idx = nfit_memdev->memdev->interleave_index;
498 list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
499 if (nfit_idt->idt->interleave_index != idt_idx)
500 continue;
501 nfit_mem->idt_dcr = nfit_idt->idt;
502 break;
503 }
504 } else {
505 /*
506 * A single dimm may belong to multiple SPA-PM
507 * ranges, record at least one in addition to
508 * any SPA-DCR range.
509 */
510 nfit_mem->memdev_pmem = nfit_memdev->memdev;
511 }
512
513 if (found)
514 continue;
515
516 rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
517 if (rc)
518 return rc;
519 }
520
521 return 0;
522}
523
524static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
525{
526 struct nfit_mem *a = container_of(_a, typeof(*a), list);
527 struct nfit_mem *b = container_of(_b, typeof(*b), list);
528 u32 handleA, handleB;
529
530 handleA = __to_nfit_memdev(a)->device_handle;
531 handleB = __to_nfit_memdev(b)->device_handle;
532 if (handleA < handleB)
533 return -1;
534 else if (handleA > handleB)
535 return 1;
536 return 0;
537}
538
539static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
540{
541 struct nfit_spa *nfit_spa;
542
543 /*
544 * For each SPA-DCR or SPA-PMEM address range find its
545 * corresponding MEMDEV(s). From each MEMDEV find the
546 * corresponding DCR. Then, if we're operating on a SPA-DCR,
547 * try to find a SPA-BDW and a corresponding BDW that references
548 * the DCR. Throw it all into an nfit_mem object. Note, that
549 * BDWs are optional.
550 */
551 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
552 int rc;
553
554 rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
555 if (rc)
556 return rc;
557 }
558
559 list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
560
561 return 0;
562}
563
564static ssize_t revision_show(struct device *dev,
565 struct device_attribute *attr, char *buf)
566{
567 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
568 struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
569 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
570
571 return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision);
572}
573static DEVICE_ATTR_RO(revision);
574
575static struct attribute *acpi_nfit_attributes[] = {
576 &dev_attr_revision.attr,
577 NULL,
578};
579
580static struct attribute_group acpi_nfit_attribute_group = {
581 .name = "nfit",
582 .attrs = acpi_nfit_attributes,
583};
584
585const struct attribute_group *acpi_nfit_attribute_groups[] = {
586 &nvdimm_bus_attribute_group,
587 &acpi_nfit_attribute_group,
588 NULL,
589};
590EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups);
591
592static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
593{
594 struct nvdimm *nvdimm = to_nvdimm(dev);
595 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
596
597 return __to_nfit_memdev(nfit_mem);
598}
599
600static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
601{
602 struct nvdimm *nvdimm = to_nvdimm(dev);
603 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
604
605 return nfit_mem->dcr;
606}
607
608static ssize_t handle_show(struct device *dev,
609 struct device_attribute *attr, char *buf)
610{
611 struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
612
613 return sprintf(buf, "%#x\n", memdev->device_handle);
614}
615static DEVICE_ATTR_RO(handle);
616
617static ssize_t phys_id_show(struct device *dev,
618 struct device_attribute *attr, char *buf)
619{
620 struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
621
622 return sprintf(buf, "%#x\n", memdev->physical_id);
623}
624static DEVICE_ATTR_RO(phys_id);
625
626static ssize_t vendor_show(struct device *dev,
627 struct device_attribute *attr, char *buf)
628{
629 struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
630
631 return sprintf(buf, "%#x\n", dcr->vendor_id);
632}
633static DEVICE_ATTR_RO(vendor);
634
635static ssize_t rev_id_show(struct device *dev,
636 struct device_attribute *attr, char *buf)
637{
638 struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
639
640 return sprintf(buf, "%#x\n", dcr->revision_id);
641}
642static DEVICE_ATTR_RO(rev_id);
643
644static ssize_t device_show(struct device *dev,
645 struct device_attribute *attr, char *buf)
646{
647 struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
648
649 return sprintf(buf, "%#x\n", dcr->device_id);
650}
651static DEVICE_ATTR_RO(device);
652
653static ssize_t format_show(struct device *dev,
654 struct device_attribute *attr, char *buf)
655{
656 struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
657
658 return sprintf(buf, "%#x\n", dcr->code);
659}
660static DEVICE_ATTR_RO(format);
661
662static ssize_t serial_show(struct device *dev,
663 struct device_attribute *attr, char *buf)
664{
665 struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
666
667 return sprintf(buf, "%#x\n", dcr->serial_number);
668}
669static DEVICE_ATTR_RO(serial);
670
671static ssize_t flags_show(struct device *dev,
672 struct device_attribute *attr, char *buf)
673{
674 u16 flags = to_nfit_memdev(dev)->flags;
675
676 return sprintf(buf, "%s%s%s%s%s\n",
677 flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "",
678 flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "",
679 flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "",
680 flags & ACPI_NFIT_MEM_ARMED ? "arm " : "",
681 flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : "");
682}
683static DEVICE_ATTR_RO(flags);
684
685static struct attribute *acpi_nfit_dimm_attributes[] = {
686 &dev_attr_handle.attr,
687 &dev_attr_phys_id.attr,
688 &dev_attr_vendor.attr,
689 &dev_attr_device.attr,
690 &dev_attr_format.attr,
691 &dev_attr_serial.attr,
692 &dev_attr_rev_id.attr,
693 &dev_attr_flags.attr,
694 NULL,
695};
696
697static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
698 struct attribute *a, int n)
699{
700 struct device *dev = container_of(kobj, struct device, kobj);
701
702 if (to_nfit_dcr(dev))
703 return a->mode;
704 else
705 return 0;
706}
707
708static struct attribute_group acpi_nfit_dimm_attribute_group = {
709 .name = "nfit",
710 .attrs = acpi_nfit_dimm_attributes,
711 .is_visible = acpi_nfit_dimm_attr_visible,
712};
713
714static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
715 &nvdimm_attribute_group,
716 &nd_device_attribute_group,
717 &acpi_nfit_dimm_attribute_group,
718 NULL,
719};
720
721static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
722 u32 device_handle)
723{
724 struct nfit_mem *nfit_mem;
725
726 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
727 if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
728 return nfit_mem->nvdimm;
729
730 return NULL;
731}
732
733static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
734 struct nfit_mem *nfit_mem, u32 device_handle)
735{
736 struct acpi_device *adev, *adev_dimm;
737 struct device *dev = acpi_desc->dev;
738 const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
739 unsigned long long sta;
740 int i, rc = -ENODEV;
741 acpi_status status;
742
743 nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
744 adev = to_acpi_dev(acpi_desc);
745 if (!adev)
746 return 0;
747
748 adev_dimm = acpi_find_child_device(adev, device_handle, false);
749 nfit_mem->adev = adev_dimm;
750 if (!adev_dimm) {
751 dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
752 device_handle);
753 return force_enable_dimms ? 0 : -ENODEV;
754 }
755
756 status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
757 if (status == AE_NOT_FOUND) {
758 dev_dbg(dev, "%s missing _STA, assuming enabled...\n",
759 dev_name(&adev_dimm->dev));
760 rc = 0;
761 } else if (ACPI_FAILURE(status))
762 dev_err(dev, "%s failed to retrieve_STA, disabling...\n",
763 dev_name(&adev_dimm->dev));
764 else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0)
765 dev_info(dev, "%s disabled by firmware\n",
766 dev_name(&adev_dimm->dev));
767 else
768 rc = 0;
769
770 for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
771 if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
772 set_bit(i, &nfit_mem->dsm_mask);
773
774 return force_enable_dimms ? 0 : rc;
775}
776
777static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
778{
779 struct nfit_mem *nfit_mem;
780 int dimm_count = 0;
781
782 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
783 struct nvdimm *nvdimm;
784 unsigned long flags = 0;
785 u32 device_handle;
786 u16 mem_flags;
787 int rc;
788
789 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
790 nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
791 if (nvdimm) {
792 /*
793 * If for some reason we find multiple DCRs the
794 * first one wins
795 */
796 dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n",
797 nvdimm_name(nvdimm));
798 continue;
799 }
800
801 if (nfit_mem->bdw && nfit_mem->memdev_pmem)
802 flags |= NDD_ALIASING;
803
804 mem_flags = __to_nfit_memdev(nfit_mem)->flags;
805 if (mem_flags & ACPI_NFIT_MEM_ARMED)
806 flags |= NDD_UNARMED;
807
808 rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
809 if (rc)
810 continue;
811
812 nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
813 acpi_nfit_dimm_attribute_groups,
814 flags, &nfit_mem->dsm_mask);
815 if (!nvdimm)
816 return -ENOMEM;
817
818 nfit_mem->nvdimm = nvdimm;
819 dimm_count++;
820
821 if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
822 continue;
823
824 dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n",
825 nvdimm_name(nvdimm),
826 mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "",
827 mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "",
828 mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "",
829 mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : "");
830
831 }
832
833 return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
834}
835
836static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
837{
838 struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
839 const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
840 struct acpi_device *adev;
841 int i;
842
843 adev = to_acpi_dev(acpi_desc);
844 if (!adev)
845 return;
846
847 for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++)
848 if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
849 set_bit(i, &nd_desc->dsm_mask);
850}
851
852static ssize_t range_index_show(struct device *dev,
853 struct device_attribute *attr, char *buf)
854{
855 struct nd_region *nd_region = to_nd_region(dev);
856 struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
857
858 return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
859}
860static DEVICE_ATTR_RO(range_index);
861
862static struct attribute *acpi_nfit_region_attributes[] = {
863 &dev_attr_range_index.attr,
864 NULL,
865};
866
867static struct attribute_group acpi_nfit_region_attribute_group = {
868 .name = "nfit",
869 .attrs = acpi_nfit_region_attributes,
870};
871
872static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
873 &nd_region_attribute_group,
874 &nd_mapping_attribute_group,
875 &nd_device_attribute_group,
876 &nd_numa_attribute_group,
877 &acpi_nfit_region_attribute_group,
878 NULL,
879};
880
881/* enough info to uniquely specify an interleave set */
882struct nfit_set_info {
883 struct nfit_set_info_map {
884 u64 region_offset;
885 u32 serial_number;
886 u32 pad;
887 } mapping[0];
888};
889
890static size_t sizeof_nfit_set_info(int num_mappings)
891{
892 return sizeof(struct nfit_set_info)
893 + num_mappings * sizeof(struct nfit_set_info_map);
894}
895
896static int cmp_map(const void *m0, const void *m1)
897{
898 const struct nfit_set_info_map *map0 = m0;
899 const struct nfit_set_info_map *map1 = m1;
900
901 return memcmp(&map0->region_offset, &map1->region_offset,
902 sizeof(u64));
903}
904
905/* Retrieve the nth entry referencing this spa */
906static struct acpi_nfit_memory_map *memdev_from_spa(
907 struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
908{
909 struct nfit_memdev *nfit_memdev;
910
911 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
912 if (nfit_memdev->memdev->range_index == range_index)
913 if (n-- == 0)
914 return nfit_memdev->memdev;
915 return NULL;
916}
917
918static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
919 struct nd_region_desc *ndr_desc,
920 struct acpi_nfit_system_address *spa)
921{
922 int i, spa_type = nfit_spa_type(spa);
923 struct device *dev = acpi_desc->dev;
924 struct nd_interleave_set *nd_set;
925 u16 nr = ndr_desc->num_mappings;
926 struct nfit_set_info *info;
927
928 if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
929 /* pass */;
930 else
931 return 0;
932
933 nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
934 if (!nd_set)
935 return -ENOMEM;
936
937 info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
938 if (!info)
939 return -ENOMEM;
940 for (i = 0; i < nr; i++) {
941 struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
942 struct nfit_set_info_map *map = &info->mapping[i];
943 struct nvdimm *nvdimm = nd_mapping->nvdimm;
944 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
945 struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
946 spa->range_index, i);
947
948 if (!memdev || !nfit_mem->dcr) {
949 dev_err(dev, "%s: failed to find DCR\n", __func__);
950 return -ENODEV;
951 }
952
953 map->region_offset = memdev->region_offset;
954 map->serial_number = nfit_mem->dcr->serial_number;
955 }
956
957 sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
958 cmp_map, NULL);
959 nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
960 ndr_desc->nd_set = nd_set;
961 devm_kfree(dev, info);
962
963 return 0;
964}
965
966static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
967{
968 struct acpi_nfit_interleave *idt = mmio->idt;
969 u32 sub_line_offset, line_index, line_offset;
970 u64 line_no, table_skip_count, table_offset;
971
972 line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
973 table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
974 line_offset = idt->line_offset[line_index]
975 * mmio->line_size;
976 table_offset = table_skip_count * mmio->table_size;
977
978 return mmio->base_offset + line_offset + table_offset + sub_line_offset;
979}
980
981static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
982{
983 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
984 u64 offset = nfit_blk->stat_offset + mmio->size * bw;
985
986 if (mmio->num_lines)
987 offset = to_interleave_offset(offset, mmio);
988
989 return readq(mmio->base + offset);
990}
991
992static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
993 resource_size_t dpa, unsigned int len, unsigned int write)
994{
995 u64 cmd, offset;
996 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
997
998 enum {
999 BCW_OFFSET_MASK = (1ULL << 48)-1,
1000 BCW_LEN_SHIFT = 48,
1001 BCW_LEN_MASK = (1ULL << 8) - 1,
1002 BCW_CMD_SHIFT = 56,
1003 };
1004
1005 cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
1006 len = len >> L1_CACHE_SHIFT;
1007 cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
1008 cmd |= ((u64) write) << BCW_CMD_SHIFT;
1009
1010 offset = nfit_blk->cmd_offset + mmio->size * bw;
1011 if (mmio->num_lines)
1012 offset = to_interleave_offset(offset, mmio);
1013
1014 writeq(cmd, mmio->base + offset);
1015 /* FIXME: conditionally perform read-back if mandated by firmware */
1016}
1017
1018static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
1019 resource_size_t dpa, void *iobuf, size_t len, int rw,
1020 unsigned int lane)
1021{
1022 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1023 unsigned int copied = 0;
1024 u64 base_offset;
1025 int rc;
1026
1027 base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
1028 + lane * mmio->size;
1029 /* TODO: non-temporal access, flush hints, cache management etc... */
1030 write_blk_ctl(nfit_blk, lane, dpa, len, rw);
1031 while (len) {
1032 unsigned int c;
1033 u64 offset;
1034
1035 if (mmio->num_lines) {
1036 u32 line_offset;
1037
1038 offset = to_interleave_offset(base_offset + copied,
1039 mmio);
1040 div_u64_rem(offset, mmio->line_size, &line_offset);
1041 c = min_t(size_t, len, mmio->line_size - line_offset);
1042 } else {
1043 offset = base_offset + nfit_blk->bdw_offset;
1044 c = len;
1045 }
1046
1047 if (rw)
1048 memcpy(mmio->aperture + offset, iobuf + copied, c);
1049 else
1050 memcpy(iobuf + copied, mmio->aperture + offset, c);
1051
1052 copied += c;
1053 len -= c;
1054 }
1055 rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
1056 return rc;
1057}
1058
1059static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
1060 resource_size_t dpa, void *iobuf, u64 len, int rw)
1061{
1062 struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
1063 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1064 struct nd_region *nd_region = nfit_blk->nd_region;
1065 unsigned int lane, copied = 0;
1066 int rc = 0;
1067
1068 lane = nd_region_acquire_lane(nd_region);
1069 while (len) {
1070 u64 c = min(len, mmio->size);
1071
1072 rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
1073 iobuf + copied, c, rw, lane);
1074 if (rc)
1075 break;
1076
1077 copied += c;
1078 len -= c;
1079 }
1080 nd_region_release_lane(nd_region, lane);
1081
1082 return rc;
1083}
1084
1085static void nfit_spa_mapping_release(struct kref *kref)
1086{
1087 struct nfit_spa_mapping *spa_map = to_spa_map(kref);
1088 struct acpi_nfit_system_address *spa = spa_map->spa;
1089 struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
1090
1091 WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
1092 dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
1093 iounmap(spa_map->iomem);
1094 release_mem_region(spa->address, spa->length);
1095 list_del(&spa_map->list);
1096 kfree(spa_map);
1097}
1098
1099static struct nfit_spa_mapping *find_spa_mapping(
1100 struct acpi_nfit_desc *acpi_desc,
1101 struct acpi_nfit_system_address *spa)
1102{
1103 struct nfit_spa_mapping *spa_map;
1104
1105 WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
1106 list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
1107 if (spa_map->spa == spa)
1108 return spa_map;
1109
1110 return NULL;
1111}
1112
1113static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
1114 struct acpi_nfit_system_address *spa)
1115{
1116 struct nfit_spa_mapping *spa_map;
1117
1118 mutex_lock(&acpi_desc->spa_map_mutex);
1119 spa_map = find_spa_mapping(acpi_desc, spa);
1120
1121 if (spa_map)
1122 kref_put(&spa_map->kref, nfit_spa_mapping_release);
1123 mutex_unlock(&acpi_desc->spa_map_mutex);
1124}
1125
1126static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
1127 struct acpi_nfit_system_address *spa)
1128{
1129 resource_size_t start = spa->address;
1130 resource_size_t n = spa->length;
1131 struct nfit_spa_mapping *spa_map;
1132 struct resource *res;
1133
1134 WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
1135
1136 spa_map = find_spa_mapping(acpi_desc, spa);
1137 if (spa_map) {
1138 kref_get(&spa_map->kref);
1139 return spa_map->iomem;
1140 }
1141
1142 spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
1143 if (!spa_map)
1144 return NULL;
1145
1146 INIT_LIST_HEAD(&spa_map->list);
1147 spa_map->spa = spa;
1148 kref_init(&spa_map->kref);
1149 spa_map->acpi_desc = acpi_desc;
1150
1151 res = request_mem_region(start, n, dev_name(acpi_desc->dev));
1152 if (!res)
1153 goto err_mem;
1154
1155 /* TODO: cacheability based on the spa type */
1156 spa_map->iomem = ioremap_nocache(start, n);
1157 if (!spa_map->iomem)
1158 goto err_map;
1159
1160 list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
1161 return spa_map->iomem;
1162
1163 err_map:
1164 release_mem_region(start, n);
1165 err_mem:
1166 kfree(spa_map);
1167 return NULL;
1168}
1169
1170/**
1171 * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
1172 * @nvdimm_bus: NFIT-bus that provided the spa table entry
1173 * @nfit_spa: spa table to map
1174 *
1175 * In the case where block-data-window apertures and
1176 * dimm-control-regions are interleaved they will end up sharing a
1177 * single request_mem_region() + ioremap() for the address range. In
1178 * the style of devm nfit_spa_map() mappings are automatically dropped
1179 * when all region devices referencing the same mapping are disabled /
1180 * unbound.
1181 */
1182static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
1183 struct acpi_nfit_system_address *spa)
1184{
1185 void __iomem *iomem;
1186
1187 mutex_lock(&acpi_desc->spa_map_mutex);
1188 iomem = __nfit_spa_map(acpi_desc, spa);
1189 mutex_unlock(&acpi_desc->spa_map_mutex);
1190
1191 return iomem;
1192}
1193
1194static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
1195 struct acpi_nfit_interleave *idt, u16 interleave_ways)
1196{
1197 if (idt) {
1198 mmio->num_lines = idt->line_count;
1199 mmio->line_size = idt->line_size;
1200 if (interleave_ways == 0)
1201 return -ENXIO;
1202 mmio->table_size = mmio->num_lines * interleave_ways
1203 * mmio->line_size;
1204 }
1205
1206 return 0;
1207}
1208
1209static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
1210 struct device *dev)
1211{
1212 struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1213 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1214 struct nd_blk_region *ndbr = to_nd_blk_region(dev);
1215 struct nfit_blk_mmio *mmio;
1216 struct nfit_blk *nfit_blk;
1217 struct nfit_mem *nfit_mem;
1218 struct nvdimm *nvdimm;
1219 int rc;
1220
1221 nvdimm = nd_blk_region_to_dimm(ndbr);
1222 nfit_mem = nvdimm_provider_data(nvdimm);
1223 if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
1224 dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
1225 nfit_mem ? "" : " nfit_mem",
1226 nfit_mem->dcr ? "" : " dcr",
1227 nfit_mem->bdw ? "" : " bdw");
1228 return -ENXIO;
1229 }
1230
1231 nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
1232 if (!nfit_blk)
1233 return -ENOMEM;
1234 nd_blk_region_set_provider_data(ndbr, nfit_blk);
1235 nfit_blk->nd_region = to_nd_region(dev);
1236
1237 /* map block aperture memory */
1238 nfit_blk->bdw_offset = nfit_mem->bdw->offset;
1239 mmio = &nfit_blk->mmio[BDW];
1240 mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw);
1241 if (!mmio->base) {
1242 dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
1243 nvdimm_name(nvdimm));
1244 return -ENOMEM;
1245 }
1246 mmio->size = nfit_mem->bdw->size;
1247 mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
1248 mmio->idt = nfit_mem->idt_bdw;
1249 mmio->spa = nfit_mem->spa_bdw;
1250 rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
1251 nfit_mem->memdev_bdw->interleave_ways);
1252 if (rc) {
1253 dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
1254 __func__, nvdimm_name(nvdimm));
1255 return rc;
1256 }
1257
1258 /* map block control memory */
1259 nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
1260 nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
1261 mmio = &nfit_blk->mmio[DCR];
1262 mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr);
1263 if (!mmio->base) {
1264 dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
1265 nvdimm_name(nvdimm));
1266 return -ENOMEM;
1267 }
1268 mmio->size = nfit_mem->dcr->window_size;
1269 mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
1270 mmio->idt = nfit_mem->idt_dcr;
1271 mmio->spa = nfit_mem->spa_dcr;
1272 rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
1273 nfit_mem->memdev_dcr->interleave_ways);
1274 if (rc) {
1275 dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
1276 __func__, nvdimm_name(nvdimm));
1277 return rc;
1278 }
1279
1280 if (mmio->line_size == 0)
1281 return 0;
1282
1283 if ((u32) nfit_blk->cmd_offset % mmio->line_size
1284 + 8 > mmio->line_size) {
1285 dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
1286 return -ENXIO;
1287 } else if ((u32) nfit_blk->stat_offset % mmio->line_size
1288 + 8 > mmio->line_size) {
1289 dev_dbg(dev, "stat_offset crosses interleave boundary\n");
1290 return -ENXIO;
1291 }
1292
1293 return 0;
1294}
1295
1296static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
1297 struct device *dev)
1298{
1299 struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1300 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1301 struct nd_blk_region *ndbr = to_nd_blk_region(dev);
1302 struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
1303 int i;
1304
1305 if (!nfit_blk)
1306 return; /* never enabled */
1307
1308 /* auto-free BLK spa mappings */
1309 for (i = 0; i < 2; i++) {
1310 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
1311
1312 if (mmio->base)
1313 nfit_spa_unmap(acpi_desc, mmio->spa);
1314 }
1315 nd_blk_region_set_provider_data(ndbr, NULL);
1316 /* devm will free nfit_blk */
1317}
1318
1319static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
1320 struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
1321 struct acpi_nfit_memory_map *memdev,
1322 struct acpi_nfit_system_address *spa)
1323{
1324 struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
1325 memdev->device_handle);
1326 struct nd_blk_region_desc *ndbr_desc;
1327 struct nfit_mem *nfit_mem;
1328 int blk_valid = 0;
1329
1330 if (!nvdimm) {
1331 dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
1332 spa->range_index, memdev->device_handle);
1333 return -ENODEV;
1334 }
1335
1336 nd_mapping->nvdimm = nvdimm;
1337 switch (nfit_spa_type(spa)) {
1338 case NFIT_SPA_PM:
1339 case NFIT_SPA_VOLATILE:
1340 nd_mapping->start = memdev->address;
1341 nd_mapping->size = memdev->region_size;
1342 break;
1343 case NFIT_SPA_DCR:
1344 nfit_mem = nvdimm_provider_data(nvdimm);
1345 if (!nfit_mem || !nfit_mem->bdw) {
1346 dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
1347 spa->range_index, nvdimm_name(nvdimm));
1348 } else {
1349 nd_mapping->size = nfit_mem->bdw->capacity;
1350 nd_mapping->start = nfit_mem->bdw->start_address;
1351 ndr_desc->num_lanes = nfit_mem->bdw->windows;
1352 blk_valid = 1;
1353 }
1354
1355 ndr_desc->nd_mapping = nd_mapping;
1356 ndr_desc->num_mappings = blk_valid;
1357 ndbr_desc = to_blk_region_desc(ndr_desc);
1358 ndbr_desc->enable = acpi_nfit_blk_region_enable;
1359 ndbr_desc->disable = acpi_nfit_blk_region_disable;
1360 ndbr_desc->do_io = acpi_desc->blk_do_io;
1361 if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc))
1362 return -ENOMEM;
1363 break;
1364 }
1365
1366 return 0;
1367}
1368
1369static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
1370 struct nfit_spa *nfit_spa)
1371{
1372 static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
1373 struct acpi_nfit_system_address *spa = nfit_spa->spa;
1374 struct nd_blk_region_desc ndbr_desc;
1375 struct nd_region_desc *ndr_desc;
1376 struct nfit_memdev *nfit_memdev;
1377 struct nvdimm_bus *nvdimm_bus;
1378 struct resource res;
1379 int count = 0, rc;
1380
1381 if (spa->range_index == 0) {
1382 dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
1383 __func__);
1384 return 0;
1385 }
1386
1387 memset(&res, 0, sizeof(res));
1388 memset(&nd_mappings, 0, sizeof(nd_mappings));
1389 memset(&ndbr_desc, 0, sizeof(ndbr_desc));
1390 res.start = spa->address;
1391 res.end = res.start + spa->length - 1;
1392 ndr_desc = &ndbr_desc.ndr_desc;
1393 ndr_desc->res = &res;
1394 ndr_desc->provider_data = nfit_spa;
1395 ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
1396 if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
1397 ndr_desc->numa_node = acpi_map_pxm_to_online_node(
1398 spa->proximity_domain);
1399 else
1400 ndr_desc->numa_node = NUMA_NO_NODE;
1401
1402 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
1403 struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
1404 struct nd_mapping *nd_mapping;
1405
1406 if (memdev->range_index != spa->range_index)
1407 continue;
1408 if (count >= ND_MAX_MAPPINGS) {
1409 dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
1410 spa->range_index, ND_MAX_MAPPINGS);
1411 return -ENXIO;
1412 }
1413 nd_mapping = &nd_mappings[count++];
1414 rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
1415 memdev, spa);
1416 if (rc)
1417 return rc;
1418 }
1419
1420 ndr_desc->nd_mapping = nd_mappings;
1421 ndr_desc->num_mappings = count;
1422 rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
1423 if (rc)
1424 return rc;
1425
1426 nvdimm_bus = acpi_desc->nvdimm_bus;
1427 if (nfit_spa_type(spa) == NFIT_SPA_PM) {
1428 if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
1429 return -ENOMEM;
1430 } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
1431 if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc))
1432 return -ENOMEM;
1433 }
1434 return 0;
1435}
1436
1437static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
1438{
1439 struct nfit_spa *nfit_spa;
1440
1441 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
1442 int rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
1443
1444 if (rc)
1445 return rc;
1446 }
1447 return 0;
1448}
1449
1450int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
1451{
1452 struct device *dev = acpi_desc->dev;
1453 const void *end;
1454 u8 *data;
1455 int rc;
1456
1457 INIT_LIST_HEAD(&acpi_desc->spa_maps);
1458 INIT_LIST_HEAD(&acpi_desc->spas);
1459 INIT_LIST_HEAD(&acpi_desc->dcrs);
1460 INIT_LIST_HEAD(&acpi_desc->bdws);
1461 INIT_LIST_HEAD(&acpi_desc->idts);
1462 INIT_LIST_HEAD(&acpi_desc->memdevs);
1463 INIT_LIST_HEAD(&acpi_desc->dimms);
1464 mutex_init(&acpi_desc->spa_map_mutex);
1465
1466 data = (u8 *) acpi_desc->nfit;
1467 end = data + sz;
1468 data += sizeof(struct acpi_table_nfit);
1469 while (!IS_ERR_OR_NULL(data))
1470 data = add_table(acpi_desc, data, end);
1471
1472 if (IS_ERR(data)) {
1473 dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
1474 PTR_ERR(data));
1475 return PTR_ERR(data);
1476 }
1477
1478 if (nfit_mem_init(acpi_desc) != 0)
1479 return -ENOMEM;
1480
1481 acpi_nfit_init_dsms(acpi_desc);
1482
1483 rc = acpi_nfit_register_dimms(acpi_desc);
1484 if (rc)
1485 return rc;
1486
1487 return acpi_nfit_register_regions(acpi_desc);
1488}
1489EXPORT_SYMBOL_GPL(acpi_nfit_init);
1490
1491static int acpi_nfit_add(struct acpi_device *adev)
1492{
1493 struct nvdimm_bus_descriptor *nd_desc;
1494 struct acpi_nfit_desc *acpi_desc;
1495 struct device *dev = &adev->dev;
1496 struct acpi_table_header *tbl;
1497 acpi_status status = AE_OK;
1498 acpi_size sz;
1499 int rc;
1500
1501 status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz);
1502 if (ACPI_FAILURE(status)) {
1503 dev_err(dev, "failed to find NFIT\n");
1504 return -ENXIO;
1505 }
1506
1507 acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
1508 if (!acpi_desc)
1509 return -ENOMEM;
1510
1511 dev_set_drvdata(dev, acpi_desc);
1512 acpi_desc->dev = dev;
1513 acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
1514 acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
1515 nd_desc = &acpi_desc->nd_desc;
1516 nd_desc->provider_name = "ACPI.NFIT";
1517 nd_desc->ndctl = acpi_nfit_ctl;
1518 nd_desc->attr_groups = acpi_nfit_attribute_groups;
1519
1520 acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc);
1521 if (!acpi_desc->nvdimm_bus)
1522 return -ENXIO;
1523
1524 rc = acpi_nfit_init(acpi_desc, sz);
1525 if (rc) {
1526 nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
1527 return rc;
1528 }
1529 return 0;
1530}
1531
1532static int acpi_nfit_remove(struct acpi_device *adev)
1533{
1534 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
1535
1536 nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
1537 return 0;
1538}
1539
1540static const struct acpi_device_id acpi_nfit_ids[] = {
1541 { "ACPI0012", 0 },
1542 { "", 0 },
1543};
1544MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
1545
1546static struct acpi_driver acpi_nfit_driver = {
1547 .name = KBUILD_MODNAME,
1548 .ids = acpi_nfit_ids,
1549 .ops = {
1550 .add = acpi_nfit_add,
1551 .remove = acpi_nfit_remove,
1552 },
1553};
1554
1555static __init int nfit_init(void)
1556{
1557 BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
1558 BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
1559 BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
1560 BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
1561 BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
1562 BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
1563 BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
1564
1565 acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
1566 acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
1567 acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
1568 acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
1569 acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
1570 acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
1571 acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
1572 acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
1573 acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
1574 acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
1575
1576 return acpi_bus_register_driver(&acpi_nfit_driver);
1577}
1578
1579static __exit void nfit_exit(void)
1580{
1581 acpi_bus_unregister_driver(&acpi_nfit_driver);
1582}
1583
1584module_init(nfit_init);
1585module_exit(nfit_exit);
1586MODULE_LICENSE("GPL v2");
1587MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
new file mode 100644
index 000000000000..81f2e8c5a79c
--- /dev/null
+++ b/drivers/acpi/nfit.h
@@ -0,0 +1,158 @@
1/*
2 * NVDIMM Firmware Interface Table - NFIT
3 *
4 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15#ifndef __NFIT_H__
16#define __NFIT_H__
17#include <linux/libnvdimm.h>
18#include <linux/types.h>
19#include <linux/uuid.h>
20#include <linux/acpi.h>
21#include <acpi/acuuid.h>
22
23#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
24#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
25#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
26 | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
27 | ACPI_NFIT_MEM_ARMED)
28
29enum nfit_uuids {
30 NFIT_SPA_VOLATILE,
31 NFIT_SPA_PM,
32 NFIT_SPA_DCR,
33 NFIT_SPA_BDW,
34 NFIT_SPA_VDISK,
35 NFIT_SPA_VCD,
36 NFIT_SPA_PDISK,
37 NFIT_SPA_PCD,
38 NFIT_DEV_BUS,
39 NFIT_DEV_DIMM,
40 NFIT_UUID_MAX,
41};
42
43struct nfit_spa {
44 struct acpi_nfit_system_address *spa;
45 struct list_head list;
46};
47
48struct nfit_dcr {
49 struct acpi_nfit_control_region *dcr;
50 struct list_head list;
51};
52
53struct nfit_bdw {
54 struct acpi_nfit_data_region *bdw;
55 struct list_head list;
56};
57
58struct nfit_idt {
59 struct acpi_nfit_interleave *idt;
60 struct list_head list;
61};
62
63struct nfit_memdev {
64 struct acpi_nfit_memory_map *memdev;
65 struct list_head list;
66};
67
68/* assembled tables for a given dimm/memory-device */
69struct nfit_mem {
70 struct nvdimm *nvdimm;
71 struct acpi_nfit_memory_map *memdev_dcr;
72 struct acpi_nfit_memory_map *memdev_pmem;
73 struct acpi_nfit_memory_map *memdev_bdw;
74 struct acpi_nfit_control_region *dcr;
75 struct acpi_nfit_data_region *bdw;
76 struct acpi_nfit_system_address *spa_dcr;
77 struct acpi_nfit_system_address *spa_bdw;
78 struct acpi_nfit_interleave *idt_dcr;
79 struct acpi_nfit_interleave *idt_bdw;
80 struct list_head list;
81 struct acpi_device *adev;
82 unsigned long dsm_mask;
83};
84
85struct acpi_nfit_desc {
86 struct nvdimm_bus_descriptor nd_desc;
87 struct acpi_table_nfit *nfit;
88 struct mutex spa_map_mutex;
89 struct list_head spa_maps;
90 struct list_head memdevs;
91 struct list_head dimms;
92 struct list_head spas;
93 struct list_head dcrs;
94 struct list_head bdws;
95 struct list_head idts;
96 struct nvdimm_bus *nvdimm_bus;
97 struct device *dev;
98 unsigned long dimm_dsm_force_en;
99 int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
100 void *iobuf, u64 len, int rw);
101};
102
103enum nd_blk_mmio_selector {
104 BDW,
105 DCR,
106};
107
108struct nfit_blk {
109 struct nfit_blk_mmio {
110 union {
111 void __iomem *base;
112 void *aperture;
113 };
114 u64 size;
115 u64 base_offset;
116 u32 line_size;
117 u32 num_lines;
118 u32 table_size;
119 struct acpi_nfit_interleave *idt;
120 struct acpi_nfit_system_address *spa;
121 } mmio[2];
122 struct nd_region *nd_region;
123 u64 bdw_offset; /* post interleave offset */
124 u64 stat_offset;
125 u64 cmd_offset;
126};
127
128struct nfit_spa_mapping {
129 struct acpi_nfit_desc *acpi_desc;
130 struct acpi_nfit_system_address *spa;
131 struct list_head list;
132 struct kref kref;
133 void __iomem *iomem;
134};
135
136static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
137{
138 return container_of(kref, struct nfit_spa_mapping, kref);
139}
140
141static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
142 struct nfit_mem *nfit_mem)
143{
144 if (nfit_mem->memdev_dcr)
145 return nfit_mem->memdev_dcr;
146 return nfit_mem->memdev_pmem;
147}
148
149static inline struct acpi_nfit_desc *to_acpi_desc(
150 struct nvdimm_bus_descriptor *nd_desc)
151{
152 return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
153}
154
155const u8 *to_nfit_uuid(enum nfit_uuids id);
156int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
157extern const struct attribute_group *acpi_nfit_attribute_groups[];
158#endif /* __NFIT_H__ */
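
The inline helpers at the end of nfit.h (to_spa_map(), to_acpi_desc()) are instances of the kernel's container_of() pattern: given a pointer to an embedded member, recover the structure that contains it. A minimal, self-contained sketch of the idiom follows; the struct names are illustrative and the simplified macro omits the type checking the kernel's version performs.

#include <stddef.h>

/* Simplified container_of(); the kernel's macro adds type checking. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct inner {
	int x;
};

struct outer {
	int flags;
	struct inner item;	/* embedded member, like nd_desc above */
};

/* Same shape as to_acpi_desc(): member pointer in, container out. */
static struct outer *to_outer(struct inner *p)
{
	return container_of(p, struct outer, item);
}
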
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 1333cbdc3ea2..acaa3b4ea504 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -29,6 +29,8 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/acpi.h> 30#include <linux/acpi.h>
31#include <linux/numa.h> 31#include <linux/numa.h>
32#include <linux/nodemask.h>
33#include <linux/topology.h>
32 34
33#define PREFIX "ACPI: " 35#define PREFIX "ACPI: "
34 36
@@ -70,7 +72,12 @@ static void __acpi_map_pxm_to_node(int pxm, int node)
70 72
71int acpi_map_pxm_to_node(int pxm) 73int acpi_map_pxm_to_node(int pxm)
72{ 74{
73 int node = pxm_to_node_map[pxm]; 75 int node;
76
77 if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
78 return NUMA_NO_NODE;
79
80 node = pxm_to_node_map[pxm];
74 81
75 if (node == NUMA_NO_NODE) { 82 if (node == NUMA_NO_NODE) {
76 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) 83 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
@@ -83,6 +90,45 @@ int acpi_map_pxm_to_node(int pxm)
83 return node; 90 return node;
84} 91}
85 92
93/**
94 * acpi_map_pxm_to_online_node - Map proximity ID to online node
95 * @pxm: ACPI proximity ID
96 *
97 * This is similar to acpi_map_pxm_to_node(), but always returns an online
98 * node. When the mapped node from a given proximity ID is offline, it
99 * looks up the node distance table and returns the nearest online node.
100 *
101 * ACPI device drivers, which are called after the NUMA initialization has
102 * completed in the kernel, can call this interface to obtain their device
103 * NUMA topology from ACPI tables. Such drivers do not have to deal with
 104 * offline nodes. A node may be offline when a device proximity ID is
 105 * unique, its SRAT memory entry does not exist, or NUMA is disabled,
 106 * e.g. "numa=off" on x86.
107 */
108int acpi_map_pxm_to_online_node(int pxm)
109{
110 int node, n, dist, min_dist;
111
112 node = acpi_map_pxm_to_node(pxm);
113
114 if (node == NUMA_NO_NODE)
115 node = 0;
116
117 if (!node_online(node)) {
118 min_dist = INT_MAX;
119 for_each_online_node(n) {
120 dist = node_distance(node, n);
121 if (dist < min_dist) {
122 min_dist = dist;
123 node = n;
124 }
125 }
126 }
127
128 return node;
129}
130EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
131
86static void __init 132static void __init
87acpi_table_print_srat_entry(struct acpi_subtable_header *header) 133acpi_table_print_srat_entry(struct acpi_subtable_header *header)
88{ 134{
@@ -328,8 +374,6 @@ int acpi_get_node(acpi_handle handle)
328 int pxm; 374 int pxm;
329 375
330 pxm = acpi_get_pxm(handle); 376 pxm = acpi_get_pxm(handle);
331 if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
332 return NUMA_NO_NODE;
333 377
334 return acpi_map_pxm_to_node(pxm); 378 return acpi_map_pxm_to_node(pxm);
335} 379}
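
acpi_map_pxm_to_online_node() is exported so that ACPI device drivers running after NUMA initialization can turn a proximity domain from a firmware table into a node that is guaranteed to be online; the "Set numa_node to NVDIMM devices" commit in this pull is the first consumer. A hedged usage sketch, in which the helper and the pxm value are purely illustrative:

#include <linux/acpi.h>
#include <linux/device.h>

/*
 * Hypothetical caller: derive a device's NUMA node from an ACPI
 * proximity domain (e.g. one read out of an NFIT SPA entry).
 */
static void example_set_dev_node(struct device *dev, int pxm)
{
	/* Always yields an online node, falling back to the nearest one. */
	set_dev_node(dev, acpi_map_pxm_to_online_node(pxm));
}
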
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 3ccef9eba6f9..1b8094d4d7af 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX
404 and will prevent RAM block device backing store memory from being 404 and will prevent RAM block device backing store memory from being
405 allocated from highmem (only a problem for highmem systems). 405 allocated from highmem (only a problem for highmem systems).
406 406
407config BLK_DEV_PMEM
408 tristate "Persistent memory block device support"
409 depends on HAS_IOMEM
410 help
411 Saying Y here will allow you to use a contiguous range of reserved
412 memory as one or more persistent block devices.
413
414 To compile this driver as a module, choose M here: the module will be
415 called 'pmem'.
416
417 If unsure, say N.
418
419config CDROM_PKTCDVD 407config CDROM_PKTCDVD
420 tristate "Packet writing on CD/DVD media" 408 tristate "Packet writing on CD/DVD media"
421 depends on !UML 409 depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9cc6c18a1c7e..02b688d1438d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o
14obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o 14obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
15obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o 15obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
16obj-$(CONFIG_BLK_DEV_RAM) += brd.o 16obj-$(CONFIG_BLK_DEV_RAM) += brd.o
17obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o
18obj-$(CONFIG_BLK_DEV_LOOP) += loop.o 17obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
19obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o 18obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
20obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o 19obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
new file mode 100644
index 000000000000..72226acb5c0f
--- /dev/null
+++ b/drivers/nvdimm/Kconfig
@@ -0,0 +1,68 @@
1menuconfig LIBNVDIMM
2 tristate "NVDIMM (Non-Volatile Memory Device) Support"
3 depends on PHYS_ADDR_T_64BIT
4 depends on BLK_DEV
5 help
6 Generic support for non-volatile memory devices including
7 ACPI-6-NFIT defined resources. On platforms that define an
8 NFIT, or otherwise can discover NVDIMM resources, a libnvdimm
9 bus is registered to advertise PMEM (persistent memory)
10 namespaces (/dev/pmemX) and BLK (sliding mmio window(s))
11 namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a
12 memory resource that may span multiple DIMMs and support DAX
13 (see CONFIG_DAX). A BLK namespace refers to an NVDIMM control
14 region which exposes an mmio register set for windowed access
15 mode to non-volatile memory.
16
17if LIBNVDIMM
18
19config BLK_DEV_PMEM
20 tristate "PMEM: Persistent memory block device support"
21 default LIBNVDIMM
22 depends on HAS_IOMEM
23 select ND_BTT if BTT
24 help
 25	  Memory ranges for PMEM are described by either an NFIT
 26	  (NVDIMM Firmware Interface Table, see CONFIG_ACPI_NFIT), a
 27	  non-standard OEM-specific E820 memory type (type-12, see
 28	  CONFIG_X86_PMEM_LEGACY), or a manual 'memmap=nn[KMG]!ss[KMG]'
 29	  specification on the kernel command line (see
 30	  Documentation/kernel-parameters.txt). This driver converts
31 these persistent memory ranges into block devices that are
32 capable of DAX (direct-access) file system mappings. See
33 Documentation/nvdimm/nvdimm.txt for more details.
34
35 Say Y if you want to use an NVDIMM
36
37config ND_BLK
38 tristate "BLK: Block data window (aperture) device support"
39 default LIBNVDIMM
40 select ND_BTT if BTT
41 help
42 Support NVDIMMs, or other devices, that implement a BLK-mode
43 access capability. BLK-mode access uses memory-mapped-i/o
44 apertures to access persistent media.
45
46 Say Y if your platform firmware emits an ACPI.NFIT table
47 (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
48 capabilities.
49
50config ND_BTT
51 tristate
52
53config BTT
54 bool "BTT: Block Translation Table (atomic sector updates)"
55 default y if LIBNVDIMM
56 help
57 The Block Translation Table (BTT) provides atomic sector
58 update semantics for persistent memory devices, so that
59 applications that rely on sector writes not being torn (a
60 guarantee that typical disks provide) can continue to do so.
61 The BTT manifests itself as an alternate personality for an
62 NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX,
 63	  ndblkX.Y, etc...), or 'sectored' mode (pmemXs, ndblkX.Ys,
64 etc...).
65
66 Select Y if unsure
67
68endif
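
As the BLK_DEV_PMEM help text says, the driver surfaces persistent memory ranges as ordinary block devices (/dev/pmemX). A small user-space sketch, assuming a /dev/pmem0 namespace exists on the running system, that simply reads the first 4 KiB through the normal block-device path:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/dev/pmem0", O_RDONLY);	/* hypothetical namespace */

	if (fd < 0) {
		perror("open /dev/pmem0");
		return 1;
	}
	if (read(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
		perror("read");
	close(fd);
	return 0;
}
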
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
new file mode 100644
index 000000000000..594bb97c867a
--- /dev/null
+++ b/drivers/nvdimm/Makefile
@@ -0,0 +1,20 @@
1obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
2obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
3obj-$(CONFIG_ND_BTT) += nd_btt.o
4obj-$(CONFIG_ND_BLK) += nd_blk.o
5
6nd_pmem-y := pmem.o
7
8nd_btt-y := btt.o
9
10nd_blk-y := blk.o
11
12libnvdimm-y := core.o
13libnvdimm-y += bus.o
14libnvdimm-y += dimm_devs.o
15libnvdimm-y += dimm.o
16libnvdimm-y += region_devs.o
17libnvdimm-y += region.o
18libnvdimm-y += namespace_devs.o
19libnvdimm-y += label.o
20libnvdimm-$(CONFIG_BTT) += btt_devs.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
new file mode 100644
index 000000000000..4f97b248c236
--- /dev/null
+++ b/drivers/nvdimm/blk.c
@@ -0,0 +1,384 @@
1/*
2 * NVDIMM Block Window Driver
3 * Copyright (c) 2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/blkdev.h>
16#include <linux/fs.h>
17#include <linux/genhd.h>
18#include <linux/module.h>
19#include <linux/moduleparam.h>
20#include <linux/nd.h>
21#include <linux/sizes.h>
22#include "nd.h"
23
24struct nd_blk_device {
25 struct request_queue *queue;
26 struct gendisk *disk;
27 struct nd_namespace_blk *nsblk;
28 struct nd_blk_region *ndbr;
29 size_t disk_size;
30 u32 sector_size;
31 u32 internal_lbasize;
32};
33
34static int nd_blk_major;
35
36static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
37{
38 return blk_dev->nsblk->lbasize - blk_dev->sector_size;
39}
40
41static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
42 resource_size_t ns_offset, unsigned int len)
43{
44 int i;
45
46 for (i = 0; i < nsblk->num_resources; i++) {
47 if (ns_offset < resource_size(nsblk->res[i])) {
48 if (ns_offset + len > resource_size(nsblk->res[i])) {
49 dev_WARN_ONCE(&nsblk->common.dev, 1,
50 "illegal request\n");
51 return SIZE_MAX;
52 }
53 return nsblk->res[i]->start + ns_offset;
54 }
55 ns_offset -= resource_size(nsblk->res[i]);
56 }
57
58 dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
59 return SIZE_MAX;
60}
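
A worked instance of to_dev_offset() with assumed resource sizes: if the namespace is backed by two 1 MB resources, res[0] at DPA 0x10000000 and res[1] at DPA 0x30000000, then ns_offset 0x180000 falls 0x80000 bytes into res[1] and maps to device offset 0x30080000, while a request at ns_offset 0xFF000 with len 0x2000 would straddle the end of res[0] and returns SIZE_MAX, which the callers turn into -EIO.
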
61
62#ifdef CONFIG_BLK_DEV_INTEGRITY
63static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
64 struct bio_integrity_payload *bip, u64 lba,
65 int rw)
66{
67 unsigned int len = nd_blk_meta_size(blk_dev);
68 resource_size_t dev_offset, ns_offset;
69 struct nd_namespace_blk *nsblk;
70 struct nd_blk_region *ndbr;
71 int err = 0;
72
73 nsblk = blk_dev->nsblk;
74 ndbr = blk_dev->ndbr;
75 ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
76 dev_offset = to_dev_offset(nsblk, ns_offset, len);
77 if (dev_offset == SIZE_MAX)
78 return -EIO;
79
80 while (len) {
81 unsigned int cur_len;
82 struct bio_vec bv;
83 void *iobuf;
84
85 bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
86 /*
87 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
88 * .bv_offset already adjusted for iter->bi_bvec_done, and we
89 * can use those directly
90 */
91
92 cur_len = min(len, bv.bv_len);
93 iobuf = kmap_atomic(bv.bv_page);
94 err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
95 cur_len, rw);
96 kunmap_atomic(iobuf);
97 if (err)
98 return err;
99
100 len -= cur_len;
101 dev_offset += cur_len;
102 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
103 }
104
105 return err;
106}
107
108#else /* CONFIG_BLK_DEV_INTEGRITY */
109static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
110 struct bio_integrity_payload *bip, u64 lba,
111 int rw)
112{
113 return 0;
114}
115#endif
116
117static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
118 struct bio_integrity_payload *bip, struct page *page,
119 unsigned int len, unsigned int off, int rw,
120 sector_t sector)
121{
122 struct nd_blk_region *ndbr = blk_dev->ndbr;
123 resource_size_t dev_offset, ns_offset;
124 int err = 0;
125 void *iobuf;
126 u64 lba;
127
128 while (len) {
129 unsigned int cur_len;
130
131 /*
132 * If we don't have an integrity payload, we don't have to
133 * split the bvec into sectors, as this would cause unnecessary
 134		 * Block Window setup/move steps. The do_io routine is capable
135 * of handling len <= PAGE_SIZE.
136 */
137 cur_len = bip ? min(len, blk_dev->sector_size) : len;
138
139 lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
140 ns_offset = lba * blk_dev->internal_lbasize;
141 dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
142 if (dev_offset == SIZE_MAX)
143 return -EIO;
144
145 iobuf = kmap_atomic(page);
146 err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
147 kunmap_atomic(iobuf);
148 if (err)
149 return err;
150
151 if (bip) {
152 err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
153 if (err)
154 return err;
155 }
156 len -= cur_len;
157 off += cur_len;
158 sector += blk_dev->sector_size >> SECTOR_SHIFT;
159 }
160
161 return err;
162}
163
164static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
165{
166 struct block_device *bdev = bio->bi_bdev;
167 struct gendisk *disk = bdev->bd_disk;
168 struct bio_integrity_payload *bip;
169 struct nd_blk_device *blk_dev;
170 struct bvec_iter iter;
171 unsigned long start;
172 struct bio_vec bvec;
173 int err = 0, rw;
174 bool do_acct;
175
176 /*
177 * bio_integrity_enabled also checks if the bio already has an
178 * integrity payload attached. If it does, we *don't* do a
179 * bio_integrity_prep here - the payload has been generated by
180 * another kernel subsystem, and we just pass it through.
181 */
182 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
183 err = -EIO;
184 goto out;
185 }
186
187 bip = bio_integrity(bio);
188 blk_dev = disk->private_data;
189 rw = bio_data_dir(bio);
190 do_acct = nd_iostat_start(bio, &start);
191 bio_for_each_segment(bvec, bio, iter) {
192 unsigned int len = bvec.bv_len;
193
194 BUG_ON(len > PAGE_SIZE);
195 err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
196 bvec.bv_offset, rw, iter.bi_sector);
197 if (err) {
198 dev_info(&blk_dev->nsblk->common.dev,
199 "io error in %s sector %lld, len %d,\n",
200 (rw == READ) ? "READ" : "WRITE",
201 (unsigned long long) iter.bi_sector, len);
202 break;
203 }
204 }
205 if (do_acct)
206 nd_iostat_end(bio, start);
207
208 out:
209 bio_endio(bio, err);
210}
211
212static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
213 resource_size_t offset, void *iobuf, size_t n, int rw)
214{
215 struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
216 struct nd_namespace_blk *nsblk = blk_dev->nsblk;
217 struct nd_blk_region *ndbr = blk_dev->ndbr;
218 resource_size_t dev_offset;
219
220 dev_offset = to_dev_offset(nsblk, offset, n);
221
222 if (unlikely(offset + n > blk_dev->disk_size)) {
223 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
224 return -EFAULT;
225 }
226
227 if (dev_offset == SIZE_MAX)
228 return -EIO;
229
230 return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
231}
232
233static const struct block_device_operations nd_blk_fops = {
234 .owner = THIS_MODULE,
235 .revalidate_disk = nvdimm_revalidate_disk,
236};
237
238static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
239 struct nd_blk_device *blk_dev)
240{
241 resource_size_t available_disk_size;
242 struct gendisk *disk;
243 u64 internal_nlba;
244
245 internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
246 available_disk_size = internal_nlba * blk_dev->sector_size;
247
248 blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
249 if (!blk_dev->queue)
250 return -ENOMEM;
251
252 blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
253 blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
254 blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
255 blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
256 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);
257
258 disk = blk_dev->disk = alloc_disk(0);
259 if (!disk) {
260 blk_cleanup_queue(blk_dev->queue);
261 return -ENOMEM;
262 }
263
264 disk->driverfs_dev = &ndns->dev;
265 disk->major = nd_blk_major;
266 disk->first_minor = 0;
267 disk->fops = &nd_blk_fops;
268 disk->private_data = blk_dev;
269 disk->queue = blk_dev->queue;
270 disk->flags = GENHD_FL_EXT_DEVT;
271 nvdimm_namespace_disk_name(ndns, disk->disk_name);
272 set_capacity(disk, 0);
273 add_disk(disk);
274
275 if (nd_blk_meta_size(blk_dev)) {
276 int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));
277
278 if (rc) {
279 del_gendisk(disk);
280 put_disk(disk);
281 blk_cleanup_queue(blk_dev->queue);
282 return rc;
283 }
284 }
285
286 set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
287 revalidate_disk(disk);
288 return 0;
289}
290
291static int nd_blk_probe(struct device *dev)
292{
293 struct nd_namespace_common *ndns;
294 struct nd_namespace_blk *nsblk;
295 struct nd_blk_device *blk_dev;
296 int rc;
297
298 ndns = nvdimm_namespace_common_probe(dev);
299 if (IS_ERR(ndns))
300 return PTR_ERR(ndns);
301
302 blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
303 if (!blk_dev)
304 return -ENOMEM;
305
306 nsblk = to_nd_namespace_blk(&ndns->dev);
307 blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
308 blk_dev->ndbr = to_nd_blk_region(dev->parent);
309 blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
310 blk_dev->internal_lbasize = roundup(nsblk->lbasize,
311 INT_LBASIZE_ALIGNMENT);
312 blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
313 dev_set_drvdata(dev, blk_dev);
314
315 ndns->rw_bytes = nd_blk_rw_bytes;
316 if (is_nd_btt(dev))
317 rc = nvdimm_namespace_attach_btt(ndns);
318 else if (nd_btt_probe(ndns, blk_dev) == 0) {
319 /* we'll come back as btt-blk */
320 rc = -ENXIO;
321 } else
322 rc = nd_blk_attach_disk(ndns, blk_dev);
323 if (rc)
324 kfree(blk_dev);
325 return rc;
326}
327
328static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
329{
330 del_gendisk(blk_dev->disk);
331 put_disk(blk_dev->disk);
332 blk_cleanup_queue(blk_dev->queue);
333}
334
335static int nd_blk_remove(struct device *dev)
336{
337 struct nd_blk_device *blk_dev = dev_get_drvdata(dev);
338
339 if (is_nd_btt(dev))
340 nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
341 else
342 nd_blk_detach_disk(blk_dev);
343 kfree(blk_dev);
344
345 return 0;
346}
347
348static struct nd_device_driver nd_blk_driver = {
349 .probe = nd_blk_probe,
350 .remove = nd_blk_remove,
351 .drv = {
352 .name = "nd_blk",
353 },
354 .type = ND_DRIVER_NAMESPACE_BLK,
355};
356
357static int __init nd_blk_init(void)
358{
359 int rc;
360
361 rc = register_blkdev(0, "nd_blk");
362 if (rc < 0)
363 return rc;
364
365 nd_blk_major = rc;
366 rc = nd_driver_register(&nd_blk_driver);
367
368 if (rc < 0)
369 unregister_blkdev(nd_blk_major, "nd_blk");
370
371 return rc;
372}
373
374static void __exit nd_blk_exit(void)
375{
376 driver_unregister(&nd_blk_driver.drv);
377 unregister_blkdev(nd_blk_major, "nd_blk");
378}
379
380MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
381MODULE_LICENSE("GPL v2");
382MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
383module_init(nd_blk_init);
384module_exit(nd_blk_exit);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
new file mode 100644
index 000000000000..411c7b2bb37a
--- /dev/null
+++ b/drivers/nvdimm/btt.c
@@ -0,0 +1,1479 @@
1/*
2 * Block Translation Table
3 * Copyright (c) 2014-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14#include <linux/highmem.h>
15#include <linux/debugfs.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/device.h>
19#include <linux/mutex.h>
20#include <linux/hdreg.h>
21#include <linux/genhd.h>
22#include <linux/sizes.h>
23#include <linux/ndctl.h>
24#include <linux/fs.h>
25#include <linux/nd.h>
26#include "btt.h"
27#include "nd.h"
28
29enum log_ent_request {
30 LOG_NEW_ENT = 0,
31 LOG_OLD_ENT
32};
33
34static int btt_major;
35
36static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
37 void *buf, size_t n)
38{
39 struct nd_btt *nd_btt = arena->nd_btt;
40 struct nd_namespace_common *ndns = nd_btt->ndns;
41
42 /* arena offsets are 4K from the base of the device */
43 offset += SZ_4K;
44 return nvdimm_read_bytes(ndns, offset, buf, n);
45}
46
47static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
48 void *buf, size_t n)
49{
50 struct nd_btt *nd_btt = arena->nd_btt;
51 struct nd_namespace_common *ndns = nd_btt->ndns;
52
53 /* arena offsets are 4K from the base of the device */
54 offset += SZ_4K;
55 return nvdimm_write_bytes(ndns, offset, buf, n);
56}
57
58static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
59{
60 int ret;
61
62 ret = arena_write_bytes(arena, arena->info2off, super,
63 sizeof(struct btt_sb));
64 if (ret)
65 return ret;
66
67 return arena_write_bytes(arena, arena->infooff, super,
68 sizeof(struct btt_sb));
69}
70
71static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
72{
73 WARN_ON(!super);
74 return arena_read_bytes(arena, arena->infooff, super,
75 sizeof(struct btt_sb));
76}
77
78/*
79 * 'raw' version of btt_map write
80 * Assumptions:
81 * mapping is in little-endian
82 * mapping contains 'E' and 'Z' flags as desired
83 */
84static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
85{
86 u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
87
88 WARN_ON(lba >= arena->external_nlba);
89 return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
90}
91
92static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
93 u32 z_flag, u32 e_flag)
94{
95 u32 ze;
96 __le32 mapping_le;
97
98 /*
99 * This 'mapping' is supposed to be just the LBA mapping, without
100 * any flags set, so strip the flag bits.
101 */
102 mapping &= MAP_LBA_MASK;
103
104 ze = (z_flag << 1) + e_flag;
105 switch (ze) {
106 case 0:
107 /*
108 * We want to set neither of the Z or E flags, and
109 * in the actual layout, this means setting the bit
110 * positions of both to '1' to indicate a 'normal'
111 * map entry
112 */
113 mapping |= MAP_ENT_NORMAL;
114 break;
115 case 1:
116 mapping |= (1 << MAP_ERR_SHIFT);
117 break;
118 case 2:
119 mapping |= (1 << MAP_TRIM_SHIFT);
120 break;
121 default:
122 /*
123 * The case where Z and E are both sent in as '1' could be
124 * construed as a valid 'normal' case, but we decide not to,
125 * to avoid confusion
126 */
127 WARN_ONCE(1, "Invalid use of Z and E flags\n");
128 return -EIO;
129 }
130
131 mapping_le = cpu_to_le32(mapping);
132 return __btt_map_write(arena, lba, mapping_le);
133}
134
135static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
136 int *trim, int *error)
137{
138 int ret;
139 __le32 in;
140 u32 raw_mapping, postmap, ze, z_flag, e_flag;
141 u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
142
143 WARN_ON(lba >= arena->external_nlba);
144
145 ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
146 if (ret)
147 return ret;
148
149 raw_mapping = le32_to_cpu(in);
150
151 z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
152 e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
153 ze = (z_flag << 1) + e_flag;
154 postmap = raw_mapping & MAP_LBA_MASK;
155
156 /* Reuse the {z,e}_flag variables for *trim and *error */
157 z_flag = 0;
158 e_flag = 0;
159
160 switch (ze) {
161 case 0:
162 /* Initial state. Return postmap = premap */
163 *mapping = lba;
164 break;
165 case 1:
166 *mapping = postmap;
167 e_flag = 1;
168 break;
169 case 2:
170 *mapping = postmap;
171 z_flag = 1;
172 break;
173 case 3:
174 *mapping = postmap;
175 break;
176 default:
177 return -EIO;
178 }
179
180 if (trim)
181 *trim = z_flag;
182 if (error)
183 *error = e_flag;
184
185 return ret;
186}
187
188static int btt_log_read_pair(struct arena_info *arena, u32 lane,
189 struct log_entry *ent)
190{
191 WARN_ON(!ent);
192 return arena_read_bytes(arena,
193 arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
194 2 * LOG_ENT_SIZE);
195}
196
197static struct dentry *debugfs_root;
198
199static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
200 int idx)
201{
202 char dirname[32];
203 struct dentry *d;
204
205 /* If for some reason, parent bttN was not created, exit */
206 if (!parent)
207 return;
208
209 snprintf(dirname, 32, "arena%d", idx);
210 d = debugfs_create_dir(dirname, parent);
211 if (IS_ERR_OR_NULL(d))
212 return;
213 a->debugfs_dir = d;
214
215 debugfs_create_x64("size", S_IRUGO, d, &a->size);
216 debugfs_create_x64("external_lba_start", S_IRUGO, d,
217 &a->external_lba_start);
218 debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
219 debugfs_create_u32("internal_lbasize", S_IRUGO, d,
220 &a->internal_lbasize);
221 debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
222 debugfs_create_u32("external_lbasize", S_IRUGO, d,
223 &a->external_lbasize);
224 debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
225 debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
226 debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
227 debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
228 debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
229 debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
230 debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
231 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
232 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
233 debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
234}
235
236static void btt_debugfs_init(struct btt *btt)
237{
238 int i = 0;
239 struct arena_info *arena;
240
241 btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
242 debugfs_root);
243 if (IS_ERR_OR_NULL(btt->debugfs_dir))
244 return;
245
246 list_for_each_entry(arena, &btt->arena_list, list) {
247 arena_debugfs_init(arena, btt->debugfs_dir, i);
248 i++;
249 }
250}
251
252/*
253 * This function accepts two log entries, and uses the
254 * sequence number to find the 'older' entry.
 255 * If the first slot has never been written, it initializes that
 256 * slot's sequence number and reports it as the old one.
 257 * Finally, it returns which of the entries was the older one.
 258 *
 259 * TODO: The logic feels a bit kludgy; make it better.
260 */
261static int btt_log_get_old(struct log_entry *ent)
262{
263 int old;
264
265 /*
266 * the first ever time this is seen, the entry goes into [0]
267 * the next time, the following logic works out to put this
268 * (next) entry into [1]
269 */
270 if (ent[0].seq == 0) {
271 ent[0].seq = cpu_to_le32(1);
272 return 0;
273 }
274
275 if (ent[0].seq == ent[1].seq)
276 return -EINVAL;
277 if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
278 return -EINVAL;
279
280 if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
281 if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
282 old = 0;
283 else
284 old = 1;
285 } else {
286 if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
287 old = 1;
288 else
289 old = 0;
290 }
291
292 return old;
293}
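
Valid sequence numbers cycle 1 -> 2 -> 3 -> 1 (btt_flog_write() below wraps the freelist seq from 4 back to 1), so the 'older' entry is the one the other was derived from. A few assumed (slot 0, slot 1) pairs and what btt_log_get_old() reports:

	(1, 2) -> slot 0 is old (2 was written after 1)
	(2, 1) -> slot 1 is old
	(3, 1) -> slot 0 is old (1 follows 3 across the wrap)
	(1, 3) -> slot 1 is old (mirror of the previous case)
	(2, 2) -> -EINVAL (equal sequence numbers are invalid)
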
294
295static struct device *to_dev(struct arena_info *arena)
296{
297 return &arena->nd_btt->dev;
298}
299
300/*
301 * This function copies the desired (old/new) log entry into ent if
302 * it is not NULL. It returns the sub-slot number (0 or 1)
303 * where the desired log entry was found. Negative return values
304 * indicate errors.
305 */
306static int btt_log_read(struct arena_info *arena, u32 lane,
307 struct log_entry *ent, int old_flag)
308{
309 int ret;
310 int old_ent, ret_ent;
311 struct log_entry log[2];
312
313 ret = btt_log_read_pair(arena, lane, log);
314 if (ret)
315 return -EIO;
316
317 old_ent = btt_log_get_old(log);
318 if (old_ent < 0 || old_ent > 1) {
319 dev_info(to_dev(arena),
320 "log corruption (%d): lane %d seq [%d, %d]\n",
321 old_ent, lane, log[0].seq, log[1].seq);
322 /* TODO set error state? */
323 return -EIO;
324 }
325
326 ret_ent = (old_flag ? old_ent : (1 - old_ent));
327
328 if (ent != NULL)
329 memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
330
331 return ret_ent;
332}
333
334/*
335 * This function commits a log entry to media
336 * It does _not_ prepare the freelist entry for the next write
337 * btt_flog_write is the wrapper for updating the freelist elements
338 */
339static int __btt_log_write(struct arena_info *arena, u32 lane,
340 u32 sub, struct log_entry *ent)
341{
342 int ret;
343 /*
344 * Ignore the padding in log_entry for calculating log_half.
345 * The entry is 'committed' when we write the sequence number,
346 * and we want to ensure that that is the last thing written.
347 * We don't bother writing the padding as that would be extra
348 * media wear and write amplification
349 */
350 unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
351 u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
352 void *src = ent;
353
354 /* split the 16B write into atomic, durable halves */
355 ret = arena_write_bytes(arena, ns_off, src, log_half);
356 if (ret)
357 return ret;
358
359 ns_off += log_half;
360 src += log_half;
361 return arena_write_bytes(arena, ns_off, src, log_half);
362}
363
364static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
365 struct log_entry *ent)
366{
367 int ret;
368
369 ret = __btt_log_write(arena, lane, sub, ent);
370 if (ret)
371 return ret;
372
373 /* prepare the next free entry */
374 arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
375 if (++(arena->freelist[lane].seq) == 4)
376 arena->freelist[lane].seq = 1;
377 arena->freelist[lane].block = le32_to_cpu(ent->old_map);
378
379 return ret;
380}
381
382/*
383 * This function initializes the BTT map to the initial state, which is
384 * all-zeroes, and indicates an identity mapping
385 */
386static int btt_map_init(struct arena_info *arena)
387{
388 int ret = -EINVAL;
389 void *zerobuf;
390 size_t offset = 0;
391 size_t chunk_size = SZ_2M;
392 size_t mapsize = arena->logoff - arena->mapoff;
393
394 zerobuf = kzalloc(chunk_size, GFP_KERNEL);
395 if (!zerobuf)
396 return -ENOMEM;
397
398 while (mapsize) {
399 size_t size = min(mapsize, chunk_size);
400
401 ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
402 size);
403 if (ret)
404 goto free;
405
406 offset += size;
407 mapsize -= size;
408 cond_resched();
409 }
410
411 free:
412 kfree(zerobuf);
413 return ret;
414}
415
416/*
 417 * This function initializes the BTT log with 'fake' entries that mark
 418 * the initial reserved set of blocks as free
419 */
420static int btt_log_init(struct arena_info *arena)
421{
422 int ret;
423 u32 i;
424 struct log_entry log, zerolog;
425
426 memset(&zerolog, 0, sizeof(zerolog));
427
428 for (i = 0; i < arena->nfree; i++) {
429 log.lba = cpu_to_le32(i);
430 log.old_map = cpu_to_le32(arena->external_nlba + i);
431 log.new_map = cpu_to_le32(arena->external_nlba + i);
432 log.seq = cpu_to_le32(LOG_SEQ_INIT);
433 ret = __btt_log_write(arena, i, 0, &log);
434 if (ret)
435 return ret;
436 ret = __btt_log_write(arena, i, 1, &zerolog);
437 if (ret)
438 return ret;
439 }
440
441 return 0;
442}
443
444static int btt_freelist_init(struct arena_info *arena)
445{
446 int old, new, ret;
447 u32 i, map_entry;
448 struct log_entry log_new, log_old;
449
450 arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
451 GFP_KERNEL);
452 if (!arena->freelist)
453 return -ENOMEM;
454
455 for (i = 0; i < arena->nfree; i++) {
456 old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
457 if (old < 0)
458 return old;
459
460 new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
461 if (new < 0)
462 return new;
463
464 /* sub points to the next one to be overwritten */
465 arena->freelist[i].sub = 1 - new;
466 arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
467 arena->freelist[i].block = le32_to_cpu(log_new.old_map);
468
469 /* This implies a newly created or untouched flog entry */
470 if (log_new.old_map == log_new.new_map)
471 continue;
472
473 /* Check if map recovery is needed */
474 ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
475 NULL, NULL);
476 if (ret)
477 return ret;
478 if ((le32_to_cpu(log_new.new_map) != map_entry) &&
479 (le32_to_cpu(log_new.old_map) == map_entry)) {
480 /*
481 * Last transaction wrote the flog, but wasn't able
482 * to complete the map write. So fix up the map.
483 */
484 ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
485 le32_to_cpu(log_new.new_map), 0, 0);
486 if (ret)
487 return ret;
488 }
489
490 }
491
492 return 0;
493}
494
495static int btt_rtt_init(struct arena_info *arena)
496{
497 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
498 if (arena->rtt == NULL)
499 return -ENOMEM;
500
501 return 0;
502}
503
504static int btt_maplocks_init(struct arena_info *arena)
505{
506 u32 i;
507
508 arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
509 GFP_KERNEL);
510 if (!arena->map_locks)
511 return -ENOMEM;
512
513 for (i = 0; i < arena->nfree; i++)
514 spin_lock_init(&arena->map_locks[i].lock);
515
516 return 0;
517}
518
519static struct arena_info *alloc_arena(struct btt *btt, size_t size,
520 size_t start, size_t arena_off)
521{
522 struct arena_info *arena;
523 u64 logsize, mapsize, datasize;
524 u64 available = size;
525
526 arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
527 if (!arena)
528 return NULL;
529 arena->nd_btt = btt->nd_btt;
530
531 if (!size)
532 return arena;
533
534 arena->size = size;
535 arena->external_lba_start = start;
536 arena->external_lbasize = btt->lbasize;
537 arena->internal_lbasize = roundup(arena->external_lbasize,
538 INT_LBASIZE_ALIGNMENT);
539 arena->nfree = BTT_DEFAULT_NFREE;
540 arena->version_major = 1;
541 arena->version_minor = 1;
542
543 if (available % BTT_PG_SIZE)
544 available -= (available % BTT_PG_SIZE);
545
546 /* Two pages are reserved for the super block and its copy */
547 available -= 2 * BTT_PG_SIZE;
548
549 /* The log takes a fixed amount of space based on nfree */
550 logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
551 BTT_PG_SIZE);
552 available -= logsize;
553
554 /* Calculate optimal split between map and data area */
555 arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
556 arena->internal_lbasize + MAP_ENT_SIZE);
557 arena->external_nlba = arena->internal_nlba - arena->nfree;
558
559 mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
560 datasize = available - mapsize;
561
562 /* 'Absolute' values, relative to start of storage space */
563 arena->infooff = arena_off;
564 arena->dataoff = arena->infooff + BTT_PG_SIZE;
565 arena->mapoff = arena->dataoff + datasize;
566 arena->logoff = arena->mapoff + mapsize;
567 arena->info2off = arena->logoff + logsize;
568 return arena;
569}
570
571static void free_arenas(struct btt *btt)
572{
573 struct arena_info *arena, *next;
574
575 list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
576 list_del(&arena->list);
577 kfree(arena->rtt);
578 kfree(arena->map_locks);
579 kfree(arena->freelist);
580 debugfs_remove_recursive(arena->debugfs_dir);
581 kfree(arena);
582 }
583}
584
585/*
586 * This function checks if the metadata layout is valid and error free
587 */
588static int arena_is_valid(struct arena_info *arena, struct btt_sb *super,
589 u8 *uuid, u32 lbasize)
590{
591 u64 checksum;
592
593 if (memcmp(super->uuid, uuid, 16))
594 return 0;
595
596 checksum = le64_to_cpu(super->checksum);
597 super->checksum = 0;
598 if (checksum != nd_btt_sb_checksum(super))
599 return 0;
600 super->checksum = cpu_to_le64(checksum);
601
602 if (lbasize != le32_to_cpu(super->external_lbasize))
603 return 0;
604
605 /* TODO: figure out action for this */
606 if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
607 dev_info(to_dev(arena), "Found arena with an error flag\n");
608
609 return 1;
610}
611
612/*
613 * This function reads an existing valid btt superblock and
614 * populates the corresponding arena_info struct
615 */
616static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
617 u64 arena_off)
618{
619 arena->internal_nlba = le32_to_cpu(super->internal_nlba);
620 arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
621 arena->external_nlba = le32_to_cpu(super->external_nlba);
622 arena->external_lbasize = le32_to_cpu(super->external_lbasize);
623 arena->nfree = le32_to_cpu(super->nfree);
624 arena->version_major = le16_to_cpu(super->version_major);
625 arena->version_minor = le16_to_cpu(super->version_minor);
626
627 arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
628 le64_to_cpu(super->nextoff));
629 arena->infooff = arena_off;
630 arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
631 arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
632 arena->logoff = arena_off + le64_to_cpu(super->logoff);
633 arena->info2off = arena_off + le64_to_cpu(super->info2off);
634
635 arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) :
636 (arena->info2off - arena->infooff + BTT_PG_SIZE);
637
638 arena->flags = le32_to_cpu(super->flags);
639}
640
641static int discover_arenas(struct btt *btt)
642{
643 int ret = 0;
644 struct arena_info *arena;
645 struct btt_sb *super;
646 size_t remaining = btt->rawsize;
647 u64 cur_nlba = 0;
648 size_t cur_off = 0;
649 int num_arenas = 0;
650
651 super = kzalloc(sizeof(*super), GFP_KERNEL);
652 if (!super)
653 return -ENOMEM;
654
655 while (remaining) {
656 /* Alloc memory for arena */
657 arena = alloc_arena(btt, 0, 0, 0);
658 if (!arena) {
659 ret = -ENOMEM;
660 goto out_super;
661 }
662
663 arena->infooff = cur_off;
664 ret = btt_info_read(arena, super);
665 if (ret)
666 goto out;
667
668 if (!arena_is_valid(arena, super, btt->nd_btt->uuid,
669 btt->lbasize)) {
670 if (remaining == btt->rawsize) {
671 btt->init_state = INIT_NOTFOUND;
672 dev_info(to_dev(arena), "No existing arenas\n");
673 goto out;
674 } else {
675 dev_info(to_dev(arena),
676 "Found corrupted metadata!\n");
677 ret = -ENODEV;
678 goto out;
679 }
680 }
681
682 arena->external_lba_start = cur_nlba;
683 parse_arena_meta(arena, super, cur_off);
684
685 ret = btt_freelist_init(arena);
686 if (ret)
687 goto out;
688
689 ret = btt_rtt_init(arena);
690 if (ret)
691 goto out;
692
693 ret = btt_maplocks_init(arena);
694 if (ret)
695 goto out;
696
697 list_add_tail(&arena->list, &btt->arena_list);
698
699 remaining -= arena->size;
700 cur_off += arena->size;
701 cur_nlba += arena->external_nlba;
702 num_arenas++;
703
704 if (arena->nextoff == 0)
705 break;
706 }
707 btt->num_arenas = num_arenas;
708 btt->nlba = cur_nlba;
709 btt->init_state = INIT_READY;
710
711 kfree(super);
712 return ret;
713
714 out:
715 kfree(arena);
716 free_arenas(btt);
717 out_super:
718 kfree(super);
719 return ret;
720}
721
722static int create_arenas(struct btt *btt)
723{
724 size_t remaining = btt->rawsize;
725 size_t cur_off = 0;
726
727 while (remaining) {
728 struct arena_info *arena;
729 size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);
730
731 remaining -= arena_size;
732 if (arena_size < ARENA_MIN_SIZE)
733 break;
734
735 arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
736 if (!arena) {
737 free_arenas(btt);
738 return -ENOMEM;
739 }
740 btt->nlba += arena->external_nlba;
741 if (remaining >= ARENA_MIN_SIZE)
742 arena->nextoff = arena->size;
743 else
744 arena->nextoff = 0;
745 cur_off += arena_size;
746 list_add_tail(&arena->list, &btt->arena_list);
747 }
748
749 return 0;
750}
751
752/*
753 * This function completes arena initialization by writing
754 * all the metadata.
755 * It is only called for an uninitialized arena when a write
756 * to that arena occurs for the first time.
757 */
758static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
759{
760 int ret;
761 struct btt_sb *super;
762
763 ret = btt_map_init(arena);
764 if (ret)
765 return ret;
766
767 ret = btt_log_init(arena);
768 if (ret)
769 return ret;
770
771 super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
772 if (!super)
773 return -ENOMEM;
774
775 strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
776 memcpy(super->uuid, uuid, 16);
777 super->flags = cpu_to_le32(arena->flags);
778 super->version_major = cpu_to_le16(arena->version_major);
779 super->version_minor = cpu_to_le16(arena->version_minor);
780 super->external_lbasize = cpu_to_le32(arena->external_lbasize);
781 super->external_nlba = cpu_to_le32(arena->external_nlba);
782 super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
783 super->internal_nlba = cpu_to_le32(arena->internal_nlba);
784 super->nfree = cpu_to_le32(arena->nfree);
785 super->infosize = cpu_to_le32(sizeof(struct btt_sb));
786 super->nextoff = cpu_to_le64(arena->nextoff);
787 /*
788 * Subtract arena->infooff (arena start) so numbers are relative
789 * to 'this' arena
790 */
791 super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
792 super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
793 super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
794 super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
795
796 super->flags = 0;
797 super->checksum = cpu_to_le64(nd_btt_sb_checksum(super));
798
799 ret = btt_info_write(arena, super);
800
801 kfree(super);
802 return ret;
803}
804
805/*
806 * This function completes the initialization for the BTT namespace
807 * such that it is ready to accept IOs
808 */
809static int btt_meta_init(struct btt *btt)
810{
811 int ret = 0;
812 struct arena_info *arena;
813
814 mutex_lock(&btt->init_lock);
815 list_for_each_entry(arena, &btt->arena_list, list) {
816 ret = btt_arena_write_layout(arena, btt->nd_btt->uuid);
817 if (ret)
818 goto unlock;
819
820 ret = btt_freelist_init(arena);
821 if (ret)
822 goto unlock;
823
824 ret = btt_rtt_init(arena);
825 if (ret)
826 goto unlock;
827
828 ret = btt_maplocks_init(arena);
829 if (ret)
830 goto unlock;
831 }
832
833 btt->init_state = INIT_READY;
834
835 unlock:
836 mutex_unlock(&btt->init_lock);
837 return ret;
838}
839
840static u32 btt_meta_size(struct btt *btt)
841{
842 return btt->lbasize - btt->sector_size;
843}
844
845/*
846 * This function calculates the arena in which the given LBA lies
847 * by doing a linear walk. This is acceptable since we expect only
848 * a few arenas. If we have backing devices that get much larger,
849 * we can construct a balanced binary tree of arenas at init time
850 * so that this range search becomes faster.
851 */
852static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
853 struct arena_info **arena)
854{
855 struct arena_info *arena_list;
856 __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);
857
858 list_for_each_entry(arena_list, &btt->arena_list, list) {
859 if (lba < arena_list->external_nlba) {
860 *arena = arena_list;
861 *premap = lba;
862 return 0;
863 }
864 lba -= arena_list->external_nlba;
865 }
866
867 return -EIO;
868}
869
870/*
871 * The following (lock_map, unlock_map) are mostly just to improve
872 * readability, since they index into an array of locks
873 */
874static void lock_map(struct arena_info *arena, u32 premap)
875 __acquires(&arena->map_locks[idx].lock)
876{
877 u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
878
879 spin_lock(&arena->map_locks[idx].lock);
880}
881
882static void unlock_map(struct arena_info *arena, u32 premap)
883 __releases(&arena->map_locks[idx].lock)
884{
885 u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
886
887 spin_unlock(&arena->map_locks[idx].lock);
888}
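
The lock index arithmetic in lock_map()/unlock_map() groups map entries that share a cache line onto one lock: assuming MAP_ENT_SIZE is 4 bytes (each entry is a __le32, per __btt_map_write()) and 64-byte L1 cache lines, premap LBAs 0-15 hash to lock 0, 16-31 to lock 1, and so on, modulo nfree, so concurrent updates to entries in the same cache line are serialized.
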
889
890static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
891{
892 return arena->dataoff + ((u64)lba * arena->internal_lbasize);
893}
894
895static int btt_data_read(struct arena_info *arena, struct page *page,
896 unsigned int off, u32 lba, u32 len)
897{
898 int ret;
899 u64 nsoff = to_namespace_offset(arena, lba);
900 void *mem = kmap_atomic(page);
901
902 ret = arena_read_bytes(arena, nsoff, mem + off, len);
903 kunmap_atomic(mem);
904
905 return ret;
906}
907
908static int btt_data_write(struct arena_info *arena, u32 lba,
909 struct page *page, unsigned int off, u32 len)
910{
911 int ret;
912 u64 nsoff = to_namespace_offset(arena, lba);
913 void *mem = kmap_atomic(page);
914
915 ret = arena_write_bytes(arena, nsoff, mem + off, len);
916 kunmap_atomic(mem);
917
918 return ret;
919}
920
921static void zero_fill_data(struct page *page, unsigned int off, u32 len)
922{
923 void *mem = kmap_atomic(page);
924
925 memset(mem + off, 0, len);
926 kunmap_atomic(mem);
927}
928
929#ifdef CONFIG_BLK_DEV_INTEGRITY
930static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
931 struct arena_info *arena, u32 postmap, int rw)
932{
933 unsigned int len = btt_meta_size(btt);
934 u64 meta_nsoff;
935 int ret = 0;
936
937 if (bip == NULL)
938 return 0;
939
940 meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;
941
942 while (len) {
943 unsigned int cur_len;
944 struct bio_vec bv;
945 void *mem;
946
947 bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
948 /*
949 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
950 * .bv_offset already adjusted for iter->bi_bvec_done, and we
951 * can use those directly
952 */
953
954 cur_len = min(len, bv.bv_len);
955 mem = kmap_atomic(bv.bv_page);
956 if (rw)
957 ret = arena_write_bytes(arena, meta_nsoff,
958 mem + bv.bv_offset, cur_len);
959 else
960 ret = arena_read_bytes(arena, meta_nsoff,
961 mem + bv.bv_offset, cur_len);
962
963 kunmap_atomic(mem);
964 if (ret)
965 return ret;
966
967 len -= cur_len;
968 meta_nsoff += cur_len;
969 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
970 }
971
972 return ret;
973}
974
975#else /* CONFIG_BLK_DEV_INTEGRITY */
976static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
977 struct arena_info *arena, u32 postmap, int rw)
978{
979 return 0;
980}
981#endif
982
983static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
984 struct page *page, unsigned int off, sector_t sector,
985 unsigned int len)
986{
987 int ret = 0;
988 int t_flag, e_flag;
989 struct arena_info *arena = NULL;
990 u32 lane = 0, premap, postmap;
991
992 while (len) {
993 u32 cur_len;
994
995 lane = nd_region_acquire_lane(btt->nd_region);
996
997 ret = lba_to_arena(btt, sector, &premap, &arena);
998 if (ret)
999 goto out_lane;
1000
1001 cur_len = min(btt->sector_size, len);
1002
1003 ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
1004 if (ret)
1005 goto out_lane;
1006
1007 /*
1008 * We loop to make sure that the post map LBA didn't change
1009 * from under us between writing the RTT and doing the actual
1010 * read.
1011 */
1012 while (1) {
1013 u32 new_map;
1014
1015 if (t_flag) {
1016 zero_fill_data(page, off, cur_len);
1017 goto out_lane;
1018 }
1019
1020 if (e_flag) {
1021 ret = -EIO;
1022 goto out_lane;
1023 }
1024
1025 arena->rtt[lane] = RTT_VALID | postmap;
1026 /*
 1027			 * Barrier to make sure this RTT store is not reordered
 1028			 * past the verification map_read that follows
1029 */
1030 barrier();
1031
1032 ret = btt_map_read(arena, premap, &new_map, &t_flag,
1033 &e_flag);
1034 if (ret)
1035 goto out_rtt;
1036
1037 if (postmap == new_map)
1038 break;
1039
1040 postmap = new_map;
1041 }
1042
1043 ret = btt_data_read(arena, page, off, postmap, cur_len);
1044 if (ret)
1045 goto out_rtt;
1046
1047 if (bip) {
1048 ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
1049 if (ret)
1050 goto out_rtt;
1051 }
1052
1053 arena->rtt[lane] = RTT_INVALID;
1054 nd_region_release_lane(btt->nd_region, lane);
1055
1056 len -= cur_len;
1057 off += cur_len;
1058 sector += btt->sector_size >> SECTOR_SHIFT;
1059 }
1060
1061 return 0;
1062
1063 out_rtt:
1064 arena->rtt[lane] = RTT_INVALID;
1065 out_lane:
1066 nd_region_release_lane(btt->nd_region, lane);
1067 return ret;
1068}
1069
1070static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
1071 sector_t sector, struct page *page, unsigned int off,
1072 unsigned int len)
1073{
1074 int ret = 0;
1075 struct arena_info *arena = NULL;
1076 u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
1077 struct log_entry log;
1078 int sub;
1079
1080 while (len) {
1081 u32 cur_len;
1082
1083 lane = nd_region_acquire_lane(btt->nd_region);
1084
1085 ret = lba_to_arena(btt, sector, &premap, &arena);
1086 if (ret)
1087 goto out_lane;
1088 cur_len = min(btt->sector_size, len);
1089
1090 if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
1091 ret = -EIO;
1092 goto out_lane;
1093 }
1094
1095 new_postmap = arena->freelist[lane].block;
1096
1097 /* Wait if the new block is being read from */
1098 for (i = 0; i < arena->nfree; i++)
1099 while (arena->rtt[i] == (RTT_VALID | new_postmap))
1100 cpu_relax();
1101
1102
1103 if (new_postmap >= arena->internal_nlba) {
1104 ret = -EIO;
1105 goto out_lane;
1106 }
1107
1108 ret = btt_data_write(arena, new_postmap, page, off, cur_len);
1109 if (ret)
1110 goto out_lane;
1111
1112 if (bip) {
1113 ret = btt_rw_integrity(btt, bip, arena, new_postmap,
1114 WRITE);
1115 if (ret)
1116 goto out_lane;
1117 }
1118
1119 lock_map(arena, premap);
1120 ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
1121 if (ret)
1122 goto out_map;
1123 if (old_postmap >= arena->internal_nlba) {
1124 ret = -EIO;
1125 goto out_map;
1126 }
1127
1128 log.lba = cpu_to_le32(premap);
1129 log.old_map = cpu_to_le32(old_postmap);
1130 log.new_map = cpu_to_le32(new_postmap);
1131 log.seq = cpu_to_le32(arena->freelist[lane].seq);
1132 sub = arena->freelist[lane].sub;
1133 ret = btt_flog_write(arena, lane, sub, &log);
1134 if (ret)
1135 goto out_map;
1136
1137 ret = btt_map_write(arena, premap, new_postmap, 0, 0);
1138 if (ret)
1139 goto out_map;
1140
1141 unlock_map(arena, premap);
1142 nd_region_release_lane(btt->nd_region, lane);
1143
1144 len -= cur_len;
1145 off += cur_len;
1146 sector += btt->sector_size >> SECTOR_SHIFT;
1147 }
1148
1149 return 0;
1150
1151 out_map:
1152 unlock_map(arena, premap);
1153 out_lane:
1154 nd_region_release_lane(btt->nd_region, lane);
1155 return ret;
1156}
1157
1158static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
1159 struct page *page, unsigned int len, unsigned int off,
1160 int rw, sector_t sector)
1161{
1162 int ret;
1163
1164 if (rw == READ) {
1165 ret = btt_read_pg(btt, bip, page, off, sector, len);
1166 flush_dcache_page(page);
1167 } else {
1168 flush_dcache_page(page);
1169 ret = btt_write_pg(btt, bip, sector, page, off, len);
1170 }
1171
1172 return ret;
1173}
1174
1175static void btt_make_request(struct request_queue *q, struct bio *bio)
1176{
1177 struct bio_integrity_payload *bip = bio_integrity(bio);
1178 struct btt *btt = q->queuedata;
1179 struct bvec_iter iter;
1180 unsigned long start;
1181 struct bio_vec bvec;
1182 int err = 0, rw;
1183 bool do_acct;
1184
1185 /*
1186 * bio_integrity_enabled also checks if the bio already has an
1187 * integrity payload attached. If it does, we *don't* do a
1188 * bio_integrity_prep here - the payload has been generated by
1189 * another kernel subsystem, and we just pass it through.
1190 */
1191 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1192 err = -EIO;
1193 goto out;
1194 }
1195
1196 do_acct = nd_iostat_start(bio, &start);
1197 rw = bio_data_dir(bio);
1198 bio_for_each_segment(bvec, bio, iter) {
1199 unsigned int len = bvec.bv_len;
1200
1201 BUG_ON(len > PAGE_SIZE);
1202 /* Make sure len is in multiples of sector size. */
1203 /* XXX is this right? */
1204 BUG_ON(len < btt->sector_size);
1205 BUG_ON(len % btt->sector_size);
1206
1207 err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
1208 rw, iter.bi_sector);
1209 if (err) {
1210 dev_info(&btt->nd_btt->dev,
1211 "io error in %s sector %lld, len %d,\n",
1212 (rw == READ) ? "READ" : "WRITE",
1213 (unsigned long long) iter.bi_sector, len);
1214 break;
1215 }
1216 }
1217 if (do_acct)
1218 nd_iostat_end(bio, start);
1219
1220out:
1221 bio_endio(bio, err);
1222}
1223
1224static int btt_rw_page(struct block_device *bdev, sector_t sector,
1225 struct page *page, int rw)
1226{
1227 struct btt *btt = bdev->bd_disk->private_data;
1228
1229 btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector);
1230 page_endio(page, rw & WRITE, 0);
1231 return 0;
1232}
1233
1234
1235static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
1236{
1237 /* some standard values */
1238 geo->heads = 1 << 6;
1239 geo->sectors = 1 << 5;
1240 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
1241 return 0;
1242}
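
The fake geometry is self-consistent: 64 heads times 32 sectors per track gives 2048 (2^11) sectors per cylinder, which is why the cylinder count is the capacity in 512-byte sectors shifted right by 11.
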
1243
1244static const struct block_device_operations btt_fops = {
1245 .owner = THIS_MODULE,
1246 .rw_page = btt_rw_page,
1247 .getgeo = btt_getgeo,
1248 .revalidate_disk = nvdimm_revalidate_disk,
1249};
1250
1251static int btt_blk_init(struct btt *btt)
1252{
1253 struct nd_btt *nd_btt = btt->nd_btt;
1254 struct nd_namespace_common *ndns = nd_btt->ndns;
1255
1256 /* create a new disk and request queue for btt */
1257 btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
1258 if (!btt->btt_queue)
1259 return -ENOMEM;
1260
1261 btt->btt_disk = alloc_disk(0);
1262 if (!btt->btt_disk) {
1263 blk_cleanup_queue(btt->btt_queue);
1264 return -ENOMEM;
1265 }
1266
1267 nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
1268 btt->btt_disk->driverfs_dev = &btt->nd_btt->dev;
1269 btt->btt_disk->major = btt_major;
1270 btt->btt_disk->first_minor = 0;
1271 btt->btt_disk->fops = &btt_fops;
1272 btt->btt_disk->private_data = btt;
1273 btt->btt_disk->queue = btt->btt_queue;
1274 btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
1275
1276 blk_queue_make_request(btt->btt_queue, btt_make_request);
1277 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1278 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1279 blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
1280 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
1281 btt->btt_queue->queuedata = btt;
1282
1283 set_capacity(btt->btt_disk, 0);
1284 add_disk(btt->btt_disk);
1285 if (btt_meta_size(btt)) {
1286 int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
1287
1288 if (rc) {
1289 del_gendisk(btt->btt_disk);
1290 put_disk(btt->btt_disk);
1291 blk_cleanup_queue(btt->btt_queue);
1292 return rc;
1293 }
1294 }
1295 set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
1296 revalidate_disk(btt->btt_disk);
1297
1298 return 0;
1299}
1300
1301static void btt_blk_cleanup(struct btt *btt)
1302{
1303 blk_integrity_unregister(btt->btt_disk);
1304 del_gendisk(btt->btt_disk);
1305 put_disk(btt->btt_disk);
1306 blk_cleanup_queue(btt->btt_queue);
1307}
1308
1309/**
1310 * btt_init - initialize a block translation table for the given device
1311 * @nd_btt: device with BTT geometry and backing device info
1312 * @rawsize: raw size in bytes of the backing device
1313 * @lbasize: lba size of the backing device
1314 * @uuid: A uuid for the backing device - this is stored on media
1315 * @nd_region: parent region of the backing namespace; provides the I/O lanes
1316 *
1317 * Initialize a Block Translation Table on a backing device to provide
1318 * single sector power fail atomicity.
1319 *
1320 * Context:
1321 * Might sleep.
1322 *
1323 * Returns:
1324 * Pointer to a new struct btt on success, NULL on failure.
1325 */
1326static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1327 u32 lbasize, u8 *uuid, struct nd_region *nd_region)
1328{
1329 int ret;
1330 struct btt *btt;
1331 struct device *dev = &nd_btt->dev;
1332
1333 btt = kzalloc(sizeof(struct btt), GFP_KERNEL);
1334 if (!btt)
1335 return NULL;
1336
1337 btt->nd_btt = nd_btt;
1338 btt->rawsize = rawsize;
1339 btt->lbasize = lbasize;
1340 btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
1341 INIT_LIST_HEAD(&btt->arena_list);
1342 mutex_init(&btt->init_lock);
1343 btt->nd_region = nd_region;
1344
1345 ret = discover_arenas(btt);
1346 if (ret) {
1347 dev_err(dev, "init: error in arena_discover: %d\n", ret);
1348 goto out_free;
1349 }
1350
1351 if (btt->init_state != INIT_READY && nd_region->ro) {
1352 dev_info(dev, "%s is read-only, unable to init btt metadata\n",
1353 dev_name(&nd_region->dev));
1354 goto out_free;
1355 } else if (btt->init_state != INIT_READY) {
1356 btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
1357 ((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
1358 dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
1359 btt->num_arenas, rawsize);
1360
1361 ret = create_arenas(btt);
1362 if (ret) {
1363 dev_info(dev, "init: create_arenas: %d\n", ret);
1364 goto out_free;
1365 }
1366
1367 ret = btt_meta_init(btt);
1368 if (ret) {
1369 dev_err(dev, "init: error in meta_init: %d\n", ret);
1370 goto out_free;
1371 }
1372 }
1373
1374 ret = btt_blk_init(btt);
1375 if (ret) {
1376 dev_err(dev, "init: error in blk_init: %d\n", ret);
1377 goto out_free;
1378 }
1379
1380 btt_debugfs_init(btt);
1381
1382 return btt;
1383
1384 out_free:
1385 kfree(btt);
1386 return NULL;
1387}
1388
1389/**
1390 * btt_fini - de-initialize a BTT
1391 * @btt: the BTT handle that was generated by btt_init
1392 *
1393 * De-initialize a Block Translation Table on device removal
1394 *
1395 * Context:
1396 * Might sleep.
1397 */
1398static void btt_fini(struct btt *btt)
1399{
1400 if (btt) {
1401 btt_blk_cleanup(btt);
1402 free_arenas(btt);
1403 debugfs_remove_recursive(btt->debugfs_dir);
1404 kfree(btt);
1405 }
1406}
1407
1408int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
1409{
1410 struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1411 struct nd_region *nd_region;
1412 struct btt *btt;
1413 size_t rawsize;
1414
1415 if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize)
1416 return -ENODEV;
1417
1418 rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
1419 if (rawsize < ARENA_MIN_SIZE) {
1420 return -ENXIO;
1421 }
1422 nd_region = to_nd_region(nd_btt->dev.parent);
1423 btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
1424 nd_region);
1425 if (!btt)
1426 return -ENOMEM;
1427 nd_btt->btt = btt;
1428
1429 return 0;
1430}
1431EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
1432
1433int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
1434{
1435 struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1436 struct btt *btt = nd_btt->btt;
1437
1438 btt_fini(btt);
1439 nd_btt->btt = NULL;
1440
1441 return 0;
1442}
1443EXPORT_SYMBOL(nvdimm_namespace_detach_btt);
1444
1445static int __init nd_btt_init(void)
1446{
1447 int rc;
1448
1449 BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
1450
1451 btt_major = register_blkdev(0, "btt");
1452 if (btt_major < 0)
1453 return btt_major;
1454
1455 debugfs_root = debugfs_create_dir("btt", NULL);
1456 if (IS_ERR_OR_NULL(debugfs_root)) {
1457 rc = -ENXIO;
1458 goto err_debugfs;
1459 }
1460
1461 return 0;
1462
1463 err_debugfs:
1464 unregister_blkdev(btt_major, "btt");
1465
1466 return rc;
1467}
1468
1469static void __exit nd_btt_exit(void)
1470{
1471 debugfs_remove_recursive(debugfs_root);
1472 unregister_blkdev(btt_major, "btt");
1473}
1474
1475MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
1476MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
1477MODULE_LICENSE("GPL v2");
1478module_init(nd_btt_init);
1479module_exit(nd_btt_exit);
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
new file mode 100644
index 000000000000..75b0d80a6bd9
--- /dev/null
+++ b/drivers/nvdimm/btt.h
@@ -0,0 +1,185 @@
1/*
2 * Block Translation Table library
3 * Copyright (c) 2014-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#ifndef _LINUX_BTT_H
16#define _LINUX_BTT_H
17
18#include <linux/types.h>
19
20#define BTT_SIG_LEN 16
21#define BTT_SIG "BTT_ARENA_INFO\0"
22#define MAP_ENT_SIZE 4
23#define MAP_TRIM_SHIFT 31
24#define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT)
25#define MAP_ERR_SHIFT 30
26#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
27#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
28#define MAP_ENT_NORMAL 0xC0000000
29#define LOG_ENT_SIZE sizeof(struct log_entry)
30#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
31#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
32#define RTT_VALID (1UL << 31)
33#define RTT_INVALID 0
34#define BTT_PG_SIZE 4096
35#define BTT_DEFAULT_NFREE ND_MAX_LANES
36#define LOG_SEQ_INIT 1
37
38#define IB_FLAG_ERROR 0x00000001
39#define IB_FLAG_ERROR_MASK 0x00000001
40
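/*
 * Illustrative sketch (editorial, not part of the on-media definitions
 * above): a raw 32-bit map entry keeps the trim/zero flag in bit 31, the
 * error flag in bit 30, and the post-map (internal) block number in the
 * low 30 bits. An entry with both flag bits set (MAP_ENT_NORMAL) is an
 * ordinary mapping, so its block number could be recovered like this:
 */
static inline u32 btt_example_map_ent_to_block(u32 raw_ent)
{
	/* hypothetical helper, for illustration only; assumes MAP_ENT_NORMAL */
	return raw_ent & MAP_LBA_MASK;
}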
41enum btt_init_state {
42 INIT_UNCHECKED = 0,
43 INIT_NOTFOUND,
44 INIT_READY
45};
46
47struct log_entry {
48 __le32 lba;
49 __le32 old_map;
50 __le32 new_map;
51 __le32 seq;
52 __le64 padding[2];
53};
54
55struct btt_sb {
56 u8 signature[BTT_SIG_LEN];
57 u8 uuid[16];
58 u8 parent_uuid[16];
59 __le32 flags;
60 __le16 version_major;
61 __le16 version_minor;
62 __le32 external_lbasize;
63 __le32 external_nlba;
64 __le32 internal_lbasize;
65 __le32 internal_nlba;
66 __le32 nfree;
67 __le32 infosize;
68 __le64 nextoff;
69 __le64 dataoff;
70 __le64 mapoff;
71 __le64 logoff;
72 __le64 info2off;
73 u8 padding[3968];
74 __le64 checksum;
75};
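/*
 * Editorial note: the padding above is sized so that struct btt_sb is
 * exactly 4096 bytes; btt.c enforces this with
 * BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K).
 */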
76
77struct free_entry {
78 u32 block;
79 u8 sub;
80 u8 seq;
81};
82
83struct aligned_lock {
84 union {
85 spinlock_t lock;
86 u8 cacheline_padding[L1_CACHE_BYTES];
87 };
88};
89
90/**
91 * struct arena_info - handle for an arena
92 * @size: Size in bytes this arena occupies on the raw device.
93 * This includes arena metadata.
94 * @external_lba_start: The first external LBA in this arena.
95 * @internal_nlba: Number of internal blocks available in the arena
96 * including nfree reserved blocks
97 * @internal_lbasize: Internal and external lba sizes may be different as
98 * we can round up 'odd' external lbasizes such as 520B
99 * to be aligned.
100 * @external_nlba: Number of blocks contributed by the arena to the number
101 * reported to upper layers. (internal_nlba - nfree)
102 * @external_lbasize: LBA size as exposed to upper layers.
103 * @nfree: A reserve number of 'free' blocks that is used to
104 * handle incoming writes.
105 * @version_major: Metadata layout version major.
106 * @version_minor: Metadata layout version minor.
107 * @nextoff: Offset in bytes to the start of the next arena.
108 * @infooff: Offset in bytes to the info block of this arena.
109 * @dataoff: Offset in bytes to the data area of this arena.
110 * @mapoff: Offset in bytes to the map area of this arena.
111 * @logoff: Offset in bytes to the log area of this arena.
112 * @info2off: Offset in bytes to the backup info block of this arena.
113 * @freelist: Pointer to in-memory list of free blocks
114 * @rtt: Pointer to in-memory "Read Tracking Table"
115 * @map_locks: Spinlocks protecting concurrent map writes
116 * @nd_btt: Pointer to parent nd_btt structure.
117 * @list: List head for list of arenas
118 * @debugfs_dir: Debugfs dentry
119 * @flags: Arena flags - may signify error states.
120 *
121 * arena_info is a per-arena handle. Once an arena is narrowed down for an
122 * IO, this struct is passed around for the duration of the IO.
123 */
124struct arena_info {
125 u64 size; /* Total bytes for this arena */
126 u64 external_lba_start;
127 u32 internal_nlba;
128 u32 internal_lbasize;
129 u32 external_nlba;
130 u32 external_lbasize;
131 u32 nfree;
132 u16 version_major;
133 u16 version_minor;
134 /* Byte offsets to the different on-media structures */
135 u64 nextoff;
136 u64 infooff;
137 u64 dataoff;
138 u64 mapoff;
139 u64 logoff;
140 u64 info2off;
141 /* Pointers to other in-memory structures for this arena */
142 struct free_entry *freelist;
143 u32 *rtt;
144 struct aligned_lock *map_locks;
145 struct nd_btt *nd_btt;
146 struct list_head list;
147 struct dentry *debugfs_dir;
148 /* Arena flags */
149 u32 flags;
150};
151
152/**
153 * struct btt - handle for a BTT instance
154 * @btt_disk: Pointer to the gendisk for BTT device
155 * @btt_queue: Pointer to the request queue for the BTT device
156 * @arena_list: Head of the list of arenas
157 * @debugfs_dir: Debugfs dentry
158 * @nd_btt: Parent nd_btt struct
159 * @nlba: Number of logical blocks exposed to the upper layers
160 * after removing the amount of space needed by metadata
161 * @rawsize: Total size in bytes of the available backing device
162 * @lbasize: LBA size as requested and presented to upper layers.
163 * This is sector_size + size of any metadata.
164 * @sector_size: The Linux sector size - 512 or 4096
165 * @nd_region: Parent region that provides the per-lane resources for I/O
166 * @init_lock: Mutex used for the BTT initialization
167 * @init_state: Flag describing the initialization state for the BTT
168 * @num_arenas: Number of arenas in the BTT instance
169 */
170struct btt {
171 struct gendisk *btt_disk;
172 struct request_queue *btt_queue;
173 struct list_head arena_list;
174 struct dentry *debugfs_dir;
175 struct nd_btt *nd_btt;
176 u64 nlba;
177 unsigned long long rawsize;
178 u32 lbasize;
179 u32 sector_size;
180 struct nd_region *nd_region;
181 struct mutex init_lock;
182 int init_state;
183 int num_arenas;
184};
185#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
new file mode 100644
index 000000000000..6ac8c0fea3ec
--- /dev/null
+++ b/drivers/nvdimm/btt_devs.c
@@ -0,0 +1,425 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/blkdev.h>
14#include <linux/device.h>
15#include <linux/genhd.h>
16#include <linux/sizes.h>
17#include <linux/slab.h>
18#include <linux/fs.h>
19#include <linux/mm.h>
20#include "nd-core.h"
21#include "btt.h"
22#include "nd.h"
23
24static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
25{
26 struct nd_namespace_common *ndns = nd_btt->ndns;
27
28 dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
29 || ndns->claim != &nd_btt->dev,
30 "%s: invalid claim\n", __func__);
31 ndns->claim = NULL;
32 nd_btt->ndns = NULL;
33 put_device(&ndns->dev);
34}
35
36static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
37{
38 struct nd_namespace_common *ndns = nd_btt->ndns;
39
40 if (!ndns)
41 return;
42 get_device(&ndns->dev);
43 device_lock(&ndns->dev);
44 __nd_btt_detach_ndns(nd_btt);
45 device_unlock(&ndns->dev);
46 put_device(&ndns->dev);
47}
48
49static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
50 struct nd_namespace_common *ndns)
51{
52 if (ndns->claim)
53 return false;
54 dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
55 || nd_btt->ndns,
56 "%s: invalid claim\n", __func__);
57 ndns->claim = &nd_btt->dev;
58 nd_btt->ndns = ndns;
59 get_device(&ndns->dev);
60 return true;
61}
62
63static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
64 struct nd_namespace_common *ndns)
65{
66 bool claimed;
67
68 device_lock(&ndns->dev);
69 claimed = __nd_btt_attach_ndns(nd_btt, ndns);
70 device_unlock(&ndns->dev);
71 return claimed;
72}
73
74static void nd_btt_release(struct device *dev)
75{
76 struct nd_region *nd_region = to_nd_region(dev->parent);
77 struct nd_btt *nd_btt = to_nd_btt(dev);
78
79 dev_dbg(dev, "%s\n", __func__);
80 nd_btt_detach_ndns(nd_btt);
81 ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
82 kfree(nd_btt->uuid);
83 kfree(nd_btt);
84}
85
86static struct device_type nd_btt_device_type = {
87 .name = "nd_btt",
88 .release = nd_btt_release,
89};
90
91bool is_nd_btt(struct device *dev)
92{
93 return dev->type == &nd_btt_device_type;
94}
95EXPORT_SYMBOL(is_nd_btt);
96
97struct nd_btt *to_nd_btt(struct device *dev)
98{
99 struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
100
101 WARN_ON(!is_nd_btt(dev));
102 return nd_btt;
103}
104EXPORT_SYMBOL(to_nd_btt);
105
106static const unsigned long btt_lbasize_supported[] = { 512, 520, 528,
107 4096, 4104, 4160, 4224, 0 };
108
109static ssize_t sector_size_show(struct device *dev,
110 struct device_attribute *attr, char *buf)
111{
112 struct nd_btt *nd_btt = to_nd_btt(dev);
113
114 return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
115}
116
117static ssize_t sector_size_store(struct device *dev,
118 struct device_attribute *attr, const char *buf, size_t len)
119{
120 struct nd_btt *nd_btt = to_nd_btt(dev);
121 ssize_t rc;
122
123 device_lock(dev);
124 nvdimm_bus_lock(dev);
125 rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
126 btt_lbasize_supported);
127 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
128 rc, buf, buf[len - 1] == '\n' ? "" : "\n");
129 nvdimm_bus_unlock(dev);
130 device_unlock(dev);
131
132 return rc ? rc : len;
133}
134static DEVICE_ATTR_RW(sector_size);
135
136static ssize_t uuid_show(struct device *dev,
137 struct device_attribute *attr, char *buf)
138{
139 struct nd_btt *nd_btt = to_nd_btt(dev);
140
141 if (nd_btt->uuid)
142 return sprintf(buf, "%pUb\n", nd_btt->uuid);
143 return sprintf(buf, "\n");
144}
145
146static ssize_t uuid_store(struct device *dev,
147 struct device_attribute *attr, const char *buf, size_t len)
148{
149 struct nd_btt *nd_btt = to_nd_btt(dev);
150 ssize_t rc;
151
152 device_lock(dev);
153 rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
154 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
155 rc, buf, buf[len - 1] == '\n' ? "" : "\n");
156 device_unlock(dev);
157
158 return rc ? rc : len;
159}
160static DEVICE_ATTR_RW(uuid);
161
162static ssize_t namespace_show(struct device *dev,
163 struct device_attribute *attr, char *buf)
164{
165 struct nd_btt *nd_btt = to_nd_btt(dev);
166 ssize_t rc;
167
168 nvdimm_bus_lock(dev);
169 rc = sprintf(buf, "%s\n", nd_btt->ndns
170 ? dev_name(&nd_btt->ndns->dev) : "");
171 nvdimm_bus_unlock(dev);
172 return rc;
173}
174
175static int namespace_match(struct device *dev, void *data)
176{
177 char *name = data;
178
179 return strcmp(name, dev_name(dev)) == 0;
180}
181
182static bool is_nd_btt_idle(struct device *dev)
183{
184 struct nd_region *nd_region = to_nd_region(dev->parent);
185 struct nd_btt *nd_btt = to_nd_btt(dev);
186
187 if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver)
188 return false;
189 return true;
190}
191
192static ssize_t __namespace_store(struct device *dev,
193 struct device_attribute *attr, const char *buf, size_t len)
194{
195 struct nd_btt *nd_btt = to_nd_btt(dev);
196 struct nd_namespace_common *ndns;
197 struct device *found;
198 char *name;
199
200 if (dev->driver) {
201 dev_dbg(dev, "%s: -EBUSY\n", __func__);
202 return -EBUSY;
203 }
204
205 name = kstrndup(buf, len, GFP_KERNEL);
206 if (!name)
207 return -ENOMEM;
208 strim(name);
209
210 if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
211 /* pass */;
212 else {
213 len = -EINVAL;
214 goto out;
215 }
216
217 ndns = nd_btt->ndns;
218 if (strcmp(name, "") == 0) {
219 /* detach the namespace and destroy / reset the btt device */
220 nd_btt_detach_ndns(nd_btt);
221 if (is_nd_btt_idle(dev))
222 nd_device_unregister(dev, ND_ASYNC);
223 else {
224 nd_btt->lbasize = 0;
225 kfree(nd_btt->uuid);
226 nd_btt->uuid = NULL;
227 }
228 goto out;
229 } else if (ndns) {
230 dev_dbg(dev, "namespace already set to: %s\n",
231 dev_name(&ndns->dev));
232 len = -EBUSY;
233 goto out;
234 }
235
236 found = device_find_child(dev->parent, name, namespace_match);
237 if (!found) {
238 dev_dbg(dev, "'%s' not found under %s\n", name,
239 dev_name(dev->parent));
240 len = -ENODEV;
241 goto out;
242 }
243
244 ndns = to_ndns(found);
245 if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
246 dev_dbg(dev, "%s too small to host btt\n", name);
247 len = -ENXIO;
248 goto out_attach;
249 }
250
251 WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
252 if (!nd_btt_attach_ndns(nd_btt, ndns)) {
253 dev_dbg(dev, "%s already claimed\n",
254 dev_name(&ndns->dev));
255 len = -EBUSY;
256 }
257
258 out_attach:
259 put_device(&ndns->dev); /* from device_find_child */
260 out:
261 kfree(name);
262 return len;
263}
264
265static ssize_t namespace_store(struct device *dev,
266 struct device_attribute *attr, const char *buf, size_t len)
267{
268 ssize_t rc;
269
270 nvdimm_bus_lock(dev);
271 device_lock(dev);
272 rc = __namespace_store(dev, attr, buf, len);
273 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
274 rc, buf, buf[len - 1] == '\n' ? "" : "\n");
275 device_unlock(dev);
276 nvdimm_bus_unlock(dev);
277
278 return rc;
279}
280static DEVICE_ATTR_RW(namespace);
281
282static struct attribute *nd_btt_attributes[] = {
283 &dev_attr_sector_size.attr,
284 &dev_attr_namespace.attr,
285 &dev_attr_uuid.attr,
286 NULL,
287};
288
289static struct attribute_group nd_btt_attribute_group = {
290 .attrs = nd_btt_attributes,
291};
292
293static const struct attribute_group *nd_btt_attribute_groups[] = {
294 &nd_btt_attribute_group,
295 &nd_device_attribute_group,
296 &nd_numa_attribute_group,
297 NULL,
298};
299
300static struct device *__nd_btt_create(struct nd_region *nd_region,
301 unsigned long lbasize, u8 *uuid,
302 struct nd_namespace_common *ndns)
303{
304 struct nd_btt *nd_btt;
305 struct device *dev;
306
307 nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
308 if (!nd_btt)
309 return NULL;
310
311 nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
312 if (nd_btt->id < 0) {
313 kfree(nd_btt);
314 return NULL;
315 }
316
317 nd_btt->lbasize = lbasize;
318 if (uuid)
319 uuid = kmemdup(uuid, 16, GFP_KERNEL);
320 nd_btt->uuid = uuid;
321 dev = &nd_btt->dev;
322 dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
323 dev->parent = &nd_region->dev;
324 dev->type = &nd_btt_device_type;
325 dev->groups = nd_btt_attribute_groups;
326 device_initialize(&nd_btt->dev);
327 if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
328 dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
329 __func__, dev_name(ndns->claim));
330 put_device(dev);
331 return NULL;
332 }
333 return dev;
334}
335
336struct device *nd_btt_create(struct nd_region *nd_region)
337{
338 struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
339
340 if (dev)
341 __nd_device_register(dev);
342 return dev;
343}
344
345/*
346 * nd_btt_sb_checksum: compute checksum for btt info block
347 *
348 * Returns a fletcher64 checksum of everything in the given info block
349 * except the last field (since that's where the checksum lives).
350 */
351u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
352{
353 u64 sum;
354 __le64 sum_save;
355
356 sum_save = btt_sb->checksum;
357 btt_sb->checksum = 0;
358 sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
359 btt_sb->checksum = sum_save;
360 return sum;
361}
362EXPORT_SYMBOL(nd_btt_sb_checksum);
363
364static int __nd_btt_probe(struct nd_btt *nd_btt,
365 struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
366{
367 u64 checksum;
368
369 if (!btt_sb || !ndns || !nd_btt)
370 return -ENODEV;
371
372 if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)))
373 return -ENXIO;
374
375 if (nvdimm_namespace_capacity(ndns) < SZ_16M)
376 return -ENXIO;
377
378 if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
379 return -ENODEV;
380
381 checksum = le64_to_cpu(btt_sb->checksum);
382 btt_sb->checksum = 0;
383 if (checksum != nd_btt_sb_checksum(btt_sb))
384 return -ENODEV;
385 btt_sb->checksum = cpu_to_le64(checksum);
386
387 nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
388 nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
389 if (!nd_btt->uuid)
390 return -ENOMEM;
391
392 __nd_device_register(&nd_btt->dev);
393
394 return 0;
395}
396
397int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
398{
399 int rc;
400 struct device *dev;
401 struct btt_sb *btt_sb;
402 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
403
404 if (ndns->force_raw)
405 return -ENODEV;
406
407 nvdimm_bus_lock(&ndns->dev);
408 dev = __nd_btt_create(nd_region, 0, NULL, ndns);
409 nvdimm_bus_unlock(&ndns->dev);
410 if (!dev)
411 return -ENOMEM;
412 dev_set_drvdata(dev, drvdata);
413 btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
414 rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb);
415 kfree(btt_sb);
416 dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
417 rc == 0 ? dev_name(dev) : "<none>");
418 if (rc < 0) {
419 __nd_btt_detach_ndns(to_nd_btt(dev));
420 put_device(dev);
421 }
422
423 return rc;
424}
425EXPORT_SYMBOL(nd_btt_probe);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
new file mode 100644
index 000000000000..8eb22c0ca7ce
--- /dev/null
+++ b/drivers/nvdimm/bus.c
@@ -0,0 +1,730 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/vmalloc.h>
15#include <linux/uaccess.h>
16#include <linux/module.h>
17#include <linux/blkdev.h>
18#include <linux/fcntl.h>
19#include <linux/async.h>
20#include <linux/genhd.h>
21#include <linux/ndctl.h>
22#include <linux/sched.h>
23#include <linux/slab.h>
24#include <linux/fs.h>
25#include <linux/io.h>
26#include <linux/mm.h>
27#include <linux/nd.h>
28#include "nd-core.h"
29#include "nd.h"
30
31int nvdimm_major;
32static int nvdimm_bus_major;
33static struct class *nd_class;
34
35static int to_nd_device_type(struct device *dev)
36{
37 if (is_nvdimm(dev))
38 return ND_DEVICE_DIMM;
39 else if (is_nd_pmem(dev))
40 return ND_DEVICE_REGION_PMEM;
41 else if (is_nd_blk(dev))
42 return ND_DEVICE_REGION_BLK;
43 else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
44 return nd_region_to_nstype(to_nd_region(dev->parent));
45
46 return 0;
47}
48
49static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
50{
51 /*
52 * Ensure that region devices always have their numa node set as
53 * early as possible.
54 */
55 if (is_nd_pmem(dev) || is_nd_blk(dev))
56 set_dev_node(dev, to_nd_region(dev)->numa_node);
57 return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
58 to_nd_device_type(dev));
59}
60
61static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
62{
63 struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
64
65 return test_bit(to_nd_device_type(dev), &nd_drv->type);
66}
67
68static struct module *to_bus_provider(struct device *dev)
69{
70 /* pin bus providers while regions are enabled */
71 if (is_nd_pmem(dev) || is_nd_blk(dev)) {
72 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
73
74 return nvdimm_bus->module;
75 }
76 return NULL;
77}
78
79static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus)
80{
81 nvdimm_bus_lock(&nvdimm_bus->dev);
82 nvdimm_bus->probe_active++;
83 nvdimm_bus_unlock(&nvdimm_bus->dev);
84}
85
86static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
87{
88 nvdimm_bus_lock(&nvdimm_bus->dev);
89 if (--nvdimm_bus->probe_active == 0)
90 wake_up(&nvdimm_bus->probe_wait);
91 nvdimm_bus_unlock(&nvdimm_bus->dev);
92}
93
94static int nvdimm_bus_probe(struct device *dev)
95{
96 struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
97 struct module *provider = to_bus_provider(dev);
98 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
99 int rc;
100
101 if (!try_module_get(provider))
102 return -ENXIO;
103
104 nvdimm_bus_probe_start(nvdimm_bus);
105 rc = nd_drv->probe(dev);
106 if (rc == 0)
107 nd_region_probe_success(nvdimm_bus, dev);
108 else
109 nd_region_disable(nvdimm_bus, dev);
110 nvdimm_bus_probe_end(nvdimm_bus);
111
112 dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
113 dev_name(dev), rc);
114
115 if (rc != 0)
116 module_put(provider);
117 return rc;
118}
119
120static int nvdimm_bus_remove(struct device *dev)
121{
122 struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
123 struct module *provider = to_bus_provider(dev);
124 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
125 int rc;
126
127 rc = nd_drv->remove(dev);
128 nd_region_disable(nvdimm_bus, dev);
129
130 dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
131 dev_name(dev), rc);
132 module_put(provider);
133 return rc;
134}
135
136static struct bus_type nvdimm_bus_type = {
137 .name = "nd",
138 .uevent = nvdimm_bus_uevent,
139 .match = nvdimm_bus_match,
140 .probe = nvdimm_bus_probe,
141 .remove = nvdimm_bus_remove,
142};
143
144static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
145
146void nd_synchronize(void)
147{
148 async_synchronize_full_domain(&nd_async_domain);
149}
150EXPORT_SYMBOL_GPL(nd_synchronize);
151
152static void nd_async_device_register(void *d, async_cookie_t cookie)
153{
154 struct device *dev = d;
155
156 if (device_add(dev) != 0) {
157 dev_err(dev, "%s: failed\n", __func__);
158 put_device(dev);
159 }
160 put_device(dev);
161}
162
163static void nd_async_device_unregister(void *d, async_cookie_t cookie)
164{
165 struct device *dev = d;
166
167 /* flush bus operations before delete */
168 nvdimm_bus_lock(dev);
169 nvdimm_bus_unlock(dev);
170
171 device_unregister(dev);
172 put_device(dev);
173}
174
175void __nd_device_register(struct device *dev)
176{
177 dev->bus = &nvdimm_bus_type;
178 get_device(dev);
179 async_schedule_domain(nd_async_device_register, dev,
180 &nd_async_domain);
181}
182
183void nd_device_register(struct device *dev)
184{
185 device_initialize(dev);
186 __nd_device_register(dev);
187}
188EXPORT_SYMBOL(nd_device_register);
189
190void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
191{
192 switch (mode) {
193 case ND_ASYNC:
194 get_device(dev);
195 async_schedule_domain(nd_async_device_unregister, dev,
196 &nd_async_domain);
197 break;
198 case ND_SYNC:
199 nd_synchronize();
200 device_unregister(dev);
201 break;
202 }
203}
204EXPORT_SYMBOL(nd_device_unregister);
205
206/**
207 * __nd_driver_register() - register a region or a namespace driver
208 * @nd_drv: driver to register
209 * @owner: automatically set by nd_driver_register() macro
210 * @mod_name: automatically set by nd_driver_register() macro
211 */
212int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
213 const char *mod_name)
214{
215 struct device_driver *drv = &nd_drv->drv;
216
217 if (!nd_drv->type) {
218 pr_debug("driver type bitmask not set (%pf)\n",
219 __builtin_return_address(0));
220 return -EINVAL;
221 }
222
223 if (!nd_drv->probe || !nd_drv->remove) {
224 pr_debug("->probe() and ->remove() must be specified\n");
225 return -EINVAL;
226 }
227
228 drv->bus = &nvdimm_bus_type;
229 drv->owner = owner;
230 drv->mod_name = mod_name;
231
232 return driver_register(drv);
233}
234EXPORT_SYMBOL(__nd_driver_register);
235
236int nvdimm_revalidate_disk(struct gendisk *disk)
237{
238 struct device *dev = disk->driverfs_dev;
239 struct nd_region *nd_region = to_nd_region(dev->parent);
240 const char *pol = nd_region->ro ? "only" : "write";
241
242 if (nd_region->ro == get_disk_ro(disk))
243 return 0;
244
245 dev_info(dev, "%s read-%s, marking %s read-%s\n",
246 dev_name(&nd_region->dev), pol, disk->disk_name, pol);
247 set_disk_ro(disk, nd_region->ro);
248
249 return 0;
250
251}
252EXPORT_SYMBOL(nvdimm_revalidate_disk);
253
254static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
255 char *buf)
256{
257 return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
258 to_nd_device_type(dev));
259}
260static DEVICE_ATTR_RO(modalias);
261
262static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
263 char *buf)
264{
265 return sprintf(buf, "%s\n", dev->type->name);
266}
267static DEVICE_ATTR_RO(devtype);
268
269static struct attribute *nd_device_attributes[] = {
270 &dev_attr_modalias.attr,
271 &dev_attr_devtype.attr,
272 NULL,
273};
274
275/**
276 * nd_device_attribute_group - generic attributes for all devices on an nd bus
277 */
278struct attribute_group nd_device_attribute_group = {
279 .attrs = nd_device_attributes,
280};
281EXPORT_SYMBOL_GPL(nd_device_attribute_group);
282
283static ssize_t numa_node_show(struct device *dev,
284 struct device_attribute *attr, char *buf)
285{
286 return sprintf(buf, "%d\n", dev_to_node(dev));
287}
288static DEVICE_ATTR_RO(numa_node);
289
290static struct attribute *nd_numa_attributes[] = {
291 &dev_attr_numa_node.attr,
292 NULL,
293};
294
295static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
296 int n)
297{
298 if (!IS_ENABLED(CONFIG_NUMA))
299 return 0;
300
301 return a->mode;
302}
303
304/**
305 * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
306 */
307struct attribute_group nd_numa_attribute_group = {
308 .attrs = nd_numa_attributes,
309 .is_visible = nd_numa_attr_visible,
310};
311EXPORT_SYMBOL_GPL(nd_numa_attribute_group);
312
313int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
314{
315 dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
316 struct device *dev;
317
318 dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
319 "ndctl%d", nvdimm_bus->id);
320
321 if (IS_ERR(dev)) {
322 dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
323 nvdimm_bus->id, PTR_ERR(dev));
324 return PTR_ERR(dev);
325 }
326 return 0;
327}
328
329void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
330{
331 device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
332}
333
334static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
335 [ND_CMD_IMPLEMENTED] = { },
336 [ND_CMD_SMART] = {
337 .out_num = 2,
338 .out_sizes = { 4, 8, },
339 },
340 [ND_CMD_SMART_THRESHOLD] = {
341 .out_num = 2,
342 .out_sizes = { 4, 8, },
343 },
344 [ND_CMD_DIMM_FLAGS] = {
345 .out_num = 2,
346 .out_sizes = { 4, 4 },
347 },
348 [ND_CMD_GET_CONFIG_SIZE] = {
349 .out_num = 3,
350 .out_sizes = { 4, 4, 4, },
351 },
352 [ND_CMD_GET_CONFIG_DATA] = {
353 .in_num = 2,
354 .in_sizes = { 4, 4, },
355 .out_num = 2,
356 .out_sizes = { 4, UINT_MAX, },
357 },
358 [ND_CMD_SET_CONFIG_DATA] = {
359 .in_num = 3,
360 .in_sizes = { 4, 4, UINT_MAX, },
361 .out_num = 1,
362 .out_sizes = { 4, },
363 },
364 [ND_CMD_VENDOR] = {
365 .in_num = 3,
366 .in_sizes = { 4, 4, UINT_MAX, },
367 .out_num = 3,
368 .out_sizes = { 4, 4, UINT_MAX, },
369 },
370};
371
372const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
373{
374 if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
375 return &__nd_cmd_dimm_descs[cmd];
376 return NULL;
377}
378EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);
379
380static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
381 [ND_CMD_IMPLEMENTED] = { },
382 [ND_CMD_ARS_CAP] = {
383 .in_num = 2,
384 .in_sizes = { 8, 8, },
385 .out_num = 2,
386 .out_sizes = { 4, 4, },
387 },
388 [ND_CMD_ARS_START] = {
389 .in_num = 4,
390 .in_sizes = { 8, 8, 2, 6, },
391 .out_num = 1,
392 .out_sizes = { 4, },
393 },
394 [ND_CMD_ARS_STATUS] = {
395 .out_num = 2,
396 .out_sizes = { 4, UINT_MAX, },
397 },
398};
399
400const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
401{
402 if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
403 return &__nd_cmd_bus_descs[cmd];
404 return NULL;
405}
406EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);
407
408u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
409 const struct nd_cmd_desc *desc, int idx, void *buf)
410{
411 if (idx >= desc->in_num)
412 return UINT_MAX;
413
414 if (desc->in_sizes[idx] < UINT_MAX)
415 return desc->in_sizes[idx];
416
417 if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
418 struct nd_cmd_set_config_hdr *hdr = buf;
419
420 return hdr->in_length;
421 } else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
422 struct nd_cmd_vendor_hdr *hdr = buf;
423
424 return hdr->in_length;
425 }
426
427 return UINT_MAX;
428}
429EXPORT_SYMBOL_GPL(nd_cmd_in_size);
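/*
 * Example (illustrative): for ND_CMD_SET_CONFIG_DATA the descriptor above
 * marks input field 2 as UINT_MAX (variable length), so its size is taken
 * from the nd_cmd_set_config_hdr::in_length value supplied in the
 * fixed-size header fields that precede it.
 */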
430
431u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
432 const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
433 const u32 *out_field)
434{
435 if (idx >= desc->out_num)
436 return UINT_MAX;
437
438 if (desc->out_sizes[idx] < UINT_MAX)
439 return desc->out_sizes[idx];
440
441 if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
442 return in_field[1];
443 else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
444 return out_field[1];
445 else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
446 return ND_CMD_ARS_STATUS_MAX;
447
448 return UINT_MAX;
449}
450EXPORT_SYMBOL_GPL(nd_cmd_out_size);
451
452void wait_nvdimm_bus_probe_idle(struct device *dev)
453{
454 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
455
456 do {
457 if (nvdimm_bus->probe_active == 0)
458 break;
459 nvdimm_bus_unlock(&nvdimm_bus->dev);
460 wait_event(nvdimm_bus->probe_wait,
461 nvdimm_bus->probe_active == 0);
462 nvdimm_bus_lock(&nvdimm_bus->dev);
463 } while (true);
464}
465
466/* set_config requires an idle interleave set */
467static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd)
468{
469 struct nvdimm_bus *nvdimm_bus;
470
471 if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
472 return 0;
473
474 nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
475 wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
476
477 if (atomic_read(&nvdimm->busy))
478 return -EBUSY;
479 return 0;
480}
481
482static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
483 int read_only, unsigned int ioctl_cmd, unsigned long arg)
484{
485 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
486 size_t buf_len = 0, in_len = 0, out_len = 0;
487 static char out_env[ND_CMD_MAX_ENVELOPE];
488 static char in_env[ND_CMD_MAX_ENVELOPE];
489 const struct nd_cmd_desc *desc = NULL;
490 unsigned int cmd = _IOC_NR(ioctl_cmd);
491 void __user *p = (void __user *) arg;
492 struct device *dev = &nvdimm_bus->dev;
493 const char *cmd_name, *dimm_name;
494 unsigned long dsm_mask;
495 void *buf;
496 int rc, i;
497
498 if (nvdimm) {
499 desc = nd_cmd_dimm_desc(cmd);
500 cmd_name = nvdimm_cmd_name(cmd);
501 dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0;
502 dimm_name = dev_name(&nvdimm->dev);
503 } else {
504 desc = nd_cmd_bus_desc(cmd);
505 cmd_name = nvdimm_bus_cmd_name(cmd);
506 dsm_mask = nd_desc->dsm_mask;
507 dimm_name = "bus";
508 }
509
510 if (!desc || (desc->out_num + desc->in_num == 0) ||
511 !test_bit(cmd, &dsm_mask))
512 return -ENOTTY;
513
514 /* fail write commands (when read-only) */
515 if (read_only)
516 switch (ioctl_cmd) {
517 case ND_IOCTL_VENDOR:
518 case ND_IOCTL_SET_CONFIG_DATA:
519 case ND_IOCTL_ARS_START:
520 dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
521 nvdimm ? nvdimm_cmd_name(cmd)
522 : nvdimm_bus_cmd_name(cmd));
523 return -EPERM;
524 default:
525 break;
526 }
527
528 /* process an input envelope */
529 for (i = 0; i < desc->in_num; i++) {
530 u32 in_size, copy;
531
532 in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
533 if (in_size == UINT_MAX) {
534 dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
535 __func__, dimm_name, cmd_name, i);
536 return -ENXIO;
537 }
538 if (!access_ok(VERIFY_READ, p + in_len, in_size))
539 return -EFAULT;
540 if (in_len < sizeof(in_env))
541 copy = min_t(u32, sizeof(in_env) - in_len, in_size);
542 else
543 copy = 0;
544 if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
545 return -EFAULT;
546 in_len += in_size;
547 }
548
549 /* process an output envelope */
550 for (i = 0; i < desc->out_num; i++) {
551 u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
552 (u32 *) in_env, (u32 *) out_env);
553 u32 copy;
554
555 if (out_size == UINT_MAX) {
556 dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
557 __func__, dimm_name, cmd_name, i);
558 return -EFAULT;
559 }
560 if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size))
561 return -EFAULT;
562 if (out_len < sizeof(out_env))
563 copy = min_t(u32, sizeof(out_env) - out_len, out_size);
564 else
565 copy = 0;
566 if (copy && copy_from_user(&out_env[out_len],
567 p + in_len + out_len, copy))
568 return -EFAULT;
569 out_len += out_size;
570 }
571
572 buf_len = out_len + in_len;
573 if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len)))
574 return -EFAULT;
575
576 if (buf_len > ND_IOCTL_MAX_BUFLEN) {
577 dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__,
578 dimm_name, cmd_name, buf_len,
579 ND_IOCTL_MAX_BUFLEN);
580 return -EINVAL;
581 }
582
583 buf = vmalloc(buf_len);
584 if (!buf)
585 return -ENOMEM;
586
587 if (copy_from_user(buf, p, buf_len)) {
588 rc = -EFAULT;
589 goto out;
590 }
591
592 nvdimm_bus_lock(&nvdimm_bus->dev);
593 rc = nd_cmd_clear_to_send(nvdimm, cmd);
594 if (rc)
595 goto out_unlock;
596
597 rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len);
598 if (rc < 0)
599 goto out_unlock;
600 if (copy_to_user(p, buf, buf_len))
601 rc = -EFAULT;
602 out_unlock:
603 nvdimm_bus_unlock(&nvdimm_bus->dev);
604 out:
605 vfree(buf);
606 return rc;
607}
608
609static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
610{
611 long id = (long) file->private_data;
612 int rc = -ENXIO, read_only;
613 struct nvdimm_bus *nvdimm_bus;
614
615 read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
616 mutex_lock(&nvdimm_bus_list_mutex);
617 list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
618 if (nvdimm_bus->id == id) {
619 rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg);
620 break;
621 }
622 }
623 mutex_unlock(&nvdimm_bus_list_mutex);
624
625 return rc;
626}
627
628static int match_dimm(struct device *dev, void *data)
629{
630 long id = (long) data;
631
632 if (is_nvdimm(dev)) {
633 struct nvdimm *nvdimm = to_nvdimm(dev);
634
635 return nvdimm->id == id;
636 }
637
638 return 0;
639}
640
641static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
642{
643 int rc = -ENXIO, read_only;
644 struct nvdimm_bus *nvdimm_bus;
645
646 read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
647 mutex_lock(&nvdimm_bus_list_mutex);
648 list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
649 struct device *dev = device_find_child(&nvdimm_bus->dev,
650 file->private_data, match_dimm);
651 struct nvdimm *nvdimm;
652
653 if (!dev)
654 continue;
655
656 nvdimm = to_nvdimm(dev);
657 rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg);
658 put_device(dev);
659 break;
660 }
661 mutex_unlock(&nvdimm_bus_list_mutex);
662
663 return rc;
664}
665
666static int nd_open(struct inode *inode, struct file *file)
667{
668 long minor = iminor(inode);
669
670 file->private_data = (void *) minor;
671 return 0;
672}
673
674static const struct file_operations nvdimm_bus_fops = {
675 .owner = THIS_MODULE,
676 .open = nd_open,
677 .unlocked_ioctl = nd_ioctl,
678 .compat_ioctl = nd_ioctl,
679 .llseek = noop_llseek,
680};
681
682static const struct file_operations nvdimm_fops = {
683 .owner = THIS_MODULE,
684 .open = nd_open,
685 .unlocked_ioctl = nvdimm_ioctl,
686 .compat_ioctl = nvdimm_ioctl,
687 .llseek = noop_llseek,
688};
689
690int __init nvdimm_bus_init(void)
691{
692 int rc;
693
694 rc = bus_register(&nvdimm_bus_type);
695 if (rc)
696 return rc;
697
698 rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
699 if (rc < 0)
700 goto err_bus_chrdev;
701 nvdimm_bus_major = rc;
702
703 rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
704 if (rc < 0)
705 goto err_dimm_chrdev;
706 nvdimm_major = rc;
707
708 nd_class = class_create(THIS_MODULE, "nd");
709 if (IS_ERR(nd_class))
710 goto err_class;
711
712 return 0;
713
714 err_class:
715 unregister_chrdev(nvdimm_major, "dimmctl");
716 err_dimm_chrdev:
717 unregister_chrdev(nvdimm_bus_major, "ndctl");
718 err_bus_chrdev:
719 bus_unregister(&nvdimm_bus_type);
720
721 return rc;
722}
723
724void nvdimm_bus_exit(void)
725{
726 class_destroy(nd_class);
727 unregister_chrdev(nvdimm_bus_major, "ndctl");
728 unregister_chrdev(nvdimm_major, "dimmctl");
729 bus_unregister(&nvdimm_bus_type);
730}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
new file mode 100644
index 000000000000..cb62ec6a12d0
--- /dev/null
+++ b/drivers/nvdimm/core.c
@@ -0,0 +1,465 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/libnvdimm.h>
14#include <linux/export.h>
15#include <linux/module.h>
16#include <linux/blkdev.h>
17#include <linux/device.h>
18#include <linux/ctype.h>
19#include <linux/ndctl.h>
20#include <linux/mutex.h>
21#include <linux/slab.h>
22#include "nd-core.h"
23#include "nd.h"
24
25LIST_HEAD(nvdimm_bus_list);
26DEFINE_MUTEX(nvdimm_bus_list_mutex);
27static DEFINE_IDA(nd_ida);
28
29void nvdimm_bus_lock(struct device *dev)
30{
31 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
32
33 if (!nvdimm_bus)
34 return;
35 mutex_lock(&nvdimm_bus->reconfig_mutex);
36}
37EXPORT_SYMBOL(nvdimm_bus_lock);
38
39void nvdimm_bus_unlock(struct device *dev)
40{
41 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
42
43 if (!nvdimm_bus)
44 return;
45 mutex_unlock(&nvdimm_bus->reconfig_mutex);
46}
47EXPORT_SYMBOL(nvdimm_bus_unlock);
48
49bool is_nvdimm_bus_locked(struct device *dev)
50{
51 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
52
53 if (!nvdimm_bus)
54 return false;
55 return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
56}
57EXPORT_SYMBOL(is_nvdimm_bus_locked);
58
59u64 nd_fletcher64(void *addr, size_t len, bool le)
60{
61 u32 *buf = addr;
62 u32 lo32 = 0;
63 u64 hi32 = 0;
64 int i;
65
66 for (i = 0; i < len / sizeof(u32); i++) {
67 lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i];
68 hi32 += lo32;
69 }
70
71 return hi32 << 32 | lo32;
72}
73EXPORT_SYMBOL_GPL(nd_fletcher64);
74
75static void nvdimm_bus_release(struct device *dev)
76{
77 struct nvdimm_bus *nvdimm_bus;
78
79 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
80 ida_simple_remove(&nd_ida, nvdimm_bus->id);
81 kfree(nvdimm_bus);
82}
83
84struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
85{
86 struct nvdimm_bus *nvdimm_bus;
87
88 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
89 WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
90 return nvdimm_bus;
91}
92EXPORT_SYMBOL_GPL(to_nvdimm_bus);
93
94struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
95{
96 /* struct nvdimm_bus definition is private to libnvdimm */
97 return nvdimm_bus->nd_desc;
98}
99EXPORT_SYMBOL_GPL(to_nd_desc);
100
101struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
102{
103 struct device *dev;
104
105 for (dev = nd_dev; dev; dev = dev->parent)
106 if (dev->release == nvdimm_bus_release)
107 break;
108 dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
109 if (dev)
110 return to_nvdimm_bus(dev);
111 return NULL;
112}
113
114static bool is_uuid_sep(char sep)
115{
116 if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
117 return true;
118 return false;
119}
120
121static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
122 size_t len)
123{
124 const char *str = buf;
125 u8 uuid[16];
126 int i;
127
128 for (i = 0; i < 16; i++) {
129 if (!isxdigit(str[0]) || !isxdigit(str[1])) {
130 dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
131 __func__, i, str - buf, str[0],
132 str + 1 - buf, str[1]);
133 return -EINVAL;
134 }
135
136 uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
137 str += 2;
138 if (is_uuid_sep(*str))
139 str++;
140 }
141
142 memcpy(uuid_out, uuid, sizeof(uuid));
143 return 0;
144}
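/*
 * Example (illustrative): nd_uuid_parse() consumes 32 hex digits and skips
 * a single '-' or ':' after any byte, so "d9e00e1a-3bbd-41a7-b1b2-b9bdbf7e9e5a"
 * and the same string with the dashes removed both yield the same 16 bytes.
 */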
145
146/**
147 * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
148 * @dev: container device for the uuid property
149 * @uuid_out: uuid buffer to replace
151 * @buf: raw sysfs buffer to parse
 * @len: length of @buf
151 *
152 * Enforce that uuids can only be changed while the device is disabled
153 * (driver detached)
154 * LOCKING: expects device_lock() is held on entry
155 */
156int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
157 size_t len)
158{
159 u8 uuid[16];
160 int rc;
161
162 if (dev->driver)
163 return -EBUSY;
164
165 rc = nd_uuid_parse(dev, uuid, buf, len);
166 if (rc)
167 return rc;
168
169 kfree(*uuid_out);
170 *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
171 if (!(*uuid_out))
172 return -ENOMEM;
173
174 return 0;
175}
176
177ssize_t nd_sector_size_show(unsigned long current_lbasize,
178 const unsigned long *supported, char *buf)
179{
180 ssize_t len = 0;
181 int i;
182
183 for (i = 0; supported[i]; i++)
184 if (current_lbasize == supported[i])
185 len += sprintf(buf + len, "[%ld] ", supported[i]);
186 else
187 len += sprintf(buf + len, "%ld ", supported[i]);
188 len += sprintf(buf + len, "\n");
189 return len;
190}
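/*
 * Example (illustrative): with supported = { 512, 4096, 0 } and a current
 * lbasize of 4096, the buffer produced above reads "512 [4096] \n".
 */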
191
192ssize_t nd_sector_size_store(struct device *dev, const char *buf,
193 unsigned long *current_lbasize, const unsigned long *supported)
194{
195 unsigned long lbasize;
196 int rc, i;
197
198 if (dev->driver)
199 return -EBUSY;
200
201 rc = kstrtoul(buf, 0, &lbasize);
202 if (rc)
203 return rc;
204
205 for (i = 0; supported[i]; i++)
206 if (lbasize == supported[i])
207 break;
208
209 if (supported[i]) {
210 *current_lbasize = lbasize;
211 return 0;
212 } else {
213 return -EINVAL;
214 }
215}
216
217void __nd_iostat_start(struct bio *bio, unsigned long *start)
218{
219 struct gendisk *disk = bio->bi_bdev->bd_disk;
220 const int rw = bio_data_dir(bio);
221 int cpu = part_stat_lock();
222
223 *start = jiffies;
224 part_round_stats(cpu, &disk->part0);
225 part_stat_inc(cpu, &disk->part0, ios[rw]);
226 part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
227 part_inc_in_flight(&disk->part0, rw);
228 part_stat_unlock();
229}
230EXPORT_SYMBOL(__nd_iostat_start);
231
232void nd_iostat_end(struct bio *bio, unsigned long start)
233{
234 struct gendisk *disk = bio->bi_bdev->bd_disk;
235 unsigned long duration = jiffies - start;
236 const int rw = bio_data_dir(bio);
237 int cpu = part_stat_lock();
238
239 part_stat_add(cpu, &disk->part0, ticks[rw], duration);
240 part_round_stats(cpu, &disk->part0);
241 part_dec_in_flight(&disk->part0, rw);
242 part_stat_unlock();
243}
244EXPORT_SYMBOL(nd_iostat_end);
245
246static ssize_t commands_show(struct device *dev,
247 struct device_attribute *attr, char *buf)
248{
249 int cmd, len = 0;
250 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
251 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
252
253 for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
254 len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
255 len += sprintf(buf + len, "\n");
256 return len;
257}
258static DEVICE_ATTR_RO(commands);
259
260static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
261{
262 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
263 struct device *parent = nvdimm_bus->dev.parent;
264
265 if (nd_desc->provider_name)
266 return nd_desc->provider_name;
267 else if (parent)
268 return dev_name(parent);
269 else
270 return "unknown";
271}
272
273static ssize_t provider_show(struct device *dev,
274 struct device_attribute *attr, char *buf)
275{
276 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
277
278 return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus));
279}
280static DEVICE_ATTR_RO(provider);
281
282static int flush_namespaces(struct device *dev, void *data)
283{
284 device_lock(dev);
285 device_unlock(dev);
286 return 0;
287}
288
289static int flush_regions_dimms(struct device *dev, void *data)
290{
291 device_lock(dev);
292 device_unlock(dev);
293 device_for_each_child(dev, NULL, flush_namespaces);
294 return 0;
295}
296
297static ssize_t wait_probe_show(struct device *dev,
298 struct device_attribute *attr, char *buf)
299{
300 nd_synchronize();
301 device_for_each_child(dev, NULL, flush_regions_dimms);
302 return sprintf(buf, "1\n");
303}
304static DEVICE_ATTR_RO(wait_probe);
305
306static struct attribute *nvdimm_bus_attributes[] = {
307 &dev_attr_commands.attr,
308 &dev_attr_wait_probe.attr,
309 &dev_attr_provider.attr,
310 NULL,
311};
312
313struct attribute_group nvdimm_bus_attribute_group = {
314 .attrs = nvdimm_bus_attributes,
315};
316EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
317
318struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
319 struct nvdimm_bus_descriptor *nd_desc, struct module *module)
320{
321 struct nvdimm_bus *nvdimm_bus;
322 int rc;
323
324 nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
325 if (!nvdimm_bus)
326 return NULL;
327 INIT_LIST_HEAD(&nvdimm_bus->list);
328 init_waitqueue_head(&nvdimm_bus->probe_wait);
329 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
330 mutex_init(&nvdimm_bus->reconfig_mutex);
331 if (nvdimm_bus->id < 0) {
332 kfree(nvdimm_bus);
333 return NULL;
334 }
335 nvdimm_bus->nd_desc = nd_desc;
336 nvdimm_bus->module = module;
337 nvdimm_bus->dev.parent = parent;
338 nvdimm_bus->dev.release = nvdimm_bus_release;
339 nvdimm_bus->dev.groups = nd_desc->attr_groups;
340 dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
341 rc = device_register(&nvdimm_bus->dev);
342 if (rc) {
343 dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
344 goto err;
345 }
346
347 rc = nvdimm_bus_create_ndctl(nvdimm_bus);
348 if (rc)
349 goto err;
350
351 mutex_lock(&nvdimm_bus_list_mutex);
352 list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
353 mutex_unlock(&nvdimm_bus_list_mutex);
354
355 return nvdimm_bus;
356 err:
357 put_device(&nvdimm_bus->dev);
358 return NULL;
359}
360EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
361
362static int child_unregister(struct device *dev, void *data)
363{
364 /*
365 * the singular ndctl class device per bus needs to be
366 * "device_destroy"ed, so skip it here
367 *
368 * i.e. remove classless children
369 */
370 if (dev->class)
371 /* pass */;
372 else
373 nd_device_unregister(dev, ND_SYNC);
374 return 0;
375}
376
377void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
378{
379 if (!nvdimm_bus)
380 return;
381
382 mutex_lock(&nvdimm_bus_list_mutex);
383 list_del_init(&nvdimm_bus->list);
384 mutex_unlock(&nvdimm_bus_list_mutex);
385
386 nd_synchronize();
387 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
388 nvdimm_bus_destroy_ndctl(nvdimm_bus);
389
390 device_unregister(&nvdimm_bus->dev);
391}
392EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
393
394#ifdef CONFIG_BLK_DEV_INTEGRITY
395static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter)
396{
397 return 0;
398}
399
400int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
401{
402 struct blk_integrity integrity = {
403 .name = "ND-PI-NOP",
404 .generate_fn = nd_pi_nop_generate_verify,
405 .verify_fn = nd_pi_nop_generate_verify,
406 .tuple_size = meta_size,
407 .tag_size = meta_size,
408 };
409 int ret;
410
411 if (meta_size == 0)
412 return 0;
413
414 ret = blk_integrity_register(disk, &integrity);
415 if (ret)
416 return ret;
417
418 blk_queue_max_integrity_segments(disk->queue, 1);
419
420 return 0;
421}
422EXPORT_SYMBOL(nd_integrity_init);
423
424#else /* CONFIG_BLK_DEV_INTEGRITY */
425int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
426{
427 return 0;
428}
429EXPORT_SYMBOL(nd_integrity_init);
430
431#endif
432
433static __init int libnvdimm_init(void)
434{
435 int rc;
436
437 rc = nvdimm_bus_init();
438 if (rc)
439 return rc;
440 rc = nvdimm_init();
441 if (rc)
442 goto err_dimm;
443 rc = nd_region_init();
444 if (rc)
445 goto err_region;
446 return 0;
447 err_region:
448 nvdimm_exit();
449 err_dimm:
450 nvdimm_bus_exit();
451 return rc;
452}
453
454static __exit void libnvdimm_exit(void)
455{
456 WARN_ON(!list_empty(&nvdimm_bus_list));
457 nd_region_exit();
458 nvdimm_exit();
459 nvdimm_bus_exit();
460}
461
462MODULE_LICENSE("GPL v2");
463MODULE_AUTHOR("Intel Corporation");
464subsys_initcall(libnvdimm_init);
465module_exit(libnvdimm_exit);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
new file mode 100644
index 000000000000..71d12bb67339
--- /dev/null
+++ b/drivers/nvdimm/dimm.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/vmalloc.h>
14#include <linux/module.h>
15#include <linux/device.h>
16#include <linux/sizes.h>
17#include <linux/ndctl.h>
18#include <linux/slab.h>
19#include <linux/mm.h>
20#include <linux/nd.h>
21#include "label.h"
22#include "nd.h"
23
24static int nvdimm_probe(struct device *dev)
25{
26 struct nvdimm_drvdata *ndd;
27 int rc;
28
29 ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
30 if (!ndd)
31 return -ENOMEM;
32
33 dev_set_drvdata(dev, ndd);
34 ndd->dpa.name = dev_name(dev);
35 ndd->ns_current = -1;
36 ndd->ns_next = -1;
37 ndd->dpa.start = 0;
38 ndd->dpa.end = -1;
39 ndd->dev = dev;
40 get_device(dev);
41 kref_init(&ndd->kref);
42
43 rc = nvdimm_init_nsarea(ndd);
44 if (rc)
45 goto err;
46
47 rc = nvdimm_init_config_data(ndd);
48 if (rc)
49 goto err;
50
51 dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
52
53 nvdimm_bus_lock(dev);
54 ndd->ns_current = nd_label_validate(ndd);
55 ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
56 nd_label_copy(ndd, to_next_namespace_index(ndd),
57 to_current_namespace_index(ndd));
58 rc = nd_label_reserve_dpa(ndd);
59 nvdimm_bus_unlock(dev);
60
61 if (rc)
62 goto err;
63
64 return 0;
65
66 err:
67 put_ndd(ndd);
68 return rc;
69}
70
71static int nvdimm_remove(struct device *dev)
72{
73 struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
74
75 nvdimm_bus_lock(dev);
76 dev_set_drvdata(dev, NULL);
77 nvdimm_bus_unlock(dev);
78 put_ndd(ndd);
79
80 return 0;
81}
82
83static struct nd_device_driver nvdimm_driver = {
84 .probe = nvdimm_probe,
85 .remove = nvdimm_remove,
86 .drv = {
87 .name = "nvdimm",
88 },
89 .type = ND_DRIVER_DIMM,
90};
91
92int __init nvdimm_init(void)
93{
94 return nd_driver_register(&nvdimm_driver);
95}
96
97void nvdimm_exit(void)
98{
99 driver_unregister(&nvdimm_driver.drv);
100}
101
102MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
new file mode 100644
index 000000000000..c05eb807d674
--- /dev/null
+++ b/drivers/nvdimm/dimm_devs.c
@@ -0,0 +1,551 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/vmalloc.h>
15#include <linux/device.h>
16#include <linux/ndctl.h>
17#include <linux/slab.h>
18#include <linux/io.h>
19#include <linux/fs.h>
20#include <linux/mm.h>
21#include "nd-core.h"
22#include "label.h"
23#include "nd.h"
24
25static DEFINE_IDA(dimm_ida);
26
27/*
28 * Retrieve bus and dimm handle and return if this bus supports
29 * get_config_data commands
30 */
31static int __validate_dimm(struct nvdimm_drvdata *ndd)
32{
33 struct nvdimm *nvdimm;
34
35 if (!ndd)
36 return -EINVAL;
37
38 nvdimm = to_nvdimm(ndd->dev);
39
40 if (!nvdimm->dsm_mask)
41 return -ENXIO;
42 if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask))
43 return -ENXIO;
44
45 return 0;
46}
47
48static int validate_dimm(struct nvdimm_drvdata *ndd)
49{
50 int rc = __validate_dimm(ndd);
51
52 if (rc && ndd)
53 dev_dbg(ndd->dev, "%pf: %s error: %d\n",
54 __builtin_return_address(0), __func__, rc);
55 return rc;
56}
57
58/**
59 * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area
 60 * @ndd: dimm to initialize
61 */
62int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
63{
64 struct nd_cmd_get_config_size *cmd = &ndd->nsarea;
65 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
66 struct nvdimm_bus_descriptor *nd_desc;
67 int rc = validate_dimm(ndd);
68
69 if (rc)
70 return rc;
71
72 if (cmd->config_size)
73 return 0; /* already valid */
74
75 memset(cmd, 0, sizeof(*cmd));
76 nd_desc = nvdimm_bus->nd_desc;
77 return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
78 ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd));
79}
80
81int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
82{
83 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
84 struct nd_cmd_get_config_data_hdr *cmd;
85 struct nvdimm_bus_descriptor *nd_desc;
86 int rc = validate_dimm(ndd);
87 u32 max_cmd_size, config_size;
88 size_t offset;
89
90 if (rc)
91 return rc;
92
93 if (ndd->data)
94 return 0;
95
96 if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
97 || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
98 dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
99 ndd->nsarea.max_xfer, ndd->nsarea.config_size);
100 return -ENXIO;
101 }
102
103 ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
104 if (!ndd->data)
105 ndd->data = vmalloc(ndd->nsarea.config_size);
106
107 if (!ndd->data)
108 return -ENOMEM;
109
110 max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
111 cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
112 if (!cmd)
113 return -ENOMEM;
114
115 nd_desc = nvdimm_bus->nd_desc;
116 for (config_size = ndd->nsarea.config_size, offset = 0;
117 config_size; config_size -= cmd->in_length,
118 offset += cmd->in_length) {
119 cmd->in_length = min(config_size, max_cmd_size);
120 cmd->in_offset = offset;
121 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
122 ND_CMD_GET_CONFIG_DATA, cmd,
123 cmd->in_length + sizeof(*cmd));
124 if (rc || cmd->status) {
125 rc = -ENXIO;
126 break;
127 }
128 memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
129 }
130 dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
131 kfree(cmd);
132
133 return rc;
134}
135
136int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
137 void *buf, size_t len)
138{
139 int rc = validate_dimm(ndd);
140 size_t max_cmd_size, buf_offset;
141 struct nd_cmd_set_config_hdr *cmd;
142 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
143 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
144
145 if (rc)
146 return rc;
147
148 if (!ndd->data)
149 return -ENXIO;
150
151 if (offset + len > ndd->nsarea.config_size)
152 return -ENXIO;
153
154 max_cmd_size = min_t(u32, PAGE_SIZE, len);
155 max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
156 cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
157 if (!cmd)
158 return -ENOMEM;
159
160 for (buf_offset = 0; len; len -= cmd->in_length,
161 buf_offset += cmd->in_length) {
162 size_t cmd_size;
163 u32 *status;
164
165 cmd->in_offset = offset + buf_offset;
166 cmd->in_length = min(max_cmd_size, len);
167 memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length);
168
169		/* status is output in the last 4 bytes of the command buffer */
170 cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32);
171 status = ((void *) cmd) + cmd_size - sizeof(u32);
172
173 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
174 ND_CMD_SET_CONFIG_DATA, cmd, cmd_size);
175 if (rc || *status) {
176 rc = rc ? rc : -ENXIO;
177 break;
178 }
179 }
180 kfree(cmd);
181
182 return rc;
183}
184
185static void nvdimm_release(struct device *dev)
186{
187 struct nvdimm *nvdimm = to_nvdimm(dev);
188
189 ida_simple_remove(&dimm_ida, nvdimm->id);
190 kfree(nvdimm);
191}
192
193static struct device_type nvdimm_device_type = {
194 .name = "nvdimm",
195 .release = nvdimm_release,
196};
197
198bool is_nvdimm(struct device *dev)
199{
200 return dev->type == &nvdimm_device_type;
201}
202
203struct nvdimm *to_nvdimm(struct device *dev)
204{
205 struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev);
206
207 WARN_ON(!is_nvdimm(dev));
208 return nvdimm;
209}
210EXPORT_SYMBOL_GPL(to_nvdimm);
211
212struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
213{
214 struct nd_region *nd_region = &ndbr->nd_region;
215 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
216
217 return nd_mapping->nvdimm;
218}
219EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
220
221struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
222{
223 struct nvdimm *nvdimm = nd_mapping->nvdimm;
224
225 WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
226
227 return dev_get_drvdata(&nvdimm->dev);
228}
229EXPORT_SYMBOL(to_ndd);
230
231void nvdimm_drvdata_release(struct kref *kref)
232{
233 struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref);
234 struct device *dev = ndd->dev;
235 struct resource *res, *_r;
236
237 dev_dbg(dev, "%s\n", __func__);
238
239 nvdimm_bus_lock(dev);
240 for_each_dpa_resource_safe(ndd, res, _r)
241 nvdimm_free_dpa(ndd, res);
242 nvdimm_bus_unlock(dev);
243
244 if (ndd->data && is_vmalloc_addr(ndd->data))
245 vfree(ndd->data);
246 else
247 kfree(ndd->data);
248 kfree(ndd);
249 put_device(dev);
250}
251
252void get_ndd(struct nvdimm_drvdata *ndd)
253{
254 kref_get(&ndd->kref);
255}
256
257void put_ndd(struct nvdimm_drvdata *ndd)
258{
259 if (ndd)
260 kref_put(&ndd->kref, nvdimm_drvdata_release);
261}
262
263const char *nvdimm_name(struct nvdimm *nvdimm)
264{
265 return dev_name(&nvdimm->dev);
266}
267EXPORT_SYMBOL_GPL(nvdimm_name);
268
269void *nvdimm_provider_data(struct nvdimm *nvdimm)
270{
271 if (nvdimm)
272 return nvdimm->provider_data;
273 return NULL;
274}
275EXPORT_SYMBOL_GPL(nvdimm_provider_data);
276
277static ssize_t commands_show(struct device *dev,
278 struct device_attribute *attr, char *buf)
279{
280 struct nvdimm *nvdimm = to_nvdimm(dev);
281 int cmd, len = 0;
282
283 if (!nvdimm->dsm_mask)
284 return sprintf(buf, "\n");
285
286 for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG)
287 len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd));
288 len += sprintf(buf + len, "\n");
289 return len;
290}
291static DEVICE_ATTR_RO(commands);
292
293static ssize_t state_show(struct device *dev, struct device_attribute *attr,
294 char *buf)
295{
296 struct nvdimm *nvdimm = to_nvdimm(dev);
297
298 /*
299	 * The state may be in the process of changing; userspace should
300 * quiesce probing if it wants a static answer
301 */
302 nvdimm_bus_lock(dev);
303 nvdimm_bus_unlock(dev);
304 return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy)
305 ? "active" : "idle");
306}
307static DEVICE_ATTR_RO(state);
308
309static ssize_t available_slots_show(struct device *dev,
310 struct device_attribute *attr, char *buf)
311{
312 struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
313 ssize_t rc;
314 u32 nfree;
315
316 if (!ndd)
317 return -ENXIO;
318
319 nvdimm_bus_lock(dev);
320 nfree = nd_label_nfree(ndd);
321 if (nfree - 1 > nfree) {
322 dev_WARN_ONCE(dev, 1, "we ate our last label?\n");
323 nfree = 0;
324 } else
325 nfree--;
326 rc = sprintf(buf, "%d\n", nfree);
327 nvdimm_bus_unlock(dev);
328 return rc;
329}
330static DEVICE_ATTR_RO(available_slots);
331
332static struct attribute *nvdimm_attributes[] = {
333 &dev_attr_state.attr,
334 &dev_attr_commands.attr,
335 &dev_attr_available_slots.attr,
336 NULL,
337};
338
339struct attribute_group nvdimm_attribute_group = {
340 .attrs = nvdimm_attributes,
341};
342EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
343
344struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
345 const struct attribute_group **groups, unsigned long flags,
346 unsigned long *dsm_mask)
347{
348 struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
349 struct device *dev;
350
351 if (!nvdimm)
352 return NULL;
353
354 nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL);
355 if (nvdimm->id < 0) {
356 kfree(nvdimm);
357 return NULL;
358 }
359 nvdimm->provider_data = provider_data;
360 nvdimm->flags = flags;
361 nvdimm->dsm_mask = dsm_mask;
362 atomic_set(&nvdimm->busy, 0);
363 dev = &nvdimm->dev;
364 dev_set_name(dev, "nmem%d", nvdimm->id);
365 dev->parent = &nvdimm_bus->dev;
366 dev->type = &nvdimm_device_type;
367 dev->devt = MKDEV(nvdimm_major, nvdimm->id);
368 dev->groups = groups;
369 nd_device_register(dev);
370
371 return nvdimm;
372}
373EXPORT_SYMBOL_GPL(nvdimm_create);
374
375/**
376 * nd_blk_available_dpa - account the unused dpa of BLK region
377 * @nd_mapping: container of dpa-resource-root + labels
378 *
379 * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
380 */
381resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
382{
383 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
384 resource_size_t map_end, busy = 0, available;
385 struct resource *res;
386
387 if (!ndd)
388 return 0;
389
390 map_end = nd_mapping->start + nd_mapping->size - 1;
391 for_each_dpa_resource(ndd, res)
392 if (res->start >= nd_mapping->start && res->start < map_end) {
393 resource_size_t end = min(map_end, res->end);
394
395 busy += end - res->start + 1;
396 } else if (res->end >= nd_mapping->start
397 && res->end <= map_end) {
398 busy += res->end - nd_mapping->start;
399 } else if (nd_mapping->start > res->start
400 && nd_mapping->start < res->end) {
401 /* total eclipse of the BLK region mapping */
402 busy += nd_mapping->size;
403 }
404
405 available = map_end - nd_mapping->start + 1;
406 if (busy < available)
407 return available - busy;
408 return 0;
409}
410
411/**
412 * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
413 * @nd_mapping: container of dpa-resource-root + labels
414 * @nd_region: constrain available space check to this reference region
415 * @overlap: calculate available space assuming this level of overlap
416 *
417 * Validate that a PMEM label, if present, aligns with the start of an
418 * interleave set and truncate the available size at the lowest BLK
419 * overlap point.
420 *
421 * The expectation is that this routine is called multiple times as it
422 * probes for the largest BLK encroachment for any single member DIMM of
423 * the interleave set. Once that value is determined the PMEM-limit for
424 * the set can be established.
425 */
426resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
427 struct nd_mapping *nd_mapping, resource_size_t *overlap)
428{
429 resource_size_t map_start, map_end, busy = 0, available, blk_start;
430 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
431 struct resource *res;
432 const char *reason;
433
434 if (!ndd)
435 return 0;
436
437 map_start = nd_mapping->start;
438 map_end = map_start + nd_mapping->size - 1;
439 blk_start = max(map_start, map_end + 1 - *overlap);
440 for_each_dpa_resource(ndd, res)
441 if (res->start >= map_start && res->start < map_end) {
442 if (strncmp(res->name, "blk", 3) == 0)
443 blk_start = min(blk_start, res->start);
444 else if (res->start != map_start) {
445 reason = "misaligned to iset";
446 goto err;
447 } else {
448 if (busy) {
449 reason = "duplicate overlapping PMEM reservations?";
450 goto err;
451 }
452 busy += resource_size(res);
453 continue;
454 }
455 } else if (res->end >= map_start && res->end <= map_end) {
456 if (strncmp(res->name, "blk", 3) == 0) {
457 /*
458 * If a BLK allocation overlaps the start of
459 * PMEM the entire interleave set may now only
460 * be used for BLK.
461 */
462 blk_start = map_start;
463 } else {
464 reason = "misaligned to iset";
465 goto err;
466 }
467 } else if (map_start > res->start && map_start < res->end) {
468 /* total eclipse of the mapping */
469 busy += nd_mapping->size;
470 blk_start = map_start;
471 }
472
473 *overlap = map_end + 1 - blk_start;
474 available = blk_start - map_start;
475 if (busy < available)
476 return available - busy;
477 return 0;
478
479 err:
480 /*
481 * Something is wrong, PMEM must align with the start of the
482 * interleave set, and there can only be one allocation per set.
483 */
484 nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
485 return 0;
486}
487
488void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
489{
490 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
491 kfree(res->name);
492 __release_region(&ndd->dpa, res->start, resource_size(res));
493}
494
495struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
496 struct nd_label_id *label_id, resource_size_t start,
497 resource_size_t n)
498{
499 char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL);
500 struct resource *res;
501
502 if (!name)
503 return NULL;
504
505 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
506 res = __request_region(&ndd->dpa, start, n, name, 0);
507 if (!res)
508 kfree(name);
509 return res;
510}
511
512/**
513 * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
514 * @ndd: container of dpa-resource-root + labels
515 * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
516 */
517resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
518 struct nd_label_id *label_id)
519{
520 resource_size_t allocated = 0;
521 struct resource *res;
522
523 for_each_dpa_resource(ndd, res)
524 if (strcmp(res->name, label_id->id) == 0)
525 allocated += resource_size(res);
526
527 return allocated;
528}
529
530static int count_dimms(struct device *dev, void *c)
531{
532 int *count = c;
533
534 if (is_nvdimm(dev))
535 (*count)++;
536 return 0;
537}
538
539int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
540{
541 int count = 0;
542 /* Flush any possible dimm registration failures */
543 nd_synchronize();
544
545 device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
546 dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
547 if (count != dimm_count)
548 return -ENXIO;
549 return 0;
550}
551EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
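nvdimm_init_config_data() above reads the label area in chunks bounded by min(PAGE_SIZE, max_xfer). A minimal user-space sketch of that chunking arithmetic (the helper name and types are illustrative, not part of the commit):

#include <stddef.h>
#include <stdint.h>

/* Count how many GET_CONFIG_DATA calls it takes to read the label area. */
static size_t count_config_reads(uint32_t config_size, uint32_t max_xfer,
		uint32_t page_size)
{
	uint32_t max_cmd = max_xfer < page_size ? max_xfer : page_size;
	uint32_t offset = 0;
	size_t calls = 0;

	while (offset < config_size) {
		uint32_t len = config_size - offset;

		if (len > max_cmd)
			len = max_cmd;
		offset += len;
		calls++;
	}
	/* e.g. a 128KiB label area with a 4KiB max_xfer takes 32 calls */
	return calls;
}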
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
new file mode 100644
index 000000000000..96526dcfdd37
--- /dev/null
+++ b/drivers/nvdimm/label.c
@@ -0,0 +1,927 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/device.h>
14#include <linux/ndctl.h>
15#include <linux/slab.h>
16#include <linux/io.h>
17#include <linux/nd.h>
18#include "nd-core.h"
19#include "label.h"
20#include "nd.h"
21
22static u32 best_seq(u32 a, u32 b)
23{
24 a &= NSINDEX_SEQ_MASK;
25 b &= NSINDEX_SEQ_MASK;
26
27 if (a == 0 || a == b)
28 return b;
29 else if (b == 0)
30 return a;
31 else if (nd_inc_seq(a) == b)
32 return b;
33 else
34 return a;
35}
36
37size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
38{
39 u32 index_span;
40
41 if (ndd->nsindex_size)
42 return ndd->nsindex_size;
43
44 /*
 45	 * The minimum index space is 512 bytes; with that amount of
 46	 * index we can describe ~1400 labels, which is less than a byte
47 * of overhead per label. Round up to a byte of overhead per
48 * label and determine the size of the index region. Yes, this
49 * starts to waste space at larger config_sizes, but it's
50 * unlikely we'll ever see anything but 128K.
51 */
52 index_span = ndd->nsarea.config_size / 129;
53 index_span /= NSINDEX_ALIGN * 2;
54 ndd->nsindex_size = index_span * NSINDEX_ALIGN;
55
56 return ndd->nsindex_size;
57}
58
59int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
60{
61 return ndd->nsarea.config_size / 129;
62}
63
64int nd_label_validate(struct nvdimm_drvdata *ndd)
65{
66 /*
 67	 * On media, the label format consists of two index blocks followed
68 * by an array of labels. None of these structures are ever
69 * updated in place. A sequence number tracks the current
70 * active index and the next one to write, while labels are
71 * written to free slots.
72 *
73 * +------------+
74 * | |
75 * | nsindex0 |
76 * | |
77 * +------------+
78 * | |
79 * | nsindex1 |
80 * | |
81 * +------------+
82 * | label0 |
83 * +------------+
84 * | label1 |
85 * +------------+
86 * | |
87 * ....nslot...
88 * | |
89 * +------------+
90 * | labelN |
91 * +------------+
92 */
93 struct nd_namespace_index *nsindex[] = {
94 to_namespace_index(ndd, 0),
95 to_namespace_index(ndd, 1),
96 };
97 const int num_index = ARRAY_SIZE(nsindex);
98 struct device *dev = ndd->dev;
99 bool valid[2] = { 0 };
100 int i, num_valid = 0;
101 u32 seq;
102
103 for (i = 0; i < num_index; i++) {
104 u32 nslot;
105 u8 sig[NSINDEX_SIG_LEN];
106 u64 sum_save, sum, size;
107
108 memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
109 if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
110 dev_dbg(dev, "%s: nsindex%d signature invalid\n",
111 __func__, i);
112 continue;
113 }
114 sum_save = __le64_to_cpu(nsindex[i]->checksum);
115 nsindex[i]->checksum = __cpu_to_le64(0);
116 sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
117 nsindex[i]->checksum = __cpu_to_le64(sum_save);
118 if (sum != sum_save) {
119 dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
120 __func__, i);
121 continue;
122 }
123
124 seq = __le32_to_cpu(nsindex[i]->seq);
125 if ((seq & NSINDEX_SEQ_MASK) == 0) {
126 dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
127 __func__, i, seq);
128 continue;
129 }
130
131 /* sanity check the index against expected values */
132 if (__le64_to_cpu(nsindex[i]->myoff)
133 != i * sizeof_namespace_index(ndd)) {
134 dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
135 __func__, i, (unsigned long long)
136 __le64_to_cpu(nsindex[i]->myoff));
137 continue;
138 }
139 if (__le64_to_cpu(nsindex[i]->otheroff)
140 != (!i) * sizeof_namespace_index(ndd)) {
141 dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
142 __func__, i, (unsigned long long)
143 __le64_to_cpu(nsindex[i]->otheroff));
144 continue;
145 }
146
147 size = __le64_to_cpu(nsindex[i]->mysize);
148 if (size > sizeof_namespace_index(ndd)
149 || size < sizeof(struct nd_namespace_index)) {
150 dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
151 __func__, i, size);
152 continue;
153 }
154
155 nslot = __le32_to_cpu(nsindex[i]->nslot);
156 if (nslot * sizeof(struct nd_namespace_label)
157 + 2 * sizeof_namespace_index(ndd)
158 > ndd->nsarea.config_size) {
159 dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
160 __func__, i, nslot,
161 ndd->nsarea.config_size);
162 continue;
163 }
164 valid[i] = true;
165 num_valid++;
166 }
167
168 switch (num_valid) {
169 case 0:
170 break;
171 case 1:
172 for (i = 0; i < num_index; i++)
173 if (valid[i])
174 return i;
175 /* can't have num_valid > 0 but valid[] = { false, false } */
176 WARN_ON(1);
177 break;
178 default:
179 /* pick the best index... */
180 seq = best_seq(__le32_to_cpu(nsindex[0]->seq),
181 __le32_to_cpu(nsindex[1]->seq));
182 if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK))
183 return 1;
184 else
185 return 0;
186 break;
187 }
188
189 return -1;
190}
191
192void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
193 struct nd_namespace_index *src)
194{
195 if (dst && src)
196 /* pass */;
197 else
198 return;
199
200 memcpy(dst, src, sizeof_namespace_index(ndd));
201}
202
203static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
204{
205 void *base = to_namespace_index(ndd, 0);
206
207 return base + 2 * sizeof_namespace_index(ndd);
208}
209
210static int to_slot(struct nvdimm_drvdata *ndd,
211 struct nd_namespace_label *nd_label)
212{
213 return nd_label - nd_label_base(ndd);
214}
215
216#define for_each_clear_bit_le(bit, addr, size) \
217 for ((bit) = find_next_zero_bit_le((addr), (size), 0); \
218 (bit) < (size); \
219 (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1))
220
221/**
222 * preamble_index - common variable initialization for nd_label_* routines
223 * @ndd: dimm container for the relevant label set
224 * @idx: namespace_index index
225 * @nsindex_out: on return set to the currently active namespace index
226 * @free: on return set to the free label bitmap in the index
227 * @nslot: on return set to the number of slots in the label space
228 */
229static bool preamble_index(struct nvdimm_drvdata *ndd, int idx,
230 struct nd_namespace_index **nsindex_out,
231 unsigned long **free, u32 *nslot)
232{
233 struct nd_namespace_index *nsindex;
234
235 nsindex = to_namespace_index(ndd, idx);
236 if (nsindex == NULL)
237 return false;
238
239 *free = (unsigned long *) nsindex->free;
240 *nslot = __le32_to_cpu(nsindex->nslot);
241 *nsindex_out = nsindex;
242
243 return true;
244}
245
246char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
247{
248 if (!label_id || !uuid)
249 return NULL;
250 snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb",
251 flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid);
252 return label_id->id;
253}
254
255static bool preamble_current(struct nvdimm_drvdata *ndd,
256 struct nd_namespace_index **nsindex,
257 unsigned long **free, u32 *nslot)
258{
259 return preamble_index(ndd, ndd->ns_current, nsindex,
260 free, nslot);
261}
262
263static bool preamble_next(struct nvdimm_drvdata *ndd,
264 struct nd_namespace_index **nsindex,
265 unsigned long **free, u32 *nslot)
266{
267 return preamble_index(ndd, ndd->ns_next, nsindex,
268 free, nslot);
269}
270
271static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
272{
273	/* check that the label's recorded slot matches where it was written */
274 if (slot != __le32_to_cpu(nd_label->slot))
275 return false;
276
277 /* check that DPA allocations are page aligned */
278 if ((__le64_to_cpu(nd_label->dpa)
279 | __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
280 return false;
281
282 return true;
283}
284
285int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
286{
287 struct nd_namespace_index *nsindex;
288 unsigned long *free;
289 u32 nslot, slot;
290
291 if (!preamble_current(ndd, &nsindex, &free, &nslot))
292 return 0; /* no label, nothing to reserve */
293
294 for_each_clear_bit_le(slot, free, nslot) {
295 struct nd_namespace_label *nd_label;
296 struct nd_region *nd_region = NULL;
297 u8 label_uuid[NSLABEL_UUID_LEN];
298 struct nd_label_id label_id;
299 struct resource *res;
300 u32 flags;
301
302 nd_label = nd_label_base(ndd) + slot;
303
304 if (!slot_valid(nd_label, slot))
305 continue;
306
307 memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
308 flags = __le32_to_cpu(nd_label->flags);
309 nd_label_gen_id(&label_id, label_uuid, flags);
310 res = nvdimm_allocate_dpa(ndd, &label_id,
311 __le64_to_cpu(nd_label->dpa),
312 __le64_to_cpu(nd_label->rawsize));
313 nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
314 if (!res)
315 return -EBUSY;
316 }
317
318 return 0;
319}
320
321int nd_label_active_count(struct nvdimm_drvdata *ndd)
322{
323 struct nd_namespace_index *nsindex;
324 unsigned long *free;
325 u32 nslot, slot;
326 int count = 0;
327
328 if (!preamble_current(ndd, &nsindex, &free, &nslot))
329 return 0;
330
331 for_each_clear_bit_le(slot, free, nslot) {
332 struct nd_namespace_label *nd_label;
333
334 nd_label = nd_label_base(ndd) + slot;
335
336 if (!slot_valid(nd_label, slot)) {
337 u32 label_slot = __le32_to_cpu(nd_label->slot);
338 u64 size = __le64_to_cpu(nd_label->rawsize);
339 u64 dpa = __le64_to_cpu(nd_label->dpa);
340
341 dev_dbg(ndd->dev,
342 "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
343 __func__, slot, label_slot, dpa, size);
344 continue;
345 }
346 count++;
347 }
348 return count;
349}
350
351struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
352{
353 struct nd_namespace_index *nsindex;
354 unsigned long *free;
355 u32 nslot, slot;
356
357 if (!preamble_current(ndd, &nsindex, &free, &nslot))
358 return NULL;
359
360 for_each_clear_bit_le(slot, free, nslot) {
361 struct nd_namespace_label *nd_label;
362
363 nd_label = nd_label_base(ndd) + slot;
364 if (!slot_valid(nd_label, slot))
365 continue;
366
367 if (n-- == 0)
368 return nd_label_base(ndd) + slot;
369 }
370
371 return NULL;
372}
373
374u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd)
375{
376 struct nd_namespace_index *nsindex;
377 unsigned long *free;
378 u32 nslot, slot;
379
380 if (!preamble_next(ndd, &nsindex, &free, &nslot))
381 return UINT_MAX;
382
383 WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
384
385 slot = find_next_bit_le(free, nslot, 0);
386 if (slot == nslot)
387 return UINT_MAX;
388
389 clear_bit_le(slot, free);
390
391 return slot;
392}
393
394bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot)
395{
396 struct nd_namespace_index *nsindex;
397 unsigned long *free;
398 u32 nslot;
399
400 if (!preamble_next(ndd, &nsindex, &free, &nslot))
401 return false;
402
403 WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
404
405 if (slot < nslot)
406 return !test_and_set_bit_le(slot, free);
407 return false;
408}
409
410u32 nd_label_nfree(struct nvdimm_drvdata *ndd)
411{
412 struct nd_namespace_index *nsindex;
413 unsigned long *free;
414 u32 nslot;
415
416 WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
417
418 if (!preamble_next(ndd, &nsindex, &free, &nslot))
419 return nvdimm_num_label_slots(ndd);
420
421 return bitmap_weight(free, nslot);
422}
423
424static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
425 unsigned long flags)
426{
427 struct nd_namespace_index *nsindex;
428 unsigned long offset;
429 u64 checksum;
430 u32 nslot;
431 int rc;
432
433 nsindex = to_namespace_index(ndd, index);
434 if (flags & ND_NSINDEX_INIT)
435 nslot = nvdimm_num_label_slots(ndd);
436 else
437 nslot = __le32_to_cpu(nsindex->nslot);
438
439 memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN);
440 nsindex->flags = __cpu_to_le32(0);
441 nsindex->seq = __cpu_to_le32(seq);
442 offset = (unsigned long) nsindex
443 - (unsigned long) to_namespace_index(ndd, 0);
444 nsindex->myoff = __cpu_to_le64(offset);
445 nsindex->mysize = __cpu_to_le64(sizeof_namespace_index(ndd));
446 offset = (unsigned long) to_namespace_index(ndd,
447 nd_label_next_nsindex(index))
448 - (unsigned long) to_namespace_index(ndd, 0);
449 nsindex->otheroff = __cpu_to_le64(offset);
450 offset = (unsigned long) nd_label_base(ndd)
451 - (unsigned long) to_namespace_index(ndd, 0);
452 nsindex->labeloff = __cpu_to_le64(offset);
453 nsindex->nslot = __cpu_to_le32(nslot);
454 nsindex->major = __cpu_to_le16(1);
455 nsindex->minor = __cpu_to_le16(1);
456 nsindex->checksum = __cpu_to_le64(0);
457 if (flags & ND_NSINDEX_INIT) {
458 unsigned long *free = (unsigned long *) nsindex->free;
459 u32 nfree = ALIGN(nslot, BITS_PER_LONG);
460 int last_bits, i;
461
462 memset(nsindex->free, 0xff, nfree / 8);
463 for (i = 0, last_bits = nfree - nslot; i < last_bits; i++)
464 clear_bit_le(nslot + i, free);
465 }
466 checksum = nd_fletcher64(nsindex, sizeof_namespace_index(ndd), 1);
467 nsindex->checksum = __cpu_to_le64(checksum);
468 rc = nvdimm_set_config_data(ndd, __le64_to_cpu(nsindex->myoff),
469 nsindex, sizeof_namespace_index(ndd));
470 if (rc < 0)
471 return rc;
472
473 if (flags & ND_NSINDEX_INIT)
474 return 0;
475
476 /* copy the index we just wrote to the new 'next' */
477 WARN_ON(index != ndd->ns_next);
478 nd_label_copy(ndd, to_current_namespace_index(ndd), nsindex);
479 ndd->ns_current = nd_label_next_nsindex(ndd->ns_current);
480 ndd->ns_next = nd_label_next_nsindex(ndd->ns_next);
481 WARN_ON(ndd->ns_current == ndd->ns_next);
482
483 return 0;
484}
485
486static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
487 struct nd_namespace_label *nd_label)
488{
489 return (unsigned long) nd_label
490 - (unsigned long) to_namespace_index(ndd, 0);
491}
492
493static int __pmem_label_update(struct nd_region *nd_region,
494 struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
495 int pos)
496{
497 u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize;
498 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
499 struct nd_namespace_label *victim_label;
500 struct nd_namespace_label *nd_label;
501 struct nd_namespace_index *nsindex;
502 unsigned long *free;
503 u32 nslot, slot;
504 size_t offset;
505 int rc;
506
507 if (!preamble_next(ndd, &nsindex, &free, &nslot))
508 return -ENXIO;
509
510 /* allocate and write the label to the staging (next) index */
511 slot = nd_label_alloc_slot(ndd);
512 if (slot == UINT_MAX)
513 return -ENXIO;
514 dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
515
516 nd_label = nd_label_base(ndd) + slot;
517 memset(nd_label, 0, sizeof(struct nd_namespace_label));
518 memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
519 if (nspm->alt_name)
520 memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
521 nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING);
522 nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
523 nd_label->position = __cpu_to_le16(pos);
524 nd_label->isetcookie = __cpu_to_le64(cookie);
525 rawsize = div_u64(resource_size(&nspm->nsio.res),
526 nd_region->ndr_mappings);
527 nd_label->rawsize = __cpu_to_le64(rawsize);
528 nd_label->dpa = __cpu_to_le64(nd_mapping->start);
529 nd_label->slot = __cpu_to_le32(slot);
530
531 /* update label */
532 offset = nd_label_offset(ndd, nd_label);
533 rc = nvdimm_set_config_data(ndd, offset, nd_label,
534 sizeof(struct nd_namespace_label));
535 if (rc < 0)
536 return rc;
537
538 /* Garbage collect the previous label */
539 victim_label = nd_mapping->labels[0];
540 if (victim_label) {
541 slot = to_slot(ndd, victim_label);
542 nd_label_free_slot(ndd, slot);
543 dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
544 }
545
546 /* update index */
547 rc = nd_label_write_index(ndd, ndd->ns_next,
548 nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
549 if (rc < 0)
550 return rc;
551
552 nd_mapping->labels[0] = nd_label;
553
554 return 0;
555}
556
557static void del_label(struct nd_mapping *nd_mapping, int l)
558{
559 struct nd_namespace_label *next_label, *nd_label;
560 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
561 unsigned int slot;
562 int j;
563
564 nd_label = nd_mapping->labels[l];
565 slot = to_slot(ndd, nd_label);
566 dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot);
567
568 for (j = l; (next_label = nd_mapping->labels[j + 1]); j++)
569 nd_mapping->labels[j] = next_label;
570 nd_mapping->labels[j] = NULL;
571}
572
573static bool is_old_resource(struct resource *res, struct resource **list, int n)
574{
575 int i;
576
577 if (res->flags & DPA_RESOURCE_ADJUSTED)
578 return false;
579 for (i = 0; i < n; i++)
580 if (res == list[i])
581 return true;
582 return false;
583}
584
585static struct resource *to_resource(struct nvdimm_drvdata *ndd,
586 struct nd_namespace_label *nd_label)
587{
588 struct resource *res;
589
590 for_each_dpa_resource(ndd, res) {
591 if (res->start != __le64_to_cpu(nd_label->dpa))
592 continue;
593 if (resource_size(res) != __le64_to_cpu(nd_label->rawsize))
594 continue;
595 return res;
596 }
597
598 return NULL;
599}
600
601/*
602 * 1/ Account all the labels that can be freed after this update
603 * 2/ Allocate and write the label to the staging (next) index
604 * 3/ Record the resources in the namespace device
605 */
606static int __blk_label_update(struct nd_region *nd_region,
607 struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
608 int num_labels)
609{
610 int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
611 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
612 struct nd_namespace_label *nd_label;
613 struct nd_namespace_index *nsindex;
614 unsigned long *free, *victim_map = NULL;
615 struct resource *res, **old_res_list;
616 struct nd_label_id label_id;
617 u8 uuid[NSLABEL_UUID_LEN];
618 u32 nslot, slot;
619
620 if (!preamble_next(ndd, &nsindex, &free, &nslot))
621 return -ENXIO;
622
623 old_res_list = nsblk->res;
624 nfree = nd_label_nfree(ndd);
625 old_num_resources = nsblk->num_resources;
626 nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
627
628 /*
629 * We need to loop over the old resources a few times, which seems a
630 * bit inefficient, but we need to know that we have the label
631 * space before we start mutating the tracking structures.
632 * Otherwise the recovery method of last resort for userspace is
633	 * to disable and re-enable the parent region.
634 */
635 alloc = 0;
636 for_each_dpa_resource(ndd, res) {
637 if (strcmp(res->name, label_id.id) != 0)
638 continue;
639 if (!is_old_resource(res, old_res_list, old_num_resources))
640 alloc++;
641 }
642
643 victims = 0;
644 if (old_num_resources) {
645 /* convert old local-label-map to dimm-slot victim-map */
646 victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long),
647 GFP_KERNEL);
648 if (!victim_map)
649 return -ENOMEM;
650
651 /* mark unused labels for garbage collection */
652 for_each_clear_bit_le(slot, free, nslot) {
653 nd_label = nd_label_base(ndd) + slot;
654 memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
655 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
656 continue;
657 res = to_resource(ndd, nd_label);
658 if (res && is_old_resource(res, old_res_list,
659 old_num_resources))
660 continue;
661 slot = to_slot(ndd, nd_label);
662 set_bit(slot, victim_map);
663 victims++;
664 }
665 }
666
667 /* don't allow updates that consume the last label */
668 if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
669 dev_info(&nsblk->common.dev, "insufficient label space\n");
670 kfree(victim_map);
671 return -ENOSPC;
672 }
673 /* from here on we need to abort on error */
674
675
676 /* assign all resources to the namespace before writing the labels */
677 nsblk->res = NULL;
678 nsblk->num_resources = 0;
679 for_each_dpa_resource(ndd, res) {
680 if (strcmp(res->name, label_id.id) != 0)
681 continue;
682 if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) {
683 rc = -ENOMEM;
684 goto abort;
685 }
686 }
687
688 for (i = 0; i < nsblk->num_resources; i++) {
689 size_t offset;
690
691 res = nsblk->res[i];
692 if (is_old_resource(res, old_res_list, old_num_resources))
693 continue; /* carry-over */
694 slot = nd_label_alloc_slot(ndd);
695 if (slot == UINT_MAX)
696 goto abort;
697 dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
698
699 nd_label = nd_label_base(ndd) + slot;
700 memset(nd_label, 0, sizeof(struct nd_namespace_label));
701 memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
702 if (nsblk->alt_name)
703 memcpy(nd_label->name, nsblk->alt_name,
704 NSLABEL_NAME_LEN);
705 nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
706 nd_label->nlabel = __cpu_to_le16(0); /* N/A */
707 nd_label->position = __cpu_to_le16(0); /* N/A */
708 nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
709 nd_label->dpa = __cpu_to_le64(res->start);
710 nd_label->rawsize = __cpu_to_le64(resource_size(res));
711 nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
712 nd_label->slot = __cpu_to_le32(slot);
713
714 /* update label */
715 offset = nd_label_offset(ndd, nd_label);
716 rc = nvdimm_set_config_data(ndd, offset, nd_label,
717 sizeof(struct nd_namespace_label));
718 if (rc < 0)
719 goto abort;
720 }
721
722 /* free up now unused slots in the new index */
723 for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
724 dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
725 nd_label_free_slot(ndd, slot);
726 }
727
728 /* update index */
729 rc = nd_label_write_index(ndd, ndd->ns_next,
730 nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
731 if (rc)
732 goto abort;
733
734 /*
735 * Now that the on-dimm labels are up to date, fix up the tracking
736 * entries in nd_mapping->labels
737 */
738 nlabel = 0;
739 for_each_label(l, nd_label, nd_mapping->labels) {
740 nlabel++;
741 memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
742 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
743 continue;
744 nlabel--;
745 del_label(nd_mapping, l);
746 l--; /* retry with the new label at this index */
747 }
748 if (nlabel + nsblk->num_resources > num_labels) {
749 /*
750 * Bug, we can't end up with more resources than
751 * available labels
752 */
753 WARN_ON_ONCE(1);
754 rc = -ENXIO;
755 goto out;
756 }
757
758 for_each_clear_bit_le(slot, free, nslot) {
759 nd_label = nd_label_base(ndd) + slot;
760 memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
761 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
762 continue;
763 res = to_resource(ndd, nd_label);
764 res->flags &= ~DPA_RESOURCE_ADJUSTED;
765 dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
766 l, slot);
767 nd_mapping->labels[l++] = nd_label;
768 }
769 nd_mapping->labels[l] = NULL;
770
771 out:
772 kfree(old_res_list);
773 kfree(victim_map);
774 return rc;
775
776 abort:
777 /*
778 * 1/ repair the allocated label bitmap in the index
779 * 2/ restore the resource list
780 */
781 nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd));
782 kfree(nsblk->res);
783 nsblk->res = old_res_list;
784 nsblk->num_resources = old_num_resources;
785 old_res_list = NULL;
786 goto out;
787}
788
789static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
790{
791 int i, l, old_num_labels = 0;
792 struct nd_namespace_index *nsindex;
793 struct nd_namespace_label *nd_label;
794 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
795 size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *);
796
797 for_each_label(l, nd_label, nd_mapping->labels)
798 old_num_labels++;
799
800 /*
801 * We need to preserve all the old labels for the mapping so
802 * they can be garbage collected after writing the new labels.
803 */
804 if (num_labels > old_num_labels) {
805 struct nd_namespace_label **labels;
806
807 labels = krealloc(nd_mapping->labels, size, GFP_KERNEL);
808 if (!labels)
809 return -ENOMEM;
810 nd_mapping->labels = labels;
811 }
812 if (!nd_mapping->labels)
813 return -ENOMEM;
814
815 for (i = old_num_labels; i <= num_labels; i++)
816 nd_mapping->labels[i] = NULL;
817
818 if (ndd->ns_current == -1 || ndd->ns_next == -1)
819 /* pass */;
820 else
821 return max(num_labels, old_num_labels);
822
823 nsindex = to_namespace_index(ndd, 0);
824 memset(nsindex, 0, ndd->nsarea.config_size);
825 for (i = 0; i < 2; i++) {
826 int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT);
827
828 if (rc)
829 return rc;
830 }
831 ndd->ns_next = 1;
832 ndd->ns_current = 0;
833
834 return max(num_labels, old_num_labels);
835}
836
837static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
838{
839 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
840 struct nd_namespace_label *nd_label;
841 struct nd_namespace_index *nsindex;
842 u8 label_uuid[NSLABEL_UUID_LEN];
843 int l, num_freed = 0;
844 unsigned long *free;
845 u32 nslot, slot;
846
847 if (!uuid)
848 return 0;
849
850 /* no index || no labels == nothing to delete */
851 if (!preamble_next(ndd, &nsindex, &free, &nslot)
852 || !nd_mapping->labels)
853 return 0;
854
855 for_each_label(l, nd_label, nd_mapping->labels) {
856 memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
857 if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
858 continue;
859 slot = to_slot(ndd, nd_label);
860 nd_label_free_slot(ndd, slot);
861 dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
862 del_label(nd_mapping, l);
863 num_freed++;
864 l--; /* retry with new label at this index */
865 }
866
867 if (num_freed > l) {
868 /*
869 * num_freed will only ever be > l when we delete the last
870 * label
871 */
872 kfree(nd_mapping->labels);
873 nd_mapping->labels = NULL;
874 dev_dbg(ndd->dev, "%s: no more labels\n", __func__);
875 }
876
877 return nd_label_write_index(ndd, ndd->ns_next,
878 nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
879}
880
881int nd_pmem_namespace_label_update(struct nd_region *nd_region,
882 struct nd_namespace_pmem *nspm, resource_size_t size)
883{
884 int i;
885
886 for (i = 0; i < nd_region->ndr_mappings; i++) {
887 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
888 int rc;
889
890 if (size == 0) {
891 rc = del_labels(nd_mapping, nspm->uuid);
892 if (rc)
893 return rc;
894 continue;
895 }
896
897 rc = init_labels(nd_mapping, 1);
898 if (rc < 0)
899 return rc;
900
901 rc = __pmem_label_update(nd_region, nd_mapping, nspm, i);
902 if (rc)
903 return rc;
904 }
905
906 return 0;
907}
908
909int nd_blk_namespace_label_update(struct nd_region *nd_region,
910 struct nd_namespace_blk *nsblk, resource_size_t size)
911{
912 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
913 struct resource *res;
914 int count = 0;
915
916 if (size == 0)
917 return del_labels(nd_mapping, nsblk->uuid);
918
919 for_each_dpa_resource(to_ndd(nd_mapping), res)
920 count++;
921
922 count = init_labels(nd_mapping, count);
923 if (count < 0)
924 return count;
925
926 return __blk_label_update(nd_region, nd_mapping, nsblk, count);
927}
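nd_label_validate() and best_seq() above pick the newer of the two index blocks by comparing a 2-bit cyclic sequence number. A minimal sketch of that comparison, assuming nd_inc_seq() (defined elsewhere in this series) advances 1 -> 2 -> 3 -> 1, with 0 reserved as invalid; the names below are illustrative stand-ins:

#include <stdint.h>

static uint32_t sketch_inc_seq(uint32_t seq)
{
	static const uint32_t next[] = { 0, 2, 3, 1 };

	return next[seq & 0x3];	/* 1 -> 2 -> 3 -> 1, 0 stays invalid */
}

static uint32_t sketch_best_seq(uint32_t a, uint32_t b)
{
	a &= 0x3;
	b &= 0x3;

	if (a == 0 || a == b)
		return b;
	if (b == 0)
		return a;
	/* the index whose sequence is one step ahead is the newer one */
	return sketch_inc_seq(a) == b ? b : a;
}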
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
new file mode 100644
index 000000000000..a59ef6eef2a3
--- /dev/null
+++ b/drivers/nvdimm/label.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#ifndef __LABEL_H__
14#define __LABEL_H__
15
16#include <linux/ndctl.h>
17#include <linux/sizes.h>
18#include <linux/io.h>
19
20enum {
21 NSINDEX_SIG_LEN = 16,
22 NSINDEX_ALIGN = 256,
23 NSINDEX_SEQ_MASK = 0x3,
24 NSLABEL_UUID_LEN = 16,
25 NSLABEL_NAME_LEN = 64,
26 NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */
27 NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */
28 NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */
29 NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */
30 BTT_ALIGN = 4096, /* all btt structures */
31 BTTINFO_SIG_LEN = 16,
32 BTTINFO_UUID_LEN = 16,
33 BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */
34 BTTINFO_MAJOR_VERSION = 1,
35 ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
36 ND_LABEL_ID_SIZE = 50,
37 ND_NSINDEX_INIT = 0x1,
38};
39
40static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
41
42/**
43 * struct nd_namespace_index - label set superblock
44 * @sig: NAMESPACE_INDEX\0
45 * @flags: placeholder
46 * @seq: sequence number for this index
47 * @myoff: offset of this index in label area
48 * @mysize: size of this index struct
49 * @otheroff: offset of other index
50 * @labeloff: offset of first label slot
51 * @nslot: total number of label slots
52 * @major: label area major version
53 * @minor: label area minor version
54 * @checksum: fletcher64 of all fields
55 * @free[0]: bitmap, nlabel bits
56 *
57 * The size of free[] is rounded up so the total struct size is a
58 * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond
59 * nlabel bits must be zero.
60 */
61struct nd_namespace_index {
62 u8 sig[NSINDEX_SIG_LEN];
63 __le32 flags;
64 __le32 seq;
65 __le64 myoff;
66 __le64 mysize;
67 __le64 otheroff;
68 __le64 labeloff;
69 __le32 nslot;
70 __le16 major;
71 __le16 minor;
72 __le64 checksum;
73 u8 free[0];
74};
75
76/**
77 * struct nd_namespace_label - namespace superblock
78 * @uuid: UUID per RFC 4122
79 * @name: optional name (NULL-terminated)
80 * @flags: see NSLABEL_FLAG_*
81 * @nlabel: num labels to describe this ns
82 * @position: labels position in set
83 * @isetcookie: interleave set cookie
84 * @lbasize: LBA size in bytes or 0 for pmem
85 * @dpa: DPA of NVM range on this DIMM
86 * @rawsize: size of namespace
87 * @slot: slot of this label in label area
88 * @unused: must be zero
89 */
90struct nd_namespace_label {
91 u8 uuid[NSLABEL_UUID_LEN];
92 u8 name[NSLABEL_NAME_LEN];
93 __le32 flags;
94 __le16 nlabel;
95 __le16 position;
96 __le64 isetcookie;
97 __le64 lbasize;
98 __le64 dpa;
99 __le64 rawsize;
100 __le32 slot;
101 __le32 unused;
102};
103
104/**
105 * struct nd_label_id - identifier string for dpa allocation
106 * @id: "{blk|pmem}-<namespace uuid>"
107 */
108struct nd_label_id {
109 char id[ND_LABEL_ID_SIZE];
110};
111
112/*
113 * If the 'best' index is invalid, so is the 'next' index. Otherwise,
114 * the next index is MOD(index+1, 2)
115 */
116static inline int nd_label_next_nsindex(int index)
117{
118 if (index < 0)
119 return -1;
120
121 return (index + 1) % 2;
122}
123
124struct nvdimm_drvdata;
125int nd_label_validate(struct nvdimm_drvdata *ndd);
126void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
127 struct nd_namespace_index *src);
128size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
129int nd_label_active_count(struct nvdimm_drvdata *ndd);
130struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
131u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
132bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
133u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
134struct nd_region;
135struct nd_namespace_pmem;
136struct nd_namespace_blk;
137int nd_pmem_namespace_label_update(struct nd_region *nd_region,
138 struct nd_namespace_pmem *nspm, resource_size_t size);
139int nd_blk_namespace_label_update(struct nd_region *nd_region,
140 struct nd_namespace_blk *nsblk, resource_size_t size);
141#endif /* __LABEL_H__ */
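Given the structures above, the label area layout is fully determined: nsindex0 at offset 0, nsindex1 at sizeof_namespace_index(), and an array of 128-byte struct nd_namespace_label slots following both index copies. A minimal sketch of the slot-offset arithmetic (names are illustrative, not part of the commit):

#include <stdint.h>

#define SKETCH_LABEL_SIZE	128	/* sizeof(struct nd_namespace_label) above */

/*
 * Byte offset of label slot @slot, given the per-index size returned by
 * sizeof_namespace_index(): nsindex0 sits at 0, nsindex1 at @nsindex_size,
 * and the label array follows both copies.
 */
static uint64_t sketch_label_offset(uint64_t nsindex_size, uint32_t slot)
{
	uint64_t labeloff = 2 * nsindex_size;

	return labeloff + (uint64_t)slot * SKETCH_LABEL_SIZE;
}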
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
new file mode 100644
index 000000000000..fef0dd80d4ad
--- /dev/null
+++ b/drivers/nvdimm/namespace_devs.c
@@ -0,0 +1,1870 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/module.h>
14#include <linux/device.h>
15#include <linux/slab.h>
16#include <linux/nd.h>
17#include "nd-core.h"
18#include "nd.h"
19
20static void namespace_io_release(struct device *dev)
21{
22 struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
23
24 kfree(nsio);
25}
26
27static void namespace_pmem_release(struct device *dev)
28{
29 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
30
31 kfree(nspm->alt_name);
32 kfree(nspm->uuid);
33 kfree(nspm);
34}
35
36static void namespace_blk_release(struct device *dev)
37{
38 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
39 struct nd_region *nd_region = to_nd_region(dev->parent);
40
41 if (nsblk->id >= 0)
42 ida_simple_remove(&nd_region->ns_ida, nsblk->id);
43 kfree(nsblk->alt_name);
44 kfree(nsblk->uuid);
45 kfree(nsblk->res);
46 kfree(nsblk);
47}
48
49static struct device_type namespace_io_device_type = {
50 .name = "nd_namespace_io",
51 .release = namespace_io_release,
52};
53
54static struct device_type namespace_pmem_device_type = {
55 .name = "nd_namespace_pmem",
56 .release = namespace_pmem_release,
57};
58
59static struct device_type namespace_blk_device_type = {
60 .name = "nd_namespace_blk",
61 .release = namespace_blk_release,
62};
63
64static bool is_namespace_pmem(struct device *dev)
65{
66 return dev ? dev->type == &namespace_pmem_device_type : false;
67}
68
69static bool is_namespace_blk(struct device *dev)
70{
71 return dev ? dev->type == &namespace_blk_device_type : false;
72}
73
74static bool is_namespace_io(struct device *dev)
75{
76 return dev ? dev->type == &namespace_io_device_type : false;
77}
78
79const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
80 char *name)
81{
82 struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
83 const char *suffix = "";
84
85 if (ndns->claim && is_nd_btt(ndns->claim))
86 suffix = "s";
87
88 if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev))
89 sprintf(name, "pmem%d%s", nd_region->id, suffix);
90 else if (is_namespace_blk(&ndns->dev)) {
91 struct nd_namespace_blk *nsblk;
92
93 nsblk = to_nd_namespace_blk(&ndns->dev);
94 sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix);
95 } else {
96 return NULL;
97 }
98
99 return name;
100}
101EXPORT_SYMBOL(nvdimm_namespace_disk_name);
102
103static ssize_t nstype_show(struct device *dev,
104 struct device_attribute *attr, char *buf)
105{
106 struct nd_region *nd_region = to_nd_region(dev->parent);
107
108 return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
109}
110static DEVICE_ATTR_RO(nstype);
111
112static ssize_t __alt_name_store(struct device *dev, const char *buf,
113 const size_t len)
114{
115 char *input, *pos, *alt_name, **ns_altname;
116 ssize_t rc;
117
118 if (is_namespace_pmem(dev)) {
119 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
120
121 ns_altname = &nspm->alt_name;
122 } else if (is_namespace_blk(dev)) {
123 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
124
125 ns_altname = &nsblk->alt_name;
126 } else
127 return -ENXIO;
128
129 if (dev->driver || to_ndns(dev)->claim)
130 return -EBUSY;
131
132 input = kmemdup(buf, len + 1, GFP_KERNEL);
133 if (!input)
134 return -ENOMEM;
135
136 input[len] = '\0';
137 pos = strim(input);
138 if (strlen(pos) + 1 > NSLABEL_NAME_LEN) {
139 rc = -EINVAL;
140 goto out;
141 }
142
143 alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL);
144 if (!alt_name) {
145 rc = -ENOMEM;
146 goto out;
147 }
148 kfree(*ns_altname);
149 *ns_altname = alt_name;
150 sprintf(*ns_altname, "%s", pos);
151 rc = len;
152
153out:
154 kfree(input);
155 return rc;
156}
157
158static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
159{
160 struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
161 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
162 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
163 struct nd_label_id label_id;
164 resource_size_t size = 0;
165 struct resource *res;
166
167 if (!nsblk->uuid)
168 return 0;
169 nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
170 for_each_dpa_resource(ndd, res)
171 if (strcmp(res->name, label_id.id) == 0)
172 size += resource_size(res);
173 return size;
174}
175
176static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
177{
178 struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
179 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
180 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
181 struct nd_label_id label_id;
182 struct resource *res;
183 int count, i;
184
185 if (!nsblk->uuid || !nsblk->lbasize || !ndd)
186 return false;
187
188 count = 0;
189 nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
190 for_each_dpa_resource(ndd, res) {
191 if (strcmp(res->name, label_id.id) != 0)
192 continue;
193 /*
194		 * Resources with unacknowledged adjustments indicate a
195 * failure to update labels
196 */
197 if (res->flags & DPA_RESOURCE_ADJUSTED)
198 return false;
199 count++;
200 }
201
202 /* These values match after a successful label update */
203 if (count != nsblk->num_resources)
204 return false;
205
206 for (i = 0; i < nsblk->num_resources; i++) {
207 struct resource *found = NULL;
208
209 for_each_dpa_resource(ndd, res)
210 if (res == nsblk->res[i]) {
211 found = res;
212 break;
213 }
214 /* stale resource */
215 if (!found)
216 return false;
217 }
218
219 return true;
220}
221
222resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
223{
224 resource_size_t size;
225
226 nvdimm_bus_lock(&nsblk->common.dev);
227 size = __nd_namespace_blk_validate(nsblk);
228 nvdimm_bus_unlock(&nsblk->common.dev);
229
230 return size;
231}
232EXPORT_SYMBOL(nd_namespace_blk_validate);
233
234
235static int nd_namespace_label_update(struct nd_region *nd_region,
236 struct device *dev)
237{
238 dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim,
239 "namespace must be idle during label update\n");
240 if (dev->driver || to_ndns(dev)->claim)
241 return 0;
242
243 /*
244 * Only allow label writes that will result in a valid namespace
245 * or deletion of an existing namespace.
246 */
247 if (is_namespace_pmem(dev)) {
248 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
249 resource_size_t size = resource_size(&nspm->nsio.res);
250
251 if (size == 0 && nspm->uuid)
252 /* delete allocation */;
253 else if (!nspm->uuid)
254 return 0;
255
256 return nd_pmem_namespace_label_update(nd_region, nspm, size);
257 } else if (is_namespace_blk(dev)) {
258 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
259 resource_size_t size = nd_namespace_blk_size(nsblk);
260
261 if (size == 0 && nsblk->uuid)
262 /* delete allocation */;
263 else if (!nsblk->uuid || !nsblk->lbasize)
264 return 0;
265
266 return nd_blk_namespace_label_update(nd_region, nsblk, size);
267 } else
268 return -ENXIO;
269}
270
271static ssize_t alt_name_store(struct device *dev,
272 struct device_attribute *attr, const char *buf, size_t len)
273{
274 struct nd_region *nd_region = to_nd_region(dev->parent);
275 ssize_t rc;
276
277 device_lock(dev);
278 nvdimm_bus_lock(dev);
279 wait_nvdimm_bus_probe_idle(dev);
280 rc = __alt_name_store(dev, buf, len);
281 if (rc >= 0)
282 rc = nd_namespace_label_update(nd_region, dev);
283 dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
284 nvdimm_bus_unlock(dev);
285 device_unlock(dev);
286
287 return rc < 0 ? rc : len;
288}
289
290static ssize_t alt_name_show(struct device *dev,
291 struct device_attribute *attr, char *buf)
292{
293 char *ns_altname;
294
295 if (is_namespace_pmem(dev)) {
296 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
297
298 ns_altname = nspm->alt_name;
299 } else if (is_namespace_blk(dev)) {
300 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
301
302 ns_altname = nsblk->alt_name;
303 } else
304 return -ENXIO;
305
306 return sprintf(buf, "%s\n", ns_altname ? ns_altname : "");
307}
308static DEVICE_ATTR_RW(alt_name);
309
310static int scan_free(struct nd_region *nd_region,
311 struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
312 resource_size_t n)
313{
314 bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
315 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
316 int rc = 0;
317
318 while (n) {
319 struct resource *res, *last;
320 resource_size_t new_start;
321
322 last = NULL;
323 for_each_dpa_resource(ndd, res)
324 if (strcmp(res->name, label_id->id) == 0)
325 last = res;
326 res = last;
327 if (!res)
328 return 0;
329
330 if (n >= resource_size(res)) {
331 n -= resource_size(res);
332 nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc);
333 nvdimm_free_dpa(ndd, res);
334 /* retry with last resource deleted */
335 continue;
336 }
337
338 /*
339 * Keep BLK allocations relegated to high DPA as much as
340 * possible
341 */
342 if (is_blk)
343 new_start = res->start + n;
344 else
345 new_start = res->start;
346
347 rc = adjust_resource(res, new_start, resource_size(res) - n);
348 if (rc == 0)
349 res->flags |= DPA_RESOURCE_ADJUSTED;
350 nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
351 break;
352 }
353
354 return rc;
355}
356
357/**
358 * shrink_dpa_allocation - for each dimm in region free n bytes for label_id
359 * @nd_region: the set of dimms to reclaim @n bytes from
360 * @label_id: unique identifier for the namespace consuming this dpa range
361 * @n: number of bytes per-dimm to release
362 *
363 * Assumes resources are ordered. Starting from the end try to
364 * adjust_resource() the allocation down by @n, but if @n is larger than the
365 * allocation delete it and find the 'new' last allocation in the label
366 * set.
367 */
368static int shrink_dpa_allocation(struct nd_region *nd_region,
369 struct nd_label_id *label_id, resource_size_t n)
370{
371 int i;
372
373 for (i = 0; i < nd_region->ndr_mappings; i++) {
374 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
375 int rc;
376
377 rc = scan_free(nd_region, nd_mapping, label_id, n);
378 if (rc)
379 return rc;
380 }
381
382 return 0;
383}
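/*
 * Worked example with illustrative values (not taken from any real
 * label set): a dimm holds two "pmem-<uuid>" allocations, [0x0-0x1fff]
 * and [0x3000-0x3fff], and the caller asks to free n = 0x1800 bytes.
 * scan_free() deletes the trailing 0x1000-byte resource outright
 * (n drops to 0x800) and then trims the remaining resource to 0x1800
 * bytes via adjust_resource(). For a "blk-" label the trim would move
 * res->start up by n instead, keeping the allocation at high DPA.
 */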
384
385static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
386 struct nd_region *nd_region, struct nd_mapping *nd_mapping,
387 resource_size_t n)
388{
389 bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
390 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
391 resource_size_t first_dpa;
392 struct resource *res;
393 int rc = 0;
394
395 /* allocate blk from highest dpa first */
396 if (is_blk)
397 first_dpa = nd_mapping->start + nd_mapping->size - n;
398 else
399 first_dpa = nd_mapping->start;
400
401 /* first resource allocation for this label-id or dimm */
402 res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n);
403 if (!res)
404 rc = -EBUSY;
405
406 nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc);
407 return rc ? n : 0;
408}
409
410static bool space_valid(bool is_pmem, bool is_reserve,
411 struct nd_label_id *label_id, struct resource *res)
412{
413 /*
414 * For BLK-space any space is valid, for PMEM-space, it must be
415 * contiguous with an existing allocation unless we are
416 * reserving pmem.
417 */
418 if (is_reserve || !is_pmem)
419 return true;
420 if (!res || strcmp(res->name, label_id->id) == 0)
421 return true;
422 return false;
423}
424
425enum alloc_loc {
426 ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER,
427};
428
429static resource_size_t scan_allocate(struct nd_region *nd_region,
430 struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
431 resource_size_t n)
432{
433 resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
434 bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
435 bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
436 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
437 const resource_size_t to_allocate = n;
438 struct resource *res;
439 int first;
440
441 retry:
442 first = 0;
443 for_each_dpa_resource(ndd, res) {
444 resource_size_t allocate, available = 0, free_start, free_end;
445 struct resource *next = res->sibling, *new_res = NULL;
446 enum alloc_loc loc = ALLOC_ERR;
447 const char *action;
448 int rc = 0;
449
450 /* ignore resources outside this nd_mapping */
451 if (res->start > mapping_end)
452 continue;
453 if (res->end < nd_mapping->start)
454 continue;
455
456 /* space at the beginning of the mapping */
457 if (!first++ && res->start > nd_mapping->start) {
458 free_start = nd_mapping->start;
459 available = res->start - free_start;
460 if (space_valid(is_pmem, is_reserve, label_id, NULL))
461 loc = ALLOC_BEFORE;
462 }
463
464 /* space between allocations */
465 if (!loc && next) {
466 free_start = res->start + resource_size(res);
467 free_end = min(mapping_end, next->start - 1);
468 if (space_valid(is_pmem, is_reserve, label_id, res)
469 && free_start < free_end) {
470 available = free_end + 1 - free_start;
471 loc = ALLOC_MID;
472 }
473 }
474
475 /* space at the end of the mapping */
476 if (!loc && !next) {
477 free_start = res->start + resource_size(res);
478 free_end = mapping_end;
479 if (space_valid(is_pmem, is_reserve, label_id, res)
480 && free_start < free_end) {
481 available = free_end + 1 - free_start;
482 loc = ALLOC_AFTER;
483 }
484 }
485
486 if (!loc || !available)
487 continue;
488 allocate = min(available, n);
489 switch (loc) {
490 case ALLOC_BEFORE:
491 if (strcmp(res->name, label_id->id) == 0) {
492 /* adjust current resource up */
493 if (is_pmem && !is_reserve)
494 return n;
495 rc = adjust_resource(res, res->start - allocate,
496 resource_size(res) + allocate);
497 action = "cur grow up";
498 } else
499 action = "allocate";
500 break;
501 case ALLOC_MID:
502 if (strcmp(next->name, label_id->id) == 0) {
503 /* adjust next resource up */
504 if (is_pmem && !is_reserve)
505 return n;
506 rc = adjust_resource(next, next->start
507 - allocate, resource_size(next)
508 + allocate);
509 new_res = next;
510 action = "next grow up";
511 } else if (strcmp(res->name, label_id->id) == 0) {
512 action = "grow down";
513 } else
514 action = "allocate";
515 break;
516 case ALLOC_AFTER:
517 if (strcmp(res->name, label_id->id) == 0)
518 action = "grow down";
519 else
520 action = "allocate";
521 break;
522 default:
523 return n;
524 }
525
526 if (strcmp(action, "allocate") == 0) {
527 /* BLK allocate bottom up */
528 if (!is_pmem)
529 free_start += available - allocate;
530 else if (!is_reserve && free_start != nd_mapping->start)
531 return n;
532
533 new_res = nvdimm_allocate_dpa(ndd, label_id,
534 free_start, allocate);
535 if (!new_res)
536 rc = -EBUSY;
537 } else if (strcmp(action, "grow down") == 0) {
538 /* adjust current resource down */
539 rc = adjust_resource(res, res->start, resource_size(res)
540 + allocate);
541 if (rc == 0)
542 res->flags |= DPA_RESOURCE_ADJUSTED;
543 }
544
545 if (!new_res)
546 new_res = res;
547
548 nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n",
549 action, loc, rc);
550
551 if (rc)
552 return n;
553
554 n -= allocate;
555 if (n) {
556 /*
557 * Retry scan with newly inserted resources.
558 * For example, if we did an ALLOC_BEFORE
559 * insertion there may also have been space
560 * available for an ALLOC_AFTER insertion, so we
561 * need to check this same resource again
562 */
563 goto retry;
564 } else
565 return 0;
566 }
567
568 /*
569 * If we allocated nothing in the BLK case it may be because we are in
570 * an initial "pmem-reserve pass". Only do an initial BLK allocation
571 * when none of the DPA space is reserved.
572 */
573 if ((is_pmem || !ndd->dpa.child) && n == to_allocate)
574 return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
575 return n;
576}
577
578static int merge_dpa(struct nd_region *nd_region,
579 struct nd_mapping *nd_mapping, struct nd_label_id *label_id)
580{
581 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
582 struct resource *res;
583
584 if (strncmp("pmem", label_id->id, 4) == 0)
585 return 0;
586 retry:
587 for_each_dpa_resource(ndd, res) {
588 int rc;
589 struct resource *next = res->sibling;
590 resource_size_t end = res->start + resource_size(res);
591
592 if (!next || strcmp(res->name, label_id->id) != 0
593 || strcmp(next->name, label_id->id) != 0
594 || end != next->start)
595 continue;
596 end += resource_size(next);
597 nvdimm_free_dpa(ndd, next);
598 rc = adjust_resource(res, res->start, end - res->start);
599 nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc);
600 if (rc)
601 return rc;
602 res->flags |= DPA_RESOURCE_ADJUSTED;
603 goto retry;
604 }
605
606 return 0;
607}
608
609static int __reserve_free_pmem(struct device *dev, void *data)
610{
611 struct nvdimm *nvdimm = data;
612 struct nd_region *nd_region;
613 struct nd_label_id label_id;
614 int i;
615
616 if (!is_nd_pmem(dev))
617 return 0;
618
619 nd_region = to_nd_region(dev);
620 if (nd_region->ndr_mappings == 0)
621 return 0;
622
623 memset(&label_id, 0, sizeof(label_id));
624 strcat(label_id.id, "pmem-reserve");
625 for (i = 0; i < nd_region->ndr_mappings; i++) {
626 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
627 resource_size_t n, rem = 0;
628
629 if (nd_mapping->nvdimm != nvdimm)
630 continue;
631
632 n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem);
633 if (n == 0)
634 return 0;
635 rem = scan_allocate(nd_region, nd_mapping, &label_id, n);
636 dev_WARN_ONCE(&nd_region->dev, rem,
637 "pmem reserve underrun: %#llx of %#llx bytes\n",
638 (unsigned long long) n - rem,
639 (unsigned long long) n);
640 return rem ? -ENXIO : 0;
641 }
642
643 return 0;
644}
645
646static void release_free_pmem(struct nvdimm_bus *nvdimm_bus,
647 struct nd_mapping *nd_mapping)
648{
649 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
650 struct resource *res, *_res;
651
652 for_each_dpa_resource_safe(ndd, res, _res)
653 if (strcmp(res->name, "pmem-reserve") == 0)
654 nvdimm_free_dpa(ndd, res);
655}
656
657static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
658 struct nd_mapping *nd_mapping)
659{
660 struct nvdimm *nvdimm = nd_mapping->nvdimm;
661 int rc;
662
663 rc = device_for_each_child(&nvdimm_bus->dev, nvdimm,
664 __reserve_free_pmem);
665 if (rc)
666 release_free_pmem(nvdimm_bus, nd_mapping);
667 return rc;
668}
669
670/**
671 * grow_dpa_allocation - for each dimm allocate n bytes for @label_id
672 * @nd_region: the set of dimms to allocate @n more bytes from
673 * @label_id: unique identifier for the namespace consuming this dpa range
674 * @n: number of bytes per-dimm to add to the existing allocation
675 *
676 * Assumes resources are ordered. For BLK regions, first consume
677 * BLK-only available DPA free space, then consume PMEM-aliased DPA
678 * space starting at the highest DPA. For PMEM regions start
679 * allocations from the start of an interleave set and end at the first
680 * BLK allocation or the end of the interleave set, whichever comes
681 * first.
682 */
683static int grow_dpa_allocation(struct nd_region *nd_region,
684 struct nd_label_id *label_id, resource_size_t n)
685{
686 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
687 bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
688 int i;
689
690 for (i = 0; i < nd_region->ndr_mappings; i++) {
691 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
692 resource_size_t rem = n;
693 int rc, j;
694
695 /*
696 * In the BLK case try once with all unallocated PMEM
697 * reserved, and once without
698 */
699 for (j = is_pmem; j < 2; j++) {
700 bool blk_only = j == 0;
701
702 if (blk_only) {
703 rc = reserve_free_pmem(nvdimm_bus, nd_mapping);
704 if (rc)
705 return rc;
706 }
707 rem = scan_allocate(nd_region, nd_mapping,
708 label_id, rem);
709 if (blk_only)
710 release_free_pmem(nvdimm_bus, nd_mapping);
711
712 /* try again and allow encroachments into PMEM */
713 if (rem == 0)
714 break;
715 }
716
717 dev_WARN_ONCE(&nd_region->dev, rem,
718 "allocation underrun: %#llx of %#llx bytes\n",
719 (unsigned long long) n - rem,
720 (unsigned long long) n);
721 if (rem)
722 return -ENXIO;
723
724 rc = merge_dpa(nd_region, nd_mapping, label_id);
725 if (rc)
726 return rc;
727 }
728
729 return 0;
730}
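/*
 * Sketch of the two passes above for a hypothetical BLK label on an
 * aliased dimm: pass j == 0 first pins all free PMEM-capable space
 * under the temporary "pmem-reserve" label so scan_allocate() can only
 * hand out BLK-only capacity; if that still leaves rem != 0 the
 * reservation is dropped and pass j == 1 lets the BLK allocation
 * encroach on PMEM-capable DPA toward the high end of the mapping.
 * PMEM requests skip pass 0 entirely since j starts at is_pmem.
 */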
731
732static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
733 struct nd_namespace_pmem *nspm, resource_size_t size)
734{
735 struct resource *res = &nspm->nsio.res;
736
737 res->start = nd_region->ndr_start;
738 res->end = nd_region->ndr_start + size - 1;
739}
740
741static ssize_t __size_store(struct device *dev, unsigned long long val)
742{
743 resource_size_t allocated = 0, available = 0;
744 struct nd_region *nd_region = to_nd_region(dev->parent);
745 struct nd_mapping *nd_mapping;
746 struct nvdimm_drvdata *ndd;
747 struct nd_label_id label_id;
748 u32 flags = 0, remainder;
749 u8 *uuid = NULL;
750 int rc, i;
751
752 if (dev->driver || to_ndns(dev)->claim)
753 return -EBUSY;
754
755 if (is_namespace_pmem(dev)) {
756 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
757
758 uuid = nspm->uuid;
759 } else if (is_namespace_blk(dev)) {
760 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
761
762 uuid = nsblk->uuid;
763 flags = NSLABEL_FLAG_LOCAL;
764 }
765
766 /*
767 * We need a uuid for the allocation-label and dimm(s) on which
768 * to store the label.
769 */
770 if (!uuid || nd_region->ndr_mappings == 0)
771 return -ENXIO;
772
773 div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
774 if (remainder) {
775 dev_dbg(dev, "%llu is not %dK aligned\n", val,
776 (SZ_4K * nd_region->ndr_mappings) / SZ_1K);
777 return -EINVAL;
778 }
779
780 nd_label_gen_id(&label_id, uuid, flags);
781 for (i = 0; i < nd_region->ndr_mappings; i++) {
782 nd_mapping = &nd_region->mapping[i];
783 ndd = to_ndd(nd_mapping);
784
785 /*
786 * All dimms in an interleave set, or the base dimm for a blk
787 * region, need to be enabled for the size to be changed.
788 */
789 if (!ndd)
790 return -ENXIO;
791
792 allocated += nvdimm_allocated_dpa(ndd, &label_id);
793 }
794 available = nd_region_available_dpa(nd_region);
795
796 if (val > available + allocated)
797 return -ENOSPC;
798
799 if (val == allocated)
800 return 0;
801
802 val = div_u64(val, nd_region->ndr_mappings);
803 allocated = div_u64(allocated, nd_region->ndr_mappings);
804 if (val < allocated)
805 rc = shrink_dpa_allocation(nd_region, &label_id,
806 allocated - val);
807 else
808 rc = grow_dpa_allocation(nd_region, &label_id, val - allocated);
809
810 if (rc)
811 return rc;
812
813 if (is_namespace_pmem(dev)) {
814 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
815
816 nd_namespace_pmem_set_size(nd_region, nspm,
817 val * nd_region->ndr_mappings);
818 } else if (is_namespace_blk(dev)) {
819 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
820
821 /*
822 * Try to delete the namespace if we deleted all of its
823 * allocation, this is not the seed device for the
824 * region, and it is not actively claimed by a btt
825 * instance.
826 */
827 if (val == 0 && nd_region->ns_seed != dev
828 && !nsblk->common.claim)
829 nd_device_unregister(dev, ND_ASYNC);
830 }
831
832 return rc;
833}
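/*
 * Example of the arithmetic above for a hypothetical two-way
 * interleave: with ndr_mappings == 2 only sizes that are multiples of
 * SZ_4K * 2 = 8K are accepted. Writing val = 0x20000000 (512M) passes
 * the alignment check and is then divided per dimm, so each mapping's
 * "pmem-<uuid>" allocation is grown (or shrunk) toward 0x10000000
 * (256M) of DPA.
 */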
834
835static ssize_t size_store(struct device *dev,
836 struct device_attribute *attr, const char *buf, size_t len)
837{
838 struct nd_region *nd_region = to_nd_region(dev->parent);
839 unsigned long long val;
840 u8 **uuid = NULL;
841 int rc;
842
843 rc = kstrtoull(buf, 0, &val);
844 if (rc)
845 return rc;
846
847 device_lock(dev);
848 nvdimm_bus_lock(dev);
849 wait_nvdimm_bus_probe_idle(dev);
850 rc = __size_store(dev, val);
851 if (rc >= 0)
852 rc = nd_namespace_label_update(nd_region, dev);
853
854 if (is_namespace_pmem(dev)) {
855 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
856
857 uuid = &nspm->uuid;
858 } else if (is_namespace_blk(dev)) {
859 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
860
861 uuid = &nsblk->uuid;
862 }
863
864 if (rc == 0 && val == 0 && uuid) {
865 /* setting size zero == 'delete namespace' */
866 kfree(*uuid);
867 *uuid = NULL;
868 }
869
870 dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
871 ? "fail" : "success", rc);
872
873 nvdimm_bus_unlock(dev);
874 device_unlock(dev);
875
876 return rc < 0 ? rc : len;
877}
878
879resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
880{
881 struct device *dev = &ndns->dev;
882
883 if (is_namespace_pmem(dev)) {
884 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
885
886 return resource_size(&nspm->nsio.res);
887 } else if (is_namespace_blk(dev)) {
888 return nd_namespace_blk_size(to_nd_namespace_blk(dev));
889 } else if (is_namespace_io(dev)) {
890 struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
891
892 return resource_size(&nsio->res);
893 } else
894 WARN_ONCE(1, "unknown namespace type\n");
895 return 0;
896}
897
898resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
899{
900 resource_size_t size;
901
902 nvdimm_bus_lock(&ndns->dev);
903 size = __nvdimm_namespace_capacity(ndns);
904 nvdimm_bus_unlock(&ndns->dev);
905
906 return size;
907}
908EXPORT_SYMBOL(nvdimm_namespace_capacity);
909
910static ssize_t size_show(struct device *dev,
911 struct device_attribute *attr, char *buf)
912{
913 return sprintf(buf, "%llu\n", (unsigned long long)
914 nvdimm_namespace_capacity(to_ndns(dev)));
915}
916static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
917
918static ssize_t uuid_show(struct device *dev,
919 struct device_attribute *attr, char *buf)
920{
921 u8 *uuid;
922
923 if (is_namespace_pmem(dev)) {
924 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
925
926 uuid = nspm->uuid;
927 } else if (is_namespace_blk(dev)) {
928 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
929
930 uuid = nsblk->uuid;
931 } else
932 return -ENXIO;
933
934 if (uuid)
935 return sprintf(buf, "%pUb\n", uuid);
936 return sprintf(buf, "\n");
937}
938
939/**
940 * namespace_update_uuid - check for a unique uuid and whether we're "renaming"
941 * @nd_region: parent region so we can update all dimms in the set
942 * @dev: namespace type for generating label_id
943 * @new_uuid: incoming uuid
944 * @old_uuid: reference to the uuid storage location in the namespace object
945 */
946static int namespace_update_uuid(struct nd_region *nd_region,
947 struct device *dev, u8 *new_uuid, u8 **old_uuid)
948{
949 u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0;
950 struct nd_label_id old_label_id;
951 struct nd_label_id new_label_id;
952 int i;
953
954 if (!nd_is_uuid_unique(dev, new_uuid))
955 return -EINVAL;
956
957 if (*old_uuid == NULL)
958 goto out;
959
960 /*
961 * If we've already written a label with this uuid, then it's
962 * too late to rename because we can't reliably update the uuid
963 * without losing the old namespace. Userspace must delete this
964 * namespace to abandon the old uuid.
965 */
966 for (i = 0; i < nd_region->ndr_mappings; i++) {
967 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
968
969 /*
970 * This check by itself is sufficient because old_uuid
971 * would be NULL above if this uuid did not exist in the
972 * currently written set.
973 *
974 * FIXME: can we delete uuid with zero dpa allocated?
975 */
976 if (nd_mapping->labels)
977 return -EBUSY;
978 }
979
980 nd_label_gen_id(&old_label_id, *old_uuid, flags);
981 nd_label_gen_id(&new_label_id, new_uuid, flags);
982 for (i = 0; i < nd_region->ndr_mappings; i++) {
983 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
984 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
985 struct resource *res;
986
987 for_each_dpa_resource(ndd, res)
988 if (strcmp(res->name, old_label_id.id) == 0)
989 sprintf((void *) res->name, "%s",
990 new_label_id.id);
991 }
992 kfree(*old_uuid);
993 out:
994 *old_uuid = new_uuid;
995 return 0;
996}
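/*
 * Concretely, with hypothetical uuids A and B: renaming a namespace
 * that has never been written to the label area rewrites every dpa
 * resource named "pmem-A" (or "blk-A" for a local namespace) on each
 * dimm in the set to "pmem-B"/"blk-B" and swaps the in-memory uuid
 * pointer. Once any dimm carries a written label for uuid A the store
 * fails with -EBUSY and the namespace must be deleted instead.
 */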
997
998static ssize_t uuid_store(struct device *dev,
999 struct device_attribute *attr, const char *buf, size_t len)
1000{
1001 struct nd_region *nd_region = to_nd_region(dev->parent);
1002 u8 *uuid = NULL;
1003 ssize_t rc = 0;
1004 u8 **ns_uuid;
1005
1006 if (is_namespace_pmem(dev)) {
1007 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
1008
1009 ns_uuid = &nspm->uuid;
1010 } else if (is_namespace_blk(dev)) {
1011 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
1012
1013 ns_uuid = &nsblk->uuid;
1014 } else
1015 return -ENXIO;
1016
1017 device_lock(dev);
1018 nvdimm_bus_lock(dev);
1019 wait_nvdimm_bus_probe_idle(dev);
1020 if (to_ndns(dev)->claim)
1021 rc = -EBUSY;
1022 if (rc >= 0)
1023 rc = nd_uuid_store(dev, &uuid, buf, len);
1024 if (rc >= 0)
1025 rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid);
1026 if (rc >= 0)
1027 rc = nd_namespace_label_update(nd_region, dev);
1028 else
1029 kfree(uuid);
1030 dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
1031 rc, buf, buf[len - 1] == '\n' ? "" : "\n");
1032 nvdimm_bus_unlock(dev);
1033 device_unlock(dev);
1034
1035 return rc < 0 ? rc : len;
1036}
1037static DEVICE_ATTR_RW(uuid);
1038
1039static ssize_t resource_show(struct device *dev,
1040 struct device_attribute *attr, char *buf)
1041{
1042 struct resource *res;
1043
1044 if (is_namespace_pmem(dev)) {
1045 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
1046
1047 res = &nspm->nsio.res;
1048 } else if (is_namespace_io(dev)) {
1049 struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
1050
1051 res = &nsio->res;
1052 } else
1053 return -ENXIO;
1054
1055 /* no address to convey if the namespace has no allocation */
1056 if (resource_size(res) == 0)
1057 return -ENXIO;
1058 return sprintf(buf, "%#llx\n", (unsigned long long) res->start);
1059}
1060static DEVICE_ATTR_RO(resource);
1061
1062static const unsigned long ns_lbasize_supported[] = { 512, 520, 528,
1063 4096, 4104, 4160, 4224, 0 };
1064
1065static ssize_t sector_size_show(struct device *dev,
1066 struct device_attribute *attr, char *buf)
1067{
1068 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
1069
1070 if (!is_namespace_blk(dev))
1071 return -ENXIO;
1072
1073 return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf);
1074}
1075
1076static ssize_t sector_size_store(struct device *dev,
1077 struct device_attribute *attr, const char *buf, size_t len)
1078{
1079 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
1080 struct nd_region *nd_region = to_nd_region(dev->parent);
1081 ssize_t rc = 0;
1082
1083 if (!is_namespace_blk(dev))
1084 return -ENXIO;
1085
1086 device_lock(dev);
1087 nvdimm_bus_lock(dev);
1088 if (to_ndns(dev)->claim)
1089 rc = -EBUSY;
1090 if (rc >= 0)
1091 rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
1092 ns_lbasize_supported);
1093 if (rc >= 0)
1094 rc = nd_namespace_label_update(nd_region, dev);
1095 dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
1096 rc, rc < 0 ? "tried" : "wrote", buf,
1097 buf[len - 1] == '\n' ? "" : "\n");
1098 nvdimm_bus_unlock(dev);
1099 device_unlock(dev);
1100
1101 return rc ? rc : len;
1102}
1103static DEVICE_ATTR_RW(sector_size);
1104
1105static ssize_t dpa_extents_show(struct device *dev,
1106 struct device_attribute *attr, char *buf)
1107{
1108 struct nd_region *nd_region = to_nd_region(dev->parent);
1109 struct nd_label_id label_id;
1110 int count = 0, i;
1111 u8 *uuid = NULL;
1112 u32 flags = 0;
1113
1114 nvdimm_bus_lock(dev);
1115 if (is_namespace_pmem(dev)) {
1116 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
1117
1118 uuid = nspm->uuid;
1119 flags = 0;
1120 } else if (is_namespace_blk(dev)) {
1121 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
1122
1123 uuid = nsblk->uuid;
1124 flags = NSLABEL_FLAG_LOCAL;
1125 }
1126
1127 if (!uuid)
1128 goto out;
1129
1130 nd_label_gen_id(&label_id, uuid, flags);
1131 for (i = 0; i < nd_region->ndr_mappings; i++) {
1132 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1133 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
1134 struct resource *res;
1135
1136 for_each_dpa_resource(ndd, res)
1137 if (strcmp(res->name, label_id.id) == 0)
1138 count++;
1139 }
1140 out:
1141 nvdimm_bus_unlock(dev);
1142
1143 return sprintf(buf, "%d\n", count);
1144}
1145static DEVICE_ATTR_RO(dpa_extents);
1146
1147static ssize_t holder_show(struct device *dev,
1148 struct device_attribute *attr, char *buf)
1149{
1150 struct nd_namespace_common *ndns = to_ndns(dev);
1151 ssize_t rc;
1152
1153 device_lock(dev);
1154 rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
1155 device_unlock(dev);
1156
1157 return rc;
1158}
1159static DEVICE_ATTR_RO(holder);
1160
1161static ssize_t force_raw_store(struct device *dev,
1162 struct device_attribute *attr, const char *buf, size_t len)
1163{
1164 bool force_raw;
1165 int rc = strtobool(buf, &force_raw);
1166
1167 if (rc)
1168 return rc;
1169
1170 to_ndns(dev)->force_raw = force_raw;
1171 return len;
1172}
1173
1174static ssize_t force_raw_show(struct device *dev,
1175 struct device_attribute *attr, char *buf)
1176{
1177 return sprintf(buf, "%d\n", to_ndns(dev)->force_raw);
1178}
1179static DEVICE_ATTR_RW(force_raw);
1180
1181static struct attribute *nd_namespace_attributes[] = {
1182 &dev_attr_nstype.attr,
1183 &dev_attr_size.attr,
1184 &dev_attr_uuid.attr,
1185 &dev_attr_holder.attr,
1186 &dev_attr_resource.attr,
1187 &dev_attr_alt_name.attr,
1188 &dev_attr_force_raw.attr,
1189 &dev_attr_sector_size.attr,
1190 &dev_attr_dpa_extents.attr,
1191 NULL,
1192};
1193
1194static umode_t namespace_visible(struct kobject *kobj,
1195 struct attribute *a, int n)
1196{
1197 struct device *dev = container_of(kobj, struct device, kobj);
1198
1199 if (a == &dev_attr_resource.attr) {
1200 if (is_namespace_blk(dev))
1201 return 0;
1202 return a->mode;
1203 }
1204
1205 if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
1206 if (a == &dev_attr_size.attr)
1207 return S_IWUSR | S_IRUGO;
1208
1209 if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
1210 return 0;
1211
1212 return a->mode;
1213 }
1214
1215 if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
1216 || a == &dev_attr_holder.attr
1217 || a == &dev_attr_force_raw.attr)
1218 return a->mode;
1219
1220 return 0;
1221}
1222
1223static struct attribute_group nd_namespace_attribute_group = {
1224 .attrs = nd_namespace_attributes,
1225 .is_visible = namespace_visible,
1226};
1227
1228static const struct attribute_group *nd_namespace_attribute_groups[] = {
1229 &nd_device_attribute_group,
1230 &nd_namespace_attribute_group,
1231 &nd_numa_attribute_group,
1232 NULL,
1233};
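/*
 * Userspace provisioning flow, as a minimal sketch (the
 * /sys/bus/nd/devices path and the namespace0.0 name are assumptions
 * based on the bus registration and the dev_set_name() format used
 * here):
 *
 *   1. write a uuid string to  .../namespace0.0/uuid
 *   2. write a byte count to   .../namespace0.0/size
 *      (must be a multiple of 4K times the region's mapping count)
 *   3. optionally write a label to .../namespace0.0/alt_name
 *
 * Writing 0 back to 'size' deletes the allocation and clears the uuid,
 * per size_store() above.
 */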
1234
1235struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
1236{
1237 struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
1238 struct nd_namespace_common *ndns;
1239 resource_size_t size;
1240
1241 if (nd_btt) {
1242 ndns = nd_btt->ndns;
1243 if (!ndns)
1244 return ERR_PTR(-ENODEV);
1245
1246 /*
1247 * Flush any in-progress probes / removals in the driver
1248 * for the raw personality of this namespace.
1249 */
1250 device_lock(&ndns->dev);
1251 device_unlock(&ndns->dev);
1252 if (ndns->dev.driver) {
1253 dev_dbg(&ndns->dev, "is active, can't bind %s\n",
1254 dev_name(&nd_btt->dev));
1255 return ERR_PTR(-EBUSY);
1256 }
1257 if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev,
1258 "host (%s) vs claim (%s) mismatch\n",
1259 dev_name(&nd_btt->dev),
1260 dev_name(ndns->claim)))
1261 return ERR_PTR(-ENXIO);
1262 } else {
1263 ndns = to_ndns(dev);
1264 if (ndns->claim) {
1265 dev_dbg(dev, "claimed by %s, failing probe\n",
1266 dev_name(ndns->claim));
1267
1268 return ERR_PTR(-ENXIO);
1269 }
1270 }
1271
1272 size = nvdimm_namespace_capacity(ndns);
1273 if (size < ND_MIN_NAMESPACE_SIZE) {
1274 dev_dbg(&ndns->dev, "%pa, too small, must be at least %#x\n",
1275 &size, ND_MIN_NAMESPACE_SIZE);
1276 return ERR_PTR(-ENODEV);
1277 }
1278
1279 if (is_namespace_pmem(&ndns->dev)) {
1280 struct nd_namespace_pmem *nspm;
1281
1282 nspm = to_nd_namespace_pmem(&ndns->dev);
1283 if (!nspm->uuid) {
1284 dev_dbg(&ndns->dev, "%s: uuid not set\n", __func__);
1285 return ERR_PTR(-ENODEV);
1286 }
1287 } else if (is_namespace_blk(&ndns->dev)) {
1288 struct nd_namespace_blk *nsblk;
1289
1290 nsblk = to_nd_namespace_blk(&ndns->dev);
1291 if (!nd_namespace_blk_validate(nsblk))
1292 return ERR_PTR(-ENODEV);
1293 }
1294
1295 return ndns;
1296}
1297EXPORT_SYMBOL(nvdimm_namespace_common_probe);
1298
1299static struct device **create_namespace_io(struct nd_region *nd_region)
1300{
1301 struct nd_namespace_io *nsio;
1302 struct device *dev, **devs;
1303 struct resource *res;
1304
1305 nsio = kzalloc(sizeof(*nsio), GFP_KERNEL);
1306 if (!nsio)
1307 return NULL;
1308
1309 devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
1310 if (!devs) {
1311 kfree(nsio);
1312 return NULL;
1313 }
1314
1315 dev = &nsio->common.dev;
1316 dev->type = &namespace_io_device_type;
1317 dev->parent = &nd_region->dev;
1318 res = &nsio->res;
1319 res->name = dev_name(&nd_region->dev);
1320 res->flags = IORESOURCE_MEM;
1321 res->start = nd_region->ndr_start;
1322 res->end = res->start + nd_region->ndr_size - 1;
1323
1324 devs[0] = dev;
1325 return devs;
1326}
1327
1328static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
1329 u64 cookie, u16 pos)
1330{
1331 struct nd_namespace_label *found = NULL;
1332 int i;
1333
1334 for (i = 0; i < nd_region->ndr_mappings; i++) {
1335 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1336 struct nd_namespace_label *nd_label;
1337 bool found_uuid = false;
1338 int l;
1339
1340 for_each_label(l, nd_label, nd_mapping->labels) {
1341 u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
1342 u16 position = __le16_to_cpu(nd_label->position);
1343 u16 nlabel = __le16_to_cpu(nd_label->nlabel);
1344
1345 if (isetcookie != cookie)
1346 continue;
1347
1348 if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
1349 continue;
1350
1351 if (found_uuid) {
1352 dev_dbg(to_ndd(nd_mapping)->dev,
1353 "%s duplicate entry for uuid\n",
1354 __func__);
1355 return false;
1356 }
1357 found_uuid = true;
1358 if (nlabel != nd_region->ndr_mappings)
1359 continue;
1360 if (position != pos)
1361 continue;
1362 found = nd_label;
1363 break;
1364 }
1365 if (found)
1366 break;
1367 }
1368 return found != NULL;
1369}
1370
1371static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
1372{
1373 struct nd_namespace_label *select = NULL;
1374 int i;
1375
1376 if (!pmem_id)
1377 return -ENODEV;
1378
1379 for (i = 0; i < nd_region->ndr_mappings; i++) {
1380 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1381 struct nd_namespace_label *nd_label;
1382 u64 hw_start, hw_end, pmem_start, pmem_end;
1383 int l;
1384
1385 for_each_label(l, nd_label, nd_mapping->labels)
1386 if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
1387 break;
1388
1389 if (!nd_label) {
1390 WARN_ON(1);
1391 return -EINVAL;
1392 }
1393
1394 select = nd_label;
1395 /*
1396 * Check that this label is compliant with the dpa
1397 * range published in NFIT
1398 */
1399 hw_start = nd_mapping->start;
1400 hw_end = hw_start + nd_mapping->size;
1401 pmem_start = __le64_to_cpu(select->dpa);
1402 pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
1403 if (pmem_start == hw_start && pmem_end <= hw_end)
1404 /* pass */;
1405 else
1406 return -EINVAL;
1407
1408 nd_mapping->labels[0] = select;
1409 nd_mapping->labels[1] = NULL;
1410 }
1411 return 0;
1412}
1413
1414/**
1415 * find_pmem_label_set - validate interleave set labelling, retrieve label0
1416 * @nd_region: region with mappings to validate
1417 */
1418static int find_pmem_label_set(struct nd_region *nd_region,
1419 struct nd_namespace_pmem *nspm)
1420{
1421 u64 cookie = nd_region_interleave_set_cookie(nd_region);
1422 struct nd_namespace_label *nd_label;
1423 u8 select_id[NSLABEL_UUID_LEN];
1424 resource_size_t size = 0;
1425 u8 *pmem_id = NULL;
1426 int rc = -ENODEV, l;
1427 u16 i;
1428
1429 if (cookie == 0)
1430 return -ENXIO;
1431
1432 /*
1433 * Find a complete set of labels by uuid. By definition we can start
1434 * with any mapping as the reference label
1435 */
1436 for_each_label(l, nd_label, nd_region->mapping[0].labels) {
1437 u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
1438
1439 if (isetcookie != cookie)
1440 continue;
1441
1442 for (i = 0; i < nd_region->ndr_mappings; i++)
1443 if (!has_uuid_at_pos(nd_region, nd_label->uuid,
1444 cookie, i))
1445 break;
1446 if (i < nd_region->ndr_mappings) {
1447 /*
1448 * Give up if we don't find an instance of a
1449 * uuid at each position (from 0 to
1450 * nd_region->ndr_mappings - 1), or if we find a
1451 * dimm with two instances of the same uuid.
1452 */
1453 rc = -EINVAL;
1454 goto err;
1455 } else if (pmem_id) {
1456 /*
1457 * If there is more than one valid uuid set, we
1458 * need userspace to clean this up.
1459 */
1460 rc = -EBUSY;
1461 goto err;
1462 }
1463 memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
1464 pmem_id = select_id;
1465 }
1466
1467 /*
1468 * Fix up each mapping's 'labels' to have the validated pmem label for
1469 * that position at labels[0], and NULL at labels[1]. In the process,
1470 * check that the namespace aligns with the interleave set. We know
1471 * that it does not overlap with any blk namespaces by virtue of
1472 * the dimm being enabled (i.e. nd_label_reserve_dpa()
1473 * succeeded).
1474 */
1475 rc = select_pmem_id(nd_region, pmem_id);
1476 if (rc)
1477 goto err;
1478
1479 /* Calculate total size and populate namespace properties from label0 */
1480 for (i = 0; i < nd_region->ndr_mappings; i++) {
1481 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1482 struct nd_namespace_label *label0 = nd_mapping->labels[0];
1483
1484 size += __le64_to_cpu(label0->rawsize);
1485 if (__le16_to_cpu(label0->position) != 0)
1486 continue;
1487 WARN_ON(nspm->alt_name || nspm->uuid);
1488 nspm->alt_name = kmemdup((void __force *) label0->name,
1489 NSLABEL_NAME_LEN, GFP_KERNEL);
1490 nspm->uuid = kmemdup((void __force *) label0->uuid,
1491 NSLABEL_UUID_LEN, GFP_KERNEL);
1492 }
1493
1494 if (!nspm->alt_name || !nspm->uuid) {
1495 rc = -ENOMEM;
1496 goto err;
1497 }
1498
1499 nd_namespace_pmem_set_size(nd_region, nspm, size);
1500
1501 return 0;
1502 err:
1503 switch (rc) {
1504 case -EINVAL:
1505 dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
1506 break;
1507 case -ENODEV:
1508 dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
1509 break;
1510 default:
1511 dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
1512 __func__, rc);
1513 break;
1514 }
1515 return rc;
1516}
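/*
 * Example of a complete set (hypothetical two-way interleave): the
 * region has ndr_mappings == 2, so the same uuid must appear with
 * nlabel == 2 at position 0 on the first dimm and position 1 on the
 * second, each label carrying the region's interleave-set cookie. The
 * namespace size is then the sum of the two rawsize fields, and
 * alt_name/uuid are copied from the label at position 0.
 */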
1517
1518static struct device **create_namespace_pmem(struct nd_region *nd_region)
1519{
1520 struct nd_namespace_pmem *nspm;
1521 struct device *dev, **devs;
1522 struct resource *res;
1523 int rc;
1524
1525 nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
1526 if (!nspm)
1527 return NULL;
1528
1529 dev = &nspm->nsio.common.dev;
1530 dev->type = &namespace_pmem_device_type;
1531 dev->parent = &nd_region->dev;
1532 res = &nspm->nsio.res;
1533 res->name = dev_name(&nd_region->dev);
1534 res->flags = IORESOURCE_MEM;
1535 rc = find_pmem_label_set(nd_region, nspm);
1536 if (rc == -ENODEV) {
1537 int i;
1538
1539 /* Pass, try to permit namespace creation... */
1540 for (i = 0; i < nd_region->ndr_mappings; i++) {
1541 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1542
1543 kfree(nd_mapping->labels);
1544 nd_mapping->labels = NULL;
1545 }
1546
1547 /* Publish a zero-sized namespace for userspace to configure. */
1548 nd_namespace_pmem_set_size(nd_region, nspm, 0);
1549
1550 rc = 0;
1551 } else if (rc)
1552 goto err;
1553
1554 devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
1555 if (!devs)
1556 goto err;
1557
1558 devs[0] = dev;
1559 return devs;
1560
1561 err:
1562 namespace_pmem_release(&nspm->nsio.common.dev);
1563 return NULL;
1564}
1565
1566struct resource *nsblk_add_resource(struct nd_region *nd_region,
1567 struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
1568 resource_size_t start)
1569{
1570 struct nd_label_id label_id;
1571 struct resource *res;
1572
1573 nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
1574 res = krealloc(nsblk->res,
1575 sizeof(void *) * (nsblk->num_resources + 1),
1576 GFP_KERNEL);
1577 if (!res)
1578 return NULL;
1579 nsblk->res = (struct resource **) res;
1580 for_each_dpa_resource(ndd, res)
1581 if (strcmp(res->name, label_id.id) == 0
1582 && res->start == start) {
1583 nsblk->res[nsblk->num_resources++] = res;
1584 return res;
1585 }
1586 return NULL;
1587}
1588
1589static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
1590{
1591 struct nd_namespace_blk *nsblk;
1592 struct device *dev;
1593
1594 if (!is_nd_blk(&nd_region->dev))
1595 return NULL;
1596
1597 nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
1598 if (!nsblk)
1599 return NULL;
1600
1601 dev = &nsblk->common.dev;
1602 dev->type = &namespace_blk_device_type;
1603 nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
1604 if (nsblk->id < 0) {
1605 kfree(nsblk);
1606 return NULL;
1607 }
1608 dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id);
1609 dev->parent = &nd_region->dev;
1610 dev->groups = nd_namespace_attribute_groups;
1611
1612 return &nsblk->common.dev;
1613}
1614
1615void nd_region_create_blk_seed(struct nd_region *nd_region)
1616{
1617 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
1618 nd_region->ns_seed = nd_namespace_blk_create(nd_region);
1619 /*
1620 * Seed creation failures are not fatal, provisioning is simply
1621 * disabled until memory becomes available
1622 */
1623 if (!nd_region->ns_seed)
1624 dev_err(&nd_region->dev, "failed to create blk namespace\n");
1625 else
1626 nd_device_register(nd_region->ns_seed);
1627}
1628
1629void nd_region_create_btt_seed(struct nd_region *nd_region)
1630{
1631 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
1632 nd_region->btt_seed = nd_btt_create(nd_region);
1633 /*
1634 * Seed creation failures are not fatal, provisioning is simply
1635 * disabled until memory becomes available
1636 */
1637 if (!nd_region->btt_seed)
1638 dev_err(&nd_region->dev, "failed to create btt namespace\n");
1639}
1640
1641static struct device **create_namespace_blk(struct nd_region *nd_region)
1642{
1643 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
1644 struct nd_namespace_label *nd_label;
1645 struct device *dev, **devs = NULL;
1646 struct nd_namespace_blk *nsblk;
1647 struct nvdimm_drvdata *ndd;
1648 int i, l, count = 0;
1649 struct resource *res;
1650
1651 if (nd_region->ndr_mappings == 0)
1652 return NULL;
1653
1654 ndd = to_ndd(nd_mapping);
1655 for_each_label(l, nd_label, nd_mapping->labels) {
1656 u32 flags = __le32_to_cpu(nd_label->flags);
1657 char name[NSLABEL_NAME_LEN];
1658 struct device **__devs;
1659
1660 if (flags & NSLABEL_FLAG_LOCAL)
1661 /* pass */;
1662 else
1663 continue;
1664
1665 for (i = 0; i < count; i++) {
1666 nsblk = to_nd_namespace_blk(devs[i]);
1667 if (memcmp(nsblk->uuid, nd_label->uuid,
1668 NSLABEL_UUID_LEN) == 0) {
1669 res = nsblk_add_resource(nd_region, ndd, nsblk,
1670 __le64_to_cpu(nd_label->dpa));
1671 if (!res)
1672 goto err;
1673 nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
1674 dev_name(&nsblk->common.dev));
1675 break;
1676 }
1677 }
1678 if (i < count)
1679 continue;
1680 __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
1681 if (!__devs)
1682 goto err;
1683 memcpy(__devs, devs, sizeof(dev) * count);
1684 kfree(devs);
1685 devs = __devs;
1686
1687 nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
1688 if (!nsblk)
1689 goto err;
1690 dev = &nsblk->common.dev;
1691 dev->type = &namespace_blk_device_type;
1692 dev->parent = &nd_region->dev;
1693 dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
1694 devs[count++] = dev;
1695 nsblk->id = -1;
1696 nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
1697 nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
1698 GFP_KERNEL);
1699 if (!nsblk->uuid)
1700 goto err;
1701 memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
1702 if (name[0])
1703 nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
1704 GFP_KERNEL);
1705 res = nsblk_add_resource(nd_region, ndd, nsblk,
1706 __le64_to_cpu(nd_label->dpa));
1707 if (!res)
1708 goto err;
1709 nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
1710 dev_name(&nsblk->common.dev));
1711 }
1712
1713 dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
1714 __func__, count, count == 1 ? "" : "s");
1715
1716 if (count == 0) {
1717 /* Publish a zero-sized namespace for userspace to configure. */
1718 for (i = 0; i < nd_region->ndr_mappings; i++) {
1719 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1720
1721 kfree(nd_mapping->labels);
1722 nd_mapping->labels = NULL;
1723 }
1724
1725 devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
1726 if (!devs)
1727 goto err;
1728 nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
1729 if (!nsblk)
1730 goto err;
1731 dev = &nsblk->common.dev;
1732 dev->type = &namespace_blk_device_type;
1733 dev->parent = &nd_region->dev;
1734 devs[count++] = dev;
1735 }
1736
1737 return devs;
1738
1739err:
1740 for (i = 0; i < count; i++) {
1741 nsblk = to_nd_namespace_blk(devs[i]);
1742 namespace_blk_release(&nsblk->common.dev);
1743 }
1744 kfree(devs);
1745 return NULL;
1746}
1747
1748static int init_active_labels(struct nd_region *nd_region)
1749{
1750 int i;
1751
1752 for (i = 0; i < nd_region->ndr_mappings; i++) {
1753 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1754 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
1755 struct nvdimm *nvdimm = nd_mapping->nvdimm;
1756 int count, j;
1757
1758 /*
1759 * If the dimm is disabled then prevent the region from
1760 * being activated if it aliases DPA.
1761 */
1762 if (!ndd) {
1763 if ((nvdimm->flags & NDD_ALIASING) == 0)
1764 return 0;
1765 dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n",
1766 dev_name(&nd_mapping->nvdimm->dev));
1767 return -ENXIO;
1768 }
1769 nd_mapping->ndd = ndd;
1770 atomic_inc(&nvdimm->busy);
1771 get_ndd(ndd);
1772
1773 count = nd_label_active_count(ndd);
1774 dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
1775 if (!count)
1776 continue;
1777 nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
1778 GFP_KERNEL);
1779 if (!nd_mapping->labels)
1780 return -ENOMEM;
1781 for (j = 0; j < count; j++) {
1782 struct nd_namespace_label *label;
1783
1784 label = nd_label_active(ndd, j);
1785 nd_mapping->labels[j] = label;
1786 }
1787 }
1788
1789 return 0;
1790}
1791
1792int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
1793{
1794 struct device **devs = NULL;
1795 int i, rc = 0, type;
1796
1797 *err = 0;
1798 nvdimm_bus_lock(&nd_region->dev);
1799 rc = init_active_labels(nd_region);
1800 if (rc) {
1801 nvdimm_bus_unlock(&nd_region->dev);
1802 return rc;
1803 }
1804
1805 type = nd_region_to_nstype(nd_region);
1806 switch (type) {
1807 case ND_DEVICE_NAMESPACE_IO:
1808 devs = create_namespace_io(nd_region);
1809 break;
1810 case ND_DEVICE_NAMESPACE_PMEM:
1811 devs = create_namespace_pmem(nd_region);
1812 break;
1813 case ND_DEVICE_NAMESPACE_BLK:
1814 devs = create_namespace_blk(nd_region);
1815 break;
1816 default:
1817 break;
1818 }
1819 nvdimm_bus_unlock(&nd_region->dev);
1820
1821 if (!devs)
1822 return -ENODEV;
1823
1824 for (i = 0; devs[i]; i++) {
1825 struct device *dev = devs[i];
1826 int id;
1827
1828 if (type == ND_DEVICE_NAMESPACE_BLK) {
1829 struct nd_namespace_blk *nsblk;
1830
1831 nsblk = to_nd_namespace_blk(dev);
1832 id = ida_simple_get(&nd_region->ns_ida, 0, 0,
1833 GFP_KERNEL);
1834 nsblk->id = id;
1835 } else
1836 id = i;
1837
1838 if (id < 0)
1839 break;
1840 dev_set_name(dev, "namespace%d.%d", nd_region->id, id);
1841 dev->groups = nd_namespace_attribute_groups;
1842 nd_device_register(dev);
1843 }
1844 if (i)
1845 nd_region->ns_seed = devs[0];
1846
1847 if (devs[i]) {
1848 int j;
1849
1850 for (j = i; devs[j]; j++) {
1851 struct device *dev = devs[j];
1852
1853 device_initialize(dev);
1854 put_device(dev);
1855 }
1856 *err = j - i;
1857 /*
1858 * All of the namespaces we tried to register failed, so
1859 * fail region activation.
1860 */
1861 if (*err == 0)
1862 rc = -ENODEV;
1863 }
1864 kfree(devs);
1865
1866 if (rc == -ENODEV)
1867 return rc;
1868
1869 return i;
1870}
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
new file mode 100644
index 000000000000..e1970c71ad1c
--- /dev/null
+++ b/drivers/nvdimm/nd-core.h
@@ -0,0 +1,83 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#ifndef __ND_CORE_H__
14#define __ND_CORE_H__
15#include <linux/libnvdimm.h>
16#include <linux/device.h>
17#include <linux/libnvdimm.h>
18#include <linux/sizes.h>
19#include <linux/mutex.h>
20#include <linux/nd.h>
21
22extern struct list_head nvdimm_bus_list;
23extern struct mutex nvdimm_bus_list_mutex;
24extern int nvdimm_major;
25
26struct nvdimm_bus {
27 struct nvdimm_bus_descriptor *nd_desc;
28 wait_queue_head_t probe_wait;
29 struct module *module;
30 struct list_head list;
31 struct device dev;
32 int id, probe_active;
33 struct mutex reconfig_mutex;
34};
35
36struct nvdimm {
37 unsigned long flags;
38 void *provider_data;
39 unsigned long *dsm_mask;
40 struct device dev;
41 atomic_t busy;
42 int id;
43};
44
45bool is_nvdimm(struct device *dev);
46bool is_nd_pmem(struct device *dev);
47bool is_nd_blk(struct device *dev);
48struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
49int __init nvdimm_bus_init(void);
50void nvdimm_bus_exit(void);
51void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
52struct nd_region;
53void nd_region_create_blk_seed(struct nd_region *nd_region);
54void nd_region_create_btt_seed(struct nd_region *nd_region);
55void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
56int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
57void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
58void nd_synchronize(void);
59int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
60int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
61int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
62void __nd_device_register(struct device *dev);
63int nd_match_dimm(struct device *dev, void *data);
64struct nd_label_id;
65char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
66bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
67struct nd_region;
68struct nvdimm_drvdata;
69struct nd_mapping;
70resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
71 struct nd_mapping *nd_mapping, resource_size_t *overlap);
72resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
73resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
74resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
75 struct nd_label_id *label_id);
76struct nd_mapping;
77struct resource *nsblk_add_resource(struct nd_region *nd_region,
78 struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
79 resource_size_t start);
80int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
81void get_ndd(struct nvdimm_drvdata *ndd);
82resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
83#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
new file mode 100644
index 000000000000..c41f53e74277
--- /dev/null
+++ b/drivers/nvdimm/nd.h
@@ -0,0 +1,220 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#ifndef __ND_H__
14#define __ND_H__
15#include <linux/libnvdimm.h>
16#include <linux/blkdev.h>
17#include <linux/device.h>
18#include <linux/mutex.h>
19#include <linux/ndctl.h>
20#include <linux/types.h>
21#include "label.h"
22
23enum {
24 /*
25 * Limits the maximum number of block apertures a dimm can
26 * support and is an input to the geometry/on-disk-format of a
27 * BTT instance
28 */
29 ND_MAX_LANES = 256,
30 SECTOR_SHIFT = 9,
31 INT_LBASIZE_ALIGNMENT = 64,
32};
33
34struct nvdimm_drvdata {
35 struct device *dev;
36 int nsindex_size;
37 struct nd_cmd_get_config_size nsarea;
38 void *data;
39 int ns_current, ns_next;
40 struct resource dpa;
41 struct kref kref;
42};
43
44struct nd_region_namespaces {
45 int count;
46 int active;
47};
48
49static inline struct nd_namespace_index *to_namespace_index(
50 struct nvdimm_drvdata *ndd, int i)
51{
52 if (i < 0)
53 return NULL;
54
55 return ndd->data + sizeof_namespace_index(ndd) * i;
56}
57
58static inline struct nd_namespace_index *to_current_namespace_index(
59 struct nvdimm_drvdata *ndd)
60{
61 return to_namespace_index(ndd, ndd->ns_current);
62}
63
64static inline struct nd_namespace_index *to_next_namespace_index(
65 struct nvdimm_drvdata *ndd)
66{
67 return to_namespace_index(ndd, ndd->ns_next);
68}
69
70#define nd_dbg_dpa(r, d, res, fmt, arg...) \
71 dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
72 (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \
73 (unsigned long long) (res ? resource_size(res) : 0), \
74 (unsigned long long) (res ? res->start : 0), ##arg)
75
76#define for_each_label(l, label, labels) \
77 for (l = 0; (label = labels ? labels[l] : NULL); l++)
78
79#define for_each_dpa_resource(ndd, res) \
80 for (res = (ndd)->dpa.child; res; res = res->sibling)
81
82#define for_each_dpa_resource_safe(ndd, res, next) \
83 for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
84 res; res = next, next = next ? next->sibling : NULL)
85
86struct nd_percpu_lane {
87 int count;
88 spinlock_t lock;
89};
90
91struct nd_region {
92 struct device dev;
93 struct ida ns_ida;
94 struct ida btt_ida;
95 struct device *ns_seed;
96 struct device *btt_seed;
97 u16 ndr_mappings;
98 u64 ndr_size;
99 u64 ndr_start;
100 int id, num_lanes, ro, numa_node;
101 void *provider_data;
102 struct nd_interleave_set *nd_set;
103 struct nd_percpu_lane __percpu *lane;
104 struct nd_mapping mapping[0];
105};
106
107struct nd_blk_region {
108 int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
109 void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
110 int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
111 void *iobuf, u64 len, int rw);
112 void *blk_provider_data;
113 struct nd_region nd_region;
114};
115
116/*
117 * Lookup next in the repeating sequence of 01, 10, and 11.
118 */
119static inline unsigned nd_inc_seq(unsigned seq)
120{
121 static const unsigned next[] = { 0, 2, 3, 1 };
122
123 return next[seq & 3];
124}
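/*
 * For example: nd_inc_seq(1) == 2, nd_inc_seq(2) == 3 and
 * nd_inc_seq(3) == 1, while an input of 0, which never appears in the
 * 01/10/11 cycle, maps back to 0.
 */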
125
126struct btt;
127struct nd_btt {
128 struct device dev;
129 struct nd_namespace_common *ndns;
130 struct btt *btt;
131 unsigned long lbasize;
132 u8 *uuid;
133 int id;
134};
135
136enum nd_async_mode {
137 ND_SYNC,
138 ND_ASYNC,
139};
140
141int nd_integrity_init(struct gendisk *disk, unsigned long meta_size);
142void wait_nvdimm_bus_probe_idle(struct device *dev);
143void nd_device_register(struct device *dev);
144void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
145int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
146 size_t len);
147ssize_t nd_sector_size_show(unsigned long current_lbasize,
148 const unsigned long *supported, char *buf);
149ssize_t nd_sector_size_store(struct device *dev, const char *buf,
150 unsigned long *current_lbasize, const unsigned long *supported);
151int __init nvdimm_init(void);
152int __init nd_region_init(void);
153void nvdimm_exit(void);
154void nd_region_exit(void);
155struct nvdimm;
156struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
157int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
158int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
159int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
160 void *buf, size_t len);
161struct nd_btt *to_nd_btt(struct device *dev);
162struct btt_sb;
163u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
164#if IS_ENABLED(CONFIG_BTT)
165int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
166bool is_nd_btt(struct device *dev);
167struct device *nd_btt_create(struct nd_region *nd_region);
168#else
169static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
170{
171 return -ENODEV;
172}
173
174static inline bool is_nd_btt(struct device *dev)
175{
176 return false;
177}
178
179static inline struct device *nd_btt_create(struct nd_region *nd_region)
180{
181 return NULL;
182}
183
184#endif
185struct nd_region *to_nd_region(struct device *dev);
186int nd_region_to_nstype(struct nd_region *nd_region);
187int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
188u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
189void nvdimm_bus_lock(struct device *dev);
190void nvdimm_bus_unlock(struct device *dev);
191bool is_nvdimm_bus_locked(struct device *dev);
192int nvdimm_revalidate_disk(struct gendisk *disk);
193void nvdimm_drvdata_release(struct kref *kref);
194void put_ndd(struct nvdimm_drvdata *ndd);
195int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
196void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
197struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
198 struct nd_label_id *label_id, resource_size_t start,
199 resource_size_t n);
200resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
201struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev);
202int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
203int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
204const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
205 char *name);
206int nd_blk_region_init(struct nd_region *nd_region);
207void __nd_iostat_start(struct bio *bio, unsigned long *start);
208static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
209{
210 struct gendisk *disk = bio->bi_bdev->bd_disk;
211
212 if (!blk_queue_io_stat(disk->queue))
213 return false;
214
215 __nd_iostat_start(bio, start);
216 return true;
217}
218void nd_iostat_end(struct bio *bio, unsigned long start);
219resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
220#endif /* __ND_H__ */
diff --git a/drivers/block/pmem.c b/drivers/nvdimm/pmem.c
index 095dfaadcaa5..ade9eb917a4d 100644
--- a/drivers/block/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -1,7 +1,7 @@
 /*
  * Persistent Memory Driver
  *
- * Copyright (c) 2014, Intel Corporation.
+ * Copyright (c) 2014-2015, Intel Corporation.
  * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
  * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
  *
@@ -23,8 +23,9 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-
-#define PMEM_MINORS 16
+#include <linux/pmem.h>
+#include <linux/nd.h>
+#include "nd.h"
 
 struct pmem_device {
  struct request_queue *pmem_queue;
@@ -32,12 +33,11 @@ struct pmem_device {
 
  /* One contiguous memory region per device */
  phys_addr_t phys_addr;
- void *virt_addr;
+ void __pmem *virt_addr;
  size_t size;
 };
 
 static int pmem_major;
-static atomic_t pmem_index;
 
 static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
  unsigned int len, unsigned int off, int rw,
@@ -45,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 {
  void *mem = kmap_atomic(page);
  size_t pmem_off = sector << 9;
+ void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
  if (rw == READ) {
-  memcpy(mem + off, pmem->virt_addr + pmem_off, len);
+  memcpy_from_pmem(mem + off, pmem_addr, len);
   flush_dcache_page(page);
  } else {
   flush_dcache_page(page);
-  memcpy(pmem->virt_addr + pmem_off, mem + off, len);
+  memcpy_to_pmem(pmem_addr, mem + off, len);
  }
 
  kunmap_atomic(mem);
@@ -59,31 +60,24 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static void pmem_make_request(struct request_queue *q, struct bio *bio)
 {
- struct block_device *bdev = bio->bi_bdev;
- struct pmem_device *pmem = bdev->bd_disk->private_data;
- int rw;
+ bool do_acct;
+ unsigned long start;
  struct bio_vec bvec;
- sector_t sector;
  struct bvec_iter iter;
- int err = 0;
-
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
-  err = -EIO;
-  goto out;
- }
-
- BUG_ON(bio->bi_rw & REQ_DISCARD);
+ struct block_device *bdev = bio->bi_bdev;
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
 
- rw = bio_data_dir(bio);
- sector = bio->bi_iter.bi_sector;
- bio_for_each_segment(bvec, bio, iter) {
+ do_acct = nd_iostat_start(bio, &start);
+ bio_for_each_segment(bvec, bio, iter)
   pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
-    rw, sector);
-  sector += bvec.bv_len >> 9;
- }
+    bio_data_dir(bio), iter.bi_sector);
+ if (do_acct)
+  nd_iostat_end(bio, start);
 
-out:
- bio_endio(bio, err);
+ if (bio_data_dir(bio))
+  wmb_pmem();
+
+ bio_endio(bio, 0);
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
@@ -106,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector,
  if (!pmem)
   return -ENODEV;
 
- *kaddr = pmem->virt_addr + offset;
+ /* FIXME convert DAX to comprehend that this mapping has a lifetime */
+ *kaddr = (void __force *) pmem->virt_addr + offset;
  *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
 
  return pmem->size - offset;
@@ -116,124 +111,165 @@ static const struct block_device_operations pmem_fops = {
  .owner = THIS_MODULE,
  .rw_page = pmem_rw_page,
  .direct_access = pmem_direct_access,
+ .revalidate_disk = nvdimm_revalidate_disk,
 };
 
-static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
+static struct pmem_device *pmem_alloc(struct device *dev,
+  struct resource *res, int id)
 {
  struct pmem_device *pmem;
- struct gendisk *disk;
- int idx, err;
 
- err = -ENOMEM;
  pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
  if (!pmem)
-  goto out;
+  return ERR_PTR(-ENOMEM);
 
  pmem->phys_addr = res->start;
  pmem->size = resource_size(res);
+ if (!arch_has_pmem_api())
+  dev_warn(dev, "unable to guarantee persistence of writes\n");
+
+ if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
+  dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
+    &pmem->phys_addr, pmem->size);
+  kfree(pmem);
+  return ERR_PTR(-EBUSY);
+ }
 
- err = -EINVAL;
- if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
-  dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
-  goto out_free_dev;
+ pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
+ if (!pmem->virt_addr) {
+  release_mem_region(pmem->phys_addr, pmem->size);
+  kfree(pmem);
+  return ERR_PTR(-ENXIO);
  }
 
- /*
-  * Map the memory as write-through, as we can't write back the contents
-  * of the CPU caches in case of a crash.
-  */
- err = -ENOMEM;
- pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size);
- if (!pmem->virt_addr)
-  goto out_release_region;
+ return pmem;
+}
+
+static void pmem_detach_disk(struct pmem_device *pmem)
+{
+ del_gendisk(pmem->pmem_disk);
+ put_disk(pmem->pmem_disk);
+ blk_cleanup_queue(pmem->pmem_queue);
+}
+
+static int pmem_attach_disk(struct nd_namespace_common *ndns,
+  struct pmem_device *pmem)
+{
+ struct gendisk *disk;
 
  pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
  if (!pmem->pmem_queue)
-  goto out_unmap;
+  return -ENOMEM;
 
  blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
155 blk_queue_max_hw_sectors(pmem->pmem_queue, 1024); 165 blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
156 blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); 166 blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
167 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
157 168
158 disk = alloc_disk(PMEM_MINORS); 169 disk = alloc_disk(0);
159 if (!disk) 170 if (!disk) {
160 goto out_free_queue; 171 blk_cleanup_queue(pmem->pmem_queue);
161 172 return -ENOMEM;
162 idx = atomic_inc_return(&pmem_index) - 1; 173 }
163 174
164 disk->major = pmem_major; 175 disk->major = pmem_major;
165 disk->first_minor = PMEM_MINORS * idx; 176 disk->first_minor = 0;
166 disk->fops = &pmem_fops; 177 disk->fops = &pmem_fops;
167 disk->private_data = pmem; 178 disk->private_data = pmem;
168 disk->queue = pmem->pmem_queue; 179 disk->queue = pmem->pmem_queue;
169 disk->flags = GENHD_FL_EXT_DEVT; 180 disk->flags = GENHD_FL_EXT_DEVT;
170 sprintf(disk->disk_name, "pmem%d", idx); 181 nvdimm_namespace_disk_name(ndns, disk->disk_name);
171 disk->driverfs_dev = dev; 182 disk->driverfs_dev = &ndns->dev;
172 set_capacity(disk, pmem->size >> 9); 183 set_capacity(disk, pmem->size >> 9);
173 pmem->pmem_disk = disk; 184 pmem->pmem_disk = disk;
174 185
175 add_disk(disk); 186 add_disk(disk);
187 revalidate_disk(disk);
176 188
177 return pmem; 189 return 0;
190}
178 191
179out_free_queue: 192static int pmem_rw_bytes(struct nd_namespace_common *ndns,
180 blk_cleanup_queue(pmem->pmem_queue); 193 resource_size_t offset, void *buf, size_t size, int rw)
181out_unmap: 194{
182 iounmap(pmem->virt_addr); 195 struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
183out_release_region: 196
184 release_mem_region(pmem->phys_addr, pmem->size); 197 if (unlikely(offset + size > pmem->size)) {
185out_free_dev: 198 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
186 kfree(pmem); 199 return -EFAULT;
187out: 200 }
188 return ERR_PTR(err); 201
202 if (rw == READ)
203 memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
204 else {
205 memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
206 wmb_pmem();
207 }
208
209 return 0;
189} 210}
190 211
191static void pmem_free(struct pmem_device *pmem) 212static void pmem_free(struct pmem_device *pmem)
192{ 213{
193 del_gendisk(pmem->pmem_disk); 214 memunmap_pmem(pmem->virt_addr);
194 put_disk(pmem->pmem_disk);
195 blk_cleanup_queue(pmem->pmem_queue);
196 iounmap(pmem->virt_addr);
197 release_mem_region(pmem->phys_addr, pmem->size); 215 release_mem_region(pmem->phys_addr, pmem->size);
198 kfree(pmem); 216 kfree(pmem);
199} 217}
200 218
201static int pmem_probe(struct platform_device *pdev) 219static int nd_pmem_probe(struct device *dev)
202{ 220{
221 struct nd_region *nd_region = to_nd_region(dev->parent);
222 struct nd_namespace_common *ndns;
223 struct nd_namespace_io *nsio;
203 struct pmem_device *pmem; 224 struct pmem_device *pmem;
204 struct resource *res; 225 int rc;
205
206 if (WARN_ON(pdev->num_resources > 1))
207 return -ENXIO;
208 226
209 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 227 ndns = nvdimm_namespace_common_probe(dev);
210 if (!res) 228 if (IS_ERR(ndns))
211 return -ENXIO; 229 return PTR_ERR(ndns);
212 230
213 pmem = pmem_alloc(&pdev->dev, res); 231 nsio = to_nd_namespace_io(&ndns->dev);
232 pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
214 if (IS_ERR(pmem)) 233 if (IS_ERR(pmem))
215 return PTR_ERR(pmem); 234 return PTR_ERR(pmem);
216 235
217 platform_set_drvdata(pdev, pmem); 236 dev_set_drvdata(dev, pmem);
218 237 ndns->rw_bytes = pmem_rw_bytes;
219 return 0; 238 if (is_nd_btt(dev))
239 rc = nvdimm_namespace_attach_btt(ndns);
240 else if (nd_btt_probe(ndns, pmem) == 0) {
241 /* we'll come back as btt-pmem */
242 rc = -ENXIO;
243 } else
244 rc = pmem_attach_disk(ndns, pmem);
245 if (rc)
246 pmem_free(pmem);
247 return rc;
220} 248}
221 249
222static int pmem_remove(struct platform_device *pdev) 250static int nd_pmem_remove(struct device *dev)
223{ 251{
224 struct pmem_device *pmem = platform_get_drvdata(pdev); 252 struct pmem_device *pmem = dev_get_drvdata(dev);
225 253
254 if (is_nd_btt(dev))
255 nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
256 else
257 pmem_detach_disk(pmem);
226 pmem_free(pmem); 258 pmem_free(pmem);
259
227 return 0; 260 return 0;
228} 261}
229 262
230static struct platform_driver pmem_driver = { 263MODULE_ALIAS("pmem");
231 .probe = pmem_probe, 264MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
232 .remove = pmem_remove, 265MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
233 .driver = { 266static struct nd_device_driver nd_pmem_driver = {
234 .owner = THIS_MODULE, 267 .probe = nd_pmem_probe,
235 .name = "pmem", 268 .remove = nd_pmem_remove,
269 .drv = {
270 .name = "nd_pmem",
236 }, 271 },
272 .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
237}; 273};
238 274
239static int __init pmem_init(void) 275static int __init pmem_init(void)
@@ -244,16 +280,19 @@ static int __init pmem_init(void)
244 if (pmem_major < 0) 280 if (pmem_major < 0)
245 return pmem_major; 281 return pmem_major;
246 282
247 error = platform_driver_register(&pmem_driver); 283 error = nd_driver_register(&nd_pmem_driver);
248 if (error) 284 if (error) {
249 unregister_blkdev(pmem_major, "pmem"); 285 unregister_blkdev(pmem_major, "pmem");
250 return error; 286 return error;
287 }
288
289 return 0;
251} 290}
252module_init(pmem_init); 291module_init(pmem_init);
253 292
254static void pmem_exit(void) 293static void pmem_exit(void)
255{ 294{
256 platform_driver_unregister(&pmem_driver); 295 driver_unregister(&nd_pmem_driver.drv);
257 unregister_blkdev(pmem_major, "pmem"); 296 unregister_blkdev(pmem_major, "pmem");
258} 297}
259module_exit(pmem_exit); 298module_exit(pmem_exit);
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
new file mode 100644
index 000000000000..f28f78ccff19
--- /dev/null
+++ b/drivers/nvdimm/region.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/cpumask.h>
14#include <linux/module.h>
15#include <linux/device.h>
16#include <linux/nd.h>
17#include "nd.h"
18
19static int nd_region_probe(struct device *dev)
20{
21 int err, rc;
22 static unsigned long once;
23 struct nd_region_namespaces *num_ns;
24 struct nd_region *nd_region = to_nd_region(dev);
25
26 if (nd_region->num_lanes > num_online_cpus()
27 && nd_region->num_lanes < num_possible_cpus()
28 && !test_and_set_bit(0, &once)) {
29 dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
30 num_online_cpus(), nd_region->num_lanes,
31 num_possible_cpus());
32 dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
33 nd_region->num_lanes);
34 }
35
36 rc = nd_blk_region_init(nd_region);
37 if (rc)
38 return rc;
39
40 rc = nd_region_register_namespaces(nd_region, &err);
41 num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
42 if (!num_ns)
43 return -ENOMEM;
44
45 if (rc < 0)
46 return rc;
47
48 num_ns->active = rc;
49 num_ns->count = rc + err;
50 dev_set_drvdata(dev, num_ns);
51
52 if (rc && err && rc == err)
53 return -ENODEV;
54
55 nd_region->btt_seed = nd_btt_create(nd_region);
56 if (err == 0)
57 return 0;
58
59 /*
60 * Given multiple namespaces per region, we do not want to
61 * disable all the successfully registered peer namespaces upon
62 * a single registration failure. If userspace is missing a
63 * namespace that it expects, it can disable/re-enable the region
64 * to retry discovery after correcting the failure.
65 * <regionX>/init_namespaces returns the current
66 * "<async-registered>/<total>" namespace count.
67 */
68 dev_err(dev, "failed to register %d namespace%s, continuing...\n",
69 err, err == 1 ? "" : "s");
70 return 0;
71}
72
73static int child_unregister(struct device *dev, void *data)
74{
75 nd_device_unregister(dev, ND_SYNC);
76 return 0;
77}
78
79static int nd_region_remove(struct device *dev)
80{
81 struct nd_region *nd_region = to_nd_region(dev);
82
83 /* flush attribute readers and disable */
84 nvdimm_bus_lock(dev);
85 nd_region->ns_seed = NULL;
86 nd_region->btt_seed = NULL;
87 dev_set_drvdata(dev, NULL);
88 nvdimm_bus_unlock(dev);
89
90 device_for_each_child(dev, NULL, child_unregister);
91 return 0;
92}
93
94static struct nd_device_driver nd_region_driver = {
95 .probe = nd_region_probe,
96 .remove = nd_region_remove,
97 .drv = {
98 .name = "nd_region",
99 },
100 .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM,
101};
102
103int __init nd_region_init(void)
104{
105 return nd_driver_register(&nd_region_driver);
106}
107
108void nd_region_exit(void)
109{
110 driver_unregister(&nd_region_driver.drv);
111}
112
113MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM);
114MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
new file mode 100644
index 000000000000..a5233422f9dc
--- /dev/null
+++ b/drivers/nvdimm/region_devs.c
@@ -0,0 +1,787 @@
1/*
2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#include <linux/scatterlist.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/sort.h>
18#include <linux/io.h>
19#include <linux/nd.h>
20#include "nd-core.h"
21#include "nd.h"
22
23static DEFINE_IDA(region_ida);
24
25static void nd_region_release(struct device *dev)
26{
27 struct nd_region *nd_region = to_nd_region(dev);
28 u16 i;
29
30 for (i = 0; i < nd_region->ndr_mappings; i++) {
31 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
32 struct nvdimm *nvdimm = nd_mapping->nvdimm;
33
34 put_device(&nvdimm->dev);
35 }
36 free_percpu(nd_region->lane);
37 ida_simple_remove(&region_ida, nd_region->id);
38 if (is_nd_blk(dev))
39 kfree(to_nd_blk_region(dev));
40 else
41 kfree(nd_region);
42}
43
44static struct device_type nd_blk_device_type = {
45 .name = "nd_blk",
46 .release = nd_region_release,
47};
48
49static struct device_type nd_pmem_device_type = {
50 .name = "nd_pmem",
51 .release = nd_region_release,
52};
53
54static struct device_type nd_volatile_device_type = {
55 .name = "nd_volatile",
56 .release = nd_region_release,
57};
58
59bool is_nd_pmem(struct device *dev)
60{
61 return dev ? dev->type == &nd_pmem_device_type : false;
62}
63
64bool is_nd_blk(struct device *dev)
65{
66 return dev ? dev->type == &nd_blk_device_type : false;
67}
68
69struct nd_region *to_nd_region(struct device *dev)
70{
71 struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
72
73 WARN_ON(dev->type->release != nd_region_release);
74 return nd_region;
75}
76EXPORT_SYMBOL_GPL(to_nd_region);
77
78struct nd_blk_region *to_nd_blk_region(struct device *dev)
79{
80 struct nd_region *nd_region = to_nd_region(dev);
81
82 WARN_ON(!is_nd_blk(dev));
83 return container_of(nd_region, struct nd_blk_region, nd_region);
84}
85EXPORT_SYMBOL_GPL(to_nd_blk_region);
86
87void *nd_region_provider_data(struct nd_region *nd_region)
88{
89 return nd_region->provider_data;
90}
91EXPORT_SYMBOL_GPL(nd_region_provider_data);
92
93void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
94{
95 return ndbr->blk_provider_data;
96}
97EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);
98
99void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
100{
101 ndbr->blk_provider_data = data;
102}
103EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
104
105/**
106 * nd_region_to_nstype() - region to an integer namespace type
107 * @nd_region: region-device to interrogate
108 *
109 * This value is exported as the region's 'nstype' attribute, is an input
110 * to the MODALIAS for namespace devices, and is the bit number a
111 * nvdimm_bus uses to match namespace devices with namespace drivers.
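 *
 * Concretely: a PMEM region with at least one NDD_ALIASING DIMM maps to
 * ND_DEVICE_NAMESPACE_PMEM, a PMEM region with no aliased DIMMs maps to
 * ND_DEVICE_NAMESPACE_IO, and a BLK region maps to
 * ND_DEVICE_NAMESPACE_BLK.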
112 */
113int nd_region_to_nstype(struct nd_region *nd_region)
114{
115 if (is_nd_pmem(&nd_region->dev)) {
116 u16 i, alias;
117
118 for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
119 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
120 struct nvdimm *nvdimm = nd_mapping->nvdimm;
121
122 if (nvdimm->flags & NDD_ALIASING)
123 alias++;
124 }
125 if (alias)
126 return ND_DEVICE_NAMESPACE_PMEM;
127 else
128 return ND_DEVICE_NAMESPACE_IO;
129 } else if (is_nd_blk(&nd_region->dev)) {
130 return ND_DEVICE_NAMESPACE_BLK;
131 }
132
133 return 0;
134}
135EXPORT_SYMBOL(nd_region_to_nstype);
136
137static int is_uuid_busy(struct device *dev, void *data)
138{
139 struct nd_region *nd_region = to_nd_region(dev->parent);
140 u8 *uuid = data;
141
142 switch (nd_region_to_nstype(nd_region)) {
143 case ND_DEVICE_NAMESPACE_PMEM: {
144 struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
145
146 if (!nspm->uuid)
147 break;
148 if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0)
149 return -EBUSY;
150 break;
151 }
152 case ND_DEVICE_NAMESPACE_BLK: {
153 struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
154
155 if (!nsblk->uuid)
156 break;
157 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0)
158 return -EBUSY;
159 break;
160 }
161 default:
162 break;
163 }
164
165 return 0;
166}
167
168static int is_namespace_uuid_busy(struct device *dev, void *data)
169{
170 if (is_nd_pmem(dev) || is_nd_blk(dev))
171 return device_for_each_child(dev, data, is_uuid_busy);
172 return 0;
173}
174
175/**
176 * nd_is_uuid_unique - verify that no other namespace has @uuid
177 * @dev: any device on a nvdimm_bus
178 * @uuid: uuid to check
179 */
180bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
181{
182 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
183
184 if (!nvdimm_bus)
185 return false;
186 WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
187 if (device_for_each_child(&nvdimm_bus->dev, uuid,
188 is_namespace_uuid_busy) != 0)
189 return false;
190 return true;
191}
192
193static ssize_t size_show(struct device *dev,
194 struct device_attribute *attr, char *buf)
195{
196 struct nd_region *nd_region = to_nd_region(dev);
197 unsigned long long size = 0;
198
199 if (is_nd_pmem(dev)) {
200 size = nd_region->ndr_size;
201 } else if (nd_region->ndr_mappings == 1) {
202 struct nd_mapping *nd_mapping = &nd_region->mapping[0];
203
204 size = nd_mapping->size;
205 }
206
207 return sprintf(buf, "%llu\n", size);
208}
209static DEVICE_ATTR_RO(size);
210
211static ssize_t mappings_show(struct device *dev,
212 struct device_attribute *attr, char *buf)
213{
214 struct nd_region *nd_region = to_nd_region(dev);
215
216 return sprintf(buf, "%d\n", nd_region->ndr_mappings);
217}
218static DEVICE_ATTR_RO(mappings);
219
220static ssize_t nstype_show(struct device *dev,
221 struct device_attribute *attr, char *buf)
222{
223 struct nd_region *nd_region = to_nd_region(dev);
224
225 return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
226}
227static DEVICE_ATTR_RO(nstype);
228
229static ssize_t set_cookie_show(struct device *dev,
230 struct device_attribute *attr, char *buf)
231{
232 struct nd_region *nd_region = to_nd_region(dev);
233 struct nd_interleave_set *nd_set = nd_region->nd_set;
234
235 if (is_nd_pmem(dev) && nd_set)
236 /* pass, should be precluded by region_visible */;
237 else
238 return -ENXIO;
239
240 return sprintf(buf, "%#llx\n", nd_set->cookie);
241}
242static DEVICE_ATTR_RO(set_cookie);
243
244resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
245{
246 resource_size_t blk_max_overlap = 0, available, overlap;
247 int i;
248
249 WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
250
251 retry:
252 available = 0;
253 overlap = blk_max_overlap;
254 for (i = 0; i < nd_region->ndr_mappings; i++) {
255 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
256 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
257
258 /* if a dimm is disabled the available capacity is zero */
259 if (!ndd)
260 return 0;
261
262 if (is_nd_pmem(&nd_region->dev)) {
263 available += nd_pmem_available_dpa(nd_region,
264 nd_mapping, &overlap);
265 if (overlap > blk_max_overlap) {
266 blk_max_overlap = overlap;
267 goto retry;
268 }
269 } else if (is_nd_blk(&nd_region->dev)) {
270 available += nd_blk_available_dpa(nd_mapping);
271 }
272 }
273
274 return available;
275}
276
277static ssize_t available_size_show(struct device *dev,
278 struct device_attribute *attr, char *buf)
279{
280 struct nd_region *nd_region = to_nd_region(dev);
281 unsigned long long available = 0;
282
283 /*
284 * Flush in-flight updates and grab a snapshot of the available
285 * size. Of course, this value is potentially invalidated the
286 * moment the nvdimm_bus_lock() is dropped, but that's userspace's
287 * problem to not race itself.
288 */
289 nvdimm_bus_lock(dev);
290 wait_nvdimm_bus_probe_idle(dev);
291 available = nd_region_available_dpa(nd_region);
292 nvdimm_bus_unlock(dev);
293
294 return sprintf(buf, "%llu\n", available);
295}
296static DEVICE_ATTR_RO(available_size);
297
298static ssize_t init_namespaces_show(struct device *dev,
299 struct device_attribute *attr, char *buf)
300{
301 struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
302 ssize_t rc;
303
304 nvdimm_bus_lock(dev);
305 if (num_ns)
306 rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
307 else
308 rc = -ENXIO;
309 nvdimm_bus_unlock(dev);
310
311 return rc;
312}
313static DEVICE_ATTR_RO(init_namespaces);
314
315static ssize_t namespace_seed_show(struct device *dev,
316 struct device_attribute *attr, char *buf)
317{
318 struct nd_region *nd_region = to_nd_region(dev);
319 ssize_t rc;
320
321 nvdimm_bus_lock(dev);
322 if (nd_region->ns_seed)
323 rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
324 else
325 rc = sprintf(buf, "\n");
326 nvdimm_bus_unlock(dev);
327 return rc;
328}
329static DEVICE_ATTR_RO(namespace_seed);
330
331static ssize_t btt_seed_show(struct device *dev,
332 struct device_attribute *attr, char *buf)
333{
334 struct nd_region *nd_region = to_nd_region(dev);
335 ssize_t rc;
336
337 nvdimm_bus_lock(dev);
338 if (nd_region->btt_seed)
339 rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
340 else
341 rc = sprintf(buf, "\n");
342 nvdimm_bus_unlock(dev);
343
344 return rc;
345}
346static DEVICE_ATTR_RO(btt_seed);
347
348static ssize_t read_only_show(struct device *dev,
349 struct device_attribute *attr, char *buf)
350{
351 struct nd_region *nd_region = to_nd_region(dev);
352
353 return sprintf(buf, "%d\n", nd_region->ro);
354}
355
356static ssize_t read_only_store(struct device *dev,
357 struct device_attribute *attr, const char *buf, size_t len)
358{
359 bool ro;
360 int rc = strtobool(buf, &ro);
361 struct nd_region *nd_region = to_nd_region(dev);
362
363 if (rc)
364 return rc;
365
366 nd_region->ro = ro;
367 return len;
368}
369static DEVICE_ATTR_RW(read_only);
370
371static struct attribute *nd_region_attributes[] = {
372 &dev_attr_size.attr,
373 &dev_attr_nstype.attr,
374 &dev_attr_mappings.attr,
375 &dev_attr_btt_seed.attr,
376 &dev_attr_read_only.attr,
377 &dev_attr_set_cookie.attr,
378 &dev_attr_available_size.attr,
379 &dev_attr_namespace_seed.attr,
380 &dev_attr_init_namespaces.attr,
381 NULL,
382};
383
384static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
385{
386 struct device *dev = container_of(kobj, typeof(*dev), kobj);
387 struct nd_region *nd_region = to_nd_region(dev);
388 struct nd_interleave_set *nd_set = nd_region->nd_set;
389 int type = nd_region_to_nstype(nd_region);
390
391 if (a != &dev_attr_set_cookie.attr
392 && a != &dev_attr_available_size.attr)
393 return a->mode;
394
395 if ((type == ND_DEVICE_NAMESPACE_PMEM
396 || type == ND_DEVICE_NAMESPACE_BLK)
397 && a == &dev_attr_available_size.attr)
398 return a->mode;
399 else if (is_nd_pmem(dev) && nd_set)
400 return a->mode;
401
402 return 0;
403}
404
405struct attribute_group nd_region_attribute_group = {
406 .attrs = nd_region_attributes,
407 .is_visible = region_visible,
408};
409EXPORT_SYMBOL_GPL(nd_region_attribute_group);
410
411u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
412{
413 struct nd_interleave_set *nd_set = nd_region->nd_set;
414
415 if (nd_set)
416 return nd_set->cookie;
417 return 0;
418}
419
420/*
421 * Upon successful probe/remove, take/release a reference on the
422 * associated interleave set (if present), and plant new btt + namespace
423 * seeds. Also, on the removal of a BLK region, notify the provider to
424 * disable the region.
425 */
426static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
427 struct device *dev, bool probe)
428{
429 struct nd_region *nd_region;
430
431 if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
432 int i;
433
434 nd_region = to_nd_region(dev);
435 for (i = 0; i < nd_region->ndr_mappings; i++) {
436 struct nd_mapping *nd_mapping = &nd_region->mapping[i];
437 struct nvdimm_drvdata *ndd = nd_mapping->ndd;
438 struct nvdimm *nvdimm = nd_mapping->nvdimm;
439
440 kfree(nd_mapping->labels);
441 nd_mapping->labels = NULL;
442 put_ndd(ndd);
443 nd_mapping->ndd = NULL;
444 if (ndd)
445 atomic_dec(&nvdimm->busy);
446 }
447
448 if (is_nd_pmem(dev))
449 return;
450
451 to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
452 }
453 if (dev->parent && is_nd_blk(dev->parent) && probe) {
454 nd_region = to_nd_region(dev->parent);
455 nvdimm_bus_lock(dev);
456 if (nd_region->ns_seed == dev)
457 nd_region_create_blk_seed(nd_region);
458 nvdimm_bus_unlock(dev);
459 }
460 if (is_nd_btt(dev) && probe) {
461 nd_region = to_nd_region(dev->parent);
462 nvdimm_bus_lock(dev);
463 if (nd_region->btt_seed == dev)
464 nd_region_create_btt_seed(nd_region);
465 nvdimm_bus_unlock(dev);
466 }
467}
468
469void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
470{
471 nd_region_notify_driver_action(nvdimm_bus, dev, true);
472}
473
474void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev)
475{
476 nd_region_notify_driver_action(nvdimm_bus, dev, false);
477}
478
479static ssize_t mappingN(struct device *dev, char *buf, int n)
480{
481 struct nd_region *nd_region = to_nd_region(dev);
482 struct nd_mapping *nd_mapping;
483 struct nvdimm *nvdimm;
484
485 if (n >= nd_region->ndr_mappings)
486 return -ENXIO;
487 nd_mapping = &nd_region->mapping[n];
488 nvdimm = nd_mapping->nvdimm;
489
490 return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev),
491 nd_mapping->start, nd_mapping->size);
492}
493
494#define REGION_MAPPING(idx) \
495static ssize_t mapping##idx##_show(struct device *dev, \
496 struct device_attribute *attr, char *buf) \
497{ \
498 return mappingN(dev, buf, idx); \
499} \
500static DEVICE_ATTR_RO(mapping##idx)
501
502/*
503 * 32 should be enough for a while; even in the presence of socket
504 * interleave, a 32-way interleave set is a degenerate case.
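 *
 * Each REGION_MAPPING(idx) instance below emits mapping<idx>_show() and
 * dev_attr_mapping<idx>; the resulting attributes are collected in
 * mapping_attributes[] further down and gated by mapping_visible().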
505 */
506REGION_MAPPING(0);
507REGION_MAPPING(1);
508REGION_MAPPING(2);
509REGION_MAPPING(3);
510REGION_MAPPING(4);
511REGION_MAPPING(5);
512REGION_MAPPING(6);
513REGION_MAPPING(7);
514REGION_MAPPING(8);
515REGION_MAPPING(9);
516REGION_MAPPING(10);
517REGION_MAPPING(11);
518REGION_MAPPING(12);
519REGION_MAPPING(13);
520REGION_MAPPING(14);
521REGION_MAPPING(15);
522REGION_MAPPING(16);
523REGION_MAPPING(17);
524REGION_MAPPING(18);
525REGION_MAPPING(19);
526REGION_MAPPING(20);
527REGION_MAPPING(21);
528REGION_MAPPING(22);
529REGION_MAPPING(23);
530REGION_MAPPING(24);
531REGION_MAPPING(25);
532REGION_MAPPING(26);
533REGION_MAPPING(27);
534REGION_MAPPING(28);
535REGION_MAPPING(29);
536REGION_MAPPING(30);
537REGION_MAPPING(31);
538
539static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
540{
541 struct device *dev = container_of(kobj, struct device, kobj);
542 struct nd_region *nd_region = to_nd_region(dev);
543
544 if (n < nd_region->ndr_mappings)
545 return a->mode;
546 return 0;
547}
548
549static struct attribute *mapping_attributes[] = {
550 &dev_attr_mapping0.attr,
551 &dev_attr_mapping1.attr,
552 &dev_attr_mapping2.attr,
553 &dev_attr_mapping3.attr,
554 &dev_attr_mapping4.attr,
555 &dev_attr_mapping5.attr,
556 &dev_attr_mapping6.attr,
557 &dev_attr_mapping7.attr,
558 &dev_attr_mapping8.attr,
559 &dev_attr_mapping9.attr,
560 &dev_attr_mapping10.attr,
561 &dev_attr_mapping11.attr,
562 &dev_attr_mapping12.attr,
563 &dev_attr_mapping13.attr,
564 &dev_attr_mapping14.attr,
565 &dev_attr_mapping15.attr,
566 &dev_attr_mapping16.attr,
567 &dev_attr_mapping17.attr,
568 &dev_attr_mapping18.attr,
569 &dev_attr_mapping19.attr,
570 &dev_attr_mapping20.attr,
571 &dev_attr_mapping21.attr,
572 &dev_attr_mapping22.attr,
573 &dev_attr_mapping23.attr,
574 &dev_attr_mapping24.attr,
575 &dev_attr_mapping25.attr,
576 &dev_attr_mapping26.attr,
577 &dev_attr_mapping27.attr,
578 &dev_attr_mapping28.attr,
579 &dev_attr_mapping29.attr,
580 &dev_attr_mapping30.attr,
581 &dev_attr_mapping31.attr,
582 NULL,
583};
584
585struct attribute_group nd_mapping_attribute_group = {
586 .is_visible = mapping_visible,
587 .attrs = mapping_attributes,
588};
589EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);
590
591int nd_blk_region_init(struct nd_region *nd_region)
592{
593 struct device *dev = &nd_region->dev;
594 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
595
596 if (!is_nd_blk(dev))
597 return 0;
598
599 if (nd_region->ndr_mappings < 1) {
600 dev_err(dev, "invalid BLK region\n");
601 return -ENXIO;
602 }
603
604 return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
605}
606
607/**
608 * nd_region_acquire_lane - allocate and lock a lane
609 * @nd_region: region id and number of lanes possible
610 *
611 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
612 * We optimize for the common case where there are 256 lanes, one
613 * per-cpu. For larger systems we need to lock to share lanes. For now
614 * this implementation assumes the cost of maintaining an allocator for
615 * free lanes is on the order of the lock hold time, so it implements a
616 * static lane = cpu % num_lanes mapping.
617 *
618 * In the case of a BTT instance on top of a BLK namespace a lane may be
619 * acquired recursively. We lock on the first instance.
620 *
621 * In the case of a BTT instance on top of PMEM, we only acquire a lane
622 * for the BTT metadata updates.
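 *
 * An illustrative caller brackets its per-lane work with the pair:
 *
 *	lane = nd_region_acquire_lane(nd_region);
 *	... program a BLK data window or update the BTT log ...
 *	nd_region_release_lane(nd_region, lane);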
623 */
624unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
625{
626 unsigned int cpu, lane;
627
628 cpu = get_cpu();
629 if (nd_region->num_lanes < nr_cpu_ids) {
630 struct nd_percpu_lane *ndl_lock, *ndl_count;
631
632 lane = cpu % nd_region->num_lanes;
633 ndl_count = per_cpu_ptr(nd_region->lane, cpu);
634 ndl_lock = per_cpu_ptr(nd_region->lane, lane);
635 if (ndl_count->count++ == 0)
636 spin_lock(&ndl_lock->lock);
637 } else
638 lane = cpu;
639
640 return lane;
641}
642EXPORT_SYMBOL(nd_region_acquire_lane);
643
644void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
645{
646 if (nd_region->num_lanes < nr_cpu_ids) {
647 unsigned int cpu = get_cpu();
648 struct nd_percpu_lane *ndl_lock, *ndl_count;
649
650 ndl_count = per_cpu_ptr(nd_region->lane, cpu);
651 ndl_lock = per_cpu_ptr(nd_region->lane, lane);
652 if (--ndl_count->count == 0)
653 spin_unlock(&ndl_lock->lock);
654 put_cpu();
655 }
656 put_cpu();
657}
658EXPORT_SYMBOL(nd_region_release_lane);
659
660static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
661 struct nd_region_desc *ndr_desc, struct device_type *dev_type,
662 const char *caller)
663{
664 struct nd_region *nd_region;
665 struct device *dev;
666 void *region_buf;
667 unsigned int i;
668 int ro = 0;
669
670 for (i = 0; i < ndr_desc->num_mappings; i++) {
671 struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
672 struct nvdimm *nvdimm = nd_mapping->nvdimm;
673
674 if ((nd_mapping->start | nd_mapping->size) % SZ_4K) {
675 dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
676 caller, dev_name(&nvdimm->dev), i);
677
678 return NULL;
679 }
680
681 if (nvdimm->flags & NDD_UNARMED)
682 ro = 1;
683 }
684
685 if (dev_type == &nd_blk_device_type) {
686 struct nd_blk_region_desc *ndbr_desc;
687 struct nd_blk_region *ndbr;
688
689 ndbr_desc = to_blk_region_desc(ndr_desc);
690 ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
691 * ndr_desc->num_mappings,
692 GFP_KERNEL);
693 if (ndbr) {
694 nd_region = &ndbr->nd_region;
695 ndbr->enable = ndbr_desc->enable;
696 ndbr->disable = ndbr_desc->disable;
697 ndbr->do_io = ndbr_desc->do_io;
698 }
699 region_buf = ndbr;
700 } else {
701 nd_region = kzalloc(sizeof(struct nd_region)
702 + sizeof(struct nd_mapping)
703 * ndr_desc->num_mappings,
704 GFP_KERNEL);
705 region_buf = nd_region;
706 }
707
708 if (!region_buf)
709 return NULL;
710 nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL);
711 if (nd_region->id < 0)
712 goto err_id;
713
714 nd_region->lane = alloc_percpu(struct nd_percpu_lane);
715 if (!nd_region->lane)
716 goto err_percpu;
717
718 for (i = 0; i < nr_cpu_ids; i++) {
719 struct nd_percpu_lane *ndl;
720
721 ndl = per_cpu_ptr(nd_region->lane, i);
722 spin_lock_init(&ndl->lock);
723 ndl->count = 0;
724 }
725
726 memcpy(nd_region->mapping, ndr_desc->nd_mapping,
727 sizeof(struct nd_mapping) * ndr_desc->num_mappings);
728 for (i = 0; i < ndr_desc->num_mappings; i++) {
729 struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
730 struct nvdimm *nvdimm = nd_mapping->nvdimm;
731
732 get_device(&nvdimm->dev);
733 }
734 nd_region->ndr_mappings = ndr_desc->num_mappings;
735 nd_region->provider_data = ndr_desc->provider_data;
736 nd_region->nd_set = ndr_desc->nd_set;
737 nd_region->num_lanes = ndr_desc->num_lanes;
738 nd_region->ro = ro;
739 nd_region->numa_node = ndr_desc->numa_node;
740 ida_init(&nd_region->ns_ida);
741 ida_init(&nd_region->btt_ida);
742 dev = &nd_region->dev;
743 dev_set_name(dev, "region%d", nd_region->id);
744 dev->parent = &nvdimm_bus->dev;
745 dev->type = dev_type;
746 dev->groups = ndr_desc->attr_groups;
747 nd_region->ndr_size = resource_size(ndr_desc->res);
748 nd_region->ndr_start = ndr_desc->res->start;
749 nd_device_register(dev);
750
751 return nd_region;
752
753 err_percpu:
754 ida_simple_remove(&region_ida, nd_region->id);
755 err_id:
756 kfree(region_buf);
757 return NULL;
758}
759
760struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
761 struct nd_region_desc *ndr_desc)
762{
763 ndr_desc->num_lanes = ND_MAX_LANES;
764 return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
765 __func__);
766}
767EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
768
769struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
770 struct nd_region_desc *ndr_desc)
771{
772 if (ndr_desc->num_mappings > 1)
773 return NULL;
774 ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
775 return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
776 __func__);
777}
778EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);
779
780struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
781 struct nd_region_desc *ndr_desc)
782{
783 ndr_desc->num_lanes = ND_MAX_LANES;
784 return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
785 __func__);
786}
787EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);