author		Linus Torvalds <torvalds@linux-foundation.org>	2017-11-17 12:51:57 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-17 12:51:57 -0500
commit		a3841f94c7ecb3ede0f888d3fcfe8fb6368ddd7a (patch)
tree		6625eedf10d0672068ee218bb893a5a0e1803df2
parent		adeba81ac2a6451f44545874da3d181081f0ab04 (diff)
parent		4247f24c23589bcc3bc3490515ef8c9497e9ae55 (diff)
Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams:
"Save for a few late fixes, all of these commits have shipped in -next
releases since before the merge window opened, and 0day has given a
build success notification.
The ext4 touches came from Jan, and the xfs touches have Darrick's
reviewed-by. An xfstest for the MAP_SYNC feature has been through
a few rounds of review and is on track to be merged.
- Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
'userspace flush' of persistent memory updates via filesystem-dax
mappings. It arranges for any filesystem metadata updates that may
be required to satisfy a write fault to also be flushed ("on disk")
before the kernel returns to userspace from the fault handler.
Effectively every write-fault that dirties metadata completes an
fsync() before returning from the fault handler. The new
MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
is validated as supported by the filesystem's ->mmap() file
operation (a userspace usage sketch follows the diffstat below).
- Add support for the standard ACPI 6.2 label access methods that
replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
This enables interoperability with environments that only implement
the standardized methods.
- Add support for the ACPI 6.2 NVDIMM media error injection methods.
- Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
latch last shutdown status, firmware update, SMART error injection,
and SMART alarm threshold control.
- Cleanup physical address information disclosures to be root-only.
- Fix revalidation of the DIMM "locked label area" status to support
dynamic unlock of the label area.
- Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
(system-physical-address) command and error injection commands.
Acknowledgements that came after the commits were pushed to -next:
- 957ac8c421ad ("dax: fix PMD faults on zero-length files"):
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
- a39e596baa07 ("xfs: support for synchronous DAX faults") and
7b565c9f965b ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"
* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
acpi, nfit: add 'Enable Latch System Shutdown Status' command support
dax: fix general protection fault in dax_alloc_inode
dax: fix PMD faults on zero-length files
dax: stop requiring a live device for dax_flush()
brd: remove dax support
dax: quiet bdev_dax_supported()
fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
tools/testing/nvdimm: unit test clear-error commands
acpi, nfit: validate commands against the device type
tools/testing/nvdimm: stricter bounds checking for error injection commands
xfs: support for synchronous DAX faults
xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
ext4: Support for synchronous DAX faults
ext4: Simplify error handling in ext4_dax_huge_fault()
dax: Implement dax_finish_sync_fault()
dax, iomap: Add support for synchronous faults
mm: Define MAP_SYNC and VM_SYNC flags
dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
dax: Allow dax_iomap_fault() to return pfn
dax: Fix comment describing dax_iomap_fault()
...
48 files changed, 1405 insertions, 560 deletions
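
As a hedged illustration (editor's sketch, not part of this series), the MAP_SYNC / MAP_SHARED_VALIDATE interface described in the pull message above could be exercised from userspace roughly as follows. It assumes a file on a DAX-capable (fsdax) ext4 or xfs mount at the hypothetical path /mnt/pmem/data, and a libc that does not yet carry the new flags, hence the fallback defines: MAP_SHARED_VALIDATE's value appears in the uapi hunks below, and MAP_SYNC's value is assumed from the asm-generic header this series updates.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Fallback values for pre-4.15 userspace headers (assumed from this series) */
#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE 0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC 0x080000
#endif

int main(void)
{
	const char *path = "/mnt/pmem/data";	/* hypothetical fsdax file */
	int fd = open(path, O_CREAT | O_RDWR, 0600);

	if (fd < 0 || ftruncate(fd, 4096)) {
		perror(path);
		return 1;
	}

	/*
	 * MAP_SHARED_VALIDATE makes the filesystem's ->mmap() validate the
	 * extension flags, so an unsupported MAP_SYNC is rejected instead
	 * of being silently ignored as it would be with plain MAP_SHARED.
	 */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * With MAP_SYNC, the write fault below also makes any filesystem
	 * metadata needed to reach this block durable; the data itself
	 * still needs a CPU cache flush (CLWB via a pmem library, or a
	 * conservative msync()) before it can be considered persistent.
	 */
	strcpy(p, "hello, persistent memory");
	msync(p, 4096, MS_SYNC);

	munmap(p, 4096);
	close(fd);
	return 0;
}

On a mount without DAX support the same mmap() call is expected to fail outright rather than quietly fall back to an ordinary shared mapping, which is exactly the validation MAP_SHARED_VALIDATE adds.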
diff --git a/MAINTAINERS b/MAINTAINERS
index 540762a62906..e04d108055f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4208,7 +4208,7 @@ L: linux-i2c@vger.kernel.org
4208 | S: Maintained | 4208 | S: Maintained |
4209 | F: drivers/i2c/busses/i2c-diolan-u2c.c | 4209 | F: drivers/i2c/busses/i2c-diolan-u2c.c |
4210 | 4210 | ||
4211 | DIRECT ACCESS (DAX) | 4211 | FILESYSTEM DIRECT ACCESS (DAX) |
4212 | M: Matthew Wilcox <mawilcox@microsoft.com> | 4212 | M: Matthew Wilcox <mawilcox@microsoft.com> |
4213 | M: Ross Zwisler <ross.zwisler@linux.intel.com> | 4213 | M: Ross Zwisler <ross.zwisler@linux.intel.com> |
4214 | L: linux-fsdevel@vger.kernel.org | 4214 | L: linux-fsdevel@vger.kernel.org |
@@ -4217,6 +4217,12 @@ F: fs/dax.c
4217 | F: include/linux/dax.h | 4217 | F: include/linux/dax.h |
4218 | F: include/trace/events/fs_dax.h | 4218 | F: include/trace/events/fs_dax.h |
4219 | 4219 | ||
4220 | DEVICE DIRECT ACCESS (DAX) | ||
4221 | M: Dan Williams <dan.j.williams@intel.com> | ||
4222 | L: linux-nvdimm@lists.01.org | ||
4223 | S: Supported | ||
4224 | F: drivers/dax/ | ||
4225 | |||
4220 | DIRECTORY NOTIFICATION (DNOTIFY) | 4226 | DIRECTORY NOTIFICATION (DNOTIFY) |
4221 | M: Jan Kara <jack@suse.cz> | 4227 | M: Jan Kara <jack@suse.cz> |
4222 | R: Amir Goldstein <amir73il@gmail.com> | 4228 | R: Amir Goldstein <amir73il@gmail.com> |
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 6bf730063e3f..2dbdf59258d9 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -12,6 +12,7 @@
12 | 12 | ||
13 | #define MAP_SHARED 0x01 /* Share changes */ | 13 | #define MAP_SHARED 0x01 /* Share changes */ |
14 | #define MAP_PRIVATE 0x02 /* Changes are private */ | 14 | #define MAP_PRIVATE 0x02 /* Changes are private */ |
15 | #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ | ||
15 | #define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ | 16 | #define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ |
16 | #define MAP_FIXED 0x100 /* Interpret addr exactly */ | 17 | #define MAP_FIXED 0x100 /* Interpret addr exactly */ |
17 | #define MAP_ANONYMOUS 0x10 /* don't use a file */ | 18 | #define MAP_ANONYMOUS 0x10 /* don't use a file */ |
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 20c3df7a8fdd..606e02ca4b6c 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -29,6 +29,7 @@
29 | */ | 29 | */ |
30 | #define MAP_SHARED 0x001 /* Share changes */ | 30 | #define MAP_SHARED 0x001 /* Share changes */ |
31 | #define MAP_PRIVATE 0x002 /* Changes are private */ | 31 | #define MAP_PRIVATE 0x002 /* Changes are private */ |
32 | #define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ | ||
32 | #define MAP_TYPE 0x00f /* Mask for type of mapping */ | 33 | #define MAP_TYPE 0x00f /* Mask for type of mapping */ |
33 | #define MAP_FIXED 0x010 /* Interpret addr exactly */ | 34 | #define MAP_FIXED 0x010 /* Interpret addr exactly */ |
34 | 35 | ||
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index d1af0d74a188..80510ba44c08 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -12,6 +12,7 @@
12 | 12 | ||
13 | #define MAP_SHARED 0x01 /* Share changes */ | 13 | #define MAP_SHARED 0x01 /* Share changes */ |
14 | #define MAP_PRIVATE 0x02 /* Changes are private */ | 14 | #define MAP_PRIVATE 0x02 /* Changes are private */ |
15 | #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ | ||
15 | #define MAP_TYPE 0x03 /* Mask for type of mapping */ | 16 | #define MAP_TYPE 0x03 /* Mask for type of mapping */ |
16 | #define MAP_FIXED 0x04 /* Interpret addr exactly */ | 17 | #define MAP_FIXED 0x04 /* Interpret addr exactly */ |
17 | #define MAP_ANONYMOUS 0x10 /* don't use a file */ | 18 | #define MAP_ANONYMOUS 0x10 /* don't use a file */ |
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 2bfe590694fc..3e9d01ada81f 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -36,6 +36,7 @@
36 | */ | 36 | */ |
37 | #define MAP_SHARED 0x001 /* Share changes */ | 37 | #define MAP_SHARED 0x001 /* Share changes */ |
38 | #define MAP_PRIVATE 0x002 /* Changes are private */ | 38 | #define MAP_PRIVATE 0x002 /* Changes are private */ |
39 | #define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ | ||
39 | #define MAP_TYPE 0x00f /* Mask for type of mapping */ | 40 | #define MAP_TYPE 0x00f /* Mask for type of mapping */ |
40 | #define MAP_FIXED 0x010 /* Interpret addr exactly */ | 41 | #define MAP_FIXED 0x010 /* Interpret addr exactly */ |
41 | 42 | ||
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 9c2c49b6a240..ff2580e7611d 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -183,13 +183,33 @@ static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | 185 | ||
186 | static int xlat_nvdimm_status(void *buf, unsigned int cmd, u32 status) | 186 | #define ACPI_LABELS_LOCKED 3 |
187 | |||
188 | static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd, | ||
189 | u32 status) | ||
187 | { | 190 | { |
191 | struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); | ||
192 | |||
188 | switch (cmd) { | 193 | switch (cmd) { |
189 | case ND_CMD_GET_CONFIG_SIZE: | 194 | case ND_CMD_GET_CONFIG_SIZE: |
195 | /* | ||
196 | * In the _LSI, _LSR, _LSW case the locked status is | ||
197 | * communicated via the read/write commands | ||
198 | */ | ||
199 | if (nfit_mem->has_lsi) | ||
200 | break; | ||
201 | |||
190 | if (status >> 16 & ND_CONFIG_LOCKED) | 202 | if (status >> 16 & ND_CONFIG_LOCKED) |
191 | return -EACCES; | 203 | return -EACCES; |
192 | break; | 204 | break; |
205 | case ND_CMD_GET_CONFIG_DATA: | ||
206 | if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED) | ||
207 | return -EACCES; | ||
208 | break; | ||
209 | case ND_CMD_SET_CONFIG_DATA: | ||
210 | if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED) | ||
211 | return -EACCES; | ||
212 | break; | ||
193 | default: | 213 | default: |
194 | break; | 214 | break; |
195 | } | 215 | } |
@@ -205,13 +225,182 @@ static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
205 | { | 225 | { |
206 | if (!nvdimm) | 226 | if (!nvdimm) |
207 | return xlat_bus_status(buf, cmd, status); | 227 | return xlat_bus_status(buf, cmd, status); |
208 | return xlat_nvdimm_status(buf, cmd, status); | 228 | return xlat_nvdimm_status(nvdimm, buf, cmd, status); |
229 | } | ||
230 | |||
231 | /* convert _LS{I,R} packages to the buffer object acpi_nfit_ctl expects */ | ||
232 | static union acpi_object *pkg_to_buf(union acpi_object *pkg) | ||
233 | { | ||
234 | int i; | ||
235 | void *dst; | ||
236 | size_t size = 0; | ||
237 | union acpi_object *buf = NULL; | ||
238 | |||
239 | if (pkg->type != ACPI_TYPE_PACKAGE) { | ||
240 | WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", | ||
241 | pkg->type); | ||
242 | goto err; | ||
243 | } | ||
244 | |||
245 | for (i = 0; i < pkg->package.count; i++) { | ||
246 | union acpi_object *obj = &pkg->package.elements[i]; | ||
247 | |||
248 | if (obj->type == ACPI_TYPE_INTEGER) | ||
249 | size += 4; | ||
250 | else if (obj->type == ACPI_TYPE_BUFFER) | ||
251 | size += obj->buffer.length; | ||
252 | else { | ||
253 | WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", | ||
254 | obj->type); | ||
255 | goto err; | ||
256 | } | ||
257 | } | ||
258 | |||
259 | buf = ACPI_ALLOCATE(sizeof(*buf) + size); | ||
260 | if (!buf) | ||
261 | goto err; | ||
262 | |||
263 | dst = buf + 1; | ||
264 | buf->type = ACPI_TYPE_BUFFER; | ||
265 | buf->buffer.length = size; | ||
266 | buf->buffer.pointer = dst; | ||
267 | for (i = 0; i < pkg->package.count; i++) { | ||
268 | union acpi_object *obj = &pkg->package.elements[i]; | ||
269 | |||
270 | if (obj->type == ACPI_TYPE_INTEGER) { | ||
271 | memcpy(dst, &obj->integer.value, 4); | ||
272 | dst += 4; | ||
273 | } else if (obj->type == ACPI_TYPE_BUFFER) { | ||
274 | memcpy(dst, obj->buffer.pointer, obj->buffer.length); | ||
275 | dst += obj->buffer.length; | ||
276 | } | ||
277 | } | ||
278 | err: | ||
279 | ACPI_FREE(pkg); | ||
280 | return buf; | ||
281 | } | ||
282 | |||
283 | static union acpi_object *int_to_buf(union acpi_object *integer) | ||
284 | { | ||
285 | union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4); | ||
286 | void *dst = NULL; | ||
287 | |||
288 | if (!buf) | ||
289 | goto err; | ||
290 | |||
291 | if (integer->type != ACPI_TYPE_INTEGER) { | ||
292 | WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", | ||
293 | integer->type); | ||
294 | goto err; | ||
295 | } | ||
296 | |||
297 | dst = buf + 1; | ||
298 | buf->type = ACPI_TYPE_BUFFER; | ||
299 | buf->buffer.length = 4; | ||
300 | buf->buffer.pointer = dst; | ||
301 | memcpy(dst, &integer->integer.value, 4); | ||
302 | err: | ||
303 | ACPI_FREE(integer); | ||
304 | return buf; | ||
305 | } | ||
306 | |||
307 | static union acpi_object *acpi_label_write(acpi_handle handle, u32 offset, | ||
308 | u32 len, void *data) | ||
309 | { | ||
310 | acpi_status rc; | ||
311 | struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; | ||
312 | struct acpi_object_list input = { | ||
313 | .count = 3, | ||
314 | .pointer = (union acpi_object []) { | ||
315 | [0] = { | ||
316 | .integer.type = ACPI_TYPE_INTEGER, | ||
317 | .integer.value = offset, | ||
318 | }, | ||
319 | [1] = { | ||
320 | .integer.type = ACPI_TYPE_INTEGER, | ||
321 | .integer.value = len, | ||
322 | }, | ||
323 | [2] = { | ||
324 | .buffer.type = ACPI_TYPE_BUFFER, | ||
325 | .buffer.pointer = data, | ||
326 | .buffer.length = len, | ||
327 | }, | ||
328 | }, | ||
329 | }; | ||
330 | |||
331 | rc = acpi_evaluate_object(handle, "_LSW", &input, &buf); | ||
332 | if (ACPI_FAILURE(rc)) | ||
333 | return NULL; | ||
334 | return int_to_buf(buf.pointer); | ||
335 | } | ||
336 | |||
337 | static union acpi_object *acpi_label_read(acpi_handle handle, u32 offset, | ||
338 | u32 len) | ||
339 | { | ||
340 | acpi_status rc; | ||
341 | struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; | ||
342 | struct acpi_object_list input = { | ||
343 | .count = 2, | ||
344 | .pointer = (union acpi_object []) { | ||
345 | [0] = { | ||
346 | .integer.type = ACPI_TYPE_INTEGER, | ||
347 | .integer.value = offset, | ||
348 | }, | ||
349 | [1] = { | ||
350 | .integer.type = ACPI_TYPE_INTEGER, | ||
351 | .integer.value = len, | ||
352 | }, | ||
353 | }, | ||
354 | }; | ||
355 | |||
356 | rc = acpi_evaluate_object(handle, "_LSR", &input, &buf); | ||
357 | if (ACPI_FAILURE(rc)) | ||
358 | return NULL; | ||
359 | return pkg_to_buf(buf.pointer); | ||
360 | } | ||
361 | |||
362 | static union acpi_object *acpi_label_info(acpi_handle handle) | ||
363 | { | ||
364 | acpi_status rc; | ||
365 | struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; | ||
366 | |||
367 | rc = acpi_evaluate_object(handle, "_LSI", NULL, &buf); | ||
368 | if (ACPI_FAILURE(rc)) | ||
369 | return NULL; | ||
370 | return pkg_to_buf(buf.pointer); | ||
371 | } | ||
372 | |||
373 | static u8 nfit_dsm_revid(unsigned family, unsigned func) | ||
374 | { | ||
375 | static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = { | ||
376 | [NVDIMM_FAMILY_INTEL] = { | ||
377 | [NVDIMM_INTEL_GET_MODES] = 2, | ||
378 | [NVDIMM_INTEL_GET_FWINFO] = 2, | ||
379 | [NVDIMM_INTEL_START_FWUPDATE] = 2, | ||
380 | [NVDIMM_INTEL_SEND_FWUPDATE] = 2, | ||
381 | [NVDIMM_INTEL_FINISH_FWUPDATE] = 2, | ||
382 | [NVDIMM_INTEL_QUERY_FWUPDATE] = 2, | ||
383 | [NVDIMM_INTEL_SET_THRESHOLD] = 2, | ||
384 | [NVDIMM_INTEL_INJECT_ERROR] = 2, | ||
385 | }, | ||
386 | }; | ||
387 | u8 id; | ||
388 | |||
389 | if (family > NVDIMM_FAMILY_MAX) | ||
390 | return 0; | ||
391 | if (func > 31) | ||
392 | return 0; | ||
393 | id = revid_table[family][func]; | ||
394 | if (id == 0) | ||
395 | return 1; /* default */ | ||
396 | return id; | ||
209 | } | 397 | } |
210 | 398 | ||
211 | int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, | 399 | int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, |
212 | unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) | 400 | unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) |
213 | { | 401 | { |
214 | struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); | 402 | struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); |
403 | struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); | ||
215 | union acpi_object in_obj, in_buf, *out_obj; | 404 | union acpi_object in_obj, in_buf, *out_obj; |
216 | const struct nd_cmd_desc *desc = NULL; | 405 | const struct nd_cmd_desc *desc = NULL; |
217 | struct device *dev = acpi_desc->dev; | 406 | struct device *dev = acpi_desc->dev; |
@@ -235,7 +424,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
235 | } | 424 | } |
236 | 425 | ||
237 | if (nvdimm) { | 426 | if (nvdimm) { |
238 | struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); | ||
239 | struct acpi_device *adev = nfit_mem->adev; | 427 | struct acpi_device *adev = nfit_mem->adev; |
240 | 428 | ||
241 | if (!adev) | 429 | if (!adev) |
@@ -294,7 +482,29 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
294 | in_buf.buffer.pointer, | 482 | in_buf.buffer.pointer, |
295 | min_t(u32, 256, in_buf.buffer.length), true); | 483 | min_t(u32, 256, in_buf.buffer.length), true); |
296 | 484 | ||
297 | out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj); | 485 | /* call the BIOS, prefer the named methods over _DSM if available */ |
486 | if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi) | ||
487 | out_obj = acpi_label_info(handle); | ||
488 | else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) { | ||
489 | struct nd_cmd_get_config_data_hdr *p = buf; | ||
490 | |||
491 | out_obj = acpi_label_read(handle, p->in_offset, p->in_length); | ||
492 | } else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA | ||
493 | && nfit_mem->has_lsw) { | ||
494 | struct nd_cmd_set_config_hdr *p = buf; | ||
495 | |||
496 | out_obj = acpi_label_write(handle, p->in_offset, p->in_length, | ||
497 | p->in_buf); | ||
498 | } else { | ||
499 | u8 revid; | ||
500 | |||
501 | if (nvdimm) | ||
502 | revid = nfit_dsm_revid(nfit_mem->family, func); | ||
503 | else | ||
504 | revid = 1; | ||
505 | out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj); | ||
506 | } | ||
507 | |||
298 | if (!out_obj) { | 508 | if (!out_obj) { |
299 | dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, | 509 | dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, |
300 | cmd_name); | 510 | cmd_name); |
@@ -356,8 +566,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
356 | * Set fw_status for all the commands with a known format to be | 566 | * Set fw_status for all the commands with a known format to be |
357 | * later interpreted by xlat_status(). | 567 | * later interpreted by xlat_status(). |
358 | */ | 568 | */ |
359 | if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR) | 569 | if (i >= 1 && ((!nvdimm && cmd >= ND_CMD_ARS_CAP |
360 | || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR))) | 570 | && cmd <= ND_CMD_CLEAR_ERROR) |
571 | || (nvdimm && cmd >= ND_CMD_SMART | ||
572 | && cmd <= ND_CMD_VENDOR))) | ||
361 | fw_status = *(u32 *) out_obj->buffer.pointer; | 573 | fw_status = *(u32 *) out_obj->buffer.pointer; |
362 | 574 | ||
363 | if (offset + in_buf.buffer.length < buf_len) { | 575 | if (offset + in_buf.buffer.length < buf_len) { |
@@ -1431,6 +1643,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1431 | { | 1643 | { |
1432 | struct acpi_device *adev, *adev_dimm; | 1644 | struct acpi_device *adev, *adev_dimm; |
1433 | struct device *dev = acpi_desc->dev; | 1645 | struct device *dev = acpi_desc->dev; |
1646 | union acpi_object *obj; | ||
1434 | unsigned long dsm_mask; | 1647 | unsigned long dsm_mask; |
1435 | const guid_t *guid; | 1648 | const guid_t *guid; |
1436 | int i; | 1649 | int i; |
@@ -1463,7 +1676,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1463 | * different command sets. Note, that checking for function0 (bit0) | 1676 | * different command sets. Note, that checking for function0 (bit0) |
1464 | * tells us if any commands are reachable through this GUID. | 1677 | * tells us if any commands are reachable through this GUID. |
1465 | */ | 1678 | */ |
1466 | for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++) | 1679 | for (i = 0; i <= NVDIMM_FAMILY_MAX; i++) |
1467 | if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) | 1680 | if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) |
1468 | if (family < 0 || i == default_dsm_family) | 1681 | if (family < 0 || i == default_dsm_family) |
1469 | family = i; | 1682 | family = i; |
@@ -1473,7 +1686,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1473 | if (override_dsm_mask && !disable_vendor_specific) | 1686 | if (override_dsm_mask && !disable_vendor_specific) |
1474 | dsm_mask = override_dsm_mask; | 1687 | dsm_mask = override_dsm_mask; |
1475 | else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) { | 1688 | else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) { |
1476 | dsm_mask = 0x3fe; | 1689 | dsm_mask = NVDIMM_INTEL_CMDMASK; |
1477 | if (disable_vendor_specific) | 1690 | if (disable_vendor_specific) |
1478 | dsm_mask &= ~(1 << ND_CMD_VENDOR); | 1691 | dsm_mask &= ~(1 << ND_CMD_VENDOR); |
1479 | } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) { | 1692 | } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) { |
@@ -1493,9 +1706,32 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1493 | 1706 | ||
1494 | guid = to_nfit_uuid(nfit_mem->family); | 1707 | guid = to_nfit_uuid(nfit_mem->family); |
1495 | for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) | 1708 | for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) |
1496 | if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i)) | 1709 | if (acpi_check_dsm(adev_dimm->handle, guid, |
1710 | nfit_dsm_revid(nfit_mem->family, i), | ||
1711 | 1ULL << i)) | ||
1497 | set_bit(i, &nfit_mem->dsm_mask); | 1712 | set_bit(i, &nfit_mem->dsm_mask); |
1498 | 1713 | ||
1714 | obj = acpi_label_info(adev_dimm->handle); | ||
1715 | if (obj) { | ||
1716 | ACPI_FREE(obj); | ||
1717 | nfit_mem->has_lsi = 1; | ||
1718 | dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev)); | ||
1719 | } | ||
1720 | |||
1721 | obj = acpi_label_read(adev_dimm->handle, 0, 0); | ||
1722 | if (obj) { | ||
1723 | ACPI_FREE(obj); | ||
1724 | nfit_mem->has_lsr = 1; | ||
1725 | dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); | ||
1726 | } | ||
1727 | |||
1728 | obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL); | ||
1729 | if (obj) { | ||
1730 | ACPI_FREE(obj); | ||
1731 | nfit_mem->has_lsw = 1; | ||
1732 | dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); | ||
1733 | } | ||
1734 | |||
1499 | return 0; | 1735 | return 0; |
1500 | } | 1736 | } |
1501 | 1737 | ||
@@ -1571,8 +1807,21 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
1571 | * userspace interface. | 1807 | * userspace interface. |
1572 | */ | 1808 | */ |
1573 | cmd_mask = 1UL << ND_CMD_CALL; | 1809 | cmd_mask = 1UL << ND_CMD_CALL; |
1574 | if (nfit_mem->family == NVDIMM_FAMILY_INTEL) | 1810 | if (nfit_mem->family == NVDIMM_FAMILY_INTEL) { |
1575 | cmd_mask |= nfit_mem->dsm_mask; | 1811 | /* |
1812 | * These commands have a 1:1 correspondence | ||
1813 | * between DSM payload and libnvdimm ioctl | ||
1814 | * payload format. | ||
1815 | */ | ||
1816 | cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; | ||
1817 | } | ||
1818 | |||
1819 | if (nfit_mem->has_lsi) | ||
1820 | set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); | ||
1821 | if (nfit_mem->has_lsr) | ||
1822 | set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); | ||
1823 | if (nfit_mem->has_lsw) | ||
1824 | set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); | ||
1576 | 1825 | ||
1577 | flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush | 1826 | flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush |
1578 | : NULL; | 1827 | : NULL; |
@@ -1645,6 +1894,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
1645 | int i; | 1894 | int i; |
1646 | 1895 | ||
1647 | nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; | 1896 | nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; |
1897 | nd_desc->bus_dsm_mask = acpi_desc->bus_nfit_cmd_force_en; | ||
1648 | adev = to_acpi_dev(acpi_desc); | 1898 | adev = to_acpi_dev(acpi_desc); |
1649 | if (!adev) | 1899 | if (!adev) |
1650 | return; | 1900 | return; |
@@ -2239,7 +2489,7 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
2239 | if (ars_status->out_length | 2489 | if (ars_status->out_length |
2240 | < 44 + sizeof(struct nd_ars_record) * (i + 1)) | 2490 | < 44 + sizeof(struct nd_ars_record) * (i + 1)) |
2241 | break; | 2491 | break; |
2242 | rc = nvdimm_bus_add_poison(nvdimm_bus, | 2492 | rc = nvdimm_bus_add_badrange(nvdimm_bus, |
2243 | ars_status->records[i].err_address, | 2493 | ars_status->records[i].err_address, |
2244 | ars_status->records[i].length); | 2494 | ars_status->records[i].length); |
2245 | if (rc) | 2495 | if (rc) |
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index feeb95d574fa..b92921439657 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -67,7 +67,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
67 | continue; | 67 | continue; |
68 | 68 | ||
69 | /* If this fails due to an -ENOMEM, there is little we can do */ | 69 | /* If this fails due to an -ENOMEM, there is little we can do */ |
70 | nvdimm_bus_add_poison(acpi_desc->nvdimm_bus, | 70 | nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, |
71 | ALIGN(mce->addr, L1_CACHE_BYTES), | 71 | ALIGN(mce->addr, L1_CACHE_BYTES), |
72 | L1_CACHE_BYTES); | 72 | L1_CACHE_BYTES); |
73 | nvdimm_region_notify(nfit_spa->nd_region, | 73 | nvdimm_region_notify(nfit_spa->nd_region, |
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 54292db61262..f0cf18b2da8b 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -24,7 +24,7 @@
24 | /* ACPI 6.1 */ | 24 | /* ACPI 6.1 */ |
25 | #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" | 25 | #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" |
26 | 26 | ||
27 | /* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */ | 27 | /* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */ |
28 | #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" | 28 | #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" |
29 | 29 | ||
30 | /* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ | 30 | /* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ |
@@ -38,6 +38,37 @@
38 | | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | 38 | | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ |
39 | | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) | 39 | | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) |
40 | 40 | ||
41 | #define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT | ||
42 | |||
43 | #define NVDIMM_STANDARD_CMDMASK \ | ||
44 | (1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \ | ||
45 | | 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA \ | ||
46 | | 1 << ND_CMD_SET_CONFIG_DATA | 1 << ND_CMD_VENDOR_EFFECT_LOG_SIZE \ | ||
47 | | 1 << ND_CMD_VENDOR_EFFECT_LOG | 1 << ND_CMD_VENDOR) | ||
48 | |||
49 | /* | ||
50 | * Command numbers that the kernel needs to know about to handle | ||
51 | * non-default DSM revision ids | ||
52 | */ | ||
53 | enum nvdimm_family_cmds { | ||
54 | NVDIMM_INTEL_LATCH_SHUTDOWN = 10, | ||
55 | NVDIMM_INTEL_GET_MODES = 11, | ||
56 | NVDIMM_INTEL_GET_FWINFO = 12, | ||
57 | NVDIMM_INTEL_START_FWUPDATE = 13, | ||
58 | NVDIMM_INTEL_SEND_FWUPDATE = 14, | ||
59 | NVDIMM_INTEL_FINISH_FWUPDATE = 15, | ||
60 | NVDIMM_INTEL_QUERY_FWUPDATE = 16, | ||
61 | NVDIMM_INTEL_SET_THRESHOLD = 17, | ||
62 | NVDIMM_INTEL_INJECT_ERROR = 18, | ||
63 | }; | ||
64 | |||
65 | #define NVDIMM_INTEL_CMDMASK \ | ||
66 | (NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \ | ||
67 | | 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \ | ||
68 | | 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \ | ||
69 | | 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \ | ||
70 | | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN) | ||
71 | |||
41 | enum nfit_uuids { | 72 | enum nfit_uuids { |
42 | /* for simplicity alias the uuid index with the family id */ | 73 | /* for simplicity alias the uuid index with the family id */ |
43 | NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, | 74 | NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, |
@@ -140,6 +171,9 @@ struct nfit_mem {
140 | struct resource *flush_wpq; | 171 | struct resource *flush_wpq; |
141 | unsigned long dsm_mask; | 172 | unsigned long dsm_mask; |
142 | int family; | 173 | int family; |
174 | u32 has_lsi:1; | ||
175 | u32 has_lsr:1; | ||
176 | u32 has_lsw:1; | ||
143 | }; | 177 | }; |
144 | 178 | ||
145 | struct acpi_nfit_desc { | 179 | struct acpi_nfit_desc { |
@@ -167,6 +201,7 @@ struct acpi_nfit_desc {
167 | unsigned int init_complete:1; | 201 | unsigned int init_complete:1; |
168 | unsigned long dimm_cmd_force_en; | 202 | unsigned long dimm_cmd_force_en; |
169 | unsigned long bus_cmd_force_en; | 203 | unsigned long bus_cmd_force_en; |
204 | unsigned long bus_nfit_cmd_force_en; | ||
170 | int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, | 205 | int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, |
171 | void *iobuf, u64 len, int rw); | 206 | void *iobuf, u64 len, int rw); |
172 | }; | 207 | }; |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 923b417eaf4c..40579d0cb3d1 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -302,7 +302,6 @@ config BLK_DEV_SX8
302 | 302 | ||
303 | config BLK_DEV_RAM | 303 | config BLK_DEV_RAM |
304 | tristate "RAM block device support" | 304 | tristate "RAM block device support" |
305 | select DAX if BLK_DEV_RAM_DAX | ||
306 | ---help--- | 305 | ---help--- |
307 | Saying Y here will allow you to use a portion of your RAM memory as | 306 | Saying Y here will allow you to use a portion of your RAM memory as |
308 | a block device, so that you can make file systems on it, read and | 307 | a block device, so that you can make file systems on it, read and |
@@ -338,17 +337,6 @@ config BLK_DEV_RAM_SIZE
338 | The default value is 4096 kilobytes. Only change this if you know | 337 | The default value is 4096 kilobytes. Only change this if you know |
339 | what you are doing. | 338 | what you are doing. |
340 | 339 | ||
341 | config BLK_DEV_RAM_DAX | ||
342 | bool "Support Direct Access (DAX) to RAM block devices" | ||
343 | depends on BLK_DEV_RAM && FS_DAX | ||
344 | default n | ||
345 | help | ||
346 | Support filesystems using DAX to access RAM block devices. This | ||
347 | avoids double-buffering data in the page cache before copying it | ||
348 | to the block device. Answering Y will slightly enlarge the kernel, | ||
349 | and will prevent RAM block device backing store memory from being | ||
350 | allocated from highmem (only a problem for highmem systems). | ||
351 | |||
352 | config CDROM_PKTCDVD | 340 | config CDROM_PKTCDVD |
353 | tristate "Packet writing on CD/DVD media (DEPRECATED)" | 341 | tristate "Packet writing on CD/DVD media (DEPRECATED)" |
354 | depends on !UML | 342 | depends on !UML |
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 588360d79fca..8028a3a7e7fd 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -21,11 +21,6 @@
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
24 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
25 | #include <linux/pfn_t.h> | ||
26 | #include <linux/dax.h> | ||
27 | #include <linux/uio.h> | ||
28 | #endif | ||
29 | 24 | ||
30 | #include <linux/uaccess.h> | 25 | #include <linux/uaccess.h> |
31 | 26 | ||
@@ -45,9 +40,6 @@ struct brd_device {
45 | 40 | ||
46 | struct request_queue *brd_queue; | 41 | struct request_queue *brd_queue; |
47 | struct gendisk *brd_disk; | 42 | struct gendisk *brd_disk; |
48 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
49 | struct dax_device *dax_dev; | ||
50 | #endif | ||
51 | struct list_head brd_list; | 43 | struct list_head brd_list; |
52 | 44 | ||
53 | /* | 45 | /* |
@@ -112,9 +104,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
112 | * restriction might be able to be lifted. | 104 | * restriction might be able to be lifted. |
113 | */ | 105 | */ |
114 | gfp_flags = GFP_NOIO | __GFP_ZERO; | 106 | gfp_flags = GFP_NOIO | __GFP_ZERO; |
115 | #ifndef CONFIG_BLK_DEV_RAM_DAX | ||
116 | gfp_flags |= __GFP_HIGHMEM; | ||
117 | #endif | ||
118 | page = alloc_page(gfp_flags); | 107 | page = alloc_page(gfp_flags); |
119 | if (!page) | 108 | if (!page) |
120 | return NULL; | 109 | return NULL; |
@@ -334,43 +323,6 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
334 | return err; | 323 | return err; |
335 | } | 324 | } |
336 | 325 | ||
337 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
338 | static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff, | ||
339 | long nr_pages, void **kaddr, pfn_t *pfn) | ||
340 | { | ||
341 | struct page *page; | ||
342 | |||
343 | if (!brd) | ||
344 | return -ENODEV; | ||
345 | page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT); | ||
346 | if (!page) | ||
347 | return -ENOSPC; | ||
348 | *kaddr = page_address(page); | ||
349 | *pfn = page_to_pfn_t(page); | ||
350 | |||
351 | return 1; | ||
352 | } | ||
353 | |||
354 | static long brd_dax_direct_access(struct dax_device *dax_dev, | ||
355 | pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) | ||
356 | { | ||
357 | struct brd_device *brd = dax_get_private(dax_dev); | ||
358 | |||
359 | return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); | ||
360 | } | ||
361 | |||
362 | static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, | ||
363 | void *addr, size_t bytes, struct iov_iter *i) | ||
364 | { | ||
365 | return copy_from_iter(addr, bytes, i); | ||
366 | } | ||
367 | |||
368 | static const struct dax_operations brd_dax_ops = { | ||
369 | .direct_access = brd_dax_direct_access, | ||
370 | .copy_from_iter = brd_dax_copy_from_iter, | ||
371 | }; | ||
372 | #endif | ||
373 | |||
374 | static const struct block_device_operations brd_fops = { | 326 | static const struct block_device_operations brd_fops = { |
375 | .owner = THIS_MODULE, | 327 | .owner = THIS_MODULE, |
376 | .rw_page = brd_rw_page, | 328 | .rw_page = brd_rw_page, |
@@ -451,21 +403,8 @@ static struct brd_device *brd_alloc(int i)
451 | set_capacity(disk, rd_size * 2); | 403 | set_capacity(disk, rd_size * 2); |
452 | disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; | 404 | disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; |
453 | 405 | ||
454 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
455 | queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); | ||
456 | brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops); | ||
457 | if (!brd->dax_dev) | ||
458 | goto out_free_inode; | ||
459 | #endif | ||
460 | |||
461 | |||
462 | return brd; | 406 | return brd; |
463 | 407 | ||
464 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
465 | out_free_inode: | ||
466 | kill_dax(brd->dax_dev); | ||
467 | put_dax(brd->dax_dev); | ||
468 | #endif | ||
469 | out_free_queue: | 408 | out_free_queue: |
470 | blk_cleanup_queue(brd->brd_queue); | 409 | blk_cleanup_queue(brd->brd_queue); |
471 | out_free_dev: | 410 | out_free_dev: |
@@ -505,10 +444,6 @@ out:
505 | static void brd_del_one(struct brd_device *brd) | 444 | static void brd_del_one(struct brd_device *brd) |
506 | { | 445 | { |
507 | list_del(&brd->brd_list); | 446 | list_del(&brd->brd_list); |
508 | #ifdef CONFIG_BLK_DEV_RAM_DAX | ||
509 | kill_dax(brd->dax_dev); | ||
510 | put_dax(brd->dax_dev); | ||
511 | #endif | ||
512 | del_gendisk(brd->brd_disk); | 447 | del_gendisk(brd->brd_disk); |
513 | brd_free(brd); | 448 | brd_free(brd); |
514 | } | 449 | } |
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index e9f3b3e4bbf4..6833ada237ab 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -222,7 +222,8 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
222 | unsigned long size) | 222 | unsigned long size) |
223 | { | 223 | { |
224 | struct resource *res; | 224 | struct resource *res; |
225 | phys_addr_t phys; | 225 | /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */ |
226 | phys_addr_t uninitialized_var(phys); | ||
226 | int i; | 227 | int i; |
227 | 228 | ||
228 | for (i = 0; i < dev_dax->num_resources; i++) { | 229 | for (i = 0; i < dev_dax->num_resources; i++) { |
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 557b93703532..3ec804672601 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -92,21 +92,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
92 | long len; | 92 | long len; |
93 | 93 | ||
94 | if (blocksize != PAGE_SIZE) { | 94 | if (blocksize != PAGE_SIZE) { |
95 | pr_err("VFS (%s): error: unsupported blocksize for dax\n", | 95 | pr_debug("VFS (%s): error: unsupported blocksize for dax\n", |
96 | sb->s_id); | 96 | sb->s_id); |
97 | return -EINVAL; | 97 | return -EINVAL; |
98 | } | 98 | } |
99 | 99 | ||
100 | err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); | 100 | err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); |
101 | if (err) { | 101 | if (err) { |
102 | pr_err("VFS (%s): error: unaligned partition for dax\n", | 102 | pr_debug("VFS (%s): error: unaligned partition for dax\n", |
103 | sb->s_id); | 103 | sb->s_id); |
104 | return err; | 104 | return err; |
105 | } | 105 | } |
106 | 106 | ||
107 | dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); | 107 | dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); |
108 | if (!dax_dev) { | 108 | if (!dax_dev) { |
109 | pr_err("VFS (%s): error: device does not support dax\n", | 109 | pr_debug("VFS (%s): error: device does not support dax\n", |
110 | sb->s_id); | 110 | sb->s_id); |
111 | return -EOPNOTSUPP; | 111 | return -EOPNOTSUPP; |
112 | } | 112 | } |
@@ -118,7 +118,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
118 | put_dax(dax_dev); | 118 | put_dax(dax_dev); |
119 | 119 | ||
120 | if (len < 1) { | 120 | if (len < 1) { |
121 | pr_err("VFS (%s): error: dax access failed (%ld)", | 121 | pr_debug("VFS (%s): error: dax access failed (%ld)\n", |
122 | sb->s_id, len); | 122 | sb->s_id, len); |
123 | return len < 0 ? len : -EIO; | 123 | return len < 0 ? len : -EIO; |
124 | } | 124 | } |
@@ -273,9 +273,6 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
273 | void arch_wb_cache_pmem(void *addr, size_t size); | 273 | void arch_wb_cache_pmem(void *addr, size_t size); |
274 | void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) | 274 | void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) |
275 | { | 275 | { |
276 | if (unlikely(!dax_alive(dax_dev))) | ||
277 | return; | ||
278 | |||
279 | if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) | 276 | if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) |
280 | return; | 277 | return; |
281 | 278 | ||
@@ -344,6 +341,9 @@ static struct inode *dax_alloc_inode(struct super_block *sb)
344 | struct inode *inode; | 341 | struct inode *inode; |
345 | 342 | ||
346 | dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); | 343 | dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); |
344 | if (!dax_dev) | ||
345 | return NULL; | ||
346 | |||
347 | inode = &dax_dev->inode; | 347 | inode = &dax_dev->inode; |
348 | inode->i_rdev = 0; | 348 | inode->i_rdev = 0; |
349 | return inode; | 349 | return inode; |
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 447e0e14f3b6..70d5f3ad9909 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -21,6 +21,7 @@ libnvdimm-y += region_devs.o
21 | libnvdimm-y += region.o | 21 | libnvdimm-y += region.o |
22 | libnvdimm-y += namespace_devs.o | 22 | libnvdimm-y += namespace_devs.o |
23 | libnvdimm-y += label.o | 23 | libnvdimm-y += label.o |
24 | libnvdimm-y += badrange.o | ||
24 | libnvdimm-$(CONFIG_ND_CLAIM) += claim.o | 25 | libnvdimm-$(CONFIG_ND_CLAIM) += claim.o |
25 | libnvdimm-$(CONFIG_BTT) += btt_devs.o | 26 | libnvdimm-$(CONFIG_BTT) += btt_devs.o |
26 | libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o | 27 | libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o |
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
new file mode 100644
index 000000000000..e068d72b4357
--- /dev/null
+++ b/drivers/nvdimm/badrange.c
@@ -0,0 +1,293 @@
1 | /* | ||
2 | * Copyright(c) 2017 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of version 2 of the GNU General Public License as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | */ | ||
13 | #include <linux/libnvdimm.h> | ||
14 | #include <linux/badblocks.h> | ||
15 | #include <linux/export.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/blkdev.h> | ||
18 | #include <linux/device.h> | ||
19 | #include <linux/ctype.h> | ||
20 | #include <linux/ndctl.h> | ||
21 | #include <linux/mutex.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/io.h> | ||
24 | #include "nd-core.h" | ||
25 | #include "nd.h" | ||
26 | |||
27 | void badrange_init(struct badrange *badrange) | ||
28 | { | ||
29 | INIT_LIST_HEAD(&badrange->list); | ||
30 | spin_lock_init(&badrange->lock); | ||
31 | } | ||
32 | EXPORT_SYMBOL_GPL(badrange_init); | ||
33 | |||
34 | static void append_badrange_entry(struct badrange *badrange, | ||
35 | struct badrange_entry *bre, u64 addr, u64 length) | ||
36 | { | ||
37 | lockdep_assert_held(&badrange->lock); | ||
38 | bre->start = addr; | ||
39 | bre->length = length; | ||
40 | list_add_tail(&bre->list, &badrange->list); | ||
41 | } | ||
42 | |||
43 | static int alloc_and_append_badrange_entry(struct badrange *badrange, | ||
44 | u64 addr, u64 length, gfp_t flags) | ||
45 | { | ||
46 | struct badrange_entry *bre; | ||
47 | |||
48 | bre = kzalloc(sizeof(*bre), flags); | ||
49 | if (!bre) | ||
50 | return -ENOMEM; | ||
51 | |||
52 | append_badrange_entry(badrange, bre, addr, length); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | static int add_badrange(struct badrange *badrange, u64 addr, u64 length) | ||
57 | { | ||
58 | struct badrange_entry *bre, *bre_new; | ||
59 | |||
60 | spin_unlock(&badrange->lock); | ||
61 | bre_new = kzalloc(sizeof(*bre_new), GFP_KERNEL); | ||
62 | spin_lock(&badrange->lock); | ||
63 | |||
64 | if (list_empty(&badrange->list)) { | ||
65 | if (!bre_new) | ||
66 | return -ENOMEM; | ||
67 | append_badrange_entry(badrange, bre_new, addr, length); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * There is a chance this is a duplicate, check for those first. | ||
73 | * This will be the common case as ARS_STATUS returns all known | ||
74 | * errors in the SPA space, and we can't query it per region | ||
75 | */ | ||
76 | list_for_each_entry(bre, &badrange->list, list) | ||
77 | if (bre->start == addr) { | ||
78 | /* If length has changed, update this list entry */ | ||
79 | if (bre->length != length) | ||
80 | bre->length = length; | ||
81 | kfree(bre_new); | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * If not a duplicate or a simple length update, add the entry as is, | ||
87 | * as any overlapping ranges will get resolved when the list is consumed | ||
88 | * and converted to badblocks | ||
89 | */ | ||
90 | if (!bre_new) | ||
91 | return -ENOMEM; | ||
92 | append_badrange_entry(badrange, bre_new, addr, length); | ||
93 | |||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | int badrange_add(struct badrange *badrange, u64 addr, u64 length) | ||
98 | { | ||
99 | int rc; | ||
100 | |||
101 | spin_lock(&badrange->lock); | ||
102 | rc = add_badrange(badrange, addr, length); | ||
103 | spin_unlock(&badrange->lock); | ||
104 | |||
105 | return rc; | ||
106 | } | ||
107 | EXPORT_SYMBOL_GPL(badrange_add); | ||
108 | |||
109 | void badrange_forget(struct badrange *badrange, phys_addr_t start, | ||
110 | unsigned int len) | ||
111 | { | ||
112 | struct list_head *badrange_list = &badrange->list; | ||
113 | u64 clr_end = start + len - 1; | ||
114 | struct badrange_entry *bre, *next; | ||
115 | |||
116 | spin_lock(&badrange->lock); | ||
117 | |||
118 | /* | ||
119 | * [start, clr_end] is the badrange interval being cleared. | ||
120 | * [bre->start, bre_end] is the badrange_list entry we're comparing | ||
121 | * the above interval against. The badrange list entry may need | ||
122 | * to be modified (update either start or length), deleted, or | ||
123 | * split into two based on the overlap characteristics | ||
124 | */ | ||
125 | |||
126 | list_for_each_entry_safe(bre, next, badrange_list, list) { | ||
127 | u64 bre_end = bre->start + bre->length - 1; | ||
128 | |||
129 | /* Skip intervals with no intersection */ | ||
130 | if (bre_end < start) | ||
131 | continue; | ||
132 | if (bre->start > clr_end) | ||
133 | continue; | ||
134 | /* Delete completely overlapped badrange entries */ | ||
135 | if ((bre->start >= start) && (bre_end <= clr_end)) { | ||
136 | list_del(&bre->list); | ||
137 | kfree(bre); | ||
138 | continue; | ||
139 | } | ||
140 | /* Adjust start point of partially cleared entries */ | ||
141 | if ((start <= bre->start) && (clr_end > bre->start)) { | ||
142 | bre->length -= clr_end - bre->start + 1; | ||
143 | bre->start = clr_end + 1; | ||
144 | continue; | ||
145 | } | ||
146 | /* Adjust bre->length for partial clearing at the tail end */ | ||
147 | if ((bre->start < start) && (bre_end <= clr_end)) { | ||
148 | /* bre->start remains the same */ | ||
149 | bre->length = start - bre->start; | ||
150 | continue; | ||
151 | } | ||
152 | /* | ||
153 | * If clearing in the middle of an entry, we split it into | ||
154 | * two by modifying the current entry to represent one half of | ||
155 | * the split, and adding a new entry for the second half. | ||
156 | */ | ||
157 | if ((bre->start < start) && (bre_end > clr_end)) { | ||
158 | u64 new_start = clr_end + 1; | ||
159 | u64 new_len = bre_end - new_start + 1; | ||
160 | |||
161 | /* Add new entry covering the right half */ | ||
162 | alloc_and_append_badrange_entry(badrange, new_start, | ||
163 | new_len, GFP_NOWAIT); | ||
164 | /* Adjust this entry to cover the left half */ | ||
165 | bre->length = start - bre->start; | ||
166 | continue; | ||
167 | } | ||
168 | } | ||
169 | spin_unlock(&badrange->lock); | ||
170 | } | ||
171 | EXPORT_SYMBOL_GPL(badrange_forget); | ||
172 | |||
173 | static void set_badblock(struct badblocks *bb, sector_t s, int num) | ||
174 | { | ||
175 | dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n", | ||
176 | (u64) s * 512, (u64) num * 512); | ||
177 | /* this isn't an error as the hardware will still throw an exception */ | ||
178 | if (badblocks_set(bb, s, num, 1)) | ||
179 | dev_info_once(bb->dev, "%s: failed for sector %llx\n", | ||
180 | __func__, (u64) s); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * __add_badblock_range() - Convert a physical address range to bad sectors | ||
185 | * @bb: badblocks instance to populate | ||
186 | * @ns_offset: namespace offset where the error range begins (in bytes) | ||
187 | * @len: number of bytes of badrange to be added | ||
188 | * | ||
189 | * This assumes that the range provided with (ns_offset, len) is within | ||
190 | * the bounds of physical addresses for this namespace, i.e. lies in the | ||
191 | * interval [ns_start, ns_start + ns_size) | ||
192 | */ | ||
193 | static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) | ||
194 | { | ||
195 | const unsigned int sector_size = 512; | ||
196 | sector_t start_sector, end_sector; | ||
197 | u64 num_sectors; | ||
198 | u32 rem; | ||
199 | |||
200 | start_sector = div_u64(ns_offset, sector_size); | ||
201 | end_sector = div_u64_rem(ns_offset + len, sector_size, &rem); | ||
202 | if (rem) | ||
203 | end_sector++; | ||
204 | num_sectors = end_sector - start_sector; | ||
205 | |||
206 | if (unlikely(num_sectors > (u64)INT_MAX)) { | ||
207 | u64 remaining = num_sectors; | ||
208 | sector_t s = start_sector; | ||
209 | |||
210 | while (remaining) { | ||
211 | int done = min_t(u64, remaining, INT_MAX); | ||
212 | |||
213 | set_badblock(bb, s, done); | ||
214 | remaining -= done; | ||
215 | s += done; | ||
216 | } | ||
217 | } else | ||
218 | set_badblock(bb, start_sector, num_sectors); | ||
219 | } | ||
220 | |||
221 | static void badblocks_populate(struct badrange *badrange, | ||
222 | struct badblocks *bb, const struct resource *res) | ||
223 | { | ||
224 | struct badrange_entry *bre; | ||
225 | |||
226 | if (list_empty(&badrange->list)) | ||
227 | return; | ||
228 | |||
229 | list_for_each_entry(bre, &badrange->list, list) { | ||
230 | u64 bre_end = bre->start + bre->length - 1; | ||
231 | |||
232 | /* Discard intervals with no intersection */ | ||
233 | if (bre_end < res->start) | ||
234 | continue; | ||
235 | if (bre->start > res->end) | ||
236 | continue; | ||
237 | /* Deal with any overlap after start of the namespace */ | ||
238 | if (bre->start >= res->start) { | ||
239 | u64 start = bre->start; | ||
240 | u64 len; | ||
241 | |||
242 | if (bre_end <= res->end) | ||
243 | len = bre->length; | ||
244 | else | ||
245 | len = res->start + resource_size(res) | ||
246 | - bre->start; | ||
247 | __add_badblock_range(bb, start - res->start, len); | ||
248 | continue; | ||
249 | } | ||
250 | /* | ||
251 | * Deal with overlap for badrange starting before | ||
252 | * the namespace. | ||
253 | */ | ||
254 | if (bre->start < res->start) { | ||
255 | u64 len; | ||
256 | |||
257 | if (bre_end < res->end) | ||
258 | len = bre->start + bre->length - res->start; | ||
259 | else | ||
260 | len = resource_size(res); | ||
261 | __add_badblock_range(bb, 0, len); | ||
262 | } | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /** | ||
267 | * nvdimm_badblocks_populate() - Convert a list of badranges to badblocks | ||
268 | * @region: parent region of the range to interrogate | ||
269 | * @bb: badblocks instance to populate | ||
270 | * @res: resource range to consider | ||
271 | * | ||
272 | * The badrange list generated during bus initialization may contain | ||
273 | * multiple, possibly overlapping physical address ranges. Compare each | ||
274 | * of these ranges to the resource range currently being initialized, | ||
275 | * and add badblocks entries for all matching sub-ranges | ||
276 | */ | ||
277 | void nvdimm_badblocks_populate(struct nd_region *nd_region, | ||
278 | struct badblocks *bb, const struct resource *res) | ||
279 | { | ||
280 | struct nvdimm_bus *nvdimm_bus; | ||
281 | |||
282 | if (!is_memory(&nd_region->dev)) { | ||
283 | dev_WARN_ONCE(&nd_region->dev, 1, | ||
284 | "%s only valid for pmem regions\n", __func__); | ||
285 | return; | ||
286 | } | ||
287 | nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); | ||
288 | |||
289 | nvdimm_bus_lock(&nvdimm_bus->dev); | ||
290 | badblocks_populate(&nvdimm_bus->badrange, bb, res); | ||
291 | nvdimm_bus_unlock(&nvdimm_bus->dev); | ||
292 | } | ||
293 | EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); | ||
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index baf283986a7e..0a5e6cd758fe 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,6 +11,7 @@
11 | * General Public License for more details. | 11 | * General Public License for more details. |
12 | */ | 12 | */ |
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
14 | #include <linux/libnvdimm.h> | ||
14 | #include <linux/sched/mm.h> | 15 | #include <linux/sched/mm.h> |
15 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
16 | #include <linux/uaccess.h> | 17 | #include <linux/uaccess.h> |
@@ -221,7 +222,7 @@ static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
221 | phys_addr_t phys, u64 cleared) | 222 | phys_addr_t phys, u64 cleared) |
222 | { | 223 | { |
223 | if (cleared > 0) | 224 | if (cleared > 0) |
224 | nvdimm_forget_poison(nvdimm_bus, phys, cleared); | 225 | badrange_forget(&nvdimm_bus->badrange, phys, cleared); |
225 | 226 | ||
226 | if (cleared > 0 && cleared / 512) | 227 | if (cleared > 0 && cleared / 512) |
227 | nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared); | 228 | nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared); |
@@ -344,11 +345,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
344 | return NULL; | 345 | return NULL; |
345 | INIT_LIST_HEAD(&nvdimm_bus->list); | 346 | INIT_LIST_HEAD(&nvdimm_bus->list); |
346 | INIT_LIST_HEAD(&nvdimm_bus->mapping_list); | 347 | INIT_LIST_HEAD(&nvdimm_bus->mapping_list); |
347 | INIT_LIST_HEAD(&nvdimm_bus->poison_list); | ||
348 | init_waitqueue_head(&nvdimm_bus->probe_wait); | 348 | init_waitqueue_head(&nvdimm_bus->probe_wait); |
349 | nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); | 349 | nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); |
350 | mutex_init(&nvdimm_bus->reconfig_mutex); | 350 | mutex_init(&nvdimm_bus->reconfig_mutex); |
351 | spin_lock_init(&nvdimm_bus->poison_lock); | 351 | badrange_init(&nvdimm_bus->badrange); |
352 | if (nvdimm_bus->id < 0) { | 352 | if (nvdimm_bus->id < 0) { |
353 | kfree(nvdimm_bus); | 353 | kfree(nvdimm_bus); |
354 | return NULL; | 354 | return NULL; |
@@ -395,15 +395,15 @@ static int child_unregister(struct device *dev, void *data)
395 | return 0; | 395 | return 0; |
396 | } | 396 | } |
397 | 397 | ||
398 | static void free_poison_list(struct list_head *poison_list) | 398 | static void free_badrange_list(struct list_head *badrange_list) |
399 | { | 399 | { |
400 | struct nd_poison *pl, *next; | 400 | struct badrange_entry *bre, *next; |
401 | 401 | ||
402 | list_for_each_entry_safe(pl, next, poison_list, list) { | 402 | list_for_each_entry_safe(bre, next, badrange_list, list) { |
403 | list_del(&pl->list); | 403 | list_del(&bre->list); |
404 | kfree(pl); | 404 | kfree(bre); |
405 | } | 405 | } |
406 | list_del_init(poison_list); | 406 | list_del_init(badrange_list); |
407 | } | 407 | } |
408 | 408 | ||
409 | static int nd_bus_remove(struct device *dev) | 409 | static int nd_bus_remove(struct device *dev) |
@@ -417,9 +417,9 @@ static int nd_bus_remove(struct device *dev)
417 | nd_synchronize(); | 417 | nd_synchronize(); |
418 | device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); | 418 | device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); |
419 | 419 | ||
420 | spin_lock(&nvdimm_bus->poison_lock); | 420 | spin_lock(&nvdimm_bus->badrange.lock); |
421 | free_poison_list(&nvdimm_bus->poison_list); | 421 | free_badrange_list(&nvdimm_bus->badrange.list); |
422 | spin_unlock(&nvdimm_bus->poison_lock); | 422 | spin_unlock(&nvdimm_bus->badrange.lock); |
423 | 423 | ||
424 | nvdimm_bus_destroy_ndctl(nvdimm_bus); | 424 | nvdimm_bus_destroy_ndctl(nvdimm_bus); |
425 | 425 | ||
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index bb71f0cf8f5d..1dc527660637 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -398,265 +398,11 @@ struct attribute_group nvdimm_bus_attribute_group = {
398 | }; | 398 | }; |
399 | EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); | 399 | EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); |
400 | 400 | ||
401 | static void set_badblock(struct badblocks *bb, sector_t s, int num) | 401 | int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) |
402 | { | 402 | { |
403 | dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", | 403 | return badrange_add(&nvdimm_bus->badrange, addr, length); |
404 | (u64) s * 512, (u64) num * 512); | ||
405 | /* this isn't an error as the hardware will still throw an exception */ | ||
406 | if (badblocks_set(bb, s, num, 1)) | ||
407 | dev_info_once(bb->dev, "%s: failed for sector %llx\n", | ||
408 | __func__, (u64) s); | ||
409 | } | 404 | } |
410 | 405 | EXPORT_SYMBOL_GPL(nvdimm_bus_add_badrange); | |
411 | /** | ||
412 | * __add_badblock_range() - Convert a physical address range to bad sectors | ||
413 | * @bb: badblocks instance to populate | ||
414 | * @ns_offset: namespace offset where the error range begins (in bytes) | ||
415 | * @len: number of bytes of poison to be added | ||
416 | * | ||
417 | * This assumes that the range provided with (ns_offset, len) is within | ||
418 | * the bounds of physical addresses for this namespace, i.e. lies in the | ||
419 | * interval [ns_start, ns_start + ns_size) | ||
420 | */ | ||
421 | static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) | ||
422 | { | ||
423 | const unsigned int sector_size = 512; | ||
424 | sector_t start_sector, end_sector; | ||
425 | u64 num_sectors; | ||
426 | u32 rem; | ||
427 | |||
428 | start_sector = div_u64(ns_offset, sector_size); | ||
429 | end_sector = div_u64_rem(ns_offset + len, sector_size, &rem); | ||
430 | if (rem) | ||
431 | end_sector++; | ||
432 | num_sectors = end_sector - start_sector; | ||
433 | |||
434 | if (unlikely(num_sectors > (u64)INT_MAX)) { | ||
435 | u64 remaining = num_sectors; | ||
436 | sector_t s = start_sector; | ||
437 | |||
438 | while (remaining) { | ||
439 | int done = min_t(u64, remaining, INT_MAX); | ||
440 | |||
441 | set_badblock(bb, s, done); | ||
442 | remaining -= done; | ||
443 | s += done; | ||
444 | } | ||
445 | } else | ||
446 | set_badblock(bb, start_sector, num_sectors); | ||
447 | } | ||
448 | |||
449 | static void badblocks_populate(struct list_head *poison_list, | ||
450 | struct badblocks *bb, const struct resource *res) | ||
451 | { | ||
452 | struct nd_poison *pl; | ||
453 | |||
454 | if (list_empty(poison_list)) | ||
455 | return; | ||
456 | |||
457 | list_for_each_entry(pl, poison_list, list) { | ||
458 | u64 pl_end = pl->start + pl->length - 1; | ||
459 | |||
460 | /* Discard intervals with no intersection */ | ||
461 | if (pl_end < res->start) | ||
462 | continue; | ||
463 | if (pl->start > res->end) | ||
464 | continue; | ||
465 | /* Deal with any overlap after start of the namespace */ | ||
466 | if (pl->start >= res->start) { | ||
467 | u64 start = pl->start; | ||
468 | u64 len; | ||
469 | |||
470 | if (pl_end <= res->end) | ||
471 | len = pl->length; | ||
472 | else | ||
473 | len = res->start + resource_size(res) | ||
474 | - pl->start; | ||
475 | __add_badblock_range(bb, start - res->start, len); | ||
476 | continue; | ||
477 | } | ||
478 | /* Deal with overlap for poison starting before the namespace */ | ||
479 | if (pl->start < res->start) { | ||
480 | u64 len; | ||
481 | |||
482 | if (pl_end < res->end) | ||
483 | len = pl->start + pl->length - res->start; | ||
484 | else | ||
485 | len = resource_size(res); | ||
486 | __add_badblock_range(bb, 0, len); | ||
487 | } | ||
488 | } | ||
489 | } | ||
490 | |||
491 | /** | ||
492 | * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks | ||
493 | * @region: parent region of the range to interrogate | ||
494 | * @bb: badblocks instance to populate | ||
495 | * @res: resource range to consider | ||
496 | * | ||
497 | * The poison list generated during bus initialization may contain | ||
498 | * multiple, possibly overlapping physical address ranges. Compare each | ||
499 | * of these ranges to the resource range currently being initialized, | ||
500 | * and add badblocks entries for all matching sub-ranges | ||
501 | */ | ||
502 | void nvdimm_badblocks_populate(struct nd_region *nd_region, | ||
503 | struct badblocks *bb, const struct resource *res) | ||
504 | { | ||
505 | struct nvdimm_bus *nvdimm_bus; | ||
506 | struct list_head *poison_list; | ||
507 | |||
508 | if (!is_memory(&nd_region->dev)) { | ||
509 | dev_WARN_ONCE(&nd_region->dev, 1, | ||
510 | "%s only valid for pmem regions\n", __func__); | ||
511 | return; | ||
512 | } | ||
513 | nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); | ||
514 | poison_list = &nvdimm_bus->poison_list; | ||
515 | |||
516 | nvdimm_bus_lock(&nvdimm_bus->dev); | ||
517 | badblocks_populate(poison_list, bb, res); | ||
518 | nvdimm_bus_unlock(&nvdimm_bus->dev); | ||
519 | } | ||
520 | EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); | ||
521 | |||
522 | static void append_poison_entry(struct nvdimm_bus *nvdimm_bus, | ||
523 | struct nd_poison *pl, u64 addr, u64 length) | ||
524 | { | ||
525 | lockdep_assert_held(&nvdimm_bus->poison_lock); | ||
526 | pl->start = addr; | ||
527 | pl->length = length; | ||
528 | list_add_tail(&pl->list, &nvdimm_bus->poison_list); | ||
529 | } | ||
530 | |||
531 | static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length, | ||
532 | gfp_t flags) | ||
533 | { | ||
534 | struct nd_poison *pl; | ||
535 | |||
536 | pl = kzalloc(sizeof(*pl), flags); | ||
537 | if (!pl) | ||
538 | return -ENOMEM; | ||
539 | |||
540 | append_poison_entry(nvdimm_bus, pl, addr, length); | ||
541 | return 0; | ||
542 | } | ||
543 | |||
544 | static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | ||
545 | { | ||
546 | struct nd_poison *pl, *pl_new; | ||
547 | |||
548 | spin_unlock(&nvdimm_bus->poison_lock); | ||
549 | pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL); | ||
550 | spin_lock(&nvdimm_bus->poison_lock); | ||
551 | |||
552 | if (list_empty(&nvdimm_bus->poison_list)) { | ||
553 | if (!pl_new) | ||
554 | return -ENOMEM; | ||
555 | append_poison_entry(nvdimm_bus, pl_new, addr, length); | ||
556 | return 0; | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * There is a chance this is a duplicate, check for those first. | ||
561 | * This will be the common case as ARS_STATUS returns all known | ||
562 | * errors in the SPA space, and we can't query it per region | ||
563 | */ | ||
564 | list_for_each_entry(pl, &nvdimm_bus->poison_list, list) | ||
565 | if (pl->start == addr) { | ||
566 | /* If length has changed, update this list entry */ | ||
567 | if (pl->length != length) | ||
568 | pl->length = length; | ||
569 | kfree(pl_new); | ||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * If not a duplicate or a simple length update, add the entry as is, | ||
575 | * as any overlapping ranges will get resolved when the list is consumed | ||
576 | * and converted to badblocks | ||
577 | */ | ||
578 | if (!pl_new) | ||
579 | return -ENOMEM; | ||
580 | append_poison_entry(nvdimm_bus, pl_new, addr, length); | ||
581 | |||
582 | return 0; | ||
583 | } | ||
584 | |||
585 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) | ||
586 | { | ||
587 | int rc; | ||
588 | |||
589 | spin_lock(&nvdimm_bus->poison_lock); | ||
590 | rc = bus_add_poison(nvdimm_bus, addr, length); | ||
591 | spin_unlock(&nvdimm_bus->poison_lock); | ||
592 | |||
593 | return rc; | ||
594 | } | ||
595 | EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); | ||
596 | |||
597 | void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start, | ||
598 | unsigned int len) | ||
599 | { | ||
600 | struct list_head *poison_list = &nvdimm_bus->poison_list; | ||
601 | u64 clr_end = start + len - 1; | ||
602 | struct nd_poison *pl, *next; | ||
603 | |||
604 | spin_lock(&nvdimm_bus->poison_lock); | ||
605 | WARN_ON_ONCE(list_empty(poison_list)); | ||
606 | |||
607 | /* | ||
608 | * [start, clr_end] is the poison interval being cleared. | ||
609 | * [pl->start, pl_end] is the poison_list entry we're comparing | ||
610 | * the above interval against. The poison list entry may need | ||
611 | * to be modified (update either start or length), deleted, or | ||
612 | * split into two based on the overlap characteristics | ||
613 | */ | ||
614 | |||
615 | list_for_each_entry_safe(pl, next, poison_list, list) { | ||
616 | u64 pl_end = pl->start + pl->length - 1; | ||
617 | |||
618 | /* Skip intervals with no intersection */ | ||
619 | if (pl_end < start) | ||
620 | continue; | ||
621 | if (pl->start > clr_end) | ||
622 | continue; | ||
623 | /* Delete completely overlapped poison entries */ | ||
624 | if ((pl->start >= start) && (pl_end <= clr_end)) { | ||
625 | list_del(&pl->list); | ||
626 | kfree(pl); | ||
627 | continue; | ||
628 | } | ||
629 | /* Adjust start point of partially cleared entries */ | ||
630 | if ((start <= pl->start) && (clr_end > pl->start)) { | ||
631 | pl->length -= clr_end - pl->start + 1; | ||
632 | pl->start = clr_end + 1; | ||
633 | continue; | ||
634 | } | ||
635 | /* Adjust pl->length for partial clearing at the tail end */ | ||
636 | if ((pl->start < start) && (pl_end <= clr_end)) { | ||
637 | /* pl->start remains the same */ | ||
638 | pl->length = start - pl->start; | ||
639 | continue; | ||
640 | } | ||
641 | /* | ||
642 | * If clearing in the middle of an entry, we split it into | ||
643 | * two by modifying the current entry to represent one half of | ||
644 | * the split, and adding a new entry for the second half. | ||
645 | */ | ||
646 | if ((pl->start < start) && (pl_end > clr_end)) { | ||
647 | u64 new_start = clr_end + 1; | ||
648 | u64 new_len = pl_end - new_start + 1; | ||
649 | |||
650 | /* Add new entry covering the right half */ | ||
651 | add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT); | ||
652 | /* Adjust this entry to cover the left half */ | ||
653 | pl->length = start - pl->start; | ||
654 | continue; | ||
655 | } | ||
656 | } | ||
657 | spin_unlock(&nvdimm_bus->poison_lock); | ||
658 | } | ||
659 | EXPORT_SYMBOL_GPL(nvdimm_forget_poison); | ||
660 | 406 | ||
661 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 407 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
662 | int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) | 408 | int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) |
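The poison-list bookkeeping removed above (set_badblock, badblocks_populate, bus_add_poison, nvdimm_forget_poison) is not lost: this series moves it behind the generic struct badrange embedded in struct nvdimm_bus, with the badrange_*() helpers added in a shared file elsewhere in the series (not shown in this hunk). Below is a minimal, non-compilable sketch of the replacement entry points as callers now see them; the wrapper functions are hypothetical, and only nvdimm_bus_add_badrange() and badrange_forget() are taken from this diff.

	/* Sketch: track and clear NVDIMM media errors via the badrange API. */

	/* Record a bad SPA range reported by ARS; duplicates and overlaps are
	 * resolved later, when the list is converted to badblocks. */
	static void example_note_media_error(struct nvdimm_bus *nvdimm_bus,
			u64 spa, u64 len)
	{
		nvdimm_bus_add_badrange(nvdimm_bus, spa, len);
	}

	/* Trim, split or drop tracked ranges covered by a successful clear-error. */
	static void example_note_error_cleared(struct nvdimm_bus *nvdimm_bus,
			phys_addr_t spa, unsigned int len)
	{
		badrange_forget(&nvdimm_bus->badrange, spa, len);
	}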
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index e0f0e3ce1a32..f8913b8124b6 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -55,6 +55,8 @@ static int nvdimm_probe(struct device *dev) | |||
55 | goto err; | 55 | goto err; |
56 | 56 | ||
57 | rc = nvdimm_init_config_data(ndd); | 57 | rc = nvdimm_init_config_data(ndd); |
58 | if (rc == -EACCES) | ||
59 | nvdimm_set_locked(dev); | ||
58 | if (rc) | 60 | if (rc) |
59 | goto err; | 61 | goto err; |
60 | 62 | ||
@@ -68,6 +70,7 @@ static int nvdimm_probe(struct device *dev) | |||
68 | rc = nd_label_reserve_dpa(ndd); | 70 | rc = nd_label_reserve_dpa(ndd); |
69 | if (ndd->ns_current >= 0) | 71 | if (ndd->ns_current >= 0) |
70 | nvdimm_set_aliasing(dev); | 72 | nvdimm_set_aliasing(dev); |
73 | nvdimm_clear_locked(dev); | ||
71 | nvdimm_bus_unlock(dev); | 74 | nvdimm_bus_unlock(dev); |
72 | 75 | ||
73 | if (rc) | 76 | if (rc) |
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index f0d1b7e5de01..097794d9f786 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -200,6 +200,13 @@ void nvdimm_set_locked(struct device *dev) | |||
200 | set_bit(NDD_LOCKED, &nvdimm->flags); | 200 | set_bit(NDD_LOCKED, &nvdimm->flags); |
201 | } | 201 | } |
202 | 202 | ||
203 | void nvdimm_clear_locked(struct device *dev) | ||
204 | { | ||
205 | struct nvdimm *nvdimm = to_nvdimm(dev); | ||
206 | |||
207 | clear_bit(NDD_LOCKED, &nvdimm->flags); | ||
208 | } | ||
209 | |||
203 | static void nvdimm_release(struct device *dev) | 210 | static void nvdimm_release(struct device *dev) |
204 | { | 211 | { |
205 | struct nvdimm *nvdimm = to_nvdimm(dev); | 212 | struct nvdimm *nvdimm = to_nvdimm(dev); |
@@ -324,6 +331,17 @@ static ssize_t commands_show(struct device *dev, | |||
324 | } | 331 | } |
325 | static DEVICE_ATTR_RO(commands); | 332 | static DEVICE_ATTR_RO(commands); |
326 | 333 | ||
334 | static ssize_t flags_show(struct device *dev, | ||
335 | struct device_attribute *attr, char *buf) | ||
336 | { | ||
337 | struct nvdimm *nvdimm = to_nvdimm(dev); | ||
338 | |||
339 | return sprintf(buf, "%s%s\n", | ||
340 | test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "", | ||
341 | test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : ""); | ||
342 | } | ||
343 | static DEVICE_ATTR_RO(flags); | ||
344 | |||
327 | static ssize_t state_show(struct device *dev, struct device_attribute *attr, | 345 | static ssize_t state_show(struct device *dev, struct device_attribute *attr, |
328 | char *buf) | 346 | char *buf) |
329 | { | 347 | { |
@@ -365,6 +383,7 @@ static DEVICE_ATTR_RO(available_slots); | |||
365 | 383 | ||
366 | static struct attribute *nvdimm_attributes[] = { | 384 | static struct attribute *nvdimm_attributes[] = { |
367 | &dev_attr_state.attr, | 385 | &dev_attr_state.attr, |
386 | &dev_attr_flags.attr, | ||
368 | &dev_attr_commands.attr, | 387 | &dev_attr_commands.attr, |
369 | &dev_attr_available_slots.attr, | 388 | &dev_attr_available_slots.attr, |
370 | NULL, | 389 | NULL, |
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 9c5f108910e3..de66c02f6140 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -1050,7 +1050,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels) | |||
1050 | nsindex = to_namespace_index(ndd, 0); | 1050 | nsindex = to_namespace_index(ndd, 0); |
1051 | memset(nsindex, 0, ndd->nsarea.config_size); | 1051 | memset(nsindex, 0, ndd->nsarea.config_size); |
1052 | for (i = 0; i < 2; i++) { | 1052 | for (i = 0; i < 2; i++) { |
1053 | int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); | 1053 | int rc = nd_label_write_index(ndd, i, 3 - i, ND_NSINDEX_INIT); |
1054 | 1054 | ||
1055 | if (rc) | 1055 | if (rc) |
1056 | return rc; | 1056 | return rc; |
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 3e4d1e7998da..bb3ba8cf24d4 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1620,7 +1620,7 @@ static umode_t namespace_visible(struct kobject *kobj, | |||
1620 | if (a == &dev_attr_resource.attr) { | 1620 | if (a == &dev_attr_resource.attr) { |
1621 | if (is_namespace_blk(dev)) | 1621 | if (is_namespace_blk(dev)) |
1622 | return 0; | 1622 | return 0; |
1623 | return a->mode; | 1623 | return 0400; |
1624 | } | 1624 | } |
1625 | 1625 | ||
1626 | if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { | 1626 | if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { |
@@ -1875,7 +1875,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) | |||
1875 | * @nspm: target namespace to create | 1875 | * @nspm: target namespace to create |
1876 | * @nd_label: target pmem namespace label to evaluate | 1876 | * @nd_label: target pmem namespace label to evaluate |
1877 | */ | 1877 | */ |
1878 | struct device *create_namespace_pmem(struct nd_region *nd_region, | 1878 | static struct device *create_namespace_pmem(struct nd_region *nd_region, |
1879 | struct nd_namespace_index *nsindex, | 1879 | struct nd_namespace_index *nsindex, |
1880 | struct nd_namespace_label *nd_label) | 1880 | struct nd_namespace_label *nd_label) |
1881 | { | 1881 | { |
@@ -2186,7 +2186,7 @@ static int add_namespace_resource(struct nd_region *nd_region, | |||
2186 | return i; | 2186 | return i; |
2187 | } | 2187 | } |
2188 | 2188 | ||
2189 | struct device *create_namespace_blk(struct nd_region *nd_region, | 2189 | static struct device *create_namespace_blk(struct nd_region *nd_region, |
2190 | struct nd_namespace_label *nd_label, int count) | 2190 | struct nd_namespace_label *nd_label, int count) |
2191 | { | 2191 | { |
2192 | 2192 | ||
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 86bc19ae30da..79274ead54fb 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -29,10 +29,9 @@ struct nvdimm_bus { | |||
29 | struct list_head list; | 29 | struct list_head list; |
30 | struct device dev; | 30 | struct device dev; |
31 | int id, probe_active; | 31 | int id, probe_active; |
32 | struct list_head poison_list; | ||
33 | struct list_head mapping_list; | 32 | struct list_head mapping_list; |
34 | struct mutex reconfig_mutex; | 33 | struct mutex reconfig_mutex; |
35 | spinlock_t poison_lock; | 34 | struct badrange badrange; |
36 | }; | 35 | }; |
37 | 36 | ||
38 | struct nvdimm { | 37 | struct nvdimm { |
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 9c758a91372b..e958f3724c41 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -34,12 +34,6 @@ enum { | |||
34 | NVDIMM_IO_ATOMIC = 1, | 34 | NVDIMM_IO_ATOMIC = 1, |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct nd_poison { | ||
38 | u64 start; | ||
39 | u64 length; | ||
40 | struct list_head list; | ||
41 | }; | ||
42 | |||
43 | struct nvdimm_drvdata { | 37 | struct nvdimm_drvdata { |
44 | struct device *dev; | 38 | struct device *dev; |
45 | int nslabel_size; | 39 | int nslabel_size; |
@@ -254,6 +248,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, | |||
254 | unsigned int len); | 248 | unsigned int len); |
255 | void nvdimm_set_aliasing(struct device *dev); | 249 | void nvdimm_set_aliasing(struct device *dev); |
256 | void nvdimm_set_locked(struct device *dev); | 250 | void nvdimm_set_locked(struct device *dev); |
251 | void nvdimm_clear_locked(struct device *dev); | ||
257 | struct nd_btt *to_nd_btt(struct device *dev); | 252 | struct nd_btt *to_nd_btt(struct device *dev); |
258 | 253 | ||
259 | struct nd_gen_sb { | 254 | struct nd_gen_sb { |
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 9576c444f0ab..65cc171c721d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -282,8 +282,16 @@ static struct attribute *nd_pfn_attributes[] = { | |||
282 | NULL, | 282 | NULL, |
283 | }; | 283 | }; |
284 | 284 | ||
285 | static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n) | ||
286 | { | ||
287 | if (a == &dev_attr_resource.attr) | ||
288 | return 0400; | ||
289 | return a->mode; | ||
290 | } | ||
291 | |||
285 | struct attribute_group nd_pfn_attribute_group = { | 292 | struct attribute_group nd_pfn_attribute_group = { |
286 | .attrs = nd_pfn_attributes, | 293 | .attrs = nd_pfn_attributes, |
294 | .is_visible = pfn_visible, | ||
287 | }; | 295 | }; |
288 | 296 | ||
289 | static const struct attribute_group *nd_pfn_attribute_groups[] = { | 297 | static const struct attribute_group *nd_pfn_attribute_groups[] = { |
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 829d760f651c..abaf38c61220 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -562,8 +562,12 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) | |||
562 | if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) | 562 | if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) |
563 | return 0; | 563 | return 0; |
564 | 564 | ||
565 | if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr) | 565 | if (a == &dev_attr_resource.attr) { |
566 | return 0; | 566 | if (is_nd_pmem(dev)) |
567 | return 0400; | ||
568 | else | ||
569 | return 0; | ||
570 | } | ||
567 | 571 | ||
568 | if (a == &dev_attr_deep_flush.attr) { | 572 | if (a == &dev_attr_deep_flush.attr) { |
569 | int has_flush = nvdimm_has_flush(nd_region); | 573 | int has_flush = nvdimm_has_flush(nd_region); |
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -526,13 +526,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, | |||
526 | static void *dax_insert_mapping_entry(struct address_space *mapping, | 526 | static void *dax_insert_mapping_entry(struct address_space *mapping, |
527 | struct vm_fault *vmf, | 527 | struct vm_fault *vmf, |
528 | void *entry, sector_t sector, | 528 | void *entry, sector_t sector, |
529 | unsigned long flags) | 529 | unsigned long flags, bool dirty) |
530 | { | 530 | { |
531 | struct radix_tree_root *page_tree = &mapping->page_tree; | 531 | struct radix_tree_root *page_tree = &mapping->page_tree; |
532 | void *new_entry; | 532 | void *new_entry; |
533 | pgoff_t index = vmf->pgoff; | 533 | pgoff_t index = vmf->pgoff; |
534 | 534 | ||
535 | if (vmf->flags & FAULT_FLAG_WRITE) | 535 | if (dirty) |
536 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 536 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
537 | 537 | ||
538 | if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { | 538 | if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { |
@@ -569,7 +569,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, | |||
569 | entry = new_entry; | 569 | entry = new_entry; |
570 | } | 570 | } |
571 | 571 | ||
572 | if (vmf->flags & FAULT_FLAG_WRITE) | 572 | if (dirty) |
573 | radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); | 573 | radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); |
574 | 574 | ||
575 | spin_unlock_irq(&mapping->tree_lock); | 575 | spin_unlock_irq(&mapping->tree_lock); |
@@ -825,38 +825,42 @@ out: | |||
825 | } | 825 | } |
826 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); | 826 | EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); |
827 | 827 | ||
828 | static int dax_insert_mapping(struct address_space *mapping, | 828 | static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) |
829 | struct block_device *bdev, struct dax_device *dax_dev, | ||
830 | sector_t sector, size_t size, void *entry, | ||
831 | struct vm_area_struct *vma, struct vm_fault *vmf) | ||
832 | { | 829 | { |
833 | unsigned long vaddr = vmf->address; | 830 | return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; |
834 | void *ret, *kaddr; | 831 | } |
832 | |||
833 | static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, | ||
834 | pfn_t *pfnp) | ||
835 | { | ||
836 | const sector_t sector = dax_iomap_sector(iomap, pos); | ||
835 | pgoff_t pgoff; | 837 | pgoff_t pgoff; |
838 | void *kaddr; | ||
836 | int id, rc; | 839 | int id, rc; |
837 | pfn_t pfn; | 840 | long length; |
838 | 841 | ||
839 | rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); | 842 | rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff); |
840 | if (rc) | 843 | if (rc) |
841 | return rc; | 844 | return rc; |
842 | |||
843 | id = dax_read_lock(); | 845 | id = dax_read_lock(); |
844 | rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); | 846 | length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), |
845 | if (rc < 0) { | 847 | &kaddr, pfnp); |
846 | dax_read_unlock(id); | 848 | if (length < 0) { |
847 | return rc; | 849 | rc = length; |
850 | goto out; | ||
848 | } | 851 | } |
852 | rc = -EINVAL; | ||
853 | if (PFN_PHYS(length) < size) | ||
854 | goto out; | ||
855 | if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1)) | ||
856 | goto out; | ||
857 | /* For larger pages we need devmap */ | ||
858 | if (length > 1 && !pfn_t_devmap(*pfnp)) | ||
859 | goto out; | ||
860 | rc = 0; | ||
861 | out: | ||
849 | dax_read_unlock(id); | 862 | dax_read_unlock(id); |
850 | 863 | return rc; | |
851 | ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0); | ||
852 | if (IS_ERR(ret)) | ||
853 | return PTR_ERR(ret); | ||
854 | |||
855 | trace_dax_insert_mapping(mapping->host, vmf, ret); | ||
856 | if (vmf->flags & FAULT_FLAG_WRITE) | ||
857 | return vm_insert_mixed_mkwrite(vma, vaddr, pfn); | ||
858 | else | ||
859 | return vm_insert_mixed(vma, vaddr, pfn); | ||
860 | } | 864 | } |
861 | 865 | ||
862 | /* | 866 | /* |
@@ -882,7 +886,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, | |||
882 | } | 886 | } |
883 | 887 | ||
884 | entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, | 888 | entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, |
885 | RADIX_DAX_ZERO_PAGE); | 889 | RADIX_DAX_ZERO_PAGE, false); |
886 | if (IS_ERR(entry2)) { | 890 | if (IS_ERR(entry2)) { |
887 | ret = VM_FAULT_SIGBUS; | 891 | ret = VM_FAULT_SIGBUS; |
888 | goto out; | 892 | goto out; |
@@ -941,11 +945,6 @@ int __dax_zero_page_range(struct block_device *bdev, | |||
941 | } | 945 | } |
942 | EXPORT_SYMBOL_GPL(__dax_zero_page_range); | 946 | EXPORT_SYMBOL_GPL(__dax_zero_page_range); |
943 | 947 | ||
944 | static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) | ||
945 | { | ||
946 | return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; | ||
947 | } | ||
948 | |||
949 | static loff_t | 948 | static loff_t |
950 | dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | 949 | dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, |
951 | struct iomap *iomap) | 950 | struct iomap *iomap) |
@@ -1085,19 +1084,33 @@ static int dax_fault_return(int error) | |||
1085 | return VM_FAULT_SIGBUS; | 1084 | return VM_FAULT_SIGBUS; |
1086 | } | 1085 | } |
1087 | 1086 | ||
1088 | static int dax_iomap_pte_fault(struct vm_fault *vmf, | 1087 | /* |
1088 | * MAP_SYNC on a dax mapping guarantees dirty metadata is | ||
1089 | * flushed on write-faults (non-cow), but not read-faults. | ||
1090 | */ | ||
1091 | static bool dax_fault_is_synchronous(unsigned long flags, | ||
1092 | struct vm_area_struct *vma, struct iomap *iomap) | ||
1093 | { | ||
1094 | return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) | ||
1095 | && (iomap->flags & IOMAP_F_DIRTY); | ||
1096 | } | ||
1097 | |||
1098 | static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, | ||
1089 | const struct iomap_ops *ops) | 1099 | const struct iomap_ops *ops) |
1090 | { | 1100 | { |
1091 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | 1101 | struct vm_area_struct *vma = vmf->vma; |
1102 | struct address_space *mapping = vma->vm_file->f_mapping; | ||
1092 | struct inode *inode = mapping->host; | 1103 | struct inode *inode = mapping->host; |
1093 | unsigned long vaddr = vmf->address; | 1104 | unsigned long vaddr = vmf->address; |
1094 | loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; | 1105 | loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; |
1095 | sector_t sector; | ||
1096 | struct iomap iomap = { 0 }; | 1106 | struct iomap iomap = { 0 }; |
1097 | unsigned flags = IOMAP_FAULT; | 1107 | unsigned flags = IOMAP_FAULT; |
1098 | int error, major = 0; | 1108 | int error, major = 0; |
1109 | bool write = vmf->flags & FAULT_FLAG_WRITE; | ||
1110 | bool sync; | ||
1099 | int vmf_ret = 0; | 1111 | int vmf_ret = 0; |
1100 | void *entry; | 1112 | void *entry; |
1113 | pfn_t pfn; | ||
1101 | 1114 | ||
1102 | trace_dax_pte_fault(inode, vmf, vmf_ret); | 1115 | trace_dax_pte_fault(inode, vmf, vmf_ret); |
1103 | /* | 1116 | /* |
@@ -1110,7 +1123,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1110 | goto out; | 1123 | goto out; |
1111 | } | 1124 | } |
1112 | 1125 | ||
1113 | if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) | 1126 | if (write && !vmf->cow_page) |
1114 | flags |= IOMAP_WRITE; | 1127 | flags |= IOMAP_WRITE; |
1115 | 1128 | ||
1116 | entry = grab_mapping_entry(mapping, vmf->pgoff, 0); | 1129 | entry = grab_mapping_entry(mapping, vmf->pgoff, 0); |
@@ -1145,9 +1158,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1145 | goto error_finish_iomap; | 1158 | goto error_finish_iomap; |
1146 | } | 1159 | } |
1147 | 1160 | ||
1148 | sector = dax_iomap_sector(&iomap, pos); | ||
1149 | |||
1150 | if (vmf->cow_page) { | 1161 | if (vmf->cow_page) { |
1162 | sector_t sector = dax_iomap_sector(&iomap, pos); | ||
1163 | |||
1151 | switch (iomap.type) { | 1164 | switch (iomap.type) { |
1152 | case IOMAP_HOLE: | 1165 | case IOMAP_HOLE: |
1153 | case IOMAP_UNWRITTEN: | 1166 | case IOMAP_UNWRITTEN: |
@@ -1173,22 +1186,55 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1173 | goto finish_iomap; | 1186 | goto finish_iomap; |
1174 | } | 1187 | } |
1175 | 1188 | ||
1189 | sync = dax_fault_is_synchronous(flags, vma, &iomap); | ||
1190 | |||
1176 | switch (iomap.type) { | 1191 | switch (iomap.type) { |
1177 | case IOMAP_MAPPED: | 1192 | case IOMAP_MAPPED: |
1178 | if (iomap.flags & IOMAP_F_NEW) { | 1193 | if (iomap.flags & IOMAP_F_NEW) { |
1179 | count_vm_event(PGMAJFAULT); | 1194 | count_vm_event(PGMAJFAULT); |
1180 | count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); | 1195 | count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); |
1181 | major = VM_FAULT_MAJOR; | 1196 | major = VM_FAULT_MAJOR; |
1182 | } | 1197 | } |
1183 | error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, | 1198 | error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn); |
1184 | sector, PAGE_SIZE, entry, vmf->vma, vmf); | 1199 | if (error < 0) |
1200 | goto error_finish_iomap; | ||
1201 | |||
1202 | entry = dax_insert_mapping_entry(mapping, vmf, entry, | ||
1203 | dax_iomap_sector(&iomap, pos), | ||
1204 | 0, write && !sync); | ||
1205 | if (IS_ERR(entry)) { | ||
1206 | error = PTR_ERR(entry); | ||
1207 | goto error_finish_iomap; | ||
1208 | } | ||
1209 | |||
1210 | /* | ||
1211 | * If we are doing synchronous page fault and inode needs fsync, | ||
1212 | * we can insert PTE into page tables only after that happens. | ||
1213 | * Skip insertion for now and return the pfn so that caller can | ||
1214 | * insert it after fsync is done. | ||
1215 | */ | ||
1216 | if (sync) { | ||
1217 | if (WARN_ON_ONCE(!pfnp)) { | ||
1218 | error = -EIO; | ||
1219 | goto error_finish_iomap; | ||
1220 | } | ||
1221 | *pfnp = pfn; | ||
1222 | vmf_ret = VM_FAULT_NEEDDSYNC | major; | ||
1223 | goto finish_iomap; | ||
1224 | } | ||
1225 | trace_dax_insert_mapping(inode, vmf, entry); | ||
1226 | if (write) | ||
1227 | error = vm_insert_mixed_mkwrite(vma, vaddr, pfn); | ||
1228 | else | ||
1229 | error = vm_insert_mixed(vma, vaddr, pfn); | ||
1230 | |||
1185 | /* -EBUSY is fine, somebody else faulted on the same PTE */ | 1231 | /* -EBUSY is fine, somebody else faulted on the same PTE */ |
1186 | if (error == -EBUSY) | 1232 | if (error == -EBUSY) |
1187 | error = 0; | 1233 | error = 0; |
1188 | break; | 1234 | break; |
1189 | case IOMAP_UNWRITTEN: | 1235 | case IOMAP_UNWRITTEN: |
1190 | case IOMAP_HOLE: | 1236 | case IOMAP_HOLE: |
1191 | if (!(vmf->flags & FAULT_FLAG_WRITE)) { | 1237 | if (!write) { |
1192 | vmf_ret = dax_load_hole(mapping, entry, vmf); | 1238 | vmf_ret = dax_load_hole(mapping, entry, vmf); |
1193 | goto finish_iomap; | 1239 | goto finish_iomap; |
1194 | } | 1240 | } |
@@ -1223,53 +1269,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, | |||
1223 | } | 1269 | } |
1224 | 1270 | ||
1225 | #ifdef CONFIG_FS_DAX_PMD | 1271 | #ifdef CONFIG_FS_DAX_PMD |
1226 | static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, | 1272 | /* |
1227 | loff_t pos, void *entry) | 1273 | * The 'colour' (ie low bits) within a PMD of a page offset. This comes up |
1228 | { | 1274 | * more often than one might expect in the below functions. |
1229 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | 1275 | */ |
1230 | const sector_t sector = dax_iomap_sector(iomap, pos); | 1276 | #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) |
1231 | struct dax_device *dax_dev = iomap->dax_dev; | ||
1232 | struct block_device *bdev = iomap->bdev; | ||
1233 | struct inode *inode = mapping->host; | ||
1234 | const size_t size = PMD_SIZE; | ||
1235 | void *ret = NULL, *kaddr; | ||
1236 | long length = 0; | ||
1237 | pgoff_t pgoff; | ||
1238 | pfn_t pfn = {}; | ||
1239 | int id; | ||
1240 | |||
1241 | if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0) | ||
1242 | goto fallback; | ||
1243 | |||
1244 | id = dax_read_lock(); | ||
1245 | length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); | ||
1246 | if (length < 0) | ||
1247 | goto unlock_fallback; | ||
1248 | length = PFN_PHYS(length); | ||
1249 | |||
1250 | if (length < size) | ||
1251 | goto unlock_fallback; | ||
1252 | if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) | ||
1253 | goto unlock_fallback; | ||
1254 | if (!pfn_t_devmap(pfn)) | ||
1255 | goto unlock_fallback; | ||
1256 | dax_read_unlock(id); | ||
1257 | |||
1258 | ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, | ||
1259 | RADIX_DAX_PMD); | ||
1260 | if (IS_ERR(ret)) | ||
1261 | goto fallback; | ||
1262 | |||
1263 | trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret); | ||
1264 | return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, | ||
1265 | pfn, vmf->flags & FAULT_FLAG_WRITE); | ||
1266 | |||
1267 | unlock_fallback: | ||
1268 | dax_read_unlock(id); | ||
1269 | fallback: | ||
1270 | trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret); | ||
1271 | return VM_FAULT_FALLBACK; | ||
1272 | } | ||
1273 | 1277 | ||
1274 | static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, | 1278 | static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, |
1275 | void *entry) | 1279 | void *entry) |
@@ -1288,7 +1292,7 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, | |||
1288 | goto fallback; | 1292 | goto fallback; |
1289 | 1293 | ||
1290 | ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, | 1294 | ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, |
1291 | RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE); | 1295 | RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); |
1292 | if (IS_ERR(ret)) | 1296 | if (IS_ERR(ret)) |
1293 | goto fallback; | 1297 | goto fallback; |
1294 | 1298 | ||
@@ -1310,13 +1314,14 @@ fallback: | |||
1310 | return VM_FAULT_FALLBACK; | 1314 | return VM_FAULT_FALLBACK; |
1311 | } | 1315 | } |
1312 | 1316 | ||
1313 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, | 1317 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
1314 | const struct iomap_ops *ops) | 1318 | const struct iomap_ops *ops) |
1315 | { | 1319 | { |
1316 | struct vm_area_struct *vma = vmf->vma; | 1320 | struct vm_area_struct *vma = vmf->vma; |
1317 | struct address_space *mapping = vma->vm_file->f_mapping; | 1321 | struct address_space *mapping = vma->vm_file->f_mapping; |
1318 | unsigned long pmd_addr = vmf->address & PMD_MASK; | 1322 | unsigned long pmd_addr = vmf->address & PMD_MASK; |
1319 | bool write = vmf->flags & FAULT_FLAG_WRITE; | 1323 | bool write = vmf->flags & FAULT_FLAG_WRITE; |
1324 | bool sync; | ||
1320 | unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; | 1325 | unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; |
1321 | struct inode *inode = mapping->host; | 1326 | struct inode *inode = mapping->host; |
1322 | int result = VM_FAULT_FALLBACK; | 1327 | int result = VM_FAULT_FALLBACK; |
@@ -1325,6 +1330,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1325 | void *entry; | 1330 | void *entry; |
1326 | loff_t pos; | 1331 | loff_t pos; |
1327 | int error; | 1332 | int error; |
1333 | pfn_t pfn; | ||
1328 | 1334 | ||
1329 | /* | 1335 | /* |
1330 | * Check whether offset isn't beyond end of file now. Caller is | 1336 | * Check whether offset isn't beyond end of file now. Caller is |
@@ -1332,7 +1338,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1332 | * this is a reliable test. | 1338 | * this is a reliable test. |
1333 | */ | 1339 | */ |
1334 | pgoff = linear_page_index(vma, pmd_addr); | 1340 | pgoff = linear_page_index(vma, pmd_addr); |
1335 | max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; | 1341 | max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); |
1336 | 1342 | ||
1337 | trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); | 1343 | trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); |
1338 | 1344 | ||
@@ -1356,13 +1362,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1356 | if ((pmd_addr + PMD_SIZE) > vma->vm_end) | 1362 | if ((pmd_addr + PMD_SIZE) > vma->vm_end) |
1357 | goto fallback; | 1363 | goto fallback; |
1358 | 1364 | ||
1359 | if (pgoff > max_pgoff) { | 1365 | if (pgoff >= max_pgoff) { |
1360 | result = VM_FAULT_SIGBUS; | 1366 | result = VM_FAULT_SIGBUS; |
1361 | goto out; | 1367 | goto out; |
1362 | } | 1368 | } |
1363 | 1369 | ||
1364 | /* If the PMD would extend beyond the file size */ | 1370 | /* If the PMD would extend beyond the file size */ |
1365 | if ((pgoff | PG_PMD_COLOUR) > max_pgoff) | 1371 | if ((pgoff | PG_PMD_COLOUR) >= max_pgoff) |
1366 | goto fallback; | 1372 | goto fallback; |
1367 | 1373 | ||
1368 | /* | 1374 | /* |
@@ -1400,9 +1406,37 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1400 | if (iomap.offset + iomap.length < pos + PMD_SIZE) | 1406 | if (iomap.offset + iomap.length < pos + PMD_SIZE) |
1401 | goto finish_iomap; | 1407 | goto finish_iomap; |
1402 | 1408 | ||
1409 | sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap); | ||
1410 | |||
1403 | switch (iomap.type) { | 1411 | switch (iomap.type) { |
1404 | case IOMAP_MAPPED: | 1412 | case IOMAP_MAPPED: |
1405 | result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry); | 1413 | error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); |
1414 | if (error < 0) | ||
1415 | goto finish_iomap; | ||
1416 | |||
1417 | entry = dax_insert_mapping_entry(mapping, vmf, entry, | ||
1418 | dax_iomap_sector(&iomap, pos), | ||
1419 | RADIX_DAX_PMD, write && !sync); | ||
1420 | if (IS_ERR(entry)) | ||
1421 | goto finish_iomap; | ||
1422 | |||
1423 | /* | ||
1424 | * If we are doing synchronous page fault and inode needs fsync, | ||
1425 | * we can insert PMD into page tables only after that happens. | ||
1426 | * Skip insertion for now and return the pfn so that caller can | ||
1427 | * insert it after fsync is done. | ||
1428 | */ | ||
1429 | if (sync) { | ||
1430 | if (WARN_ON_ONCE(!pfnp)) | ||
1431 | goto finish_iomap; | ||
1432 | *pfnp = pfn; | ||
1433 | result = VM_FAULT_NEEDDSYNC; | ||
1434 | goto finish_iomap; | ||
1435 | } | ||
1436 | |||
1437 | trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); | ||
1438 | result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, | ||
1439 | write); | ||
1406 | break; | 1440 | break; |
1407 | case IOMAP_UNWRITTEN: | 1441 | case IOMAP_UNWRITTEN: |
1408 | case IOMAP_HOLE: | 1442 | case IOMAP_HOLE: |
@@ -1442,7 +1476,7 @@ out: | |||
1442 | return result; | 1476 | return result; |
1443 | } | 1477 | } |
1444 | #else | 1478 | #else |
1445 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, | 1479 | static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, |
1446 | const struct iomap_ops *ops) | 1480 | const struct iomap_ops *ops) |
1447 | { | 1481 | { |
1448 | return VM_FAULT_FALLBACK; | 1482 | return VM_FAULT_FALLBACK; |
@@ -1452,7 +1486,9 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1452 | /** | 1486 | /** |
1453 | * dax_iomap_fault - handle a page fault on a DAX file | 1487 | * dax_iomap_fault - handle a page fault on a DAX file |
1454 | * @vmf: The description of the fault | 1488 | * @vmf: The description of the fault |
1455 | * @ops: iomap ops passed from the file system | 1489 | * @pe_size: Size of the page to fault in |
1490 | * @pfnp: PFN to insert for synchronous faults if fsync is required | ||
1491 | * @ops: Iomap ops passed from the file system | ||
1456 | * | 1492 | * |
1457 | * When a page fault occurs, filesystems may call this helper in | 1493 | * When a page fault occurs, filesystems may call this helper in |
1458 | * their fault handler for DAX files. dax_iomap_fault() assumes the caller | 1494 | * their fault handler for DAX files. dax_iomap_fault() assumes the caller |
@@ -1460,15 +1496,98 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, | |||
1460 | * successfully. | 1496 | * successfully. |
1461 | */ | 1497 | */ |
1462 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 1498 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
1463 | const struct iomap_ops *ops) | 1499 | pfn_t *pfnp, const struct iomap_ops *ops) |
1464 | { | 1500 | { |
1465 | switch (pe_size) { | 1501 | switch (pe_size) { |
1466 | case PE_SIZE_PTE: | 1502 | case PE_SIZE_PTE: |
1467 | return dax_iomap_pte_fault(vmf, ops); | 1503 | return dax_iomap_pte_fault(vmf, pfnp, ops); |
1468 | case PE_SIZE_PMD: | 1504 | case PE_SIZE_PMD: |
1469 | return dax_iomap_pmd_fault(vmf, ops); | 1505 | return dax_iomap_pmd_fault(vmf, pfnp, ops); |
1470 | default: | 1506 | default: |
1471 | return VM_FAULT_FALLBACK; | 1507 | return VM_FAULT_FALLBACK; |
1472 | } | 1508 | } |
1473 | } | 1509 | } |
1474 | EXPORT_SYMBOL_GPL(dax_iomap_fault); | 1510 | EXPORT_SYMBOL_GPL(dax_iomap_fault); |
1511 | |||
1512 | /** | ||
1513 | * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables | ||
1514 | * @vmf: The description of the fault | ||
1515 | * @pe_size: Size of entry to be inserted | ||
1516 | * @pfn: PFN to insert | ||
1517 | * | ||
1518 | * This function inserts writeable PTE or PMD entry into page tables for mmaped | ||
1519 | * DAX file. It takes care of marking corresponding radix tree entry as dirty | ||
1520 | * as well. | ||
1521 | */ | ||
1522 | static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, | ||
1523 | enum page_entry_size pe_size, | ||
1524 | pfn_t pfn) | ||
1525 | { | ||
1526 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | ||
1527 | void *entry, **slot; | ||
1528 | pgoff_t index = vmf->pgoff; | ||
1529 | int vmf_ret, error; | ||
1530 | |||
1531 | spin_lock_irq(&mapping->tree_lock); | ||
1532 | entry = get_unlocked_mapping_entry(mapping, index, &slot); | ||
1533 | /* Did we race with someone splitting entry or so? */ | ||
1534 | if (!entry || | ||
1535 | (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || | ||
1536 | (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { | ||
1537 | put_unlocked_mapping_entry(mapping, index, entry); | ||
1538 | spin_unlock_irq(&mapping->tree_lock); | ||
1539 | trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, | ||
1540 | VM_FAULT_NOPAGE); | ||
1541 | return VM_FAULT_NOPAGE; | ||
1542 | } | ||
1543 | radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); | ||
1544 | entry = lock_slot(mapping, slot); | ||
1545 | spin_unlock_irq(&mapping->tree_lock); | ||
1546 | switch (pe_size) { | ||
1547 | case PE_SIZE_PTE: | ||
1548 | error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); | ||
1549 | vmf_ret = dax_fault_return(error); | ||
1550 | break; | ||
1551 | #ifdef CONFIG_FS_DAX_PMD | ||
1552 | case PE_SIZE_PMD: | ||
1553 | vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, | ||
1554 | pfn, true); | ||
1555 | break; | ||
1556 | #endif | ||
1557 | default: | ||
1558 | vmf_ret = VM_FAULT_FALLBACK; | ||
1559 | } | ||
1560 | put_locked_mapping_entry(mapping, index); | ||
1561 | trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret); | ||
1562 | return vmf_ret; | ||
1563 | } | ||
1564 | |||
1565 | /** | ||
1566 | * dax_finish_sync_fault - finish synchronous page fault | ||
1567 | * @vmf: The description of the fault | ||
1568 | * @pe_size: Size of entry to be inserted | ||
1569 | * @pfn: PFN to insert | ||
1570 | * | ||
1571 | * This function ensures that the file range touched by the page fault is | ||
1572 | * stored persistently on the media and handles inserting of appropriate page | ||
1573 | * table entry. | ||
1574 | */ | ||
1575 | int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | ||
1576 | pfn_t pfn) | ||
1577 | { | ||
1578 | int err; | ||
1579 | loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; | ||
1580 | size_t len = 0; | ||
1581 | |||
1582 | if (pe_size == PE_SIZE_PTE) | ||
1583 | len = PAGE_SIZE; | ||
1584 | else if (pe_size == PE_SIZE_PMD) | ||
1585 | len = PMD_SIZE; | ||
1586 | else | ||
1587 | WARN_ON_ONCE(1); | ||
1588 | err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1); | ||
1589 | if (err) | ||
1590 | return VM_FAULT_SIGBUS; | ||
1591 | return dax_insert_pfn_mkwrite(vmf, pe_size, pfn); | ||
1592 | } | ||
1593 | EXPORT_SYMBOL_GPL(dax_finish_sync_fault); | ||
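The new pfnp argument together with dax_finish_sync_fault() defines the calling convention for synchronous (MAP_SYNC) faults. Below is a condensed, non-compilable sketch of a filesystem fault handler, assuming a hypothetical example_iomap_ops; the ext2, ext4 and xfs hunks that follow show the real handlers with their locking and journalling.

	static const struct iomap_ops example_iomap_ops;	/* assumed provided by the filesystem */

	static int example_dax_huge_fault(struct vm_fault *vmf,
			enum page_entry_size pe_size)
	{
		pfn_t pfn;
		int ret;

		/* For a synchronous write fault this returns VM_FAULT_NEEDDSYNC
		 * with the PTE/PMD not yet installed and the pfn reported back. */
		ret = dax_iomap_fault(vmf, pe_size, &pfn, &example_iomap_ops);
		if (ret & VM_FAULT_NEEDDSYNC)
			/* Flush metadata covering the faulted range, then insert
			 * the now-safe writeable entry. */
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
		return ret;
	}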
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index c67b486488fd..2da67699dc33 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf) | |||
100 | } | 100 | } |
101 | down_read(&ei->dax_sem); | 101 | down_read(&ei->dax_sem); |
102 | 102 | ||
103 | ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops); | 103 | ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops); |
104 | 104 | ||
105 | up_read(&ei->dax_sem); | 105 | up_read(&ei->dax_sem); |
106 | if (vmf->flags & FAULT_FLAG_WRITE) | 106 | if (vmf->flags & FAULT_FLAG_WRITE) |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ad204d2724ac..a0ae27b1bc66 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/quotaops.h> | 28 | #include <linux/quotaops.h> |
29 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
30 | #include <linux/uio.h> | 30 | #include <linux/uio.h> |
31 | #include <linux/mman.h> | ||
31 | #include "ext4.h" | 32 | #include "ext4.h" |
32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
33 | #include "xattr.h" | 34 | #include "xattr.h" |
@@ -297,6 +298,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, | |||
297 | */ | 298 | */ |
298 | bool write = (vmf->flags & FAULT_FLAG_WRITE) && | 299 | bool write = (vmf->flags & FAULT_FLAG_WRITE) && |
299 | (vmf->vma->vm_flags & VM_SHARED); | 300 | (vmf->vma->vm_flags & VM_SHARED); |
301 | pfn_t pfn; | ||
300 | 302 | ||
301 | if (write) { | 303 | if (write) { |
302 | sb_start_pagefault(sb); | 304 | sb_start_pagefault(sb); |
@@ -304,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, | |||
304 | down_read(&EXT4_I(inode)->i_mmap_sem); | 306 | down_read(&EXT4_I(inode)->i_mmap_sem); |
305 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, | 307 | handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, |
306 | EXT4_DATA_TRANS_BLOCKS(sb)); | 308 | EXT4_DATA_TRANS_BLOCKS(sb)); |
309 | if (IS_ERR(handle)) { | ||
310 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
311 | sb_end_pagefault(sb); | ||
312 | return VM_FAULT_SIGBUS; | ||
313 | } | ||
307 | } else { | 314 | } else { |
308 | down_read(&EXT4_I(inode)->i_mmap_sem); | 315 | down_read(&EXT4_I(inode)->i_mmap_sem); |
309 | } | 316 | } |
310 | if (!IS_ERR(handle)) | 317 | result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops); |
311 | result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); | ||
312 | else | ||
313 | result = VM_FAULT_SIGBUS; | ||
314 | if (write) { | 318 | if (write) { |
315 | if (!IS_ERR(handle)) | 319 | ext4_journal_stop(handle); |
316 | ext4_journal_stop(handle); | 320 | /* Handling synchronous page fault? */ |
321 | if (result & VM_FAULT_NEEDDSYNC) | ||
322 | result = dax_finish_sync_fault(vmf, pe_size, pfn); | ||
317 | up_read(&EXT4_I(inode)->i_mmap_sem); | 323 | up_read(&EXT4_I(inode)->i_mmap_sem); |
318 | sb_end_pagefault(sb); | 324 | sb_end_pagefault(sb); |
319 | } else { | 325 | } else { |
@@ -351,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
351 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | 357 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) |
352 | return -EIO; | 358 | return -EIO; |
353 | 359 | ||
360 | /* | ||
361 | * We don't support synchronous mappings for non-DAX files. At least | ||
362 | * until someone comes with a sensible use case. | ||
363 | */ | ||
364 | if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC)) | ||
365 | return -EOPNOTSUPP; | ||
366 | |||
354 | file_accessed(file); | 367 | file_accessed(file); |
355 | if (IS_DAX(file_inode(file))) { | 368 | if (IS_DAX(file_inode(file))) { |
356 | vma->vm_ops = &ext4_dax_vm_ops; | 369 | vma->vm_ops = &ext4_dax_vm_ops; |
@@ -469,6 +482,7 @@ const struct file_operations ext4_file_operations = { | |||
469 | .compat_ioctl = ext4_compat_ioctl, | 482 | .compat_ioctl = ext4_compat_ioctl, |
470 | #endif | 483 | #endif |
471 | .mmap = ext4_file_mmap, | 484 | .mmap = ext4_file_mmap, |
485 | .mmap_supported_flags = MAP_SYNC, | ||
472 | .open = ext4_file_open, | 486 | .open = ext4_file_open, |
473 | .release = ext4_release_file, | 487 | .release = ext4_release_file, |
474 | .fsync = ext4_sync_file, | 488 | .fsync = ext4_sync_file, |
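With ->mmap_supported_flags wired up, userspace can request synchronous mappings. Below is a minimal usage sketch; the file path is hypothetical and the fallback flag values are assumptions taken from the uapi headers. On kernels or filesystems without this support, the MAP_SHARED_VALIDATE mapping fails with EOPNOTSUPP instead of silently dropping MAP_SYNC.

	/* Sketch: request a MAP_SYNC mapping of a file on a DAX filesystem. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#ifndef MAP_SHARED_VALIDATE
	#define MAP_SHARED_VALIDATE	0x03	/* assumed value, per uapi headers */
	#endif
	#ifndef MAP_SYNC
	#define MAP_SYNC		0x80000	/* assumed value, per uapi headers */
	#endif

	int main(void)
	{
		int fd = open("/mnt/pmem/data", O_RDWR);	/* hypothetical path */
		void *p;

		if (fd < 0)
			return 1;
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");	/* EOPNOTSUPP: no MAP_SYNC support here */
			close(fd);
			return 1;
		}
		/* After the write fault, any metadata needed to reach this block is
		 * already durable; the application only has to flush its own stores
		 * (CPU cache flushes or msync), with no separate fsync() required. */
		memcpy(p, "hello", 5);
		msync(p, 4096, MS_SYNC);
		munmap(p, 4096);
		close(fd);
		return 0;
	}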
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d2b582fb141..0992d76f7ab1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3384,6 +3384,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3384 | return try_to_free_buffers(page); | 3384 | return try_to_free_buffers(page); |
3385 | } | 3385 | } |
3386 | 3386 | ||
3387 | static bool ext4_inode_datasync_dirty(struct inode *inode) | ||
3388 | { | ||
3389 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
3390 | |||
3391 | if (journal) | ||
3392 | return !jbd2_transaction_committed(journal, | ||
3393 | EXT4_I(inode)->i_datasync_tid); | ||
3394 | /* Any metadata buffers to write? */ | ||
3395 | if (!list_empty(&inode->i_mapping->private_list)) | ||
3396 | return true; | ||
3397 | return inode->i_state & I_DIRTY_DATASYNC; | ||
3398 | } | ||
3399 | |||
3387 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, | 3400 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, |
3388 | unsigned flags, struct iomap *iomap) | 3401 | unsigned flags, struct iomap *iomap) |
3389 | { | 3402 | { |
@@ -3497,6 +3510,8 @@ retry: | |||
3497 | } | 3510 | } |
3498 | 3511 | ||
3499 | iomap->flags = 0; | 3512 | iomap->flags = 0; |
3513 | if (ext4_inode_datasync_dirty(inode)) | ||
3514 | iomap->flags |= IOMAP_F_DIRTY; | ||
3500 | iomap->bdev = inode->i_sb->s_bdev; | 3515 | iomap->bdev = inode->i_sb->s_bdev; |
3501 | iomap->dax_dev = sbi->s_daxdev; | 3516 | iomap->dax_dev = sbi->s_daxdev; |
3502 | iomap->offset = first_block << blkbits; | 3517 | iomap->offset = first_block << blkbits; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index d2a85c9720e9..67546c7ad473 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -737,6 +737,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
737 | return err; | 737 | return err; |
738 | } | 738 | } |
739 | 739 | ||
740 | /* Return 1 when transaction with given tid has already committed. */ | ||
741 | int jbd2_transaction_committed(journal_t *journal, tid_t tid) | ||
742 | { | ||
743 | int ret = 1; | ||
744 | |||
745 | read_lock(&journal->j_state_lock); | ||
746 | if (journal->j_running_transaction && | ||
747 | journal->j_running_transaction->t_tid == tid) | ||
748 | ret = 0; | ||
749 | if (journal->j_committing_transaction && | ||
750 | journal->j_committing_transaction->t_tid == tid) | ||
751 | ret = 0; | ||
752 | read_unlock(&journal->j_state_lock); | ||
753 | return ret; | ||
754 | } | ||
755 | EXPORT_SYMBOL(jbd2_transaction_committed); | ||
756 | |||
740 | /* | 757 | /* |
741 | * When this function returns the transaction corresponding to tid | 758 | * When this function returns the transaction corresponding to tid |
742 | * will be completed. If the transaction has currently running, start | 759 | * will be completed. If the transaction has currently running, start |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 875231c36cb3..339e4c1c044d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -661,6 +661,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | |||
661 | [ilog2(VM_ACCOUNT)] = "ac", | 661 | [ilog2(VM_ACCOUNT)] = "ac", |
662 | [ilog2(VM_NORESERVE)] = "nr", | 662 | [ilog2(VM_NORESERVE)] = "nr", |
663 | [ilog2(VM_HUGETLB)] = "ht", | 663 | [ilog2(VM_HUGETLB)] = "ht", |
664 | [ilog2(VM_SYNC)] = "sf", | ||
664 | [ilog2(VM_ARCH_1)] = "ar", | 665 | [ilog2(VM_ARCH_1)] = "ar", |
665 | [ilog2(VM_WIPEONFORK)] = "wf", | 666 | [ilog2(VM_WIPEONFORK)] = "wf", |
666 | [ilog2(VM_DONTDUMP)] = "dd", | 667 | [ilog2(VM_DONTDUMP)] = "dd", |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 18146873a8b3..8601275cc5e6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/falloc.h> | 44 | #include <linux/falloc.h> |
45 | #include <linux/pagevec.h> | 45 | #include <linux/pagevec.h> |
46 | #include <linux/backing-dev.h> | 46 | #include <linux/backing-dev.h> |
47 | #include <linux/mman.h> | ||
47 | 48 | ||
48 | static const struct vm_operations_struct xfs_file_vm_ops; | 49 | static const struct vm_operations_struct xfs_file_vm_ops; |
49 | 50 | ||
@@ -1045,7 +1046,11 @@ __xfs_filemap_fault( | |||
1045 | 1046 | ||
1046 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | 1047 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
1047 | if (IS_DAX(inode)) { | 1048 | if (IS_DAX(inode)) { |
1048 | ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); | 1049 | pfn_t pfn; |
1050 | |||
1051 | ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); | ||
1052 | if (ret & VM_FAULT_NEEDDSYNC) | ||
1053 | ret = dax_finish_sync_fault(vmf, pe_size, pfn); | ||
1049 | } else { | 1054 | } else { |
1050 | if (write_fault) | 1055 | if (write_fault) |
1051 | ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); | 1056 | ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); |
@@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite( | |||
1090 | } | 1095 | } |
1091 | 1096 | ||
1092 | /* | 1097 | /* |
1093 | * pfn_mkwrite was originally inteneded to ensure we capture time stamp | 1098 | * pfn_mkwrite was originally intended to ensure we capture time stamp updates |
1094 | * updates on write faults. In reality, it's need to serialise against | 1099 | * on write faults. In reality, it needs to serialise against truncate and |
1095 | * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED | 1100 | * prepare memory for writing so handle is as standard write fault. |
1096 | * to ensure we serialise the fault barrier in place. | ||
1097 | */ | 1101 | */ |
1098 | static int | 1102 | static int |
1099 | xfs_filemap_pfn_mkwrite( | 1103 | xfs_filemap_pfn_mkwrite( |
1100 | struct vm_fault *vmf) | 1104 | struct vm_fault *vmf) |
1101 | { | 1105 | { |
1102 | 1106 | ||
1103 | struct inode *inode = file_inode(vmf->vma->vm_file); | 1107 | return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); |
1104 | struct xfs_inode *ip = XFS_I(inode); | ||
1105 | int ret = VM_FAULT_NOPAGE; | ||
1106 | loff_t size; | ||
1107 | |||
1108 | trace_xfs_filemap_pfn_mkwrite(ip); | ||
1109 | |||
1110 | sb_start_pagefault(inode->i_sb); | ||
1111 | file_update_time(vmf->vma->vm_file); | ||
1112 | |||
1113 | /* check if the faulting page hasn't raced with truncate */ | ||
1114 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | ||
1115 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
1116 | if (vmf->pgoff >= size) | ||
1117 | ret = VM_FAULT_SIGBUS; | ||
1118 | else if (IS_DAX(inode)) | ||
1119 | ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); | ||
1120 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | ||
1121 | sb_end_pagefault(inode->i_sb); | ||
1122 | return ret; | ||
1123 | |||
1124 | } | 1108 | } |
1125 | 1109 | ||
1126 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1110 | static const struct vm_operations_struct xfs_file_vm_ops = { |
@@ -1136,6 +1120,13 @@ xfs_file_mmap( | |||
1136 | struct file *filp, | 1120 | struct file *filp, |
1137 | struct vm_area_struct *vma) | 1121 | struct vm_area_struct *vma) |
1138 | { | 1122 | { |
1123 | /* | ||
1124 | * We don't support synchronous mappings for non-DAX files. At least | ||
1125 | * until someone comes with a sensible use case. | ||
1126 | */ | ||
1127 | if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC)) | ||
1128 | return -EOPNOTSUPP; | ||
1129 | |||
1139 | file_accessed(filp); | 1130 | file_accessed(filp); |
1140 | vma->vm_ops = &xfs_file_vm_ops; | 1131 | vma->vm_ops = &xfs_file_vm_ops; |
1141 | if (IS_DAX(file_inode(filp))) | 1132 | if (IS_DAX(file_inode(filp))) |
@@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = { | |||
1154 | .compat_ioctl = xfs_file_compat_ioctl, | 1145 | .compat_ioctl = xfs_file_compat_ioctl, |
1155 | #endif | 1146 | #endif |
1156 | .mmap = xfs_file_mmap, | 1147 | .mmap = xfs_file_mmap, |
1148 | .mmap_supported_flags = MAP_SYNC, | ||
1157 | .open = xfs_file_open, | 1149 | .open = xfs_file_open, |
1158 | .release = xfs_file_release, | 1150 | .release = xfs_file_release, |
1159 | .fsync = xfs_file_fsync, | 1151 | .fsync = xfs_file_fsync, |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 18077e2189a9..33eb4fb2e3fd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -34,6 +34,7 @@ | |||
34 | #include "xfs_error.h" | 34 | #include "xfs_error.h" |
35 | #include "xfs_trans.h" | 35 | #include "xfs_trans.h" |
36 | #include "xfs_trans_space.h" | 36 | #include "xfs_trans_space.h" |
37 | #include "xfs_inode_item.h" | ||
37 | #include "xfs_iomap.h" | 38 | #include "xfs_iomap.h" |
38 | #include "xfs_trace.h" | 39 | #include "xfs_trace.h" |
39 | #include "xfs_icache.h" | 40 | #include "xfs_icache.h" |
@@ -1089,6 +1090,10 @@ xfs_file_iomap_begin( | |||
1089 | trace_xfs_iomap_found(ip, offset, length, 0, &imap); | 1090 | trace_xfs_iomap_found(ip, offset, length, 0, &imap); |
1090 | } | 1091 | } |
1091 | 1092 | ||
1093 | if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields | ||
1094 | & ~XFS_ILOG_TIMESTAMP)) | ||
1095 | iomap->flags |= IOMAP_F_DIRTY; | ||
1096 | |||
1092 | xfs_bmbt_to_iomap(ip, iomap, &imap); | 1097 | xfs_bmbt_to_iomap(ip, iomap, &imap); |
1093 | 1098 | ||
1094 | if (shared) | 1099 | if (shared) |
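The xfs_iomap.c hunk above makes xfs_file_iomap_begin() report IOMAP_F_DIRTY whenever the inode is pinned in the log with uncommitted fields other than timestamps. The dax core consumes that flag on write faults into VM_SYNC mappings; roughly, the decision looks like the following (a simplified sketch with a hypothetical helper name, not a verbatim excerpt of the dax.c changes):

    /*
     * Simplified sketch: a DAX write fault on a VM_SYNC mapping whose iomap
     * carries uncommitted metadata must be finished by the filesystem with
     * an fsync-style flush before the pfn may be mapped writable.
     */
    static bool dax_fault_needs_dsync(const struct vm_fault *vmf,
                                      const struct iomap *iomap)
    {
            return (vmf->flags & FAULT_FLAG_WRITE) &&
                   (vmf->vma->vm_flags & VM_SYNC) &&
                   (iomap->flags & IOMAP_F_DIRTY);
    }

When this condition holds, the fault path hands the pfn back to the caller and returns VM_FAULT_NEEDDSYNC instead of touching the page tables (see the dax.h and mm.h hunks below).
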
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 515ba042d75c..d718a10c2271 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); | |||
654 | DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); | 654 | DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); |
655 | DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); | 655 | DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); |
656 | 656 | ||
657 | DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); | ||
658 | |||
659 | TRACE_EVENT(xfs_filemap_fault, | 657 | TRACE_EVENT(xfs_filemap_fault, |
660 | TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, | 658 | TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, |
661 | bool write_fault), | 659 | bool write_fault), |
diff --git a/include/linux/dax.h b/include/linux/dax.h index 895e16fcc62d..5258346c558c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
@@ -96,7 +96,9 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev); | |||
96 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, | 96 | ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, |
97 | const struct iomap_ops *ops); | 97 | const struct iomap_ops *ops); |
98 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | 98 | int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, |
99 | const struct iomap_ops *ops); | 99 | pfn_t *pfnp, const struct iomap_ops *ops); |
100 | int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, | ||
101 | pfn_t pfn); | ||
100 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); | 102 | int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); |
101 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, | 103 | int dax_invalidate_mapping_entry_sync(struct address_space *mapping, |
102 | pgoff_t index); | 104 | pgoff_t index); |
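With dax_iomap_fault() now returning the mapped pfn through the new pfnp argument, and dax_finish_sync_fault() installing the mapping once metadata is on stable storage, a filesystem's synchronous write-fault path takes this shape (condensed from the xfs/ext4 callers in this series; fs_iomap_ops and fs_commit_metadata() are placeholders, and locking/error handling are trimmed):

    static int fs_dax_write_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
    {
            struct inode *inode = file_inode(vmf->vma->vm_file);
            pfn_t pfn;
            int ret;

            ret = dax_iomap_fault(vmf, pe_size, &pfn, &fs_iomap_ops);
            if (ret & VM_FAULT_NEEDDSYNC) {
                    /* page tables are untouched so far; persist the metadata first */
                    fs_commit_metadata(inode);
                    /* then install the mapping for the already-flushed range */
                    ret = dax_finish_sync_fault(vmf, pe_size, pfn);
            }
            return ret;
    }
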
diff --git a/include/linux/fs.h b/include/linux/fs.h index 269086440071..a2b5d64ea503 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1702,6 +1702,7 @@ struct file_operations { | |||
1702 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 1702 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
1703 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 1703 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
1704 | int (*mmap) (struct file *, struct vm_area_struct *); | 1704 | int (*mmap) (struct file *, struct vm_area_struct *); |
1705 | unsigned long mmap_supported_flags; | ||
1705 | int (*open) (struct inode *, struct file *); | 1706 | int (*open) (struct inode *, struct file *); |
1706 | int (*flush) (struct file *, fl_owner_t id); | 1707 | int (*flush) (struct file *, fl_owner_t id); |
1707 | int (*release) (struct inode *, struct file *); | 1708 | int (*release) (struct inode *, struct file *); |
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index ca10767ab73d..19a07de28212 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h | |||
@@ -21,9 +21,13 @@ struct vm_fault; | |||
21 | 21 | ||
22 | /* | 22 | /* |
23 | * Flags for all iomap mappings: | 23 | * Flags for all iomap mappings: |
24 | * | ||
25 | * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access | ||
26 | * written data and requires fdatasync to commit them to persistent storage. | ||
24 | */ | 27 | */ |
25 | #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ | 28 | #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ |
26 | #define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */ | 29 | #define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */ |
30 | #define IOMAP_F_DIRTY 0x04 /* uncommitted metadata */ | ||
27 | 31 | ||
28 | /* | 32 | /* |
29 | * Flags that only need to be reported for IOMAP_REPORT requests: | 33 | * Flags that only need to be reported for IOMAP_REPORT requests: |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 606b6bce3a5b..296d1e0ea87b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -1367,6 +1367,7 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid); | |||
1367 | int __jbd2_log_start_commit(journal_t *journal, tid_t tid); | 1367 | int __jbd2_log_start_commit(journal_t *journal, tid_t tid); |
1368 | int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); | 1368 | int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); |
1369 | int jbd2_log_wait_commit(journal_t *journal, tid_t tid); | 1369 | int jbd2_log_wait_commit(journal_t *journal, tid_t tid); |
1370 | int jbd2_transaction_committed(journal_t *journal, tid_t tid); | ||
1370 | int jbd2_complete_transaction(journal_t *journal, tid_t tid); | 1371 | int jbd2_complete_transaction(journal_t *journal, tid_t tid); |
1371 | int jbd2_log_do_checkpoint(journal_t *journal); | 1372 | int jbd2_log_do_checkpoint(journal_t *journal); |
1372 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); | 1373 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); |
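jbd2_transaction_committed() lets a journaling filesystem ask whether a given transaction has already reached the journal; in this series ext4 uses it to decide whether its iomap provider still needs to flag the mapping as IOMAP_F_DIRTY. A rough sketch of that check, using only the function declared above (the helper name and the idea of tracking a per-inode datasync tid are illustrative; ext4's real helper also covers the no-journal case):

    /*
     * Sketch: the inode's written data is only safely reachable after a
     * crash once the transaction recorded in its datasync tid has committed.
     */
    static bool inode_datasync_dirty(journal_t *journal, tid_t datasync_tid)
    {
            return !jbd2_transaction_committed(journal, datasync_tid);
    }
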
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3eaad2fbf284..f8109ddb5ef1 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h | |||
@@ -18,6 +18,18 @@ | |||
18 | #include <linux/sizes.h> | 18 | #include <linux/sizes.h> |
19 | #include <linux/types.h> | 19 | #include <linux/types.h> |
20 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
21 | #include <linux/spinlock.h> | ||
22 | |||
23 | struct badrange_entry { | ||
24 | u64 start; | ||
25 | u64 length; | ||
26 | struct list_head list; | ||
27 | }; | ||
28 | |||
29 | struct badrange { | ||
30 | struct list_head list; | ||
31 | spinlock_t lock; | ||
32 | }; | ||
21 | 33 | ||
22 | enum { | 34 | enum { |
23 | /* when a dimm supports both PMEM and BLK access a label is required */ | 35 | /* when a dimm supports both PMEM and BLK access a label is required */ |
@@ -129,9 +141,12 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( | |||
129 | 141 | ||
130 | } | 142 | } |
131 | 143 | ||
132 | int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); | 144 | void badrange_init(struct badrange *badrange); |
133 | void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, | 145 | int badrange_add(struct badrange *badrange, u64 addr, u64 length); |
134 | phys_addr_t start, unsigned int len); | 146 | void badrange_forget(struct badrange *badrange, phys_addr_t start, |
147 | unsigned int len); | ||
148 | int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, | ||
149 | u64 length); | ||
135 | struct nvdimm_bus *nvdimm_bus_register(struct device *parent, | 150 | struct nvdimm_bus *nvdimm_bus_register(struct device *parent, |
136 | struct nvdimm_bus_descriptor *nfit_desc); | 151 | struct nvdimm_bus_descriptor *nfit_desc); |
137 | void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); | 152 | void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); |
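The poison-tracking helpers are generalized here into a stand-alone "badrange" object that a bus driver or test harness can embed. Typical use, based only on the declarations in this hunk (struct my_bus and the surrounding functions are hypothetical):

    struct my_bus {
            struct badrange badrange;
            /* ... */
    };

    static void my_bus_setup(struct my_bus *bus)
    {
            badrange_init(&bus->badrange);
    }

    static int my_bus_record_error(struct my_bus *bus, u64 spa, u64 len)
    {
            /* propagate badrange_add()'s return code (it allocates a list entry) */
            return badrange_add(&bus->badrange, spa, len);
    }

    static void my_bus_clear_error(struct my_bus *bus, phys_addr_t spa,
                                   unsigned int len)
    {
            badrange_forget(&bus->badrange, spa, len);
    }
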
diff --git a/include/linux/mm.h b/include/linux/mm.h index c7b1d617dff6..ee073146aaa7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -199,6 +199,7 @@ extern unsigned int kobjsize(const void *objp); | |||
199 | #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ | 199 | #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ |
200 | #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ | 200 | #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ |
201 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ | 201 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ |
202 | #define VM_SYNC 0x00800000 /* Synchronous page faults */ | ||
202 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ | 203 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ |
203 | #define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ | 204 | #define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ |
204 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ | 205 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ |
@@ -1191,8 +1192,9 @@ static inline void clear_page_pfmemalloc(struct page *page) | |||
1191 | #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ | 1192 | #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ |
1192 | #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ | 1193 | #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ |
1193 | #define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ | 1194 | #define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ |
1194 | 1195 | #define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables | |
1195 | #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ | 1196 | * and needs fsync() to complete (for |
1197 | * synchronous page faults in DAX) */ | ||
1196 | 1198 | ||
1197 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ | 1199 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ |
1198 | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ | 1200 | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ |
@@ -1210,7 +1212,8 @@ static inline void clear_page_pfmemalloc(struct page *page) | |||
1210 | { VM_FAULT_LOCKED, "LOCKED" }, \ | 1212 | { VM_FAULT_LOCKED, "LOCKED" }, \ |
1211 | { VM_FAULT_RETRY, "RETRY" }, \ | 1213 | { VM_FAULT_RETRY, "RETRY" }, \ |
1212 | { VM_FAULT_FALLBACK, "FALLBACK" }, \ | 1214 | { VM_FAULT_FALLBACK, "FALLBACK" }, \ |
1213 | { VM_FAULT_DONE_COW, "DONE_COW" } | 1215 | { VM_FAULT_DONE_COW, "DONE_COW" }, \ |
1216 | { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } | ||
1214 | 1217 | ||
1215 | /* Encode hstate index for a hwpoisoned large page */ | 1218 | /* Encode hstate index for a hwpoisoned large page */ |
1216 | #define VM_FAULT_SET_HINDEX(x) ((x) << 12) | 1219 | #define VM_FAULT_SET_HINDEX(x) ((x) << 12) |
diff --git a/include/linux/mman.h b/include/linux/mman.h index 7c87b6652244..6a4d1caaff5c 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h | |||
@@ -8,6 +8,48 @@ | |||
8 | #include <linux/atomic.h> | 8 | #include <linux/atomic.h> |
9 | #include <uapi/linux/mman.h> | 9 | #include <uapi/linux/mman.h> |
10 | 10 | ||
11 | /* | ||
12 | * Arrange for legacy / undefined architecture specific flags to be | ||
13 | * ignored by mmap handling code. | ||
14 | */ | ||
15 | #ifndef MAP_32BIT | ||
16 | #define MAP_32BIT 0 | ||
17 | #endif | ||
18 | #ifndef MAP_HUGE_2MB | ||
19 | #define MAP_HUGE_2MB 0 | ||
20 | #endif | ||
21 | #ifndef MAP_HUGE_1GB | ||
22 | #define MAP_HUGE_1GB 0 | ||
23 | #endif | ||
24 | #ifndef MAP_UNINITIALIZED | ||
25 | #define MAP_UNINITIALIZED 0 | ||
26 | #endif | ||
27 | #ifndef MAP_SYNC | ||
28 | #define MAP_SYNC 0 | ||
29 | #endif | ||
30 | |||
31 | /* | ||
32 | * The historical set of flags that all mmap implementations implicitly | ||
33 | * support when a ->mmap_validate() op is not provided in file_operations. | ||
34 | */ | ||
35 | #define LEGACY_MAP_MASK (MAP_SHARED \ | ||
36 | | MAP_PRIVATE \ | ||
37 | | MAP_FIXED \ | ||
38 | | MAP_ANONYMOUS \ | ||
39 | | MAP_DENYWRITE \ | ||
40 | | MAP_EXECUTABLE \ | ||
41 | | MAP_UNINITIALIZED \ | ||
42 | | MAP_GROWSDOWN \ | ||
43 | | MAP_LOCKED \ | ||
44 | | MAP_NORESERVE \ | ||
45 | | MAP_POPULATE \ | ||
46 | | MAP_NONBLOCK \ | ||
47 | | MAP_STACK \ | ||
48 | | MAP_HUGETLB \ | ||
49 | | MAP_32BIT \ | ||
50 | | MAP_HUGE_2MB \ | ||
51 | | MAP_HUGE_1GB) | ||
52 | |||
11 | extern int sysctl_overcommit_memory; | 53 | extern int sysctl_overcommit_memory; |
12 | extern int sysctl_overcommit_ratio; | 54 | extern int sysctl_overcommit_ratio; |
13 | extern unsigned long sysctl_overcommit_kbytes; | 55 | extern unsigned long sysctl_overcommit_kbytes; |
@@ -64,8 +106,9 @@ static inline bool arch_validate_prot(unsigned long prot) | |||
64 | * ("bit1" and "bit2" must be single bits) | 106 | * ("bit1" and "bit2" must be single bits) |
65 | */ | 107 | */ |
66 | #define _calc_vm_trans(x, bit1, bit2) \ | 108 | #define _calc_vm_trans(x, bit1, bit2) \ |
109 | ((!(bit1) || !(bit2)) ? 0 : \ | ||
67 | ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ | 110 | ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ |
68 | : ((x) & (bit1)) / ((bit1) / (bit2))) | 111 | : ((x) & (bit1)) / ((bit1) / (bit2)))) |
69 | 112 | ||
70 | /* | 113 | /* |
71 | * Combine the mmap "prot" argument into "vm_flags" used internally. | 114 | * Combine the mmap "prot" argument into "vm_flags" used internally. |
@@ -87,7 +130,8 @@ calc_vm_flag_bits(unsigned long flags) | |||
87 | { | 130 | { |
88 | return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | | 131 | return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | |
89 | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | | 132 | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | |
90 | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); | 133 | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | |
134 | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ); | ||
91 | } | 135 | } |
92 | 136 | ||
93 | unsigned long vm_commit_limit(void); | 137 | unsigned long vm_commit_limit(void); |
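The extra `(!(bit1) || !(bit2)) ? 0 :` guard matters because MAP_SYNC is now defined to 0 on architectures that do not provide it (see the `#ifndef MAP_SYNC` block above); translating a zero flag through the unguarded macro would attempt a division by zero. A stand-alone illustration of the guarded translation (the macro body is copied from the hunk; the test program around it is just for demonstration):

    #include <stdio.h>

    #define VM_SYNC          0x00800000UL
    #define MAP_SYNC_PRESENT 0x80000UL    /* arch provides MAP_SYNC */
    #define MAP_SYNC_ABSENT  0UL          /* arch does not; header defines it to 0 */

    #define _calc_vm_trans(x, bit1, bit2) \
            ((!(bit1) || !(bit2)) ? 0 : \
             ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
                               : ((x) & (bit1)) / ((bit1) / (bit2))))

    int main(void)
    {
            unsigned long flags = 0x80000UL;        /* caller passed MAP_SYNC */

            printf("with MAP_SYNC:    %#lx\n",
                   _calc_vm_trans(flags, MAP_SYNC_PRESENT, VM_SYNC)); /* 0x800000 */
            printf("without MAP_SYNC: %#lx\n",
                   _calc_vm_trans(flags, MAP_SYNC_ABSENT, VM_SYNC));  /* 0, no div-by-0 */
            return 0;
    }
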
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 8a8df5423dca..97b09fcf7e52 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h | |||
@@ -149,7 +149,6 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ | |||
149 | TP_ARGS(inode, vmf, length, pfn, radix_entry)) | 149 | TP_ARGS(inode, vmf, length, pfn, radix_entry)) |
150 | 150 | ||
151 | DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); | 151 | DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); |
152 | DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback); | ||
153 | 152 | ||
154 | DECLARE_EVENT_CLASS(dax_pte_fault_class, | 153 | DECLARE_EVENT_CLASS(dax_pte_fault_class, |
155 | TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), | 154 | TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), |
@@ -192,6 +191,8 @@ DEFINE_EVENT(dax_pte_fault_class, name, \ | |||
192 | DEFINE_PTE_FAULT_EVENT(dax_pte_fault); | 191 | DEFINE_PTE_FAULT_EVENT(dax_pte_fault); |
193 | DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); | 192 | DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); |
194 | DEFINE_PTE_FAULT_EVENT(dax_load_hole); | 193 | DEFINE_PTE_FAULT_EVENT(dax_load_hole); |
194 | DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite_no_entry); | ||
195 | DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite); | ||
195 | 196 | ||
196 | TRACE_EVENT(dax_insert_mapping, | 197 | TRACE_EVENT(dax_insert_mapping, |
197 | TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), | 198 | TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), |
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 6d319c46fd90..f8b134f5608f 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #define MAP_SHARED 0x01 /* Share changes */ | 18 | #define MAP_SHARED 0x01 /* Share changes */ |
19 | #define MAP_PRIVATE 0x02 /* Changes are private */ | 19 | #define MAP_PRIVATE 0x02 /* Changes are private */ |
20 | #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ | ||
20 | #define MAP_TYPE 0x0f /* Mask for type of mapping */ | 21 | #define MAP_TYPE 0x0f /* Mask for type of mapping */ |
21 | #define MAP_FIXED 0x10 /* Interpret addr exactly */ | 22 | #define MAP_FIXED 0x10 /* Interpret addr exactly */ |
22 | #define MAP_ANONYMOUS 0x20 /* don't use a file */ | 23 | #define MAP_ANONYMOUS 0x20 /* don't use a file */ |
diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 2dffcbf705b3..653687d9771b 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #define MAP_NONBLOCK 0x10000 /* do not block on IO */ | 13 | #define MAP_NONBLOCK 0x10000 /* do not block on IO */ |
14 | #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ | 14 | #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ |
15 | #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ | 15 | #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ |
16 | #define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ | ||
16 | 17 | ||
17 | /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ | 18 | /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ |
18 | 19 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr, | |||
1387 | 1387 | ||
1388 | if (file) { | 1388 | if (file) { |
1389 | struct inode *inode = file_inode(file); | 1389 | struct inode *inode = file_inode(file); |
1390 | unsigned long flags_mask; | ||
1391 | |||
1392 | flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; | ||
1390 | 1393 | ||
1391 | switch (flags & MAP_TYPE) { | 1394 | switch (flags & MAP_TYPE) { |
1392 | case MAP_SHARED: | 1395 | case MAP_SHARED: |
1396 | /* | ||
1397 | * Force use of MAP_SHARED_VALIDATE with non-legacy | ||
1398 | * flags. E.g. MAP_SYNC is dangerous to use with | ||
1399 | * MAP_SHARED as you don't know which consistency model | ||
1400 | * you will get. We silently ignore unsupported flags | ||
1401 | * with MAP_SHARED to preserve backward compatibility. | ||
1402 | */ | ||
1403 | flags &= LEGACY_MAP_MASK; | ||
1404 | /* fall through */ | ||
1405 | case MAP_SHARED_VALIDATE: | ||
1406 | if (flags & ~flags_mask) | ||
1407 | return -EOPNOTSUPP; | ||
1393 | if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) | 1408 | if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) |
1394 | return -EACCES; | 1409 | return -EACCES; |
1395 | 1410 | ||
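From userspace, the resulting contract is: MAP_SHARED silently drops non-legacy flags, while MAP_SHARED_VALIDATE makes the kernel reject anything the file's ->mmap() path does not advertise. A minimal probe for MAP_SYNC therefore looks like the following; it assumes a DAX-capable file at a placeholder path, and defines the flag values from the uapi hunks above only if the libc headers lack them:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>

    #ifndef MAP_SYNC
    #define MAP_SYNC            0x80000
    #endif
    #ifndef MAP_SHARED_VALIDATE
    #define MAP_SHARED_VALIDATE 0x03
    #endif

    int main(int argc, char **argv)
    {
            size_t len = 2UL << 20;
            int fd = open(argc > 1 ? argv[1] : "/mnt/pmem/file", O_RDWR);
            void *addr;

            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                        MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
            if (addr == MAP_FAILED && errno == EOPNOTSUPP) {
                    /* no MAP_SYNC here: fall back and fsync()/msync() after stores */
                    addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            }
            if (addr == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            /* With MAP_SYNC, flushing CPU caches for this range is enough to persist data. */
            return 0;
    }
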
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 6d319c46fd90..f8b134f5608f 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #define MAP_SHARED 0x01 /* Share changes */ | 18 | #define MAP_SHARED 0x01 /* Share changes */ |
19 | #define MAP_PRIVATE 0x02 /* Changes are private */ | 19 | #define MAP_PRIVATE 0x02 /* Changes are private */ |
20 | #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ | ||
20 | #define MAP_TYPE 0x0f /* Mask for type of mapping */ | 21 | #define MAP_TYPE 0x0f /* Mask for type of mapping */ |
21 | #define MAP_FIXED 0x10 /* Interpret addr exactly */ | 22 | #define MAP_FIXED 0x10 /* Interpret addr exactly */ |
22 | #define MAP_ANONYMOUS 0x20 /* don't use a file */ | 23 | #define MAP_ANONYMOUS 0x20 /* don't use a file */ |
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 65368d9027f5..db33b28c5ef3 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild | |||
@@ -70,6 +70,7 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o | |||
70 | libnvdimm-y += $(NVDIMM_SRC)/region.o | 70 | libnvdimm-y += $(NVDIMM_SRC)/region.o |
71 | libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o | 71 | libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o |
72 | libnvdimm-y += $(NVDIMM_SRC)/label.o | 72 | libnvdimm-y += $(NVDIMM_SRC)/label.o |
73 | libnvdimm-y += $(NVDIMM_SRC)/badrange.o | ||
73 | libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o | 74 | libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o |
74 | libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o | 75 | libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o |
75 | libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o | 76 | libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o |
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index bef419d4266d..7217b2b953b5 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c | |||
@@ -168,8 +168,12 @@ struct nfit_test { | |||
168 | spinlock_t lock; | 168 | spinlock_t lock; |
169 | } ars_state; | 169 | } ars_state; |
170 | struct device *dimm_dev[NUM_DCR]; | 170 | struct device *dimm_dev[NUM_DCR]; |
171 | struct badrange badrange; | ||
172 | struct work_struct work; | ||
171 | }; | 173 | }; |
172 | 174 | ||
175 | static struct workqueue_struct *nfit_wq; | ||
176 | |||
173 | static struct nfit_test *to_nfit_test(struct device *dev) | 177 | static struct nfit_test *to_nfit_test(struct device *dev) |
174 | { | 178 | { |
175 | struct platform_device *pdev = to_platform_device(dev); | 179 | struct platform_device *pdev = to_platform_device(dev); |
@@ -234,48 +238,68 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, | |||
234 | return rc; | 238 | return rc; |
235 | } | 239 | } |
236 | 240 | ||
237 | #define NFIT_TEST_ARS_RECORDS 4 | ||
238 | #define NFIT_TEST_CLEAR_ERR_UNIT 256 | 241 | #define NFIT_TEST_CLEAR_ERR_UNIT 256 |
239 | 242 | ||
240 | static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, | 243 | static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, |
241 | unsigned int buf_len) | 244 | unsigned int buf_len) |
242 | { | 245 | { |
246 | int ars_recs; | ||
247 | |||
243 | if (buf_len < sizeof(*nd_cmd)) | 248 | if (buf_len < sizeof(*nd_cmd)) |
244 | return -EINVAL; | 249 | return -EINVAL; |
245 | 250 | ||
251 | /* for testing, only store up to n records that fit within 4k */ | ||
252 | ars_recs = SZ_4K / sizeof(struct nd_ars_record); | ||
253 | |||
246 | nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) | 254 | nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) |
247 | + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record); | 255 | + ars_recs * sizeof(struct nd_ars_record); |
248 | nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; | 256 | nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; |
249 | nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; | 257 | nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; |
250 | 258 | ||
251 | return 0; | 259 | return 0; |
252 | } | 260 | } |
253 | 261 | ||
254 | /* | 262 | static void post_ars_status(struct ars_state *ars_state, |
255 | * Initialize the ars_state to return an ars_result 1 second in the future with | 263 | struct badrange *badrange, u64 addr, u64 len) |
256 | * a 4K error range in the middle of the requested address range. | ||
257 | */ | ||
258 | static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len) | ||
259 | { | 264 | { |
260 | struct nd_cmd_ars_status *ars_status; | 265 | struct nd_cmd_ars_status *ars_status; |
261 | struct nd_ars_record *ars_record; | 266 | struct nd_ars_record *ars_record; |
267 | struct badrange_entry *be; | ||
268 | u64 end = addr + len - 1; | ||
269 | int i = 0; | ||
262 | 270 | ||
263 | ars_state->deadline = jiffies + 1*HZ; | 271 | ars_state->deadline = jiffies + 1*HZ; |
264 | ars_status = ars_state->ars_status; | 272 | ars_status = ars_state->ars_status; |
265 | ars_status->status = 0; | 273 | ars_status->status = 0; |
266 | ars_status->out_length = sizeof(struct nd_cmd_ars_status) | ||
267 | + sizeof(struct nd_ars_record); | ||
268 | ars_status->address = addr; | 274 | ars_status->address = addr; |
269 | ars_status->length = len; | 275 | ars_status->length = len; |
270 | ars_status->type = ND_ARS_PERSISTENT; | 276 | ars_status->type = ND_ARS_PERSISTENT; |
271 | ars_status->num_records = 1; | 277 | |
272 | ars_record = &ars_status->records[0]; | 278 | spin_lock(&badrange->lock); |
273 | ars_record->handle = 0; | 279 | list_for_each_entry(be, &badrange->list, list) { |
274 | ars_record->err_address = addr + len / 2; | 280 | u64 be_end = be->start + be->length - 1; |
275 | ars_record->length = SZ_4K; | 281 | u64 rstart, rend; |
282 | |||
283 | /* skip entries outside the range */ | ||
284 | if (be_end < addr || be->start > end) | ||
285 | continue; | ||
286 | |||
287 | rstart = (be->start < addr) ? addr : be->start; | ||
288 | rend = (be_end < end) ? be_end : end; | ||
289 | ars_record = &ars_status->records[i]; | ||
290 | ars_record->handle = 0; | ||
291 | ars_record->err_address = rstart; | ||
292 | ars_record->length = rend - rstart + 1; | ||
293 | i++; | ||
294 | } | ||
295 | spin_unlock(&badrange->lock); | ||
296 | ars_status->num_records = i; | ||
297 | ars_status->out_length = sizeof(struct nd_cmd_ars_status) | ||
298 | + i * sizeof(struct nd_ars_record); | ||
276 | } | 299 | } |
277 | 300 | ||
278 | static int nfit_test_cmd_ars_start(struct ars_state *ars_state, | 301 | static int nfit_test_cmd_ars_start(struct nfit_test *t, |
302 | struct ars_state *ars_state, | ||
279 | struct nd_cmd_ars_start *ars_start, unsigned int buf_len, | 303 | struct nd_cmd_ars_start *ars_start, unsigned int buf_len, |
280 | int *cmd_rc) | 304 | int *cmd_rc) |
281 | { | 305 | { |
@@ -289,7 +313,7 @@ static int nfit_test_cmd_ars_start(struct ars_state *ars_state, | |||
289 | } else { | 313 | } else { |
290 | ars_start->status = 0; | 314 | ars_start->status = 0; |
291 | ars_start->scrub_time = 1; | 315 | ars_start->scrub_time = 1; |
292 | post_ars_status(ars_state, ars_start->address, | 316 | post_ars_status(ars_state, &t->badrange, ars_start->address, |
293 | ars_start->length); | 317 | ars_start->length); |
294 | *cmd_rc = 0; | 318 | *cmd_rc = 0; |
295 | } | 319 | } |
@@ -320,7 +344,8 @@ static int nfit_test_cmd_ars_status(struct ars_state *ars_state, | |||
320 | return 0; | 344 | return 0; |
321 | } | 345 | } |
322 | 346 | ||
323 | static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, | 347 | static int nfit_test_cmd_clear_error(struct nfit_test *t, |
348 | struct nd_cmd_clear_error *clear_err, | ||
324 | unsigned int buf_len, int *cmd_rc) | 349 | unsigned int buf_len, int *cmd_rc) |
325 | { | 350 | { |
326 | const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; | 351 | const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; |
@@ -330,18 +355,91 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, | |||
330 | if ((clear_err->address & mask) || (clear_err->length & mask)) | 355 | if ((clear_err->address & mask) || (clear_err->length & mask)) |
331 | return -EINVAL; | 356 | return -EINVAL; |
332 | 357 | ||
333 | /* | 358 | badrange_forget(&t->badrange, clear_err->address, clear_err->length); |
334 | * Report 'all clear' success for all commands even though a new | ||
335 | * scrub will find errors again. This is enough to have the | ||
336 | * error removed from the 'badblocks' tracking in the pmem | ||
337 | * driver. | ||
338 | */ | ||
339 | clear_err->status = 0; | 359 | clear_err->status = 0; |
340 | clear_err->cleared = clear_err->length; | 360 | clear_err->cleared = clear_err->length; |
341 | *cmd_rc = 0; | 361 | *cmd_rc = 0; |
342 | return 0; | 362 | return 0; |
343 | } | 363 | } |
344 | 364 | ||
365 | struct region_search_spa { | ||
366 | u64 addr; | ||
367 | struct nd_region *region; | ||
368 | }; | ||
369 | |||
370 | static int is_region_device(struct device *dev) | ||
371 | { | ||
372 | return !strncmp(dev->kobj.name, "region", 6); | ||
373 | } | ||
374 | |||
375 | static int nfit_test_search_region_spa(struct device *dev, void *data) | ||
376 | { | ||
377 | struct region_search_spa *ctx = data; | ||
378 | struct nd_region *nd_region; | ||
379 | resource_size_t ndr_end; | ||
380 | |||
381 | if (!is_region_device(dev)) | ||
382 | return 0; | ||
383 | |||
384 | nd_region = to_nd_region(dev); | ||
385 | ndr_end = nd_region->ndr_start + nd_region->ndr_size; | ||
386 | |||
387 | if (ctx->addr >= nd_region->ndr_start && ctx->addr < ndr_end) { | ||
388 | ctx->region = nd_region; | ||
389 | return 1; | ||
390 | } | ||
391 | |||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | static int nfit_test_search_spa(struct nvdimm_bus *bus, | ||
396 | struct nd_cmd_translate_spa *spa) | ||
397 | { | ||
398 | int ret; | ||
399 | struct nd_region *nd_region = NULL; | ||
400 | struct nvdimm *nvdimm = NULL; | ||
401 | struct nd_mapping *nd_mapping = NULL; | ||
402 | struct region_search_spa ctx = { | ||
403 | .addr = spa->spa, | ||
404 | .region = NULL, | ||
405 | }; | ||
406 | u64 dpa; | ||
407 | |||
408 | ret = device_for_each_child(&bus->dev, &ctx, | ||
409 | nfit_test_search_region_spa); | ||
410 | |||
411 | if (!ret) | ||
412 | return -ENODEV; | ||
413 | |||
414 | nd_region = ctx.region; | ||
415 | |||
416 | dpa = ctx.addr - nd_region->ndr_start; | ||
417 | |||
418 | /* | ||
419 | * last dimm is selected for test | ||
420 | */ | ||
421 | nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1]; | ||
422 | nvdimm = nd_mapping->nvdimm; | ||
423 | |||
424 | spa->devices[0].nfit_device_handle = handle[nvdimm->id]; | ||
425 | spa->num_nvdimms = 1; | ||
426 | spa->devices[0].dpa = dpa; | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, | ||
432 | struct nd_cmd_translate_spa *spa, unsigned int buf_len) | ||
433 | { | ||
434 | if (buf_len < spa->translate_length) | ||
435 | return -EINVAL; | ||
436 | |||
437 | if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms) | ||
438 | spa->status = 2; | ||
439 | |||
440 | return 0; | ||
441 | } | ||
442 | |||
345 | static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) | 443 | static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) |
346 | { | 444 | { |
347 | static const struct nd_smart_payload smart_data = { | 445 | static const struct nd_smart_payload smart_data = { |
@@ -378,6 +476,93 @@ static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, | |||
378 | return 0; | 476 | return 0; |
379 | } | 477 | } |
380 | 478 | ||
479 | static void uc_error_notify(struct work_struct *work) | ||
480 | { | ||
481 | struct nfit_test *t = container_of(work, typeof(*t), work); | ||
482 | |||
483 | __acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR); | ||
484 | } | ||
485 | |||
486 | static int nfit_test_cmd_ars_error_inject(struct nfit_test *t, | ||
487 | struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len) | ||
488 | { | ||
489 | int rc; | ||
490 | |||
491 | if (buf_len != sizeof(*err_inj)) { | ||
492 | rc = -EINVAL; | ||
493 | goto err; | ||
494 | } | ||
495 | |||
496 | if (err_inj->err_inj_spa_range_length <= 0) { | ||
497 | rc = -EINVAL; | ||
498 | goto err; | ||
499 | } | ||
500 | |||
501 | rc = badrange_add(&t->badrange, err_inj->err_inj_spa_range_base, | ||
502 | err_inj->err_inj_spa_range_length); | ||
503 | if (rc < 0) | ||
504 | goto err; | ||
505 | |||
506 | if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY)) | ||
507 | queue_work(nfit_wq, &t->work); | ||
508 | |||
509 | err_inj->status = 0; | ||
510 | return 0; | ||
511 | |||
512 | err: | ||
513 | err_inj->status = NFIT_ARS_INJECT_INVALID; | ||
514 | return rc; | ||
515 | } | ||
516 | |||
517 | static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t, | ||
518 | struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len) | ||
519 | { | ||
520 | int rc; | ||
521 | |||
522 | if (buf_len != sizeof(*err_clr)) { | ||
523 | rc = -EINVAL; | ||
524 | goto err; | ||
525 | } | ||
526 | |||
527 | if (err_clr->err_inj_clr_spa_range_length <= 0) { | ||
528 | rc = -EINVAL; | ||
529 | goto err; | ||
530 | } | ||
531 | |||
532 | badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base, | ||
533 | err_clr->err_inj_clr_spa_range_length); | ||
534 | |||
535 | err_clr->status = 0; | ||
536 | return 0; | ||
537 | |||
538 | err: | ||
539 | err_clr->status = NFIT_ARS_INJECT_INVALID; | ||
540 | return rc; | ||
541 | } | ||
542 | |||
543 | static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, | ||
544 | struct nd_cmd_ars_err_inj_stat *err_stat, | ||
545 | unsigned int buf_len) | ||
546 | { | ||
547 | struct badrange_entry *be; | ||
548 | int max = SZ_4K / sizeof(struct nd_error_stat_query_record); | ||
549 | int i = 0; | ||
550 | |||
551 | err_stat->status = 0; | ||
552 | spin_lock(&t->badrange.lock); | ||
553 | list_for_each_entry(be, &t->badrange.list, list) { | ||
554 | err_stat->record[i].err_inj_stat_spa_range_base = be->start; | ||
555 | err_stat->record[i].err_inj_stat_spa_range_length = be->length; | ||
556 | i++; | ||
557 | if (i > max) | ||
558 | break; | ||
559 | } | ||
560 | spin_unlock(&t->badrange.lock); | ||
561 | err_stat->inj_err_rec_count = i; | ||
562 | |||
563 | return 0; | ||
564 | } | ||
565 | |||
381 | static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, | 566 | static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, |
382 | struct nvdimm *nvdimm, unsigned int cmd, void *buf, | 567 | struct nvdimm *nvdimm, unsigned int cmd, void *buf, |
383 | unsigned int buf_len, int *cmd_rc) | 568 | unsigned int buf_len, int *cmd_rc) |
@@ -449,6 +634,38 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, | |||
449 | } | 634 | } |
450 | } else { | 635 | } else { |
451 | struct ars_state *ars_state = &t->ars_state; | 636 | struct ars_state *ars_state = &t->ars_state; |
637 | struct nd_cmd_pkg *call_pkg = buf; | ||
638 | |||
639 | if (!nd_desc) | ||
640 | return -ENOTTY; | ||
641 | |||
642 | if (cmd == ND_CMD_CALL) { | ||
643 | func = call_pkg->nd_command; | ||
644 | |||
645 | buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; | ||
646 | buf = (void *) call_pkg->nd_payload; | ||
647 | |||
648 | switch (func) { | ||
649 | case NFIT_CMD_TRANSLATE_SPA: | ||
650 | rc = nfit_test_cmd_translate_spa( | ||
651 | acpi_desc->nvdimm_bus, buf, buf_len); | ||
652 | return rc; | ||
653 | case NFIT_CMD_ARS_INJECT_SET: | ||
654 | rc = nfit_test_cmd_ars_error_inject(t, buf, | ||
655 | buf_len); | ||
656 | return rc; | ||
657 | case NFIT_CMD_ARS_INJECT_CLEAR: | ||
658 | rc = nfit_test_cmd_ars_inject_clear(t, buf, | ||
659 | buf_len); | ||
660 | return rc; | ||
661 | case NFIT_CMD_ARS_INJECT_GET: | ||
662 | rc = nfit_test_cmd_ars_inject_status(t, buf, | ||
663 | buf_len); | ||
664 | return rc; | ||
665 | default: | ||
666 | return -ENOTTY; | ||
667 | } | ||
668 | } | ||
452 | 669 | ||
453 | if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) | 670 | if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) |
454 | return -ENOTTY; | 671 | return -ENOTTY; |
@@ -458,15 +675,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, | |||
458 | rc = nfit_test_cmd_ars_cap(buf, buf_len); | 675 | rc = nfit_test_cmd_ars_cap(buf, buf_len); |
459 | break; | 676 | break; |
460 | case ND_CMD_ARS_START: | 677 | case ND_CMD_ARS_START: |
461 | rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len, | 678 | rc = nfit_test_cmd_ars_start(t, ars_state, buf, |
462 | cmd_rc); | 679 | buf_len, cmd_rc); |
463 | break; | 680 | break; |
464 | case ND_CMD_ARS_STATUS: | 681 | case ND_CMD_ARS_STATUS: |
465 | rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, | 682 | rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, |
466 | cmd_rc); | 683 | cmd_rc); |
467 | break; | 684 | break; |
468 | case ND_CMD_CLEAR_ERROR: | 685 | case ND_CMD_CLEAR_ERROR: |
469 | rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc); | 686 | rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc); |
470 | break; | 687 | break; |
471 | default: | 688 | default: |
472 | return -ENOTTY; | 689 | return -ENOTTY; |
@@ -566,10 +783,9 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) | |||
566 | 783 | ||
567 | static int ars_state_init(struct device *dev, struct ars_state *ars_state) | 784 | static int ars_state_init(struct device *dev, struct ars_state *ars_state) |
568 | { | 785 | { |
786 | /* for testing, only store up to n records that fit within 4k */ | ||
569 | ars_state->ars_status = devm_kzalloc(dev, | 787 | ars_state->ars_status = devm_kzalloc(dev, |
570 | sizeof(struct nd_cmd_ars_status) | 788 | sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL); |
571 | + sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS, | ||
572 | GFP_KERNEL); | ||
573 | if (!ars_state->ars_status) | 789 | if (!ars_state->ars_status) |
574 | return -ENOMEM; | 790 | return -ENOMEM; |
575 | spin_lock_init(&ars_state->lock); | 791 | spin_lock_init(&ars_state->lock); |
@@ -1419,7 +1635,8 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
1419 | + i * sizeof(u64); | 1635 | + i * sizeof(u64); |
1420 | } | 1636 | } |
1421 | 1637 | ||
1422 | post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); | 1638 | post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], |
1639 | SPA0_SIZE); | ||
1423 | 1640 | ||
1424 | acpi_desc = &t->acpi_desc; | 1641 | acpi_desc = &t->acpi_desc; |
1425 | set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); | 1642 | set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); |
@@ -1430,7 +1647,12 @@ static void nfit_test0_setup(struct nfit_test *t) | |||
1430 | set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); | 1647 | set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); |
1431 | set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); | 1648 | set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); |
1432 | set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); | 1649 | set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); |
1650 | set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); | ||
1433 | set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); | 1651 | set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); |
1652 | set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); | ||
1653 | set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); | ||
1654 | set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); | ||
1655 | set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); | ||
1434 | } | 1656 | } |
1435 | 1657 | ||
1436 | static void nfit_test1_setup(struct nfit_test *t) | 1658 | static void nfit_test1_setup(struct nfit_test *t) |
@@ -1520,7 +1742,8 @@ static void nfit_test1_setup(struct nfit_test *t) | |||
1520 | dcr->code = NFIT_FIC_BYTE; | 1742 | dcr->code = NFIT_FIC_BYTE; |
1521 | dcr->windows = 0; | 1743 | dcr->windows = 0; |
1522 | 1744 | ||
1523 | post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); | 1745 | post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], |
1746 | SPA2_SIZE); | ||
1524 | 1747 | ||
1525 | acpi_desc = &t->acpi_desc; | 1748 | acpi_desc = &t->acpi_desc; |
1526 | set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); | 1749 | set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); |
@@ -1589,6 +1812,7 @@ static int nfit_ctl_test(struct device *dev) | |||
1589 | unsigned long mask, cmd_size, offset; | 1812 | unsigned long mask, cmd_size, offset; |
1590 | union { | 1813 | union { |
1591 | struct nd_cmd_get_config_size cfg_size; | 1814 | struct nd_cmd_get_config_size cfg_size; |
1815 | struct nd_cmd_clear_error clear_err; | ||
1592 | struct nd_cmd_ars_status ars_stat; | 1816 | struct nd_cmd_ars_status ars_stat; |
1593 | struct nd_cmd_ars_cap ars_cap; | 1817 | struct nd_cmd_ars_cap ars_cap; |
1594 | char buf[sizeof(struct nd_cmd_ars_status) | 1818 | char buf[sizeof(struct nd_cmd_ars_status) |
@@ -1613,10 +1837,15 @@ static int nfit_ctl_test(struct device *dev) | |||
1613 | .cmd_mask = 1UL << ND_CMD_ARS_CAP | 1837 | .cmd_mask = 1UL << ND_CMD_ARS_CAP |
1614 | | 1UL << ND_CMD_ARS_START | 1838 | | 1UL << ND_CMD_ARS_START |
1615 | | 1UL << ND_CMD_ARS_STATUS | 1839 | | 1UL << ND_CMD_ARS_STATUS |
1616 | | 1UL << ND_CMD_CLEAR_ERROR, | 1840 | | 1UL << ND_CMD_CLEAR_ERROR |
1841 | | 1UL << ND_CMD_CALL, | ||
1617 | .module = THIS_MODULE, | 1842 | .module = THIS_MODULE, |
1618 | .provider_name = "ACPI.NFIT", | 1843 | .provider_name = "ACPI.NFIT", |
1619 | .ndctl = acpi_nfit_ctl, | 1844 | .ndctl = acpi_nfit_ctl, |
1845 | .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA | ||
1846 | | 1UL << NFIT_CMD_ARS_INJECT_SET | ||
1847 | | 1UL << NFIT_CMD_ARS_INJECT_CLEAR | ||
1848 | | 1UL << NFIT_CMD_ARS_INJECT_GET, | ||
1620 | }, | 1849 | }, |
1621 | .dev = &adev->dev, | 1850 | .dev = &adev->dev, |
1622 | }; | 1851 | }; |
@@ -1767,6 +1996,23 @@ static int nfit_ctl_test(struct device *dev) | |||
1767 | return -EIO; | 1996 | return -EIO; |
1768 | } | 1997 | } |
1769 | 1998 | ||
1999 | /* test clear error */ | ||
2000 | cmd_size = sizeof(cmds.clear_err); | ||
2001 | cmds.clear_err = (struct nd_cmd_clear_error) { | ||
2002 | .length = 512, | ||
2003 | .cleared = 512, | ||
2004 | }; | ||
2005 | rc = setup_result(cmds.buf, cmd_size); | ||
2006 | if (rc) | ||
2007 | return rc; | ||
2008 | rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, | ||
2009 | cmds.buf, cmd_size, &cmd_rc); | ||
2010 | if (rc < 0 || cmd_rc) { | ||
2011 | dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", | ||
2012 | __func__, __LINE__, rc, cmd_rc); | ||
2013 | return -EIO; | ||
2014 | } | ||
2015 | |||
1770 | return 0; | 2016 | return 0; |
1771 | } | 2017 | } |
1772 | 2018 | ||
@@ -1915,6 +2161,10 @@ static __init int nfit_test_init(void) | |||
1915 | 2161 | ||
1916 | nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); | 2162 | nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); |
1917 | 2163 | ||
2164 | nfit_wq = create_singlethread_workqueue("nfit"); | ||
2165 | if (!nfit_wq) | ||
2166 | return -ENOMEM; | ||
2167 | |||
1918 | nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm"); | 2168 | nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm"); |
1919 | if (IS_ERR(nfit_test_dimm)) { | 2169 | if (IS_ERR(nfit_test_dimm)) { |
1920 | rc = PTR_ERR(nfit_test_dimm); | 2170 | rc = PTR_ERR(nfit_test_dimm); |
@@ -1931,6 +2181,7 @@ static __init int nfit_test_init(void) | |||
1931 | goto err_register; | 2181 | goto err_register; |
1932 | } | 2182 | } |
1933 | INIT_LIST_HEAD(&nfit_test->resources); | 2183 | INIT_LIST_HEAD(&nfit_test->resources); |
2184 | badrange_init(&nfit_test->badrange); | ||
1934 | switch (i) { | 2185 | switch (i) { |
1935 | case 0: | 2186 | case 0: |
1936 | nfit_test->num_pm = NUM_PM; | 2187 | nfit_test->num_pm = NUM_PM; |
@@ -1966,6 +2217,7 @@ static __init int nfit_test_init(void) | |||
1966 | goto err_register; | 2217 | goto err_register; |
1967 | 2218 | ||
1968 | instances[i] = nfit_test; | 2219 | instances[i] = nfit_test; |
2220 | INIT_WORK(&nfit_test->work, uc_error_notify); | ||
1969 | } | 2221 | } |
1970 | 2222 | ||
1971 | rc = platform_driver_register(&nfit_test_driver); | 2223 | rc = platform_driver_register(&nfit_test_driver); |
@@ -1974,6 +2226,7 @@ static __init int nfit_test_init(void) | |||
1974 | return 0; | 2226 | return 0; |
1975 | 2227 | ||
1976 | err_register: | 2228 | err_register: |
2229 | destroy_workqueue(nfit_wq); | ||
1977 | for (i = 0; i < NUM_NFITS; i++) | 2230 | for (i = 0; i < NUM_NFITS; i++) |
1978 | if (instances[i]) | 2231 | if (instances[i]) |
1979 | platform_device_unregister(&instances[i]->pdev); | 2232 | platform_device_unregister(&instances[i]->pdev); |
@@ -1989,6 +2242,8 @@ static __exit void nfit_test_exit(void) | |||
1989 | { | 2242 | { |
1990 | int i; | 2243 | int i; |
1991 | 2244 | ||
2245 | flush_workqueue(nfit_wq); | ||
2246 | destroy_workqueue(nfit_wq); | ||
1992 | for (i = 0; i < NUM_NFITS; i++) | 2247 | for (i = 0; i < NUM_NFITS; i++) |
1993 | platform_device_unregister(&instances[i]->pdev); | 2248 | platform_device_unregister(&instances[i]->pdev); |
1994 | platform_driver_unregister(&nfit_test_driver); | 2249 | platform_driver_unregister(&nfit_test_driver); |
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index d3d63dd5ed38..113b44675a71 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h | |||
@@ -32,6 +32,58 @@ struct nfit_test_resource { | |||
32 | void *buf; | 32 | void *buf; |
33 | }; | 33 | }; |
34 | 34 | ||
35 | #define ND_TRANSLATE_SPA_STATUS_INVALID_SPA 2 | ||
36 | #define NFIT_ARS_INJECT_INVALID 2 | ||
37 | |||
38 | enum err_inj_options { | ||
39 | ND_ARS_ERR_INJ_OPT_NOTIFY = 0, | ||
40 | }; | ||
41 | |||
42 | /* nfit commands */ | ||
43 | enum nfit_cmd_num { | ||
44 | NFIT_CMD_TRANSLATE_SPA = 5, | ||
45 | NFIT_CMD_ARS_INJECT_SET = 7, | ||
46 | NFIT_CMD_ARS_INJECT_CLEAR = 8, | ||
47 | NFIT_CMD_ARS_INJECT_GET = 9, | ||
48 | }; | ||
49 | |||
50 | struct nd_cmd_translate_spa { | ||
51 | __u64 spa; | ||
52 | __u32 status; | ||
53 | __u8 flags; | ||
54 | __u8 _reserved[3]; | ||
55 | __u64 translate_length; | ||
56 | __u32 num_nvdimms; | ||
57 | struct nd_nvdimm_device { | ||
58 | __u32 nfit_device_handle; | ||
59 | __u32 _reserved; | ||
60 | __u64 dpa; | ||
61 | } __packed devices[0]; | ||
62 | |||
63 | } __packed; | ||
64 | |||
65 | struct nd_cmd_ars_err_inj { | ||
66 | __u64 err_inj_spa_range_base; | ||
67 | __u64 err_inj_spa_range_length; | ||
68 | __u8 err_inj_options; | ||
69 | __u32 status; | ||
70 | } __packed; | ||
71 | |||
72 | struct nd_cmd_ars_err_inj_clr { | ||
73 | __u64 err_inj_clr_spa_range_base; | ||
74 | __u64 err_inj_clr_spa_range_length; | ||
75 | __u32 status; | ||
76 | } __packed; | ||
77 | |||
78 | struct nd_cmd_ars_err_inj_stat { | ||
79 | __u32 status; | ||
80 | __u32 inj_err_rec_count; | ||
81 | struct nd_error_stat_query_record { | ||
82 | __u64 err_inj_stat_spa_range_base; | ||
83 | __u64 err_inj_stat_spa_range_length; | ||
84 | } __packed record[0]; | ||
85 | } __packed; | ||
86 | |||
35 | union acpi_object; | 87 | union acpi_object; |
36 | typedef void *acpi_handle; | 88 | typedef void *acpi_handle; |
37 | 89 | ||
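These payloads are exercised through ND_CMD_CALL: the test's ctl handler (see the nfit.c hunk above) unwraps an nd_cmd_pkg, reads nd_command, and dispatches on the NFIT_CMD_* numbers. A sketch of issuing an error injection against that dispatcher, using only the wrapper fields the handler consumes (nd_command, nd_size_in, nd_size_out, nd_payload); the in/out size split and the helper itself are illustrative, not part of this series:

    static int inject_one_error(struct nvdimm_bus_descriptor *nd_desc,
                                u64 spa, u64 len)
    {
            struct nd_cmd_pkg *pkg;
            struct nd_cmd_ars_err_inj *inj;
            int rc, cmd_rc;

            pkg = kzalloc(sizeof(*pkg) + sizeof(*inj), GFP_KERNEL);
            if (!pkg)
                    return -ENOMEM;

            pkg->nd_command = NFIT_CMD_ARS_INJECT_SET;
            pkg->nd_size_in = sizeof(*inj);
            pkg->nd_size_out = 0;

            inj = (struct nd_cmd_ars_err_inj *)pkg->nd_payload;
            inj->err_inj_spa_range_base = spa;
            inj->err_inj_spa_range_length = len;
            /* ask the harness to queue the uc_error_notify() work as well */
            inj->err_inj_options = 1 << ND_ARS_ERR_INJ_OPT_NOTIFY;

            rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CALL, pkg,
                                sizeof(*pkg) + sizeof(*inj), &cmd_rc);
            kfree(pkg);
            return rc < 0 ? rc : cmd_rc;
    }

A subsequent ARS start/status cycle then reports the injected range, since post_ars_status() now walks the test's badrange list instead of fabricating a single record.
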