aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2017-06-27 00:28:41 -0400
committerDan Williams <dan.j.williams@intel.com>2017-06-29 12:29:50 -0400
commit6e0c90d691cd5d90569f5918ab03eb76c81f9c6e (patch)
tree58a98b00690d31fc0b7552cca10686661acaf51e
parent9a60c3ef577beb0376704808949f2c1f8fb0672c (diff)
libnvdimm, pmem, dax: export a cache control attribute
The dax_flush() operation can be turned into a nop on platforms where firmware arranges for cpu caches to be flushed on a power-fail event. The ACPI 6.2 specification defines a mechanism for the platform to indicate this capability so the kernel can select the proper default. However, for other platforms, the administrator must toggle this setting manually. Given this flush setting is a dax-specific mechanism, we advertise it through a 'dax' attribute group hanging off a host device. For example, a 'pmem0' block-device gets a 'dax' sysfs-subdirectory with a 'write_cache' attribute to control response to dax cache flush requests. This is similar to the 'queue/write_cache' attribute that appears under block devices.

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--drivers/dax/super.c79
-rw-r--r--drivers/nvdimm/pmem.c10
-rw-r--r--include/linux/dax.h3
3 files changed, 92 insertions, 0 deletions
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8bf71195921b..4827251782a1 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(__bdev_dax_supported);
/* Bit numbers used with the atomic bitops on dax_device->flags. */
enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
	DAXDEV_ALIVE = 0,
	/* gate whether dax_flush() calls the low level flush routine */
	DAXDEV_WRITE_CACHE = 1,
};
123 125
124/** 126/**
@@ -139,6 +141,71 @@ struct dax_device {
139 const struct dax_operations *ops; 141 const struct dax_operations *ops;
140}; 142};
141 143
144static ssize_t write_cache_show(struct device *dev,
145 struct device_attribute *attr, char *buf)
146{
147 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
148 ssize_t rc;
149
150 WARN_ON_ONCE(!dax_dev);
151 if (!dax_dev)
152 return -ENXIO;
153
154 rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
155 &dax_dev->flags));
156 put_dax(dax_dev);
157 return rc;
158}
159
160static ssize_t write_cache_store(struct device *dev,
161 struct device_attribute *attr, const char *buf, size_t len)
162{
163 bool write_cache;
164 int rc = strtobool(buf, &write_cache);
165 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
166
167 WARN_ON_ONCE(!dax_dev);
168 if (!dax_dev)
169 return -ENXIO;
170
171 if (rc)
172 len = rc;
173 else if (write_cache)
174 set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
175 else
176 clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
177
178 put_dax(dax_dev);
179 return len;
180}
181static DEVICE_ATTR_RW(write_cache);
182
183static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
184{
185 struct device *dev = container_of(kobj, typeof(*dev), kobj);
186 struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
187
188 WARN_ON_ONCE(!dax_dev);
189 if (!dax_dev)
190 return 0;
191
192 if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush)
193 return 0;
194 return a->mode;
195}
196
197static struct attribute *dax_attributes[] = {
198 &dev_attr_write_cache.attr,
199 NULL,
200};
201
202struct attribute_group dax_attribute_group = {
203 .name = "dax",
204 .attrs = dax_attributes,
205 .is_visible = dax_visible,
206};
207EXPORT_SYMBOL_GPL(dax_attribute_group);
208
142/** 209/**
143 * dax_direct_access() - translate a device pgoff to an absolute pfn 210 * dax_direct_access() - translate a device pgoff to an absolute pfn
144 * @dax_dev: a dax_device instance representing the logical memory range 211 * @dax_dev: a dax_device instance representing the logical memory range
@@ -194,11 +261,23 @@ void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
194 if (!dax_alive(dax_dev)) 261 if (!dax_alive(dax_dev))
195 return; 262 return;
196 263
264 if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))
265 return;
266
197 if (dax_dev->ops->flush) 267 if (dax_dev->ops->flush)
198 dax_dev->ops->flush(dax_dev, pgoff, addr, size); 268 dax_dev->ops->flush(dax_dev, pgoff, addr, size);
199} 269}
200EXPORT_SYMBOL_GPL(dax_flush); 270EXPORT_SYMBOL_GPL(dax_flush);
201 271
272void dax_write_cache(struct dax_device *dax_dev, bool wc)
273{
274 if (wc)
275 set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
276 else
277 clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
278}
279EXPORT_SYMBOL_GPL(dax_write_cache);
280
202bool dax_alive(struct dax_device *dax_dev) 281bool dax_alive(struct dax_device *dax_dev)
203{ 282{
204 lockdep_assert_held(&dax_srcu); 283 lockdep_assert_held(&dax_srcu);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 06f6c27ec1e9..7339d184070e 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -253,6 +253,11 @@ static const struct dax_operations pmem_dax_ops = {
253 .flush = pmem_dax_flush, 253 .flush = pmem_dax_flush,
254}; 254};
255 255
256static const struct attribute_group *pmem_attribute_groups[] = {
257 &dax_attribute_group,
258 NULL,
259};
260
256static void pmem_release_queue(void *q) 261static void pmem_release_queue(void *q)
257{ 262{
258 blk_cleanup_queue(q); 263 blk_cleanup_queue(q);
@@ -287,6 +292,7 @@ static int pmem_attach_disk(struct device *dev,
287 struct pmem_device *pmem; 292 struct pmem_device *pmem;
288 struct resource pfn_res; 293 struct resource pfn_res;
289 struct request_queue *q; 294 struct request_queue *q;
295 struct device *gendev;
290 struct gendisk *disk; 296 struct gendisk *disk;
291 void *addr; 297 void *addr;
292 298
@@ -384,8 +390,12 @@ static int pmem_attach_disk(struct device *dev,
384 put_disk(disk); 390 put_disk(disk);
385 return -ENOMEM; 391 return -ENOMEM;
386 } 392 }
393 dax_write_cache(dax_dev, true);
387 pmem->dax_dev = dax_dev; 394 pmem->dax_dev = dax_dev;
388 395
396 gendev = disk_to_dev(disk);
397 gendev->groups = pmem_attribute_groups;
398
389 device_add_disk(dev, disk); 399 device_add_disk(dev, disk);
390 if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) 400 if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
391 return -ENOMEM; 401 return -ENOMEM;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 73fca1bebaf3..8f39db7439c3 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -23,6 +23,8 @@ struct dax_operations {
23 void (*flush)(struct dax_device *, pgoff_t, void *, size_t); 23 void (*flush)(struct dax_device *, pgoff_t, void *, size_t);
24}; 24};
25 25
26extern struct attribute_group dax_attribute_group;
27
26#if IS_ENABLED(CONFIG_DAX) 28#if IS_ENABLED(CONFIG_DAX)
27struct dax_device *dax_get_by_host(const char *host); 29struct dax_device *dax_get_by_host(const char *host);
28void put_dax(struct dax_device *dax_dev); 30void put_dax(struct dax_device *dax_dev);
@@ -84,6 +86,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
84 size_t bytes, struct iov_iter *i); 86 size_t bytes, struct iov_iter *i);
85void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 87void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
86 size_t size); 88 size_t size);
89void dax_write_cache(struct dax_device *dax_dev, bool wc);
87 90
88/* 91/*
89 * We use lowest available bit in exceptional entry for locking, one bit for 92 * We use lowest available bit in exceptional entry for locking, one bit for