aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2014-10-13 01:41:28 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-11-07 13:53:25 -0500
commit 2b75869bba676c248d8d25ae6d2bd9221dfffdb6 (patch)
tree bcbf58a3e4da882e9cc4dda7da4b65f18a01fa81
parent 0936896056365349afa867c16e9f9100a6707cbf (diff)
sysfs/kernfs: allow attributes to request write buffer be pre-allocated.
md/raid allows metadata management to be performed in user-space. At various times, particularly on device failure, the metadata needs to be updated before further writes can be permitted. This means that the user-space program which updates metadata must not block on writeout, and so must not allocate memory. mlockall(MCL_CURRENT|MCL_FUTURE) and pre-allocation can avoid all memory allocation issues for user-memory, but that does not help kernel memory. Several kernel objects can be pre-allocated, e.g. files opened before any writes to the array are permitted. However some kernel allocation happens in places that cannot be pre-allocated. In particular, writes to sysfs files (to tell md that it can now allow writes to the array) allocate a buffer using GFP_KERNEL. This patch allows attributes to be marked as "PREALLOC". In that case the maximal buffer is allocated when the file is opened, and then used on each write instead of allocating a new buffer. As the same buffer is now shared for all writes on the same file description, the mutex is extended to cover full use of the buffer including the copy_from_user(). The new __ATTR_PREALLOC() 'or's a new flag into the 'mode', which is inspected by sysfs_add_file_mode_ns() to determine if the file should be marked as requiring prealloc. Despite the comment, we *do* use ->seq_show together with ->prealloc in this patch. The next patch fixes that. Signed-off-by: NeilBrown <neilb@suse.de> Reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--fs/kernfs/file.c45
-rw-r--r--fs/sysfs/file.c31
-rw-r--r--include/linux/kernfs.h8
-rw-r--r--include/linux/sysfs.h9
4 files changed, 71 insertions, 22 deletions
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 4429d6d9217f..70186e2e692a 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -106,7 +106,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
106 const struct kernfs_ops *ops; 106 const struct kernfs_ops *ops;
107 107
108 /* 108 /*
109 * @of->mutex nests outside active ref and is just to ensure that 109 * @of->mutex nests outside active ref and is primarily to ensure that
110 * the ops aren't called concurrently for the same open file. 110 * the ops aren't called concurrently for the same open file.
111 */ 111 */
112 mutex_lock(&of->mutex); 112 mutex_lock(&of->mutex);
@@ -194,7 +194,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
194 return -ENOMEM; 194 return -ENOMEM;
195 195
196 /* 196 /*
197 * @of->mutex nests outside active ref and is just to ensure that 197 * @of->mutex nests outside active ref and is primarily to ensure that
198 * the ops aren't called concurrently for the same open file. 198 * the ops aren't called concurrently for the same open file.
199 */ 199 */
200 mutex_lock(&of->mutex); 200 mutex_lock(&of->mutex);
@@ -278,19 +278,16 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
278 len = min_t(size_t, count, PAGE_SIZE); 278 len = min_t(size_t, count, PAGE_SIZE);
279 } 279 }
280 280
281 buf = kmalloc(len + 1, GFP_KERNEL); 281 buf = of->prealloc_buf;
282 if (!buf)
283 buf = kmalloc(len + 1, GFP_KERNEL);
282 if (!buf) 284 if (!buf)
283 return -ENOMEM; 285 return -ENOMEM;
284 286
285 if (copy_from_user(buf, user_buf, len)) {
286 len = -EFAULT;
287 goto out_free;
288 }
289 buf[len] = '\0'; /* guarantee string termination */
290
291 /* 287 /*
292 * @of->mutex nests outside active ref and is just to ensure that 288 * @of->mutex nests outside active ref and is used both to ensure that
293 * the ops aren't called concurrently for the same open file. 289 * the ops aren't called concurrently for the same open file, and
290 * to provide exclusive access to ->prealloc_buf (when that exists).
294 */ 291 */
295 mutex_lock(&of->mutex); 292 mutex_lock(&of->mutex);
296 if (!kernfs_get_active(of->kn)) { 293 if (!kernfs_get_active(of->kn)) {
@@ -299,19 +296,27 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
299 goto out_free; 296 goto out_free;
300 } 297 }
301 298
299 if (copy_from_user(buf, user_buf, len)) {
300 len = -EFAULT;
301 goto out_unlock;
302 }
303 buf[len] = '\0'; /* guarantee string termination */
304
302 ops = kernfs_ops(of->kn); 305 ops = kernfs_ops(of->kn);
303 if (ops->write) 306 if (ops->write)
304 len = ops->write(of, buf, len, *ppos); 307 len = ops->write(of, buf, len, *ppos);
305 else 308 else
306 len = -EINVAL; 309 len = -EINVAL;
307 310
308 kernfs_put_active(of->kn);
309 mutex_unlock(&of->mutex);
310
311 if (len > 0) 311 if (len > 0)
312 *ppos += len; 312 *ppos += len;
313
314out_unlock:
315 kernfs_put_active(of->kn);
316 mutex_unlock(&of->mutex);
313out_free: 317out_free:
314 kfree(buf); 318 if (buf != of->prealloc_buf)
319 kfree(buf);
315 return len; 320 return len;
316} 321}
317 322
@@ -685,6 +690,14 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
685 */ 690 */
686 of->atomic_write_len = ops->atomic_write_len; 691 of->atomic_write_len = ops->atomic_write_len;
687 692
693 if (ops->prealloc) {
694 int len = of->atomic_write_len ?: PAGE_SIZE;
695 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
696 error = -ENOMEM;
697 if (!of->prealloc_buf)
698 goto err_free;
699 }
700
688 /* 701 /*
689 * Always instantiate seq_file even if read access doesn't use 702 * Always instantiate seq_file even if read access doesn't use
690 * seq_file or is not requested. This unifies private data access 703 * seq_file or is not requested. This unifies private data access
@@ -715,6 +728,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
715err_close: 728err_close:
716 seq_release(inode, file); 729 seq_release(inode, file);
717err_free: 730err_free:
731 kfree(of->prealloc_buf);
718 kfree(of); 732 kfree(of);
719err_out: 733err_out:
720 kernfs_put_active(kn); 734 kernfs_put_active(kn);
@@ -728,6 +742,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
728 742
729 kernfs_put_open_node(kn, of); 743 kernfs_put_open_node(kn, of);
730 seq_release(inode, filp); 744 seq_release(inode, filp);
745 kfree(of->prealloc_buf);
731 kfree(of); 746 kfree(of);
732 747
733 return 0; 748 return 0;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 589abee16a39..4ad3721a991c 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -184,6 +184,17 @@ static const struct kernfs_ops sysfs_file_kfops_rw = {
184 .write = sysfs_kf_write, 184 .write = sysfs_kf_write,
185}; 185};
186 186
187static const struct kernfs_ops sysfs_prealloc_kfops_wo = {
188 .write = sysfs_kf_write,
189 .prealloc = true,
190};
191
192static const struct kernfs_ops sysfs_prealloc_kfops_rw = {
193 .seq_show = sysfs_kf_seq_show,
194 .write = sysfs_kf_write,
195 .prealloc = true,
196};
197
187static const struct kernfs_ops sysfs_bin_kfops_ro = { 198static const struct kernfs_ops sysfs_bin_kfops_ro = {
188 .read = sysfs_kf_bin_read, 199 .read = sysfs_kf_bin_read,
189}; 200};
@@ -222,13 +233,19 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
222 kobject_name(kobj))) 233 kobject_name(kobj)))
223 return -EINVAL; 234 return -EINVAL;
224 235
225 if (sysfs_ops->show && sysfs_ops->store) 236 if (sysfs_ops->show && sysfs_ops->store) {
226 ops = &sysfs_file_kfops_rw; 237 if (mode & SYSFS_PREALLOC)
227 else if (sysfs_ops->show) 238 ops = &sysfs_prealloc_kfops_rw;
239 else
240 ops = &sysfs_file_kfops_rw;
241 } else if (sysfs_ops->show)
228 ops = &sysfs_file_kfops_ro; 242 ops = &sysfs_file_kfops_ro;
229 else if (sysfs_ops->store) 243 else if (sysfs_ops->store) {
230 ops = &sysfs_file_kfops_wo; 244 if (mode & SYSFS_PREALLOC)
231 else 245 ops = &sysfs_prealloc_kfops_wo;
246 else
247 ops = &sysfs_file_kfops_wo;
248 } else
232 ops = &sysfs_file_kfops_empty; 249 ops = &sysfs_file_kfops_empty;
233 250
234 size = PAGE_SIZE; 251 size = PAGE_SIZE;
@@ -253,7 +270,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
253 if (!attr->ignore_lockdep) 270 if (!attr->ignore_lockdep)
254 key = attr->key ?: (struct lock_class_key *)&attr->skey; 271 key = attr->key ?: (struct lock_class_key *)&attr->skey;
255#endif 272#endif
256 kn = __kernfs_create_file(parent, attr->name, mode, size, ops, 273 kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
257 (void *)attr, ns, true, key); 274 (void *)attr, ns, true, key);
258 if (IS_ERR(kn)) { 275 if (IS_ERR(kn)) {
259 if (PTR_ERR(kn) == -EEXIST) 276 if (PTR_ERR(kn) == -EEXIST)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 30faf797c2c3..d4e01b358341 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -179,6 +179,7 @@ struct kernfs_open_file {
179 struct mutex mutex; 179 struct mutex mutex;
180 int event; 180 int event;
181 struct list_head list; 181 struct list_head list;
182 char *prealloc_buf;
182 183
183 size_t atomic_write_len; 184 size_t atomic_write_len;
184 bool mmapped; 185 bool mmapped;
@@ -214,6 +215,13 @@ struct kernfs_ops {
214 * larger ones are rejected with -E2BIG. 215 * larger ones are rejected with -E2BIG.
215 */ 216 */
216 size_t atomic_write_len; 217 size_t atomic_write_len;
218 /*
219 * "prealloc" causes a buffer to be allocated at open for
220 * all read/write requests. As ->seq_show uses seq_read()
221 * which does its own allocation, it is incompatible with
222 * ->prealloc. Provide ->read and ->write with ->prealloc.
223 */
224 bool prealloc;
217 ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, 225 ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
218 loff_t off); 226 loff_t off);
219 227
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index f97d0dbb59fa..ddad16148bd6 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -70,6 +70,8 @@ struct attribute_group {
70 * for examples.. 70 * for examples..
71 */ 71 */
72 72
73#define SYSFS_PREALLOC 010000
74
73#define __ATTR(_name, _mode, _show, _store) { \ 75#define __ATTR(_name, _mode, _show, _store) { \
74 .attr = {.name = __stringify(_name), \ 76 .attr = {.name = __stringify(_name), \
75 .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ 77 .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
@@ -77,6 +79,13 @@ struct attribute_group {
77 .store = _store, \ 79 .store = _store, \
78} 80}
79 81
82#define __ATTR_PREALLOC(_name, _mode, _show, _store) { \
83 .attr = {.name = __stringify(_name), \
84 .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\
85 .show = _show, \
86 .store = _store, \
87}
88
80#define __ATTR_RO(_name) { \ 89#define __ATTR_RO(_name) { \
81 .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \ 90 .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \
82 .show = _name##_show, \ 91 .show = _name##_show, \