aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKay Sievers <kay.sievers@vrfy.org>2009-04-30 09:23:42 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2009-09-15 12:50:49 -0400
commit2b2af54a5bb6f7e80ccf78f20084b93c398c3a8b (patch)
treebb27e3b16a2a8fadeea869033a406f0a19b63e29
parentea5ffff57dce2f4c85ab056f4b0a202f71db2bdf (diff)
Driver Core: devtmpfs - kernel-maintained tmpfs-based /dev
Devtmpfs lets the kernel create a tmpfs instance called devtmpfs very early at kernel initialization, before any driver-core device is registered. Every device with a major/minor will provide a device node in devtmpfs. Devtmpfs can be changed and altered by userspace at any time, and in any way needed - just like today's udev-mounted tmpfs. Unmodified udev versions will run just fine on top of it, and will recognize an already existing kernel-created device node and use it. The default node permissions are root:root 0600. Proper permissions and user/group ownership, meaningful symlinks, all other policy still needs to be applied by userspace. If a node is created by devtmpfs, devtmpfs will remove the device node when the device goes away. If the device node was created by userspace, or the devtmpfs created node was replaced by userspace, it will no longer be removed by devtmpfs. If it is requested to auto-mount it, it makes init=/bin/sh work without any further userspace support. /dev will be fully populated and dynamic, and always reflect the current device state of the kernel. With the commonly used dynamic device numbers, it solves the problem where static device nodes may point to the wrong devices. It is intended to make the initial bootup logic simpler and more robust, by de-coupling the creation of the initial environment, to reliably run userspace processes, from a complex userspace bootstrap logic to provide a working /dev. Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> Signed-off-by: Jan Blunck <jblunck@suse.de> Tested-By: Harald Hoyer <harald@redhat.com> Tested-By: Scott James Remnant <scott@ubuntu.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--drivers/base/Kconfig25
-rw-r--r--drivers/base/Makefile1
-rw-r--r--drivers/base/base.h6
-rw-r--r--drivers/base/core.c3
-rw-r--r--drivers/base/devtmpfs.c367
-rw-r--r--drivers/base/init.c1
-rw-r--r--include/linux/device.h10
-rw-r--r--include/linux/shmem_fs.h3
-rw-r--r--init/do_mounts.c2
-rw-r--r--init/main.c2
-rw-r--r--mm/shmem.c9
11 files changed, 422 insertions, 7 deletions
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 8f006f96ff53..ee377270beb9 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -8,6 +8,31 @@ config UEVENT_HELPER_PATH
8 Path to uevent helper program forked by the kernel for 8 Path to uevent helper program forked by the kernel for
9 every uevent. 9 every uevent.
10 10
11config DEVTMPFS
12 bool "Create a kernel maintained /dev tmpfs (EXPERIMENTAL)"
13 depends on HOTPLUG && SHMEM && TMPFS
14 help
15 This creates a tmpfs filesystem at bootup
16 and mounts it at /dev. The kernel driver core creates device
17 nodes for all registered devices in that filesystem. All device
18 nodes are owned by root and have the default mode of 0600.
19 Userspace can add and delete the nodes as needed. This is
20 intended to simplify bootup, and make it possible to delay
21 the initial coldplug at bootup done by udev in userspace.
22 It should also provide a simpler way for rescue systems
23 to bring up a kernel with dynamic major/minor numbers.
24 Meaningful symlinks, permissions and device ownership must
25 still be handled by userspace.
26 If unsure, say N here.
27
28config DEVTMPFS_MOUNT
29 bool "Automount devtmpfs at /dev"
30 depends on DEVTMPFS
31 help
32 This will mount devtmpfs at /dev if the kernel mounts the root
33 filesystem. It will not affect initramfs based mounting.
34 If unsure, say N here.
35
11config STANDALONE 36config STANDALONE
12 bool "Select only drivers that don't need compile-time external firmware" if EXPERIMENTAL 37 bool "Select only drivers that don't need compile-time external firmware" if EXPERIMENTAL
13 default y 38 default y
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 1b2640ce74f0..c12c7f2f2a6f 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -4,6 +4,7 @@ obj-y := core.o sys.o bus.o dd.o \
4 driver.o class.o platform.o \ 4 driver.o class.o platform.o \
5 cpu.o firmware.o init.o map.o devres.o \ 5 cpu.o firmware.o init.o map.o devres.o \
6 attribute_container.o transport_class.o 6 attribute_container.o transport_class.o
7obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
7obj-y += power/ 8obj-y += power/
8obj-$(CONFIG_HAS_DMA) += dma-mapping.o 9obj-$(CONFIG_HAS_DMA) += dma-mapping.o
9obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o 10obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 503d59c57501..2ca7f5b7b824 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -139,3 +139,9 @@ static inline void module_add_driver(struct module *mod,
139 struct device_driver *drv) { } 139 struct device_driver *drv) { }
140static inline void module_remove_driver(struct device_driver *drv) { } 140static inline void module_remove_driver(struct device_driver *drv) { }
141#endif 141#endif
142
143#ifdef CONFIG_DEVTMPFS
144extern int devtmpfs_init(void);
145#else
146static inline int devtmpfs_init(void) { return 0; }
147#endif
diff --git a/drivers/base/core.c b/drivers/base/core.c
index a992985d1fab..390e664ec1c7 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -929,6 +929,8 @@ int device_add(struct device *dev)
929 error = device_create_sys_dev_entry(dev); 929 error = device_create_sys_dev_entry(dev);
930 if (error) 930 if (error)
931 goto devtattrError; 931 goto devtattrError;
932
933 devtmpfs_create_node(dev);
932 } 934 }
933 935
934 error = device_add_class_symlinks(dev); 936 error = device_add_class_symlinks(dev);
@@ -1075,6 +1077,7 @@ void device_del(struct device *dev)
1075 if (parent) 1077 if (parent)
1076 klist_del(&dev->p->knode_parent); 1078 klist_del(&dev->p->knode_parent);
1077 if (MAJOR(dev->devt)) { 1079 if (MAJOR(dev->devt)) {
1080 devtmpfs_delete_node(dev);
1078 device_remove_sys_dev_entry(dev); 1081 device_remove_sys_dev_entry(dev);
1079 device_remove_file(dev, &devt_attr); 1082 device_remove_file(dev, &devt_attr);
1080 } 1083 }
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
new file mode 100644
index 000000000000..fd488ad4263a
--- /dev/null
+++ b/drivers/base/devtmpfs.c
@@ -0,0 +1,367 @@
1/*
2 * devtmpfs - kernel-maintained tmpfs-based /dev
3 *
4 * Copyright (C) 2009, Kay Sievers <kay.sievers@vrfy.org>
5 *
6 * During bootup, before any driver core device is registered,
7 * devtmpfs, a tmpfs-based filesystem is created. Every driver-core
8 * device which requests a device node, will add a node in this
9 * filesystem. The node is named after the name of the device,
10 * or the subsystem can provide a custom name. All devices are
11 * owned by root and have a mode of 0600.
12 */
13
14#include <linux/kernel.h>
15#include <linux/syscalls.h>
16#include <linux/mount.h>
17#include <linux/device.h>
18#include <linux/genhd.h>
19#include <linux/namei.h>
20#include <linux/fs.h>
21#include <linux/shmem_fs.h>
22#include <linux/cred.h>
23#include <linux/init_task.h>
24
25static struct vfsmount *dev_mnt;
26
27#if defined CONFIG_DEVTMPFS_MOUNT
28static int dev_mount = 1;
29#else
30static int dev_mount;
31#endif
32
33static int __init mount_param(char *str)
34{
35 dev_mount = simple_strtoul(str, NULL, 0);
36 return 1;
37}
38__setup("devtmpfs.mount=", mount_param);
39
40static int dev_get_sb(struct file_system_type *fs_type, int flags,
41 const char *dev_name, void *data, struct vfsmount *mnt)
42{
43 return get_sb_single(fs_type, flags, data, shmem_fill_super, mnt);
44}
45
46static struct file_system_type dev_fs_type = {
47 .name = "devtmpfs",
48 .get_sb = dev_get_sb,
49 .kill_sb = kill_litter_super,
50};
51
/*
 * Return 1 when @dev is a member of block_class, 0 otherwise.
 * Without CONFIG_BLOCK no device is ever treated as a block device.
 */
#ifdef CONFIG_BLOCK
static inline int is_blockdev(struct device *dev)
{
	if (dev->class == &block_class)
		return 1;
	return 0;
}
#else
static inline int is_blockdev(struct device *dev)
{
	return 0;
}
#endif
60
61static int dev_mkdir(const char *name, mode_t mode)
62{
63 struct nameidata nd;
64 struct dentry *dentry;
65 int err;
66
67 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
68 name, LOOKUP_PARENT, &nd);
69 if (err)
70 return err;
71
72 dentry = lookup_create(&nd, 1);
73 if (!IS_ERR(dentry)) {
74 err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
75 dput(dentry);
76 } else {
77 err = PTR_ERR(dentry);
78 }
79 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
80
81 path_put(&nd.path);
82 return err;
83}
84
85static int create_path(const char *nodepath)
86{
87 char *path;
88 struct nameidata nd;
89 int err = 0;
90
91 path = kstrdup(nodepath, GFP_KERNEL);
92 if (!path)
93 return -ENOMEM;
94
95 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
96 path, LOOKUP_PARENT, &nd);
97 if (err == 0) {
98 struct dentry *dentry;
99
100 /* create directory right away */
101 dentry = lookup_create(&nd, 1);
102 if (!IS_ERR(dentry)) {
103 err = vfs_mkdir(nd.path.dentry->d_inode,
104 dentry, 0755);
105 dput(dentry);
106 }
107 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
108
109 path_put(&nd.path);
110 } else if (err == -ENOENT) {
111 char *s;
112
113 /* parent directories do not exist, create them */
114 s = path;
115 while (1) {
116 s = strchr(s, '/');
117 if (!s)
118 break;
119 s[0] = '\0';
120 err = dev_mkdir(path, 0755);
121 if (err && err != -EEXIST)
122 break;
123 s[0] = '/';
124 s++;
125 }
126 }
127
128 kfree(path);
129 return err;
130}
131
132int devtmpfs_create_node(struct device *dev)
133{
134 const char *tmp = NULL;
135 const char *nodename;
136 const struct cred *curr_cred;
137 mode_t mode;
138 struct nameidata nd;
139 struct dentry *dentry;
140 int err;
141
142 if (!dev_mnt)
143 return 0;
144
145 nodename = device_get_nodename(dev, &tmp);
146 if (!nodename)
147 return -ENOMEM;
148
149 if (is_blockdev(dev))
150 mode = S_IFBLK|0600;
151 else
152 mode = S_IFCHR|0600;
153
154 curr_cred = override_creds(&init_cred);
155 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
156 nodename, LOOKUP_PARENT, &nd);
157 if (err == -ENOENT) {
158 /* create missing parent directories */
159 create_path(nodename);
160 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
161 nodename, LOOKUP_PARENT, &nd);
162 if (err)
163 goto out;
164 }
165
166 dentry = lookup_create(&nd, 0);
167 if (!IS_ERR(dentry)) {
168 err = vfs_mknod(nd.path.dentry->d_inode,
169 dentry, mode, dev->devt);
170 /* mark as kernel created inode */
171 if (!err)
172 dentry->d_inode->i_private = &dev_mnt;
173 dput(dentry);
174 } else {
175 err = PTR_ERR(dentry);
176 }
177 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
178
179 path_put(&nd.path);
180out:
181 kfree(tmp);
182 revert_creds(curr_cred);
183 return err;
184}
185
186static int dev_rmdir(const char *name)
187{
188 struct nameidata nd;
189 struct dentry *dentry;
190 int err;
191
192 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
193 name, LOOKUP_PARENT, &nd);
194 if (err)
195 return err;
196
197 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
198 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
199 if (!IS_ERR(dentry)) {
200 if (dentry->d_inode)
201 err = vfs_rmdir(nd.path.dentry->d_inode, dentry);
202 else
203 err = -ENOENT;
204 dput(dentry);
205 } else {
206 err = PTR_ERR(dentry);
207 }
208 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
209
210 path_put(&nd.path);
211 return err;
212}
213
214static int delete_path(const char *nodepath)
215{
216 const char *path;
217 int err = 0;
218
219 path = kstrdup(nodepath, GFP_KERNEL);
220 if (!path)
221 return -ENOMEM;
222
223 while (1) {
224 char *base;
225
226 base = strrchr(path, '/');
227 if (!base)
228 break;
229 base[0] = '\0';
230 err = dev_rmdir(path);
231 if (err)
232 break;
233 }
234
235 kfree(path);
236 return err;
237}
238
239static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
240{
241 /* did we create it */
242 if (inode->i_private != &dev_mnt)
243 return 0;
244
245 /* does the dev_t match */
246 if (is_blockdev(dev)) {
247 if (!S_ISBLK(stat->mode))
248 return 0;
249 } else {
250 if (!S_ISCHR(stat->mode))
251 return 0;
252 }
253 if (stat->rdev != dev->devt)
254 return 0;
255
256 /* ours */
257 return 1;
258}
259
260int devtmpfs_delete_node(struct device *dev)
261{
262 const char *tmp = NULL;
263 const char *nodename;
264 const struct cred *curr_cred;
265 struct nameidata nd;
266 struct dentry *dentry;
267 struct kstat stat;
268 int deleted = 1;
269 int err;
270
271 if (!dev_mnt)
272 return 0;
273
274 nodename = device_get_nodename(dev, &tmp);
275 if (!nodename)
276 return -ENOMEM;
277
278 curr_cred = override_creds(&init_cred);
279 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
280 nodename, LOOKUP_PARENT, &nd);
281 if (err)
282 goto out;
283
284 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
285 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
286 if (!IS_ERR(dentry)) {
287 if (dentry->d_inode) {
288 err = vfs_getattr(nd.path.mnt, dentry, &stat);
289 if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
290 err = vfs_unlink(nd.path.dentry->d_inode,
291 dentry);
292 if (!err || err == -ENOENT)
293 deleted = 1;
294 }
295 } else {
296 err = -ENOENT;
297 }
298 dput(dentry);
299 } else {
300 err = PTR_ERR(dentry);
301 }
302 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
303
304 path_put(&nd.path);
305 if (deleted && strchr(nodename, '/'))
306 delete_path(nodename);
307out:
308 kfree(tmp);
309 revert_creds(curr_cred);
310 return err;
311}
312
313/*
314 * If configured, or requested by the commandline, devtmpfs will be
315 * auto-mounted after the kernel mounted the root filesystem.
316 */
317int devtmpfs_mount(const char *mountpoint)
318{
319 struct path path;
320 int err;
321
322 if (!dev_mount)
323 return 0;
324
325 if (!dev_mnt)
326 return 0;
327
328 err = kern_path(mountpoint, LOOKUP_FOLLOW, &path);
329 if (err)
330 return err;
331 err = do_add_mount(dev_mnt, &path, 0, NULL);
332 if (err)
333 printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
334 else
335 printk(KERN_INFO "devtmpfs: mounted\n");
336 path_put(&path);
337 return err;
338}
339
340/*
341 * Create devtmpfs instance, driver-core devices will add their device
342 * nodes here.
343 */
344int __init devtmpfs_init(void)
345{
346 int err;
347 struct vfsmount *mnt;
348
349 err = register_filesystem(&dev_fs_type);
350 if (err) {
351 printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
352 "type %i\n", err);
353 return err;
354 }
355
356 mnt = kern_mount(&dev_fs_type);
357 if (IS_ERR(mnt)) {
358 err = PTR_ERR(mnt);
359 printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
360 unregister_filesystem(&dev_fs_type);
361 return err;
362 }
363 dev_mnt = mnt;
364
365 printk(KERN_INFO "devtmpfs: initialized\n");
366 return 0;
367}
diff --git a/drivers/base/init.c b/drivers/base/init.c
index 7bd9b6a5b01f..c8a934e79421 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -20,6 +20,7 @@
20void __init driver_init(void) 20void __init driver_init(void)
21{ 21{
22 /* These are the core pieces */ 22 /* These are the core pieces */
23 devtmpfs_init();
23 devices_init(); 24 devices_init();
24 buses_init(); 25 buses_init();
25 classes_init(); 26 classes_init();
diff --git a/include/linux/device.h b/include/linux/device.h
index 62ff53a67931..847b763e40e9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -552,6 +552,16 @@ extern void put_device(struct device *dev);
552 552
553extern void wait_for_device_probe(void); 553extern void wait_for_device_probe(void);
554 554
555#ifdef CONFIG_DEVTMPFS
556extern int devtmpfs_create_node(struct device *dev);
557extern int devtmpfs_delete_node(struct device *dev);
558extern int devtmpfs_mount(const char *mountpoint);
559#else
560static inline int devtmpfs_create_node(struct device *dev) { return 0; }
561static inline int devtmpfs_delete_node(struct device *dev) { return 0; }
562static inline int devtmpfs_mount(const char *mountpoint) { return 0; }
563#endif
564
555/* drivers/base/power/shutdown.c */ 565/* drivers/base/power/shutdown.c */
556extern void device_shutdown(void); 566extern void device_shutdown(void);
557 567
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 6d3f2f449ead..deee7afd8d66 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -38,6 +38,9 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
38 return container_of(inode, struct shmem_inode_info, vfs_inode); 38 return container_of(inode, struct shmem_inode_info, vfs_inode);
39} 39}
40 40
41extern int init_tmpfs(void);
42extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
43
41#ifdef CONFIG_TMPFS_POSIX_ACL 44#ifdef CONFIG_TMPFS_POSIX_ACL
42int shmem_check_acl(struct inode *, int); 45int shmem_check_acl(struct inode *, int);
43int shmem_acl_init(struct inode *, struct inode *); 46int shmem_acl_init(struct inode *, struct inode *);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 093f65915501..bb008d064c1a 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -415,7 +415,7 @@ void __init prepare_namespace(void)
415 415
416 mount_root(); 416 mount_root();
417out: 417out:
418 devtmpfs_mount("dev");
418 sys_mount(".", "/", NULL, MS_MOVE, NULL); 419 sys_mount(".", "/", NULL, MS_MOVE, NULL);
419 sys_chroot("."); 420 sys_chroot(".");
420} 421}
421
diff --git a/init/main.c b/init/main.c
index b34fd8e5edef..8e6a7846bd07 100644
--- a/init/main.c
+++ b/init/main.c
@@ -68,6 +68,7 @@
68#include <linux/async.h> 68#include <linux/async.h>
69#include <linux/kmemcheck.h> 69#include <linux/kmemcheck.h>
70#include <linux/kmemtrace.h> 70#include <linux/kmemtrace.h>
71#include <linux/shmem_fs.h>
71#include <trace/boot.h> 72#include <trace/boot.h>
72 73
73#include <asm/io.h> 74#include <asm/io.h>
@@ -809,6 +810,7 @@ static void __init do_basic_setup(void)
809 init_workqueues(); 810 init_workqueues();
810 cpuset_init_smp(); 811 cpuset_init_smp();
811 usermodehelper_init(); 812 usermodehelper_init();
813 init_tmpfs();
812 driver_init(); 814 driver_init();
813 init_irq_proc(); 815 init_irq_proc();
814 do_ctors(); 816 do_ctors();
diff --git a/mm/shmem.c b/mm/shmem.c
index 5a0b3d4055f3..bd20f8bb02aa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2298,8 +2298,7 @@ static void shmem_put_super(struct super_block *sb)
2298 sb->s_fs_info = NULL; 2298 sb->s_fs_info = NULL;
2299} 2299}
2300 2300
2301static int shmem_fill_super(struct super_block *sb, 2301int shmem_fill_super(struct super_block *sb, void *data, int silent)
2302 void *data, int silent)
2303{ 2302{
2304 struct inode *inode; 2303 struct inode *inode;
2305 struct dentry *root; 2304 struct dentry *root;
@@ -2519,7 +2518,7 @@ static struct file_system_type tmpfs_fs_type = {
2519 .kill_sb = kill_litter_super, 2518 .kill_sb = kill_litter_super,
2520}; 2519};
2521 2520
2522static int __init init_tmpfs(void) 2521int __init init_tmpfs(void)
2523{ 2522{
2524 int error; 2523 int error;
2525 2524
@@ -2576,7 +2575,7 @@ static struct file_system_type tmpfs_fs_type = {
2576 .kill_sb = kill_litter_super, 2575 .kill_sb = kill_litter_super,
2577}; 2576};
2578 2577
2579static int __init init_tmpfs(void) 2578int __init init_tmpfs(void)
2580{ 2579{
2581 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); 2580 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
2582 2581
@@ -2687,5 +2686,3 @@ int shmem_zero_setup(struct vm_area_struct *vma)
2687 vma->vm_ops = &shmem_vm_ops; 2686 vma->vm_ops = &shmem_vm_ops;
2688 return 0; 2687 return 0;
2689} 2688}
2690
2691module_init(init_tmpfs)