diff options
author | Fred Isaman <iisaman@netapp.com> | 2010-10-20 00:17:59 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2010-10-24 18:07:10 -0400 |
commit | 02c35fca7cf4ea2dfdc6db279e230cacbbf4b870 (patch) | |
tree | 7449a8a3fb119d862f6c163c05cf43d58443c377 | |
parent | 85e174ba6b786ad336eb2df105b4f66d0932e70a (diff) |
NFSv4.1: pnfs: full mount/umount infrastructure
Allow a module implementing a layout type to register, and
have its mount/umount routines called for filesystems that
the server declares support it.
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Bian Naimeng <biannm@cn.fujitsu.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | Documentation/filesystems/nfs/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/pnfs.txt | 48 | ||||
-rw-r--r-- | fs/nfs/Kconfig | 8 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 88 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 9 |
5 files changed, 151 insertions, 4 deletions
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 3225a5662114..a57e12411d2a 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX | |||
@@ -12,6 +12,8 @@ nfs-rdma.txt | |||
12 | - how to install and setup the Linux NFS/RDMA client and server software | 12 | - how to install and setup the Linux NFS/RDMA client and server software |
13 | nfsroot.txt | 13 | nfsroot.txt |
14 | - short guide on setting up a diskless box with NFS root filesystem. | 14 | - short guide on setting up a diskless box with NFS root filesystem. |
15 | pnfs.txt | ||
16 | - short explanation of some of the internals of the pnfs client code | ||
15 | rpc-cache.txt | 17 | rpc-cache.txt |
16 | - introduction to the caching mechanisms in the sunrpc layer. | 18 | - introduction to the caching mechanisms in the sunrpc layer. |
17 | idmapper.txt | 19 | idmapper.txt |
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt new file mode 100644 index 000000000000..bc0b9cfe095b --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.txt | |||
@@ -0,0 +1,48 @@ | |||
1 | Reference counting in pnfs: | ||
2 | ========================== | ||
3 | |||
4 | There are several inter-related caches. We have layouts which can | ||
5 | reference multiple devices, each of which can reference multiple data servers. | ||
6 | Each data server can be referenced by multiple devices. Each device | ||
7 | can be referenced by multiple layouts. To keep all of this straight, | ||
8 | we need to reference count. | ||
9 | |||
10 | |||
11 | struct pnfs_layout_hdr | ||
12 | ---------------------- | ||
13 | The on-the-wire command LAYOUTGET corresponds to struct | ||
14 | pnfs_layout_segment, usually referred to by the variable name lseg. | ||
15 | Each nfs_inode may hold a pointer to a cache of these layout | ||
16 | segments in nfsi->layout, of type struct pnfs_layout_hdr. | ||
17 | |||
18 | We reference the header for the inode pointing to it, across each | ||
19 | outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, | ||
20 | LAYOUTCOMMIT), and for each lseg held within. | ||
21 | |||
22 | Each header is also (when non-empty) put on a list associated with | ||
23 | struct nfs_client (cl_layouts). Being put on this list does not bump | ||
24 | the reference count, as the layout is kept around by the lseg that | ||
25 | keeps it in the list. | ||
26 | |||
27 | deviceid_cache | ||
28 | -------------- | ||
29 | lsegs reference device ids, which are resolved per nfs_client and | ||
30 | layout driver type. The device ids are held in an RCU cache (struct | ||
31 | nfs4_deviceid_cache). The cache itself is referenced across each | ||
32 | mount. The entries (struct nfs4_deviceid) themselves are held across | ||
33 | the lifetime of each lseg referencing them. | ||
34 | |||
35 | RCU is used because the deviceid is basically a write once, read many | ||
36 | data structure. The hlist size of 32 buckets needs better | ||
37 | justification, but seems reasonable given that we can have multiple | ||
38 | deviceids per filesystem, and multiple filesystems per nfs_client. | ||
39 | |||
40 | The hash code is copied from the nfsd code base. A discussion of | ||
41 | hashing and variations of this algorithm can be found at: | ||
42 | http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 | ||
43 | |||
44 | data server cache | ||
45 | ----------------- | ||
46 | file driver devices refer to data servers, which are kept in a module | ||
47 | level cache. Its reference is held over the lifetime of the deviceid | ||
48 | pointing to it. | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3f69752d6f18..d94311943ec6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -75,13 +75,17 @@ config NFS_V4 | |||
75 | 75 | ||
76 | config NFS_V4_1 | 76 | config NFS_V4_1 |
77 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 77 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
78 | depends on NFS_V4 && EXPERIMENTAL | 78 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
79 | select PNFS_FILE_LAYOUT | ||
79 | help | 80 | help |
80 | This option enables support for minor version 1 of the NFSv4 protocol | 81 | This option enables support for minor version 1 of the NFSv4 protocol |
81 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. | 82 | (RFC 5661) in the kernel's NFS client. |
82 | 83 | ||
83 | If unsure, say N. | 84 | If unsure, say N. |
84 | 85 | ||
86 | config PNFS_FILE_LAYOUT | ||
87 | tristate | ||
88 | |||
85 | config ROOT_NFS | 89 | config ROOT_NFS |
86 | bool "Root file system on NFS" | 90 | bool "Root file system on NFS" |
87 | depends on NFS_FS=y && IP_PNP | 91 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b483026e82aa..cf795625610e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -32,16 +32,51 @@ | |||
32 | 32 | ||
33 | #define NFSDBG_FACILITY NFSDBG_PNFS | 33 | #define NFSDBG_FACILITY NFSDBG_PNFS |
34 | 34 | ||
35 | /* STUB that returns the equivalent of "no module found" */ | 35 | /* Locking: |
36 | * | ||
37 | * pnfs_spinlock: | ||
38 | * protects pnfs_modules_tbl. | ||
39 | */ | ||
40 | static DEFINE_SPINLOCK(pnfs_spinlock); | ||
41 | |||
42 | /* | ||
43 | * pnfs_modules_tbl holds all pnfs modules | ||
44 | */ | ||
45 | static LIST_HEAD(pnfs_modules_tbl); | ||
46 | |||
47 | /* Return the registered pnfs layout driver module matching given id */ | ||
48 | static struct pnfs_layoutdriver_type * | ||
49 | find_pnfs_driver_locked(u32 id) | ||
50 | { | ||
51 | struct pnfs_layoutdriver_type *local; | ||
52 | |||
53 | list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) | ||
54 | if (local->id == id) | ||
55 | goto out; | ||
56 | local = NULL; | ||
57 | out: | ||
58 | dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); | ||
59 | return local; | ||
60 | } | ||
61 | |||
36 | static struct pnfs_layoutdriver_type * | 62 | static struct pnfs_layoutdriver_type * |
37 | find_pnfs_driver(u32 id) | 63 | find_pnfs_driver(u32 id) |
38 | { | 64 | { |
39 | return NULL; | 65 | struct pnfs_layoutdriver_type *local; |
66 | |||
67 | spin_lock(&pnfs_spinlock); | ||
68 | local = find_pnfs_driver_locked(id); | ||
69 | spin_unlock(&pnfs_spinlock); | ||
70 | return local; | ||
40 | } | 71 | } |
41 | 72 | ||
42 | void | 73 | void |
43 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | 74 | unset_pnfs_layoutdriver(struct nfs_server *nfss) |
44 | { | 75 | { |
76 | if (nfss->pnfs_curr_ld) { | ||
77 | nfss->pnfs_curr_ld->uninitialize_mountpoint(nfss); | ||
78 | module_put(nfss->pnfs_curr_ld->owner); | ||
79 | } | ||
45 | nfss->pnfs_curr_ld = NULL; | 80 | nfss->pnfs_curr_ld = NULL; |
46 | } | 81 | } |
47 | 82 | ||
@@ -74,7 +109,18 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | |||
74 | goto out_no_driver; | 109 | goto out_no_driver; |
75 | } | 110 | } |
76 | } | 111 | } |
112 | if (!try_module_get(ld_type->owner)) { | ||
113 | dprintk("%s: Could not grab reference on module\n", __func__); | ||
114 | goto out_no_driver; | ||
115 | } | ||
77 | server->pnfs_curr_ld = ld_type; | 116 | server->pnfs_curr_ld = ld_type; |
117 | if (ld_type->initialize_mountpoint(server)) { | ||
118 | printk(KERN_ERR | ||
119 | "%s: Error initializing mount point for layout driver %u.\n", | ||
120 | __func__, id); | ||
121 | module_put(ld_type->owner); | ||
122 | goto out_no_driver; | ||
123 | } | ||
78 | dprintk("%s: pNFS module for %u set\n", __func__, id); | 124 | dprintk("%s: pNFS module for %u set\n", __func__, id); |
79 | return; | 125 | return; |
80 | 126 | ||
@@ -82,3 +128,41 @@ out_no_driver: | |||
82 | dprintk("%s: Using NFSv4 I/O\n", __func__); | 128 | dprintk("%s: Using NFSv4 I/O\n", __func__); |
83 | server->pnfs_curr_ld = NULL; | 129 | server->pnfs_curr_ld = NULL; |
84 | } | 130 | } |
131 | |||
132 | int | ||
133 | pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
134 | { | ||
135 | int status = -EINVAL; | ||
136 | struct pnfs_layoutdriver_type *tmp; | ||
137 | |||
138 | if (ld_type->id == 0) { | ||
139 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | ||
140 | return status; | ||
141 | } | ||
142 | |||
143 | spin_lock(&pnfs_spinlock); | ||
144 | tmp = find_pnfs_driver_locked(ld_type->id); | ||
145 | if (!tmp) { | ||
146 | list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); | ||
147 | status = 0; | ||
148 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | ||
149 | ld_type->name); | ||
150 | } else { | ||
151 | printk(KERN_ERR "%s Module with id %d already loaded!\n", | ||
152 | __func__, ld_type->id); | ||
153 | } | ||
154 | spin_unlock(&pnfs_spinlock); | ||
155 | |||
156 | return status; | ||
157 | } | ||
158 | EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); | ||
159 | |||
160 | void | ||
161 | pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
162 | { | ||
163 | dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); | ||
164 | spin_lock(&pnfs_spinlock); | ||
165 | list_del(&ld_type->pnfs_tblid); | ||
166 | spin_unlock(&pnfs_spinlock); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c628ef131d83..61531f338576 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -36,8 +36,17 @@ | |||
36 | 36 | ||
37 | /* Per-layout driver specific registration structure */ | 37 | /* Per-layout driver specific registration structure */ |
38 | struct pnfs_layoutdriver_type { | 38 | struct pnfs_layoutdriver_type { |
39 | struct list_head pnfs_tblid; | ||
40 | const u32 id; | ||
41 | const char *name; | ||
42 | struct module *owner; | ||
43 | int (*initialize_mountpoint) (struct nfs_server *); | ||
44 | int (*uninitialize_mountpoint) (struct nfs_server *); | ||
39 | }; | 45 | }; |
40 | 46 | ||
47 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | ||
48 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | ||
49 | |||
41 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 50 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); |
42 | void unset_pnfs_layoutdriver(struct nfs_server *); | 51 | void unset_pnfs_layoutdriver(struct nfs_server *); |
43 | 52 | ||