diff options
| -rw-r--r-- | Documentation/filesystems/nfs/00-INDEX | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/nfs/pnfs.txt | 48 | ||||
| -rw-r--r-- | fs/nfs/Kconfig | 8 | ||||
| -rw-r--r-- | fs/nfs/pnfs.c | 88 | ||||
| -rw-r--r-- | fs/nfs/pnfs.h | 9 |
5 files changed, 151 insertions, 4 deletions
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 3225a5662114..a57e12411d2a 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX | |||
| @@ -12,6 +12,8 @@ nfs-rdma.txt | |||
| 12 | - how to install and setup the Linux NFS/RDMA client and server software | 12 | - how to install and setup the Linux NFS/RDMA client and server software |
| 13 | nfsroot.txt | 13 | nfsroot.txt |
| 14 | - short guide on setting up a diskless box with NFS root filesystem. | 14 | - short guide on setting up a diskless box with NFS root filesystem. |
| 15 | pnfs.txt | ||
| 16 | - short explanation of some of the internals of the pnfs client code | ||
| 15 | rpc-cache.txt | 17 | rpc-cache.txt |
| 16 | - introduction to the caching mechanisms in the sunrpc layer. | 18 | - introduction to the caching mechanisms in the sunrpc layer. |
| 17 | idmapper.txt | 19 | idmapper.txt |
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt new file mode 100644 index 000000000000..bc0b9cfe095b --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.txt | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | Reference counting in pnfs: | ||
| 2 | ========================== | ||
| 3 | |||
| 4 | There are several inter-related caches. We have layouts which can | ||
| 5 | reference multiple devices, each of which can reference multiple data servers. | ||
| 6 | Each data server can be referenced by multiple devices. Each device | ||
| 7 | can be referenced by multiple layouts. To keep all of this straight, | ||
| 8 | we need to reference count. | ||
| 9 | |||
| 10 | |||
| 11 | struct pnfs_layout_hdr | ||
| 12 | ---------------------- | ||
| 13 | The on-the-wire command LAYOUTGET corresponds to struct | ||
| 14 | pnfs_layout_segment, usually referred to by the variable name lseg. | ||
| 15 | Each nfs_inode may hold a pointer to a cache of these layout | ||
| 16 | segments in nfsi->layout, of type struct pnfs_layout_hdr. | ||
| 17 | |||
| 18 | We reference the header for the inode pointing to it, across each | ||
| 19 | outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, | ||
| 20 | LAYOUTCOMMIT), and for each lseg held within. | ||
| 21 | |||
| 22 | Each header is also (when non-empty) put on a list associated with | ||
| 23 | struct nfs_client (cl_layouts). Being put on this list does not bump | ||
| 24 | the reference count, as the layout is kept around by the lseg that | ||
| 25 | keeps it in the list. | ||
| 26 | |||
| 27 | deviceid_cache | ||
| 28 | -------------- | ||
| 29 | lsegs reference device ids, which are resolved per nfs_client and | ||
| 30 | layout driver type. The device ids are held in an RCU cache (struct | ||
| 31 | nfs4_deviceid_cache). The cache itself is referenced across each | ||
| 32 | mount. The entries (struct nfs4_deviceid) themselves are held across | ||
| 33 | the lifetime of each lseg referencing them. | ||
| 34 | |||
| 35 | RCU is used because the deviceid is basically a write once, read many | ||
| 36 | data structure. The hlist size of 32 buckets needs better | ||
| 37 | justification, but seems reasonable given that we can have multiple | ||
| 38 | deviceids per filesystem, and multiple filesystems per nfs_client. | ||
| 39 | |||
| 40 | The hash code is copied from the nfsd code base. A discussion of | ||
| 41 | hashing and variations of this algorithm can be found at: | ||
| 42 | http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 | ||
| 43 | |||
| 44 | data server cache | ||
| 45 | ----------------- | ||
| 46 | File driver devices refer to data servers, which are kept in a module-level | ||
| 47 | cache. A reference to each data server is held over the lifetime of the | ||
| 48 | deviceid pointing to it. | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3f69752d6f18..d94311943ec6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
| @@ -75,13 +75,17 @@ config NFS_V4 | |||
| 75 | 75 | ||
| 76 | config NFS_V4_1 | 76 | config NFS_V4_1 |
| 77 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 77 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
| 78 | depends on NFS_V4 && EXPERIMENTAL | 78 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
| 79 | select PNFS_FILE_LAYOUT | ||
| 79 | help | 80 | help |
| 80 | This option enables support for minor version 1 of the NFSv4 protocol | 81 | This option enables support for minor version 1 of the NFSv4 protocol |
| 81 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. | 82 | (RFC 5661) in the kernel's NFS client. |
| 82 | 83 | ||
| 83 | If unsure, say N. | 84 | If unsure, say N. |
| 84 | 85 | ||
| 86 | config PNFS_FILE_LAYOUT | ||
| 87 | tristate | ||
| 88 | |||
| 85 | config ROOT_NFS | 89 | config ROOT_NFS |
| 86 | bool "Root file system on NFS" | 90 | bool "Root file system on NFS" |
| 87 | depends on NFS_FS=y && IP_PNP | 91 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b483026e82aa..cf795625610e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -32,16 +32,51 @@ | |||
| 32 | 32 | ||
| 33 | #define NFSDBG_FACILITY NFSDBG_PNFS | 33 | #define NFSDBG_FACILITY NFSDBG_PNFS |
| 34 | 34 | ||
| 35 | /* STUB that returns the equivalent of "no module found" */ | 35 | /* Locking: |
| 36 | * | ||
| 37 | * pnfs_spinlock: | ||
| 38 | * protects pnfs_modules_tbl. | ||
| 39 | */ | ||
| 40 | static DEFINE_SPINLOCK(pnfs_spinlock); | ||
| 41 | |||
| 42 | /* | ||
| 43 | * pnfs_modules_tbl holds all pnfs modules | ||
| 44 | */ | ||
| 45 | static LIST_HEAD(pnfs_modules_tbl); | ||
| 46 | |||
| 47 | /* Return the registered pnfs layout driver module matching given id */ | ||
| 48 | static struct pnfs_layoutdriver_type * | ||
| 49 | find_pnfs_driver_locked(u32 id) | ||
| 50 | { | ||
| 51 | struct pnfs_layoutdriver_type *local; | ||
| 52 | |||
| 53 | list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) | ||
| 54 | if (local->id == id) | ||
| 55 | goto out; | ||
| 56 | local = NULL; | ||
| 57 | out: | ||
| 58 | dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); | ||
| 59 | return local; | ||
| 60 | } | ||
| 61 | |||
| 36 | static struct pnfs_layoutdriver_type * | 62 | static struct pnfs_layoutdriver_type * |
| 37 | find_pnfs_driver(u32 id) | 63 | find_pnfs_driver(u32 id) |
| 38 | { | 64 | { |
| 39 | return NULL; | 65 | struct pnfs_layoutdriver_type *local; |
| 66 | |||
| 67 | spin_lock(&pnfs_spinlock); | ||
| 68 | local = find_pnfs_driver_locked(id); | ||
| 69 | spin_unlock(&pnfs_spinlock); | ||
| 70 | return local; | ||
| 40 | } | 71 | } |
| 41 | 72 | ||
| 42 | void | 73 | void |
| 43 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | 74 | unset_pnfs_layoutdriver(struct nfs_server *nfss) |
| 44 | { | 75 | { |
| 76 | if (nfss->pnfs_curr_ld) { | ||
| 77 | nfss->pnfs_curr_ld->uninitialize_mountpoint(nfss); | ||
| 78 | module_put(nfss->pnfs_curr_ld->owner); | ||
| 79 | } | ||
| 45 | nfss->pnfs_curr_ld = NULL; | 80 | nfss->pnfs_curr_ld = NULL; |
| 46 | } | 81 | } |
| 47 | 82 | ||
| @@ -74,7 +109,18 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | |||
| 74 | goto out_no_driver; | 109 | goto out_no_driver; |
| 75 | } | 110 | } |
| 76 | } | 111 | } |
| 112 | if (!try_module_get(ld_type->owner)) { | ||
| 113 | dprintk("%s: Could not grab reference on module\n", __func__); | ||
| 114 | goto out_no_driver; | ||
| 115 | } | ||
| 77 | server->pnfs_curr_ld = ld_type; | 116 | server->pnfs_curr_ld = ld_type; |
| 117 | if (ld_type->initialize_mountpoint(server)) { | ||
| 118 | printk(KERN_ERR | ||
| 119 | "%s: Error initializing mount point for layout driver %u.\n", | ||
| 120 | __func__, id); | ||
| 121 | module_put(ld_type->owner); | ||
| 122 | goto out_no_driver; | ||
| 123 | } | ||
| 78 | dprintk("%s: pNFS module for %u set\n", __func__, id); | 124 | dprintk("%s: pNFS module for %u set\n", __func__, id); |
| 79 | return; | 125 | return; |
| 80 | 126 | ||
| @@ -82,3 +128,41 @@ out_no_driver: | |||
| 82 | dprintk("%s: Using NFSv4 I/O\n", __func__); | 128 | dprintk("%s: Using NFSv4 I/O\n", __func__); |
| 83 | server->pnfs_curr_ld = NULL; | 129 | server->pnfs_curr_ld = NULL; |
| 84 | } | 130 | } |
| 131 | |||
| 132 | int | ||
| 133 | pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
| 134 | { | ||
| 135 | int status = -EINVAL; | ||
| 136 | struct pnfs_layoutdriver_type *tmp; | ||
| 137 | |||
| 138 | if (ld_type->id == 0) { | ||
| 139 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | ||
| 140 | return status; | ||
| 141 | } | ||
| 142 | |||
| 143 | spin_lock(&pnfs_spinlock); | ||
| 144 | tmp = find_pnfs_driver_locked(ld_type->id); | ||
| 145 | if (!tmp) { | ||
| 146 | list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); | ||
| 147 | status = 0; | ||
| 148 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | ||
| 149 | ld_type->name); | ||
| 150 | } else { | ||
| 151 | printk(KERN_ERR "%s Module with id %d already loaded!\n", | ||
| 152 | __func__, ld_type->id); | ||
| 153 | } | ||
| 154 | spin_unlock(&pnfs_spinlock); | ||
| 155 | |||
| 156 | return status; | ||
| 157 | } | ||
| 158 | EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); | ||
| 159 | |||
| 160 | void | ||
| 161 | pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
| 162 | { | ||
| 163 | dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); | ||
| 164 | spin_lock(&pnfs_spinlock); | ||
| 165 | list_del(&ld_type->pnfs_tblid); | ||
| 166 | spin_unlock(&pnfs_spinlock); | ||
| 167 | } | ||
| 168 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c628ef131d83..61531f338576 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -36,8 +36,17 @@ | |||
| 36 | 36 | ||
| 37 | /* Per-layout driver specific registration structure */ | 37 | /* Per-layout driver specific registration structure */ |
| 38 | struct pnfs_layoutdriver_type { | 38 | struct pnfs_layoutdriver_type { |
| 39 | struct list_head pnfs_tblid; | ||
| 40 | const u32 id; | ||
| 41 | const char *name; | ||
| 42 | struct module *owner; | ||
| 43 | int (*initialize_mountpoint) (struct nfs_server *); | ||
| 44 | int (*uninitialize_mountpoint) (struct nfs_server *); | ||
| 39 | }; | 45 | }; |
| 40 | 46 | ||
| 47 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | ||
| 48 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | ||
| 49 | |||
| 41 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 50 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); |
| 42 | void unset_pnfs_layoutdriver(struct nfs_server *); | 51 | void unset_pnfs_layoutdriver(struct nfs_server *); |
| 43 | 52 | ||
