diff options
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/Kconfig | 8 | ||||
-rw-r--r-- | fs/nfs/Makefile | 4 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 8 | ||||
-rw-r--r-- | fs/nfs/client.c | 10 | ||||
-rw-r--r-- | fs/nfs/file.c | 5 | ||||
-rw-r--r-- | fs/nfs/inode.c | 3 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.c | 280 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.h | 94 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayoutdev.c | 448 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 218 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 360 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 783 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 189 | ||||
-rw-r--r-- | fs/nfs/read.c | 3 |
15 files changed, 2374 insertions, 41 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 5c55c26af165..fd667652c502 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -77,13 +77,17 @@ config NFS_V4 | |||
77 | 77 | ||
78 | config NFS_V4_1 | 78 | config NFS_V4_1 |
79 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 79 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
80 | depends on NFS_V4 && EXPERIMENTAL | 80 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
81 | select PNFS_FILE_LAYOUT | ||
81 | help | 82 | help |
82 | This option enables support for minor version 1 of the NFSv4 protocol | 83 | This option enables support for minor version 1 of the NFSv4 protocol |
83 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. | 84 | (RFC 5661) in the kernel's NFS client. |
84 | 85 | ||
85 | If unsure, say N. | 86 | If unsure, say N. |
86 | 87 | ||
88 | config PNFS_FILE_LAYOUT | ||
89 | tristate | ||
90 | |||
87 | config ROOT_NFS | 91 | config ROOT_NFS |
88 | bool "Root file system on NFS" | 92 | bool "Root file system on NFS" |
89 | depends on NFS_FS=y && IP_PNP | 93 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index da7fda639eac..4776ff9e3814 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
15 | delegation.o idmap.o \ | 15 | delegation.o idmap.o \ |
16 | callback.o callback_xdr.o callback_proc.o \ | 16 | callback.o callback_xdr.o callback_proc.o \ |
17 | nfs4namespace.o | 17 | nfs4namespace.o |
18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o | ||
18 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 19 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
19 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
21 | |||
22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | ||
23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 930d10fecdaf..2950fca0c61b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n | |||
118 | if (delegation == NULL) | 118 | if (delegation == NULL) |
119 | return 0; | 119 | return 0; |
120 | 120 | ||
121 | /* seqid is 4-bytes long */ | 121 | if (stateid->stateid.seqid != 0) |
122 | if (((u32 *) &stateid->data)[0] != 0) | ||
123 | return 0; | 122 | return 0; |
124 | if (memcmp(&delegation->stateid.data[4], &stateid->data[4], | 123 | if (memcmp(&delegation->stateid.stateid.other, |
125 | sizeof(stateid->data)-4)) | 124 | &stateid->stateid.other, |
125 | NFS4_STATEID_OTHER_SIZE)) | ||
126 | return 0; | 126 | return 0; |
127 | 127 | ||
128 | return 1; | 128 | return 1; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a882785eba41..fd6f0a70021b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "iostat.h" | 48 | #include "iostat.h" |
49 | #include "internal.h" | 49 | #include "internal.h" |
50 | #include "fscache.h" | 50 | #include "fscache.h" |
51 | #include "pnfs.h" | ||
51 | 52 | ||
52 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 53 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
53 | 54 | ||
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
155 | cred = rpc_lookup_machine_cred(); | 156 | cred = rpc_lookup_machine_cred(); |
156 | if (!IS_ERR(cred)) | 157 | if (!IS_ERR(cred)) |
157 | clp->cl_machine_cred = cred; | 158 | clp->cl_machine_cred = cred; |
158 | 159 | #if defined(CONFIG_NFS_V4_1) | |
160 | INIT_LIST_HEAD(&clp->cl_layouts); | ||
161 | #endif | ||
159 | nfs_fscache_get_client_cookie(clp); | 162 | nfs_fscache_get_client_cookie(clp); |
160 | 163 | ||
161 | return clp; | 164 | return clp; |
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp) | |||
252 | nfs_free_client(clp); | 255 | nfs_free_client(clp); |
253 | } | 256 | } |
254 | } | 257 | } |
258 | EXPORT_SYMBOL_GPL(nfs_put_client); | ||
255 | 259 | ||
256 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 260 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
257 | /* | 261 | /* |
@@ -900,6 +904,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
900 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | 904 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) |
901 | server->wsize = NFS_MAX_FILE_IO_SIZE; | 905 | server->wsize = NFS_MAX_FILE_IO_SIZE; |
902 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 906 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
907 | set_pnfs_layoutdriver(server, fsinfo->layouttype); | ||
908 | |||
903 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); | 909 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); |
904 | 910 | ||
905 | server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); | 911 | server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); |
@@ -939,6 +945,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str | |||
939 | } | 945 | } |
940 | 946 | ||
941 | fsinfo.fattr = fattr; | 947 | fsinfo.fattr = fattr; |
948 | fsinfo.layouttype = 0; | ||
942 | error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); | 949 | error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); |
943 | if (error < 0) | 950 | if (error < 0) |
944 | goto out_error; | 951 | goto out_error; |
@@ -1021,6 +1028,7 @@ void nfs_free_server(struct nfs_server *server) | |||
1021 | { | 1028 | { |
1022 | dprintk("--> nfs_free_server()\n"); | 1029 | dprintk("--> nfs_free_server()\n"); |
1023 | 1030 | ||
1031 | unset_pnfs_layoutdriver(server); | ||
1024 | spin_lock(&nfs_client_lock); | 1032 | spin_lock(&nfs_client_lock); |
1025 | list_del(&server->client_link); | 1033 | list_del(&server->client_link); |
1026 | list_del(&server->master_link); | 1034 | list_del(&server->master_link); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e18c31e08a28..e756075637b0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | 38 | #include "fscache.h" |
39 | #include "pnfs.h" | ||
39 | 40 | ||
40 | #define NFSDBG_FACILITY NFSDBG_FILE | 41 | #define NFSDBG_FACILITY NFSDBG_FILE |
41 | 42 | ||
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, | |||
386 | file->f_path.dentry->d_name.name, | 387 | file->f_path.dentry->d_name.name, |
387 | mapping->host->i_ino, len, (long long) pos); | 388 | mapping->host->i_ino, len, (long long) pos); |
388 | 389 | ||
390 | pnfs_update_layout(mapping->host, | ||
391 | nfs_file_open_context(file), | ||
392 | IOMODE_RW); | ||
393 | |||
389 | start: | 394 | start: |
390 | /* | 395 | /* |
391 | * Prevent starvation issues if someone is doing a consistency | 396 | * Prevent starvation issues if someone is doing a consistency |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6eec28656415..314f57164602 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "internal.h" | 48 | #include "internal.h" |
49 | #include "fscache.h" | 49 | #include "fscache.h" |
50 | #include "dns_resolve.h" | 50 | #include "dns_resolve.h" |
51 | #include "pnfs.h" | ||
51 | 52 | ||
52 | #define NFSDBG_FACILITY NFSDBG_VFS | 53 | #define NFSDBG_FACILITY NFSDBG_VFS |
53 | 54 | ||
@@ -1410,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode) | |||
1410 | { | 1411 | { |
1411 | truncate_inode_pages(&inode->i_data, 0); | 1412 | truncate_inode_pages(&inode->i_data, 0); |
1412 | end_writeback(inode); | 1413 | end_writeback(inode); |
1414 | pnfs_destroy_layout(NFS_I(inode)); | ||
1413 | /* If we are holding a delegation, return it! */ | 1415 | /* If we are holding a delegation, return it! */ |
1414 | nfs_inode_return_delegation_noreclaim(inode); | 1416 | nfs_inode_return_delegation_noreclaim(inode); |
1415 | /* First call standard NFS clear_inode() code */ | 1417 | /* First call standard NFS clear_inode() code */ |
@@ -1447,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) | |||
1447 | nfsi->delegation = NULL; | 1449 | nfsi->delegation = NULL; |
1448 | nfsi->delegation_state = 0; | 1450 | nfsi->delegation_state = 0; |
1449 | init_rwsem(&nfsi->rwsem); | 1451 | init_rwsem(&nfsi->rwsem); |
1452 | nfsi->layout = NULL; | ||
1450 | #endif | 1453 | #endif |
1451 | } | 1454 | } |
1452 | 1455 | ||
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c new file mode 100644 index 000000000000..2e92f0d8d654 --- /dev/null +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * Module for the pnfs nfs4 file layout driver. | ||
3 | * Defines all I/O and Policy interface operations, plus code | ||
4 | * to register itself with the pNFS client. | ||
5 | * | ||
6 | * Copyright (c) 2002 | ||
7 | * The Regents of the University of Michigan | ||
8 | * All Rights Reserved | ||
9 | * | ||
10 | * Dean Hildebrand <dhildebz@umich.edu> | ||
11 | * | ||
12 | * Permission is granted to use, copy, create derivative works, and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the University of Michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. If | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * University of Michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * This software is provided as is, without representation or warranty | ||
22 | * of any kind either express or implied, including without limitation | ||
23 | * the implied warranties of merchantability, fitness for a particular | ||
24 | * purpose, or noninfringement. The Regents of the University of | ||
25 | * Michigan shall not be liable for any damages, including special, | ||
26 | * indirect, incidental, or consequential damages, with respect to any | ||
27 | * claim arising out of or in connection with the use of the software, | ||
28 | * even if it has been or is hereafter advised of the possibility of | ||
29 | * such damages. | ||
30 | */ | ||
31 | |||
32 | #include <linux/nfs_fs.h> | ||
33 | |||
34 | #include "internal.h" | ||
35 | #include "nfs4filelayout.h" | ||
36 | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
38 | |||
39 | MODULE_LICENSE("GPL"); | ||
40 | MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); | ||
41 | MODULE_DESCRIPTION("The NFSv4 file layout driver"); | ||
42 | |||
43 | static int | ||
44 | filelayout_set_layoutdriver(struct nfs_server *nfss) | ||
45 | { | ||
46 | int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, | ||
47 | nfs4_fl_free_deviceid_callback); | ||
48 | if (status) { | ||
49 | printk(KERN_WARNING "%s: deviceid cache could not be " | ||
50 | "initialized\n", __func__); | ||
51 | return status; | ||
52 | } | ||
53 | dprintk("%s: deviceid cache has been initialized successfully\n", | ||
54 | __func__); | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | /* Clear out the layout by destroying its device list */ | ||
59 | static int | ||
60 | filelayout_clear_layoutdriver(struct nfs_server *nfss) | ||
61 | { | ||
62 | dprintk("--> %s\n", __func__); | ||
63 | |||
64 | if (nfss->nfs_client->cl_devid_cache) | ||
65 | pnfs_put_deviceid_cache(nfss->nfs_client); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * filelayout_check_layout() | ||
71 | * | ||
72 | * Make sure layout segment parameters are sane WRT the device. | ||
73 | * At this point no generic layer initialization of the lseg has occurred, | ||
74 | * and nothing has been added to the layout_hdr cache. | ||
75 | * | ||
76 | */ | ||
77 | static int | ||
78 | filelayout_check_layout(struct pnfs_layout_hdr *lo, | ||
79 | struct nfs4_filelayout_segment *fl, | ||
80 | struct nfs4_layoutget_res *lgr, | ||
81 | struct nfs4_deviceid *id) | ||
82 | { | ||
83 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
84 | int status = -EINVAL; | ||
85 | struct nfs_server *nfss = NFS_SERVER(lo->inode); | ||
86 | |||
87 | dprintk("--> %s\n", __func__); | ||
88 | |||
89 | if (fl->pattern_offset > lgr->range.offset) { | ||
90 | dprintk("%s pattern_offset %lld to large\n", | ||
91 | __func__, fl->pattern_offset); | ||
92 | goto out; | ||
93 | } | ||
94 | |||
95 | if (fl->stripe_unit % PAGE_SIZE) { | ||
96 | dprintk("%s Stripe unit (%u) not page aligned\n", | ||
97 | __func__, fl->stripe_unit); | ||
98 | goto out; | ||
99 | } | ||
100 | |||
101 | /* find and reference the deviceid */ | ||
102 | dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); | ||
103 | if (dsaddr == NULL) { | ||
104 | dsaddr = get_device_info(lo->inode, id); | ||
105 | if (dsaddr == NULL) | ||
106 | goto out; | ||
107 | } | ||
108 | fl->dsaddr = dsaddr; | ||
109 | |||
110 | if (fl->first_stripe_index < 0 || | ||
111 | fl->first_stripe_index >= dsaddr->stripe_count) { | ||
112 | dprintk("%s Bad first_stripe_index %d\n", | ||
113 | __func__, fl->first_stripe_index); | ||
114 | goto out_put; | ||
115 | } | ||
116 | |||
117 | if ((fl->stripe_type == STRIPE_SPARSE && | ||
118 | fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || | ||
119 | (fl->stripe_type == STRIPE_DENSE && | ||
120 | fl->num_fh != dsaddr->stripe_count)) { | ||
121 | dprintk("%s num_fh %u not valid for given packing\n", | ||
122 | __func__, fl->num_fh); | ||
123 | goto out_put; | ||
124 | } | ||
125 | |||
126 | if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { | ||
127 | dprintk("%s Stripe unit (%u) not aligned with rsize %u " | ||
128 | "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, | ||
129 | nfss->wsize); | ||
130 | } | ||
131 | |||
132 | status = 0; | ||
133 | out: | ||
134 | dprintk("--> %s returns %d\n", __func__, status); | ||
135 | return status; | ||
136 | out_put: | ||
137 | pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); | ||
138 | goto out; | ||
139 | } | ||
140 | |||
141 | static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) | ||
142 | { | ||
143 | int i; | ||
144 | |||
145 | for (i = 0; i < fl->num_fh; i++) { | ||
146 | if (!fl->fh_array[i]) | ||
147 | break; | ||
148 | kfree(fl->fh_array[i]); | ||
149 | } | ||
150 | kfree(fl->fh_array); | ||
151 | fl->fh_array = NULL; | ||
152 | } | ||
153 | |||
/*
 * Release a file layout segment: first its filehandle array, then the
 * segment structure itself.
 */
static void
_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	filelayout_free_fh_array(fl);
	kfree(fl);
}
160 | |||
161 | static int | ||
162 | filelayout_decode_layout(struct pnfs_layout_hdr *flo, | ||
163 | struct nfs4_filelayout_segment *fl, | ||
164 | struct nfs4_layoutget_res *lgr, | ||
165 | struct nfs4_deviceid *id) | ||
166 | { | ||
167 | uint32_t *p = (uint32_t *)lgr->layout.buf; | ||
168 | uint32_t nfl_util; | ||
169 | int i; | ||
170 | |||
171 | dprintk("%s: set_layout_map Begin\n", __func__); | ||
172 | |||
173 | memcpy(id, p, sizeof(*id)); | ||
174 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
175 | print_deviceid(id); | ||
176 | |||
177 | nfl_util = be32_to_cpup(p++); | ||
178 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) | ||
179 | fl->commit_through_mds = 1; | ||
180 | if (nfl_util & NFL4_UFLG_DENSE) | ||
181 | fl->stripe_type = STRIPE_DENSE; | ||
182 | else | ||
183 | fl->stripe_type = STRIPE_SPARSE; | ||
184 | fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; | ||
185 | |||
186 | fl->first_stripe_index = be32_to_cpup(p++); | ||
187 | p = xdr_decode_hyper(p, &fl->pattern_offset); | ||
188 | fl->num_fh = be32_to_cpup(p++); | ||
189 | |||
190 | dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", | ||
191 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, | ||
192 | fl->pattern_offset); | ||
193 | |||
194 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), | ||
195 | GFP_KERNEL); | ||
196 | if (!fl->fh_array) | ||
197 | return -ENOMEM; | ||
198 | |||
199 | for (i = 0; i < fl->num_fh; i++) { | ||
200 | /* Do we want to use a mempool here? */ | ||
201 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); | ||
202 | if (!fl->fh_array[i]) { | ||
203 | filelayout_free_fh_array(fl); | ||
204 | return -ENOMEM; | ||
205 | } | ||
206 | fl->fh_array[i]->size = be32_to_cpup(p++); | ||
207 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { | ||
208 | printk(KERN_ERR "Too big fh %d received %d\n", | ||
209 | i, fl->fh_array[i]->size); | ||
210 | filelayout_free_fh_array(fl); | ||
211 | return -EIO; | ||
212 | } | ||
213 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); | ||
214 | p += XDR_QUADLEN(fl->fh_array[i]->size); | ||
215 | dprintk("DEBUG: %s: fh len %d\n", __func__, | ||
216 | fl->fh_array[i]->size); | ||
217 | } | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static struct pnfs_layout_segment * | ||
223 | filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | ||
224 | struct nfs4_layoutget_res *lgr) | ||
225 | { | ||
226 | struct nfs4_filelayout_segment *fl; | ||
227 | int rc; | ||
228 | struct nfs4_deviceid id; | ||
229 | |||
230 | dprintk("--> %s\n", __func__); | ||
231 | fl = kzalloc(sizeof(*fl), GFP_KERNEL); | ||
232 | if (!fl) | ||
233 | return NULL; | ||
234 | |||
235 | rc = filelayout_decode_layout(layoutid, fl, lgr, &id); | ||
236 | if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { | ||
237 | _filelayout_free_lseg(fl); | ||
238 | return NULL; | ||
239 | } | ||
240 | return &fl->generic_hdr; | ||
241 | } | ||
242 | |||
243 | static void | ||
244 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | ||
245 | { | ||
246 | struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode); | ||
247 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
248 | |||
249 | dprintk("--> %s\n", __func__); | ||
250 | pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, | ||
251 | &fl->dsaddr->deviceid); | ||
252 | _filelayout_free_lseg(fl); | ||
253 | } | ||
254 | |||
255 | static struct pnfs_layoutdriver_type filelayout_type = { | ||
256 | .id = LAYOUT_NFSV4_1_FILES, | ||
257 | .name = "LAYOUT_NFSV4_1_FILES", | ||
258 | .owner = THIS_MODULE, | ||
259 | .set_layoutdriver = filelayout_set_layoutdriver, | ||
260 | .clear_layoutdriver = filelayout_clear_layoutdriver, | ||
261 | .alloc_lseg = filelayout_alloc_lseg, | ||
262 | .free_lseg = filelayout_free_lseg, | ||
263 | }; | ||
264 | |||
265 | static int __init nfs4filelayout_init(void) | ||
266 | { | ||
267 | printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", | ||
268 | __func__); | ||
269 | return pnfs_register_layoutdriver(&filelayout_type); | ||
270 | } | ||
271 | |||
272 | static void __exit nfs4filelayout_exit(void) | ||
273 | { | ||
274 | printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", | ||
275 | __func__); | ||
276 | pnfs_unregister_layoutdriver(&filelayout_type); | ||
277 | } | ||
278 | |||
279 | module_init(nfs4filelayout_init); | ||
280 | module_exit(nfs4filelayout_exit); | ||
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h new file mode 100644 index 000000000000..bbf60dd2ab9d --- /dev/null +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * NFSv4 file layout driver data structures. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #ifndef FS_NFS_NFS4FILELAYOUT_H | ||
31 | #define FS_NFS_NFS4FILELAYOUT_H | ||
32 | |||
33 | #include "pnfs.h" | ||
34 | |||
35 | /* | ||
36 | * Field testing shows we need to support up to 4096 stripe indices. | |
37 | * We store each index as a u8 (u32 on the wire) to keep the memory footprint | ||
38 | * reasonable. This in turn means we support a maximum of 256 | ||
39 | * RFC 5661 multipath_list4 structures. | ||
40 | */ | ||
41 | #define NFS4_PNFS_MAX_STRIPE_CNT 4096 | ||
42 | #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ | ||
43 | |||
/* Packing of a file layout, selected by NFL4_UFLG_DENSE in nfl_util. */
enum stripetype4 {
	STRIPE_SPARSE	= 1,	/* NFL4_UFLG_DENSE clear */
	STRIPE_DENSE	= 2	/* NFL4_UFLG_DENSE set */
};
48 | |||
49 | /* Individual ip address */ | ||
50 | struct nfs4_pnfs_ds { | ||
51 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
52 | u32 ds_ip_addr; | ||
53 | u32 ds_port; | ||
54 | struct nfs_client *ds_clp; | ||
55 | atomic_t ds_count; | ||
56 | }; | ||
57 | |||
58 | struct nfs4_file_layout_dsaddr { | ||
59 | struct pnfs_deviceid_node deviceid; | ||
60 | u32 stripe_count; | ||
61 | u8 *stripe_indices; | ||
62 | u32 ds_num; | ||
63 | struct nfs4_pnfs_ds *ds_list[1]; | ||
64 | }; | ||
65 | |||
66 | struct nfs4_filelayout_segment { | ||
67 | struct pnfs_layout_segment generic_hdr; | ||
68 | u32 stripe_type; | ||
69 | u32 commit_through_mds; | ||
70 | u32 stripe_unit; | ||
71 | u32 first_stripe_index; | ||
72 | u64 pattern_offset; | ||
73 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ | ||
74 | unsigned int num_fh; | ||
75 | struct nfs_fh **fh_array; | ||
76 | }; | ||
77 | |||
78 | static inline struct nfs4_filelayout_segment * | ||
79 | FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) | ||
80 | { | ||
81 | return container_of(lseg, | ||
82 | struct nfs4_filelayout_segment, | ||
83 | generic_hdr); | ||
84 | } | ||
85 | |||
86 | extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); | ||
87 | extern void print_ds(struct nfs4_pnfs_ds *ds); | ||
88 | extern void print_deviceid(struct nfs4_deviceid *dev_id); | ||
89 | extern struct nfs4_file_layout_dsaddr * | ||
90 | nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); | ||
91 | struct nfs4_file_layout_dsaddr * | ||
92 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); | ||
93 | |||
94 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c new file mode 100644 index 000000000000..51fe64ace55a --- /dev/null +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -0,0 +1,448 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 file layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
10 | * | ||
11 | * Permission is granted to use, copy, create derivative works, and | ||
12 | * redistribute this software and such derivative works for any purpose, | ||
13 | * so long as the name of the University of Michigan is not used in | ||
14 | * any advertising or publicity pertaining to the use or distribution | ||
15 | * of this software without specific, written prior authorization. If | ||
16 | * the above copyright notice or any other identification of the | ||
17 | * University of Michigan is included in any copy of any portion of | ||
18 | * this software, then the disclaimer below must also be included. | ||
19 | * | ||
20 | * This software is provided as is, without representation or warranty | ||
21 | * of any kind either express or implied, including without limitation | ||
22 | * the implied warranties of merchantability, fitness for a particular | ||
23 | * purpose, or noninfringement. The Regents of the University of | ||
24 | * Michigan shall not be liable for any damages, including special, | ||
25 | * indirect, incidental, or consequential damages, with respect to any | ||
26 | * claim arising out of or in connection with the use of the software, | ||
27 | * even if it has been or is hereafter advised of the possibility of | ||
28 | * such damages. | ||
29 | */ | ||
30 | |||
31 | #include <linux/nfs_fs.h> | ||
32 | #include <linux/vmalloc.h> | ||
33 | |||
34 | #include "internal.h" | ||
35 | #include "nfs4filelayout.h" | ||
36 | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
38 | |||
39 | /* | ||
40 | * Data server cache | ||
41 | * | ||
42 | * Data servers can be mapped to different device ids. | ||
43 | * nfs4_pnfs_ds reference counting | ||
44 | * - set to 1 on allocation | ||
45 | * - incremented when a device id maps a data server already in the cache. | ||
46 | * - decremented when deviceid is removed from the cache. | ||
47 | */ | ||
48 | DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
49 | static LIST_HEAD(nfs4_data_server_cache); | ||
50 | |||
51 | /* Debug routines */ | ||
52 | void | ||
53 | print_ds(struct nfs4_pnfs_ds *ds) | ||
54 | { | ||
55 | if (ds == NULL) { | ||
56 | printk("%s NULL device\n", __func__); | ||
57 | return; | ||
58 | } | ||
59 | printk(" ip_addr %x port %hu\n" | ||
60 | " ref count %d\n" | ||
61 | " client %p\n" | ||
62 | " cl_exchange_flags %x\n", | ||
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | ||
64 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
66 | } | ||
67 | |||
68 | void | ||
69 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | ||
70 | { | ||
71 | int i; | ||
72 | |||
73 | ifdebug(FACILITY) { | ||
74 | printk("%s dsaddr->ds_num %d\n", __func__, | ||
75 | dsaddr->ds_num); | ||
76 | for (i = 0; i < dsaddr->ds_num; i++) | ||
77 | print_ds(dsaddr->ds_list[i]); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | void print_deviceid(struct nfs4_deviceid *id) | ||
82 | { | ||
83 | u32 *p = (u32 *)id; | ||
84 | |||
85 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
86 | p[0], p[1], p[2], p[3]); | ||
87 | } | ||
88 | |||
89 | /* nfs4_ds_cache_lock is held */ | ||
90 | static struct nfs4_pnfs_ds * | ||
91 | _data_server_lookup_locked(u32 ip_addr, u32 port) | ||
92 | { | ||
93 | struct nfs4_pnfs_ds *ds; | ||
94 | |||
95 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | ||
96 | ntohl(ip_addr), ntohs(port)); | ||
97 | |||
98 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | ||
99 | if (ds->ds_ip_addr == ip_addr && | ||
100 | ds->ds_port == port) { | ||
101 | return ds; | ||
102 | } | ||
103 | } | ||
104 | return NULL; | ||
105 | } | ||
106 | |||
107 | static void | ||
108 | destroy_ds(struct nfs4_pnfs_ds *ds) | ||
109 | { | ||
110 | dprintk("--> %s\n", __func__); | ||
111 | ifdebug(FACILITY) | ||
112 | print_ds(ds); | ||
113 | |||
114 | if (ds->ds_clp) | ||
115 | nfs_put_client(ds->ds_clp); | ||
116 | kfree(ds); | ||
117 | } | ||
118 | |||
119 | static void | ||
120 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||
121 | { | ||
122 | struct nfs4_pnfs_ds *ds; | ||
123 | int i; | ||
124 | |||
125 | print_deviceid(&dsaddr->deviceid.de_id); | ||
126 | |||
127 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
128 | ds = dsaddr->ds_list[i]; | ||
129 | if (ds != NULL) { | ||
130 | if (atomic_dec_and_lock(&ds->ds_count, | ||
131 | &nfs4_ds_cache_lock)) { | ||
132 | list_del_init(&ds->ds_node); | ||
133 | spin_unlock(&nfs4_ds_cache_lock); | ||
134 | destroy_ds(ds); | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | kfree(dsaddr->stripe_indices); | ||
139 | kfree(dsaddr); | ||
140 | } | ||
141 | |||
142 | void | ||
143 | nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) | ||
144 | { | ||
145 | struct nfs4_file_layout_dsaddr *dsaddr = | ||
146 | container_of(device, struct nfs4_file_layout_dsaddr, deviceid); | ||
147 | |||
148 | nfs4_fl_free_deviceid(dsaddr); | ||
149 | } | ||
150 | |||
151 | static struct nfs4_pnfs_ds * | ||
152 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) | ||
153 | { | ||
154 | struct nfs4_pnfs_ds *tmp_ds, *ds; | ||
155 | |||
156 | ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); | ||
157 | if (!ds) | ||
158 | goto out; | ||
159 | |||
160 | spin_lock(&nfs4_ds_cache_lock); | ||
161 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | ||
162 | if (tmp_ds == NULL) { | ||
163 | ds->ds_ip_addr = ip_addr; | ||
164 | ds->ds_port = port; | ||
165 | atomic_set(&ds->ds_count, 1); | ||
166 | INIT_LIST_HEAD(&ds->ds_node); | ||
167 | ds->ds_clp = NULL; | ||
168 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
169 | dprintk("%s add new data server ip 0x%x\n", __func__, | ||
170 | ds->ds_ip_addr); | ||
171 | } else { | ||
172 | kfree(ds); | ||
173 | atomic_inc(&tmp_ds->ds_count); | ||
174 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | ||
175 | __func__, tmp_ds->ds_ip_addr, | ||
176 | atomic_read(&tmp_ds->ds_count)); | ||
177 | ds = tmp_ds; | ||
178 | } | ||
179 | spin_unlock(&nfs4_ds_cache_lock); | ||
180 | out: | ||
181 | return ds; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Currently only support ipv4, and one multi-path address. | ||
186 | */ | ||
187 | static struct nfs4_pnfs_ds * | ||
188 | decode_and_add_ds(__be32 **pp, struct inode *inode) | ||
189 | { | ||
190 | struct nfs4_pnfs_ds *ds = NULL; | ||
191 | char *buf; | ||
192 | const char *ipend, *pstr; | ||
193 | u32 ip_addr, port; | ||
194 | int nlen, rlen, i; | ||
195 | int tmp[2]; | ||
196 | __be32 *r_netid, *r_addr, *p = *pp; | ||
197 | |||
198 | /* r_netid */ | ||
199 | nlen = be32_to_cpup(p++); | ||
200 | r_netid = p; | ||
201 | p += XDR_QUADLEN(nlen); | ||
202 | |||
203 | /* r_addr */ | ||
204 | rlen = be32_to_cpup(p++); | ||
205 | r_addr = p; | ||
206 | p += XDR_QUADLEN(rlen); | ||
207 | *pp = p; | ||
208 | |||
209 | /* Check that netid is "tcp" */ | ||
210 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | ||
211 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | ||
212 | goto out_err; | ||
213 | } | ||
214 | |||
215 | /* ipv6 length plus port is legal */ | ||
216 | if (rlen > INET6_ADDRSTRLEN + 8) { | ||
217 | dprintk("%s Invalid address, length %d\n", __func__, | ||
218 | rlen); | ||
219 | goto out_err; | ||
220 | } | ||
221 | buf = kmalloc(rlen + 1, GFP_KERNEL); | ||
222 | buf[rlen] = '\0'; | ||
223 | memcpy(buf, r_addr, rlen); | ||
224 | |||
225 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | ||
226 | for (i = 0; i < 2; i++) { | ||
227 | char *res = strrchr(buf, '.'); | ||
228 | *res = '-'; | ||
229 | } | ||
230 | |||
231 | /* Currently only support ipv4 address */ | ||
232 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | ||
233 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | ||
234 | goto out_free; | ||
235 | } | ||
236 | |||
237 | /* port */ | ||
238 | pstr = ipend; | ||
239 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | ||
240 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
241 | |||
242 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); | ||
243 | dprintk("%s Decoded address and port %s\n", __func__, buf); | ||
244 | out_free: | ||
245 | kfree(buf); | ||
246 | out_err: | ||
247 | return ds; | ||
248 | } | ||
249 | |||
250 | /* Decode opaque device data and return the result */ | ||
251 | static struct nfs4_file_layout_dsaddr* | ||
252 | decode_device(struct inode *ino, struct pnfs_device *pdev) | ||
253 | { | ||
254 | int i, dummy; | ||
255 | u32 cnt, num; | ||
256 | u8 *indexp; | ||
257 | __be32 *p = (__be32 *)pdev->area, *indicesp; | ||
258 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
259 | |||
260 | /* Get the stripe count (number of stripe index) */ | ||
261 | cnt = be32_to_cpup(p++); | ||
262 | dprintk("%s stripe count %d\n", __func__, cnt); | ||
263 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | ||
264 | printk(KERN_WARNING "%s: stripe count %d greater than " | ||
265 | "supported maximum %d\n", __func__, | ||
266 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | ||
267 | goto out_err; | ||
268 | } | ||
269 | |||
270 | /* Check the multipath list count */ | ||
271 | indicesp = p; | ||
272 | p += XDR_QUADLEN(cnt << 2); | ||
273 | num = be32_to_cpup(p++); | ||
274 | dprintk("%s ds_num %u\n", __func__, num); | ||
275 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | ||
276 | printk(KERN_WARNING "%s: multipath count %d greater than " | ||
277 | "supported maximum %d\n", __func__, | ||
278 | num, NFS4_PNFS_MAX_MULTI_CNT); | ||
279 | goto out_err; | ||
280 | } | ||
281 | dsaddr = kzalloc(sizeof(*dsaddr) + | ||
282 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | ||
283 | GFP_KERNEL); | ||
284 | if (!dsaddr) | ||
285 | goto out_err; | ||
286 | |||
287 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | ||
288 | if (!dsaddr->stripe_indices) | ||
289 | goto out_err_free; | ||
290 | |||
291 | dsaddr->stripe_count = cnt; | ||
292 | dsaddr->ds_num = num; | ||
293 | |||
294 | memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); | ||
295 | |||
296 | /* Go back an read stripe indices */ | ||
297 | p = indicesp; | ||
298 | indexp = &dsaddr->stripe_indices[0]; | ||
299 | for (i = 0; i < dsaddr->stripe_count; i++) { | ||
300 | *indexp = be32_to_cpup(p++); | ||
301 | if (*indexp >= num) | ||
302 | goto out_err_free; | ||
303 | indexp++; | ||
304 | } | ||
305 | /* Skip already read multipath list count */ | ||
306 | p++; | ||
307 | |||
308 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
309 | int j; | ||
310 | |||
311 | dummy = be32_to_cpup(p++); /* multipath count */ | ||
312 | if (dummy > 1) { | ||
313 | printk(KERN_WARNING | ||
314 | "%s: Multipath count %d not supported, " | ||
315 | "skipping all greater than 1\n", __func__, | ||
316 | dummy); | ||
317 | } | ||
318 | for (j = 0; j < dummy; j++) { | ||
319 | if (j == 0) { | ||
320 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | ||
321 | if (dsaddr->ds_list[i] == NULL) | ||
322 | goto out_err_free; | ||
323 | } else { | ||
324 | u32 len; | ||
325 | /* skip extra multipath */ | ||
326 | len = be32_to_cpup(p++); | ||
327 | p += XDR_QUADLEN(len); | ||
328 | len = be32_to_cpup(p++); | ||
329 | p += XDR_QUADLEN(len); | ||
330 | continue; | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | return dsaddr; | ||
335 | |||
336 | out_err_free: | ||
337 | nfs4_fl_free_deviceid(dsaddr); | ||
338 | out_err: | ||
339 | dprintk("%s ERROR: returning NULL\n", __func__); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Decode the opaque device specified in 'dev' | ||
345 | * and add it to the list of available devices. | ||
346 | * If the deviceid is already cached, nfs4_add_deviceid will return | ||
347 | * a pointer to the cached struct and throw away the new. | ||
348 | */ | ||
349 | static struct nfs4_file_layout_dsaddr* | ||
350 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev) | ||
351 | { | ||
352 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
353 | struct pnfs_deviceid_node *d; | ||
354 | |||
355 | dsaddr = decode_device(inode, dev); | ||
356 | if (!dsaddr) { | ||
357 | printk(KERN_WARNING "%s: Could not decode or add device\n", | ||
358 | __func__); | ||
359 | return NULL; | ||
360 | } | ||
361 | |||
362 | d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, | ||
363 | &dsaddr->deviceid); | ||
364 | |||
365 | return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Retrieve the information for dev_id, add it to the list | ||
370 | * of available devices, and return it. | ||
371 | */ | ||
372 | struct nfs4_file_layout_dsaddr * | ||
373 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | ||
374 | { | ||
375 | struct pnfs_device *pdev = NULL; | ||
376 | u32 max_resp_sz; | ||
377 | int max_pages; | ||
378 | struct page **pages = NULL; | ||
379 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
380 | int rc, i; | ||
381 | struct nfs_server *server = NFS_SERVER(inode); | ||
382 | |||
383 | /* | ||
384 | * Use the session max response size as the basis for setting | ||
385 | * GETDEVICEINFO's maxcount | ||
386 | */ | ||
387 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
388 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
389 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
390 | __func__, inode, max_resp_sz, max_pages); | ||
391 | |||
392 | pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); | ||
393 | if (pdev == NULL) | ||
394 | return NULL; | ||
395 | |||
396 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | ||
397 | if (pages == NULL) { | ||
398 | kfree(pdev); | ||
399 | return NULL; | ||
400 | } | ||
401 | for (i = 0; i < max_pages; i++) { | ||
402 | pages[i] = alloc_page(GFP_KERNEL); | ||
403 | if (!pages[i]) | ||
404 | goto out_free; | ||
405 | } | ||
406 | |||
407 | /* set pdev->area */ | ||
408 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | ||
409 | if (!pdev->area) | ||
410 | goto out_free; | ||
411 | |||
412 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
413 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
414 | pdev->pages = pages; | ||
415 | pdev->pgbase = 0; | ||
416 | pdev->pglen = PAGE_SIZE * max_pages; | ||
417 | pdev->mincount = 0; | ||
418 | |||
419 | rc = nfs4_proc_getdeviceinfo(server, pdev); | ||
420 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
421 | if (rc) | ||
422 | goto out_free; | ||
423 | |||
424 | /* | ||
425 | * Found new device, need to decode it and then add it to the | ||
426 | * list of known devices for this mountpoint. | ||
427 | */ | ||
428 | dsaddr = decode_and_add_device(inode, pdev); | ||
429 | out_free: | ||
430 | if (pdev->area != NULL) | ||
431 | vunmap(pdev->area); | ||
432 | for (i = 0; i < max_pages; i++) | ||
433 | __free_page(pages[i]); | ||
434 | kfree(pages); | ||
435 | kfree(pdev); | ||
436 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
437 | return dsaddr; | ||
438 | } | ||
439 | |||
440 | struct nfs4_file_layout_dsaddr * | ||
441 | nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) | ||
442 | { | ||
443 | struct pnfs_deviceid_node *d; | ||
444 | |||
445 | d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); | ||
446 | return (d == NULL) ? NULL : | ||
447 | container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
448 | } | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e87fe612ca18..32c8758c99fd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include "internal.h" | 55 | #include "internal.h" |
56 | #include "iostat.h" | 56 | #include "iostat.h" |
57 | #include "callback.h" | 57 | #include "callback.h" |
58 | #include "pnfs.h" | ||
58 | 59 | ||
59 | #define NFSDBG_FACILITY NFSDBG_PROC | 60 | #define NFSDBG_FACILITY NFSDBG_PROC |
60 | 61 | ||
@@ -130,6 +131,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | |||
130 | | FATTR4_WORD0_MAXWRITE | 131 | | FATTR4_WORD0_MAXWRITE |
131 | | FATTR4_WORD0_LEASE_TIME, | 132 | | FATTR4_WORD0_LEASE_TIME, |
132 | FATTR4_WORD1_TIME_DELTA | 133 | FATTR4_WORD1_TIME_DELTA |
134 | | FATTR4_WORD1_FS_LAYOUT_TYPES | ||
133 | }; | 135 | }; |
134 | 136 | ||
135 | const u32 nfs4_fs_locations_bitmap[2] = { | 137 | const u32 nfs4_fs_locations_bitmap[2] = { |
@@ -4840,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
4840 | args->bc_attrs.max_reqs); | 4842 | args->bc_attrs.max_reqs); |
4841 | } | 4843 | } |
4842 | 4844 | ||
4843 | static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) | 4845 | static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) |
4844 | { | 4846 | { |
4845 | if (rcvd <= sent) | 4847 | struct nfs4_channel_attrs *sent = &args->fc_attrs; |
4846 | return 0; | 4848 | struct nfs4_channel_attrs *rcvd = &session->fc_attrs; |
4847 | printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " | 4849 | |
4848 | "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); | 4850 | if (rcvd->headerpadsz > sent->headerpadsz) |
4849 | return -EINVAL; | 4851 | return -EINVAL; |
4852 | if (rcvd->max_resp_sz > sent->max_resp_sz) | ||
4853 | return -EINVAL; | ||
4854 | /* | ||
4855 | * Our requested max_ops is the minimum we need; we're not | ||
4856 | * prepared to break up compounds into smaller pieces than that. | ||
4857 | * So, no point even trying to continue if the server won't | ||
4858 | * cooperate: | ||
4859 | */ | ||
4860 | if (rcvd->max_ops < sent->max_ops) | ||
4861 | return -EINVAL; | ||
4862 | if (rcvd->max_reqs == 0) | ||
4863 | return -EINVAL; | ||
4864 | return 0; | ||
4850 | } | 4865 | } |
4851 | 4866 | ||
4852 | #define _verify_fore_channel_attr(_name_) \ | 4867 | static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) |
4853 | _verify_channel_attr("fore", #_name_, \ | 4868 | { |
4854 | args->fc_attrs._name_, \ | 4869 | struct nfs4_channel_attrs *sent = &args->bc_attrs; |
4855 | session->fc_attrs._name_) | 4870 | struct nfs4_channel_attrs *rcvd = &session->bc_attrs; |
4856 | 4871 | ||
4857 | #define _verify_back_channel_attr(_name_) \ | 4872 | if (rcvd->max_rqst_sz > sent->max_rqst_sz) |
4858 | _verify_channel_attr("back", #_name_, \ | 4873 | return -EINVAL; |
4859 | args->bc_attrs._name_, \ | 4874 | if (rcvd->max_resp_sz < sent->max_resp_sz) |
4860 | session->bc_attrs._name_) | 4875 | return -EINVAL; |
4876 | if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) | ||
4877 | return -EINVAL; | ||
4878 | /* These would render the backchannel useless: */ | ||
4879 | if (rcvd->max_ops == 0) | ||
4880 | return -EINVAL; | ||
4881 | if (rcvd->max_reqs == 0) | ||
4882 | return -EINVAL; | ||
4883 | return 0; | ||
4884 | } | ||
4861 | 4885 | ||
4862 | /* | ||
4863 | * The server is not allowed to increase the fore channel header pad size, | ||
4864 | * maximum response size, or maximum number of operations. | ||
4865 | * | ||
4866 | * The back channel attributes are only negotiatied down: We send what the | ||
4867 | * (back channel) server insists upon. | ||
4868 | */ | ||
4869 | static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, | 4886 | static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, |
4870 | struct nfs4_session *session) | 4887 | struct nfs4_session *session) |
4871 | { | 4888 | { |
4872 | int ret = 0; | 4889 | int ret; |
4873 | |||
4874 | ret |= _verify_fore_channel_attr(headerpadsz); | ||
4875 | ret |= _verify_fore_channel_attr(max_resp_sz); | ||
4876 | ret |= _verify_fore_channel_attr(max_ops); | ||
4877 | |||
4878 | ret |= _verify_back_channel_attr(headerpadsz); | ||
4879 | ret |= _verify_back_channel_attr(max_rqst_sz); | ||
4880 | ret |= _verify_back_channel_attr(max_resp_sz); | ||
4881 | ret |= _verify_back_channel_attr(max_resp_sz_cached); | ||
4882 | ret |= _verify_back_channel_attr(max_ops); | ||
4883 | ret |= _verify_back_channel_attr(max_reqs); | ||
4884 | 4890 | ||
4885 | return ret; | 4891 | ret = nfs4_verify_fore_channel_attrs(args, session); |
4892 | if (ret) | ||
4893 | return ret; | ||
4894 | return nfs4_verify_back_channel_attrs(args, session); | ||
4886 | } | 4895 | } |
4887 | 4896 | ||
4888 | static int _nfs4_proc_create_session(struct nfs_client *clp) | 4897 | static int _nfs4_proc_create_session(struct nfs_client *clp) |
@@ -5255,6 +5264,147 @@ out: | |||
5255 | dprintk("<-- %s status=%d\n", __func__, status); | 5264 | dprintk("<-- %s status=%d\n", __func__, status); |
5256 | return status; | 5265 | return status; |
5257 | } | 5266 | } |
5267 | |||
5268 | static void | ||
5269 | nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | ||
5270 | { | ||
5271 | struct nfs4_layoutget *lgp = calldata; | ||
5272 | struct inode *ino = lgp->args.inode; | ||
5273 | struct nfs_server *server = NFS_SERVER(ino); | ||
5274 | |||
5275 | dprintk("--> %s\n", __func__); | ||
5276 | if (nfs4_setup_sequence(server, &lgp->args.seq_args, | ||
5277 | &lgp->res.seq_res, 0, task)) | ||
5278 | return; | ||
5279 | rpc_call_start(task); | ||
5280 | } | ||
5281 | |||
5282 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | ||
5283 | { | ||
5284 | struct nfs4_layoutget *lgp = calldata; | ||
5285 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
5286 | |||
5287 | dprintk("--> %s\n", __func__); | ||
5288 | |||
5289 | if (!nfs4_sequence_done(task, &lgp->res.seq_res)) | ||
5290 | return; | ||
5291 | |||
5292 | switch (task->tk_status) { | ||
5293 | case 0: | ||
5294 | break; | ||
5295 | case -NFS4ERR_LAYOUTTRYLATER: | ||
5296 | case -NFS4ERR_RECALLCONFLICT: | ||
5297 | task->tk_status = -NFS4ERR_DELAY; | ||
5298 | /* Fall through */ | ||
5299 | default: | ||
5300 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
5301 | rpc_restart_call_prepare(task); | ||
5302 | return; | ||
5303 | } | ||
5304 | } | ||
5305 | lgp->status = task->tk_status; | ||
5306 | dprintk("<-- %s\n", __func__); | ||
5307 | } | ||
5308 | |||
5309 | static void nfs4_layoutget_release(void *calldata) | ||
5310 | { | ||
5311 | struct nfs4_layoutget *lgp = calldata; | ||
5312 | |||
5313 | dprintk("--> %s\n", __func__); | ||
5314 | put_layout_hdr(lgp->args.inode); | ||
5315 | if (lgp->res.layout.buf != NULL) | ||
5316 | free_page((unsigned long) lgp->res.layout.buf); | ||
5317 | put_nfs_open_context(lgp->args.ctx); | ||
5318 | kfree(calldata); | ||
5319 | dprintk("<-- %s\n", __func__); | ||
5320 | } | ||
5321 | |||
5322 | static const struct rpc_call_ops nfs4_layoutget_call_ops = { | ||
5323 | .rpc_call_prepare = nfs4_layoutget_prepare, | ||
5324 | .rpc_call_done = nfs4_layoutget_done, | ||
5325 | .rpc_release = nfs4_layoutget_release, | ||
5326 | }; | ||
5327 | |||
5328 | int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | ||
5329 | { | ||
5330 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
5331 | struct rpc_task *task; | ||
5332 | struct rpc_message msg = { | ||
5333 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | ||
5334 | .rpc_argp = &lgp->args, | ||
5335 | .rpc_resp = &lgp->res, | ||
5336 | }; | ||
5337 | struct rpc_task_setup task_setup_data = { | ||
5338 | .rpc_client = server->client, | ||
5339 | .rpc_message = &msg, | ||
5340 | .callback_ops = &nfs4_layoutget_call_ops, | ||
5341 | .callback_data = lgp, | ||
5342 | .flags = RPC_TASK_ASYNC, | ||
5343 | }; | ||
5344 | int status = 0; | ||
5345 | |||
5346 | dprintk("--> %s\n", __func__); | ||
5347 | |||
5348 | lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); | ||
5349 | if (lgp->res.layout.buf == NULL) { | ||
5350 | nfs4_layoutget_release(lgp); | ||
5351 | return -ENOMEM; | ||
5352 | } | ||
5353 | |||
5354 | lgp->res.seq_res.sr_slot = NULL; | ||
5355 | task = rpc_run_task(&task_setup_data); | ||
5356 | if (IS_ERR(task)) | ||
5357 | return PTR_ERR(task); | ||
5358 | status = nfs4_wait_for_completion_rpc_task(task); | ||
5359 | if (status != 0) | ||
5360 | goto out; | ||
5361 | status = lgp->status; | ||
5362 | if (status != 0) | ||
5363 | goto out; | ||
5364 | status = pnfs_layout_process(lgp); | ||
5365 | out: | ||
5366 | rpc_put_task(task); | ||
5367 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5368 | return status; | ||
5369 | } | ||
5370 | |||
5371 | static int | ||
5372 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
5373 | { | ||
5374 | struct nfs4_getdeviceinfo_args args = { | ||
5375 | .pdev = pdev, | ||
5376 | }; | ||
5377 | struct nfs4_getdeviceinfo_res res = { | ||
5378 | .pdev = pdev, | ||
5379 | }; | ||
5380 | struct rpc_message msg = { | ||
5381 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], | ||
5382 | .rpc_argp = &args, | ||
5383 | .rpc_resp = &res, | ||
5384 | }; | ||
5385 | int status; | ||
5386 | |||
5387 | dprintk("--> %s\n", __func__); | ||
5388 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | ||
5389 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5390 | |||
5391 | return status; | ||
5392 | } | ||
5393 | |||
5394 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
5395 | { | ||
5396 | struct nfs4_exception exception = { }; | ||
5397 | int err; | ||
5398 | |||
5399 | do { | ||
5400 | err = nfs4_handle_exception(server, | ||
5401 | _nfs4_proc_getdeviceinfo(server, pdev), | ||
5402 | &exception); | ||
5403 | } while (exception.retry); | ||
5404 | return err; | ||
5405 | } | ||
5406 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); | ||
5407 | |||
5258 | #endif /* CONFIG_NFS_V4_1 */ | 5408 | #endif /* CONFIG_NFS_V4_1 */ |
5259 | 5409 | ||
5260 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 5410 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index aa0b02a610c4..f575a3126737 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include "callback.h" | 54 | #include "callback.h" |
55 | #include "delegation.h" | 55 | #include "delegation.h" |
56 | #include "internal.h" | 56 | #include "internal.h" |
57 | #include "pnfs.h" | ||
57 | 58 | ||
58 | #define OPENOWNER_POOL_SIZE 8 | 59 | #define OPENOWNER_POOL_SIZE 8 |
59 | 60 | ||
@@ -1475,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1475 | } | 1476 | } |
1476 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); | 1477 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); |
1477 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); | 1478 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); |
1479 | pnfs_destroy_all_layouts(clp); | ||
1478 | } | 1480 | } |
1479 | 1481 | ||
1480 | if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { | 1482 | if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index bd2101d918c8..f313c4cce7e4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/nfs_idmap.h> | 52 | #include <linux/nfs_idmap.h> |
53 | #include "nfs4_fs.h" | 53 | #include "nfs4_fs.h" |
54 | #include "internal.h" | 54 | #include "internal.h" |
55 | #include "pnfs.h" | ||
55 | 56 | ||
56 | #define NFSDBG_FACILITY NFSDBG_XDR | 57 | #define NFSDBG_FACILITY NFSDBG_XDR |
57 | 58 | ||
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int); | |||
310 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 311 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
311 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 312 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
312 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 313 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
/*
 * Per-operation buffer size bounds for GETDEVICEINFO and LAYOUTGET.
 * The terms are combined with XDR_QUADLEN() results, i.e. they count
 * 32-bit XDR words rather than bytes.
 */
#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
				XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
				1 /* layout type */ + \
				1 /* opaque devaddr4 length */ + \
				  /* devaddr4 payload is read into page */ \
				1 /* notification bitmap length */ + \
				1 /* notification bitmap */)
#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
				encode_stateid_maxsz)
#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
				decode_stateid_maxsz + \
				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313 | #else /* CONFIG_NFS_V4_1 */ | 327 | #else /* CONFIG_NFS_V4_1 */ |
314 | #define encode_sequence_maxsz 0 | 328 | #define encode_sequence_maxsz 0 |
315 | #define decode_sequence_maxsz 0 | 329 | #define decode_sequence_maxsz 0 |
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int); | |||
699 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 713 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
700 | decode_sequence_maxsz + \ | 714 | decode_sequence_maxsz + \ |
701 | decode_reclaim_complete_maxsz) | 715 | decode_reclaim_complete_maxsz) |
/*
 * Whole-compound size bounds: each is the compound header plus the
 * per-operation bounds of the operations in that compound
 * (SEQUENCE + GETDEVICEINFO, and SEQUENCE + PUTFH + LAYOUTGET).
 */
#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
				encode_sequence_maxsz +\
				encode_getdeviceinfo_maxsz)
#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
				decode_sequence_maxsz + \
				decode_getdeviceinfo_maxsz)
#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
				encode_sequence_maxsz + \
				encode_putfh_maxsz + \
				encode_layoutget_maxsz)
#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
				decode_sequence_maxsz + \
				decode_putfh_maxsz + \
				decode_layoutget_maxsz)
702 | 730 | ||
703 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 731 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
704 | compound_encode_hdr_maxsz + | 732 | compound_encode_hdr_maxsz + |
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1737 | #endif /* CONFIG_NFS_V4_1 */ | 1765 | #endif /* CONFIG_NFS_V4_1 */ |
1738 | } | 1766 | } |
1739 | 1767 | ||
1768 | #ifdef CONFIG_NFS_V4_1 | ||
1769 | static void | ||
1770 | encode_getdeviceinfo(struct xdr_stream *xdr, | ||
1771 | const struct nfs4_getdeviceinfo_args *args, | ||
1772 | struct compound_hdr *hdr) | ||
1773 | { | ||
1774 | __be32 *p; | ||
1775 | |||
1776 | p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); | ||
1777 | *p++ = cpu_to_be32(OP_GETDEVICEINFO); | ||
1778 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | ||
1779 | NFS4_DEVICEID4_SIZE); | ||
1780 | *p++ = cpu_to_be32(args->pdev->layout_type); | ||
1781 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | ||
1782 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | ||
1783 | hdr->nops++; | ||
1784 | hdr->replen += decode_getdeviceinfo_maxsz; | ||
1785 | } | ||
1786 | |||
1787 | static void | ||
1788 | encode_layoutget(struct xdr_stream *xdr, | ||
1789 | const struct nfs4_layoutget_args *args, | ||
1790 | struct compound_hdr *hdr) | ||
1791 | { | ||
1792 | nfs4_stateid stateid; | ||
1793 | __be32 *p; | ||
1794 | |||
1795 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); | ||
1796 | *p++ = cpu_to_be32(OP_LAYOUTGET); | ||
1797 | *p++ = cpu_to_be32(0); /* Signal layout available */ | ||
1798 | *p++ = cpu_to_be32(args->type); | ||
1799 | *p++ = cpu_to_be32(args->range.iomode); | ||
1800 | p = xdr_encode_hyper(p, args->range.offset); | ||
1801 | p = xdr_encode_hyper(p, args->range.length); | ||
1802 | p = xdr_encode_hyper(p, args->minlength); | ||
1803 | pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, | ||
1804 | args->ctx->state); | ||
1805 | p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE); | ||
1806 | *p = cpu_to_be32(args->maxcount); | ||
1807 | |||
1808 | dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", | ||
1809 | __func__, | ||
1810 | args->type, | ||
1811 | args->range.iomode, | ||
1812 | (unsigned long)args->range.offset, | ||
1813 | (unsigned long)args->range.length, | ||
1814 | args->maxcount); | ||
1815 | hdr->nops++; | ||
1816 | hdr->replen += decode_layoutget_maxsz; | ||
1817 | } | ||
1818 | #endif /* CONFIG_NFS_V4_1 */ | ||
1819 | |||
1740 | /* | 1820 | /* |
1741 | * END OF "GENERIC" ENCODE ROUTINES. | 1821 | * END OF "GENERIC" ENCODE ROUTINES. |
1742 | */ | 1822 | */ |
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, | |||
2554 | return 0; | 2634 | return 0; |
2555 | } | 2635 | } |
2556 | 2636 | ||
2637 | /* | ||
2638 | * Encode GETDEVICEINFO request | ||
2639 | */ | ||
2640 | static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p, | ||
2641 | struct nfs4_getdeviceinfo_args *args) | ||
2642 | { | ||
2643 | struct xdr_stream xdr; | ||
2644 | struct compound_hdr hdr = { | ||
2645 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2646 | }; | ||
2647 | |||
2648 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
2649 | encode_compound_hdr(&xdr, req, &hdr); | ||
2650 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
2651 | encode_getdeviceinfo(&xdr, args, &hdr); | ||
2652 | |||
2653 | /* set up reply kvec. Subtract notification bitmap max size (2) | ||
2654 | * so that notification bitmap is put in xdr_buf tail */ | ||
2655 | xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, | ||
2656 | args->pdev->pages, args->pdev->pgbase, | ||
2657 | args->pdev->pglen); | ||
2658 | |||
2659 | encode_nops(&hdr); | ||
2660 | return 0; | ||
2661 | } | ||
2662 | |||
2663 | /* | ||
2664 | * Encode LAYOUTGET request | ||
2665 | */ | ||
2666 | static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p, | ||
2667 | struct nfs4_layoutget_args *args) | ||
2668 | { | ||
2669 | struct xdr_stream xdr; | ||
2670 | struct compound_hdr hdr = { | ||
2671 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2672 | }; | ||
2673 | |||
2674 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
2675 | encode_compound_hdr(&xdr, req, &hdr); | ||
2676 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
2677 | encode_putfh(&xdr, NFS_FH(args->inode), &hdr); | ||
2678 | encode_layoutget(&xdr, args, &hdr); | ||
2679 | encode_nops(&hdr); | ||
2680 | return 0; | ||
2681 | } | ||
2557 | #endif /* CONFIG_NFS_V4_1 */ | 2682 | #endif /* CONFIG_NFS_V4_1 */ |
2558 | 2683 | ||
2559 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 2684 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -3978,6 +4103,61 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | |||
3978 | return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); | 4103 | return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); |
3979 | } | 4104 | } |
3980 | 4105 | ||
4106 | /* | ||
4107 | * Decode potentially multiple layout types. Currently we only support | ||
4108 | * one layout driver per file system. | ||
4109 | */ | ||
4110 | static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, | ||
4111 | uint32_t *layouttype) | ||
4112 | { | ||
4113 | uint32_t *p; | ||
4114 | int num; | ||
4115 | |||
4116 | p = xdr_inline_decode(xdr, 4); | ||
4117 | if (unlikely(!p)) | ||
4118 | goto out_overflow; | ||
4119 | num = be32_to_cpup(p); | ||
4120 | |||
4121 | /* pNFS is not supported by the underlying file system */ | ||
4122 | if (num == 0) { | ||
4123 | *layouttype = 0; | ||
4124 | return 0; | ||
4125 | } | ||
4126 | if (num > 1) | ||
4127 | printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " | ||
4128 | "per filesystem not supported\n", __func__); | ||
4129 | |||
4130 | /* Decode and set first layout type, move xdr->p past unused types */ | ||
4131 | p = xdr_inline_decode(xdr, num * 4); | ||
4132 | if (unlikely(!p)) | ||
4133 | goto out_overflow; | ||
4134 | *layouttype = be32_to_cpup(p); | ||
4135 | return 0; | ||
4136 | out_overflow: | ||
4137 | print_overflow_msg(__func__, xdr); | ||
4138 | return -EIO; | ||
4139 | } | ||
4140 | |||
4141 | /* | ||
4142 | * The type of file system exported. | ||
4143 | * Note we must ensure that layouttype is set in any non-error case. | ||
4144 | */ | ||
4145 | static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, | ||
4146 | uint32_t *layouttype) | ||
4147 | { | ||
4148 | int status = 0; | ||
4149 | |||
4150 | dprintk("%s: bitmap is %x\n", __func__, bitmap[1]); | ||
4151 | if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) | ||
4152 | return -EIO; | ||
4153 | if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { | ||
4154 | status = decode_first_pnfs_layout_type(xdr, layouttype); | ||
4155 | bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; | ||
4156 | } else | ||
4157 | *layouttype = 0; | ||
4158 | return status; | ||
4159 | } | ||
4160 | |||
3981 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | 4161 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) |
3982 | { | 4162 | { |
3983 | __be32 *savep; | 4163 | __be32 *savep; |
@@ -4006,6 +4186,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | |||
4006 | status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); | 4186 | status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); |
4007 | if (status != 0) | 4187 | if (status != 0) |
4008 | goto xdr_error; | 4188 | goto xdr_error; |
4189 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); | ||
4190 | if (status != 0) | ||
4191 | goto xdr_error; | ||
4009 | 4192 | ||
4010 | status = verify_attr_len(xdr, savep, attrlen); | 4193 | status = verify_attr_len(xdr, savep, attrlen); |
4011 | xdr_error: | 4194 | xdr_error: |
@@ -4772,6 +4955,134 @@ out_overflow: | |||
4772 | #endif /* CONFIG_NFS_V4_1 */ | 4955 | #endif /* CONFIG_NFS_V4_1 */ |
4773 | } | 4956 | } |
4774 | 4957 | ||
4958 | #if defined(CONFIG_NFS_V4_1) | ||
4959 | |||
4960 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | ||
4961 | struct pnfs_device *pdev) | ||
4962 | { | ||
4963 | __be32 *p; | ||
4964 | uint32_t len, type; | ||
4965 | int status; | ||
4966 | |||
4967 | status = decode_op_hdr(xdr, OP_GETDEVICEINFO); | ||
4968 | if (status) { | ||
4969 | if (status == -ETOOSMALL) { | ||
4970 | p = xdr_inline_decode(xdr, 4); | ||
4971 | if (unlikely(!p)) | ||
4972 | goto out_overflow; | ||
4973 | pdev->mincount = be32_to_cpup(p); | ||
4974 | dprintk("%s: Min count too small. mincnt = %u\n", | ||
4975 | __func__, pdev->mincount); | ||
4976 | } | ||
4977 | return status; | ||
4978 | } | ||
4979 | |||
4980 | p = xdr_inline_decode(xdr, 8); | ||
4981 | if (unlikely(!p)) | ||
4982 | goto out_overflow; | ||
4983 | type = be32_to_cpup(p++); | ||
4984 | if (type != pdev->layout_type) { | ||
4985 | dprintk("%s: layout mismatch req: %u pdev: %u\n", | ||
4986 | __func__, pdev->layout_type, type); | ||
4987 | return -EINVAL; | ||
4988 | } | ||
4989 | /* | ||
4990 | * Get the length of the opaque device_addr4. xdr_read_pages places | ||
4991 | * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages) | ||
4992 | * and places the remaining xdr data in xdr_buf->tail | ||
4993 | */ | ||
4994 | pdev->mincount = be32_to_cpup(p); | ||
4995 | xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ | ||
4996 | |||
4997 | /* Parse notification bitmap, verifying that it is zero. */ | ||
4998 | p = xdr_inline_decode(xdr, 4); | ||
4999 | if (unlikely(!p)) | ||
5000 | goto out_overflow; | ||
5001 | len = be32_to_cpup(p); | ||
5002 | if (len) { | ||
5003 | int i; | ||
5004 | |||
5005 | p = xdr_inline_decode(xdr, 4 * len); | ||
5006 | if (unlikely(!p)) | ||
5007 | goto out_overflow; | ||
5008 | for (i = 0; i < len; i++, p++) { | ||
5009 | if (be32_to_cpup(p)) { | ||
5010 | dprintk("%s: notifications not supported\n", | ||
5011 | __func__); | ||
5012 | return -EIO; | ||
5013 | } | ||
5014 | } | ||
5015 | } | ||
5016 | return 0; | ||
5017 | out_overflow: | ||
5018 | print_overflow_msg(__func__, xdr); | ||
5019 | return -EIO; | ||
5020 | } | ||
5021 | |||
5022 | static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | ||
5023 | struct nfs4_layoutget_res *res) | ||
5024 | { | ||
5025 | __be32 *p; | ||
5026 | int status; | ||
5027 | u32 layout_count; | ||
5028 | |||
5029 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | ||
5030 | if (status) | ||
5031 | return status; | ||
5032 | p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); | ||
5033 | if (unlikely(!p)) | ||
5034 | goto out_overflow; | ||
5035 | res->return_on_close = be32_to_cpup(p++); | ||
5036 | p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); | ||
5037 | layout_count = be32_to_cpup(p); | ||
5038 | if (!layout_count) { | ||
5039 | dprintk("%s: server responded with empty layout array\n", | ||
5040 | __func__); | ||
5041 | return -EINVAL; | ||
5042 | } | ||
5043 | |||
5044 | p = xdr_inline_decode(xdr, 24); | ||
5045 | if (unlikely(!p)) | ||
5046 | goto out_overflow; | ||
5047 | p = xdr_decode_hyper(p, &res->range.offset); | ||
5048 | p = xdr_decode_hyper(p, &res->range.length); | ||
5049 | res->range.iomode = be32_to_cpup(p++); | ||
5050 | res->type = be32_to_cpup(p++); | ||
5051 | |||
5052 | status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); | ||
5053 | if (unlikely(status)) | ||
5054 | return status; | ||
5055 | |||
5056 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", | ||
5057 | __func__, | ||
5058 | (unsigned long)res->range.offset, | ||
5059 | (unsigned long)res->range.length, | ||
5060 | res->range.iomode, | ||
5061 | res->type, | ||
5062 | res->layout.len); | ||
5063 | |||
5064 | /* nfs4_proc_layoutget allocated a single page */ | ||
5065 | if (res->layout.len > PAGE_SIZE) | ||
5066 | return -ENOMEM; | ||
5067 | memcpy(res->layout.buf, p, res->layout.len); | ||
5068 | |||
5069 | if (layout_count > 1) { | ||
5070 | /* We only handle a length one array at the moment. Any | ||
5071 | * further entries are just ignored. Note that this means | ||
5072 | * the client may see a response that is less than the | ||
5073 | * minimum it requested. | ||
5074 | */ | ||
5075 | dprintk("%s: server responded with %d layouts, dropping tail\n", | ||
5076 | __func__, layout_count); | ||
5077 | } | ||
5078 | |||
5079 | return 0; | ||
5080 | out_overflow: | ||
5081 | print_overflow_msg(__func__, xdr); | ||
5082 | return -EIO; | ||
5083 | } | ||
5084 | #endif /* CONFIG_NFS_V4_1 */ | ||
5085 | |||
4775 | /* | 5086 | /* |
4776 | * END OF "GENERIC" DECODE ROUTINES. | 5087 | * END OF "GENERIC" DECODE ROUTINES. |
4777 | */ | 5088 | */ |
@@ -5799,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, | |||
5799 | status = decode_reclaim_complete(&xdr, (void *)NULL); | 6110 | status = decode_reclaim_complete(&xdr, (void *)NULL); |
5800 | return status; | 6111 | return status; |
5801 | } | 6112 | } |
6113 | |||
6114 | /* | ||
6115 | * Decode GETDEVINFO response | ||
6116 | */ | ||
6117 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, | ||
6118 | struct nfs4_getdeviceinfo_res *res) | ||
6119 | { | ||
6120 | struct xdr_stream xdr; | ||
6121 | struct compound_hdr hdr; | ||
6122 | int status; | ||
6123 | |||
6124 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
6125 | status = decode_compound_hdr(&xdr, &hdr); | ||
6126 | if (status != 0) | ||
6127 | goto out; | ||
6128 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
6129 | if (status != 0) | ||
6130 | goto out; | ||
6131 | status = decode_getdeviceinfo(&xdr, res->pdev); | ||
6132 | out: | ||
6133 | return status; | ||
6134 | } | ||
6135 | |||
6136 | /* | ||
6137 | * Decode LAYOUTGET response | ||
6138 | */ | ||
6139 | static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, | ||
6140 | struct nfs4_layoutget_res *res) | ||
6141 | { | ||
6142 | struct xdr_stream xdr; | ||
6143 | struct compound_hdr hdr; | ||
6144 | int status; | ||
6145 | |||
6146 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
6147 | status = decode_compound_hdr(&xdr, &hdr); | ||
6148 | if (status) | ||
6149 | goto out; | ||
6150 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
6151 | if (status) | ||
6152 | goto out; | ||
6153 | status = decode_putfh(&xdr); | ||
6154 | if (status) | ||
6155 | goto out; | ||
6156 | status = decode_layoutget(&xdr, rqstp, res); | ||
6157 | out: | ||
6158 | return status; | ||
6159 | } | ||
5802 | #endif /* CONFIG_NFS_V4_1 */ | 6160 | #endif /* CONFIG_NFS_V4_1 */ |
5803 | 6161 | ||
5804 | __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | 6162 | __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, |
@@ -5990,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
5990 | PROC(SEQUENCE, enc_sequence, dec_sequence), | 6348 | PROC(SEQUENCE, enc_sequence, dec_sequence), |
5991 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), | 6349 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), |
5992 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), | 6350 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), |
6351 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | ||
6352 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | ||
5993 | #endif /* CONFIG_NFS_V4_1 */ | 6353 | #endif /* CONFIG_NFS_V4_1 */ |
5994 | }; | 6354 | }; |
5995 | 6355 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c new file mode 100644 index 000000000000..db773428f95f --- /dev/null +++ b/fs/nfs/pnfs.c | |||
@@ -0,0 +1,783 @@ | |||
1 | /* | ||
2 | * pNFS functions to call and manage layout drivers. | ||
3 | * | ||
4 | * Copyright (c) 2002 [year of first publication] | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #include <linux/nfs_fs.h> | ||
31 | #include "internal.h" | ||
32 | #include "pnfs.h" | ||
33 | |||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
35 | |||
36 | /* Locking: | ||
37 | * | ||
38 | * pnfs_spinlock: | ||
39 | * protects pnfs_modules_tbl. | ||
40 | */ | ||
41 | static DEFINE_SPINLOCK(pnfs_spinlock); | ||
42 | |||
43 | /* | ||
44 | * pnfs_modules_tbl holds all pnfs modules | ||
45 | */ | ||
46 | static LIST_HEAD(pnfs_modules_tbl); | ||
47 | |||
48 | /* Return the registered pnfs layout driver module matching given id */ | ||
49 | static struct pnfs_layoutdriver_type * | ||
50 | find_pnfs_driver_locked(u32 id) | ||
51 | { | ||
52 | struct pnfs_layoutdriver_type *local; | ||
53 | |||
54 | list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) | ||
55 | if (local->id == id) | ||
56 | goto out; | ||
57 | local = NULL; | ||
58 | out: | ||
59 | dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); | ||
60 | return local; | ||
61 | } | ||
62 | |||
63 | static struct pnfs_layoutdriver_type * | ||
64 | find_pnfs_driver(u32 id) | ||
65 | { | ||
66 | struct pnfs_layoutdriver_type *local; | ||
67 | |||
68 | spin_lock(&pnfs_spinlock); | ||
69 | local = find_pnfs_driver_locked(id); | ||
70 | spin_unlock(&pnfs_spinlock); | ||
71 | return local; | ||
72 | } | ||
73 | |||
74 | void | ||
75 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | ||
76 | { | ||
77 | if (nfss->pnfs_curr_ld) { | ||
78 | nfss->pnfs_curr_ld->clear_layoutdriver(nfss); | ||
79 | module_put(nfss->pnfs_curr_ld->owner); | ||
80 | } | ||
81 | nfss->pnfs_curr_ld = NULL; | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * Try to set the server's pnfs module to the pnfs layout type specified by id. | ||
86 | * Currently only one pNFS layout driver per filesystem is supported. | ||
87 | * | ||
88 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. | ||
89 | */ | ||
90 | void | ||
91 | set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | ||
92 | { | ||
93 | struct pnfs_layoutdriver_type *ld_type = NULL; | ||
94 | |||
95 | if (id == 0) | ||
96 | goto out_no_driver; | ||
97 | if (!(server->nfs_client->cl_exchange_flags & | ||
98 | (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { | ||
99 | printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, | ||
100 | id, server->nfs_client->cl_exchange_flags); | ||
101 | goto out_no_driver; | ||
102 | } | ||
103 | ld_type = find_pnfs_driver(id); | ||
104 | if (!ld_type) { | ||
105 | request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); | ||
106 | ld_type = find_pnfs_driver(id); | ||
107 | if (!ld_type) { | ||
108 | dprintk("%s: No pNFS module found for %u.\n", | ||
109 | __func__, id); | ||
110 | goto out_no_driver; | ||
111 | } | ||
112 | } | ||
113 | if (!try_module_get(ld_type->owner)) { | ||
114 | dprintk("%s: Could not grab reference on module\n", __func__); | ||
115 | goto out_no_driver; | ||
116 | } | ||
117 | server->pnfs_curr_ld = ld_type; | ||
118 | if (ld_type->set_layoutdriver(server)) { | ||
119 | printk(KERN_ERR | ||
120 | "%s: Error initializing mount point for layout driver %u.\n", | ||
121 | __func__, id); | ||
122 | module_put(ld_type->owner); | ||
123 | goto out_no_driver; | ||
124 | } | ||
125 | dprintk("%s: pNFS module for %u set\n", __func__, id); | ||
126 | return; | ||
127 | |||
128 | out_no_driver: | ||
129 | dprintk("%s: Using NFSv4 I/O\n", __func__); | ||
130 | server->pnfs_curr_ld = NULL; | ||
131 | } | ||
132 | |||
133 | int | ||
134 | pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
135 | { | ||
136 | int status = -EINVAL; | ||
137 | struct pnfs_layoutdriver_type *tmp; | ||
138 | |||
139 | if (ld_type->id == 0) { | ||
140 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | ||
141 | return status; | ||
142 | } | ||
143 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { | ||
144 | printk(KERN_ERR "%s Layout driver must provide " | ||
145 | "alloc_lseg and free_lseg.\n", __func__); | ||
146 | return status; | ||
147 | } | ||
148 | |||
149 | spin_lock(&pnfs_spinlock); | ||
150 | tmp = find_pnfs_driver_locked(ld_type->id); | ||
151 | if (!tmp) { | ||
152 | list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); | ||
153 | status = 0; | ||
154 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | ||
155 | ld_type->name); | ||
156 | } else { | ||
157 | printk(KERN_ERR "%s Module with id %d already loaded!\n", | ||
158 | __func__, ld_type->id); | ||
159 | } | ||
160 | spin_unlock(&pnfs_spinlock); | ||
161 | |||
162 | return status; | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); | ||
165 | |||
166 | void | ||
167 | pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
168 | { | ||
169 | dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); | ||
170 | spin_lock(&pnfs_spinlock); | ||
171 | list_del(&ld_type->pnfs_tblid); | ||
172 | spin_unlock(&pnfs_spinlock); | ||
173 | } | ||
174 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | ||
175 | |||
176 | /* | ||
177 | * pNFS client layout cache | ||
178 | */ | ||
179 | |||
180 | static void | ||
181 | get_layout_hdr_locked(struct pnfs_layout_hdr *lo) | ||
182 | { | ||
183 | assert_spin_locked(&lo->inode->i_lock); | ||
184 | lo->refcount++; | ||
185 | } | ||
186 | |||
187 | static void | ||
188 | put_layout_hdr_locked(struct pnfs_layout_hdr *lo) | ||
189 | { | ||
190 | assert_spin_locked(&lo->inode->i_lock); | ||
191 | BUG_ON(lo->refcount == 0); | ||
192 | |||
193 | lo->refcount--; | ||
194 | if (!lo->refcount) { | ||
195 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | ||
196 | BUG_ON(!list_empty(&lo->layouts)); | ||
197 | NFS_I(lo->inode)->layout = NULL; | ||
198 | kfree(lo); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | void | ||
203 | put_layout_hdr(struct inode *inode) | ||
204 | { | ||
205 | spin_lock(&inode->i_lock); | ||
206 | put_layout_hdr_locked(NFS_I(inode)->layout); | ||
207 | spin_unlock(&inode->i_lock); | ||
208 | } | ||
209 | |||
210 | static void | ||
211 | init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) | ||
212 | { | ||
213 | INIT_LIST_HEAD(&lseg->fi_list); | ||
214 | kref_init(&lseg->kref); | ||
215 | lseg->layout = lo; | ||
216 | } | ||
217 | |||
218 | /* Called without i_lock held, as the free_lseg call may sleep */ | ||
219 | static void | ||
220 | destroy_lseg(struct kref *kref) | ||
221 | { | ||
222 | struct pnfs_layout_segment *lseg = | ||
223 | container_of(kref, struct pnfs_layout_segment, kref); | ||
224 | struct inode *ino = lseg->layout->inode; | ||
225 | |||
226 | dprintk("--> %s\n", __func__); | ||
227 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | ||
228 | /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ | ||
229 | put_layout_hdr(ino); | ||
230 | } | ||
231 | |||
232 | static void | ||
233 | put_lseg(struct pnfs_layout_segment *lseg) | ||
234 | { | ||
235 | if (!lseg) | ||
236 | return; | ||
237 | |||
238 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, | ||
239 | atomic_read(&lseg->kref.refcount)); | ||
240 | kref_put(&lseg->kref, destroy_lseg); | ||
241 | } | ||
242 | |||
243 | static void | ||
244 | pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) | ||
245 | { | ||
246 | struct pnfs_layout_segment *lseg, *next; | ||
247 | struct nfs_client *clp; | ||
248 | |||
249 | dprintk("%s:Begin lo %p\n", __func__, lo); | ||
250 | |||
251 | assert_spin_locked(&lo->inode->i_lock); | ||
252 | list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { | ||
253 | dprintk("%s: freeing lseg %p\n", __func__, lseg); | ||
254 | list_move(&lseg->fi_list, tmp_list); | ||
255 | } | ||
256 | clp = NFS_SERVER(lo->inode)->nfs_client; | ||
257 | spin_lock(&clp->cl_lock); | ||
258 | /* List does not take a reference, so no need for put here */ | ||
259 | list_del_init(&lo->layouts); | ||
260 | spin_unlock(&clp->cl_lock); | ||
261 | write_seqlock(&lo->seqlock); | ||
262 | clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state); | ||
263 | write_sequnlock(&lo->seqlock); | ||
264 | |||
265 | dprintk("%s:Return\n", __func__); | ||
266 | } | ||
267 | |||
268 | static void | ||
269 | pnfs_free_lseg_list(struct list_head *tmp_list) | ||
270 | { | ||
271 | struct pnfs_layout_segment *lseg; | ||
272 | |||
273 | while (!list_empty(tmp_list)) { | ||
274 | lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, | ||
275 | fi_list); | ||
276 | dprintk("%s calling put_lseg on %p\n", __func__, lseg); | ||
277 | list_del(&lseg->fi_list); | ||
278 | put_lseg(lseg); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | void | ||
283 | pnfs_destroy_layout(struct nfs_inode *nfsi) | ||
284 | { | ||
285 | struct pnfs_layout_hdr *lo; | ||
286 | LIST_HEAD(tmp_list); | ||
287 | |||
288 | spin_lock(&nfsi->vfs_inode.i_lock); | ||
289 | lo = nfsi->layout; | ||
290 | if (lo) { | ||
291 | pnfs_clear_lseg_list(lo, &tmp_list); | ||
292 | /* Matched by refcount set to 1 in alloc_init_layout_hdr */ | ||
293 | put_layout_hdr_locked(lo); | ||
294 | } | ||
295 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
296 | pnfs_free_lseg_list(&tmp_list); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Called by the state manger to remove all layouts established under an | ||
301 | * expired lease. | ||
302 | */ | ||
303 | void | ||
304 | pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
305 | { | ||
306 | struct pnfs_layout_hdr *lo; | ||
307 | LIST_HEAD(tmp_list); | ||
308 | |||
309 | spin_lock(&clp->cl_lock); | ||
310 | list_splice_init(&clp->cl_layouts, &tmp_list); | ||
311 | spin_unlock(&clp->cl_lock); | ||
312 | |||
313 | while (!list_empty(&tmp_list)) { | ||
314 | lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, | ||
315 | layouts); | ||
316 | dprintk("%s freeing layout for inode %lu\n", __func__, | ||
317 | lo->inode->i_ino); | ||
318 | pnfs_destroy_layout(NFS_I(lo->inode)); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* update lo->stateid with new if is more recent | ||
323 | * | ||
324 | * lo->stateid could be the open stateid, in which case we just use what given. | ||
325 | */ | ||
326 | static void | ||
327 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | ||
328 | const nfs4_stateid *new) | ||
329 | { | ||
330 | nfs4_stateid *old = &lo->stateid; | ||
331 | bool overwrite = false; | ||
332 | |||
333 | write_seqlock(&lo->seqlock); | ||
334 | if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || | ||
335 | memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) | ||
336 | overwrite = true; | ||
337 | else { | ||
338 | u32 oldseq, newseq; | ||
339 | |||
340 | oldseq = be32_to_cpu(old->stateid.seqid); | ||
341 | newseq = be32_to_cpu(new->stateid.seqid); | ||
342 | if ((int)(newseq - oldseq) > 0) | ||
343 | overwrite = true; | ||
344 | } | ||
345 | if (overwrite) | ||
346 | memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); | ||
347 | write_sequnlock(&lo->seqlock); | ||
348 | } | ||
349 | |||
350 | static void | ||
351 | pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, | ||
352 | struct nfs4_state *state) | ||
353 | { | ||
354 | int seq; | ||
355 | |||
356 | dprintk("--> %s\n", __func__); | ||
357 | write_seqlock(&lo->seqlock); | ||
358 | do { | ||
359 | seq = read_seqbegin(&state->seqlock); | ||
360 | memcpy(lo->stateid.data, state->stateid.data, | ||
361 | sizeof(state->stateid.data)); | ||
362 | } while (read_seqretry(&state->seqlock, seq)); | ||
363 | set_bit(NFS_LAYOUT_STATEID_SET, &lo->state); | ||
364 | write_sequnlock(&lo->seqlock); | ||
365 | dprintk("<-- %s\n", __func__); | ||
366 | } | ||
367 | |||
368 | void | ||
369 | pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
370 | struct nfs4_state *open_state) | ||
371 | { | ||
372 | int seq; | ||
373 | |||
374 | dprintk("--> %s\n", __func__); | ||
375 | do { | ||
376 | seq = read_seqbegin(&lo->seqlock); | ||
377 | if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { | ||
378 | /* This will trigger retry of the read */ | ||
379 | pnfs_layout_from_open_stateid(lo, open_state); | ||
380 | } else | ||
381 | memcpy(dst->data, lo->stateid.data, | ||
382 | sizeof(lo->stateid.data)); | ||
383 | } while (read_seqretry(&lo->seqlock, seq)); | ||
384 | dprintk("<-- %s\n", __func__); | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * Get layout from server. | ||
389 | * for now, assume that whole file layouts are requested. | ||
390 | * arg->offset: 0 | ||
391 | * arg->length: all ones | ||
392 | */ | ||
393 | static struct pnfs_layout_segment * | ||
394 | send_layoutget(struct pnfs_layout_hdr *lo, | ||
395 | struct nfs_open_context *ctx, | ||
396 | u32 iomode) | ||
397 | { | ||
398 | struct inode *ino = lo->inode; | ||
399 | struct nfs_server *server = NFS_SERVER(ino); | ||
400 | struct nfs4_layoutget *lgp; | ||
401 | struct pnfs_layout_segment *lseg = NULL; | ||
402 | |||
403 | dprintk("--> %s\n", __func__); | ||
404 | |||
405 | BUG_ON(ctx == NULL); | ||
406 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); | ||
407 | if (lgp == NULL) { | ||
408 | put_layout_hdr(lo->inode); | ||
409 | return NULL; | ||
410 | } | ||
411 | lgp->args.minlength = NFS4_MAX_UINT64; | ||
412 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | ||
413 | lgp->args.range.iomode = iomode; | ||
414 | lgp->args.range.offset = 0; | ||
415 | lgp->args.range.length = NFS4_MAX_UINT64; | ||
416 | lgp->args.type = server->pnfs_curr_ld->id; | ||
417 | lgp->args.inode = ino; | ||
418 | lgp->args.ctx = get_nfs_open_context(ctx); | ||
419 | lgp->lsegpp = &lseg; | ||
420 | |||
421 | /* Synchronously retrieve layout information from server and | ||
422 | * store in lseg. | ||
423 | */ | ||
424 | nfs4_proc_layoutget(lgp); | ||
425 | if (!lseg) { | ||
426 | /* remember that LAYOUTGET failed and suspend trying */ | ||
427 | set_bit(lo_fail_bit(iomode), &lo->state); | ||
428 | } | ||
429 | return lseg; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * Compare two layout segments for sorting into layout cache. | ||
434 | * We want to preferentially return RW over RO layouts, so ensure those | ||
435 | * are seen first. | ||
436 | */ | ||
437 | static s64 | ||
438 | cmp_layout(u32 iomode1, u32 iomode2) | ||
439 | { | ||
440 | /* read > read/write */ | ||
441 | return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); | ||
442 | } | ||
443 | |||
444 | static void | ||
445 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, | ||
446 | struct pnfs_layout_segment *lseg) | ||
447 | { | ||
448 | struct pnfs_layout_segment *lp; | ||
449 | int found = 0; | ||
450 | |||
451 | dprintk("%s:Begin\n", __func__); | ||
452 | |||
453 | assert_spin_locked(&lo->inode->i_lock); | ||
454 | if (list_empty(&lo->segs)) { | ||
455 | struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; | ||
456 | |||
457 | spin_lock(&clp->cl_lock); | ||
458 | BUG_ON(!list_empty(&lo->layouts)); | ||
459 | list_add_tail(&lo->layouts, &clp->cl_layouts); | ||
460 | spin_unlock(&clp->cl_lock); | ||
461 | } | ||
462 | list_for_each_entry(lp, &lo->segs, fi_list) { | ||
463 | if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0) | ||
464 | continue; | ||
465 | list_add_tail(&lseg->fi_list, &lp->fi_list); | ||
466 | dprintk("%s: inserted lseg %p " | ||
467 | "iomode %d offset %llu length %llu before " | ||
468 | "lp %p iomode %d offset %llu length %llu\n", | ||
469 | __func__, lseg, lseg->range.iomode, | ||
470 | lseg->range.offset, lseg->range.length, | ||
471 | lp, lp->range.iomode, lp->range.offset, | ||
472 | lp->range.length); | ||
473 | found = 1; | ||
474 | break; | ||
475 | } | ||
476 | if (!found) { | ||
477 | list_add_tail(&lseg->fi_list, &lo->segs); | ||
478 | dprintk("%s: inserted lseg %p " | ||
479 | "iomode %d offset %llu length %llu at tail\n", | ||
480 | __func__, lseg, lseg->range.iomode, | ||
481 | lseg->range.offset, lseg->range.length); | ||
482 | } | ||
483 | get_layout_hdr_locked(lo); | ||
484 | |||
485 | dprintk("%s:Return\n", __func__); | ||
486 | } | ||
487 | |||
488 | static struct pnfs_layout_hdr * | ||
489 | alloc_init_layout_hdr(struct inode *ino) | ||
490 | { | ||
491 | struct pnfs_layout_hdr *lo; | ||
492 | |||
493 | lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); | ||
494 | if (!lo) | ||
495 | return NULL; | ||
496 | lo->refcount = 1; | ||
497 | INIT_LIST_HEAD(&lo->layouts); | ||
498 | INIT_LIST_HEAD(&lo->segs); | ||
499 | seqlock_init(&lo->seqlock); | ||
500 | lo->inode = ino; | ||
501 | return lo; | ||
502 | } | ||
503 | |||
504 | static struct pnfs_layout_hdr * | ||
505 | pnfs_find_alloc_layout(struct inode *ino) | ||
506 | { | ||
507 | struct nfs_inode *nfsi = NFS_I(ino); | ||
508 | struct pnfs_layout_hdr *new = NULL; | ||
509 | |||
510 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); | ||
511 | |||
512 | assert_spin_locked(&ino->i_lock); | ||
513 | if (nfsi->layout) | ||
514 | return nfsi->layout; | ||
515 | |||
516 | spin_unlock(&ino->i_lock); | ||
517 | new = alloc_init_layout_hdr(ino); | ||
518 | spin_lock(&ino->i_lock); | ||
519 | |||
520 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | ||
521 | nfsi->layout = new; | ||
522 | else | ||
523 | kfree(new); | ||
524 | return nfsi->layout; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * iomode matching rules: | ||
529 | * iomode lseg match | ||
530 | * ----- ----- ----- | ||
531 | * ANY READ true | ||
532 | * ANY RW true | ||
533 | * RW READ false | ||
534 | * RW RW true | ||
535 | * READ READ true | ||
536 | * READ RW true | ||
537 | */ | ||
538 | static int | ||
539 | is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | ||
540 | { | ||
541 | return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * lookup range in layout | ||
546 | */ | ||
547 | static struct pnfs_layout_segment * | ||
548 | pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) | ||
549 | { | ||
550 | struct pnfs_layout_segment *lseg, *ret = NULL; | ||
551 | |||
552 | dprintk("%s:Begin\n", __func__); | ||
553 | |||
554 | assert_spin_locked(&lo->inode->i_lock); | ||
555 | list_for_each_entry(lseg, &lo->segs, fi_list) { | ||
556 | if (is_matching_lseg(lseg, iomode)) { | ||
557 | ret = lseg; | ||
558 | break; | ||
559 | } | ||
560 | if (cmp_layout(iomode, lseg->range.iomode) > 0) | ||
561 | break; | ||
562 | } | ||
563 | |||
564 | dprintk("%s:Return lseg %p ref %d\n", | ||
565 | __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); | ||
566 | return ret; | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Layout segment is retreived from the server if not cached. | ||
571 | * The appropriate layout segment is referenced and returned to the caller. | ||
572 | */ | ||
573 | struct pnfs_layout_segment * | ||
574 | pnfs_update_layout(struct inode *ino, | ||
575 | struct nfs_open_context *ctx, | ||
576 | enum pnfs_iomode iomode) | ||
577 | { | ||
578 | struct nfs_inode *nfsi = NFS_I(ino); | ||
579 | struct pnfs_layout_hdr *lo; | ||
580 | struct pnfs_layout_segment *lseg = NULL; | ||
581 | |||
582 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | ||
583 | return NULL; | ||
584 | spin_lock(&ino->i_lock); | ||
585 | lo = pnfs_find_alloc_layout(ino); | ||
586 | if (lo == NULL) { | ||
587 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | ||
588 | goto out_unlock; | ||
589 | } | ||
590 | |||
591 | /* Check to see if the layout for the given range already exists */ | ||
592 | lseg = pnfs_has_layout(lo, iomode); | ||
593 | if (lseg) { | ||
594 | dprintk("%s: Using cached lseg %p for iomode %d)\n", | ||
595 | __func__, lseg, iomode); | ||
596 | goto out_unlock; | ||
597 | } | ||
598 | |||
599 | /* if LAYOUTGET already failed once we don't try again */ | ||
600 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) | ||
601 | goto out_unlock; | ||
602 | |||
603 | get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ | ||
604 | spin_unlock(&ino->i_lock); | ||
605 | |||
606 | lseg = send_layoutget(lo, ctx, iomode); | ||
607 | out: | ||
608 | dprintk("%s end, state 0x%lx lseg %p\n", __func__, | ||
609 | nfsi->layout->state, lseg); | ||
610 | return lseg; | ||
611 | out_unlock: | ||
612 | spin_unlock(&ino->i_lock); | ||
613 | goto out; | ||
614 | } | ||
615 | |||
616 | int | ||
617 | pnfs_layout_process(struct nfs4_layoutget *lgp) | ||
618 | { | ||
619 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | ||
620 | struct nfs4_layoutget_res *res = &lgp->res; | ||
621 | struct pnfs_layout_segment *lseg; | ||
622 | struct inode *ino = lo->inode; | ||
623 | int status = 0; | ||
624 | |||
625 | /* Inject layout blob into I/O device driver */ | ||
626 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); | ||
627 | if (!lseg || IS_ERR(lseg)) { | ||
628 | if (!lseg) | ||
629 | status = -ENOMEM; | ||
630 | else | ||
631 | status = PTR_ERR(lseg); | ||
632 | dprintk("%s: Could not allocate layout: error %d\n", | ||
633 | __func__, status); | ||
634 | goto out; | ||
635 | } | ||
636 | |||
637 | spin_lock(&ino->i_lock); | ||
638 | init_lseg(lo, lseg); | ||
639 | lseg->range = res->range; | ||
640 | *lgp->lsegpp = lseg; | ||
641 | pnfs_insert_layout(lo, lseg); | ||
642 | |||
643 | /* Done processing layoutget. Set the layout stateid */ | ||
644 | pnfs_set_layout_stateid(lo, &res->stateid); | ||
645 | spin_unlock(&ino->i_lock); | ||
646 | out: | ||
647 | return status; | ||
648 | } | ||
649 | |||
650 | /* | ||
651 | * Device ID cache. Currently supports one layout type per struct nfs_client. | ||
652 | * Add layout type to the lookup key to expand to support multiple types. | ||
653 | */ | ||
654 | int | ||
655 | pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, | ||
656 | void (*free_callback)(struct pnfs_deviceid_node *)) | ||
657 | { | ||
658 | struct pnfs_deviceid_cache *c; | ||
659 | |||
660 | c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); | ||
661 | if (!c) | ||
662 | return -ENOMEM; | ||
663 | spin_lock(&clp->cl_lock); | ||
664 | if (clp->cl_devid_cache != NULL) { | ||
665 | atomic_inc(&clp->cl_devid_cache->dc_ref); | ||
666 | dprintk("%s [kref [%d]]\n", __func__, | ||
667 | atomic_read(&clp->cl_devid_cache->dc_ref)); | ||
668 | kfree(c); | ||
669 | } else { | ||
670 | /* kzalloc initializes hlists */ | ||
671 | spin_lock_init(&c->dc_lock); | ||
672 | atomic_set(&c->dc_ref, 1); | ||
673 | c->dc_free_callback = free_callback; | ||
674 | clp->cl_devid_cache = c; | ||
675 | dprintk("%s [new]\n", __func__); | ||
676 | } | ||
677 | spin_unlock(&clp->cl_lock); | ||
678 | return 0; | ||
679 | } | ||
680 | EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); | ||
681 | |||
682 | /* | ||
683 | * Called from pnfs_layoutdriver_type->free_lseg | ||
684 | * last layout segment reference frees deviceid | ||
685 | */ | ||
686 | void | ||
687 | pnfs_put_deviceid(struct pnfs_deviceid_cache *c, | ||
688 | struct pnfs_deviceid_node *devid) | ||
689 | { | ||
690 | struct nfs4_deviceid *id = &devid->de_id; | ||
691 | struct pnfs_deviceid_node *d; | ||
692 | struct hlist_node *n; | ||
693 | long h = nfs4_deviceid_hash(id); | ||
694 | |||
695 | dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); | ||
696 | if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) | ||
697 | return; | ||
698 | |||
699 | hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) | ||
700 | if (!memcmp(&d->de_id, id, sizeof(*id))) { | ||
701 | hlist_del_rcu(&d->de_node); | ||
702 | spin_unlock(&c->dc_lock); | ||
703 | synchronize_rcu(); | ||
704 | c->dc_free_callback(devid); | ||
705 | return; | ||
706 | } | ||
707 | spin_unlock(&c->dc_lock); | ||
708 | /* Why wasn't it found in the list? */ | ||
709 | BUG(); | ||
710 | } | ||
711 | EXPORT_SYMBOL_GPL(pnfs_put_deviceid); | ||
712 | |||
713 | /* Find and reference a deviceid */ | ||
714 | struct pnfs_deviceid_node * | ||
715 | pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) | ||
716 | { | ||
717 | struct pnfs_deviceid_node *d; | ||
718 | struct hlist_node *n; | ||
719 | long hash = nfs4_deviceid_hash(id); | ||
720 | |||
721 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
722 | rcu_read_lock(); | ||
723 | hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { | ||
724 | if (!memcmp(&d->de_id, id, sizeof(*id))) { | ||
725 | if (!atomic_inc_not_zero(&d->de_ref)) { | ||
726 | goto fail; | ||
727 | } else { | ||
728 | rcu_read_unlock(); | ||
729 | return d; | ||
730 | } | ||
731 | } | ||
732 | } | ||
733 | fail: | ||
734 | rcu_read_unlock(); | ||
735 | return NULL; | ||
736 | } | ||
737 | EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); | ||
738 | |||
739 | /* | ||
740 | * Add a deviceid to the cache. | ||
741 | * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new | ||
742 | */ | ||
743 | struct pnfs_deviceid_node * | ||
744 | pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) | ||
745 | { | ||
746 | struct pnfs_deviceid_node *d; | ||
747 | long hash = nfs4_deviceid_hash(&new->de_id); | ||
748 | |||
749 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
750 | spin_lock(&c->dc_lock); | ||
751 | d = pnfs_find_get_deviceid(c, &new->de_id); | ||
752 | if (d) { | ||
753 | spin_unlock(&c->dc_lock); | ||
754 | dprintk("%s [discard]\n", __func__); | ||
755 | c->dc_free_callback(new); | ||
756 | return d; | ||
757 | } | ||
758 | INIT_HLIST_NODE(&new->de_node); | ||
759 | atomic_set(&new->de_ref, 1); | ||
760 | hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); | ||
761 | spin_unlock(&c->dc_lock); | ||
762 | dprintk("%s [new]\n", __func__); | ||
763 | return new; | ||
764 | } | ||
765 | EXPORT_SYMBOL_GPL(pnfs_add_deviceid); | ||
766 | |||
767 | void | ||
768 | pnfs_put_deviceid_cache(struct nfs_client *clp) | ||
769 | { | ||
770 | struct pnfs_deviceid_cache *local = clp->cl_devid_cache; | ||
771 | |||
772 | dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); | ||
773 | if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { | ||
774 | int i; | ||
775 | /* Verify cache is empty */ | ||
776 | for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) | ||
777 | BUG_ON(!hlist_empty(&local->dc_deviceids[i])); | ||
778 | clp->cl_devid_cache = NULL; | ||
779 | spin_unlock(&clp->cl_lock); | ||
780 | kfree(local); | ||
781 | } | ||
782 | } | ||
783 | EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h new file mode 100644 index 000000000000..e12367d50489 --- /dev/null +++ b/fs/nfs/pnfs.h | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * pNFS client data structures. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #ifndef FS_NFS_PNFS_H | ||
31 | #define FS_NFS_PNFS_H | ||
32 | |||
/*
 * One layout segment.  Defined before the CONFIG_NFS_V4_1 guard, so the
 * type is complete in both the enabled and the stubbed-out configuration.
 */
33 | struct pnfs_layout_segment { | ||
34 | struct list_head fi_list; /* list linkage; presumably chained on pnfs_layout_hdr.segs - TODO confirm */ | ||
35 | struct pnfs_layout_range range; /* range descriptor for this segment */ | ||
36 | struct kref kref; /* reference count */ | ||
37 | struct pnfs_layout_hdr *layout; /* layout header; presumably the one owning this segment - TODO confirm */ | ||
38 | }; | ||
39 | |||
40 | #ifdef CONFIG_NFS_V4_1 | ||
41 | |||
/* NOTE(review): presumably the module-alias prefix used to locate layout driver modules - confirm against pnfs.c. */
42 | #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" | ||
43 | |||
/*
 * Layout header state values.  lo_fail_bit() maps an iomode onto one of the
 * two *_FAILED values.  Presumably these are bit numbers within
 * pnfs_layout_hdr.state - TODO confirm against pnfs.c.
 */
44 | enum { | ||
45 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ | ||
46 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | ||
47 | NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ | ||
48 | }; | ||
49 | |||
50 | /* Per-layout driver specific registration structure; this is the argument type of pnfs_register_layoutdriver() and pnfs_unregister_layoutdriver(). */ | ||
51 | struct pnfs_layoutdriver_type { | ||
52 | struct list_head pnfs_tblid; /* list linkage; presumably the table of registered drivers - TODO confirm */ | ||
53 | const u32 id; /* driver identifier; presumably the layout type number - TODO confirm */ | ||
54 | const char *name; /* driver name string */ | ||
55 | struct module *owner; /* struct module of the driver's owner */ | ||
56 | int (*set_layoutdriver) (struct nfs_server *); /* per-server hook; presumably run when the driver is attached - TODO confirm */ | ||
57 | int (*clear_layoutdriver) (struct nfs_server *); /* per-server hook; presumably run when the driver is detached - TODO confirm */ | ||
58 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); /* builds a layout segment from a layout header and a LAYOUTGET result */ | ||
59 | void (*free_lseg) (struct pnfs_layout_segment *lseg); /* releases a segment; presumably one produced by alloc_lseg - TODO confirm */ | ||
60 | }; | ||
61 | |||
/* Layout header: carries the layout stateid, state word and segment list. Presumably one per inode - TODO confirm. */
62 | struct pnfs_layout_hdr { | ||
63 | unsigned long refcount; /* NOTE(review): plain counter, not atomic_t/kref - confirm which lock serialises updates */ | ||
64 | struct list_head layouts; /* other client layouts */ | ||
65 | struct list_head segs; /* layout segments list */ | ||
66 | seqlock_t seqlock; /* Protects the stateid */ | ||
67 | nfs4_stateid stateid; /* guarded by seqlock (see above) */ | ||
68 | unsigned long state; /* presumably a bitmask indexed by the NFS_LAYOUT_* values - TODO confirm */ | ||
69 | struct inode *inode; /* inode pointer; presumably the file this layout belongs to - TODO confirm */ | ||
70 | }; | ||
71 | |||
/* Device description block; this is the type nfs4_proc_getdeviceinfo() takes as its second argument. */
72 | struct pnfs_device { | ||
73 | struct nfs4_deviceid dev_id; /* device ID */ | ||
74 | unsigned int layout_type; /* layout type number */ | ||
75 | unsigned int mincount; /* NOTE(review): presumably the minimum reply size requested - confirm against the XDR encoder */ | ||
76 | struct page **pages; /* page array; presumably the buffer for the device address data - TODO confirm */ | ||
77 | void *area; /* NOTE(review): presumably a kernel mapping of pages - confirm */ | ||
78 | unsigned int pgbase; /* presumably the byte offset into the first page - TODO confirm */ | ||
79 | unsigned int pglen; /* presumably the byte length covered by pages - TODO confirm */ | ||
80 | }; | ||
81 | |||
82 | /* | ||
83 | * Device ID RCU cache. A device ID is unique per client ID and layout type. | ||
84 | */ | ||
85 | #define NFS4_DEVICE_ID_HASH_BITS 5 /* log2 of the number of hash buckets */ | ||
86 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) /* bucket count (32); sizes pnfs_deviceid_cache.dc_deviceids[] */ | ||
87 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) /* applied by nfs4_deviceid_hash() to select a bucket */ | ||
88 | |||
89 | static inline u32 | ||
90 | nfs4_deviceid_hash(struct nfs4_deviceid *id) | ||
91 | { | ||
92 | unsigned char *cptr = (unsigned char *)id->data; | ||
93 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
94 | u32 x = 0; | ||
95 | |||
96 | while (nbytes--) { | ||
97 | x *= 37; | ||
98 | x += *cptr++; | ||
99 | } | ||
100 | return x & NFS4_DEVICE_ID_HASH_MASK; | ||
101 | } | ||
102 | |||
/* One entry in the device ID cache. */
103 | struct pnfs_deviceid_node { | ||
104 | struct hlist_node de_node; /* hash chain linkage; added with hlist_add_head_rcu() in pnfs_add_deviceid() */ | ||
105 | struct nfs4_deviceid de_id; /* the device ID; input to nfs4_deviceid_hash() and the lookup key */ | ||
106 | atomic_t de_ref; /* reference count; set to 1 when pnfs_add_deviceid() inserts the node */ | ||
107 | }; | ||
108 | |||
/* Device ID cache; reached through nfs_client.cl_devid_cache. */
109 | struct pnfs_deviceid_cache { | ||
110 | spinlock_t dc_lock; /* held across lookup + insert in pnfs_add_deviceid() */ | ||
111 | atomic_t dc_ref; /* cache reference count; pnfs_put_deviceid_cache() frees the cache when it reaches zero */ | ||
112 | void (*dc_free_callback)(struct pnfs_deviceid_node *); /* called by pnfs_add_deviceid() on a node that lost the insertion race */ | ||
113 | struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; /* hash buckets, indexed by nfs4_deviceid_hash() */ | ||
114 | }; | ||
115 | |||
/* Device ID cache API (implemented in pnfs.c). */
116 | extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, | ||
117 | void (*free_callback)(struct pnfs_deviceid_node *)); | ||
/* Drops a reference on clp->cl_devid_cache; the cache is freed once the count reaches zero. */
118 | extern void pnfs_put_deviceid_cache(struct nfs_client *); | ||
/* Looks up a device ID; returns NULL when no entry is found. Presumably takes a reference on the entry it returns - TODO confirm. */
119 | extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( | ||
120 | struct pnfs_deviceid_cache *, | ||
121 | struct nfs4_deviceid *); | ||
/* Inserts a node; returns the node now cached, which is a pre-existing entry if one with the same ID was found (the new node is then passed to dc_free_callback). */
122 | extern struct pnfs_deviceid_node *pnfs_add_deviceid( | ||
123 | struct pnfs_deviceid_cache *, | ||
124 | struct pnfs_deviceid_node *); | ||
125 | extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, | ||
126 | struct pnfs_deviceid_node *devid); | ||
127 | |||
/* Layout driver registration. */
128 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | ||
129 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | ||
130 | |||
131 | /* nfs4proc.c */ | ||
132 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | ||
133 | struct pnfs_device *dev); | ||
134 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | ||
135 | |||
136 | /* pnfs.c */ | ||
/* The functions from here on that also have a static inline stub in the #else branch below are callable without CONFIG_NFS_V4_1. */
137 | struct pnfs_layout_segment * | ||
138 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
139 | enum pnfs_iomode access_type); | ||
140 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | ||
141 | void unset_pnfs_layoutdriver(struct nfs_server *); | ||
142 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | ||
143 | void pnfs_destroy_layout(struct nfs_inode *); | ||
144 | void pnfs_destroy_all_layouts(struct nfs_client *); | ||
145 | void put_layout_hdr(struct inode *inode); | ||
146 | void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
147 | struct nfs4_state *open_state); | ||
148 | |||
149 | |||
150 | static inline int lo_fail_bit(u32 iomode) | ||
151 | { | ||
152 | return iomode == IOMODE_RW ? | ||
153 | NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; | ||
154 | } | ||
155 | |||
156 | /* Return true if a layout driver is being used for this mountpoint */ | ||
157 | static inline int pnfs_enabled_sb(struct nfs_server *nfss) | ||
158 | { | ||
159 | return nfss->pnfs_curr_ld != NULL; | ||
160 | } | ||
161 | |||
162 | #else /* CONFIG_NFS_V4_1 */ | ||
163 | |||
/*
 * CONFIG_NFS_V4_1 is disabled: the stubs below do nothing, and
 * pnfs_update_layout() always returns NULL.
 */
164 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
165 | { | ||
166 | } | ||
167 | |||
168 | static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) | ||
169 | { | ||
170 | } | ||
171 | |||
172 | static inline struct pnfs_layout_segment * | ||
173 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
174 | enum pnfs_iomode access_type) | ||
175 | { | ||
176 | return NULL; /* no layout is ever available without pNFS */ | ||
177 | } | ||
178 | |||
179 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) | ||
180 | { | ||
181 | } | ||
182 | |||
183 | static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | ||
184 | { | ||
185 | } | ||
186 | |||
187 | #endif /* CONFIG_NFS_V4_1 */ | ||
188 | |||
189 | #endif /* FS_NFS_PNFS_H */ | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 79859c81a943..e4b62c6f5a6e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | #include "iostat.h" | 26 | #include "iostat.h" |
27 | #include "fscache.h" | 27 | #include "fscache.h" |
28 | #include "pnfs.h" | ||
28 | 29 | ||
29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 30 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
30 | 31 | ||
@@ -120,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
120 | len = nfs_page_length(page); | 121 | len = nfs_page_length(page); |
121 | if (len == 0) | 122 | if (len == 0) |
122 | return nfs_return_empty_page(page); | 123 | return nfs_return_empty_page(page); |
124 | pnfs_update_layout(inode, ctx, IOMODE_READ); | ||
123 | new = nfs_create_request(ctx, inode, page, 0, len); | 125 | new = nfs_create_request(ctx, inode, page, 0, len); |
124 | if (IS_ERR(new)) { | 126 | if (IS_ERR(new)) { |
125 | unlock_page(page); | 127 | unlock_page(page); |
@@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
624 | if (ret == 0) | 626 | if (ret == 0) |
625 | goto read_complete; /* all pages were read */ | 627 | goto read_complete; /* all pages were read */ |
626 | 628 | ||
629 | pnfs_update_layout(inode, desc.ctx, IOMODE_READ); | ||
627 | if (rsize < PAGE_CACHE_SIZE) | 630 | if (rsize < PAGE_CACHE_SIZE) |
628 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 631 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
629 | else | 632 | else |