diff options
Diffstat (limited to 'fs/nfs/filelayout/filelayoutdev.c')
-rw-r--r-- | fs/nfs/filelayout/filelayoutdev.c | 843 |
1 files changed, 843 insertions, 0 deletions
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c new file mode 100644 index 000000000000..44bf0140a4c7 --- /dev/null +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
@@ -0,0 +1,843 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 file layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
10 | * | ||
11 | * Permission is granted to use, copy, create derivative works, and | ||
12 | * redistribute this software and such derivative works for any purpose, | ||
13 | * so long as the name of the University of Michigan is not used in | ||
14 | * any advertising or publicity pertaining to the use or distribution | ||
15 | * of this software without specific, written prior authorization. If | ||
16 | * the above copyright notice or any other identification of the | ||
17 | * University of Michigan is included in any copy of any portion of | ||
18 | * this software, then the disclaimer below must also be included. | ||
19 | * | ||
20 | * This software is provided as is, without representation or warranty | ||
21 | * of any kind either express or implied, including without limitation | ||
22 | * the implied warranties of merchantability, fitness for a particular | ||
23 | * purpose, or noninfringement. The Regents of the University of | ||
24 | * Michigan shall not be liable for any damages, including special, | ||
25 | * indirect, incidental, or consequential damages, with respect to any | ||
26 | * claim arising out of or in connection with the use of the software, | ||
27 | * even if it has been or is hereafter advised of the possibility of | ||
28 | * such damages. | ||
29 | */ | ||
30 | |||
31 | #include <linux/nfs_fs.h> | ||
32 | #include <linux/vmalloc.h> | ||
33 | #include <linux/module.h> | ||
34 | #include <linux/sunrpc/addr.h> | ||
35 | |||
36 | #include "../internal.h" | ||
37 | #include "../nfs4session.h" | ||
38 | #include "filelayout.h" | ||
39 | |||
40 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
41 | |||
42 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | ||
43 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | ||
44 | |||
45 | /* | ||
46 | * Data server cache | ||
47 | * | ||
48 | * Data servers can be mapped to different device ids. | ||
49 | * nfs4_pnfs_ds reference counting | ||
50 | * - set to 1 on allocation | ||
51 | * - incremented when a device id maps a data server already in the cache. | ||
52 | * - decremented when deviceid is removed from the cache. | ||
53 | */ | ||
54 | static DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
55 | static LIST_HEAD(nfs4_data_server_cache); | ||
56 | |||
57 | /* Debug routines */ | ||
58 | void | ||
59 | print_ds(struct nfs4_pnfs_ds *ds) | ||
60 | { | ||
61 | if (ds == NULL) { | ||
62 | printk("%s NULL device\n", __func__); | ||
63 | return; | ||
64 | } | ||
65 | printk(" ds %s\n" | ||
66 | " ref count %d\n" | ||
67 | " client %p\n" | ||
68 | " cl_exchange_flags %x\n", | ||
69 | ds->ds_remotestr, | ||
70 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
71 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
72 | } | ||
73 | |||
74 | static bool | ||
75 | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) | ||
76 | { | ||
77 | struct sockaddr_in *a, *b; | ||
78 | struct sockaddr_in6 *a6, *b6; | ||
79 | |||
80 | if (addr1->sa_family != addr2->sa_family) | ||
81 | return false; | ||
82 | |||
83 | switch (addr1->sa_family) { | ||
84 | case AF_INET: | ||
85 | a = (struct sockaddr_in *)addr1; | ||
86 | b = (struct sockaddr_in *)addr2; | ||
87 | |||
88 | if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||
89 | a->sin_port == b->sin_port) | ||
90 | return true; | ||
91 | break; | ||
92 | |||
93 | case AF_INET6: | ||
94 | a6 = (struct sockaddr_in6 *)addr1; | ||
95 | b6 = (struct sockaddr_in6 *)addr2; | ||
96 | |||
97 | /* LINKLOCAL addresses must have matching scope_id */ | ||
98 | if (ipv6_addr_src_scope(&a6->sin6_addr) == | ||
99 | IPV6_ADDR_SCOPE_LINKLOCAL && | ||
100 | a6->sin6_scope_id != b6->sin6_scope_id) | ||
101 | return false; | ||
102 | |||
103 | if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||
104 | a6->sin6_port == b6->sin6_port) | ||
105 | return true; | ||
106 | break; | ||
107 | |||
108 | default: | ||
109 | dprintk("%s: unhandled address family: %u\n", | ||
110 | __func__, addr1->sa_family); | ||
111 | return false; | ||
112 | } | ||
113 | |||
114 | return false; | ||
115 | } | ||
116 | |||
117 | static bool | ||
118 | _same_data_server_addrs_locked(const struct list_head *dsaddrs1, | ||
119 | const struct list_head *dsaddrs2) | ||
120 | { | ||
121 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
122 | |||
123 | /* step through both lists, comparing as we go */ | ||
124 | for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), | ||
125 | da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); | ||
126 | da1 != NULL && da2 != NULL; | ||
127 | da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), | ||
128 | da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { | ||
129 | if (!same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
130 | (struct sockaddr *)&da2->da_addr)) | ||
131 | return false; | ||
132 | } | ||
133 | if (da1 == NULL && da2 == NULL) | ||
134 | return true; | ||
135 | |||
136 | return false; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Lookup DS by addresses. nfs4_ds_cache_lock is held | ||
141 | */ | ||
142 | static struct nfs4_pnfs_ds * | ||
143 | _data_server_lookup_locked(const struct list_head *dsaddrs) | ||
144 | { | ||
145 | struct nfs4_pnfs_ds *ds; | ||
146 | |||
147 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) | ||
148 | if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) | ||
149 | return ds; | ||
150 | return NULL; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Create an rpc connection to the nfs4_pnfs_ds data server | ||
155 | * Currently only supports IPv4 and IPv6 addresses | ||
156 | */ | ||
157 | static int | ||
158 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | ||
159 | { | ||
160 | struct nfs_client *clp = ERR_PTR(-EIO); | ||
161 | struct nfs4_pnfs_ds_addr *da; | ||
162 | int status = 0; | ||
163 | |||
164 | dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, | ||
165 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); | ||
166 | |||
167 | list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||
168 | dprintk("%s: DS %s: trying address %s\n", | ||
169 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
170 | |||
171 | clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||
172 | (struct sockaddr *)&da->da_addr, | ||
173 | da->da_addrlen, IPPROTO_TCP, | ||
174 | dataserver_timeo, dataserver_retrans); | ||
175 | if (!IS_ERR(clp)) | ||
176 | break; | ||
177 | } | ||
178 | |||
179 | if (IS_ERR(clp)) { | ||
180 | status = PTR_ERR(clp); | ||
181 | goto out; | ||
182 | } | ||
183 | |||
184 | status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); | ||
185 | if (status) | ||
186 | goto out_put; | ||
187 | |||
188 | smp_wmb(); | ||
189 | ds->ds_clp = clp; | ||
190 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||
191 | out: | ||
192 | return status; | ||
193 | out_put: | ||
194 | nfs_put_client(clp); | ||
195 | goto out; | ||
196 | } | ||
197 | |||
198 | static void | ||
199 | destroy_ds(struct nfs4_pnfs_ds *ds) | ||
200 | { | ||
201 | struct nfs4_pnfs_ds_addr *da; | ||
202 | |||
203 | dprintk("--> %s\n", __func__); | ||
204 | ifdebug(FACILITY) | ||
205 | print_ds(ds); | ||
206 | |||
207 | if (ds->ds_clp) | ||
208 | nfs_put_client(ds->ds_clp); | ||
209 | |||
210 | while (!list_empty(&ds->ds_addrs)) { | ||
211 | da = list_first_entry(&ds->ds_addrs, | ||
212 | struct nfs4_pnfs_ds_addr, | ||
213 | da_node); | ||
214 | list_del_init(&da->da_node); | ||
215 | kfree(da->da_remotestr); | ||
216 | kfree(da); | ||
217 | } | ||
218 | |||
219 | kfree(ds->ds_remotestr); | ||
220 | kfree(ds); | ||
221 | } | ||
222 | |||
223 | void | ||
224 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||
225 | { | ||
226 | struct nfs4_pnfs_ds *ds; | ||
227 | int i; | ||
228 | |||
229 | nfs4_print_deviceid(&dsaddr->id_node.deviceid); | ||
230 | |||
231 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
232 | ds = dsaddr->ds_list[i]; | ||
233 | if (ds != NULL) { | ||
234 | if (atomic_dec_and_lock(&ds->ds_count, | ||
235 | &nfs4_ds_cache_lock)) { | ||
236 | list_del_init(&ds->ds_node); | ||
237 | spin_unlock(&nfs4_ds_cache_lock); | ||
238 | destroy_ds(ds); | ||
239 | } | ||
240 | } | ||
241 | } | ||
242 | kfree(dsaddr->stripe_indices); | ||
243 | kfree(dsaddr); | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Create a string with a human readable address and port to avoid | ||
248 | * complicated setup around many dprinks. | ||
249 | */ | ||
250 | static char * | ||
251 | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
252 | { | ||
253 | struct nfs4_pnfs_ds_addr *da; | ||
254 | char *remotestr; | ||
255 | size_t len; | ||
256 | char *p; | ||
257 | |||
258 | len = 3; /* '{', '}' and eol */ | ||
259 | list_for_each_entry(da, dsaddrs, da_node) { | ||
260 | len += strlen(da->da_remotestr) + 1; /* string plus comma */ | ||
261 | } | ||
262 | |||
263 | remotestr = kzalloc(len, gfp_flags); | ||
264 | if (!remotestr) | ||
265 | return NULL; | ||
266 | |||
267 | p = remotestr; | ||
268 | *(p++) = '{'; | ||
269 | len--; | ||
270 | list_for_each_entry(da, dsaddrs, da_node) { | ||
271 | size_t ll = strlen(da->da_remotestr); | ||
272 | |||
273 | if (ll > len) | ||
274 | goto out_err; | ||
275 | |||
276 | memcpy(p, da->da_remotestr, ll); | ||
277 | p += ll; | ||
278 | len -= ll; | ||
279 | |||
280 | if (len < 1) | ||
281 | goto out_err; | ||
282 | (*p++) = ','; | ||
283 | len--; | ||
284 | } | ||
285 | if (len < 2) | ||
286 | goto out_err; | ||
287 | *(p++) = '}'; | ||
288 | *p = '\0'; | ||
289 | return remotestr; | ||
290 | out_err: | ||
291 | kfree(remotestr); | ||
292 | return NULL; | ||
293 | } | ||
294 | |||
295 | static struct nfs4_pnfs_ds * | ||
296 | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
297 | { | ||
298 | struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; | ||
299 | char *remotestr; | ||
300 | |||
301 | if (list_empty(dsaddrs)) { | ||
302 | dprintk("%s: no addresses defined\n", __func__); | ||
303 | goto out; | ||
304 | } | ||
305 | |||
306 | ds = kzalloc(sizeof(*ds), gfp_flags); | ||
307 | if (!ds) | ||
308 | goto out; | ||
309 | |||
310 | /* this is only used for debugging, so it's ok if its NULL */ | ||
311 | remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||
312 | |||
313 | spin_lock(&nfs4_ds_cache_lock); | ||
314 | tmp_ds = _data_server_lookup_locked(dsaddrs); | ||
315 | if (tmp_ds == NULL) { | ||
316 | INIT_LIST_HEAD(&ds->ds_addrs); | ||
317 | list_splice_init(dsaddrs, &ds->ds_addrs); | ||
318 | ds->ds_remotestr = remotestr; | ||
319 | atomic_set(&ds->ds_count, 1); | ||
320 | INIT_LIST_HEAD(&ds->ds_node); | ||
321 | ds->ds_clp = NULL; | ||
322 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
323 | dprintk("%s add new data server %s\n", __func__, | ||
324 | ds->ds_remotestr); | ||
325 | } else { | ||
326 | kfree(remotestr); | ||
327 | kfree(ds); | ||
328 | atomic_inc(&tmp_ds->ds_count); | ||
329 | dprintk("%s data server %s found, inc'ed ds_count to %d\n", | ||
330 | __func__, tmp_ds->ds_remotestr, | ||
331 | atomic_read(&tmp_ds->ds_count)); | ||
332 | ds = tmp_ds; | ||
333 | } | ||
334 | spin_unlock(&nfs4_ds_cache_lock); | ||
335 | out: | ||
336 | return ds; | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * Currently only supports ipv4, ipv6 and one multi-path address. | ||
341 | */ | ||
342 | static struct nfs4_pnfs_ds_addr * | ||
343 | decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) | ||
344 | { | ||
345 | struct nfs4_pnfs_ds_addr *da = NULL; | ||
346 | char *buf, *portstr; | ||
347 | __be16 port; | ||
348 | int nlen, rlen; | ||
349 | int tmp[2]; | ||
350 | __be32 *p; | ||
351 | char *netid, *match_netid; | ||
352 | size_t len, match_netid_len; | ||
353 | char *startsep = ""; | ||
354 | char *endsep = ""; | ||
355 | |||
356 | |||
357 | /* r_netid */ | ||
358 | p = xdr_inline_decode(streamp, 4); | ||
359 | if (unlikely(!p)) | ||
360 | goto out_err; | ||
361 | nlen = be32_to_cpup(p++); | ||
362 | |||
363 | p = xdr_inline_decode(streamp, nlen); | ||
364 | if (unlikely(!p)) | ||
365 | goto out_err; | ||
366 | |||
367 | netid = kmalloc(nlen+1, gfp_flags); | ||
368 | if (unlikely(!netid)) | ||
369 | goto out_err; | ||
370 | |||
371 | netid[nlen] = '\0'; | ||
372 | memcpy(netid, p, nlen); | ||
373 | |||
374 | /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||
375 | p = xdr_inline_decode(streamp, 4); | ||
376 | if (unlikely(!p)) | ||
377 | goto out_free_netid; | ||
378 | rlen = be32_to_cpup(p); | ||
379 | |||
380 | p = xdr_inline_decode(streamp, rlen); | ||
381 | if (unlikely(!p)) | ||
382 | goto out_free_netid; | ||
383 | |||
384 | /* port is ".ABC.DEF", 8 chars max */ | ||
385 | if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { | ||
386 | dprintk("%s: Invalid address, length %d\n", __func__, | ||
387 | rlen); | ||
388 | goto out_free_netid; | ||
389 | } | ||
390 | buf = kmalloc(rlen + 1, gfp_flags); | ||
391 | if (!buf) { | ||
392 | dprintk("%s: Not enough memory\n", __func__); | ||
393 | goto out_free_netid; | ||
394 | } | ||
395 | buf[rlen] = '\0'; | ||
396 | memcpy(buf, p, rlen); | ||
397 | |||
398 | /* replace port '.' with '-' */ | ||
399 | portstr = strrchr(buf, '.'); | ||
400 | if (!portstr) { | ||
401 | dprintk("%s: Failed finding expected dot in port\n", | ||
402 | __func__); | ||
403 | goto out_free_buf; | ||
404 | } | ||
405 | *portstr = '-'; | ||
406 | |||
407 | /* find '.' between address and port */ | ||
408 | portstr = strrchr(buf, '.'); | ||
409 | if (!portstr) { | ||
410 | dprintk("%s: Failed finding expected dot between address and " | ||
411 | "port\n", __func__); | ||
412 | goto out_free_buf; | ||
413 | } | ||
414 | *portstr = '\0'; | ||
415 | |||
416 | da = kzalloc(sizeof(*da), gfp_flags); | ||
417 | if (unlikely(!da)) | ||
418 | goto out_free_buf; | ||
419 | |||
420 | INIT_LIST_HEAD(&da->da_node); | ||
421 | |||
422 | if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||
423 | sizeof(da->da_addr))) { | ||
424 | dprintk("%s: error parsing address %s\n", __func__, buf); | ||
425 | goto out_free_da; | ||
426 | } | ||
427 | |||
428 | portstr++; | ||
429 | sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); | ||
430 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
431 | |||
432 | switch (da->da_addr.ss_family) { | ||
433 | case AF_INET: | ||
434 | ((struct sockaddr_in *)&da->da_addr)->sin_port = port; | ||
435 | da->da_addrlen = sizeof(struct sockaddr_in); | ||
436 | match_netid = "tcp"; | ||
437 | match_netid_len = 3; | ||
438 | break; | ||
439 | |||
440 | case AF_INET6: | ||
441 | ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||
442 | da->da_addrlen = sizeof(struct sockaddr_in6); | ||
443 | match_netid = "tcp6"; | ||
444 | match_netid_len = 4; | ||
445 | startsep = "["; | ||
446 | endsep = "]"; | ||
447 | break; | ||
448 | |||
449 | default: | ||
450 | dprintk("%s: unsupported address family: %u\n", | ||
451 | __func__, da->da_addr.ss_family); | ||
452 | goto out_free_da; | ||
453 | } | ||
454 | |||
455 | if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||
456 | dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||
457 | __func__, netid, match_netid); | ||
458 | goto out_free_da; | ||
459 | } | ||
460 | |||
461 | /* save human readable address */ | ||
462 | len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||
463 | da->da_remotestr = kzalloc(len, gfp_flags); | ||
464 | |||
465 | /* NULL is ok, only used for dprintk */ | ||
466 | if (da->da_remotestr) | ||
467 | snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||
468 | buf, endsep, ntohs(port)); | ||
469 | |||
470 | dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||
471 | kfree(buf); | ||
472 | kfree(netid); | ||
473 | return da; | ||
474 | |||
475 | out_free_da: | ||
476 | kfree(da); | ||
477 | out_free_buf: | ||
478 | dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||
479 | kfree(buf); | ||
480 | out_free_netid: | ||
481 | kfree(netid); | ||
482 | out_err: | ||
483 | return NULL; | ||
484 | } | ||
485 | |||
486 | /* Decode opaque device data and return the result */ | ||
487 | static struct nfs4_file_layout_dsaddr* | ||
488 | decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | ||
489 | { | ||
490 | int i; | ||
491 | u32 cnt, num; | ||
492 | u8 *indexp; | ||
493 | __be32 *p; | ||
494 | u8 *stripe_indices; | ||
495 | u8 max_stripe_index; | ||
496 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
497 | struct xdr_stream stream; | ||
498 | struct xdr_buf buf; | ||
499 | struct page *scratch; | ||
500 | struct list_head dsaddrs; | ||
501 | struct nfs4_pnfs_ds_addr *da; | ||
502 | |||
503 | /* set up xdr stream */ | ||
504 | scratch = alloc_page(gfp_flags); | ||
505 | if (!scratch) | ||
506 | goto out_err; | ||
507 | |||
508 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | ||
509 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
510 | |||
511 | /* Get the stripe count (number of stripe index) */ | ||
512 | p = xdr_inline_decode(&stream, 4); | ||
513 | if (unlikely(!p)) | ||
514 | goto out_err_free_scratch; | ||
515 | |||
516 | cnt = be32_to_cpup(p); | ||
517 | dprintk("%s stripe count %d\n", __func__, cnt); | ||
518 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | ||
519 | printk(KERN_WARNING "NFS: %s: stripe count %d greater than " | ||
520 | "supported maximum %d\n", __func__, | ||
521 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | ||
522 | goto out_err_free_scratch; | ||
523 | } | ||
524 | |||
525 | /* read stripe indices */ | ||
526 | stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); | ||
527 | if (!stripe_indices) | ||
528 | goto out_err_free_scratch; | ||
529 | |||
530 | p = xdr_inline_decode(&stream, cnt << 2); | ||
531 | if (unlikely(!p)) | ||
532 | goto out_err_free_stripe_indices; | ||
533 | |||
534 | indexp = &stripe_indices[0]; | ||
535 | max_stripe_index = 0; | ||
536 | for (i = 0; i < cnt; i++) { | ||
537 | *indexp = be32_to_cpup(p++); | ||
538 | max_stripe_index = max(max_stripe_index, *indexp); | ||
539 | indexp++; | ||
540 | } | ||
541 | |||
542 | /* Check the multipath list count */ | ||
543 | p = xdr_inline_decode(&stream, 4); | ||
544 | if (unlikely(!p)) | ||
545 | goto out_err_free_stripe_indices; | ||
546 | |||
547 | num = be32_to_cpup(p); | ||
548 | dprintk("%s ds_num %u\n", __func__, num); | ||
549 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | ||
550 | printk(KERN_WARNING "NFS: %s: multipath count %d greater than " | ||
551 | "supported maximum %d\n", __func__, | ||
552 | num, NFS4_PNFS_MAX_MULTI_CNT); | ||
553 | goto out_err_free_stripe_indices; | ||
554 | } | ||
555 | |||
556 | /* validate stripe indices are all < num */ | ||
557 | if (max_stripe_index >= num) { | ||
558 | printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", | ||
559 | __func__, max_stripe_index, num); | ||
560 | goto out_err_free_stripe_indices; | ||
561 | } | ||
562 | |||
563 | dsaddr = kzalloc(sizeof(*dsaddr) + | ||
564 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | ||
565 | gfp_flags); | ||
566 | if (!dsaddr) | ||
567 | goto out_err_free_stripe_indices; | ||
568 | |||
569 | dsaddr->stripe_count = cnt; | ||
570 | dsaddr->stripe_indices = stripe_indices; | ||
571 | stripe_indices = NULL; | ||
572 | dsaddr->ds_num = num; | ||
573 | nfs4_init_deviceid_node(&dsaddr->id_node, | ||
574 | NFS_SERVER(ino)->pnfs_curr_ld, | ||
575 | NFS_SERVER(ino)->nfs_client, | ||
576 | &pdev->dev_id); | ||
577 | |||
578 | INIT_LIST_HEAD(&dsaddrs); | ||
579 | |||
580 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
581 | int j; | ||
582 | u32 mp_count; | ||
583 | |||
584 | p = xdr_inline_decode(&stream, 4); | ||
585 | if (unlikely(!p)) | ||
586 | goto out_err_free_deviceid; | ||
587 | |||
588 | mp_count = be32_to_cpup(p); /* multipath count */ | ||
589 | for (j = 0; j < mp_count; j++) { | ||
590 | da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, | ||
591 | &stream, gfp_flags); | ||
592 | if (da) | ||
593 | list_add_tail(&da->da_node, &dsaddrs); | ||
594 | } | ||
595 | if (list_empty(&dsaddrs)) { | ||
596 | dprintk("%s: no suitable DS addresses found\n", | ||
597 | __func__); | ||
598 | goto out_err_free_deviceid; | ||
599 | } | ||
600 | |||
601 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | ||
602 | if (!dsaddr->ds_list[i]) | ||
603 | goto out_err_drain_dsaddrs; | ||
604 | |||
605 | /* If DS was already in cache, free ds addrs */ | ||
606 | while (!list_empty(&dsaddrs)) { | ||
607 | da = list_first_entry(&dsaddrs, | ||
608 | struct nfs4_pnfs_ds_addr, | ||
609 | da_node); | ||
610 | list_del_init(&da->da_node); | ||
611 | kfree(da->da_remotestr); | ||
612 | kfree(da); | ||
613 | } | ||
614 | } | ||
615 | |||
616 | __free_page(scratch); | ||
617 | return dsaddr; | ||
618 | |||
619 | out_err_drain_dsaddrs: | ||
620 | while (!list_empty(&dsaddrs)) { | ||
621 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||
622 | da_node); | ||
623 | list_del_init(&da->da_node); | ||
624 | kfree(da->da_remotestr); | ||
625 | kfree(da); | ||
626 | } | ||
627 | out_err_free_deviceid: | ||
628 | nfs4_fl_free_deviceid(dsaddr); | ||
629 | /* stripe_indicies was part of dsaddr */ | ||
630 | goto out_err_free_scratch; | ||
631 | out_err_free_stripe_indices: | ||
632 | kfree(stripe_indices); | ||
633 | out_err_free_scratch: | ||
634 | __free_page(scratch); | ||
635 | out_err: | ||
636 | dprintk("%s ERROR: returning NULL\n", __func__); | ||
637 | return NULL; | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * Decode the opaque device specified in 'dev' and add it to the cache of | ||
642 | * available devices. | ||
643 | */ | ||
644 | static struct nfs4_file_layout_dsaddr * | ||
645 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | ||
646 | { | ||
647 | struct nfs4_deviceid_node *d; | ||
648 | struct nfs4_file_layout_dsaddr *n, *new; | ||
649 | |||
650 | new = decode_device(inode, dev, gfp_flags); | ||
651 | if (!new) { | ||
652 | printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", | ||
653 | __func__); | ||
654 | return NULL; | ||
655 | } | ||
656 | |||
657 | d = nfs4_insert_deviceid_node(&new->id_node); | ||
658 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
659 | if (n != new) { | ||
660 | nfs4_fl_free_deviceid(new); | ||
661 | return n; | ||
662 | } | ||
663 | |||
664 | return new; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Retrieve the information for dev_id, add it to the list | ||
669 | * of available devices, and return it. | ||
670 | */ | ||
671 | struct nfs4_file_layout_dsaddr * | ||
672 | filelayout_get_device_info(struct inode *inode, | ||
673 | struct nfs4_deviceid *dev_id, | ||
674 | struct rpc_cred *cred, | ||
675 | gfp_t gfp_flags) | ||
676 | { | ||
677 | struct pnfs_device *pdev = NULL; | ||
678 | u32 max_resp_sz; | ||
679 | int max_pages; | ||
680 | struct page **pages = NULL; | ||
681 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
682 | int rc, i; | ||
683 | struct nfs_server *server = NFS_SERVER(inode); | ||
684 | |||
685 | /* | ||
686 | * Use the session max response size as the basis for setting | ||
687 | * GETDEVICEINFO's maxcount | ||
688 | */ | ||
689 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
690 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
691 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
692 | __func__, inode, max_resp_sz, max_pages); | ||
693 | |||
694 | pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); | ||
695 | if (pdev == NULL) | ||
696 | return NULL; | ||
697 | |||
698 | pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); | ||
699 | if (pages == NULL) { | ||
700 | kfree(pdev); | ||
701 | return NULL; | ||
702 | } | ||
703 | for (i = 0; i < max_pages; i++) { | ||
704 | pages[i] = alloc_page(gfp_flags); | ||
705 | if (!pages[i]) | ||
706 | goto out_free; | ||
707 | } | ||
708 | |||
709 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
710 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
711 | pdev->pages = pages; | ||
712 | pdev->pgbase = 0; | ||
713 | pdev->pglen = max_resp_sz; | ||
714 | pdev->mincount = 0; | ||
715 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
716 | |||
717 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
718 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
719 | if (rc) | ||
720 | goto out_free; | ||
721 | |||
722 | /* | ||
723 | * Found new device, need to decode it and then add it to the | ||
724 | * list of known devices for this mountpoint. | ||
725 | */ | ||
726 | dsaddr = decode_and_add_device(inode, pdev, gfp_flags); | ||
727 | out_free: | ||
728 | for (i = 0; i < max_pages; i++) | ||
729 | __free_page(pages[i]); | ||
730 | kfree(pages); | ||
731 | kfree(pdev); | ||
732 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
733 | return dsaddr; | ||
734 | } | ||
735 | |||
736 | void | ||
737 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||
738 | { | ||
739 | nfs4_put_deviceid_node(&dsaddr->id_node); | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit | ||
744 | * Then: ((res + fsi) % dsaddr->stripe_count) | ||
745 | */ | ||
746 | u32 | ||
747 | nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) | ||
748 | { | ||
749 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
750 | u64 tmp; | ||
751 | |||
752 | tmp = offset - flseg->pattern_offset; | ||
753 | do_div(tmp, flseg->stripe_unit); | ||
754 | tmp += flseg->first_stripe_index; | ||
755 | return do_div(tmp, flseg->dsaddr->stripe_count); | ||
756 | } | ||
757 | |||
758 | u32 | ||
759 | nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) | ||
760 | { | ||
761 | return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; | ||
762 | } | ||
763 | |||
764 | struct nfs_fh * | ||
765 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | ||
766 | { | ||
767 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
768 | u32 i; | ||
769 | |||
770 | if (flseg->stripe_type == STRIPE_SPARSE) { | ||
771 | if (flseg->num_fh == 1) | ||
772 | i = 0; | ||
773 | else if (flseg->num_fh == 0) | ||
774 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ | ||
775 | return NULL; | ||
776 | else | ||
777 | i = nfs4_fl_calc_ds_index(lseg, j); | ||
778 | } else | ||
779 | i = j; | ||
780 | return flseg->fh_array[i]; | ||
781 | } | ||
782 | |||
783 | static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) | ||
784 | { | ||
785 | might_sleep(); | ||
786 | wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, | ||
787 | nfs_wait_bit_killable, TASK_KILLABLE); | ||
788 | } | ||
789 | |||
790 | static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) | ||
791 | { | ||
792 | smp_mb__before_atomic(); | ||
793 | clear_bit(NFS4DS_CONNECTING, &ds->ds_state); | ||
794 | smp_mb__after_atomic(); | ||
795 | wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); | ||
796 | } | ||
797 | |||
798 | |||
799 | struct nfs4_pnfs_ds * | ||
800 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | ||
801 | { | ||
802 | struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; | ||
803 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | ||
804 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); | ||
805 | struct nfs4_pnfs_ds *ret = ds; | ||
806 | |||
807 | if (ds == NULL) { | ||
808 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | ||
809 | __func__, ds_idx); | ||
810 | filelayout_mark_devid_invalid(devid); | ||
811 | goto out; | ||
812 | } | ||
813 | smp_rmb(); | ||
814 | if (ds->ds_clp) | ||
815 | goto out_test_devid; | ||
816 | |||
817 | if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { | ||
818 | struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); | ||
819 | int err; | ||
820 | |||
821 | err = nfs4_ds_connect(s, ds); | ||
822 | if (err) | ||
823 | nfs4_mark_deviceid_unavailable(devid); | ||
824 | nfs4_clear_ds_conn_bit(ds); | ||
825 | } else { | ||
826 | /* Either ds is connected, or ds is NULL */ | ||
827 | nfs4_wait_ds_connect(ds); | ||
828 | } | ||
829 | out_test_devid: | ||
830 | if (filelayout_test_devid_unavailable(devid)) | ||
831 | ret = NULL; | ||
832 | out: | ||
833 | return ret; | ||
834 | } | ||
835 | |||
836 | module_param(dataserver_retrans, uint, 0644); | ||
837 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " | ||
838 | "retries a request before it attempts further " | ||
839 | " recovery action."); | ||
840 | module_param(dataserver_timeo, uint, 0644); | ||
841 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | ||
842 | "NFSv4.1 client waits for a response from a " | ||
843 | " data server before it retries an NFS request."); | ||