diff options
Diffstat (limited to 'fs/nfs/nfs4filelayoutdev.c')
-rw-r--r-- | fs/nfs/nfs4filelayoutdev.c | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c new file mode 100644 index 000000000000..51fe64ace55a --- /dev/null +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -0,0 +1,448 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 file layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
10 | * | ||
11 | * Permission is granted to use, copy, create derivative works, and | ||
12 | * redistribute this software and such derivative works for any purpose, | ||
13 | * so long as the name of the University of Michigan is not used in | ||
14 | * any advertising or publicity pertaining to the use or distribution | ||
15 | * of this software without specific, written prior authorization. If | ||
16 | * the above copyright notice or any other identification of the | ||
17 | * University of Michigan is included in any copy of any portion of | ||
18 | * this software, then the disclaimer below must also be included. | ||
19 | * | ||
20 | * This software is provided as is, without representation or warranty | ||
21 | * of any kind either express or implied, including without limitation | ||
22 | * the implied warranties of merchantability, fitness for a particular | ||
23 | * purpose, or noninfringement. The Regents of the University of | ||
24 | * Michigan shall not be liable for any damages, including special, | ||
25 | * indirect, incidental, or consequential damages, with respect to any | ||
26 | * claim arising out of or in connection with the use of the software, | ||
27 | * even if it has been or is hereafter advised of the possibility of | ||
28 | * such damages. | ||
29 | */ | ||
30 | |||
31 | #include <linux/nfs_fs.h> | ||
32 | #include <linux/vmalloc.h> | ||
33 | |||
34 | #include "internal.h" | ||
35 | #include "nfs4filelayout.h" | ||
36 | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
38 | |||
39 | /* | ||
40 | * Data server cache | ||
41 | * | ||
42 | * Data servers can be mapped to different device ids. | ||
43 | * nfs4_pnfs_ds reference counting | ||
44 | * - set to 1 on allocation | ||
45 | * - incremented when a device id maps a data server already in the cache. | ||
46 | * - decremented when deviceid is removed from the cache. | ||
47 | */ | ||
/* Taken in this file around every lookup, insertion and removal on nfs4_data_server_cache. */
48 | DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
/* Head of the data server cache; entries are linked through their ds_node member. */
49 | static LIST_HEAD(nfs4_data_server_cache); | ||
50 | |||
51 | /* Debug routines */ | ||
52 | void | ||
53 | print_ds(struct nfs4_pnfs_ds *ds) | ||
54 | { | ||
55 | if (ds == NULL) { | ||
56 | printk("%s NULL device\n", __func__); | ||
57 | return; | ||
58 | } | ||
59 | printk(" ip_addr %x port %hu\n" | ||
60 | " ref count %d\n" | ||
61 | " client %p\n" | ||
62 | " cl_exchange_flags %x\n", | ||
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | ||
64 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
66 | } | ||
67 | |||
68 | void | ||
69 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | ||
70 | { | ||
71 | int i; | ||
72 | |||
73 | ifdebug(FACILITY) { | ||
74 | printk("%s dsaddr->ds_num %d\n", __func__, | ||
75 | dsaddr->ds_num); | ||
76 | for (i = 0; i < dsaddr->ds_num; i++) | ||
77 | print_ds(dsaddr->ds_list[i]); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | void print_deviceid(struct nfs4_deviceid *id) | ||
82 | { | ||
83 | u32 *p = (u32 *)id; | ||
84 | |||
85 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
86 | p[0], p[1], p[2], p[3]); | ||
87 | } | ||
88 | |||
89 | /* nfs4_ds_cache_lock is held */ | ||
90 | static struct nfs4_pnfs_ds * | ||
91 | _data_server_lookup_locked(u32 ip_addr, u32 port) | ||
92 | { | ||
93 | struct nfs4_pnfs_ds *ds; | ||
94 | |||
95 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | ||
96 | ntohl(ip_addr), ntohs(port)); | ||
97 | |||
98 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | ||
99 | if (ds->ds_ip_addr == ip_addr && | ||
100 | ds->ds_port == port) { | ||
101 | return ds; | ||
102 | } | ||
103 | } | ||
104 | return NULL; | ||
105 | } | ||
106 | |||
107 | static void | ||
108 | destroy_ds(struct nfs4_pnfs_ds *ds) | ||
109 | { | ||
110 | dprintk("--> %s\n", __func__); | ||
111 | ifdebug(FACILITY) | ||
112 | print_ds(ds); | ||
113 | |||
114 | if (ds->ds_clp) | ||
115 | nfs_put_client(ds->ds_clp); | ||
116 | kfree(ds); | ||
117 | } | ||
118 | |||
119 | static void | ||
120 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||
121 | { | ||
122 | struct nfs4_pnfs_ds *ds; | ||
123 | int i; | ||
124 | |||
125 | print_deviceid(&dsaddr->deviceid.de_id); | ||
126 | |||
127 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
128 | ds = dsaddr->ds_list[i]; | ||
129 | if (ds != NULL) { | ||
130 | if (atomic_dec_and_lock(&ds->ds_count, | ||
131 | &nfs4_ds_cache_lock)) { | ||
132 | list_del_init(&ds->ds_node); | ||
133 | spin_unlock(&nfs4_ds_cache_lock); | ||
134 | destroy_ds(ds); | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | kfree(dsaddr->stripe_indices); | ||
139 | kfree(dsaddr); | ||
140 | } | ||
141 | |||
142 | void | ||
143 | nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) | ||
144 | { | ||
145 | struct nfs4_file_layout_dsaddr *dsaddr = | ||
146 | container_of(device, struct nfs4_file_layout_dsaddr, deviceid); | ||
147 | |||
148 | nfs4_fl_free_deviceid(dsaddr); | ||
149 | } | ||
150 | |||
151 | static struct nfs4_pnfs_ds * | ||
152 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) | ||
153 | { | ||
154 | struct nfs4_pnfs_ds *tmp_ds, *ds; | ||
155 | |||
156 | ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); | ||
157 | if (!ds) | ||
158 | goto out; | ||
159 | |||
160 | spin_lock(&nfs4_ds_cache_lock); | ||
161 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | ||
162 | if (tmp_ds == NULL) { | ||
163 | ds->ds_ip_addr = ip_addr; | ||
164 | ds->ds_port = port; | ||
165 | atomic_set(&ds->ds_count, 1); | ||
166 | INIT_LIST_HEAD(&ds->ds_node); | ||
167 | ds->ds_clp = NULL; | ||
168 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
169 | dprintk("%s add new data server ip 0x%x\n", __func__, | ||
170 | ds->ds_ip_addr); | ||
171 | } else { | ||
172 | kfree(ds); | ||
173 | atomic_inc(&tmp_ds->ds_count); | ||
174 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | ||
175 | __func__, tmp_ds->ds_ip_addr, | ||
176 | atomic_read(&tmp_ds->ds_count)); | ||
177 | ds = tmp_ds; | ||
178 | } | ||
179 | spin_unlock(&nfs4_ds_cache_lock); | ||
180 | out: | ||
181 | return ds; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Currently only support ipv4, and one multi-path address. | ||
186 | */ | ||
187 | static struct nfs4_pnfs_ds * | ||
188 | decode_and_add_ds(__be32 **pp, struct inode *inode) | ||
189 | { | ||
190 | struct nfs4_pnfs_ds *ds = NULL; | ||
191 | char *buf; | ||
192 | const char *ipend, *pstr; | ||
193 | u32 ip_addr, port; | ||
194 | int nlen, rlen, i; | ||
195 | int tmp[2]; | ||
196 | __be32 *r_netid, *r_addr, *p = *pp; | ||
197 | |||
198 | /* r_netid */ | ||
199 | nlen = be32_to_cpup(p++); | ||
200 | r_netid = p; | ||
201 | p += XDR_QUADLEN(nlen); | ||
202 | |||
203 | /* r_addr */ | ||
204 | rlen = be32_to_cpup(p++); | ||
205 | r_addr = p; | ||
206 | p += XDR_QUADLEN(rlen); | ||
207 | *pp = p; | ||
208 | |||
209 | /* Check that netid is "tcp" */ | ||
210 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | ||
211 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | ||
212 | goto out_err; | ||
213 | } | ||
214 | |||
215 | /* ipv6 length plus port is legal */ | ||
216 | if (rlen > INET6_ADDRSTRLEN + 8) { | ||
217 | dprintk("%s Invalid address, length %d\n", __func__, | ||
218 | rlen); | ||
219 | goto out_err; | ||
220 | } | ||
221 | buf = kmalloc(rlen + 1, GFP_KERNEL); | ||
222 | buf[rlen] = '\0'; | ||
223 | memcpy(buf, r_addr, rlen); | ||
224 | |||
225 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | ||
226 | for (i = 0; i < 2; i++) { | ||
227 | char *res = strrchr(buf, '.'); | ||
228 | *res = '-'; | ||
229 | } | ||
230 | |||
231 | /* Currently only support ipv4 address */ | ||
232 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | ||
233 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | ||
234 | goto out_free; | ||
235 | } | ||
236 | |||
237 | /* port */ | ||
238 | pstr = ipend; | ||
239 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | ||
240 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
241 | |||
242 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); | ||
243 | dprintk("%s Decoded address and port %s\n", __func__, buf); | ||
244 | out_free: | ||
245 | kfree(buf); | ||
246 | out_err: | ||
247 | return ds; | ||
248 | } | ||
249 | |||
250 | /* Decode opaque device data and return the result */ | ||
251 | static struct nfs4_file_layout_dsaddr* | ||
252 | decode_device(struct inode *ino, struct pnfs_device *pdev) | ||
253 | { | ||
254 | int i, dummy; | ||
255 | u32 cnt, num; | ||
256 | u8 *indexp; | ||
257 | __be32 *p = (__be32 *)pdev->area, *indicesp; | ||
258 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
259 | |||
260 | /* Get the stripe count (number of stripe index) */ | ||
261 | cnt = be32_to_cpup(p++); | ||
262 | dprintk("%s stripe count %d\n", __func__, cnt); | ||
263 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | ||
264 | printk(KERN_WARNING "%s: stripe count %d greater than " | ||
265 | "supported maximum %d\n", __func__, | ||
266 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | ||
267 | goto out_err; | ||
268 | } | ||
269 | |||
270 | /* Check the multipath list count */ | ||
271 | indicesp = p; | ||
272 | p += XDR_QUADLEN(cnt << 2); | ||
273 | num = be32_to_cpup(p++); | ||
274 | dprintk("%s ds_num %u\n", __func__, num); | ||
275 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | ||
276 | printk(KERN_WARNING "%s: multipath count %d greater than " | ||
277 | "supported maximum %d\n", __func__, | ||
278 | num, NFS4_PNFS_MAX_MULTI_CNT); | ||
279 | goto out_err; | ||
280 | } | ||
281 | dsaddr = kzalloc(sizeof(*dsaddr) + | ||
282 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | ||
283 | GFP_KERNEL); | ||
284 | if (!dsaddr) | ||
285 | goto out_err; | ||
286 | |||
287 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | ||
288 | if (!dsaddr->stripe_indices) | ||
289 | goto out_err_free; | ||
290 | |||
291 | dsaddr->stripe_count = cnt; | ||
292 | dsaddr->ds_num = num; | ||
293 | |||
294 | memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); | ||
295 | |||
296 | /* Go back an read stripe indices */ | ||
297 | p = indicesp; | ||
298 | indexp = &dsaddr->stripe_indices[0]; | ||
299 | for (i = 0; i < dsaddr->stripe_count; i++) { | ||
300 | *indexp = be32_to_cpup(p++); | ||
301 | if (*indexp >= num) | ||
302 | goto out_err_free; | ||
303 | indexp++; | ||
304 | } | ||
305 | /* Skip already read multipath list count */ | ||
306 | p++; | ||
307 | |||
308 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
309 | int j; | ||
310 | |||
311 | dummy = be32_to_cpup(p++); /* multipath count */ | ||
312 | if (dummy > 1) { | ||
313 | printk(KERN_WARNING | ||
314 | "%s: Multipath count %d not supported, " | ||
315 | "skipping all greater than 1\n", __func__, | ||
316 | dummy); | ||
317 | } | ||
318 | for (j = 0; j < dummy; j++) { | ||
319 | if (j == 0) { | ||
320 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | ||
321 | if (dsaddr->ds_list[i] == NULL) | ||
322 | goto out_err_free; | ||
323 | } else { | ||
324 | u32 len; | ||
325 | /* skip extra multipath */ | ||
326 | len = be32_to_cpup(p++); | ||
327 | p += XDR_QUADLEN(len); | ||
328 | len = be32_to_cpup(p++); | ||
329 | p += XDR_QUADLEN(len); | ||
330 | continue; | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | return dsaddr; | ||
335 | |||
336 | out_err_free: | ||
337 | nfs4_fl_free_deviceid(dsaddr); | ||
338 | out_err: | ||
339 | dprintk("%s ERROR: returning NULL\n", __func__); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Decode the opaque device specified in 'dev' | ||
345 | * and add it to the list of available devices. | ||
346 | * If the deviceid is already cached, nfs4_add_deviceid will return | ||
347 | * a pointer to the cached struct and throw away the new. | ||
348 | */ | ||
349 | static struct nfs4_file_layout_dsaddr* | ||
350 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev) | ||
351 | { | ||
352 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
353 | struct pnfs_deviceid_node *d; | ||
354 | |||
355 | dsaddr = decode_device(inode, dev); | ||
356 | if (!dsaddr) { | ||
357 | printk(KERN_WARNING "%s: Could not decode or add device\n", | ||
358 | __func__); | ||
359 | return NULL; | ||
360 | } | ||
361 | |||
362 | d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, | ||
363 | &dsaddr->deviceid); | ||
364 | |||
365 | return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Retrieve the information for dev_id, add it to the list | ||
370 | * of available devices, and return it. | ||
371 | */ | ||
372 | struct nfs4_file_layout_dsaddr * | ||
373 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | ||
374 | { | ||
375 | struct pnfs_device *pdev = NULL; | ||
376 | u32 max_resp_sz; | ||
377 | int max_pages; | ||
378 | struct page **pages = NULL; | ||
379 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
380 | int rc, i; | ||
381 | struct nfs_server *server = NFS_SERVER(inode); | ||
382 | |||
383 | /* | ||
384 | * Use the session max response size as the basis for setting | ||
385 | * GETDEVICEINFO's maxcount | ||
386 | */ | ||
387 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
388 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
389 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
390 | __func__, inode, max_resp_sz, max_pages); | ||
391 | |||
392 | pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); | ||
393 | if (pdev == NULL) | ||
394 | return NULL; | ||
395 | |||
396 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | ||
397 | if (pages == NULL) { | ||
398 | kfree(pdev); | ||
399 | return NULL; | ||
400 | } | ||
401 | for (i = 0; i < max_pages; i++) { | ||
402 | pages[i] = alloc_page(GFP_KERNEL); | ||
403 | if (!pages[i]) | ||
404 | goto out_free; | ||
405 | } | ||
406 | |||
407 | /* set pdev->area */ | ||
408 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | ||
409 | if (!pdev->area) | ||
410 | goto out_free; | ||
411 | |||
412 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
413 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
414 | pdev->pages = pages; | ||
415 | pdev->pgbase = 0; | ||
416 | pdev->pglen = PAGE_SIZE * max_pages; | ||
417 | pdev->mincount = 0; | ||
418 | |||
419 | rc = nfs4_proc_getdeviceinfo(server, pdev); | ||
420 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
421 | if (rc) | ||
422 | goto out_free; | ||
423 | |||
424 | /* | ||
425 | * Found new device, need to decode it and then add it to the | ||
426 | * list of known devices for this mountpoint. | ||
427 | */ | ||
428 | dsaddr = decode_and_add_device(inode, pdev); | ||
429 | out_free: | ||
430 | if (pdev->area != NULL) | ||
431 | vunmap(pdev->area); | ||
432 | for (i = 0; i < max_pages; i++) | ||
433 | __free_page(pages[i]); | ||
434 | kfree(pages); | ||
435 | kfree(pdev); | ||
436 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
437 | return dsaddr; | ||
438 | } | ||
439 | |||
440 | struct nfs4_file_layout_dsaddr * | ||
441 | nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) | ||
442 | { | ||
443 | struct pnfs_deviceid_node *d; | ||
444 | |||
445 | d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); | ||
446 | return (d == NULL) ? NULL : | ||
447 | container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
448 | } | ||