diff options
Diffstat (limited to 'fs/afs/volume.c')
-rw-r--r-- | fs/afs/volume.c | 520 |
1 files changed, 520 insertions, 0 deletions
diff --git a/fs/afs/volume.c b/fs/afs/volume.c new file mode 100644 index 000000000000..0ff4b86476e3 --- /dev/null +++ b/fs/afs/volume.c | |||
@@ -0,0 +1,520 @@ | |||
1 | /* volume.c: AFS volume management | ||
2 | * | ||
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/pagemap.h> | ||
18 | #include "volume.h" | ||
19 | #include "vnode.h" | ||
20 | #include "cell.h" | ||
21 | #include "cache.h" | ||
22 | #include "cmservice.h" | ||
23 | #include "fsclient.h" | ||
24 | #include "vlclient.h" | ||
25 | #include "internal.h" | ||
26 | |||
#ifdef __KDEBUG
/*
 * printable names for the volume types, used only by debugging output;
 * afs_volume_lookup() indexes this table with volume->type
 */
static const char *afs_voltypes[] = {
	"R/W",
	"R/O",
	"BAK",
};
#endif
30 | |||
#ifdef AFS_CACHING_SUPPORT
/* cache index callbacks, defined at the bottom of this file */
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry);
static void afs_volume_cache_update(void *source, void *entry);

/*
 * definition of the "volume" cache index
 * - each entry holds one struct afs_cache_vhash
 * - entries are matched and updated through the callbacks declared above
 */
struct cachefs_index_def afs_volume_cache_index_def = {
	.name		= "volume",
	.data_size	= sizeof(struct afs_cache_vhash),
	.keys		= {
		[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
		[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
	},
	.match		= afs_volume_cache_match,
	.update		= afs_volume_cache_update,
};
#endif
45 | |||
46 | /*****************************************************************************/ | ||
47 | /* | ||
48 | * lookup a volume by name | ||
49 | * - this can be one of the following: | ||
50 | * "%[cell:]volume[.]" R/W volume | ||
51 | * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), | ||
52 | * or R/W (rwparent=1) volume | ||
53 | * "%[cell:]volume.readonly" R/O volume | ||
54 | * "#[cell:]volume.readonly" R/O volume | ||
55 | * "%[cell:]volume.backup" Backup volume | ||
56 | * "#[cell:]volume.backup" Backup volume | ||
57 | * | ||
58 | * The cell name is optional, and defaults to the current cell. | ||
59 | * | ||
60 | * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin | ||
61 | * Guide | ||
62 | * - Rule 1: Explicit type suffix forces access of that type or nothing | ||
63 | * (no suffix, then use Rule 2 & 3) | ||
64 | * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W | ||
65 | * if not available | ||
66 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless | ||
67 | * explicitly told otherwise | ||
68 | */ | ||
69 | int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath, | ||
70 | struct afs_volume **_volume) | ||
71 | { | ||
72 | struct afs_vlocation *vlocation = NULL; | ||
73 | struct afs_volume *volume = NULL; | ||
74 | afs_voltype_t type; | ||
75 | const char *cellname, *volname, *suffix; | ||
76 | char srvtmask; | ||
77 | int force, ret, loop, cellnamesz, volnamesz; | ||
78 | |||
79 | _enter("%s,,%d,", name, rwpath); | ||
80 | |||
81 | if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) { | ||
82 | printk("kAFS: unparsable volume name\n"); | ||
83 | return -EINVAL; | ||
84 | } | ||
85 | |||
86 | /* determine the type of volume we're looking for */ | ||
87 | force = 0; | ||
88 | type = AFSVL_ROVOL; | ||
89 | |||
90 | if (rwpath || name[0] == '%') { | ||
91 | type = AFSVL_RWVOL; | ||
92 | force = 1; | ||
93 | } | ||
94 | |||
95 | suffix = strrchr(name, '.'); | ||
96 | if (suffix) { | ||
97 | if (strcmp(suffix, ".readonly") == 0) { | ||
98 | type = AFSVL_ROVOL; | ||
99 | force = 1; | ||
100 | } | ||
101 | else if (strcmp(suffix, ".backup") == 0) { | ||
102 | type = AFSVL_BACKVOL; | ||
103 | force = 1; | ||
104 | } | ||
105 | else if (suffix[1] == 0) { | ||
106 | } | ||
107 | else { | ||
108 | suffix = NULL; | ||
109 | } | ||
110 | } | ||
111 | |||
112 | /* split the cell and volume names */ | ||
113 | name++; | ||
114 | volname = strchr(name, ':'); | ||
115 | if (volname) { | ||
116 | cellname = name; | ||
117 | cellnamesz = volname - name; | ||
118 | volname++; | ||
119 | } | ||
120 | else { | ||
121 | volname = name; | ||
122 | cellname = NULL; | ||
123 | cellnamesz = 0; | ||
124 | } | ||
125 | |||
126 | volnamesz = suffix ? suffix - volname : strlen(volname); | ||
127 | |||
128 | _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", | ||
129 | cellnamesz, cellnamesz, cellname ?: "", cell, | ||
130 | volnamesz, volnamesz, volname, suffix ?: "-", | ||
131 | type, | ||
132 | force ? " FORCE" : ""); | ||
133 | |||
134 | /* lookup the cell record */ | ||
135 | if (cellname || !cell) { | ||
136 | ret = afs_cell_lookup(cellname, cellnamesz, &cell); | ||
137 | if (ret<0) { | ||
138 | printk("kAFS: unable to lookup cell '%s'\n", | ||
139 | cellname ?: ""); | ||
140 | goto error; | ||
141 | } | ||
142 | } | ||
143 | else { | ||
144 | afs_get_cell(cell); | ||
145 | } | ||
146 | |||
147 | /* lookup the volume location record */ | ||
148 | ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation); | ||
149 | if (ret < 0) | ||
150 | goto error; | ||
151 | |||
152 | /* make the final decision on the type we want */ | ||
153 | ret = -ENOMEDIUM; | ||
154 | if (force && !(vlocation->vldb.vidmask & (1 << type))) | ||
155 | goto error; | ||
156 | |||
157 | srvtmask = 0; | ||
158 | for (loop = 0; loop < vlocation->vldb.nservers; loop++) | ||
159 | srvtmask |= vlocation->vldb.srvtmask[loop]; | ||
160 | |||
161 | if (force) { | ||
162 | if (!(srvtmask & (1 << type))) | ||
163 | goto error; | ||
164 | } | ||
165 | else if (srvtmask & AFS_VOL_VTM_RO) { | ||
166 | type = AFSVL_ROVOL; | ||
167 | } | ||
168 | else if (srvtmask & AFS_VOL_VTM_RW) { | ||
169 | type = AFSVL_RWVOL; | ||
170 | } | ||
171 | else { | ||
172 | goto error; | ||
173 | } | ||
174 | |||
175 | down_write(&cell->vl_sem); | ||
176 | |||
177 | /* is the volume already active? */ | ||
178 | if (vlocation->vols[type]) { | ||
179 | /* yes - re-use it */ | ||
180 | volume = vlocation->vols[type]; | ||
181 | afs_get_volume(volume); | ||
182 | goto success; | ||
183 | } | ||
184 | |||
185 | /* create a new volume record */ | ||
186 | _debug("creating new volume record"); | ||
187 | |||
188 | ret = -ENOMEM; | ||
189 | volume = kmalloc(sizeof(struct afs_volume), GFP_KERNEL); | ||
190 | if (!volume) | ||
191 | goto error_up; | ||
192 | |||
193 | memset(volume, 0, sizeof(struct afs_volume)); | ||
194 | atomic_set(&volume->usage, 1); | ||
195 | volume->type = type; | ||
196 | volume->type_force = force; | ||
197 | volume->cell = cell; | ||
198 | volume->vid = vlocation->vldb.vid[type]; | ||
199 | |||
200 | init_rwsem(&volume->server_sem); | ||
201 | |||
202 | /* look up all the applicable server records */ | ||
203 | for (loop = 0; loop < 8; loop++) { | ||
204 | if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { | ||
205 | ret = afs_server_lookup( | ||
206 | volume->cell, | ||
207 | &vlocation->vldb.servers[loop], | ||
208 | &volume->servers[volume->nservers]); | ||
209 | if (ret < 0) | ||
210 | goto error_discard; | ||
211 | |||
212 | volume->nservers++; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | /* attach the cache and volume location */ | ||
217 | #ifdef AFS_CACHING_SUPPORT | ||
218 | cachefs_acquire_cookie(vlocation->cache, | ||
219 | &afs_vnode_cache_index_def, | ||
220 | volume, | ||
221 | &volume->cache); | ||
222 | #endif | ||
223 | |||
224 | afs_get_vlocation(vlocation); | ||
225 | volume->vlocation = vlocation; | ||
226 | |||
227 | vlocation->vols[type] = volume; | ||
228 | |||
229 | success: | ||
230 | _debug("kAFS selected %s volume %08x", | ||
231 | afs_voltypes[volume->type], volume->vid); | ||
232 | *_volume = volume; | ||
233 | ret = 0; | ||
234 | |||
235 | /* clean up */ | ||
236 | error_up: | ||
237 | up_write(&cell->vl_sem); | ||
238 | error: | ||
239 | afs_put_vlocation(vlocation); | ||
240 | afs_put_cell(cell); | ||
241 | |||
242 | _leave(" = %d (%p)", ret, volume); | ||
243 | return ret; | ||
244 | |||
245 | error_discard: | ||
246 | up_write(&cell->vl_sem); | ||
247 | |||
248 | for (loop = volume->nservers - 1; loop >= 0; loop--) | ||
249 | afs_put_server(volume->servers[loop]); | ||
250 | |||
251 | kfree(volume); | ||
252 | goto error; | ||
253 | } /* end afs_volume_lookup() */ | ||
254 | |||
255 | /*****************************************************************************/ | ||
256 | /* | ||
257 | * destroy a volume record | ||
258 | */ | ||
259 | void afs_put_volume(struct afs_volume *volume) | ||
260 | { | ||
261 | struct afs_vlocation *vlocation; | ||
262 | int loop; | ||
263 | |||
264 | if (!volume) | ||
265 | return; | ||
266 | |||
267 | _enter("%p", volume); | ||
268 | |||
269 | vlocation = volume->vlocation; | ||
270 | |||
271 | /* sanity check */ | ||
272 | BUG_ON(atomic_read(&volume->usage) <= 0); | ||
273 | |||
274 | /* to prevent a race, the decrement and the dequeue must be effectively | ||
275 | * atomic */ | ||
276 | down_write(&vlocation->cell->vl_sem); | ||
277 | |||
278 | if (likely(!atomic_dec_and_test(&volume->usage))) { | ||
279 | up_write(&vlocation->cell->vl_sem); | ||
280 | _leave(""); | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | vlocation->vols[volume->type] = NULL; | ||
285 | |||
286 | up_write(&vlocation->cell->vl_sem); | ||
287 | |||
288 | /* finish cleaning up the volume */ | ||
289 | #ifdef AFS_CACHING_SUPPORT | ||
290 | cachefs_relinquish_cookie(volume->cache, 0); | ||
291 | #endif | ||
292 | afs_put_vlocation(vlocation); | ||
293 | |||
294 | for (loop = volume->nservers - 1; loop >= 0; loop--) | ||
295 | afs_put_server(volume->servers[loop]); | ||
296 | |||
297 | kfree(volume); | ||
298 | |||
299 | _leave(" [destroyed]"); | ||
300 | } /* end afs_put_volume() */ | ||
301 | |||
/*****************************************************************************/
/*
 * rank a fileserver state to decide which error is reported when no server
 * on the list is usable
 * - a state of higher rank displaces a recorded state of lower rank
 * - -EREMOTEIO and every state not listed explicitly share the top rank
 */
static int afs_volume_fs_state_rank(int state)
{
	switch (state) {
	case 0:
		return 0;
	case -ENETUNREACH:
		return 1;
	case -EHOSTUNREACH:
		return 2;
	case -ECONNREFUSED:
		return 3;
	default:
		return 4;
	}
}

/*
 * pick a server to use to try accessing this volume
 * - returns 0 and sets *_server, with an elevated usage count on the server
 *   chosen, if a server whose fs_state is 0 is found
 * - if the volume has no servers, returns -ENOMEDIUM if any server has
 *   rejected the volume (rjservers != 0) and -ESTALE otherwise
 * - if none of the servers is in state 0, returns the highest-ranked
 *   fs_state seen on the list; *_server is not written
 */
int afs_volume_pick_fileserver(struct afs_volume *volume,
			       struct afs_server **_server)
{
	struct afs_server *candidate;
	int worst, fs_state, idx;

	_enter("%s", volume->vlocation->vldb.name);

	down_read(&volume->server_sem);

	/* handle the no-server case */
	if (volume->nservers == 0) {
		if (volume->rjservers)
			worst = -ENOMEDIUM;
		else
			worst = -ESTALE;
		up_read(&volume->server_sem);
		_leave(" = %d [no servers]", worst);
		return worst;
	}

	/* basically, just search the list for the first live server and use
	 * that, remembering the most significant failure state on the way */
	worst = 0;
	for (idx = 0; idx < volume->nservers; idx++) {
		candidate = volume->servers[idx];
		fs_state = candidate->fs_state;

		if (fs_state == 0) {
			/* found an apparently healthy server */
			afs_get_server(candidate);
			up_read(&volume->server_sem);
			*_server = candidate;
			_leave(" = 0 (picked %08x)",
			       ntohl(candidate->addr.s_addr));
			return 0;
		}

		if (afs_volume_fs_state_rank(worst) <
		    afs_volume_fs_state_rank(fs_state))
			worst = fs_state;
	}

	/* no available servers
	 * - TODO: handle the no active servers case better
	 */
	up_read(&volume->server_sem);
	_leave(" = %d", worst);
	return worst;
} /* end afs_volume_pick_fileserver() */
378 | |||
/*****************************************************************************/
/*
 * release a server after use
 * - releases the ref on the server struct that was acquired by picking
 * - records the result of using that server to access the volume:
 *   - 0 and miscellaneous errors refresh the server's activity timestamp
 *     (-ENOMEM and -ENONET leave it alone)
 *   - -ENOMEDIUM removes the server from the volume's server list
 *   - network failures mark the server as dead
 * - returns 0 to tell the caller to try again with another server, 1 if the
 *   result is okay or should be issued as the error
 */
int afs_volume_release_fileserver(struct afs_volume *volume,
				  struct afs_server *server,
				  int result)
{
	unsigned slot;

	_enter("%s,%08x,%d",
	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
	       result);

	switch (result) {
		/* success */
	case 0:
		server->fs_act_jif = jiffies;
		break;

		/* the fileserver denied all knowledge of the volume */
	case -ENOMEDIUM:
		server->fs_act_jif = jiffies;
		down_write(&volume->server_sem);

		/* first, find where the server is in the active list (if it
		 * is) */
		slot = 0;
		while (slot < volume->nservers &&
		       volume->servers[slot] != server)
			slot++;

		/* no longer there - may have been discarded by another op */
		if (slot >= volume->nservers)
			goto try_next_server_upw;

		/* close the gap in the list and drop the reference that the
		 * list held on the server */
		volume->nservers--;
		memmove(&volume->servers[slot],
			&volume->servers[slot + 1],
			sizeof(volume->servers[slot]) *
			(volume->nservers - slot));
		volume->servers[volume->nservers] = NULL;
		afs_put_server(server);
		volume->rjservers++;

		if (volume->nservers > 0)
			/* another server might acknowledge its existence */
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *         no longer registered
		 */
		up_write(&volume->server_sem);
		afs_put_server(server);
		_leave(" [completely rejected]");
		return 1;

		/* problem reaching the server */
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* mark the server as dead, unless a state has already been
		 * recorded against it
		 * TODO: vary dead timeout depending on error
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			server->fs_dead_jif = jiffies + HZ * 10;
			server->fs_state = result;
			printk("kAFS: SERVER DEAD state=%d\n", result);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

		/* miscellaneous error */
	default:
		server->fs_act_jif = jiffies;
		/* fall through */
	case -ENOMEM:
	case -ENONET:
		break;
	}

	/* tell the caller to accept the result */
	afs_put_server(server);
	_leave("");
	return 1;

	/* tell the caller to loop around and try the next server */
 try_next_server_upw:
	up_write(&volume->server_sem);
 try_next_server:
	afs_put_server(server);
	_leave(" [try next server]");
	return 0;

} /* end afs_volume_release_fileserver() */
481 | |||
/*****************************************************************************/
/*
 * match a volume hash record stored in the cache
 * - target is the struct afs_volume being looked up
 * - entry is the struct afs_cache_vhash read from the cache
 * - the two match if their volume types are equal
 */
#ifdef AFS_CACHING_SUPPORT
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry)
{
	const struct afs_cache_vhash *cached = entry;
	struct afs_volume *wanted = target;

	_enter("{%u},{%u}", wanted->type, cached->vtype);

	if (wanted->type != cached->vtype) {
		_leave(" = FAILED");
		return CACHEFS_MATCH_FAILED;
	}

	_leave(" = SUCCESS");
	return CACHEFS_MATCH_SUCCESS;
} /* end afs_volume_cache_match() */
#endif
504 | |||
/*****************************************************************************/
/*
 * update a volume hash record stored in the cache
 * - source is the struct afs_volume supplying the data
 * - entry is the struct afs_cache_vhash to fill in; only the volume type is
 *   recorded
 */
#ifdef AFS_CACHING_SUPPORT
static void afs_volume_cache_update(void *source, void *entry)
{
	struct afs_cache_vhash *record = entry;
	struct afs_volume *vol = source;

	_enter("");

	record->vtype = vol->type;

} /* end afs_volume_cache_update() */
#endif