about | summary | refs | log | tree | commit | diff | stats
path: root/fs/afs/rotate.c
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2018-10-19 19:57:59 -0400
committer: David Howells <dhowells@redhat.com> 2018-10-23 19:41:09 -0400
commit: 3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e (patch)
tree: df215e6a6ad11b6ac8158461144667e168591d28 /fs/afs/rotate.c
parent: 18ac61853cc4e44eb30e125fc8344a3b25c7b6fe (diff)
afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list on all addresses simultaneously in an attempt to find out the fastest route whilst not getting stuck for 20s on any server or address that we don't get a reply from.

This alleviates the problem whereby attempting to access a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds.

Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/rotate.c')
-rw-r--r-- fs/afs/rotate.c 174
1 files changed, 114 insertions, 60 deletions
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 7c4487781637..00504254c1c2 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -19,14 +19,6 @@
19#include "afs_fs.h" 19#include "afs_fs.h"
20 20
21/* 21/*
22 * Initialise a filesystem server cursor for iterating over FS servers.
23 */
24static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
25{
26 memset(fc, 0, sizeof(*fc));
27}
28
29/*
30 * Begin an operation on the fileserver. 22 * Begin an operation on the fileserver.
31 * 23 *
32 * Fileserver operations are serialised on the server by vnode, so we serialise 24 * Fileserver operations are serialised on the server by vnode, so we serialise
@@ -35,7 +27,7 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
35bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, 27bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
36 struct key *key) 28 struct key *key)
37{ 29{
38 afs_init_fs_cursor(fc, vnode); 30 memset(fc, 0, sizeof(*fc));
39 fc->vnode = vnode; 31 fc->vnode = vnode;
40 fc->key = key; 32 fc->key = key;
41 fc->ac.error = SHRT_MAX; 33 fc->ac.error = SHRT_MAX;
@@ -66,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
66 fc->server_list = afs_get_serverlist(vnode->volume->servers); 58 fc->server_list = afs_get_serverlist(vnode->volume->servers);
67 read_unlock(&vnode->volume->servers_lock); 59 read_unlock(&vnode->volume->servers_lock);
68 60
61 fc->untried = (1UL << fc->server_list->nr_servers) - 1;
62 fc->index = READ_ONCE(fc->server_list->preferred);
63
69 cbi = vnode->cb_interest; 64 cbi = vnode->cb_interest;
70 if (cbi) { 65 if (cbi) {
71 /* See if the vnode's preferred record is still available */ 66 /* See if the vnode's preferred record is still available */
72 for (i = 0; i < fc->server_list->nr_servers; i++) { 67 for (i = 0; i < fc->server_list->nr_servers; i++) {
73 if (fc->server_list->servers[i].cb_interest == cbi) { 68 if (fc->server_list->servers[i].cb_interest == cbi) {
74 fc->start = i; 69 fc->index = i;
75 goto found_interest; 70 goto found_interest;
76 } 71 }
77 } 72 }
@@ -95,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
95 90
96 afs_put_cb_interest(afs_v2net(vnode), cbi); 91 afs_put_cb_interest(afs_v2net(vnode), cbi);
97 cbi = NULL; 92 cbi = NULL;
98 } else {
99 fc->start = READ_ONCE(fc->server_list->index);
100 } 93 }
101 94
102found_interest: 95found_interest:
103 fc->index = fc->start;
104 return true; 96 return true;
105} 97}
106 98
@@ -144,11 +136,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
144 struct afs_addr_list *alist; 136 struct afs_addr_list *alist;
145 struct afs_server *server; 137 struct afs_server *server;
146 struct afs_vnode *vnode = fc->vnode; 138 struct afs_vnode *vnode = fc->vnode;
147 int error = fc->ac.error; 139 u32 rtt, abort_code;
140 int error = fc->ac.error, i;
148 141
149 _enter("%u/%u,%u/%u,%d,%d", 142 _enter("%lx[%d],%lx[%d],%d,%d",
150 fc->index, fc->start, 143 fc->untried, fc->index,
151 fc->ac.index, fc->ac.start, 144 fc->ac.tried, fc->ac.index,
152 error, fc->ac.abort_code); 145 error, fc->ac.abort_code);
153 146
154 if (fc->flags & AFS_FS_CURSOR_STOP) { 147 if (fc->flags & AFS_FS_CURSOR_STOP) {
@@ -345,8 +338,50 @@ start:
345 if (!afs_start_fs_iteration(fc, vnode)) 338 if (!afs_start_fs_iteration(fc, vnode))
346 goto failed; 339 goto failed;
347 340
348use_server: 341 _debug("__ VOL %llx __", vnode->volume->vid);
349 _debug("use"); 342 error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
343 if (error < 0)
344 goto failed_set_error;
345
346pick_server:
347 _debug("pick [%lx]", fc->untried);
348
349 error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
350 if (error < 0)
351 goto failed_set_error;
352
353 /* Pick the untried server with the lowest RTT. If we have outstanding
354 * callbacks, we stick with the server we're already using if we can.
355 */
356 if (fc->cbi) {
357 _debug("cbi %u", fc->index);
358 if (test_bit(fc->index, &fc->untried))
359 goto selected_server;
360 afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
361 fc->cbi = NULL;
362 _debug("nocbi");
363 }
364
365 fc->index = -1;
366 rtt = U32_MAX;
367 for (i = 0; i < fc->server_list->nr_servers; i++) {
368 struct afs_server *s = fc->server_list->servers[i].server;
369
370 if (!test_bit(i, &fc->untried) || !s->probe.responded)
371 continue;
372 if (s->probe.rtt < rtt) {
373 fc->index = i;
374 rtt = s->probe.rtt;
375 }
376 }
377
378 if (fc->index == -1)
379 goto no_more_servers;
380
381selected_server:
382 _debug("use %d", fc->index);
383 __clear_bit(fc->index, &fc->untried);
384
350 /* We're starting on a different fileserver from the list. We need to 385 /* We're starting on a different fileserver from the list. We need to
351 * check it, create a callback intercept, find its address list and 386 * check it, create a callback intercept, find its address list and
352 * probe its capabilities before we use it. 387 * probe its capabilities before we use it.
@@ -379,60 +414,81 @@ use_server:
379 414
380 memset(&fc->ac, 0, sizeof(fc->ac)); 415 memset(&fc->ac, 0, sizeof(fc->ac));
381 416
382 /* Probe the current fileserver if we haven't done so yet. */
383 if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
384 fc->ac.alist = afs_get_addrlist(alist);
385
386 if (!afs_probe_fileserver(fc)) {
387 switch (fc->ac.error) {
388 case -ENOMEM:
389 case -ERESTARTSYS:
390 case -EINTR:
391 goto failed;
392 default:
393 goto next_server;
394 }
395 }
396 }
397
398 if (!fc->ac.alist) 417 if (!fc->ac.alist)
399 fc->ac.alist = alist; 418 fc->ac.alist = alist;
400 else 419 else
401 afs_put_addrlist(alist); 420 afs_put_addrlist(alist);
402 421
403 fc->ac.start = READ_ONCE(alist->index); 422 fc->ac.index = -1;
404 fc->ac.index = fc->ac.start;
405 423
406iterate_address: 424iterate_address:
407 ASSERT(fc->ac.alist); 425 ASSERT(fc->ac.alist);
408 _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
409 /* Iterate over the current server's address list to try and find an 426 /* Iterate over the current server's address list to try and find an
410 * address on which it will respond to us. 427 * address on which it will respond to us.
411 */ 428 */
412 if (!afs_iterate_addresses(&fc->ac)) 429 if (!afs_iterate_addresses(&fc->ac))
413 goto next_server; 430 goto next_server;
414 431
432 _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
433
415 _leave(" = t"); 434 _leave(" = t");
416 return true; 435 return true;
417 436
418next_server: 437next_server:
419 _debug("next"); 438 _debug("next");
420 afs_end_cursor(&fc->ac); 439 afs_end_cursor(&fc->ac);
421 afs_put_cb_interest(afs_v2net(vnode), fc->cbi); 440 goto pick_server;
422 fc->cbi = NULL;
423 fc->index++;
424 if (fc->index >= fc->server_list->nr_servers)
425 fc->index = 0;
426 if (fc->index != fc->start)
427 goto use_server;
428 441
442no_more_servers:
429 /* That's all the servers poked to no good effect. Try again if some 443 /* That's all the servers poked to no good effect. Try again if some
430 * of them were busy. 444 * of them were busy.
431 */ 445 */
432 if (fc->flags & AFS_FS_CURSOR_VBUSY) 446 if (fc->flags & AFS_FS_CURSOR_VBUSY)
433 goto restart_from_beginning; 447 goto restart_from_beginning;
434 448
435 goto failed; 449 abort_code = 0;
450 error = -EDESTADDRREQ;
451 for (i = 0; i < fc->server_list->nr_servers; i++) {
452 struct afs_server *s = fc->server_list->servers[i].server;
453 int probe_error = READ_ONCE(s->probe.error);
454
455 switch (probe_error) {
456 case 0:
457 continue;
458 default:
459 if (error == -ETIMEDOUT ||
460 error == -ETIME)
461 continue;
462 case -ETIMEDOUT:
463 case -ETIME:
464 if (error == -ENOMEM ||
465 error == -ENONET)
466 continue;
467 case -ENOMEM:
468 case -ENONET:
469 if (error == -ENETUNREACH)
470 continue;
471 case -ENETUNREACH:
472 if (error == -EHOSTUNREACH)
473 continue;
474 case -EHOSTUNREACH:
475 if (error == -ECONNREFUSED)
476 continue;
477 case -ECONNREFUSED:
478 if (error == -ECONNRESET)
479 continue;
480 case -ECONNRESET: /* Responded, but call expired. */
481 if (error == -ECONNABORTED)
482 continue;
483 case -ECONNABORTED:
484 abort_code = s->probe.abort_code;
485 error = probe_error;
486 continue;
487 }
488 }
489
490 if (error == -ECONNABORTED)
491 error = afs_abort_to_error(abort_code);
436 492
437failed_set_error: 493failed_set_error:
438 fc->error = error; 494 fc->error = error;
@@ -480,8 +536,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
480 536
481 memset(&fc->ac, 0, sizeof(fc->ac)); 537 memset(&fc->ac, 0, sizeof(fc->ac));
482 fc->ac.alist = alist; 538 fc->ac.alist = alist;
483 fc->ac.start = READ_ONCE(alist->index); 539 fc->ac.index = -1;
484 fc->ac.index = fc->ac.start;
485 goto iterate_address; 540 goto iterate_address;
486 541
487 case 0: 542 case 0:
@@ -538,13 +593,13 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
538 pr_notice("EDESTADDR occurred\n"); 593 pr_notice("EDESTADDR occurred\n");
539 pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", 594 pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
540 fc->cb_break, fc->cb_break_2, fc->flags, fc->error); 595 fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
541 pr_notice("FC: st=%u ix=%u ni=%u\n", 596 pr_notice("FC: ut=%lx ix=%d ni=%u\n",
542 fc->start, fc->index, fc->nr_iterations); 597 fc->untried, fc->index, fc->nr_iterations);
543 598
544 if (fc->server_list) { 599 if (fc->server_list) {
545 const struct afs_server_list *sl = fc->server_list; 600 const struct afs_server_list *sl = fc->server_list;
546 pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n", 601 pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
547 sl->nr_servers, sl->index, sl->vnovol_mask); 602 sl->nr_servers, sl->preferred, sl->vnovol_mask);
548 for (i = 0; i < sl->nr_servers; i++) { 603 for (i = 0; i < sl->nr_servers; i++) {
549 const struct afs_server *s = sl->servers[i].server; 604 const struct afs_server *s = sl->servers[i].server;
550 pr_notice("FC: server fl=%lx av=%u %pU\n", 605 pr_notice("FC: server fl=%lx av=%u %pU\n",
@@ -552,22 +607,21 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
552 if (s->addresses) { 607 if (s->addresses) {
553 const struct afs_addr_list *a = 608 const struct afs_addr_list *a =
554 rcu_dereference(s->addresses); 609 rcu_dereference(s->addresses);
555 pr_notice("FC: - av=%u nr=%u/%u/%u ax=%u\n", 610 pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
556 a->version, 611 a->version,
557 a->nr_ipv4, a->nr_addrs, a->max_addrs, 612 a->nr_ipv4, a->nr_addrs, a->max_addrs,
558 a->index); 613 a->preferred);
559 pr_notice("FC: - pr=%lx yf=%lx\n", 614 pr_notice("FC: - pr=%lx R=%lx F=%lx\n",
560 a->probed, a->yfs); 615 a->probed, a->responded, a->failed);
561 if (a == fc->ac.alist) 616 if (a == fc->ac.alist)
562 pr_notice("FC: - current\n"); 617 pr_notice("FC: - current\n");
563 } 618 }
564 } 619 }
565 } 620 }
566 621
567 pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n", 622 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
568 fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error, 623 fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
569 fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations); 624 fc->ac.responded, fc->ac.nr_iterations);
570
571 rcu_read_unlock(); 625 rcu_read_unlock();
572} 626}
573 627