summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Howells <dhowells@redhat.com> 2018-10-19 19:57:59 -0400
committer David Howells <dhowells@redhat.com> 2018-10-23 19:41:09 -0400
commit 3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e (patch)
tree df215e6a6ad11b6ac8158461144667e168591d28
parent 18ac61853cc4e44eb30e125fc8344a3b25c7b6fe (diff)
afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list on all addresses simultaneously in an attempt to find out the fastest route whilst not getting stuck for 20s on any server or address that we don't get a reply from.

This alleviates the problem whereby attempting to access a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds.

Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r--fs/afs/Makefile4
-rw-r--r--fs/afs/addr_list.c40
-rw-r--r--fs/afs/cmservice.c129
-rw-r--r--fs/afs/fs_probe.c270
-rw-r--r--fs/afs/fsclient.c27
-rw-r--r--fs/afs/internal.h98
-rw-r--r--fs/afs/proc.c6
-rw-r--r--fs/afs/rotate.c174
-rw-r--r--fs/afs/rxrpc.c44
-rw-r--r--fs/afs/server.c109
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/vl_list.c6
-rw-r--r--fs/afs/vl_probe.c273
-rw-r--r--fs/afs/vl_rotate.c159
-rw-r--r--fs/afs/vlclient.c35
-rw-r--r--fs/afs/volume.c16
-rw-r--r--include/trace/events/afs.h4
17 files changed, 1050 insertions, 350 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cc942b790cff..0738e2bf5193 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -17,6 +17,7 @@ kafs-y := \
17 file.o \ 17 file.o \
18 flock.o \ 18 flock.o \
19 fsclient.o \ 19 fsclient.o \
20 fs_probe.o \
20 inode.o \ 21 inode.o \
21 main.o \ 22 main.o \
22 misc.o \ 23 misc.o \
@@ -29,8 +30,9 @@ kafs-y := \
29 super.o \ 30 super.o \
30 netdevices.o \ 31 netdevices.o \
31 vlclient.o \ 32 vlclient.o \
32 vl_rotate.o \
33 vl_list.o \ 33 vl_list.o \
34 vl_probe.o \
35 vl_rotate.o \
34 volume.o \ 36 volume.o \
35 write.o \ 37 write.o \
36 xattr.o \ 38 xattr.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 1536d1d21c33..967db336d11a 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
303 sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); 303 sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
304 304
305 srx = &alist->addrs[i]; 305 srx = &alist->addrs[i];
306 srx->srx_family = AF_RXRPC;
307 srx->transport_type = SOCK_DGRAM;
306 srx->transport_len = sizeof(srx->transport.sin); 308 srx->transport_len = sizeof(srx->transport.sin);
307 srx->transport.sin.sin_family = AF_INET; 309 srx->transport.sin.sin_family = AF_INET;
308 srx->transport.sin.sin_port = htons(port); 310 srx->transport.sin.sin_port = htons(port);
@@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
341 sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); 343 sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
342 344
343 srx = &alist->addrs[i]; 345 srx = &alist->addrs[i];
346 srx->srx_family = AF_RXRPC;
347 srx->transport_type = SOCK_DGRAM;
344 srx->transport_len = sizeof(srx->transport.sin6); 348 srx->transport_len = sizeof(srx->transport.sin6);
345 srx->transport.sin6.sin6_family = AF_INET6; 349 srx->transport.sin6.sin6_family = AF_INET6;
346 srx->transport.sin6.sin6_port = htons(port); 350 srx->transport.sin6.sin6_port = htons(port);
@@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
353 */ 357 */
354bool afs_iterate_addresses(struct afs_addr_cursor *ac) 358bool afs_iterate_addresses(struct afs_addr_cursor *ac)
355{ 359{
356 _enter("%hu+%hd", ac->start, (short)ac->index); 360 unsigned long set, failed;
361 int index;
357 362
358 if (!ac->alist) 363 if (!ac->alist)
359 return false; 364 return false;
360 365
366 set = ac->alist->responded;
367 failed = ac->alist->failed;
368 _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
369
361 ac->nr_iterations++; 370 ac->nr_iterations++;
362 371
363 if (ac->begun) { 372 set &= ~(failed | ac->tried);
364 ac->index++;
365 if (ac->index == ac->alist->nr_addrs)
366 ac->index = 0;
367 373
368 if (ac->index == ac->start) 374 if (!set)
369 return false; 375 return false;
370 } 376
377 index = READ_ONCE(ac->alist->preferred);
378 if (test_bit(index, &set))
379 goto selected;
380
381 index = __ffs(set);
371 382
372 ac->begun = true; 383selected:
384 ac->index = index;
385 set_bit(index, &ac->tried);
373 ac->responded = false; 386 ac->responded = false;
374 return true; 387 return true;
375} 388}
@@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
383 396
384 alist = ac->alist; 397 alist = ac->alist;
385 if (alist) { 398 if (alist) {
386 if (ac->responded && ac->index != ac->start) 399 if (ac->responded &&
387 WRITE_ONCE(alist->index, ac->index); 400 ac->index != alist->preferred &&
401 test_bit(ac->alist->preferred, &ac->tried))
402 WRITE_ONCE(alist->preferred, ac->index);
388 afs_put_addrlist(alist); 403 afs_put_addrlist(alist);
404 ac->alist = NULL;
389 } 405 }
390 406
391 ac->alist = NULL;
392 ac->begun = false;
393 return ac->error; 407 return ac->error;
394} 408}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 8cf8d10daa6c..8ee5972893ed 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
122{ 122{
123 _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); 123 _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
124 124
125 call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
126
125 switch (call->operation_ID) { 127 switch (call->operation_ID) {
126 case CBCallBack: 128 case CBCallBack:
127 call->type = &afs_SRXCBCallBack; 129 call->type = &afs_SRXCBCallBack;
@@ -152,6 +154,91 @@ bool afs_cm_incoming_call(struct afs_call *call)
152} 154}
153 155
154/* 156/*
157 * Record a probe to the cache manager from a server.
158 */
159static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
160{
161 _enter("");
162
163 if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
164 !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
165 if (server->cm_epoch == call->epoch)
166 return 0;
167
168 if (!server->probe.said_rebooted) {
169 pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
170 server->probe.said_rebooted = true;
171 }
172 }
173
174 spin_lock(&server->probe_lock);
175
176 if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
177 server->cm_epoch = call->epoch;
178 server->probe.cm_epoch = call->epoch;
179 goto out;
180 }
181
182 if (server->probe.cm_probed &&
183 call->epoch != server->probe.cm_epoch &&
184 !server->probe.said_inconsistent) {
185 pr_notice("kAFS: FS endpoints inconsistent %pU\n",
186 &server->uuid);
187 server->probe.said_inconsistent = true;
188 }
189
190 if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
191 server->probe.cm_epoch = server->cm_epoch;
192
193out:
194 server->probe.cm_probed = true;
195 spin_unlock(&server->probe_lock);
196 return 0;
197}
198
199/*
200 * Find the server record by peer address and record a probe to the cache
201 * manager from a server.
202 */
203static int afs_find_cm_server_by_peer(struct afs_call *call)
204{
205 struct sockaddr_rxrpc srx;
206 struct afs_server *server;
207
208 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
209
210 server = afs_find_server(call->net, &srx);
211 if (!server) {
212 trace_afs_cm_no_server(call, &srx);
213 return 0;
214 }
215
216 call->cm_server = server;
217 return afs_record_cm_probe(call, server);
218}
219
220/*
221 * Find the server record by server UUID and record a probe to the cache
222 * manager from a server.
223 */
224static int afs_find_cm_server_by_uuid(struct afs_call *call,
225 struct afs_uuid *uuid)
226{
227 struct afs_server *server;
228
229 rcu_read_lock();
230 server = afs_find_server_by_uuid(call->net, call->request);
231 rcu_read_unlock();
232 if (!server) {
233 trace_afs_cm_no_server_u(call, call->request);
234 return 0;
235 }
236
237 call->cm_server = server;
238 return afs_record_cm_probe(call, server);
239}
240
241/*
155 * Clean up a cache manager call. 242 * Clean up a cache manager call.
156 */ 243 */
157static void afs_cm_destructor(struct afs_call *call) 244static void afs_cm_destructor(struct afs_call *call)
@@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
187static int afs_deliver_cb_callback(struct afs_call *call) 274static int afs_deliver_cb_callback(struct afs_call *call)
188{ 275{
189 struct afs_callback_break *cb; 276 struct afs_callback_break *cb;
190 struct sockaddr_rxrpc srx;
191 __be32 *bp; 277 __be32 *bp;
192 int ret, loop; 278 int ret, loop;
193 279
@@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
276 362
277 /* we'll need the file server record as that tells us which set of 363 /* we'll need the file server record as that tells us which set of
278 * vnodes to operate upon */ 364 * vnodes to operate upon */
279 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); 365 return afs_find_cm_server_by_peer(call);
280 call->cm_server = afs_find_server(call->net, &srx);
281 if (!call->cm_server)
282 trace_afs_cm_no_server(call, &srx);
283
284 return afs_queue_call_work(call);
285} 366}
286 367
287/* 368/*
@@ -305,13 +386,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
305 */ 386 */
306static int afs_deliver_cb_init_call_back_state(struct afs_call *call) 387static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
307{ 388{
308 struct sockaddr_rxrpc srx;
309 int ret; 389 int ret;
310 390
311 _enter(""); 391 _enter("");
312 392
313 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
314
315 afs_extract_discard(call, 0); 393 afs_extract_discard(call, 0);
316 ret = afs_extract_data(call, false); 394 ret = afs_extract_data(call, false);
317 if (ret < 0) 395 if (ret < 0)
@@ -319,11 +397,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
319 397
320 /* we'll need the file server record as that tells us which set of 398 /* we'll need the file server record as that tells us which set of
321 * vnodes to operate upon */ 399 * vnodes to operate upon */
322 call->cm_server = afs_find_server(call->net, &srx); 400 return afs_find_cm_server_by_peer(call);
323 if (!call->cm_server)
324 trace_afs_cm_no_server(call, &srx);
325
326 return afs_queue_call_work(call);
327} 401}
328 402
329/* 403/*
@@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
384 458
385 /* we'll need the file server record as that tells us which set of 459 /* we'll need the file server record as that tells us which set of
386 * vnodes to operate upon */ 460 * vnodes to operate upon */
387 rcu_read_lock(); 461 return afs_find_cm_server_by_uuid(call, call->request);
388 call->cm_server = afs_find_server_by_uuid(call->net, call->request);
389 rcu_read_unlock();
390 if (!call->cm_server)
391 trace_afs_cm_no_server_u(call, call->request);
392
393 return afs_queue_call_work(call);
394} 462}
395 463
396/* 464/*
@@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
422 490
423 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) 491 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
424 return afs_io_error(call, afs_io_error_cm_reply); 492 return afs_io_error(call, afs_io_error_cm_reply);
425 493 return afs_find_cm_server_by_peer(call);
426 return afs_queue_call_work(call);
427} 494}
428 495
429/* 496/*
@@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
503 570
504 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) 571 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
505 return afs_io_error(call, afs_io_error_cm_reply); 572 return afs_io_error(call, afs_io_error_cm_reply);
506 573 return afs_find_cm_server_by_uuid(call, call->request);
507 return afs_queue_call_work(call);
508} 574}
509 575
510/* 576/*
@@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
586 652
587 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) 653 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
588 return afs_io_error(call, afs_io_error_cm_reply); 654 return afs_io_error(call, afs_io_error_cm_reply);
589 655 return afs_find_cm_server_by_peer(call);
590 return afs_queue_call_work(call);
591} 656}
592 657
593/* 658/*
@@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
596static int afs_deliver_yfs_cb_callback(struct afs_call *call) 661static int afs_deliver_yfs_cb_callback(struct afs_call *call)
597{ 662{
598 struct afs_callback_break *cb; 663 struct afs_callback_break *cb;
599 struct sockaddr_rxrpc srx;
600 struct yfs_xdr_YFSFid *bp; 664 struct yfs_xdr_YFSFid *bp;
601 size_t size; 665 size_t size;
602 int ret, loop; 666 int ret, loop;
@@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
664 /* We'll need the file server record as that tells us which set of 728 /* We'll need the file server record as that tells us which set of
665 * vnodes to operate upon. 729 * vnodes to operate upon.
666 */ 730 */
667 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); 731 return afs_find_cm_server_by_peer(call);
668 call->cm_server = afs_find_server(call->net, &srx);
669 if (!call->cm_server)
670 trace_afs_cm_no_server(call, &srx);
671
672 return afs_queue_call_work(call);
673} 732}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 000000000000..d049cb459742
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,270 @@
1/* AFS fileserver probing
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include "afs_fs.h"
15#include "internal.h"
16#include "protocol_yfs.h"
17
18static bool afs_fs_probe_done(struct afs_server *server)
19{
20 if (!atomic_dec_and_test(&server->probe_outstanding))
21 return false;
22
23 wake_up_var(&server->probe_outstanding);
24 clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
25 wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
26 return true;
27}
28
29/*
30 * Process the result of probing a fileserver. This is called after successful
31 * or failed delivery of an FS.GetCapabilities operation.
32 */
33void afs_fileserver_probe_result(struct afs_call *call)
34{
35 struct afs_addr_list *alist = call->alist;
36 struct afs_server *server = call->reply[0];
37 unsigned int server_index = (long)call->reply[1];
38 unsigned int index = call->addr_ix;
39 unsigned int rtt = UINT_MAX;
40 bool have_result = false;
41 u64 _rtt;
42 int ret = call->error;
43
44 _enter("%pU,%u", &server->uuid, index);
45
46 spin_lock(&server->probe_lock);
47
48 switch (ret) {
49 case 0:
50 server->probe.error = 0;
51 goto responded;
52 case -ECONNABORTED:
53 if (!server->probe.responded) {
54 server->probe.abort_code = call->abort_code;
55 server->probe.error = ret;
56 }
57 goto responded;
58 case -ENOMEM:
59 case -ENONET:
60 server->probe.local_failure = true;
61 afs_io_error(call, afs_io_error_fs_probe_fail);
62 goto out;
63 case -ECONNRESET: /* Responded, but call expired. */
64 case -ENETUNREACH:
65 case -EHOSTUNREACH:
66 case -ECONNREFUSED:
67 case -ETIMEDOUT:
68 case -ETIME:
69 default:
70 clear_bit(index, &alist->responded);
71 set_bit(index, &alist->failed);
72 if (!server->probe.responded &&
73 (server->probe.error == 0 ||
74 server->probe.error == -ETIMEDOUT ||
75 server->probe.error == -ETIME))
76 server->probe.error = ret;
77 afs_io_error(call, afs_io_error_fs_probe_fail);
78 goto out;
79 }
80
81responded:
82 set_bit(index, &alist->responded);
83 clear_bit(index, &alist->failed);
84
85 if (call->service_id == YFS_FS_SERVICE) {
86 server->probe.is_yfs = true;
87 set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
88 alist->addrs[index].srx_service = call->service_id;
89 } else {
90 server->probe.not_yfs = true;
91 if (!server->probe.is_yfs) {
92 clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
93 alist->addrs[index].srx_service = call->service_id;
94 }
95 }
96
97 /* Get the RTT and scale it to fit into a 32-bit value that represents
98 * over a minute of time so that we can access it with one instruction
99 * on a 32-bit system.
100 */
101 _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
102 _rtt /= 64;
103 rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
104 if (rtt < server->probe.rtt) {
105 server->probe.rtt = rtt;
106 alist->preferred = index;
107 have_result = true;
108 }
109
110 smp_wmb(); /* Set rtt before responded. */
111 server->probe.responded = true;
112 set_bit(AFS_SERVER_FL_PROBED, &server->flags);
113out:
114 spin_unlock(&server->probe_lock);
115
116 _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
117 server_index, index, &alist->addrs[index].transport,
118 (unsigned int)rtt, ret);
119
120 have_result |= afs_fs_probe_done(server);
121 if (have_result) {
122 server->probe.have_result = true;
123 wake_up_var(&server->probe.have_result);
124 wake_up_all(&server->probe_wq);
125 }
126}
127
128/*
129 * Probe all of a fileserver's addresses to find out the best route and to
130 * query its capabilities.
131 */
132static int afs_do_probe_fileserver(struct afs_net *net,
133 struct afs_server *server,
134 struct key *key,
135 unsigned int server_index)
136{
137 struct afs_addr_cursor ac = {
138 .index = 0,
139 };
140 int ret;
141
142 _enter("%pU", &server->uuid);
143
144 read_lock(&server->fs_lock);
145 ac.alist = rcu_dereference_protected(server->addresses,
146 lockdep_is_held(&server->fs_lock));
147 read_unlock(&server->fs_lock);
148
149 atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
150 memset(&server->probe, 0, sizeof(server->probe));
151 server->probe.rtt = UINT_MAX;
152
153 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
154 ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
155 true);
156 if (ret != -EINPROGRESS) {
157 afs_fs_probe_done(server);
158 return ret;
159 }
160 }
161
162 return 0;
163}
164
165/*
166 * Send off probes to all unprobed servers.
167 */
168int afs_probe_fileservers(struct afs_net *net, struct key *key,
169 struct afs_server_list *list)
170{
171 struct afs_server *server;
172 int i, ret;
173
174 for (i = 0; i < list->nr_servers; i++) {
175 server = list->servers[i].server;
176 if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
177 continue;
178
179 if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
180 ret = afs_do_probe_fileserver(net, server, key, i);
181 if (ret)
182 return ret;
183 }
184 }
185
186 return 0;
187}
188
189/*
190 * Wait for the first as-yet untried fileserver to respond.
191 */
192int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
193{
194 struct wait_queue_entry *waits;
195 struct afs_server *server;
196 unsigned int rtt = UINT_MAX;
197 bool have_responders = false;
198 int pref = -1, i;
199
200 _enter("%u,%lx", slist->nr_servers, untried);
201
202 /* Only wait for servers that have a probe outstanding. */
203 for (i = 0; i < slist->nr_servers; i++) {
204 if (test_bit(i, &untried)) {
205 server = slist->servers[i].server;
206 if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
207 __clear_bit(i, &untried);
208 if (server->probe.responded)
209 have_responders = true;
210 }
211 }
212 if (have_responders || !untried)
213 return 0;
214
215 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
216 if (!waits)
217 return -ENOMEM;
218
219 for (i = 0; i < slist->nr_servers; i++) {
220 if (test_bit(i, &untried)) {
221 server = slist->servers[i].server;
222 init_waitqueue_entry(&waits[i], current);
223 add_wait_queue(&server->probe_wq, &waits[i]);
224 }
225 }
226
227 for (;;) {
228 bool still_probing = false;
229
230 set_current_state(TASK_INTERRUPTIBLE);
231 for (i = 0; i < slist->nr_servers; i++) {
232 if (test_bit(i, &untried)) {
233 server = slist->servers[i].server;
234 if (server->probe.responded)
235 goto stop;
236 if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
237 still_probing = true;
238 }
239 }
240
241 if (!still_probing || unlikely(signal_pending(current)))
242 goto stop;
243 schedule();
244 }
245
246stop:
247 set_current_state(TASK_RUNNING);
248
249 for (i = 0; i < slist->nr_servers; i++) {
250 if (test_bit(i, &untried)) {
251 server = slist->servers[i].server;
252 if (server->probe.responded &&
253 server->probe.rtt < rtt) {
254 pref = i;
255 rtt = server->probe.rtt;
256 }
257
258 remove_wait_queue(&server->probe_wq, &waits[i]);
259 }
260 }
261
262 kfree(waits);
263
264 if (pref == -1 && signal_pending(current))
265 return -ERESTARTSYS;
266
267 if (pref >= 0)
268 slist->preferred = pref;
269 return 0;
270}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7c75a1813321..ca08c83168f5 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2006,7 +2006,6 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
2006 */ 2006 */
2007static int afs_deliver_fs_get_capabilities(struct afs_call *call) 2007static int afs_deliver_fs_get_capabilities(struct afs_call *call)
2008{ 2008{
2009 struct afs_server *server = call->reply[0];
2010 u32 count; 2009 u32 count;
2011 int ret; 2010 int ret;
2012 2011
@@ -2042,15 +2041,18 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
2042 break; 2041 break;
2043 } 2042 }
2044 2043
2045 if (call->service_id == YFS_FS_SERVICE)
2046 set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
2047 else
2048 clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
2049
2050 _leave(" = 0 [done]"); 2044 _leave(" = 0 [done]");
2051 return 0; 2045 return 0;
2052} 2046}
2053 2047
2048static void afs_destroy_fs_get_capabilities(struct afs_call *call)
2049{
2050 struct afs_server *server = call->reply[0];
2051
2052 afs_put_server(call->net, server);
2053 afs_flat_call_destructor(call);
2054}
2055
2054/* 2056/*
2055 * FS.GetCapabilities operation type 2057 * FS.GetCapabilities operation type
2056 */ 2058 */
@@ -2058,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
2058 .name = "FS.GetCapabilities", 2060 .name = "FS.GetCapabilities",
2059 .op = afs_FS_GetCapabilities, 2061 .op = afs_FS_GetCapabilities,
2060 .deliver = afs_deliver_fs_get_capabilities, 2062 .deliver = afs_deliver_fs_get_capabilities,
2061 .destructor = afs_flat_call_destructor, 2063 .done = afs_fileserver_probe_result,
2064 .destructor = afs_destroy_fs_get_capabilities,
2062}; 2065};
2063 2066
2064/* 2067/*
@@ -2068,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
2068int afs_fs_get_capabilities(struct afs_net *net, 2071int afs_fs_get_capabilities(struct afs_net *net,
2069 struct afs_server *server, 2072 struct afs_server *server,
2070 struct afs_addr_cursor *ac, 2073 struct afs_addr_cursor *ac,
2071 struct key *key) 2074 struct key *key,
2075 unsigned int server_index,
2076 bool async)
2072{ 2077{
2073 struct afs_call *call; 2078 struct afs_call *call;
2074 __be32 *bp; 2079 __be32 *bp;
@@ -2080,8 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
2080 return -ENOMEM; 2085 return -ENOMEM;
2081 2086
2082 call->key = key; 2087 call->key = key;
2083 call->reply[0] = server; 2088 call->reply[0] = afs_get_server(server);
2089 call->reply[1] = (void *)(long)server_index;
2084 call->upgrade = true; 2090 call->upgrade = true;
2091 call->want_reply_time = true;
2085 2092
2086 /* marshall the parameters */ 2093 /* marshall the parameters */
2087 bp = call->request; 2094 bp = call->request;
@@ -2089,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
2089 2096
2090 /* Can't take a ref on server */ 2097 /* Can't take a ref on server */
2091 trace_afs_make_fs_call(call, NULL); 2098 trace_afs_make_fs_call(call, NULL);
2092 return afs_make_call(ac, call, GFP_NOFS, false); 2099 return afs_make_call(ac, call, GFP_NOFS, async);
2093} 2100}
2094 2101
2095/* 2102/*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b60d15212975..5da3b09b7518 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -76,12 +76,13 @@ struct afs_addr_list {
76 u32 version; /* Version */ 76 u32 version; /* Version */
77 unsigned char max_addrs; 77 unsigned char max_addrs;
78 unsigned char nr_addrs; 78 unsigned char nr_addrs;
79 unsigned char index; /* Address currently in use */ 79 unsigned char preferred; /* Preferred address */
80 unsigned char nr_ipv4; /* Number of IPv4 addresses */ 80 unsigned char nr_ipv4; /* Number of IPv4 addresses */
81 enum dns_record_source source:8; 81 enum dns_record_source source:8;
82 enum dns_lookup_status status:8; 82 enum dns_lookup_status status:8;
83 unsigned long probed; /* Mask of servers that have been probed */ 83 unsigned long probed; /* Mask of servers that have been probed */
84 unsigned long yfs; /* Mask of servers that are YFS */ 84 unsigned long failed; /* Mask of addrs that failed locally/ICMP */
85 unsigned long responded; /* Mask of addrs that responded */
85 struct sockaddr_rxrpc addrs[]; 86 struct sockaddr_rxrpc addrs[];
86#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) 87#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
87}; 88};
@@ -91,6 +92,7 @@ struct afs_addr_list {
91 */ 92 */
92struct afs_call { 93struct afs_call {
93 const struct afs_call_type *type; /* type of call */ 94 const struct afs_call_type *type; /* type of call */
95 struct afs_addr_list *alist; /* Address is alist[addr_ix] */
94 wait_queue_head_t waitq; /* processes awaiting completion */ 96 wait_queue_head_t waitq; /* processes awaiting completion */
95 struct work_struct async_work; /* async I/O processor */ 97 struct work_struct async_work; /* async I/O processor */
96 struct work_struct work; /* actual work processor */ 98 struct work_struct work; /* actual work processor */
@@ -116,6 +118,7 @@ struct afs_call {
116 spinlock_t state_lock; 118 spinlock_t state_lock;
117 int error; /* error code */ 119 int error; /* error code */
118 u32 abort_code; /* Remote abort ID or 0 */ 120 u32 abort_code; /* Remote abort ID or 0 */
121 u32 epoch;
119 unsigned request_size; /* size of request data */ 122 unsigned request_size; /* size of request data */
120 unsigned reply_max; /* maximum size of reply */ 123 unsigned reply_max; /* maximum size of reply */
121 unsigned first_offset; /* offset into mapping[first] */ 124 unsigned first_offset; /* offset into mapping[first] */
@@ -125,13 +128,14 @@ struct afs_call {
125 unsigned count2; /* count used in unmarshalling */ 128 unsigned count2; /* count used in unmarshalling */
126 }; 129 };
127 unsigned char unmarshall; /* unmarshalling phase */ 130 unsigned char unmarshall; /* unmarshalling phase */
131 unsigned char addr_ix; /* Address in ->alist */
128 bool incoming; /* T if incoming call */ 132 bool incoming; /* T if incoming call */
129 bool send_pages; /* T if data from mapping should be sent */ 133 bool send_pages; /* T if data from mapping should be sent */
130 bool need_attention; /* T if RxRPC poked us */ 134 bool need_attention; /* T if RxRPC poked us */
131 bool async; /* T if asynchronous */ 135 bool async; /* T if asynchronous */
132 bool ret_reply0; /* T if should return reply[0] on success */ 136 bool ret_reply0; /* T if should return reply[0] on success */
133 bool upgrade; /* T to request service upgrade */ 137 bool upgrade; /* T to request service upgrade */
134 bool want_reply_time; /* T if want reply_time */ 138 bool want_reply_time; /* T if want reply_time */
135 u16 service_id; /* Actual service ID (after upgrade) */ 139 u16 service_id; /* Actual service ID (after upgrade) */
136 unsigned int debug_id; /* Trace ID */ 140 unsigned int debug_id; /* Trace ID */
137 u32 operation_ID; /* operation ID for an incoming call */ 141 u32 operation_ID; /* operation ID for an incoming call */
@@ -162,6 +166,9 @@ struct afs_call_type {
162 166
163 /* Work function */ 167 /* Work function */
164 void (*work)(struct work_struct *work); 168 void (*work)(struct work_struct *work);
169
170 /* Call done function (gets called immediately on success or failure) */
171 void (*done)(struct afs_call *call);
165}; 172};
166 173
167/* 174/*
@@ -376,10 +383,27 @@ struct afs_vlserver {
376 unsigned long flags; 383 unsigned long flags;
377#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ 384#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
378#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ 385#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
386#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
379 rwlock_t lock; /* Lock on addresses */ 387 rwlock_t lock; /* Lock on addresses */
380 atomic_t usage; 388 atomic_t usage;
381 u16 name_len; /* Length of name */ 389
390 /* Probe state */
391 wait_queue_head_t probe_wq;
392 atomic_t probe_outstanding;
393 spinlock_t probe_lock;
394 struct {
395 unsigned int rtt; /* RTT as ktime/64 */
396 u32 abort_code;
397 short error;
398 bool have_result;
399 bool responded:1;
400 bool is_yfs:1;
401 bool not_yfs:1;
402 bool local_failure:1;
403 } probe;
404
382 u16 port; 405 u16 port;
406 u16 name_len; /* Length of name */
383 char name[]; /* Server name, case-flattened */ 407 char name[]; /* Server name, case-flattened */
384}; 408};
385 409
@@ -399,6 +423,7 @@ struct afs_vlserver_list {
399 atomic_t usage; 423 atomic_t usage;
400 u8 nr_servers; 424 u8 nr_servers;
401 u8 index; /* Server currently in use */ 425 u8 index; /* Server currently in use */
426 u8 preferred; /* Preferred server */
402 enum dns_record_source source:8; 427 enum dns_record_source source:8;
403 enum dns_lookup_status status:8; 428 enum dns_lookup_status status:8;
404 rwlock_t lock; 429 rwlock_t lock;
@@ -461,8 +486,10 @@ struct afs_server {
461#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ 486#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
462#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ 487#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */
463#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ 488#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */
489#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */
464 atomic_t usage; 490 atomic_t usage;
465 u32 addr_version; /* Address list version */ 491 u32 addr_version; /* Address list version */
492 u32 cm_epoch; /* Server RxRPC epoch */
466 493
467 /* file service access */ 494 /* file service access */
468 rwlock_t fs_lock; /* access lock */ 495 rwlock_t fs_lock; /* access lock */
@@ -471,6 +498,26 @@ struct afs_server {
471 struct hlist_head cb_volumes; /* List of volume interests on this server */ 498 struct hlist_head cb_volumes; /* List of volume interests on this server */
472 unsigned cb_s_break; /* Break-everything counter. */ 499 unsigned cb_s_break; /* Break-everything counter. */
473 rwlock_t cb_break_lock; /* Volume finding lock */ 500 rwlock_t cb_break_lock; /* Volume finding lock */
501
502 /* Probe state */
503 wait_queue_head_t probe_wq;
504 atomic_t probe_outstanding;
505 spinlock_t probe_lock;
506 struct {
507 unsigned int rtt; /* RTT as ktime/64 */
508 u32 abort_code;
509 u32 cm_epoch;
510 short error;
511 bool have_result;
512 bool responded:1;
513 bool is_yfs:1;
514 bool not_yfs:1;
515 bool local_failure:1;
516 bool no_epoch:1;
517 bool cm_probed:1;
518 bool said_rebooted:1;
519 bool said_inconsistent:1;
520 } probe;
474}; 521};
475 522
476/* 523/*
@@ -505,8 +552,8 @@ struct afs_server_entry {
505 552
506struct afs_server_list { 553struct afs_server_list {
507 refcount_t usage; 554 refcount_t usage;
508 unsigned short nr_servers; 555 unsigned char nr_servers;
509 unsigned short index; /* Server currently in use */ 556 unsigned char preferred; /* Preferred server */
510 unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ 557 unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
511 unsigned int seq; /* Set to ->servers_seq when installed */ 558 unsigned int seq; /* Set to ->servers_seq when installed */
512 rwlock_t lock; 559 rwlock_t lock;
@@ -653,13 +700,12 @@ struct afs_interface {
653 */ 700 */
654struct afs_addr_cursor { 701struct afs_addr_cursor {
655 struct afs_addr_list *alist; /* Current address list (pins ref) */ 702 struct afs_addr_list *alist; /* Current address list (pins ref) */
656 u32 abort_code; 703 unsigned long tried; /* Tried addresses */
657 unsigned short start; /* Starting point in alist->addrs[] */ 704 signed char index; /* Current address */
658 unsigned short index; /* Wrapping offset from start to current addr */
659 short error;
660 bool begun; /* T if we've begun iteration */
661 bool responded; /* T if the current address responded */ 705 bool responded; /* T if the current address responded */
662 unsigned short nr_iterations; /* Number of address iterations */ 706 unsigned short nr_iterations; /* Number of address iterations */
707 short error;
708 u32 abort_code;
663}; 709};
664 710
665/* 711/*
@@ -669,9 +715,10 @@ struct afs_vl_cursor {
669 struct afs_addr_cursor ac; 715 struct afs_addr_cursor ac;
670 struct afs_cell *cell; /* The cell we're querying */ 716 struct afs_cell *cell; /* The cell we're querying */
671 struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ 717 struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
718 struct afs_vlserver *server; /* Server on which this resides */
672 struct key *key; /* Key for the server */ 719 struct key *key; /* Key for the server */
673 unsigned char start; /* Initial index in server list */ 720 unsigned long untried; /* Bitmask of untried servers */
674 unsigned char index; /* Number of servers tried beyond start */ 721 short index; /* Current server */
675 short error; 722 short error;
676 unsigned short flags; 723 unsigned short flags;
677#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ 724#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
@@ -689,10 +736,10 @@ struct afs_fs_cursor {
689 struct afs_server_list *server_list; /* Current server list (pins ref) */ 736 struct afs_server_list *server_list; /* Current server list (pins ref) */
690 struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ 737 struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
691 struct key *key; /* Key for the server */ 738 struct key *key; /* Key for the server */
739 unsigned long untried; /* Bitmask of untried servers */
692 unsigned int cb_break; /* cb_break + cb_s_break before the call */ 740 unsigned int cb_break; /* cb_break + cb_s_break before the call */
693 unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ 741 unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
694 unsigned char start; /* Initial index in server list */ 742 short index; /* Current server */
695 unsigned char index; /* Number of servers tried beyond start */
696 short error; 743 short error;
697 unsigned short flags; 744 unsigned short flags;
698#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ 745#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
@@ -888,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
888extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, 935extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
889 struct afs_addr_cursor *, struct key *); 936 struct afs_addr_cursor *, struct key *);
890extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, 937extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
891 struct afs_addr_cursor *, struct key *); 938 struct afs_addr_cursor *, struct key *, unsigned int, bool);
892extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, 939extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
893 struct afs_fid *, struct afs_file_status *, 940 struct afs_fid *, struct afs_file_status *,
894 struct afs_callback *, unsigned int, 941 struct afs_callback *, unsigned int,
@@ -898,6 +945,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
898 struct afs_callback *, struct afs_volsync *); 945 struct afs_callback *, struct afs_volsync *);
899 946
900/* 947/*
948 * fs_probe.c
949 */
950extern void afs_fileserver_probe_result(struct afs_call *);
951extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
952extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
953
954/*
901 * inode.c 955 * inode.c
902 */ 956 */
903extern int afs_fetch_status(struct afs_vnode *, struct key *, bool); 957extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
@@ -1013,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
1013extern void __net_exit afs_close_socket(struct afs_net *); 1067extern void __net_exit afs_close_socket(struct afs_net *);
1014extern void afs_charge_preallocation(struct work_struct *); 1068extern void afs_charge_preallocation(struct work_struct *);
1015extern void afs_put_call(struct afs_call *); 1069extern void afs_put_call(struct afs_call *);
1016extern int afs_queue_call_work(struct afs_call *);
1017extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool); 1070extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
1018extern struct afs_call *afs_alloc_flat_call(struct afs_net *, 1071extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
1019 const struct afs_call_type *, 1072 const struct afs_call_type *,
@@ -1130,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
1130extern void afs_manage_servers(struct work_struct *); 1183extern void afs_manage_servers(struct work_struct *);
1131extern void afs_servers_timer(struct timer_list *); 1184extern void afs_servers_timer(struct timer_list *);
1132extern void __net_exit afs_purge_servers(struct afs_net *); 1185extern void __net_exit afs_purge_servers(struct afs_net *);
1133extern bool afs_probe_fileserver(struct afs_fs_cursor *);
1134extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); 1186extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
1135 1187
1136/* 1188/*
@@ -1160,10 +1212,18 @@ extern void afs_fs_exit(void);
1160extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, 1212extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
1161 const char *, int); 1213 const char *, int);
1162extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); 1214extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
1163extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); 1215extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
1216 struct afs_vlserver *, unsigned int, bool);
1164extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); 1217extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
1165 1218
1166/* 1219/*
1220 * vl_probe.c
1221 */
1222extern void afs_vlserver_probe_result(struct afs_call *);
1223extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
1224extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
1225
1226/*
1167 * vl_rotate.c 1227 * vl_rotate.c
1168 */ 1228 */
1169extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *, 1229extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index d887f822f4eb..be2ee3bbd0a9 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -312,7 +312,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
312 if (alist) { 312 if (alist) {
313 for (i = 0; i < alist->nr_addrs; i++) 313 for (i = 0; i < alist->nr_addrs; i++)
314 seq_printf(m, " %c %pISpc\n", 314 seq_printf(m, " %c %pISpc\n",
315 alist->index == i ? '>' : '-', 315 alist->preferred == i ? '>' : '-',
316 &alist->addrs[i].transport); 316 &alist->addrs[i].transport);
317 } 317 }
318 return 0; 318 return 0;
@@ -391,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
391 &server->uuid, 391 &server->uuid,
392 atomic_read(&server->usage), 392 atomic_read(&server->usage),
393 &alist->addrs[0].transport, 393 &alist->addrs[0].transport,
394 alist->index == 0 ? "*" : ""); 394 alist->preferred == 0 ? "*" : "");
395 for (i = 1; i < alist->nr_addrs; i++) 395 for (i = 1; i < alist->nr_addrs; i++)
396 seq_printf(m, " %pISpc%s\n", 396 seq_printf(m, " %pISpc%s\n",
397 &alist->addrs[i].transport, 397 &alist->addrs[i].transport,
398 alist->index == i ? "*" : ""); 398 alist->preferred == i ? "*" : "");
399 return 0; 399 return 0;
400} 400}
401 401
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 7c4487781637..00504254c1c2 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -19,14 +19,6 @@
19#include "afs_fs.h" 19#include "afs_fs.h"
20 20
21/* 21/*
22 * Initialise a filesystem server cursor for iterating over FS servers.
23 */
24static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
25{
26 memset(fc, 0, sizeof(*fc));
27}
28
29/*
30 * Begin an operation on the fileserver. 22 * Begin an operation on the fileserver.
31 * 23 *
32 * Fileserver operations are serialised on the server by vnode, so we serialise 24 * Fileserver operations are serialised on the server by vnode, so we serialise
@@ -35,7 +27,7 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
35bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, 27bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
36 struct key *key) 28 struct key *key)
37{ 29{
38 afs_init_fs_cursor(fc, vnode); 30 memset(fc, 0, sizeof(*fc));
39 fc->vnode = vnode; 31 fc->vnode = vnode;
40 fc->key = key; 32 fc->key = key;
41 fc->ac.error = SHRT_MAX; 33 fc->ac.error = SHRT_MAX;
@@ -66,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
66 fc->server_list = afs_get_serverlist(vnode->volume->servers); 58 fc->server_list = afs_get_serverlist(vnode->volume->servers);
67 read_unlock(&vnode->volume->servers_lock); 59 read_unlock(&vnode->volume->servers_lock);
68 60
61 fc->untried = (1UL << fc->server_list->nr_servers) - 1;
62 fc->index = READ_ONCE(fc->server_list->preferred);
63
69 cbi = vnode->cb_interest; 64 cbi = vnode->cb_interest;
70 if (cbi) { 65 if (cbi) {
71 /* See if the vnode's preferred record is still available */ 66 /* See if the vnode's preferred record is still available */
72 for (i = 0; i < fc->server_list->nr_servers; i++) { 67 for (i = 0; i < fc->server_list->nr_servers; i++) {
73 if (fc->server_list->servers[i].cb_interest == cbi) { 68 if (fc->server_list->servers[i].cb_interest == cbi) {
74 fc->start = i; 69 fc->index = i;
75 goto found_interest; 70 goto found_interest;
76 } 71 }
77 } 72 }
@@ -95,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
95 90
96 afs_put_cb_interest(afs_v2net(vnode), cbi); 91 afs_put_cb_interest(afs_v2net(vnode), cbi);
97 cbi = NULL; 92 cbi = NULL;
98 } else {
99 fc->start = READ_ONCE(fc->server_list->index);
100 } 93 }
101 94
102found_interest: 95found_interest:
103 fc->index = fc->start;
104 return true; 96 return true;
105} 97}
106 98
@@ -144,11 +136,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
144 struct afs_addr_list *alist; 136 struct afs_addr_list *alist;
145 struct afs_server *server; 137 struct afs_server *server;
146 struct afs_vnode *vnode = fc->vnode; 138 struct afs_vnode *vnode = fc->vnode;
147 int error = fc->ac.error; 139 u32 rtt, abort_code;
140 int error = fc->ac.error, i;
148 141
149 _enter("%u/%u,%u/%u,%d,%d", 142 _enter("%lx[%d],%lx[%d],%d,%d",
150 fc->index, fc->start, 143 fc->untried, fc->index,
151 fc->ac.index, fc->ac.start, 144 fc->ac.tried, fc->ac.index,
152 error, fc->ac.abort_code); 145 error, fc->ac.abort_code);
153 146
154 if (fc->flags & AFS_FS_CURSOR_STOP) { 147 if (fc->flags & AFS_FS_CURSOR_STOP) {
@@ -345,8 +338,50 @@ start:
345 if (!afs_start_fs_iteration(fc, vnode)) 338 if (!afs_start_fs_iteration(fc, vnode))
346 goto failed; 339 goto failed;
347 340
348use_server: 341 _debug("__ VOL %llx __", vnode->volume->vid);
349 _debug("use"); 342 error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
343 if (error < 0)
344 goto failed_set_error;
345
346pick_server:
347 _debug("pick [%lx]", fc->untried);
348
349 error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
350 if (error < 0)
351 goto failed_set_error;
352
353 /* Pick the untried server with the lowest RTT. If we have outstanding
354 * callbacks, we stick with the server we're already using if we can.
355 */
356 if (fc->cbi) {
357 _debug("cbi %u", fc->index);
358 if (test_bit(fc->index, &fc->untried))
359 goto selected_server;
360 afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
361 fc->cbi = NULL;
362 _debug("nocbi");
363 }
364
365 fc->index = -1;
366 rtt = U32_MAX;
367 for (i = 0; i < fc->server_list->nr_servers; i++) {
368 struct afs_server *s = fc->server_list->servers[i].server;
369
370 if (!test_bit(i, &fc->untried) || !s->probe.responded)
371 continue;
372 if (s->probe.rtt < rtt) {
373 fc->index = i;
374 rtt = s->probe.rtt;
375 }
376 }
377
378 if (fc->index == -1)
379 goto no_more_servers;
380
381selected_server:
382 _debug("use %d", fc->index);
383 __clear_bit(fc->index, &fc->untried);
384
350 /* We're starting on a different fileserver from the list. We need to 385 /* We're starting on a different fileserver from the list. We need to
351 * check it, create a callback intercept, find its address list and 386 * check it, create a callback intercept, find its address list and
352 * probe its capabilities before we use it. 387 * probe its capabilities before we use it.
@@ -379,60 +414,81 @@ use_server:
379 414
380 memset(&fc->ac, 0, sizeof(fc->ac)); 415 memset(&fc->ac, 0, sizeof(fc->ac));
381 416
382 /* Probe the current fileserver if we haven't done so yet. */
383 if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
384 fc->ac.alist = afs_get_addrlist(alist);
385
386 if (!afs_probe_fileserver(fc)) {
387 switch (fc->ac.error) {
388 case -ENOMEM:
389 case -ERESTARTSYS:
390 case -EINTR:
391 goto failed;
392 default:
393 goto next_server;
394 }
395 }
396 }
397
398 if (!fc->ac.alist) 417 if (!fc->ac.alist)
399 fc->ac.alist = alist; 418 fc->ac.alist = alist;
400 else 419 else
401 afs_put_addrlist(alist); 420 afs_put_addrlist(alist);
402 421
403 fc->ac.start = READ_ONCE(alist->index); 422 fc->ac.index = -1;
404 fc->ac.index = fc->ac.start;
405 423
406iterate_address: 424iterate_address:
407 ASSERT(fc->ac.alist); 425 ASSERT(fc->ac.alist);
408 _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
409 /* Iterate over the current server's address list to try and find an 426 /* Iterate over the current server's address list to try and find an
410 * address on which it will respond to us. 427 * address on which it will respond to us.
411 */ 428 */
412 if (!afs_iterate_addresses(&fc->ac)) 429 if (!afs_iterate_addresses(&fc->ac))
413 goto next_server; 430 goto next_server;
414 431
432 _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
433
415 _leave(" = t"); 434 _leave(" = t");
416 return true; 435 return true;
417 436
418next_server: 437next_server:
419 _debug("next"); 438 _debug("next");
420 afs_end_cursor(&fc->ac); 439 afs_end_cursor(&fc->ac);
421 afs_put_cb_interest(afs_v2net(vnode), fc->cbi); 440 goto pick_server;
422 fc->cbi = NULL;
423 fc->index++;
424 if (fc->index >= fc->server_list->nr_servers)
425 fc->index = 0;
426 if (fc->index != fc->start)
427 goto use_server;
428 441
442no_more_servers:
429 /* That's all the servers poked to no good effect. Try again if some 443 /* That's all the servers poked to no good effect. Try again if some
430 * of them were busy. 444 * of them were busy.
431 */ 445 */
432 if (fc->flags & AFS_FS_CURSOR_VBUSY) 446 if (fc->flags & AFS_FS_CURSOR_VBUSY)
433 goto restart_from_beginning; 447 goto restart_from_beginning;
434 448
435 goto failed; 449 abort_code = 0;
450 error = -EDESTADDRREQ;
451 for (i = 0; i < fc->server_list->nr_servers; i++) {
452 struct afs_server *s = fc->server_list->servers[i].server;
453 int probe_error = READ_ONCE(s->probe.error);
454
455 switch (probe_error) {
456 case 0:
457 continue;
458 default:
459 if (error == -ETIMEDOUT ||
460 error == -ETIME)
461 continue;
462 case -ETIMEDOUT:
463 case -ETIME:
464 if (error == -ENOMEM ||
465 error == -ENONET)
466 continue;
467 case -ENOMEM:
468 case -ENONET:
469 if (error == -ENETUNREACH)
470 continue;
471 case -ENETUNREACH:
472 if (error == -EHOSTUNREACH)
473 continue;
474 case -EHOSTUNREACH:
475 if (error == -ECONNREFUSED)
476 continue;
477 case -ECONNREFUSED:
478 if (error == -ECONNRESET)
479 continue;
480 case -ECONNRESET: /* Responded, but call expired. */
481 if (error == -ECONNABORTED)
482 continue;
483 case -ECONNABORTED:
484 abort_code = s->probe.abort_code;
485 error = probe_error;
486 continue;
487 }
488 }
489
490 if (error == -ECONNABORTED)
491 error = afs_abort_to_error(abort_code);
436 492
437failed_set_error: 493failed_set_error:
438 fc->error = error; 494 fc->error = error;
@@ -480,8 +536,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
480 536
481 memset(&fc->ac, 0, sizeof(fc->ac)); 537 memset(&fc->ac, 0, sizeof(fc->ac));
482 fc->ac.alist = alist; 538 fc->ac.alist = alist;
483 fc->ac.start = READ_ONCE(alist->index); 539 fc->ac.index = -1;
484 fc->ac.index = fc->ac.start;
485 goto iterate_address; 540 goto iterate_address;
486 541
487 case 0: 542 case 0:
@@ -538,13 +593,13 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
538 pr_notice("EDESTADDR occurred\n"); 593 pr_notice("EDESTADDR occurred\n");
539 pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", 594 pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
540 fc->cb_break, fc->cb_break_2, fc->flags, fc->error); 595 fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
541 pr_notice("FC: st=%u ix=%u ni=%u\n", 596 pr_notice("FC: ut=%lx ix=%d ni=%u\n",
542 fc->start, fc->index, fc->nr_iterations); 597 fc->untried, fc->index, fc->nr_iterations);
543 598
544 if (fc->server_list) { 599 if (fc->server_list) {
545 const struct afs_server_list *sl = fc->server_list; 600 const struct afs_server_list *sl = fc->server_list;
546 pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n", 601 pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
547 sl->nr_servers, sl->index, sl->vnovol_mask); 602 sl->nr_servers, sl->preferred, sl->vnovol_mask);
548 for (i = 0; i < sl->nr_servers; i++) { 603 for (i = 0; i < sl->nr_servers; i++) {
549 const struct afs_server *s = sl->servers[i].server; 604 const struct afs_server *s = sl->servers[i].server;
550 pr_notice("FC: server fl=%lx av=%u %pU\n", 605 pr_notice("FC: server fl=%lx av=%u %pU\n",
@@ -552,22 +607,21 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
552 if (s->addresses) { 607 if (s->addresses) {
553 const struct afs_addr_list *a = 608 const struct afs_addr_list *a =
554 rcu_dereference(s->addresses); 609 rcu_dereference(s->addresses);
555 pr_notice("FC: - av=%u nr=%u/%u/%u ax=%u\n", 610 pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
556 a->version, 611 a->version,
557 a->nr_ipv4, a->nr_addrs, a->max_addrs, 612 a->nr_ipv4, a->nr_addrs, a->max_addrs,
558 a->index); 613 a->preferred);
559 pr_notice("FC: - pr=%lx yf=%lx\n", 614 pr_notice("FC: - pr=%lx R=%lx F=%lx\n",
560 a->probed, a->yfs); 615 a->probed, a->responded, a->failed);
561 if (a == fc->ac.alist) 616 if (a == fc->ac.alist)
562 pr_notice("FC: - current\n"); 617 pr_notice("FC: - current\n");
563 } 618 }
564 } 619 }
565 } 620 }
566 621
567 pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n", 622 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
568 fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error, 623 fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
569 fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations); 624 fc->ac.responded, fc->ac.nr_iterations);
570
571 rcu_read_unlock(); 625 rcu_read_unlock();
572} 626}
573 627
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 42e1ea7372e9..59970886690f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -43,7 +43,6 @@ int afs_open_socket(struct afs_net *net)
43 struct sockaddr_rxrpc srx; 43 struct sockaddr_rxrpc srx;
44 struct socket *socket; 44 struct socket *socket;
45 unsigned int min_level; 45 unsigned int min_level;
46 u16 service_upgrade[2];
47 int ret; 46 int ret;
48 47
49 _enter(""); 48 _enter("");
@@ -82,13 +81,12 @@ int afs_open_socket(struct afs_net *net)
82 if (ret < 0) 81 if (ret < 0)
83 goto error_2; 82 goto error_2;
84 83
85 service_upgrade[0] = CM_SERVICE; 84 /* Ideally, we'd turn on service upgrade here, but we can't because
86 service_upgrade[1] = YFS_CM_SERVICE; 85 * OpenAFS is buggy and leaks the userStatus field from packet to
87 ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE, 86 * packet and between FS packets and CB packets - so if we try to do an
88 (void *)service_upgrade, sizeof(service_upgrade)); 87 * upgrade on an FS packet, OpenAFS will leak that into the CB packet
89 if (ret < 0) 88 * it sends back to us.
90 goto error_2; 89 */
91
92 90
93 rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, 91 rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
94 afs_rx_discard_new_call); 92 afs_rx_discard_new_call);
@@ -192,6 +190,7 @@ void afs_put_call(struct afs_call *call)
192 190
193 afs_put_server(call->net, call->cm_server); 191 afs_put_server(call->net, call->cm_server);
194 afs_put_cb_interest(call->net, call->cbi); 192 afs_put_cb_interest(call->net, call->cbi);
193 afs_put_addrlist(call->alist);
195 kfree(call->request); 194 kfree(call->request);
196 195
197 trace_afs_call(call, afs_call_trace_free, 0, o, 196 trace_afs_call(call, afs_call_trace_free, 0, o,
@@ -205,21 +204,22 @@ void afs_put_call(struct afs_call *call)
205} 204}
206 205
207/* 206/*
208 * Queue the call for actual work. Returns 0 unconditionally for convenience. 207 * Queue the call for actual work.
209 */ 208 */
210int afs_queue_call_work(struct afs_call *call) 209static void afs_queue_call_work(struct afs_call *call)
211{ 210{
212 int u = atomic_inc_return(&call->usage); 211 if (call->type->work) {
212 int u = atomic_inc_return(&call->usage);
213 213
214 trace_afs_call(call, afs_call_trace_work, u, 214 trace_afs_call(call, afs_call_trace_work, u,
215 atomic_read(&call->net->nr_outstanding_calls), 215 atomic_read(&call->net->nr_outstanding_calls),
216 __builtin_return_address(0)); 216 __builtin_return_address(0));
217 217
218 INIT_WORK(&call->work, call->type->work); 218 INIT_WORK(&call->work, call->type->work);
219 219
220 if (!queue_work(afs_wq, &call->work)) 220 if (!queue_work(afs_wq, &call->work))
221 afs_put_call(call); 221 afs_put_call(call);
222 return 0; 222 }
223} 223}
224 224
225/* 225/*
@@ -376,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
376 atomic_read(&call->net->nr_outstanding_calls)); 376 atomic_read(&call->net->nr_outstanding_calls));
377 377
378 call->async = async; 378 call->async = async;
379 call->addr_ix = ac->index;
380 call->alist = afs_get_addrlist(ac->alist);
379 381
380 /* Work out the length we're going to transmit. This is awkward for 382 /* Work out the length we're going to transmit. This is awkward for
381 * calls such as FS.StoreData where there's an extra injection of data 383 * calls such as FS.StoreData where there's an extra injection of data
@@ -407,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
407 call->debug_id); 409 call->debug_id);
408 if (IS_ERR(rxcall)) { 410 if (IS_ERR(rxcall)) {
409 ret = PTR_ERR(rxcall); 411 ret = PTR_ERR(rxcall);
412 call->error = ret;
410 goto error_kill_call; 413 goto error_kill_call;
411 } 414 }
412 415
@@ -458,6 +461,8 @@ error_do_abort:
458 call->error = ret; 461 call->error = ret;
459 trace_afs_call_done(call); 462 trace_afs_call_done(call);
460error_kill_call: 463error_kill_call:
464 if (call->type->done)
465 call->type->done(call);
461 afs_put_call(call); 466 afs_put_call(call);
462 ac->error = ret; 467 ac->error = ret;
463 _leave(" = %d", ret); 468 _leave(" = %d", ret);
@@ -509,6 +514,7 @@ static void afs_deliver_to_call(struct afs_call *call)
509 state = READ_ONCE(call->state); 514 state = READ_ONCE(call->state);
510 switch (ret) { 515 switch (ret) {
511 case 0: 516 case 0:
517 afs_queue_call_work(call);
512 if (state == AFS_CALL_CL_PROC_REPLY) { 518 if (state == AFS_CALL_CL_PROC_REPLY) {
513 if (call->cbi) 519 if (call->cbi)
514 set_bit(AFS_SERVER_FL_MAY_HAVE_CB, 520 set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
@@ -546,6 +552,8 @@ static void afs_deliver_to_call(struct afs_call *call)
546 } 552 }
547 553
548done: 554done:
555 if (call->type->done)
556 call->type->done(call);
549 if (state == AFS_CALL_COMPLETE && call->incoming) 557 if (state == AFS_CALL_COMPLETE && call->incoming)
550 afs_put_call(call); 558 afs_put_call(call);
551out: 559out:
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 7c1be8b4dc9a..642afa2e9783 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -231,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
231 rwlock_init(&server->fs_lock); 231 rwlock_init(&server->fs_lock);
232 INIT_HLIST_HEAD(&server->cb_volumes); 232 INIT_HLIST_HEAD(&server->cb_volumes);
233 rwlock_init(&server->cb_break_lock); 233 rwlock_init(&server->cb_break_lock);
234 init_waitqueue_head(&server->probe_wq);
235 spin_lock_init(&server->probe_lock);
234 236
235 afs_inc_servers_outstanding(net); 237 afs_inc_servers_outstanding(net);
236 _leave(" = %p", server); 238 _leave(" = %p", server);
@@ -254,7 +256,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
254 ret = -ERESTARTSYS; 256 ret = -ERESTARTSYS;
255 if (afs_begin_vlserver_operation(&vc, cell, key)) { 257 if (afs_begin_vlserver_operation(&vc, cell, key)) {
256 while (afs_select_vlserver(&vc)) { 258 while (afs_select_vlserver(&vc)) {
257 if (test_bit(vc.ac.index, &vc.ac.alist->yfs)) 259 if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
258 alist = afs_yfsvl_get_endpoints(&vc, uuid); 260 alist = afs_yfsvl_get_endpoints(&vc, uuid);
259 else 261 else
260 alist = afs_vl_get_addrs_u(&vc, uuid); 262 alist = afs_vl_get_addrs_u(&vc, uuid);
@@ -365,8 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
365 struct afs_addr_list *alist = rcu_access_pointer(server->addresses); 367 struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
366 struct afs_addr_cursor ac = { 368 struct afs_addr_cursor ac = {
367 .alist = alist, 369 .alist = alist,
368 .start = alist->index, 370 .index = alist->preferred,
369 .index = 0,
370 .error = 0, 371 .error = 0,
371 }; 372 };
372 _enter("%p", server); 373 _enter("%p", server);
@@ -374,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
374 if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) 375 if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
375 afs_fs_give_up_all_callbacks(net, server, &ac, NULL); 376 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
376 377
378 wait_var_event(&server->probe_outstanding,
379 atomic_read(&server->probe_outstanding) == 0);
380
377 call_rcu(&server->rcu, afs_server_rcu); 381 call_rcu(&server->rcu, afs_server_rcu);
378 afs_dec_servers_outstanding(net); 382 afs_dec_servers_outstanding(net);
379} 383}
@@ -507,105 +511,6 @@ void afs_purge_servers(struct afs_net *net)
507} 511}
508 512
509/* 513/*
510 * Probe a fileserver to find its capabilities.
511 *
512 * TODO: Try service upgrade.
513 */
514static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
515{
516 int i;
517
518 _enter("");
519
520 fc->ac.start = READ_ONCE(fc->ac.alist->index);
521 fc->ac.index = fc->ac.start;
522 fc->ac.error = 0;
523 fc->ac.begun = false;
524
525 while (afs_iterate_addresses(&fc->ac)) {
526 afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
527 &fc->ac, fc->key);
528 switch (fc->ac.error) {
529 case 0:
530 if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
531 for (i = 0; i < fc->ac.alist->nr_addrs; i++)
532 fc->ac.alist->addrs[i].srx_service =
533 YFS_FS_SERVICE;
534 }
535 afs_end_cursor(&fc->ac);
536 set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
537 return true;
538 case -ECONNABORTED:
539 fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
540 goto error;
541 case -ENOMEM:
542 case -ENONET:
543 goto error;
544 case -ENETUNREACH:
545 case -EHOSTUNREACH:
546 case -ECONNREFUSED:
547 case -ETIMEDOUT:
548 case -ETIME:
549 break;
550 default:
551 fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
552 goto error;
553 }
554 }
555
556error:
557 afs_end_cursor(&fc->ac);
558 return false;
559}
560
561/*
562 * If we haven't already, try probing the fileserver to get its capabilities.
563 * We try not to instigate parallel probes, but it's possible that the parallel
564 * probes will fail due to authentication failure when ours would succeed.
565 *
566 * TODO: Try sending an anonymous probe if an authenticated probe fails.
567 */
568bool afs_probe_fileserver(struct afs_fs_cursor *fc)
569{
570 bool success;
571 int ret, retries = 0;
572
573 _enter("");
574
575retry:
576 if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
577 _leave(" = t");
578 return true;
579 }
580
581 if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
582 success = afs_do_probe_fileserver(fc);
583 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
584 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
585 _leave(" = t");
586 return success;
587 }
588
589 _debug("wait");
590 ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
591 TASK_INTERRUPTIBLE);
592 if (ret == -ERESTARTSYS) {
593 fc->ac.error = ret;
594 _leave(" = f [%d]", ret);
595 return false;
596 }
597
598 retries++;
599 if (retries == 4) {
600 fc->ac.error = -ESTALE;
601 _leave(" = f [stale]");
602 return false;
603 }
604 _debug("retry");
605 goto retry;
606}
607
608/*
609 * Get an update for a server's address list. 514 * Get an update for a server's address list.
610 */ 515 */
611static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) 516static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 8a5760aa5832..95d0761cdb34 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
118 return false; 118 return false;
119 119
120changed: 120changed:
121 /* Maintain the same current server as before if possible. */ 121 /* Maintain the same preferred server as before if possible. */
122 cur = old->servers[old->index].server; 122 cur = old->servers[old->preferred].server;
123 for (j = 0; j < new->nr_servers; j++) { 123 for (j = 0; j < new->nr_servers; j++) {
124 if (new->servers[j].server == cur) { 124 if (new->servers[j].server == cur) {
125 new->index = j; 125 new->preferred = j;
126 break; 126 break;
127 } 127 }
128 } 128 }
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index c1e316ba105a..b4f1a84519b9 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -23,6 +23,8 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
23 if (vlserver) { 23 if (vlserver) {
24 atomic_set(&vlserver->usage, 1); 24 atomic_set(&vlserver->usage, 1);
25 rwlock_init(&vlserver->lock); 25 rwlock_init(&vlserver->lock);
26 init_waitqueue_head(&vlserver->probe_wq);
27 spin_lock_init(&vlserver->probe_lock);
26 vlserver->name_len = name_len; 28 vlserver->name_len = name_len;
27 vlserver->port = port; 29 vlserver->port = port;
28 memcpy(vlserver->name, name, name_len); 30 memcpy(vlserver->name, name, name_len);
@@ -141,7 +143,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
141 143
142 /* Start with IPv6 if available. */ 144 /* Start with IPv6 if available. */
143 if (alist->nr_ipv4 < alist->nr_addrs) 145 if (alist->nr_ipv4 < alist->nr_addrs)
144 alist->index = alist->nr_ipv4; 146 alist->preferred = alist->nr_ipv4;
145 147
146 *_b = b; 148 *_b = b;
147 return alist; 149 return alist;
@@ -307,6 +309,8 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
307 (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); 309 (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
308 } 310 }
309 311
312 clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
313
310 vllist->servers[j].priority = bs.priority; 314 vllist->servers[j].priority = bs.priority;
311 vllist->servers[j].weight = bs.weight; 315 vllist->servers[j].weight = bs.weight;
312 vllist->servers[j].server = server; 316 vllist->servers[j].server = server;
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644
index 000000000000..c0f616bd70cb
--- /dev/null
+++ b/fs/afs/vl_probe.c
@@ -0,0 +1,273 @@
1/* AFS vlserver probing
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include "afs_fs.h"
15#include "internal.h"
16#include "protocol_yfs.h"
17
/*
 * Account for the completion of one probe call against a vlserver.
 *
 * Decrements server->probe_outstanding.  While the counter has not yet
 * reached zero this just returns false.  When it does reach zero, waiters on
 * the counter are woken, AFS_VLSERVER_FL_PROBING is cleared in server->flags,
 * waiters on that bit are woken and true is returned.
 */
18static bool afs_vl_probe_done(struct afs_vlserver *server)
19{
20 if (!atomic_dec_and_test(&server->probe_outstanding))
21 return false;
22
	/* That was the last outstanding probe for this server. */
23 wake_up_var(&server->probe_outstanding);
	/* Release the PROBING bit that afs_send_vl_probes() took with
	 * test_and_set_bit_lock().
	 */
24 clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
25 wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
26 return true;
27}
28
29/*
30 * Process the result of probing a vlserver. This is called after successful
31 * or failed delivery of a VL.GetCapabilities operation.
 *
 * The vlserver being probed is taken from call->reply[0], its index in the
 * server list from call->reply[1] and the index of the probed address from
 * call->addr_ix.  The outcome in call->error is folded into server->probe and
 * into the per-address responded/failed bitmaps of call->alist, all under
 * server->probe_lock.
 *
 * Waiters on server->probe.have_result and server->probe_wq are woken when
 * this probe produced a new lowest RTT for the server or when it was the
 * last outstanding probe.
32 */
33void afs_vlserver_probe_result(struct afs_call *call)
34{
35 struct afs_addr_list *alist = call->alist;
36 struct afs_vlserver *server = call->reply[0];
37 unsigned int server_index = (long)call->reply[1];
38 unsigned int index = call->addr_ix;
39 unsigned int rtt = UINT_MAX;
40 bool have_result = false;
41 u64 _rtt;
42 int ret = call->error;
43
44 _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
45
46 spin_lock(&server->probe_lock);
47
48 switch (ret) {
49 case 0:
50 server->probe.error = 0;
51 goto responded;
52 case -ECONNABORTED:
	/* An abort is handled as a response (see the responded label), but
	 * its code is only recorded if no earlier response has been noted.
	 */
53 if (!server->probe.responded) {
54 server->probe.abort_code = call->abort_code;
55 server->probe.error = ret;
56 }
57 goto responded;
58 case -ENOMEM:
59 case -ENONET:
	/* Flagged as a local failure; the per-address bitmaps and
	 * probe.error are left untouched.
	 */
60 server->probe.local_failure = true;
61 afs_io_error(call, afs_io_error_vl_probe_fail);
62 goto out;
63 case -ECONNRESET: /* Responded, but call expired. */
64 case -ENETUNREACH:
65 case -EHOSTUNREACH:
66 case -ECONNREFUSED:
67 case -ETIMEDOUT:
68 case -ETIME:
69 default:
	/* Mark this address as failed.  The server-wide error is only
	 * replaced if nothing has responded yet and the error currently
	 * recorded is 0, -ETIMEDOUT or -ETIME.
	 */
70 clear_bit(index, &alist->responded);
71 set_bit(index, &alist->failed);
72 if (!server->probe.responded &&
73 (server->probe.error == 0 ||
74 server->probe.error == -ETIMEDOUT ||
75 server->probe.error == -ETIME))
76 server->probe.error = ret;
77 afs_io_error(call, afs_io_error_vl_probe_fail);
78 goto out;
79 }
80
81responded:
82 set_bit(index, &alist->responded);
83 clear_bit(index, &alist->failed);
84
	/* Note which service answered.  Once any address has answered as
	 * YFS (probe.is_yfs), a non-YFS answer no longer clears the IS_YFS
	 * flag or updates this address's srx_service.
	 */
85 if (call->service_id == YFS_VL_SERVICE) {
86 server->probe.is_yfs = true;
87 set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
88 alist->addrs[index].srx_service = call->service_id;
89 } else {
90 server->probe.not_yfs = true;
91 if (!server->probe.is_yfs) {
92 clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
93 alist->addrs[index].srx_service = call->service_id;
94 }
95 }
96
97 /* Get the RTT and scale it to fit into a 32-bit value that represents
98 * over a minute of time so that we can access it with one instruction
99 * on a 32-bit system.
100 */
101 _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
102 _rtt /= 64;
103 rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
	/* A new lowest RTT makes this address the preferred one in the
	 * address list and counts as a result worth waking waiters for.
	 */
104 if (rtt < server->probe.rtt) {
105 server->probe.rtt = rtt;
106 alist->preferred = index;
107 have_result = true;
108 }
109
110 smp_wmb(); /* Set rtt before responded. */
111 server->probe.responded = true;
112 set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
113out:
114 spin_unlock(&server->probe_lock);
115
116 _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
117 server_index, index, &alist->addrs[index].transport,
118 (unsigned int)rtt, ret);
119
	/* Also wake waiters if this was the last outstanding probe, even
	 * when it brought no new RTT.
	 */
120 have_result |= afs_vl_probe_done(server);
121 if (have_result) {
122 server->probe.have_result = true;
123 wake_up_var(&server->probe.have_result);
124 wake_up_all(&server->probe_wq);
125 }
126}
127
128/*
129 * Probe all of a vlserver's addresses to find out the best route and to
130 * query its capabilities.
 *
 * Resets server->probe, sets server->probe_outstanding to the number of
 * addresses and then issues one VL.GetCapabilities call per address with the
 * async argument set to true.
 *
 * Returns 0 if every call returned -EINPROGRESS; otherwise returns the first
 * other value produced by afs_vl_get_capabilities() and stops issuing calls.
 *
 * The only visible caller, afs_send_vl_probes(), takes the
 * AFS_VLSERVER_FL_PROBING bit before calling this.
131 */
132static int afs_do_probe_vlserver(struct afs_net *net,
133 struct afs_vlserver *server,
134 struct key *key,
135 unsigned int server_index)
136{
137 struct afs_addr_cursor ac = {
138 .index = 0,
139 };
140 int ret;
141
142 _enter("%s", server->name);
143
	/* NOTE(review): the address list pointer is used after server->lock
	 * is dropped and no reference is taken here - confirm what keeps the
	 * list alive.
	 */
144 read_lock(&server->lock);
145 ac.alist = rcu_dereference_protected(server->addresses,
146 lockdep_is_held(&server->lock));
147 read_unlock(&server->lock);
148
	/* One outstanding count per address; each is dropped through
	 * afs_vl_probe_done().
	 */
149 atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
150 memset(&server->probe, 0, sizeof(server->probe));
151 server->probe.rtt = UINT_MAX;
152
153 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
154 ret = afs_vl_get_capabilities(net, &ac, key, server,
155 server_index, true);
156 if (ret != -EINPROGRESS) {
	/* NOTE(review): only one count is dropped here although the
	 * counter was primed with nr_addrs and the remaining addresses
	 * are never probed - verify the counter can still reach zero so
	 * that AFS_VLSERVER_FL_PROBING gets cleared.
	 */
157 afs_vl_probe_done(server);
158 return ret;
159 }
160 }
161
162 return 0;
163}
164
165/*
166 * Send off probes to all unprobed servers.
 *
 * Walks every server in vllist.  Servers with AFS_VLSERVER_FL_PROBED set are
 * skipped.  For the rest, an attempt is made to take the
 * AFS_VLSERVER_FL_PROBING bit; if the bit was already set, the server is left
 * alone, otherwise afs_do_probe_vlserver() is called for it.
 *
 * Returns 0 on completion of the walk, or the first non-zero value returned
 * by afs_do_probe_vlserver(), in which case later servers are not visited.
167 */
168int afs_send_vl_probes(struct afs_net *net, struct key *key,
169 struct afs_vlserver_list *vllist)
170{
171 struct afs_vlserver *server;
172 int i, ret;
173
174 for (i = 0; i < vllist->nr_servers; i++) {
175 server = vllist->servers[i].server;
176 if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
177 continue;
178
	/* Only the caller that flips PROBING from clear to set
	 * launches the probes.
	 */
179 if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
180 ret = afs_do_probe_vlserver(net, server, key, i);
181 if (ret)
182 return ret;
183 }
184 }
185
186 return 0;
187}
188
189/*
190 * Wait for the first as-yet untried server to respond.
 *
 * @vllist:  the vlserver list being rotated through.
 * @untried: bitmask, indexed by position in @vllist, of servers not yet
 *           tried.  It is passed by value, so the bits cleared below only
 *           affect this function's local copy.
 *
 * Returns 0 without sleeping if any untried server already has
 * probe.responded set or if no untried server has a probe in progress.
 * Otherwise sleeps interruptibly until an untried server responds, no untried
 * server is still being probed, or a signal is pending.  Afterwards, if any
 * untried server has responded, vllist->preferred is set to the one with the
 * lowest probe.rtt.
 *
 * Returns -ENOMEM if the wait entries cannot be allocated, and -ERESTARTSYS
 * if a signal is pending and no responder was found; otherwise 0.
191 */
192int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
193 unsigned long untried)
194{
195 struct wait_queue_entry *waits;
196 struct afs_vlserver *server;
197 unsigned int rtt = UINT_MAX;
198 bool have_responders = false;
199 int pref = -1, i;
200
201 _enter("%u,%lx", vllist->nr_servers, untried);
202
203 /* Only wait for servers that have a probe outstanding. */
204 for (i = 0; i < vllist->nr_servers; i++) {
205 if (test_bit(i, &untried)) {
206 server = vllist->servers[i].server;
207 if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
208 __clear_bit(i, &untried);
209 if (server->probe.responded)
210 have_responders = true;
211 }
212 }
213 if (have_responders || !untried)
214 return 0;
215
	/* One wait entry slot per server in the list; only the slots of
	 * servers still in the untried mask are initialised and queued.
	 */
216 waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
217 if (!waits)
218 return -ENOMEM;
219
220 for (i = 0; i < vllist->nr_servers; i++) {
221 if (test_bit(i, &untried)) {
222 server = vllist->servers[i].server;
223 init_waitqueue_entry(&waits[i], current);
224 add_wait_queue(&server->probe_wq, &waits[i]);
225 }
226 }
227
228 for (;;) {
229 bool still_probing = false;
230
	/* The task state is set before the conditions are re-checked -
	 * presumably so that a wake-up arriving in between is not lost.
	 */
231 set_current_state(TASK_INTERRUPTIBLE);
232 for (i = 0; i < vllist->nr_servers; i++) {
233 if (test_bit(i, &untried)) {
234 server = vllist->servers[i].server;
235 if (server->probe.responded)
236 goto stop;
237 if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
238 still_probing = true;
239 }
240 }
241
242 if (!still_probing || unlikely(signal_pending(current)))
243 goto stop;
244 schedule();
245 }
246
247stop:
248 set_current_state(TASK_RUNNING);
249
	/* Pick the lowest-RTT responder among the servers waited on and
	 * dequeue every wait entry that was queued above.
	 */
250 for (i = 0; i < vllist->nr_servers; i++) {
251 if (test_bit(i, &untried)) {
252 server = vllist->servers[i].server;
253 if (server->probe.responded &&
254 server->probe.rtt < rtt) {
255 pref = i;
256 rtt = server->probe.rtt;
257 }
258
259 remove_wait_queue(&server->probe_wq, &waits[i]);
260 }
261 }
262
263 kfree(waits);
264
265 if (pref == -1 && signal_pending(current))
266 return -ERESTARTSYS;
267
268 if (pref >= 0)
269 vllist->preferred = pref;
270
271 _leave(" = 0 [%u]", pref);
272 return 0;
273}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index ead6dedbb561..b64a284b99d2 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -58,8 +58,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
58 if (!vc->server_list || !vc->server_list->nr_servers) 58 if (!vc->server_list || !vc->server_list->nr_servers)
59 return false; 59 return false;
60 60
61 vc->start = READ_ONCE(vc->server_list->index); 61 vc->untried = (1UL << vc->server_list->nr_servers) - 1;
62 vc->index = vc->start; 62 vc->index = -1;
63 return true; 63 return true;
64} 64}
65 65
@@ -71,11 +71,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
71{ 71{
72 struct afs_addr_list *alist; 72 struct afs_addr_list *alist;
73 struct afs_vlserver *vlserver; 73 struct afs_vlserver *vlserver;
74 int error = vc->ac.error; 74 u32 rtt;
75 int error = vc->ac.error, abort_code, i;
75 76
76 _enter("%u/%u,%u/%u,%d,%d", 77 _enter("%lx[%d],%lx[%d],%d,%d",
77 vc->index, vc->start, 78 vc->untried, vc->index,
78 vc->ac.index, vc->ac.start, 79 vc->ac.tried, vc->ac.index,
79 error, vc->ac.abort_code); 80 error, vc->ac.abort_code);
80 81
81 if (vc->flags & AFS_VL_CURSOR_STOP) { 82 if (vc->flags & AFS_VL_CURSOR_STOP) {
@@ -145,23 +146,52 @@ restart_from_beginning:
145start: 146start:
146 _debug("start"); 147 _debug("start");
147 148
148 /* TODO: Consider checking the VL server list */
149
150 if (!afs_start_vl_iteration(vc)) 149 if (!afs_start_vl_iteration(vc))
151 goto failed; 150 goto failed;
152 151
153use_server: 152 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
154 _debug("use"); 153 if (error < 0)
154 goto failed_set_error;
155
156pick_server:
157 _debug("pick [%lx]", vc->untried);
158
159 error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
160 if (error < 0)
161 goto failed_set_error;
162
163 /* Pick the untried server with the lowest RTT. */
164 vc->index = vc->server_list->preferred;
165 if (test_bit(vc->index, &vc->untried))
166 goto selected_server;
167
168 vc->index = -1;
169 rtt = U32_MAX;
170 for (i = 0; i < vc->server_list->nr_servers; i++) {
171 struct afs_vlserver *s = vc->server_list->servers[i].server;
172
173 if (!test_bit(i, &vc->untried) || !s->probe.responded)
174 continue;
175 if (s->probe.rtt < rtt) {
176 vc->index = i;
177 rtt = s->probe.rtt;
178 }
179 }
180
181 if (vc->index == -1)
182 goto no_more_servers;
183
184selected_server:
185 _debug("use %d", vc->index);
186 __clear_bit(vc->index, &vc->untried);
187
155 /* We're starting on a different vlserver from the list. We need to 188 /* We're starting on a different vlserver from the list. We need to
156 * check it, find its address list and probe its capabilities before we 189 * check it, find its address list and probe its capabilities before we
157 * use it. 190 * use it.
158 */ 191 */
159 ASSERTCMP(vc->ac.alist, ==, NULL); 192 ASSERTCMP(vc->ac.alist, ==, NULL);
160 vlserver = vc->server_list->servers[vc->index].server; 193 vlserver = vc->server_list->servers[vc->index].server;
161 194 vc->server = vlserver;
162 // TODO: Check the vlserver occasionally
163 //if (!afs_check_vlserver_record(vc, vlserver))
164 // goto failed;
165 195
166 _debug("USING VLSERVER: %s", vlserver->name); 196 _debug("USING VLSERVER: %s", vlserver->name);
167 197
@@ -173,62 +203,84 @@ use_server:
173 203
174 memset(&vc->ac, 0, sizeof(vc->ac)); 204 memset(&vc->ac, 0, sizeof(vc->ac));
175 205
176 /* Probe the current vlserver if we haven't done so yet. */
177#if 0 // TODO
178 if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) {
179 vc->ac.alist = afs_get_addrlist(alist);
180
181 if (!afs_probe_vlserver(vc)) {
182 error = vc->ac.error;
183 switch (error) {
184 case -ENOMEM:
185 case -ERESTARTSYS:
186 case -EINTR:
187 goto failed_set_error;
188 default:
189 goto next_server;
190 }
191 }
192 }
193#endif
194
195 if (!vc->ac.alist) 206 if (!vc->ac.alist)
196 vc->ac.alist = alist; 207 vc->ac.alist = alist;
197 else 208 else
198 afs_put_addrlist(alist); 209 afs_put_addrlist(alist);
199 210
200 vc->ac.start = READ_ONCE(alist->index); 211 vc->ac.index = -1;
201 vc->ac.index = vc->ac.start;
202 212
203iterate_address: 213iterate_address:
204 ASSERT(vc->ac.alist); 214 ASSERT(vc->ac.alist);
205 _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
206 /* Iterate over the current server's address list to try and find an 215 /* Iterate over the current server's address list to try and find an
207 * address on which it will respond to us. 216 * address on which it will respond to us.
208 */ 217 */
209 if (!afs_iterate_addresses(&vc->ac)) 218 if (!afs_iterate_addresses(&vc->ac))
210 goto next_server; 219 goto next_server;
211 220
221 _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
222
212 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 223 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
213 return true; 224 return true;
214 225
215next_server: 226next_server:
216 _debug("next"); 227 _debug("next");
217 afs_end_cursor(&vc->ac); 228 afs_end_cursor(&vc->ac);
218 vc->index++; 229 goto pick_server;
219 if (vc->index >= vc->server_list->nr_servers)
220 vc->index = 0;
221 if (vc->index != vc->start)
222 goto use_server;
223 230
231no_more_servers:
224 /* That's all the servers poked to no good effect. Try again if some 232 /* That's all the servers poked to no good effect. Try again if some
225 * of them were busy. 233 * of them were busy.
226 */ 234 */
227 if (vc->flags & AFS_VL_CURSOR_RETRY) 235 if (vc->flags & AFS_VL_CURSOR_RETRY)
228 goto restart_from_beginning; 236 goto restart_from_beginning;
229 237
230 goto failed; 238 abort_code = 0;
239 error = -EDESTADDRREQ;
240 for (i = 0; i < vc->server_list->nr_servers; i++) {
241 struct afs_vlserver *s = vc->server_list->servers[i].server;
242 int probe_error = READ_ONCE(s->probe.error);
243
244 switch (probe_error) {
245 case 0:
246 continue;
247 default:
248 if (error == -ETIMEDOUT ||
249 error == -ETIME)
250 continue;
251 case -ETIMEDOUT:
252 case -ETIME:
253 if (error == -ENOMEM ||
254 error == -ENONET)
255 continue;
256 case -ENOMEM:
257 case -ENONET:
258 if (error == -ENETUNREACH)
259 continue;
260 case -ENETUNREACH:
261 if (error == -EHOSTUNREACH)
262 continue;
263 case -EHOSTUNREACH:
264 if (error == -ECONNREFUSED)
265 continue;
266 case -ECONNREFUSED:
267 if (error == -ECONNRESET)
268 continue;
269 case -ECONNRESET: /* Responded, but call expired. */
270 if (error == -ECONNABORTED)
271 continue;
272 case -ECONNABORTED:
273 abort_code = s->probe.abort_code;
274 error = probe_error;
275 continue;
276 }
277 }
278
279 if (error == -ECONNABORTED)
280 error = afs_abort_to_error(abort_code);
231 281
282failed_set_error:
283 vc->error = error;
232failed: 284failed:
233 vc->flags |= AFS_VL_CURSOR_STOP; 285 vc->flags |= AFS_VL_CURSOR_STOP;
234 afs_end_cursor(&vc->ac); 286 afs_end_cursor(&vc->ac);
@@ -250,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
250 302
251 rcu_read_lock(); 303 rcu_read_lock();
252 pr_notice("EDESTADDR occurred\n"); 304 pr_notice("EDESTADDR occurred\n");
253 pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n", 305 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
254 vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error); 306 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
255 307
256 if (vc->server_list) { 308 if (vc->server_list) {
257 const struct afs_vlserver_list *sl = vc->server_list; 309 const struct afs_vlserver_list *sl = vc->server_list;
@@ -259,26 +311,25 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
259 sl->nr_servers, sl->index); 311 sl->nr_servers, sl->index);
260 for (i = 0; i < sl->nr_servers; i++) { 312 for (i = 0; i < sl->nr_servers; i++) {
261 const struct afs_vlserver *s = sl->servers[i].server; 313 const struct afs_vlserver *s = sl->servers[i].server;
262 pr_notice("VC: server fl=%lx %s+%hu\n", 314 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
263 s->flags, s->name, s->port); 315 s->name, s->port, s->flags, s->probe.error);
264 if (s->addresses) { 316 if (s->addresses) {
265 const struct afs_addr_list *a = 317 const struct afs_addr_list *a =
266 rcu_dereference(s->addresses); 318 rcu_dereference(s->addresses);
267 pr_notice("VC: - av=%u nr=%u/%u/%u ax=%u\n", 319 pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
268 a->version,
269 a->nr_ipv4, a->nr_addrs, a->max_addrs, 320 a->nr_ipv4, a->nr_addrs, a->max_addrs,
270 a->index); 321 a->preferred);
271 pr_notice("VC: - pr=%lx yf=%lx\n", 322 pr_notice("VC: - pr=%lx R=%lx F=%lx\n",
272 a->probed, a->yfs); 323 a->probed, a->responded, a->failed);
273 if (a == vc->ac.alist) 324 if (a == vc->ac.alist)
274 pr_notice("VC: - current\n"); 325 pr_notice("VC: - current\n");
275 } 326 }
276 } 327 }
277 } 328 }
278 329
279 pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n", 330 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
280 vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error, 331 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
281 vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations); 332 vc->ac.responded, vc->ac.nr_iterations);
282 rcu_read_unlock(); 333 rcu_read_unlock();
283} 334}
284 335
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 3127ab9b5521..c3d9e5a5f67e 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -348,12 +348,18 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
348 break; 348 break;
349 } 349 }
350 350
351 call->reply[0] = (void *)(unsigned long)call->service_id;
352
353 _leave(" = 0 [done]"); 351 _leave(" = 0 [done]");
354 return 0; 352 return 0;
355} 353}
356 354
/*
 * Destructor for a VL.GetCapabilities call.
 *
 * Drops the vlserver reference held in call->reply[0] (stored there by
 * afs_vl_get_capabilities() via afs_get_vlserver()) and then hands the call
 * to afs_flat_call_destructor().
 */
355static void afs_destroy_vl_get_capabilities(struct afs_call *call)
356{
357 struct afs_vlserver *server = call->reply[0];
358
359 afs_put_vlserver(call->net, server);
360 afs_flat_call_destructor(call);
361}
362
357/* 363/*
358 * VL.GetCapabilities operation type 364 * VL.GetCapabilities operation type
359 */ 365 */
@@ -361,7 +367,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
361 .name = "VL.GetCapabilities", 367 .name = "VL.GetCapabilities",
362 .op = afs_VL_GetCapabilities, 368 .op = afs_VL_GetCapabilities,
363 .deliver = afs_deliver_vl_get_capabilities, 369 .deliver = afs_deliver_vl_get_capabilities,
364 .destructor = afs_flat_call_destructor, 370 .done = afs_vlserver_probe_result,
371 .destructor = afs_destroy_vl_get_capabilities,
365}; 372};
366 373
367/* 374/*
@@ -371,8 +378,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
371 * We use this to probe for service upgrade to determine what the server at the 378 * We use this to probe for service upgrade to determine what the server at the
372 * other end supports. 379 * other end supports.
373 */ 380 */
374int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, 381int afs_vl_get_capabilities(struct afs_net *net,
375 struct key *key) 382 struct afs_addr_cursor *ac,
383 struct key *key,
384 struct afs_vlserver *server,
385 unsigned int server_index,
386 bool async)
376{ 387{
377 struct afs_call *call; 388 struct afs_call *call;
378 __be32 *bp; 389 __be32 *bp;
@@ -384,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
384 return -ENOMEM; 395 return -ENOMEM;
385 396
386 call->key = key; 397 call->key = key;
387 call->upgrade = true; /* Let's see if this is a YFS server */ 398 call->reply[0] = afs_get_vlserver(server);
388 call->reply[0] = (void *)VLGETCAPABILITIES; 399 call->reply[1] = (void *)(long)server_index;
389 call->ret_reply0 = true; 400 call->upgrade = true;
401 call->want_reply_time = true;
390 402
391 /* marshall the parameters */ 403 /* marshall the parameters */
392 bp = call->request; 404 bp = call->request;
@@ -394,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
394 406
395 /* Can't take a ref on server */ 407 /* Can't take a ref on server */
396 trace_afs_make_vl_call(call); 408 trace_afs_make_vl_call(call);
397 return afs_make_call(ac, call, GFP_KERNEL, false); 409 return afs_make_call(ac, call, GFP_KERNEL, async);
398} 410}
399 411
400/* 412/*
@@ -591,11 +603,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
591 } 603 }
592 604
593 alist = call->reply[0]; 605 alist = call->reply[0];
594
595 /* Start with IPv6 if available. */
596 if (alist->nr_ipv4 < alist->nr_addrs)
597 alist->index = alist->nr_ipv4;
598
599 _leave(" = 0 [done]"); 606 _leave(" = 0 [done]");
600 return 0; 607 return 0;
601} 608}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 7527c081726e..00975ed3640f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -82,22 +82,6 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
82 return ERR_PTR(-ERESTARTSYS); 82 return ERR_PTR(-ERESTARTSYS);
83 83
84 while (afs_select_vlserver(&vc)) { 84 while (afs_select_vlserver(&vc)) {
85 if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) {
86 ret = afs_vl_get_capabilities(cell->net, &vc.ac, key);
87 switch (ret) {
88 case VL_SERVICE:
89 clear_bit(vc.ac.index, &vc.ac.alist->yfs);
90 set_bit(vc.ac.index, &vc.ac.alist->probed);
91 vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
92 break;
93 case YFS_VL_SERVICE:
94 set_bit(vc.ac.index, &vc.ac.alist->yfs);
95 set_bit(vc.ac.index, &vc.ac.alist->probed);
96 vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
97 break;
98 }
99 }
100
101 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); 85 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
102 } 86 }
103 87
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index ed155042236b..33d291888ba9 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -137,6 +137,7 @@ enum afs_io_error {
137 afs_io_error_extract, 137 afs_io_error_extract,
138 afs_io_error_fs_probe_fail, 138 afs_io_error_fs_probe_fail,
139 afs_io_error_vl_lookup_fail, 139 afs_io_error_vl_lookup_fail,
140 afs_io_error_vl_probe_fail,
140}; 141};
141 142
142enum afs_file_error { 143enum afs_file_error {
@@ -261,7 +262,8 @@ enum afs_file_error {
261 EM(afs_io_error_cm_reply, "CM_REPLY") \ 262 EM(afs_io_error_cm_reply, "CM_REPLY") \
262 EM(afs_io_error_extract, "EXTRACT") \ 263 EM(afs_io_error_extract, "EXTRACT") \
263 EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \ 264 EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \
264 E_(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") 265 EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \
266 E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL")
265 267
266#define afs_file_errors \ 268#define afs_file_errors \
267 EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ 269 EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \