summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2018-10-19 19:57:57 -0400
committer: David Howells <dhowells@redhat.com> 2018-10-23 19:41:07 -0400
commit: 0a5143f2f89cc88d8a3eada8e8ccd86c1e988257 (patch)
tree: 249b578b9fd7a79f7b52d4534a0e9ba4cc5b8cc9
parent: e7f680f45bd1deb4ca479c2348b395e1a4d44b17 (diff)
afs: Implement VL server rotation
Track VL servers as independent entities rather than lumping all their
addresses together into one set and implement server-level rotation by:

 (1) Add the concept of a VL server list, where each server has its own
     separate address list.  This code is similar to the FS server list.

 (2) Use the DNS resolver to retrieve a set of servers and their associated
     addresses, ports, preference and weight ratings.

 (3) In the case of a legacy DNS resolver or an address list given directly
     through /proc/net/afs/cells, create a list containing just a dummy
     server record and attach all the addresses to that.

 (4) Implement a simple rotation policy, for the moment ignoring the
     priorities and weights assigned to the servers.

 (5) Show the address list through /proc/net/afs/<cell>/vlservers.  This
     also displays the source and status of the data as indicated by the
     upcall.

Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r-- fs/afs/Makefile 2
-rw-r--r-- fs/afs/addr_list.c 163
-rw-r--r-- fs/afs/cell.c 39
-rw-r--r-- fs/afs/dynroot.c 2
-rw-r--r-- fs/afs/internal.h 114
-rw-r--r-- fs/afs/proc.c 90
-rw-r--r-- fs/afs/server.c 42
-rw-r--r-- fs/afs/vl_list.c 336
-rw-r--r-- fs/afs/vl_rotate.c 251
-rw-r--r-- fs/afs/vlclient.c 32
-rw-r--r-- fs/afs/volume.c 52
11 files changed, 905 insertions, 218 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 546874057bd3..03e9f7afea1b 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -29,6 +29,8 @@ kafs-y := \
29 super.o \ 29 super.o \
30 netdevices.o \ 30 netdevices.o \
31 vlclient.o \ 31 vlclient.o \
32 vl_rotate.o \
33 vl_list.o \
32 volume.o \ 34 volume.o \
33 write.o \ 35 write.o \
34 xattr.o 36 xattr.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 7b34fad4f8f5..3f60b4012587 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
64/* 64/*
65 * Parse a text string consisting of delimited addresses. 65 * Parse a text string consisting of delimited addresses.
66 */ 66 */
67struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, 67struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
68 char delim, 68 const char *text, size_t len,
69 unsigned short service, 69 char delim,
70 unsigned short port) 70 unsigned short service,
71 unsigned short port)
71{ 72{
73 struct afs_vlserver_list *vllist;
72 struct afs_addr_list *alist; 74 struct afs_addr_list *alist;
73 const char *p, *end = text + len; 75 const char *p, *end = text + len;
76 const char *problem;
74 unsigned int nr = 0; 77 unsigned int nr = 0;
78 int ret = -ENOMEM;
75 79
76 _enter("%*.*s,%c", (int)len, (int)len, text, delim); 80 _enter("%*.*s,%c", (int)len, (int)len, text, delim);
77 81
78 if (!len) 82 if (!len) {
83 _leave(" = -EDESTADDRREQ [empty]");
79 return ERR_PTR(-EDESTADDRREQ); 84 return ERR_PTR(-EDESTADDRREQ);
85 }
80 86
81 if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) 87 if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
82 delim = ','; 88 delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
84 /* Count the addresses */ 90 /* Count the addresses */
85 p = text; 91 p = text;
86 do { 92 do {
87 if (!*p) 93 if (!*p) {
88 return ERR_PTR(-EINVAL); 94 problem = "nul";
95 goto inval;
96 }
89 if (*p == delim) 97 if (*p == delim)
90 continue; 98 continue;
91 nr++; 99 nr++;
92 if (*p == '[') { 100 if (*p == '[') {
93 p++; 101 p++;
94 if (p == end) 102 if (p == end) {
95 return ERR_PTR(-EINVAL); 103 problem = "brace1";
104 goto inval;
105 }
96 p = memchr(p, ']', end - p); 106 p = memchr(p, ']', end - p);
97 if (!p) 107 if (!p) {
98 return ERR_PTR(-EINVAL); 108 problem = "brace2";
109 goto inval;
110 }
99 p++; 111 p++;
100 if (p >= end) 112 if (p >= end)
101 break; 113 break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
109 121
110 _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); 122 _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
111 123
112 alist = afs_alloc_addrlist(nr, service, port); 124 vllist = afs_alloc_vlserver_list(1);
113 if (!alist) 125 if (!vllist)
114 return ERR_PTR(-ENOMEM); 126 return ERR_PTR(-ENOMEM);
115 127
128 vllist->nr_servers = 1;
129 vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
130 if (!vllist->servers[0].server)
131 goto error_vl;
132
133 alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
134 if (!alist)
135 goto error;
136
116 /* Extract the addresses */ 137 /* Extract the addresses */
117 p = text; 138 p = text;
118 do { 139 do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
135 break; 156 break;
136 } 157 }
137 158
138 if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) 159 if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
139 family = AF_INET; 160 family = AF_INET;
140 else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) 161 } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
141 family = AF_INET6; 162 family = AF_INET6;
142 else 163 } else {
164 problem = "family";
143 goto bad_address; 165 goto bad_address;
166 }
144 167
145 if (stop != q) 168 p = q;
169 if (stop != p) {
170 problem = "nostop";
146 goto bad_address; 171 goto bad_address;
172 }
147 173
148 p = q;
149 if (q < end && *q == ']') 174 if (q < end && *q == ']')
150 p++; 175 p++;
151 176
@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
154 /* Port number specification "+1234" */ 179 /* Port number specification "+1234" */
155 xport = 0; 180 xport = 0;
156 p++; 181 p++;
157 if (p >= end || !isdigit(*p)) 182 if (p >= end || !isdigit(*p)) {
183 problem = "port";
158 goto bad_address; 184 goto bad_address;
185 }
159 do { 186 do {
160 xport *= 10; 187 xport *= 10;
161 xport += *p - '0'; 188 xport += *p - '0';
162 if (xport > 65535) 189 if (xport > 65535) {
190 problem = "pval";
163 goto bad_address; 191 goto bad_address;
192 }
164 p++; 193 p++;
165 } while (p < end && isdigit(*p)); 194 } while (p < end && isdigit(*p));
166 } else if (*p == delim) { 195 } else if (*p == delim) {
167 p++; 196 p++;
168 } else { 197 } else {
198 problem = "weird";
169 goto bad_address; 199 goto bad_address;
170 } 200 }
171 } 201 }
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
177 207
178 } while (p < end); 208 } while (p < end);
179 209
210 rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
180 _leave(" = [nr %u]", alist->nr_addrs); 211 _leave(" = [nr %u]", alist->nr_addrs);
181 return alist; 212 return vllist;
182 213
183bad_address: 214inval:
184 kfree(alist); 215 _leave(" = -EINVAL [%s %zu %*.*s]",
216 problem, p - text, (int)len, (int)len, text);
185 return ERR_PTR(-EINVAL); 217 return ERR_PTR(-EINVAL);
218bad_address:
219 _leave(" = -EINVAL [%s %zu %*.*s]",
220 problem, p - text, (int)len, (int)len, text);
221 ret = -EINVAL;
222error:
223 afs_put_addrlist(alist);
224error_vl:
225 afs_put_vlserverlist(net, vllist);
226 return ERR_PTR(ret);
186} 227}
187 228
188/* 229/*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
201/* 242/*
202 * Perform a DNS query for VL servers and build a up an address list. 243 * Perform a DNS query for VL servers and build a up an address list.
203 */ 244 */
204struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) 245struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
205{ 246{
206 struct afs_addr_list *alist; 247 struct afs_vlserver_list *vllist;
207 char *vllist = NULL; 248 char *result = NULL;
208 int ret; 249 int ret;
209 250
210 _enter("%s", cell->name); 251 _enter("%s", cell->name);
211 252
212 ret = dns_query("afsdb", cell->name, cell->name_len, 253 ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
213 "", &vllist, _expiry); 254 &result, _expiry);
214 if (ret < 0) 255 if (ret < 0) {
256 _leave(" = %d [dns]", ret);
215 return ERR_PTR(ret); 257 return ERR_PTR(ret);
216
217 alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
218 VL_SERVICE, AFS_VL_PORT);
219 if (IS_ERR(alist)) {
220 kfree(vllist);
221 if (alist != ERR_PTR(-ENOMEM))
222 pr_err("Failed to parse DNS data\n");
223 return alist;
224 } 258 }
225 259
226 kfree(vllist); 260 if (*_expiry == 0)
227 return alist; 261 *_expiry = ktime_get_real_seconds() + 60;
262
263 if (ret > 1 && result[0] == 0)
264 vllist = afs_extract_vlserver_list(cell, result, ret);
265 else
266 vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
267 VL_SERVICE, AFS_VL_PORT);
268 kfree(result);
269 if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
270 pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
271
272 return vllist;
228} 273}
229 274
230/* 275/*
@@ -347,43 +392,3 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
347 ac->begun = false; 392 ac->begun = false;
348 return ac->error; 393 return ac->error;
349} 394}
350
351/*
352 * Set the address cursor for iterating over VL servers.
353 */
354int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
355{
356 struct afs_addr_list *alist;
357 int ret;
358
359 if (!rcu_access_pointer(cell->vl_addrs)) {
360 ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
361 TASK_INTERRUPTIBLE);
362 if (ret < 0)
363 return ret;
364
365 if (!rcu_access_pointer(cell->vl_addrs) &&
366 ktime_get_real_seconds() < cell->dns_expiry)
367 return cell->error;
368 }
369
370 read_lock(&cell->vl_addrs_lock);
371 alist = rcu_dereference_protected(cell->vl_addrs,
372 lockdep_is_held(&cell->vl_addrs_lock));
373 if (alist->nr_addrs > 0)
374 afs_get_addrlist(alist);
375 else
376 alist = NULL;
377 read_unlock(&cell->vl_addrs_lock);
378
379 if (!alist)
380 return -EDESTADDRREQ;
381
382 ac->alist = alist;
383 ac->addr = NULL;
384 ac->start = READ_ONCE(alist->index);
385 ac->index = ac->start;
386 ac->error = 0;
387 ac->begun = false;
388 return 0;
389}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 6127f0fcd62c..963b6fa51fdf 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -119,7 +119,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
119 */ 119 */
120static struct afs_cell *afs_alloc_cell(struct afs_net *net, 120static struct afs_cell *afs_alloc_cell(struct afs_net *net,
121 const char *name, unsigned int namelen, 121 const char *name, unsigned int namelen,
122 const char *vllist) 122 const char *addresses)
123{ 123{
124 struct afs_cell *cell; 124 struct afs_cell *cell;
125 int i, ret; 125 int i, ret;
@@ -134,7 +134,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
134 if (namelen == 5 && memcmp(name, "@cell", 5) == 0) 134 if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
135 return ERR_PTR(-EINVAL); 135 return ERR_PTR(-EINVAL);
136 136
137 _enter("%*.*s,%s", namelen, namelen, name, vllist); 137 _enter("%*.*s,%s", namelen, namelen, name, addresses);
138 138
139 cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); 139 cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
140 if (!cell) { 140 if (!cell) {
@@ -153,22 +153,23 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
153 (1 << AFS_CELL_FL_NO_LOOKUP_YET)); 153 (1 << AFS_CELL_FL_NO_LOOKUP_YET));
154 INIT_LIST_HEAD(&cell->proc_volumes); 154 INIT_LIST_HEAD(&cell->proc_volumes);
155 rwlock_init(&cell->proc_lock); 155 rwlock_init(&cell->proc_lock);
156 rwlock_init(&cell->vl_addrs_lock); 156 rwlock_init(&cell->vl_servers_lock);
157 157
158 /* Fill in the VL server list if we were given a list of addresses to 158 /* Fill in the VL server list if we were given a list of addresses to
159 * use. 159 * use.
160 */ 160 */
161 if (vllist) { 161 if (addresses) {
162 struct afs_addr_list *alist; 162 struct afs_vlserver_list *vllist;
163 163
164 alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', 164 vllist = afs_parse_text_addrs(net,
165 VL_SERVICE, AFS_VL_PORT); 165 addresses, strlen(addresses), ':',
166 if (IS_ERR(alist)) { 166 VL_SERVICE, AFS_VL_PORT);
167 ret = PTR_ERR(alist); 167 if (IS_ERR(vllist)) {
168 ret = PTR_ERR(vllist);
168 goto parse_failed; 169 goto parse_failed;
169 } 170 }
170 171
171 rcu_assign_pointer(cell->vl_addrs, alist); 172 rcu_assign_pointer(cell->vl_servers, vllist);
172 cell->dns_expiry = TIME64_MAX; 173 cell->dns_expiry = TIME64_MAX;
173 } 174 }
174 175
@@ -356,14 +357,14 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
356 */ 357 */
357static void afs_update_cell(struct afs_cell *cell) 358static void afs_update_cell(struct afs_cell *cell)
358{ 359{
359 struct afs_addr_list *alist, *old; 360 struct afs_vlserver_list *vllist, *old;
360 time64_t now, expiry; 361 time64_t now, expiry;
361 362
362 _enter("%s", cell->name); 363 _enter("%s", cell->name);
363 364
364 alist = afs_dns_query(cell, &expiry); 365 vllist = afs_dns_query(cell, &expiry);
365 if (IS_ERR(alist)) { 366 if (IS_ERR(vllist)) {
366 switch (PTR_ERR(alist)) { 367 switch (PTR_ERR(vllist)) {
367 case -ENODATA: 368 case -ENODATA:
368 /* The DNS said that the cell does not exist */ 369 /* The DNS said that the cell does not exist */
369 set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); 370 set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
@@ -387,12 +388,12 @@ static void afs_update_cell(struct afs_cell *cell)
387 /* Exclusion on changing vl_addrs is achieved by a 388 /* Exclusion on changing vl_addrs is achieved by a
388 * non-reentrant work item. 389 * non-reentrant work item.
389 */ 390 */
390 old = rcu_dereference_protected(cell->vl_addrs, true); 391 old = rcu_dereference_protected(cell->vl_servers, true);
391 rcu_assign_pointer(cell->vl_addrs, alist); 392 rcu_assign_pointer(cell->vl_servers, vllist);
392 cell->dns_expiry = expiry; 393 cell->dns_expiry = expiry;
393 394
394 if (old) 395 if (old)
395 afs_put_addrlist(old); 396 afs_put_vlserverlist(cell->net, old);
396 } 397 }
397 398
398 if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) 399 if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +415,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
414 415
415 ASSERTCMP(atomic_read(&cell->usage), ==, 0); 416 ASSERTCMP(atomic_read(&cell->usage), ==, 0);
416 417
417 afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); 418 afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
418 key_put(cell->anonymous_key); 419 key_put(cell->anonymous_key);
419 kfree(cell); 420 kfree(cell);
420 421
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index f29c6dade7f6..0efed0a63080 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
46 return 0; 46 return 0;
47 } 47 }
48 48
49 ret = dns_query("afsdb", name, len, "", NULL, NULL); 49 ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
50 if (ret == -ENODATA) 50 if (ret == -ENODATA)
51 ret = -EDESTADDRREQ; 51 ret = -EDESTADDRREQ;
52 return ret; 52 return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 81936a4d5035..7e264cb9b4f7 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -22,6 +22,7 @@
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/uuid.h> 23#include <linux/uuid.h>
24#include <linux/mm_types.h> 24#include <linux/mm_types.h>
25#include <linux/dns_resolver.h>
25#include <net/net_namespace.h> 26#include <net/net_namespace.h>
26#include <net/netns/generic.h> 27#include <net/netns/generic.h>
27#include <net/sock.h> 28#include <net/sock.h>
@@ -77,6 +78,8 @@ struct afs_addr_list {
77 unsigned char nr_addrs; 78 unsigned char nr_addrs;
78 unsigned char index; /* Address currently in use */ 79 unsigned char index; /* Address currently in use */
79 unsigned char nr_ipv4; /* Number of IPv4 addresses */ 80 unsigned char nr_ipv4; /* Number of IPv4 addresses */
81 enum dns_record_source source:8;
82 enum dns_lookup_status status:8;
80 unsigned long probed; /* Mask of servers that have been probed */ 83 unsigned long probed; /* Mask of servers that have been probed */
81 unsigned long yfs; /* Mask of servers that are YFS */ 84 unsigned long yfs; /* Mask of servers that are YFS */
82 struct sockaddr_rxrpc addrs[]; 85 struct sockaddr_rxrpc addrs[];
@@ -355,13 +358,52 @@ struct afs_cell {
355 rwlock_t proc_lock; 358 rwlock_t proc_lock;
356 359
357 /* VL server list. */ 360 /* VL server list. */
358 rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ 361 rwlock_t vl_servers_lock; /* Lock on vl_servers */
359 struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ 362 struct afs_vlserver_list __rcu *vl_servers;
363
360 u8 name_len; /* Length of name */ 364 u8 name_len; /* Length of name */
361 char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ 365 char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
362}; 366};
363 367
364/* 368/*
369 * Volume Location server record.
370 */
371struct afs_vlserver {
372 struct rcu_head rcu;
373 struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */
374 unsigned long flags;
375#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
376#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
377 rwlock_t lock; /* Lock on addresses */
378 atomic_t usage;
379 u16 name_len; /* Length of name */
380 u16 port;
381 char name[]; /* Server name, case-flattened */
382};
383
384/*
385 * Weighted list of Volume Location servers.
386 */
387struct afs_vlserver_entry {
388 u16 priority; /* Preference (as SRV) */
389 u16 weight; /* Weight (as SRV) */
390 enum dns_record_source source:8;
391 enum dns_lookup_status status:8;
392 struct afs_vlserver *server;
393};
394
395struct afs_vlserver_list {
396 struct rcu_head rcu;
397 atomic_t usage;
398 u8 nr_servers;
399 u8 index; /* Server currently in use */
400 enum dns_record_source source:8;
401 enum dns_lookup_status status:8;
402 rwlock_t lock;
403 struct afs_vlserver_entry servers[];
404};
405
406/*
365 * Cached VLDB entry. 407 * Cached VLDB entry.
366 * 408 *
367 * This is pointed to by cell->vldb_entries, indexed by name. 409 * This is pointed to by cell->vldb_entries, indexed by name.
@@ -617,6 +659,23 @@ struct afs_addr_cursor {
617}; 659};
618 660
619/* 661/*
662 * Cursor for iterating over a set of volume location servers.
663 */
664struct afs_vl_cursor {
665 struct afs_addr_cursor ac;
666 struct afs_cell *cell; /* The cell we're querying */
667 struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
668 struct key *key; /* Key for the server */
669 unsigned char start; /* Initial index in server list */
670 unsigned char index; /* Number of servers tried beyond start */
671 short error;
672 unsigned short flags;
673#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
674#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */
675#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */
676};
677
678/*
620 * Cursor for iterating over a set of fileservers. 679 * Cursor for iterating over a set of fileservers.
621 */ 680 */
622struct afs_fs_cursor { 681struct afs_fs_cursor {
@@ -662,12 +721,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
662 unsigned short, 721 unsigned short,
663 unsigned short); 722 unsigned short);
664extern void afs_put_addrlist(struct afs_addr_list *); 723extern void afs_put_addrlist(struct afs_addr_list *);
665extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, 724extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
666 unsigned short, unsigned short); 725 const char *, size_t, char,
667extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); 726 unsigned short, unsigned short);
727extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
668extern bool afs_iterate_addresses(struct afs_addr_cursor *); 728extern bool afs_iterate_addresses(struct afs_addr_cursor *);
669extern int afs_end_cursor(struct afs_addr_cursor *); 729extern int afs_end_cursor(struct afs_addr_cursor *);
670extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
671 730
672extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); 731extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
673extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); 732extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -1088,14 +1147,43 @@ extern void afs_fs_exit(void);
1088/* 1147/*
1089 * vlclient.c 1148 * vlclient.c
1090 */ 1149 */
1091extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, 1150extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
1092 struct afs_addr_cursor *, 1151 const char *, int);
1093 struct key *, const char *, int); 1152extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
1094extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
1095 struct key *, const uuid_t *);
1096extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); 1153extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
1097extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, 1154extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
1098 struct key *, const uuid_t *); 1155
1156/*
1157 * vl_rotate.c
1158 */
1159extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
1160 struct afs_cell *, struct key *);
1161extern bool afs_select_vlserver(struct afs_vl_cursor *);
1162extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
1163extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
1164
1165/*
1166 * vlserver_list.c
1167 */
1168static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
1169{
1170 atomic_inc(&vlserver->usage);
1171 return vlserver;
1172}
1173
1174static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
1175{
1176 if (vllist)
1177 atomic_inc(&vllist->usage);
1178 return vllist;
1179}
1180
1181extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
1182extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
1183extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
1184extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
1185extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
1186 const void *, size_t);
1099 1187
1100/* 1188/*
1101 * volume.c 1189 * volume.c
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9101f62707af..6585f4bec0d3 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,6 +17,11 @@
17#include <linux/uaccess.h> 17#include <linux/uaccess.h>
18#include "internal.h" 18#include "internal.h"
19 19
20struct afs_vl_seq_net_private {
21 struct seq_net_private seq; /* Must be first */
22 struct afs_vlserver_list *vllist;
23};
24
20static inline struct afs_net *afs_seq2net(struct seq_file *m) 25static inline struct afs_net *afs_seq2net(struct seq_file *m)
21{ 26{
22 return afs_net(seq_file_net(m)); 27 return afs_net(seq_file_net(m));
@@ -247,61 +252,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
247 .show = afs_proc_cell_volumes_show, 252 .show = afs_proc_cell_volumes_show,
248}; 253};
249 254
255static const char *const dns_record_sources[NR__dns_record_source + 1] = {
256 [DNS_RECORD_UNAVAILABLE] = "unav",
257 [DNS_RECORD_FROM_CONFIG] = "cfg",
258 [DNS_RECORD_FROM_DNS_A] = "A",
259 [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB",
260 [DNS_RECORD_FROM_DNS_SRV] = "SRV",
261 [DNS_RECORD_FROM_NSS] = "nss",
262 [NR__dns_record_source] = "[weird]"
263};
264
265static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
266 [DNS_LOOKUP_NOT_DONE] = "no-lookup",
267 [DNS_LOOKUP_GOOD] = "good",
268 [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad",
269 [DNS_LOOKUP_BAD] = "bad",
270 [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found",
271 [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure",
272 [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure",
273 [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure",
274 [NR__dns_lookup_status] = "[weird]"
275};
276
250/* 277/*
251 * Display the list of Volume Location servers we're using for a cell. 278 * Display the list of Volume Location servers we're using for a cell.
252 */ 279 */
253static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) 280static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
254{ 281{
255 struct sockaddr_rxrpc *addr = v; 282 const struct afs_vl_seq_net_private *priv = m->private;
283 const struct afs_vlserver_list *vllist = priv->vllist;
284 const struct afs_vlserver_entry *entry;
285 const struct afs_vlserver *vlserver;
286 const struct afs_addr_list *alist;
287 int i;
256 288
257 /* display header on line 1 */ 289 if (v == SEQ_START_TOKEN) {
258 if (v == (void *)1) { 290 seq_printf(m, "# source %s, status %s\n",
259 seq_puts(m, "ADDRESS\n"); 291 dns_record_sources[vllist->source],
292 dns_lookup_statuses[vllist->status]);
260 return 0; 293 return 0;
261 } 294 }
262 295
263 /* display one cell per line on subsequent lines */ 296 entry = v;
264 seq_printf(m, "%pISp\n", &addr->transport); 297 vlserver = entry->server;
298 alist = rcu_dereference(vlserver->addresses);
299
300 seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
301 vlserver->name, entry->priority, entry->weight,
302 dns_record_sources[alist ? alist->source : entry->source],
303 dns_lookup_statuses[alist ? alist->status : entry->status]);
304 if (alist) {
305 for (i = 0; i < alist->nr_addrs; i++)
306 seq_printf(m, " %c %pISpc\n",
307 alist->index == i ? '>' : '-',
308 &alist->addrs[i].transport);
309 }
265 return 0; 310 return 0;
266} 311}
267 312
268static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) 313static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
269 __acquires(rcu) 314 __acquires(rcu)
270{ 315{
271 struct afs_addr_list *alist; 316 struct afs_vl_seq_net_private *priv = m->private;
317 struct afs_vlserver_list *vllist;
272 struct afs_cell *cell = PDE_DATA(file_inode(m->file)); 318 struct afs_cell *cell = PDE_DATA(file_inode(m->file));
273 loff_t pos = *_pos; 319 loff_t pos = *_pos;
274 320
275 rcu_read_lock(); 321 rcu_read_lock();
276 322
277 alist = rcu_dereference(cell->vl_addrs); 323 vllist = rcu_dereference(cell->vl_servers);
324 priv->vllist = vllist;
278 325
279 /* allow for the header line */ 326 if (pos < 0)
280 if (!pos) 327 *_pos = pos = 0;
281 return (void *) 1; 328 if (pos == 0)
282 pos--; 329 return SEQ_START_TOKEN;
283 330
284 if (!alist || pos >= alist->nr_addrs) 331 if (!vllist || pos - 1 >= vllist->nr_servers)
285 return NULL; 332 return NULL;
286 333
287 return alist->addrs + pos; 334 return &vllist->servers[pos - 1];
288} 335}
289 336
290static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, 337static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
291 loff_t *_pos) 338 loff_t *_pos)
292{ 339{
293 struct afs_addr_list *alist; 340 struct afs_vl_seq_net_private *priv = m->private;
294 struct afs_cell *cell = PDE_DATA(file_inode(m->file)); 341 struct afs_vlserver_list *vllist = priv->vllist;
295 loff_t pos; 342 loff_t pos;
296 343
297 alist = rcu_dereference(cell->vl_addrs);
298
299 pos = *_pos; 344 pos = *_pos;
300 (*_pos)++; 345 pos++;
301 if (!alist || pos >= alist->nr_addrs) 346 *_pos = pos;
347 if (!vllist || pos - 1 >= vllist->nr_servers)
302 return NULL; 348 return NULL;
303 349
304 return alist->addrs + pos; 350 return &vllist->servers[pos - 1];
305} 351}
306 352
307static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) 353static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
@@ -562,7 +608,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
562 608
563 if (!proc_create_net_data("vlservers", 0444, dir, 609 if (!proc_create_net_data("vlservers", 0444, dir,
564 &afs_proc_cell_vlservers_ops, 610 &afs_proc_cell_vlservers_ops,
565 sizeof(struct seq_net_private), 611 sizeof(struct afs_vl_seq_net_private),
566 cell) || 612 cell) ||
567 !proc_create_net_data("volumes", 0444, dir, 613 !proc_create_net_data("volumes", 0444, dir,
568 &afs_proc_cell_volumes_ops, 614 &afs_proc_cell_volumes_ops,
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1d329e6981d5..6102ea9ee3fb 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -246,41 +246,23 @@ enomem:
246static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, 246static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
247 struct key *key, const uuid_t *uuid) 247 struct key *key, const uuid_t *uuid)
248{ 248{
249 struct afs_addr_cursor ac; 249 struct afs_vl_cursor vc;
250 struct afs_addr_list *alist; 250 struct afs_addr_list *alist = NULL;
251 int ret; 251 int ret;
252 252
253 ret = afs_set_vl_cursor(&ac, cell); 253 ret = -ERESTARTSYS;
254 if (ret < 0) 254 if (afs_begin_vlserver_operation(&vc, cell, key)) {
255 return ERR_PTR(ret); 255 while (afs_select_vlserver(&vc)) {
256 256 if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
257 while (afs_iterate_addresses(&ac)) { 257 alist = afs_yfsvl_get_endpoints(&vc, uuid);
258 if (test_bit(ac.index, &ac.alist->yfs)) 258 else
259 alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); 259 alist = afs_vl_get_addrs_u(&vc, uuid);
260 else
261 alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
262 switch (ac.error) {
263 case 0:
264 afs_end_cursor(&ac);
265 return alist;
266 case -ECONNABORTED:
267 ac.error = afs_abort_to_error(ac.abort_code);
268 goto error;
269 case -ENOMEM:
270 case -ENONET:
271 goto error;
272 case -ENETUNREACH:
273 case -EHOSTUNREACH:
274 case -ECONNREFUSED:
275 break;
276 default:
277 ac.error = -EIO;
278 goto error;
279 } 260 }
261
262 ret = afs_end_vlserver_operation(&vc);
280 } 263 }
281 264
282error: 265 return ret < 0 ? ERR_PTR(ret) : alist;
283 return ERR_PTR(afs_end_cursor(&ac));
284} 266}
285 267
286/* 268/*
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644
index 000000000000..c1e316ba105a
--- /dev/null
+++ b/fs/afs/vl_list.c
@@ -0,0 +1,336 @@
1/* AFS vlserver list management.
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include "internal.h"
15
16struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
17 unsigned short port)
18{
19 struct afs_vlserver *vlserver;
20
21 vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
22 GFP_KERNEL);
23 if (vlserver) {
24 atomic_set(&vlserver->usage, 1);
25 rwlock_init(&vlserver->lock);
26 vlserver->name_len = name_len;
27 vlserver->port = port;
28 memcpy(vlserver->name, name, name_len);
29 }
30 return vlserver;
31}
32
33static void afs_vlserver_rcu(struct rcu_head *rcu)
34{
35 struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
36
37 afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
38 kfree_rcu(vlserver, rcu);
39}
40
41void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
42{
43 if (vlserver) {
44 unsigned int u = atomic_dec_return(&vlserver->usage);
45 //_debug("VL PUT %p{%u}", vlserver, u);
46
47 if (u == 0)
48 call_rcu(&vlserver->rcu, afs_vlserver_rcu);
49 }
50}
51
52struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
53{
54 struct afs_vlserver_list *vllist;
55
56 vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
57 if (vllist) {
58 atomic_set(&vllist->usage, 1);
59 rwlock_init(&vllist->lock);
60 }
61
62 return vllist;
63}
64
65void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
66{
67 if (vllist) {
68 unsigned int u = atomic_dec_return(&vllist->usage);
69
70 //_debug("VLLS PUT %p{%u}", vllist, u);
71 if (u == 0) {
72 int i;
73
74 for (i = 0; i < vllist->nr_servers; i++) {
75 afs_put_vlserver(net, vllist->servers[i].server);
76 }
77 kfree_rcu(vllist, rcu);
78 }
79 }
80}
81
82static u16 afs_extract_le16(const u8 **_b)
83{
84 u16 val;
85
86 val = (u16)*(*_b)++ << 0;
87 val |= (u16)*(*_b)++ << 8;
88 return val;
89}
90
91/*
92 * Build a VL server address list from a DNS queried server list.
93 */
94static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
95 u8 nr_addrs, u16 port)
96{
97 struct afs_addr_list *alist;
98 const u8 *b = *_b;
99 int ret = -EINVAL;
100
101 alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
102 if (!alist)
103 return ERR_PTR(-ENOMEM);
104 if (nr_addrs == 0)
105 return alist;
106
107 for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
108 struct dns_server_list_v1_address hdr;
109 __be32 x[4];
110
111 hdr.address_type = *b++;
112
113 switch (hdr.address_type) {
114 case DNS_ADDRESS_IS_IPV4:
115 if (end - b < 4) {
116 _leave(" = -EINVAL [short inet]");
117 goto error;
118 }
119 memcpy(x, b, 4);
120 afs_merge_fs_addr4(alist, x[0], port);
121 b += 4;
122 break;
123
124 case DNS_ADDRESS_IS_IPV6:
125 if (end - b < 16) {
126 _leave(" = -EINVAL [short inet6]");
127 goto error;
128 }
129 memcpy(x, b, 16);
130 afs_merge_fs_addr6(alist, x, port);
131 b += 16;
132 break;
133
134 default:
135 _leave(" = -EADDRNOTAVAIL [unknown af %u]",
136 hdr.address_type);
137 ret = -EADDRNOTAVAIL;
138 goto error;
139 }
140 }
141
142 /* Start with IPv6 if available. */
143 if (alist->nr_ipv4 < alist->nr_addrs)
144 alist->index = alist->nr_ipv4;
145
146 *_b = b;
147 return alist;
148
149error:
150 *_b = b;
151 afs_put_addrlist(alist);
152 return ERR_PTR(ret);
153}
154
155/*
156 * Build a VL server list from a DNS queried server list.
157 */
158struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
159 const void *buffer,
160 size_t buffer_size)
161{
162 const struct dns_server_list_v1_header *hdr = buffer;
163 struct dns_server_list_v1_server bs;
164 struct afs_vlserver_list *vllist, *previous;
165 struct afs_addr_list *addrs;
166 struct afs_vlserver *server;
167 const u8 *b = buffer, *end = buffer + buffer_size;
168 int ret = -ENOMEM, nr_servers, i, j;
169
170 _enter("");
171
172 /* Check that it's a server list, v1 */
173 if (end - b < sizeof(*hdr) ||
174 hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
175 hdr->hdr.version != 1) {
176 pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
177 hdr->hdr.content, hdr->hdr.version, end - b);
178 ret = -EDESTADDRREQ;
179 goto dump;
180 }
181
182 nr_servers = hdr->nr_servers;
183
184 vllist = afs_alloc_vlserver_list(nr_servers);
185 if (!vllist)
186 return ERR_PTR(-ENOMEM);
187
188 vllist->source = (hdr->source < NR__dns_record_source) ?
189 hdr->source : NR__dns_record_source;
190 vllist->status = (hdr->status < NR__dns_lookup_status) ?
191 hdr->status : NR__dns_lookup_status;
192
193 read_lock(&cell->vl_servers_lock);
194 previous = afs_get_vlserverlist(
195 rcu_dereference_protected(cell->vl_servers,
196 lockdep_is_held(&cell->vl_servers_lock)));
197 read_unlock(&cell->vl_servers_lock);
198
199 b += sizeof(*hdr);
200 while (end - b >= sizeof(bs)) {
201 bs.name_len = afs_extract_le16(&b);
202 bs.priority = afs_extract_le16(&b);
203 bs.weight = afs_extract_le16(&b);
204 bs.port = afs_extract_le16(&b);
205 bs.source = *b++;
206 bs.status = *b++;
207 bs.protocol = *b++;
208 bs.nr_addrs = *b++;
209
210 _debug("extract %u %u %u %u %u %u %*.*s",
211 bs.name_len, bs.priority, bs.weight,
212 bs.port, bs.protocol, bs.nr_addrs,
213 bs.name_len, bs.name_len, b);
214
215 if (end - b < bs.name_len)
216 break;
217
218 ret = -EPROTONOSUPPORT;
219 if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
220 bs.protocol = DNS_SERVER_PROTOCOL_UDP;
221 } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
222 _leave(" = [proto %u]", bs.protocol);
223 goto error;
224 }
225
226 if (bs.port == 0)
227 bs.port = AFS_VL_PORT;
228 if (bs.source > NR__dns_record_source)
229 bs.source = NR__dns_record_source;
230 if (bs.status > NR__dns_lookup_status)
231 bs.status = NR__dns_lookup_status;
232
233 server = NULL;
234 if (previous) {
235 /* See if we can update an old server record */
236 for (i = 0; i < previous->nr_servers; i++) {
237 struct afs_vlserver *p = previous->servers[i].server;
238
239 if (p->name_len == bs.name_len &&
240 p->port == bs.port &&
241 strncasecmp(b, p->name, bs.name_len) == 0) {
242 server = afs_get_vlserver(p);
243 break;
244 }
245 }
246 }
247
248 if (!server) {
249 ret = -ENOMEM;
250 server = afs_alloc_vlserver(b, bs.name_len, bs.port);
251 if (!server)
252 goto error;
253 }
254
255 b += bs.name_len;
256
257 /* Extract the addresses - note that we can't skip this as we
258 * have to advance the payload pointer.
259 */
260 addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
261 if (IS_ERR(addrs)) {
262 ret = PTR_ERR(addrs);
263 goto error_2;
264 }
265
266 if (vllist->nr_servers >= nr_servers) {
267 _debug("skip %u >= %u", vllist->nr_servers, nr_servers);
268 afs_put_addrlist(addrs);
269 afs_put_vlserver(cell->net, server);
270 continue;
271 }
272
273 addrs->source = bs.source;
274 addrs->status = bs.status;
275
276 if (addrs->nr_addrs == 0) {
277 afs_put_addrlist(addrs);
278 if (!rcu_access_pointer(server->addresses)) {
279 afs_put_vlserver(cell->net, server);
280 continue;
281 }
282 } else {
283 struct afs_addr_list *old = addrs;
284
285 write_lock(&server->lock);
286 rcu_swap_protected(server->addresses, old,
287 lockdep_is_held(&server->lock));
288 write_unlock(&server->lock);
289 afs_put_addrlist(old);
290 }
291
292
293 /* TODO: Might want to check for duplicates */
294
295 /* Insertion-sort by priority and weight */
296 for (j = 0; j < vllist->nr_servers; j++) {
297 if (bs.priority < vllist->servers[j].priority)
298 break; /* Lower preferable */
299 if (bs.priority == vllist->servers[j].priority &&
300 bs.weight > vllist->servers[j].weight)
301 break; /* Higher preferable */
302 }
303
304 if (j < vllist->nr_servers) {
305 memmove(vllist->servers + j + 1,
306 vllist->servers + j,
307 (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
308 }
309
310 vllist->servers[j].priority = bs.priority;
311 vllist->servers[j].weight = bs.weight;
312 vllist->servers[j].server = server;
313 vllist->nr_servers++;
314 }
315
316 if (b != end) {
317 _debug("parse error %zd", b - end);
318 goto error;
319 }
320
321 afs_put_vlserverlist(cell->net, previous);
322 _leave(" = ok [%u]", vllist->nr_servers);
323 return vllist;
324
325error_2:
326 afs_put_vlserver(cell->net, server);
327error:
328 afs_put_vlserverlist(cell->net, vllist);
329 afs_put_vlserverlist(cell->net, previous);
330dump:
331 if (ret != -ENOMEM) {
332 printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
333 print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
334 }
335 return ERR_PTR(ret);
336}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644
index 000000000000..44a936ad9c7a
--- /dev/null
+++ b/fs/afs/vl_rotate.c
@@ -0,0 +1,251 @@
1/* Handle vlserver selection and rotation.
2 *
3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/sched/signal.h>
15#include "internal.h"
16#include "afs_vl.h"
17
18/*
19 * Begin an operation on a volume location server.
20 */
21bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
22 struct key *key)
23{
24 memset(vc, 0, sizeof(*vc));
25 vc->cell = cell;
26 vc->key = key;
27 vc->error = -EDESTADDRREQ;
28 vc->ac.error = SHRT_MAX;
29
30 if (signal_pending(current)) {
31 vc->error = -EINTR;
32 vc->flags |= AFS_VL_CURSOR_STOP;
33 return false;
34 }
35
36 return true;
37}
38
39/*
40 * Begin iteration through a server list, starting with the last used server if
41 * possible, or the last recorded good server if not.
42 */
43static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
44{
45 struct afs_cell *cell = vc->cell;
46
47 if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
48 TASK_INTERRUPTIBLE)) {
49 vc->error = -ERESTARTSYS;
50 return false;
51 }
52
53 read_lock(&cell->vl_servers_lock);
54 vc->server_list = afs_get_vlserverlist(
55 rcu_dereference_protected(cell->vl_servers,
56 lockdep_is_held(&cell->vl_servers_lock)));
57 read_unlock(&cell->vl_servers_lock);
58 if (!vc->server_list || !vc->server_list->nr_servers)
59 return false;
60
61 vc->start = READ_ONCE(vc->server_list->index);
62 vc->index = vc->start;
63 return true;
64}
65
66/*
67 * Select the vlserver to use. May be called multiple times to rotate
68 * through the vlservers.
 *
 * Each call first inspects vc->ac.error, the outcome of the previous attempt
 * (SHRT_MAX meaning that no attempt has been made yet), and decides whether
 * to stop, to try the next address of the current server or to move on to the
 * next server in the list.
 *
 * Returns true with vc->ac set up on an address to try, or false once
 * rotation has finished, in which case AFS_VL_CURSOR_STOP is set in vc->flags
 * and vc->error holds the result.
69 */
70bool afs_select_vlserver(struct afs_vl_cursor *vc)
71{
72 struct afs_addr_list *alist;
73 struct afs_vlserver *vlserver;
74 int error = vc->ac.error;
75
76 _enter("%u/%u,%u/%u,%d,%d",
77 vc->index, vc->start,
78 vc->ac.index, vc->ac.start,
79 error, vc->ac.abort_code);
80
 /* Once stopped, the cursor stays stopped. */
81 if (vc->flags & AFS_VL_CURSOR_STOP) {
82 _leave(" = f [stopped]");
83 return false;
84 }
85
86 /* Evaluate the result of the previous operation, if there was one. */
87 switch (error) {
88 case SHRT_MAX:
 /* Nothing has been tried yet. */
89 goto start;
90
91 default:
92 case 0:
93 /* Success or local failure. Stop. */
94 vc->error = error;
95 vc->flags |= AFS_VL_CURSOR_STOP;
96 _leave(" = f [okay/local %d]", vc->ac.error);
97 return false;
98
99 case -ECONNABORTED:
100 /* The far side rejected the operation on some grounds. This
101 * might involve the server being busy or the volume having been moved.
102 */
103 switch (vc->ac.abort_code) {
104 case AFSVL_IO:
105 case AFSVL_BADVOLOPER:
106 case AFSVL_NOMEM:
107 /* The server went weird. */
108 vc->error = -EREMOTEIO;
109 //write_lock(&vc->cell->vl_servers_lock);
110 //vc->server_list->weird_mask |= 1 << vc->index;
111 //write_unlock(&vc->cell->vl_servers_lock);
112 goto next_server;
113
114 default:
 /* Any other abort ends the rotation with the translated error. */
115 vc->error = afs_abort_to_error(vc->ac.abort_code);
116 goto failed;
117 }
118
119 case -ENETUNREACH:
120 case -EHOSTUNREACH:
121 case -ECONNREFUSED:
122 case -ETIMEDOUT:
123 case -ETIME:
 /* Couldn't reach that address; try the server's next one. */
124 _debug("no conn %d", error);
125 vc->error = error;
126 goto iterate_address;
127
128 case -ECONNRESET:
 /* Move to the next server and note that a second pass through the
  * list is wanted if no server gives an answer.
  */
129 _debug("call reset");
130 vc->error = error;
131 vc->flags |= AFS_VL_CURSOR_RETRY;
132 goto next_server;
133 }
134
 /* Drop the server list and go round again with a freshly obtained one.
  * AFS_VL_CURSOR_RETRIED limits this to a single restart.
  */
135restart_from_beginning:
136 _debug("restart");
137 afs_end_cursor(&vc->ac);
138 afs_put_vlserverlist(vc->cell->net, vc->server_list);
139 vc->server_list = NULL;
140 if (vc->flags & AFS_VL_CURSOR_RETRIED)
141 goto failed;
142 vc->flags |= AFS_VL_CURSOR_RETRIED;
143start:
144 _debug("start");
145
146 /* TODO: Consider checking the VL server list */
147
148 if (!afs_start_vl_iteration(vc))
149 goto failed;
150
151use_server:
152 _debug("use");
153 /* We're starting on a different vlserver from the list. We need to
154 * check it, find its address list and probe its capabilities before we
155 * use it.
156 */
157 ASSERTCMP(vc->ac.alist, ==, NULL);
158 vlserver = vc->server_list->servers[vc->index].server;
159
160 // TODO: Check the vlserver occasionally
161 //if (!afs_check_vlserver_record(vc, vlserver))
162 // goto failed;
163
164 _debug("USING VLSERVER: %s", vlserver->name);
165
 /* Take a reference on the server's current address list under its lock. */
166 read_lock(&vlserver->lock);
167 alist = rcu_dereference_protected(vlserver->addresses,
168 lockdep_is_held(&vlserver->lock));
169 afs_get_addrlist(alist);
170 read_unlock(&vlserver->lock);
171
172 memset(&vc->ac, 0, sizeof(vc->ac));
173
174 /* Probe the current vlserver if we haven't done so yet. */
175#if 0 // TODO
176 if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) {
177 vc->ac.alist = afs_get_addrlist(alist);
178
179 if (!afs_probe_vlserver(vc)) {
180 error = vc->ac.error;
181 switch (error) {
182 case -ENOMEM:
183 case -ERESTARTSYS:
184 case -EINTR:
185 goto failed_set_error;
186 default:
187 goto next_server;
188 }
189 }
190 }
191#endif
192
 /* With the probe above compiled out, vc->ac.alist is always NULL here
  * (the cursor was just cleared), so the reference taken above is handed
  * to the address cursor.
  */
193 if (!vc->ac.alist)
194 vc->ac.alist = alist;
195 else
196 afs_put_addrlist(alist);
197
198 vc->ac.start = READ_ONCE(alist->index);
199 vc->ac.index = vc->ac.start;
200
201iterate_address:
202 ASSERT(vc->ac.alist);
203 _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
204 /* Iterate over the current server's address list to try and find an
205 * address on which it will respond to us.
206 */
207 if (!afs_iterate_addresses(&vc->ac))
208 goto next_server;
209
210 _leave(" = t %pISpc", &vc->ac.addr->transport);
211 return true;
212
213next_server:
214 _debug("next");
215 afs_end_cursor(&vc->ac);
 /* Advance to the next server, wrapping at the end of the list; arriving
  * back at the starting index means every server has been tried.
  */
216 vc->index++;
217 if (vc->index >= vc->server_list->nr_servers)
218 vc->index = 0;
219 if (vc->index != vc->start)
220 goto use_server;
221
222 /* That's all the servers poked to no good effect. Try again if some
223 * of them were busy.
224 */
225 if (vc->flags & AFS_VL_CURSOR_RETRY)
226 goto restart_from_beginning;
227
228 goto failed;
229
230failed:
231 vc->flags |= AFS_VL_CURSOR_STOP;
232 afs_end_cursor(&vc->ac);
233 _leave(" = f [failed %d]", vc->error);
234 return false;
235}
236
237/*
238 * Tidy up a volume location server cursor and unlock the vnode.
239 */
240int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
241{
242 struct afs_net *net = vc->cell->net;
243
244 afs_end_cursor(&vc->ac);
245 afs_put_vlserverlist(net, vc->server_list);
246
247 if (vc->error == -ECONNABORTED)
248 vc->error = afs_abort_to_error(vc->ac.abort_code);
249
250 return vc->error;
251}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index e18c51742daa..3127ab9b5521 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
128 * Dispatch a get volume entry by name or ID operation (uuid variant). If the 128 * Dispatch a get volume entry by name or ID operation (uuid variant). If the
129 * volname is a decimal number then it's a volume ID not a volume name. 129 * volname is a decimal number then it's a volume ID not a volume name.
130 */ 130 */
131struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, 131struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
132 struct afs_addr_cursor *ac,
133 struct key *key,
134 const char *volname, 132 const char *volname,
135 int volnamesz) 133 int volnamesz)
136{ 134{
137 struct afs_vldb_entry *entry; 135 struct afs_vldb_entry *entry;
138 struct afs_call *call; 136 struct afs_call *call;
137 struct afs_net *net = vc->cell->net;
139 size_t reqsz, padsz; 138 size_t reqsz, padsz;
140 __be32 *bp; 139 __be32 *bp;
141 140
@@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
155 return ERR_PTR(-ENOMEM); 154 return ERR_PTR(-ENOMEM);
156 } 155 }
157 156
158 call->key = key; 157 call->key = vc->key;
159 call->reply[0] = entry; 158 call->reply[0] = entry;
160 call->ret_reply0 = true; 159 call->ret_reply0 = true;
161 160
@@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
168 memset((void *)bp + volnamesz, 0, padsz); 167 memset((void *)bp + volnamesz, 0, padsz);
169 168
170 trace_afs_make_vl_call(call); 169 trace_afs_make_vl_call(call);
171 return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); 170 return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
172} 171}
173 172
174/* 173/*
@@ -266,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
266 * Dispatch an operation to get the addresses for a server, where the server is 265 * Dispatch an operation to get the addresses for a server, where the server is
267 * nominated by UUID. 266 * nominated by UUID.
268 */ 267 */
269struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, 268struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
270 struct afs_addr_cursor *ac,
271 struct key *key,
272 const uuid_t *uuid) 269 const uuid_t *uuid)
273{ 270{
274 struct afs_ListAddrByAttributes__xdr *r; 271 struct afs_ListAddrByAttributes__xdr *r;
275 const struct afs_uuid *u = (const struct afs_uuid *)uuid; 272 const struct afs_uuid *u = (const struct afs_uuid *)uuid;
276 struct afs_call *call; 273 struct afs_call *call;
274 struct afs_net *net = vc->cell->net;
277 __be32 *bp; 275 __be32 *bp;
278 int i; 276 int i;
279 277
@@ -285,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
285 if (!call) 283 if (!call)
286 return ERR_PTR(-ENOMEM); 284 return ERR_PTR(-ENOMEM);
287 285
288 call->key = key; 286 call->key = vc->key;
289 call->reply[0] = NULL; 287 call->reply[0] = NULL;
290 call->ret_reply0 = true; 288 call->ret_reply0 = true;
291 289
@@ -306,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
306 r->uuid.node[i] = htonl(u->node[i]); 304 r->uuid.node[i] = htonl(u->node[i]);
307 305
308 trace_afs_make_vl_call(call); 306 trace_afs_make_vl_call(call);
309 return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); 307 return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
310} 308}
311 309
312/* 310/*
@@ -367,14 +365,13 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
367}; 365};
368 366
369/* 367/*
370 * Probe a fileserver for the capabilities that it supports. This can 368 * Probe a volume server for the capabilities that it supports. This can
371 * return up to 196 words. 369 * return up to 196 words.
372 * 370 *
373 * We use this to probe for service upgrade to determine what the server at the 371 * We use this to probe for service upgrade to determine what the server at the
374 * other end supports. 372 * other end supports.
375 */ 373 */
376int afs_vl_get_capabilities(struct afs_net *net, 374int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
377 struct afs_addr_cursor *ac,
378 struct key *key) 375 struct key *key)
379{ 376{
380 struct afs_call *call; 377 struct afs_call *call;
@@ -617,12 +614,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
617 * Dispatch an operation to get the addresses for a server, where the server is 614 * Dispatch an operation to get the addresses for a server, where the server is
618 * nominated by UUID. 615 * nominated by UUID.
619 */ 616 */
620struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, 617struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
621 struct afs_addr_cursor *ac,
622 struct key *key,
623 const uuid_t *uuid) 618 const uuid_t *uuid)
624{ 619{
625 struct afs_call *call; 620 struct afs_call *call;
621 struct afs_net *net = vc->cell->net;
626 __be32 *bp; 622 __be32 *bp;
627 623
628 _enter(""); 624 _enter("");
@@ -633,7 +629,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
633 if (!call) 629 if (!call)
634 return ERR_PTR(-ENOMEM); 630 return ERR_PTR(-ENOMEM);
635 631
636 call->key = key; 632 call->key = vc->key;
637 call->reply[0] = NULL; 633 call->reply[0] = NULL;
638 call->ret_reply0 = true; 634 call->ret_reply0 = true;
639 635
@@ -644,5 +640,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
644 memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ 640 memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
645 641
646 trace_afs_make_vl_call(call); 642 trace_afs_make_vl_call(call);
647 return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); 643 return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
648} 644}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 3037bd01f617..1cd263fa6028 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -74,55 +74,35 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
74 const char *volname, 74 const char *volname,
75 size_t volnamesz) 75 size_t volnamesz)
76{ 76{
77 struct afs_addr_cursor ac; 77 struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
78 struct afs_vldb_entry *vldb; 78 struct afs_vl_cursor vc;
79 int ret; 79 int ret;
80 80
81 ret = afs_set_vl_cursor(&ac, cell); 81 if (!afs_begin_vlserver_operation(&vc, cell, key))
82 if (ret < 0) 82 return ERR_PTR(-ERESTARTSYS);
83 return ERR_PTR(ret);
84 83
85 while (afs_iterate_addresses(&ac)) { 84 while (afs_select_vlserver(&vc)) {
86 if (!test_bit(ac.index, &ac.alist->probed)) { 85 if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) {
87 ret = afs_vl_get_capabilities(cell->net, &ac, key); 86 ret = afs_vl_get_capabilities(cell->net, &vc.ac, key);
88 switch (ret) { 87 switch (ret) {
89 case VL_SERVICE: 88 case VL_SERVICE:
90 clear_bit(ac.index, &ac.alist->yfs); 89 clear_bit(vc.ac.index, &vc.ac.alist->yfs);
91 set_bit(ac.index, &ac.alist->probed); 90 set_bit(vc.ac.index, &vc.ac.alist->probed);
92 ac.addr->srx_service = ret; 91 vc.ac.addr->srx_service = ret;
93 break; 92 break;
94 case YFS_VL_SERVICE: 93 case YFS_VL_SERVICE:
95 set_bit(ac.index, &ac.alist->yfs); 94 set_bit(vc.ac.index, &vc.ac.alist->yfs);
96 set_bit(ac.index, &ac.alist->probed); 95 set_bit(vc.ac.index, &vc.ac.alist->probed);
97 ac.addr->srx_service = ret; 96 vc.ac.addr->srx_service = ret;
98 break; 97 break;
99 } 98 }
100 } 99 }
101 100
102 vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, 101 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
103 volname, volnamesz);
104 switch (ac.error) {
105 case 0:
106 afs_end_cursor(&ac);
107 return vldb;
108 case -ECONNABORTED:
109 ac.error = afs_abort_to_error(ac.abort_code);
110 goto error;
111 case -ENOMEM:
112 case -ENONET:
113 goto error;
114 case -ENETUNREACH:
115 case -EHOSTUNREACH:
116 case -ECONNREFUSED:
117 break;
118 default:
119 ac.error = -EIO;
120 goto error;
121 }
122 } 102 }
123 103
124error: 104 ret = afs_end_vlserver_operation(&vc);
125 return ERR_PTR(afs_end_cursor(&ac)); 105 return ret < 0 ? ERR_PTR(ret) : vldb;
126} 106}
127 107
128/* 108/*