diff options
author | David Howells <dhowells@redhat.com> | 2017-11-02 11:27:50 -0400 |
---|---|---|
committer | David Howells <dhowells@redhat.com> | 2017-11-13 10:38:18 -0500 |
commit | 989782dcdc91a5e6d5999c7a52a84a60a0811e56 (patch) | |
tree | 138ed46554536280e0d4d1834a16c28740e8cdae /fs | |
parent | be080a6f43c40976afc950ee55e9b7f8e2b53525 (diff) |
afs: Overhaul cell database management
Overhaul the way that the in-kernel AFS client keeps track of cells in the
following manner:
(1) Cells are now held in an rbtree, which makes walking them quicker, and
are RCU managed (though this is probably overkill).
(2) Cells now have a manager work item that:
(A) Looks after fetching and refreshing the VL server list.
(B) Manages cell record lifetime, including initialisation and
destruction.
(C) Manages cell record caching whereby records are kept around for a
certain time after last use and then destroyed.
(D) Manages the FS-Cache index cookie for a cell. It is not permitted
for a cookie to be in use twice, so we have to be careful to not
allow a new cell record to exist at the same time as an old record
of the same name.
(3) Each AFS network namespace is given a manager work item that manages
the cells within it, maintaining a single timer to prod cells into
updating their DNS records.
This uses the timer_reduce() facility to make the timer expire at the
soonest timed event that needs to happen.
(4) When a module is being unloaded, cells and cell managers are now
counted out using dec_after_work() to make sure the module text is
pinned until after the data structures have been cleaned up.
(5) Each cell's VL server list is now protected by a seqlock rather than a
semaphore.
Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/cell.c | 916 | ||||
-rw-r--r-- | fs/afs/internal.h | 60 | ||||
-rw-r--r-- | fs/afs/main.c | 16 | ||||
-rw-r--r-- | fs/afs/proc.c | 15 | ||||
-rw-r--r-- | fs/afs/super.c | 12 | ||||
-rw-r--r-- | fs/afs/xattr.c | 2 |
6 files changed, 704 insertions, 317 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 216821fd1a61..e83103e8a6fb 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS cell and server record management | 1 | /* AFS cell and server record management |
2 | * | 2 | * |
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -19,128 +19,194 @@ | |||
19 | #include <keys/rxrpc-type.h> | 19 | #include <keys/rxrpc-type.h> |
20 | #include "internal.h" | 20 | #include "internal.h" |
21 | 21 | ||
22 | unsigned __read_mostly afs_cell_gc_delay = 10; | ||
23 | |||
24 | static void afs_manage_cell(struct work_struct *); | ||
25 | |||
26 | static void afs_dec_cells_outstanding(struct afs_net *net) | ||
27 | { | ||
28 | if (atomic_dec_and_test(&net->cells_outstanding)) | ||
29 | wake_up_atomic_t(&net->cells_outstanding); | ||
30 | } | ||
31 | |||
22 | /* | 32 | /* |
23 | * allocate a cell record and fill in its name, VL server address list and | 33 | * Set the cell timer to fire after a given delay, assuming it's not already |
24 | * allocate an anonymous key | 34 | * set for an earlier time. |
25 | */ | 35 | */ |
26 | static struct afs_cell *afs_cell_alloc(struct afs_net *net, | 36 | static void afs_set_cell_timer(struct afs_net *net, time64_t delay) |
27 | const char *name, unsigned namelen, | ||
28 | char *vllist) | ||
29 | { | 37 | { |
30 | struct afs_cell *cell; | 38 | if (net->live) { |
31 | struct key *key; | 39 | atomic_inc(&net->cells_outstanding); |
32 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; | 40 | if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) |
33 | char *dvllist = NULL, *_vllist = NULL; | 41 | afs_dec_cells_outstanding(net); |
34 | char delimiter = ':'; | 42 | } |
35 | int ret, i; | 43 | } |
44 | |||
45 | /* | ||
46 | * Look up and get an activation reference on a cell record under RCU | ||
47 | * conditions. The caller must hold the RCU read lock. | ||
48 | */ | ||
49 | struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, | ||
50 | const char *name, unsigned int namesz) | ||
51 | { | ||
52 | struct afs_cell *cell = NULL; | ||
53 | struct rb_node *p; | ||
54 | int n, seq = 0, ret = 0; | ||
55 | |||
56 | _enter("%*.*s", namesz, namesz, name); | ||
57 | |||
58 | if (name && namesz == 0) | ||
59 | return ERR_PTR(-EINVAL); | ||
60 | if (namesz > AFS_MAXCELLNAME) | ||
61 | return ERR_PTR(-ENAMETOOLONG); | ||
62 | |||
63 | do { | ||
64 | /* Unfortunately, rbtree walking doesn't give reliable results | ||
65 | * under just the RCU read lock, so we have to check for | ||
66 | * changes. | ||
67 | */ | ||
68 | if (cell) | ||
69 | afs_put_cell(net, cell); | ||
70 | cell = NULL; | ||
71 | ret = -ENOENT; | ||
72 | |||
73 | read_seqbegin_or_lock(&net->cells_lock, &seq); | ||
74 | |||
75 | if (!name) { | ||
76 | cell = rcu_dereference_raw(net->ws_cell); | ||
77 | if (cell) { | ||
78 | afs_get_cell(cell); | ||
79 | continue; | ||
80 | } | ||
81 | ret = -EDESTADDRREQ; | ||
82 | continue; | ||
83 | } | ||
84 | |||
85 | p = rcu_dereference_raw(net->cells.rb_node); | ||
86 | while (p) { | ||
87 | cell = rb_entry(p, struct afs_cell, net_node); | ||
88 | |||
89 | n = strncasecmp(cell->name, name, | ||
90 | min_t(size_t, cell->name_len, namesz)); | ||
91 | if (n == 0) | ||
92 | n = cell->name_len - namesz; | ||
93 | if (n < 0) { | ||
94 | p = rcu_dereference_raw(p->rb_left); | ||
95 | } else if (n > 0) { | ||
96 | p = rcu_dereference_raw(p->rb_right); | ||
97 | } else { | ||
98 | if (atomic_inc_not_zero(&cell->usage)) { | ||
99 | ret = 0; | ||
100 | break; | ||
101 | } | ||
102 | /* We want to repeat the search, this time with | ||
103 | * the lock properly locked. | ||
104 | */ | ||
105 | } | ||
106 | cell = NULL; | ||
107 | } | ||
36 | 108 | ||
37 | _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); | 109 | } while (need_seqretry(&net->cells_lock, seq)); |
38 | 110 | ||
39 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ | 111 | done_seqretry(&net->cells_lock, seq); |
40 | 112 | ||
113 | return ret == 0 ? cell : ERR_PTR(ret); | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Set up a cell record and fill in its name, VL server address list and | ||
118 | * allocate an anonymous key | ||
119 | */ | ||
120 | static struct afs_cell *afs_alloc_cell(struct afs_net *net, | ||
121 | const char *name, unsigned int namelen, | ||
122 | const char *vllist) | ||
123 | { | ||
124 | struct afs_cell *cell; | ||
125 | int i, ret; | ||
126 | |||
127 | ASSERT(name); | ||
128 | if (namelen == 0) | ||
129 | return ERR_PTR(-EINVAL); | ||
41 | if (namelen > AFS_MAXCELLNAME) { | 130 | if (namelen > AFS_MAXCELLNAME) { |
42 | _leave(" = -ENAMETOOLONG"); | 131 | _leave(" = -ENAMETOOLONG"); |
43 | return ERR_PTR(-ENAMETOOLONG); | 132 | return ERR_PTR(-ENAMETOOLONG); |
44 | } | 133 | } |
45 | 134 | ||
46 | /* allocate and initialise a cell record */ | 135 | _enter("%*.*s,%s", namelen, namelen, name, vllist); |
47 | cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); | 136 | |
137 | cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); | ||
48 | if (!cell) { | 138 | if (!cell) { |
49 | _leave(" = -ENOMEM"); | 139 | _leave(" = -ENOMEM"); |
50 | return ERR_PTR(-ENOMEM); | 140 | return ERR_PTR(-ENOMEM); |
51 | } | 141 | } |
52 | 142 | ||
53 | memcpy(cell->name, name, namelen); | ||
54 | cell->name[namelen] = 0; | ||
55 | |||
56 | atomic_set(&cell->usage, 1); | ||
57 | INIT_LIST_HEAD(&cell->link); | ||
58 | cell->net = net; | 143 | cell->net = net; |
144 | cell->name_len = namelen; | ||
145 | for (i = 0; i < namelen; i++) | ||
146 | cell->name[i] = tolower(name[i]); | ||
147 | |||
148 | atomic_set(&cell->usage, 2); | ||
149 | INIT_WORK(&cell->manager, afs_manage_cell); | ||
59 | rwlock_init(&cell->servers_lock); | 150 | rwlock_init(&cell->servers_lock); |
60 | INIT_LIST_HEAD(&cell->servers); | 151 | INIT_LIST_HEAD(&cell->servers); |
61 | init_rwsem(&cell->vl_sem); | 152 | init_rwsem(&cell->vl_sem); |
62 | INIT_LIST_HEAD(&cell->vl_list); | 153 | INIT_LIST_HEAD(&cell->vl_list); |
63 | spin_lock_init(&cell->vl_lock); | 154 | spin_lock_init(&cell->vl_lock); |
155 | seqlock_init(&cell->vl_addrs_lock); | ||
156 | cell->flags = (1 << AFS_CELL_FL_NOT_READY); | ||
64 | 157 | ||
65 | for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) { | 158 | for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) { |
66 | struct sockaddr_rxrpc *srx = &cell->vl_addrs[i]; | 159 | struct sockaddr_rxrpc *srx = &cell->vl_addrs[i]; |
67 | srx->srx_family = AF_RXRPC; | 160 | srx->srx_family = AF_RXRPC; |
68 | srx->srx_service = VL_SERVICE; | 161 | srx->srx_service = VL_SERVICE; |
69 | srx->transport_type = SOCK_DGRAM; | 162 | srx->transport_type = SOCK_DGRAM; |
70 | srx->transport.sin.sin_port = htons(AFS_VL_PORT); | 163 | srx->transport.sin6.sin6_family = AF_INET6; |
164 | srx->transport.sin6.sin6_port = htons(AFS_VL_PORT); | ||
71 | } | 165 | } |
72 | 166 | ||
73 | /* if the ip address is invalid, try dns query */ | 167 | /* Fill in the VL server list if we were given a list of addresses to |
74 | if (!vllist || strlen(vllist) < 7) { | 168 | * use. |
75 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); | 169 | */ |
76 | if (ret < 0) { | 170 | if (vllist) { |
77 | if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) | 171 | char delim = ':'; |
78 | /* translate these errors into something | ||
79 | * userspace might understand */ | ||
80 | ret = -EDESTADDRREQ; | ||
81 | _leave(" = %d", ret); | ||
82 | return ERR_PTR(ret); | ||
83 | } | ||
84 | _vllist = dvllist; | ||
85 | |||
86 | /* change the delimiter for user-space reply */ | ||
87 | delimiter = ','; | ||
88 | 172 | ||
89 | } else { | ||
90 | if (strchr(vllist, ',') || !strchr(vllist, '.')) | 173 | if (strchr(vllist, ',') || !strchr(vllist, '.')) |
91 | delimiter = ','; | 174 | delim = ','; |
92 | _vllist = vllist; | 175 | |
93 | } | 176 | do { |
94 | 177 | struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; | |
95 | /* fill in the VL server list from the rest of the string */ | 178 | |
96 | do { | 179 | if (in4_pton(vllist, -1, |
97 | struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; | 180 | (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], |
98 | const char *end; | 181 | delim, &vllist)) { |
99 | 182 | srx->transport_len = sizeof(struct sockaddr_in6); | |
100 | next = strchr(_vllist, delimiter); | 183 | srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; |
101 | if (next) | 184 | srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; |
102 | *next++ = 0; | 185 | srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); |
103 | 186 | } else if (in6_pton(vllist, -1, | |
104 | if (in4_pton(_vllist, -1, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], | 187 | srx->transport.sin6.sin6_addr.s6_addr, |
105 | -1, &end)) { | 188 | delim, &vllist)) { |
106 | srx->transport_len = sizeof(struct sockaddr_in6); | 189 | srx->transport_len = sizeof(struct sockaddr_in6); |
107 | srx->transport.sin6.sin6_family = AF_INET6; | 190 | srx->transport.sin6.sin6_family = AF_INET6; |
108 | srx->transport.sin6.sin6_flowinfo = 0; | 191 | } else { |
109 | srx->transport.sin6.sin6_scope_id = 0; | 192 | goto bad_address; |
110 | srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; | 193 | } |
111 | srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; | ||
112 | srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); | ||
113 | } else if (in6_pton(_vllist, -1, srx->transport.sin6.sin6_addr.s6_addr, | ||
114 | -1, &end)) { | ||
115 | srx->transport_len = sizeof(struct sockaddr_in6); | ||
116 | srx->transport.sin6.sin6_family = AF_INET6; | ||
117 | srx->transport.sin6.sin6_flowinfo = 0; | ||
118 | srx->transport.sin6.sin6_scope_id = 0; | ||
119 | } else { | ||
120 | goto bad_address; | ||
121 | } | ||
122 | 194 | ||
123 | } while (cell->vl_naddrs++, | 195 | cell->vl_naddrs++; |
124 | cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); | 196 | if (!*vllist) |
197 | break; | ||
198 | vllist++; | ||
125 | 199 | ||
126 | /* create a key to represent an anonymous user */ | 200 | } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist); |
127 | memcpy(keyname, "afs@", 4); | ||
128 | dp = keyname + 4; | ||
129 | cp = cell->name; | ||
130 | do { | ||
131 | *dp++ = toupper(*cp); | ||
132 | } while (*cp++); | ||
133 | 201 | ||
134 | key = rxrpc_get_null_key(keyname); | 202 | /* Disable DNS refresh for manually-specified cells */ |
135 | if (IS_ERR(key)) { | 203 | cell->dns_expiry = TIME64_MAX; |
136 | _debug("no key"); | 204 | } else { |
137 | ret = PTR_ERR(key); | 205 | /* We're going to need to 'refresh' this cell's VL server list |
138 | goto error; | 206 | * from the DNS before we can use it. |
207 | */ | ||
208 | cell->dns_expiry = S64_MIN; | ||
139 | } | 209 | } |
140 | cell->anonymous_key = key; | ||
141 | |||
142 | _debug("anon key %p{%x}", | ||
143 | cell->anonymous_key, key_serial(cell->anonymous_key)); | ||
144 | 210 | ||
145 | _leave(" = %p", cell); | 211 | _leave(" = %p", cell); |
146 | return cell; | 212 | return cell; |
@@ -148,92 +214,129 @@ static struct afs_cell *afs_cell_alloc(struct afs_net *net, | |||
148 | bad_address: | 214 | bad_address: |
149 | printk(KERN_ERR "kAFS: bad VL server IP address\n"); | 215 | printk(KERN_ERR "kAFS: bad VL server IP address\n"); |
150 | ret = -EINVAL; | 216 | ret = -EINVAL; |
151 | error: | ||
152 | key_put(cell->anonymous_key); | ||
153 | kfree(dvllist); | ||
154 | kfree(cell); | 217 | kfree(cell); |
155 | _leave(" = %d", ret); | 218 | _leave(" = %d", ret); |
156 | return ERR_PTR(ret); | 219 | return ERR_PTR(ret); |
157 | } | 220 | } |
158 | 221 | ||
159 | /* | 222 | /* |
160 | * afs_cell_crate() - create a cell record | 223 | * afs_lookup_cell - Look up or create a cell record. |
161 | * @net: The network namespace | 224 | * @net: The network namespace |
162 | * @name: is the name of the cell. | 225 | * @name: The name of the cell. |
163 | * @namsesz: is the strlen of the cell name. | 226 | * @namesz: The strlen of the cell name. |
164 | * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. | 227 | * @vllist: A colon/comma separated list of numeric IP addresses or NULL. |
165 | * @retref: is T to return the cell reference when the cell exists. | 228 | * @excl: T if an error should be given if the cell name already exists. |
229 | * | ||
230 | * Look up a cell record by name and query the DNS for VL server addresses if | ||
231 | * needed. Note that that actual DNS query is punted off to the manager thread | ||
232 | * so that this function can return immediately if interrupted whilst allowing | ||
233 | * cell records to be shared even if not yet fully constructed. | ||
166 | */ | 234 | */ |
167 | struct afs_cell *afs_cell_create(struct afs_net *net, | 235 | struct afs_cell *afs_lookup_cell(struct afs_net *net, |
168 | const char *name, unsigned namesz, | 236 | const char *name, unsigned int namesz, |
169 | char *vllist, bool retref) | 237 | const char *vllist, bool excl) |
170 | { | 238 | { |
171 | struct afs_cell *cell; | 239 | struct afs_cell *cell, *candidate, *cursor; |
172 | int ret; | 240 | struct rb_node *parent, **pp; |
173 | 241 | int ret, n; | |
174 | _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); | 242 | |
243 | _enter("%s,%s", name, vllist); | ||
244 | |||
245 | if (!excl) { | ||
246 | rcu_read_lock(); | ||
247 | cell = afs_lookup_cell_rcu(net, name, namesz); | ||
248 | rcu_read_unlock(); | ||
249 | if (!IS_ERR(cell)) { | ||
250 | if (excl) { | ||
251 | afs_put_cell(net, cell); | ||
252 | return ERR_PTR(-EEXIST); | ||
253 | } | ||
254 | goto wait_for_cell; | ||
255 | } | ||
256 | } | ||
175 | 257 | ||
176 | down_write(&net->cells_sem); | 258 | /* Assume we're probably going to create a cell and preallocate and |
177 | read_lock(&net->cells_lock); | 259 | * mostly set up a candidate record. We can then use this to stash the |
178 | list_for_each_entry(cell, &net->cells, link) { | 260 | * name, the net namespace and VL server addresses. |
179 | if (strncasecmp(cell->name, name, namesz) == 0) | 261 | * |
180 | goto duplicate_name; | 262 | * We also want to do this before we hold any locks as it may involve |
263 | * upcalling to userspace to make DNS queries. | ||
264 | */ | ||
265 | candidate = afs_alloc_cell(net, name, namesz, vllist); | ||
266 | if (IS_ERR(candidate)) { | ||
267 | _leave(" = %ld", PTR_ERR(candidate)); | ||
268 | return candidate; | ||
181 | } | 269 | } |
182 | read_unlock(&net->cells_lock); | ||
183 | 270 | ||
184 | cell = afs_cell_alloc(net, name, namesz, vllist); | 271 | /* Find the insertion point and check to see if someone else added a |
185 | if (IS_ERR(cell)) { | 272 | * cell whilst we were allocating. |
186 | _leave(" = %ld", PTR_ERR(cell)); | 273 | */ |
187 | up_write(&net->cells_sem); | 274 | write_seqlock(&net->cells_lock); |
188 | return cell; | 275 | |
276 | pp = &net->cells.rb_node; | ||
277 | parent = NULL; | ||
278 | while (*pp) { | ||
279 | parent = *pp; | ||
280 | cursor = rb_entry(parent, struct afs_cell, net_node); | ||
281 | |||
282 | n = strncasecmp(cursor->name, name, | ||
283 | min_t(size_t, cursor->name_len, namesz)); | ||
284 | if (n == 0) | ||
285 | n = cursor->name_len - namesz; | ||
286 | if (n < 0) | ||
287 | pp = &(*pp)->rb_left; | ||
288 | else if (n > 0) | ||
289 | pp = &(*pp)->rb_right; | ||
290 | else | ||
291 | goto cell_already_exists; | ||
189 | } | 292 | } |
190 | 293 | ||
191 | /* add a proc directory for this cell */ | 294 | cell = candidate; |
192 | ret = afs_proc_cell_setup(net, cell); | 295 | candidate = NULL; |
193 | if (ret < 0) | 296 | rb_link_node_rcu(&cell->net_node, parent, pp); |
194 | goto error; | 297 | rb_insert_color(&cell->net_node, &net->cells); |
298 | atomic_inc(&net->cells_outstanding); | ||
299 | write_sequnlock(&net->cells_lock); | ||
195 | 300 | ||
196 | #ifdef CONFIG_AFS_FSCACHE | 301 | queue_work(afs_wq, &cell->manager); |
197 | /* put it up for caching (this never returns an error) */ | ||
198 | cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, | ||
199 | &afs_cell_cache_index_def, | ||
200 | cell, true); | ||
201 | #endif | ||
202 | 302 | ||
203 | /* add to the cell lists */ | 303 | wait_for_cell: |
204 | write_lock(&net->cells_lock); | 304 | _debug("wait_for_cell"); |
205 | list_add_tail(&cell->link, &net->cells); | 305 | ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE); |
206 | write_unlock(&net->cells_lock); | 306 | smp_rmb(); |
207 | 307 | ||
208 | down_write(&net->proc_cells_sem); | 308 | switch (READ_ONCE(cell->state)) { |
209 | list_add_tail(&cell->proc_link, &net->proc_cells); | 309 | case AFS_CELL_FAILED: |
210 | up_write(&net->proc_cells_sem); | 310 | ret = cell->error; |
211 | up_write(&net->cells_sem); | 311 | goto error; |
312 | default: | ||
313 | _debug("weird %u %d", cell->state, cell->error); | ||
314 | goto error; | ||
315 | case AFS_CELL_ACTIVE: | ||
316 | break; | ||
317 | } | ||
212 | 318 | ||
213 | _leave(" = %p", cell); | 319 | _leave(" = %p [cell]", cell); |
214 | return cell; | 320 | return cell; |
215 | 321 | ||
322 | cell_already_exists: | ||
323 | _debug("cell exists"); | ||
324 | cell = cursor; | ||
325 | if (excl) { | ||
326 | ret = -EEXIST; | ||
327 | } else { | ||
328 | ASSERTCMP(atomic_read(&cursor->usage), >=, 1); | ||
329 | afs_get_cell(cursor); | ||
330 | ret = 0; | ||
331 | } | ||
332 | write_sequnlock(&net->cells_lock); | ||
333 | kfree(candidate); | ||
334 | if (ret == 0) | ||
335 | goto wait_for_cell; | ||
216 | error: | 336 | error: |
217 | up_write(&net->cells_sem); | 337 | afs_put_cell(net, cell); |
218 | key_put(cell->anonymous_key); | 338 | _leave(" = %d [error]", ret); |
219 | kfree(cell); | ||
220 | _leave(" = %d", ret); | ||
221 | return ERR_PTR(ret); | 339 | return ERR_PTR(ret); |
222 | |||
223 | duplicate_name: | ||
224 | if (retref && !IS_ERR(cell)) | ||
225 | afs_get_cell(cell); | ||
226 | |||
227 | read_unlock(&net->cells_lock); | ||
228 | up_write(&net->cells_sem); | ||
229 | |||
230 | if (retref) { | ||
231 | _leave(" = %p", cell); | ||
232 | return cell; | ||
233 | } | ||
234 | |||
235 | _leave(" = -EEXIST"); | ||
236 | return ERR_PTR(-EEXIST); | ||
237 | } | 340 | } |
238 | 341 | ||
239 | /* | 342 | /* |
@@ -241,10 +344,11 @@ duplicate_name: | |||
241 | * - can be called with a module parameter string | 344 | * - can be called with a module parameter string |
242 | * - can be called from a write to /proc/fs/afs/rootcell | 345 | * - can be called from a write to /proc/fs/afs/rootcell |
243 | */ | 346 | */ |
244 | int afs_cell_init(struct afs_net *net, char *rootcell) | 347 | int afs_cell_init(struct afs_net *net, const char *rootcell) |
245 | { | 348 | { |
246 | struct afs_cell *old_root, *new_root; | 349 | struct afs_cell *old_root, *new_root; |
247 | char *cp; | 350 | const char *cp, *vllist; |
351 | size_t len; | ||
248 | 352 | ||
249 | _enter(""); | 353 | _enter(""); |
250 | 354 | ||
@@ -257,223 +361,471 @@ int afs_cell_init(struct afs_net *net, char *rootcell) | |||
257 | } | 361 | } |
258 | 362 | ||
259 | cp = strchr(rootcell, ':'); | 363 | cp = strchr(rootcell, ':'); |
260 | if (!cp) | 364 | if (!cp) { |
261 | _debug("kAFS: no VL server IP addresses specified"); | 365 | _debug("kAFS: no VL server IP addresses specified"); |
262 | else | 366 | vllist = NULL; |
263 | *cp++ = 0; | 367 | len = strlen(rootcell); |
368 | } else { | ||
369 | vllist = cp + 1; | ||
370 | len = cp - rootcell; | ||
371 | } | ||
264 | 372 | ||
265 | /* allocate a cell record for the root cell */ | 373 | /* allocate a cell record for the root cell */ |
266 | new_root = afs_cell_create(net, rootcell, strlen(rootcell), cp, false); | 374 | new_root = afs_lookup_cell(net, rootcell, len, vllist, false); |
267 | if (IS_ERR(new_root)) { | 375 | if (IS_ERR(new_root)) { |
268 | _leave(" = %ld", PTR_ERR(new_root)); | 376 | _leave(" = %ld", PTR_ERR(new_root)); |
269 | return PTR_ERR(new_root); | 377 | return PTR_ERR(new_root); |
270 | } | 378 | } |
271 | 379 | ||
380 | set_bit(AFS_CELL_FL_NO_GC, &new_root->flags); | ||
381 | afs_get_cell(new_root); | ||
382 | |||
272 | /* install the new cell */ | 383 | /* install the new cell */ |
273 | write_lock(&net->cells_lock); | 384 | write_seqlock(&net->cells_lock); |
274 | old_root = net->ws_cell; | 385 | old_root = net->ws_cell; |
275 | net->ws_cell = new_root; | 386 | net->ws_cell = new_root; |
276 | write_unlock(&net->cells_lock); | 387 | write_sequnlock(&net->cells_lock); |
277 | afs_put_cell(net, old_root); | ||
278 | 388 | ||
389 | afs_put_cell(net, old_root); | ||
279 | _leave(" = 0"); | 390 | _leave(" = 0"); |
280 | return 0; | 391 | return 0; |
281 | } | 392 | } |
282 | 393 | ||
283 | /* | 394 | /* |
284 | * lookup a cell record | 395 | * Update a cell's VL server address list from the DNS. |
285 | */ | 396 | */ |
286 | struct afs_cell *afs_cell_lookup(struct afs_net *net, | 397 | static void afs_update_cell(struct afs_cell *cell) |
287 | const char *name, unsigned namesz, | ||
288 | bool dns_cell) | ||
289 | { | 398 | { |
290 | struct afs_cell *cell; | 399 | time64_t now, expiry; |
400 | char *vllist = NULL; | ||
401 | int ret; | ||
291 | 402 | ||
292 | _enter("\"%*.*s\",", namesz, namesz, name ?: ""); | 403 | _enter("%s", cell->name); |
404 | |||
405 | ret = dns_query("afsdb", cell->name, cell->name_len, | ||
406 | "ipv4", &vllist, &expiry); | ||
407 | _debug("query %d", ret); | ||
408 | switch (ret) { | ||
409 | case 0 ... INT_MAX: | ||
410 | clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); | ||
411 | clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); | ||
412 | goto parse_dns_data; | ||
413 | |||
414 | case -ENODATA: | ||
415 | clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); | ||
416 | set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); | ||
417 | cell->dns_expiry = ktime_get_real_seconds() + 61; | ||
418 | cell->error = -EDESTADDRREQ; | ||
419 | goto out; | ||
420 | |||
421 | case -EAGAIN: | ||
422 | case -ECONNREFUSED: | ||
423 | default: | ||
424 | /* Unable to query DNS. */ | ||
425 | set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); | ||
426 | cell->dns_expiry = ktime_get_real_seconds() + 10; | ||
427 | cell->error = -EDESTADDRREQ; | ||
428 | goto out; | ||
429 | } | ||
293 | 430 | ||
294 | down_read(&net->cells_sem); | 431 | parse_dns_data: |
295 | read_lock(&net->cells_lock); | 432 | write_seqlock(&cell->vl_addrs_lock); |
296 | 433 | ||
297 | if (name) { | 434 | ret = -EINVAL; |
298 | /* if the cell was named, look for it in the cell record list */ | 435 | do { |
299 | list_for_each_entry(cell, &net->cells, link) { | 436 | struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs]; |
300 | if (strncmp(cell->name, name, namesz) == 0) { | 437 | |
301 | afs_get_cell(cell); | 438 | if (in4_pton(vllist, -1, |
302 | goto found; | 439 | (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], |
303 | } | 440 | ',', (const char **)&vllist)) { |
304 | } | 441 | srx->transport_len = sizeof(struct sockaddr_in6); |
305 | cell = ERR_PTR(-ENOENT); | 442 | srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; |
306 | if (dns_cell) | 443 | srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; |
307 | goto create_cell; | 444 | srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); |
308 | found: | 445 | } else if (in6_pton(vllist, -1, |
309 | ; | 446 | srx->transport.sin6.sin6_addr.s6_addr, |
310 | } else { | 447 | ',', (const char **)&vllist)) { |
311 | cell = net->ws_cell; | 448 | srx->transport_len = sizeof(struct sockaddr_in6); |
312 | if (!cell) { | 449 | srx->transport.sin6.sin6_family = AF_INET6; |
313 | /* this should not happen unless user tries to mount | ||
314 | * when root cell is not set. Return an impossibly | ||
315 | * bizarre errno to alert the user. Things like | ||
316 | * ENOENT might be "more appropriate" but they happen | ||
317 | * for other reasons. | ||
318 | */ | ||
319 | cell = ERR_PTR(-EDESTADDRREQ); | ||
320 | } else { | 450 | } else { |
321 | afs_get_cell(cell); | 451 | goto bad_address; |
322 | } | 452 | } |
323 | 453 | ||
324 | } | 454 | cell->vl_naddrs++; |
455 | if (!*vllist) | ||
456 | break; | ||
457 | vllist++; | ||
325 | 458 | ||
326 | read_unlock(&net->cells_lock); | 459 | } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS); |
327 | up_read(&net->cells_sem); | ||
328 | _leave(" = %p", cell); | ||
329 | return cell; | ||
330 | |||
331 | create_cell: | ||
332 | read_unlock(&net->cells_lock); | ||
333 | up_read(&net->cells_sem); | ||
334 | 460 | ||
335 | cell = afs_cell_create(net, name, namesz, NULL, true); | 461 | if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS) |
462 | memset(cell->vl_addrs + cell->vl_naddrs, 0, | ||
463 | (AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0])); | ||
336 | 464 | ||
337 | _leave(" = %p", cell); | 465 | now = ktime_get_real_seconds(); |
338 | return cell; | 466 | cell->dns_expiry = expiry; |
467 | afs_set_cell_timer(cell->net, expiry - now); | ||
468 | bad_address: | ||
469 | write_sequnlock(&cell->vl_addrs_lock); | ||
470 | out: | ||
471 | _leave(""); | ||
339 | } | 472 | } |
340 | 473 | ||
341 | #if 0 | ||
342 | /* | 474 | /* |
343 | * try and get a cell record | 475 | * Destroy a cell record |
344 | */ | 476 | */ |
345 | struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) | 477 | static void afs_cell_destroy(struct rcu_head *rcu) |
346 | { | 478 | { |
347 | write_lock(&net->cells_lock); | 479 | struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); |
348 | 480 | ||
349 | if (cell && !list_empty(&cell->link)) | 481 | _enter("%p{%s}", cell, cell->name); |
350 | afs_get_cell(cell); | ||
351 | else | ||
352 | cell = NULL; | ||
353 | 482 | ||
354 | write_unlock(&net->cells_lock); | 483 | ASSERTCMP(atomic_read(&cell->usage), ==, 0); |
355 | return cell; | 484 | |
485 | key_put(cell->anonymous_key); | ||
486 | kfree(cell); | ||
487 | |||
488 | _leave(" [destroyed]"); | ||
356 | } | 489 | } |
357 | #endif /* 0 */ | ||
358 | 490 | ||
359 | /* | 491 | /* |
360 | * destroy a cell record | 492 | * Queue the cell manager. |
361 | */ | 493 | */ |
362 | void afs_put_cell(struct afs_net *net, struct afs_cell *cell) | 494 | static void afs_queue_cell_manager(struct afs_net *net) |
363 | { | 495 | { |
364 | if (!cell) | 496 | int outstanding = atomic_inc_return(&net->cells_outstanding); |
365 | return; | ||
366 | 497 | ||
367 | _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); | 498 | _enter("%d", outstanding); |
368 | 499 | ||
369 | ASSERTCMP(atomic_read(&cell->usage), >, 0); | 500 | if (!queue_work(afs_wq, &net->cells_manager)) |
501 | afs_dec_cells_outstanding(net); | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Cell management timer. We have an increment on cells_outstanding that we | ||
506 | * need to pass along to the work item. | ||
507 | */ | ||
508 | void afs_cells_timer(struct timer_list *timer) | ||
509 | { | ||
510 | struct afs_net *net = container_of(timer, struct afs_net, cells_timer); | ||
511 | |||
512 | _enter(""); | ||
513 | if (!queue_work(afs_wq, &net->cells_manager)) | ||
514 | afs_dec_cells_outstanding(net); | ||
515 | } | ||
370 | 516 | ||
371 | /* to prevent a race, the decrement and the dequeue must be effectively | 517 | /* |
372 | * atomic */ | 518 | * Drop a reference on a cell record. |
373 | write_lock(&net->cells_lock); | 519 | */ |
520 | void afs_put_cell(struct afs_net *net, struct afs_cell *cell) | ||
521 | { | ||
522 | time64_t now, expire_delay; | ||
374 | 523 | ||
375 | if (likely(!atomic_dec_and_test(&cell->usage))) { | 524 | if (!cell) |
376 | write_unlock(&net->cells_lock); | ||
377 | _leave(""); | ||
378 | return; | 525 | return; |
379 | } | ||
380 | 526 | ||
381 | ASSERT(list_empty(&cell->servers)); | 527 | _enter("%s", cell->name); |
382 | ASSERT(list_empty(&cell->vl_list)); | ||
383 | 528 | ||
384 | wake_up(&net->cells_freeable_wq); | 529 | now = ktime_get_real_seconds(); |
530 | cell->last_inactive = now; | ||
531 | expire_delay = 0; | ||
532 | if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && | ||
533 | !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) | ||
534 | expire_delay = afs_cell_gc_delay; | ||
385 | 535 | ||
386 | write_unlock(&net->cells_lock); | 536 | if (atomic_dec_return(&cell->usage) > 1) |
537 | return; | ||
387 | 538 | ||
388 | _leave(" [unused]"); | 539 | /* 'cell' may now be garbage collected. */ |
540 | afs_set_cell_timer(net, expire_delay); | ||
389 | } | 541 | } |
390 | 542 | ||
391 | /* | 543 | /* |
392 | * destroy a cell record | 544 | * Allocate a key to use as a placeholder for anonymous user security. |
393 | * - must be called with the net->cells_sem write-locked | ||
394 | * - cell->link should have been broken by the caller | ||
395 | */ | 545 | */ |
396 | static void afs_cell_destroy(struct afs_net *net, struct afs_cell *cell) | 546 | static int afs_alloc_anon_key(struct afs_cell *cell) |
397 | { | 547 | { |
398 | _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); | 548 | struct key *key; |
549 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp; | ||
399 | 550 | ||
400 | ASSERTCMP(atomic_read(&cell->usage), >=, 0); | 551 | /* Create a key to represent an anonymous user. */ |
401 | ASSERT(list_empty(&cell->link)); | 552 | memcpy(keyname, "afs@", 4); |
553 | dp = keyname + 4; | ||
554 | cp = cell->name; | ||
555 | do { | ||
556 | *dp++ = tolower(*cp); | ||
557 | } while (*cp++); | ||
402 | 558 | ||
403 | /* wait for everyone to stop using the cell */ | 559 | key = rxrpc_get_null_key(keyname); |
404 | if (atomic_read(&cell->usage) > 0) { | 560 | if (IS_ERR(key)) |
405 | DECLARE_WAITQUEUE(myself, current); | 561 | return PTR_ERR(key); |
406 | 562 | ||
407 | _debug("wait for cell %s", cell->name); | 563 | cell->anonymous_key = key; |
408 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
409 | add_wait_queue(&net->cells_freeable_wq, &myself); | ||
410 | 564 | ||
411 | while (atomic_read(&cell->usage) > 0) { | 565 | _debug("anon key %p{%x}", |
412 | schedule(); | 566 | cell->anonymous_key, key_serial(cell->anonymous_key)); |
413 | set_current_state(TASK_UNINTERRUPTIBLE); | 567 | return 0; |
414 | } | 568 | } |
415 | 569 | ||
416 | remove_wait_queue(&net->cells_freeable_wq, &myself); | 570 | /* |
417 | set_current_state(TASK_RUNNING); | 571 | * Activate a cell. |
572 | */ | ||
573 | static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) | ||
574 | { | ||
575 | int ret; | ||
576 | |||
577 | if (!cell->anonymous_key) { | ||
578 | ret = afs_alloc_anon_key(cell); | ||
579 | if (ret < 0) | ||
580 | return ret; | ||
418 | } | 581 | } |
419 | 582 | ||
420 | _debug("cell dead"); | 583 | #ifdef CONFIG_AFS_FSCACHE |
421 | ASSERTCMP(atomic_read(&cell->usage), ==, 0); | 584 | cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, |
422 | ASSERT(list_empty(&cell->servers)); | 585 | &afs_cell_cache_index_def, |
423 | ASSERT(list_empty(&cell->vl_list)); | 586 | cell, true); |
587 | #endif | ||
588 | ret = afs_proc_cell_setup(net, cell); | ||
589 | if (ret < 0) | ||
590 | return ret; | ||
591 | spin_lock(&net->proc_cells_lock); | ||
592 | list_add_tail(&cell->proc_link, &net->proc_cells); | ||
593 | spin_unlock(&net->proc_cells_lock); | ||
594 | return 0; | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * Deactivate a cell. | ||
599 | */ | ||
600 | static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) | ||
601 | { | ||
602 | _enter("%s", cell->name); | ||
424 | 603 | ||
425 | afs_proc_cell_remove(net, cell); | 604 | afs_proc_cell_remove(net, cell); |
426 | 605 | ||
427 | down_write(&net->proc_cells_sem); | 606 | spin_lock(&net->proc_cells_lock); |
428 | list_del_init(&cell->proc_link); | 607 | list_del_init(&cell->proc_link); |
429 | up_write(&net->proc_cells_sem); | 608 | spin_unlock(&net->proc_cells_lock); |
430 | 609 | ||
431 | #ifdef CONFIG_AFS_FSCACHE | 610 | #ifdef CONFIG_AFS_FSCACHE |
432 | fscache_relinquish_cookie(cell->cache, 0); | 611 | fscache_relinquish_cookie(cell->cache, 0); |
612 | cell->cache = NULL; | ||
433 | #endif | 613 | #endif |
434 | key_put(cell->anonymous_key); | ||
435 | kfree(cell); | ||
436 | 614 | ||
437 | _leave(" [destroyed]"); | 615 | _leave(""); |
438 | } | 616 | } |
439 | 617 | ||
440 | /* | 618 | /* |
441 | * purge in-memory cell database on module unload or afs_init() failure | 619 | * Manage a cell record, initialising and destroying it, maintaining its DNS |
442 | * - the timeout daemon is stopped before calling this | 620 | * records. |
443 | */ | 621 | */ |
444 | void afs_cell_purge(struct afs_net *net) | 622 | static void afs_manage_cell(struct work_struct *work) |
445 | { | 623 | { |
446 | struct afs_cell *cell; | 624 | struct afs_cell *cell = container_of(work, struct afs_cell, manager); |
625 | struct afs_net *net = cell->net; | ||
626 | bool deleted; | ||
627 | int ret, usage; | ||
628 | |||
629 | _enter("%s", cell->name); | ||
630 | |||
631 | again: | ||
632 | _debug("state %u", cell->state); | ||
633 | switch (cell->state) { | ||
634 | case AFS_CELL_INACTIVE: | ||
635 | case AFS_CELL_FAILED: | ||
636 | write_seqlock(&net->cells_lock); | ||
637 | usage = 1; | ||
638 | deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); | ||
639 | if (deleted) | ||
640 | rb_erase(&cell->net_node, &net->cells); | ||
641 | write_sequnlock(&net->cells_lock); | ||
642 | if (deleted) | ||
643 | goto final_destruction; | ||
644 | if (cell->state == AFS_CELL_FAILED) | ||
645 | goto done; | ||
646 | cell->state = AFS_CELL_UNSET; | ||
647 | goto again; | ||
648 | |||
649 | case AFS_CELL_UNSET: | ||
650 | cell->state = AFS_CELL_ACTIVATING; | ||
651 | goto again; | ||
652 | |||
653 | case AFS_CELL_ACTIVATING: | ||
654 | ret = afs_activate_cell(net, cell); | ||
655 | if (ret < 0) | ||
656 | goto activation_failed; | ||
657 | |||
658 | cell->state = AFS_CELL_ACTIVE; | ||
659 | smp_wmb(); | ||
660 | clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); | ||
661 | wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); | ||
662 | goto again; | ||
663 | |||
664 | case AFS_CELL_ACTIVE: | ||
665 | if (atomic_read(&cell->usage) > 1) { | ||
666 | time64_t now = ktime_get_real_seconds(); | ||
667 | if (cell->dns_expiry <= now && net->live) | ||
668 | afs_update_cell(cell); | ||
669 | goto done; | ||
670 | } | ||
671 | cell->state = AFS_CELL_DEACTIVATING; | ||
672 | goto again; | ||
673 | |||
674 | case AFS_CELL_DEACTIVATING: | ||
675 | set_bit(AFS_CELL_FL_NOT_READY, &cell->flags); | ||
676 | if (atomic_read(&cell->usage) > 1) | ||
677 | goto reverse_deactivation; | ||
678 | afs_deactivate_cell(net, cell); | ||
679 | cell->state = AFS_CELL_INACTIVE; | ||
680 | goto again; | ||
681 | |||
682 | default: | ||
683 | break; | ||
684 | } | ||
685 | _debug("bad state %u", cell->state); | ||
686 | BUG(); /* Unhandled state */ | ||
687 | |||
688 | activation_failed: | ||
689 | cell->error = ret; | ||
690 | afs_deactivate_cell(net, cell); | ||
691 | |||
692 | cell->state = AFS_CELL_FAILED; | ||
693 | smp_wmb(); | ||
694 | if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags)) | ||
695 | wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); | ||
696 | goto again; | ||
697 | |||
698 | reverse_deactivation: | ||
699 | cell->state = AFS_CELL_ACTIVE; | ||
700 | smp_wmb(); | ||
701 | clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); | ||
702 | wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); | ||
703 | _leave(" [deact->act]"); | ||
704 | return; | ||
705 | |||
706 | done: | ||
707 | _leave(" [done %u]", cell->state); | ||
708 | return; | ||
709 | |||
710 | final_destruction: | ||
711 | call_rcu(&cell->rcu, afs_cell_destroy); | ||
712 | afs_dec_cells_outstanding(net); | ||
713 | _leave(" [destruct %d]", atomic_read(&net->cells_outstanding)); | ||
714 | } | ||
715 | |||
716 | /* | ||
717 | * Manage the records of cells known to a network namespace. This includes | ||
718 | * updating the DNS records and garbage collecting unused cells that were | ||
719 | * automatically added. | ||
720 | * | ||
721 | * Note that constructed cell records may only be removed from net->cells by | ||
722 | * this work item, so it is safe for this work item to stash a cursor pointing | ||
723 | * into the tree and then return to caller (provided it skips cells that are | ||
724 | * still under construction). | ||
725 | * | ||
726 | * Note also that we were given an increment on net->cells_outstanding by | ||
727 | * whoever queued us that we need to deal with before returning. | ||
728 | */ | ||
729 | void afs_manage_cells(struct work_struct *work) | ||
730 | { | ||
731 | struct afs_net *net = container_of(work, struct afs_net, cells_manager); | ||
732 | struct rb_node *cursor; | ||
733 | time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; | ||
734 | bool purging = !net->live; | ||
447 | 735 | ||
448 | _enter(""); | 736 | _enter(""); |
449 | 737 | ||
450 | afs_put_cell(net, net->ws_cell); | 738 | /* Trawl the cell database looking for cells that have expired from |
739 | * lack of use and cells whose DNS results have expired and dispatch | ||
740 | * their managers. | ||
741 | */ | ||
742 | read_seqlock_excl(&net->cells_lock); | ||
451 | 743 | ||
452 | down_write(&net->cells_sem); | 744 | for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { |
745 | struct afs_cell *cell = | ||
746 | rb_entry(cursor, struct afs_cell, net_node); | ||
747 | unsigned usage; | ||
748 | bool sched_cell = false; | ||
453 | 749 | ||
454 | while (!list_empty(&net->cells)) { | 750 | usage = atomic_read(&cell->usage); |
455 | cell = NULL; | 751 | _debug("manage %s %u", cell->name, usage); |
752 | |||
753 | ASSERTCMP(usage, >=, 1); | ||
754 | |||
755 | if (purging) { | ||
756 | if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) | ||
757 | usage = atomic_dec_return(&cell->usage); | ||
758 | ASSERTCMP(usage, ==, 1); | ||
759 | } | ||
456 | 760 | ||
457 | /* remove the next cell from the front of the list */ | 761 | if (usage == 1) { |
458 | write_lock(&net->cells_lock); | 762 | time64_t expire_at = cell->last_inactive; |
459 | 763 | ||
460 | if (!list_empty(&net->cells)) { | 764 | if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && |
461 | cell = list_entry(net->cells.next, | 765 | !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) |
462 | struct afs_cell, link); | 766 | expire_at += afs_cell_gc_delay; |
463 | list_del_init(&cell->link); | 767 | if (purging || expire_at <= now) |
768 | sched_cell = true; | ||
769 | else if (expire_at < next_manage) | ||
770 | next_manage = expire_at; | ||
464 | } | 771 | } |
465 | 772 | ||
466 | write_unlock(&net->cells_lock); | 773 | if (!purging) { |
774 | if (cell->dns_expiry <= now) | ||
775 | sched_cell = true; | ||
776 | else if (cell->dns_expiry <= next_manage) | ||
777 | next_manage = cell->dns_expiry; | ||
778 | } | ||
779 | |||
780 | if (sched_cell) | ||
781 | queue_work(afs_wq, &cell->manager); | ||
782 | } | ||
783 | |||
784 | read_sequnlock_excl(&net->cells_lock); | ||
467 | 785 | ||
468 | if (cell) { | 786 | /* Update the timer on the way out. We have to pass an increment on |
469 | _debug("PURGING CELL %s (%d)", | 787 | * cells_outstanding in the namespace that we are in to the timer or |
470 | cell->name, atomic_read(&cell->usage)); | 788 | * the work scheduler. |
789 | */ | ||
790 | if (!purging && next_manage < TIME64_MAX) { | ||
791 | now = ktime_get_real_seconds(); | ||
471 | 792 | ||
472 | /* now the cell should be left with no references */ | 793 | if (next_manage - now <= 0) { |
473 | afs_cell_destroy(net, cell); | 794 | if (queue_work(afs_wq, &net->cells_manager)) |
795 | atomic_inc(&net->cells_outstanding); | ||
796 | } else { | ||
797 | afs_set_cell_timer(net, next_manage - now); | ||
474 | } | 798 | } |
475 | } | 799 | } |
476 | 800 | ||
477 | up_write(&net->cells_sem); | 801 | afs_dec_cells_outstanding(net); |
802 | _leave(" [%d]", atomic_read(&net->cells_outstanding)); | ||
803 | } | ||
804 | |||
805 | /* | ||
806 | * Purge in-memory cell database. | ||
807 | */ | ||
808 | void afs_cell_purge(struct afs_net *net) | ||
809 | { | ||
810 | struct afs_cell *ws; | ||
811 | |||
812 | _enter(""); | ||
813 | |||
814 | write_seqlock(&net->cells_lock); | ||
815 | ws = net->ws_cell; | ||
816 | net->ws_cell = NULL; | ||
817 | write_sequnlock(&net->cells_lock); | ||
818 | afs_put_cell(net, ws); | ||
819 | |||
820 | _debug("del timer"); | ||
821 | if (del_timer_sync(&net->cells_timer)) | ||
822 | atomic_dec(&net->cells_outstanding); | ||
823 | |||
824 | _debug("kick mgr"); | ||
825 | afs_queue_cell_manager(net); | ||
826 | |||
827 | _debug("wait"); | ||
828 | wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait, | ||
829 | TASK_UNINTERRUPTIBLE); | ||
478 | _leave(""); | 830 | _leave(""); |
479 | } | 831 | } |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7c318666e436..51e3825b5ffb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -207,13 +207,14 @@ struct afs_net { | |||
207 | atomic_t nr_superblocks; | 207 | atomic_t nr_superblocks; |
208 | 208 | ||
209 | /* Cell database */ | 209 | /* Cell database */ |
210 | struct list_head cells; | 210 | struct rb_root cells; |
211 | struct afs_cell *ws_cell; | 211 | struct afs_cell *ws_cell; |
212 | rwlock_t cells_lock; | 212 | struct work_struct cells_manager; |
213 | struct rw_semaphore cells_sem; | 213 | struct timer_list cells_timer; |
214 | wait_queue_head_t cells_freeable_wq; | 214 | atomic_t cells_outstanding; |
215 | seqlock_t cells_lock; | ||
215 | 216 | ||
216 | struct rw_semaphore proc_cells_sem; | 217 | spinlock_t proc_cells_lock; |
217 | struct list_head proc_cells; | 218 | struct list_head proc_cells; |
218 | 219 | ||
219 | /* Volume location database */ | 220 | /* Volume location database */ |
@@ -242,14 +243,26 @@ struct afs_net { | |||
242 | 243 | ||
243 | extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns | 244 | extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns |
244 | 245 | ||
246 | enum afs_cell_state { | ||
247 | AFS_CELL_UNSET, | ||
248 | AFS_CELL_ACTIVATING, | ||
249 | AFS_CELL_ACTIVE, | ||
250 | AFS_CELL_DEACTIVATING, | ||
251 | AFS_CELL_INACTIVE, | ||
252 | AFS_CELL_FAILED, | ||
253 | }; | ||
254 | |||
245 | /* | 255 | /* |
246 | * AFS cell record | 256 | * AFS cell record |
247 | */ | 257 | */ |
248 | struct afs_cell { | 258 | struct afs_cell { |
249 | atomic_t usage; | 259 | union { |
250 | struct list_head link; /* main cell list link */ | 260 | struct rcu_head rcu; |
251 | struct afs_net *net; /* The network namespace */ | 261 | struct rb_node net_node; /* Node in net->cells */ |
262 | }; | ||
263 | struct afs_net *net; | ||
252 | struct key *anonymous_key; /* anonymous user key for this cell */ | 264 | struct key *anonymous_key; /* anonymous user key for this cell */ |
265 | struct work_struct manager; /* Manager for init/deinit/dns */ | ||
253 | struct list_head proc_link; /* /proc cell list link */ | 266 | struct list_head proc_link; /* /proc cell list link */ |
254 | #ifdef CONFIG_AFS_FSCACHE | 267 | #ifdef CONFIG_AFS_FSCACHE |
255 | struct fscache_cookie *cache; /* caching cookie */ | 268 | struct fscache_cookie *cache; /* caching cookie */ |
@@ -262,12 +275,26 @@ struct afs_cell { | |||
262 | /* volume location record management */ | 275 | /* volume location record management */ |
263 | struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ | 276 | struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ |
264 | struct list_head vl_list; /* cell's active VL record list */ | 277 | struct list_head vl_list; /* cell's active VL record list */ |
278 | time64_t dns_expiry; /* Time AFSDB/SRV record expires */ | ||
279 | time64_t last_inactive; /* Time of last drop of usage count */ | ||
280 | atomic_t usage; | ||
281 | unsigned long flags; | ||
282 | #define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */ | ||
283 | #define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ | ||
284 | #define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ | ||
285 | #define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ | ||
286 | enum afs_cell_state state; | ||
287 | short error; | ||
288 | |||
265 | spinlock_t vl_lock; /* vl_list lock */ | 289 | spinlock_t vl_lock; /* vl_list lock */ |
290 | |||
291 | /* VLDB server list. */ | ||
292 | seqlock_t vl_addrs_lock; | ||
266 | unsigned short vl_naddrs; /* number of VL servers in addr list */ | 293 | unsigned short vl_naddrs; /* number of VL servers in addr list */ |
267 | unsigned short vl_curr_svix; /* current server index */ | 294 | unsigned short vl_curr_svix; /* current server index */ |
268 | struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ | 295 | struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ |
269 | 296 | u8 name_len; /* Length of name */ | |
270 | char name[0]; /* cell name - must go last */ | 297 | char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ |
271 | }; | 298 | }; |
272 | 299 | ||
273 | /* | 300 | /* |
@@ -494,17 +521,20 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest | |||
494 | /* | 521 | /* |
495 | * cell.c | 522 | * cell.c |
496 | */ | 523 | */ |
497 | static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) | 524 | static inline struct afs_cell *afs_get_cell(struct afs_cell *cell) |
498 | { | 525 | { |
499 | if (cell) | 526 | if (cell) |
500 | atomic_inc(&cell->usage); | 527 | atomic_inc(&cell->usage); |
501 | return cell; | 528 | return cell; |
502 | } | 529 | } |
503 | extern int afs_cell_init(struct afs_net *, char *); | 530 | |
504 | extern struct afs_cell *afs_cell_create(struct afs_net *, const char *, unsigned, char *, bool); | 531 | extern int afs_cell_init(struct afs_net *, const char *); |
505 | extern struct afs_cell *afs_cell_lookup(struct afs_net *, const char *, unsigned, bool); | 532 | extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); |
506 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); | 533 | extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, |
534 | const char *, bool); | ||
507 | extern void afs_put_cell(struct afs_net *, struct afs_cell *); | 535 | extern void afs_put_cell(struct afs_net *, struct afs_cell *); |
536 | extern void afs_manage_cells(struct work_struct *); | ||
537 | extern void afs_cells_timer(struct timer_list *); | ||
508 | extern void __net_exit afs_cell_purge(struct afs_net *); | 538 | extern void __net_exit afs_cell_purge(struct afs_net *); |
509 | 539 | ||
510 | /* | 540 | /* |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 010e2e1a40f4..e7f87d723761 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -46,12 +46,15 @@ static int __net_init afs_net_init(struct afs_net *net) | |||
46 | 46 | ||
47 | INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); | 47 | INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); |
48 | mutex_init(&net->socket_mutex); | 48 | mutex_init(&net->socket_mutex); |
49 | INIT_LIST_HEAD(&net->cells); | 49 | |
50 | rwlock_init(&net->cells_lock); | 50 | net->cells = RB_ROOT; |
51 | init_rwsem(&net->cells_sem); | 51 | seqlock_init(&net->cells_lock); |
52 | init_waitqueue_head(&net->cells_freeable_wq); | 52 | INIT_WORK(&net->cells_manager, afs_manage_cells); |
53 | init_rwsem(&net->proc_cells_sem); | 53 | timer_setup(&net->cells_timer, afs_cells_timer, 0); |
54 | |||
55 | spin_lock_init(&net->proc_cells_lock); | ||
54 | INIT_LIST_HEAD(&net->proc_cells); | 56 | INIT_LIST_HEAD(&net->proc_cells); |
57 | |||
55 | INIT_LIST_HEAD(&net->vl_updates); | 58 | INIT_LIST_HEAD(&net->vl_updates); |
56 | INIT_LIST_HEAD(&net->vl_graveyard); | 59 | INIT_LIST_HEAD(&net->vl_graveyard); |
57 | INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); | 60 | INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); |
@@ -83,11 +86,14 @@ static int __net_init afs_net_init(struct afs_net *net) | |||
83 | return 0; | 86 | return 0; |
84 | 87 | ||
85 | error_open_socket: | 88 | error_open_socket: |
89 | net->live = false; | ||
86 | afs_vlocation_purge(net); | 90 | afs_vlocation_purge(net); |
87 | afs_cell_purge(net); | 91 | afs_cell_purge(net); |
88 | error_cell_init: | 92 | error_cell_init: |
93 | net->live = false; | ||
89 | afs_proc_cleanup(net); | 94 | afs_proc_cleanup(net); |
90 | error_proc: | 95 | error_proc: |
96 | net->live = false; | ||
91 | return ret; | 97 | return ret; |
92 | } | 98 | } |
93 | 99 | ||
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index d00d550ff2ef..08565429615d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -186,7 +186,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) | |||
186 | { | 186 | { |
187 | struct afs_net *net = afs_seq2net(m); | 187 | struct afs_net *net = afs_seq2net(m); |
188 | 188 | ||
189 | down_read(&net->proc_cells_sem); | 189 | rcu_read_lock(); |
190 | return seq_list_start_head(&net->proc_cells, *_pos); | 190 | return seq_list_start_head(&net->proc_cells, *_pos); |
191 | } | 191 | } |
192 | 192 | ||
@@ -205,9 +205,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) | |||
205 | */ | 205 | */ |
206 | static void afs_proc_cells_stop(struct seq_file *m, void *v) | 206 | static void afs_proc_cells_stop(struct seq_file *m, void *v) |
207 | { | 207 | { |
208 | struct afs_net *net = afs_seq2net(m); | 208 | rcu_read_unlock(); |
209 | |||
210 | up_read(&net->proc_cells_sem); | ||
211 | } | 209 | } |
212 | 210 | ||
213 | /* | 211 | /* |
@@ -225,8 +223,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) | |||
225 | } | 223 | } |
226 | 224 | ||
227 | /* display one cell per line on subsequent lines */ | 225 | /* display one cell per line on subsequent lines */ |
228 | seq_printf(m, "%3d %s\n", | 226 | seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); |
229 | atomic_read(&cell->usage), cell->name); | ||
230 | return 0; | 227 | return 0; |
231 | } | 228 | } |
232 | 229 | ||
@@ -279,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, | |||
279 | if (strcmp(kbuf, "add") == 0) { | 276 | if (strcmp(kbuf, "add") == 0) { |
280 | struct afs_cell *cell; | 277 | struct afs_cell *cell; |
281 | 278 | ||
282 | cell = afs_cell_create(net, name, strlen(name), args, false); | 279 | cell = afs_lookup_cell(net, name, strlen(name), args, true); |
283 | if (IS_ERR(cell)) { | 280 | if (IS_ERR(cell)) { |
284 | ret = PTR_ERR(cell); | 281 | ret = PTR_ERR(cell); |
285 | goto done; | 282 | goto done; |
286 | } | 283 | } |
287 | 284 | ||
288 | afs_put_cell(net, cell); | 285 | set_bit(AFS_CELL_FL_NO_GC, &cell->flags); |
289 | printk("kAFS: Added new cell '%s'\n", name); | 286 | printk("kAFS: Added new cell '%s'\n", name); |
290 | } else { | 287 | } else { |
291 | goto inval; | 288 | goto inval; |
@@ -354,7 +351,7 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) | |||
354 | { | 351 | { |
355 | struct proc_dir_entry *dir; | 352 | struct proc_dir_entry *dir; |
356 | 353 | ||
357 | _enter("%p{%s}", cell, cell->name); | 354 | _enter("%p{%s},%p", cell, cell->name, net->proc_afs); |
358 | 355 | ||
359 | dir = proc_mkdir(cell->name, net->proc_afs); | 356 | dir = proc_mkdir(cell->name, net->proc_afs); |
360 | if (!dir) | 357 | if (!dir) |
diff --git a/fs/afs/super.c b/fs/afs/super.c index e62fb1bdadc6..3d53b78b350d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -200,10 +200,11 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
200 | token = match_token(p, afs_options_list, args); | 200 | token = match_token(p, afs_options_list, args); |
201 | switch (token) { | 201 | switch (token) { |
202 | case afs_opt_cell: | 202 | case afs_opt_cell: |
203 | cell = afs_cell_lookup(params->net, | 203 | rcu_read_lock(); |
204 | args[0].from, | 204 | cell = afs_lookup_cell_rcu(params->net, |
205 | args[0].to - args[0].from, | 205 | args[0].from, |
206 | false); | 206 | args[0].to - args[0].from); |
207 | rcu_read_unlock(); | ||
207 | if (IS_ERR(cell)) | 208 | if (IS_ERR(cell)) |
208 | return PTR_ERR(cell); | 209 | return PTR_ERR(cell); |
209 | afs_put_cell(params->net, params->cell); | 210 | afs_put_cell(params->net, params->cell); |
@@ -308,7 +309,8 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
308 | 309 | ||
309 | /* lookup the cell record */ | 310 | /* lookup the cell record */ |
310 | if (cellname || !params->cell) { | 311 | if (cellname || !params->cell) { |
311 | cell = afs_cell_lookup(params->net, cellname, cellnamesz, true); | 312 | cell = afs_lookup_cell(params->net, cellname, cellnamesz, |
313 | NULL, false); | ||
312 | if (IS_ERR(cell)) { | 314 | if (IS_ERR(cell)) { |
313 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", | 315 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", |
314 | cellnamesz, cellnamesz, cellname ?: ""); | 316 | cellnamesz, cellnamesz, cellname ?: ""); |
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 2830e4f48d85..e58e00ee9747 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c | |||
@@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler, | |||
45 | struct afs_cell *cell = vnode->volume->cell; | 45 | struct afs_cell *cell = vnode->volume->cell; |
46 | size_t namelen; | 46 | size_t namelen; |
47 | 47 | ||
48 | namelen = strlen(cell->name); | 48 | namelen = cell->name_len; |
49 | if (size == 0) | 49 | if (size == 0) |
50 | return namelen; | 50 | return namelen; |
51 | if (namelen > size) | 51 | if (namelen > size) |