aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 14:41:22 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 14:41:22 -0500
commit487e2c9f44c4b5ea23bfe87bb34679f7297a0bce (patch)
treee9dcf16175078ae2bed9a2fc120e6bd0b28f48e9
parentb630a23a731a436f9edbd9fa00739aaa3e174c15 (diff)
parent98bf40cd99fcfed0705812b6cbdbb3b441a42970 (diff)
Merge tag 'afs-next-20171113' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS updates from David Howells: "kAFS filesystem driver overhaul. The major points of the overhaul are: (1) Preliminary groundwork is laid for supporting network-namespacing of kAFS. The remainder of the namespacing work requires some way to pass namespace information to submounts triggered by an automount. This requires something like the mount overhaul that's in progress. (2) sockaddr_rxrpc is used in preference to in_addr for holding addresses internally and add support for talking to the YFS VL server. With this, kAFS can do everything over IPv6 as well as IPv4 if it's talking to servers that support it. (3) Callback handling is overhauled to be generally passive rather than active. 'Callbacks' are promises by the server to tell us about data and metadata changes. Callbacks are now checked when we next touch an inode rather than actively going and looking for it where possible. (4) File access permit caching is overhauled to store the caching information per-inode rather than per-directory, shared over subordinate files. Whilst older AFS servers only allow ACLs on directories (shared to the files in that directory), newer AFS servers break that restriction. To improve memory usage and to make it easier to do mass-key removal, permit combinations are cached and shared. (5) Cell database management is overhauled to allow lighter locks to be used and to make cell records autonomous state machines that look after getting their own DNS records and cleaning themselves up, in particular preventing races in acquiring and relinquishing the fscache token for the cell. (6) Volume caching is overhauled. The afs_vlocation record is got rid of to simplify things and the superblock is now keyed on the cell and the numeric volume ID only. The volume record is tied to a superblock and normal superblock management is used to mediate the lifetime of the volume fscache token. (7) File server record caching is overhauled to make server records independent of cells and volumes. 
A server can be in multiple cells (in such a case, the administrator must make sure that the VL services for all cells correctly reflect the volumes shared between those cells). Server records are now indexed using the UUID of the server rather than the address since a server can have multiple addresses. (8) File server rotation is overhauled to handle VMOVED, VBUSY (and similar), VOFFLINE and VNOVOL indications and to handle rotation both of servers and addresses of those servers. The rotation will also wait and retry if the server says it is busy. (9) Data writeback is overhauled. Each inode no longer stores a list of modified sections tagged with the key that authorised it in favour of noting the modified region of a page in page->private and storing a list of keys that made modifications in the inode. This simplifies things and allows other keys to be used to actually write to the server if a key that made a modification becomes useless. (10) Writable mmap() is implemented. This allows a kernel to be built entirely on AFS. 
Note that Pre AFS-3.4 servers are no longer supported, though this can be added back if necessary (AFS-3.4 was released in 1998)" * tag 'afs-next-20171113' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: (35 commits) afs: Protect call->state changes against signals afs: Trace page dirty/clean afs: Implement shared-writeable mmap afs: Get rid of the afs_writeback record afs: Introduce a file-private data record afs: Use a dynamic port if 7001 is in use afs: Fix directory read/modify race afs: Trace the sending of pages afs: Trace the initiation and completion of client calls afs: Fix documentation on # vs % prefix in mount source specification afs: Fix total-length calculation for multiple-page send afs: Only progress call state at end of Tx phase from rxrpc callback afs: Make use of the YFS service upgrade to fully support IPv6 afs: Overhaul volume and server record caching and fileserver rotation afs: Move server rotation code into its own file afs: Add an address list concept afs: Overhaul cell database management afs: Overhaul permit caching afs: Overhaul the callback handling afs: Rename struct afs_call server member to cm_server ...
-rw-r--r--Documentation/filesystems/afs.txt4
-rw-r--r--arch/mips/kernel/traps.c14
-rw-r--r--drivers/gpu/drm/drm_dp_aux_dev.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c10
-rw-r--r--drivers/media/platform/qcom/venus/hfi.c8
-rw-r--r--fs/afs/Makefile5
-rw-r--r--fs/afs/addr_list.c381
-rw-r--r--fs/afs/afs.h35
-rw-r--r--fs/afs/afs_fs.h6
-rw-r--r--fs/afs/afs_vl.h73
-rw-r--r--fs/afs/cache.c239
-rw-r--r--fs/afs/callback.c504
-rw-r--r--fs/afs/cell.c887
-rw-r--r--fs/afs/cmservice.c77
-rw-r--r--fs/afs/dir.c461
-rw-r--r--fs/afs/file.c194
-rw-r--r--fs/afs/flock.c159
-rw-r--r--fs/afs/fsclient.c830
-rw-r--r--fs/afs/inode.c177
-rw-r--r--fs/afs/internal.h905
-rw-r--r--fs/afs/main.c145
-rw-r--r--fs/afs/misc.c38
-rw-r--r--fs/afs/proc.c247
-rw-r--r--fs/afs/rotate.c715
-rw-r--r--fs/afs/rxrpc.c335
-rw-r--r--fs/afs/security.c378
-rw-r--r--fs/afs/server.c782
-rw-r--r--fs/afs/server_list.c153
-rw-r--r--fs/afs/super.c190
-rw-r--r--fs/afs/vlclient.c682
-rw-r--r--fs/afs/vlocation.c720
-rw-r--r--fs/afs/vnode.c1025
-rw-r--r--fs/afs/volume.c611
-rw-r--r--fs/afs/write.c709
-rw-r--r--fs/afs/xattr.c4
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/fscache/cookie.c2
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c9
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/ocfs2/filecheck.c8
-rw-r--r--include/linux/wait_bit.h15
-rw-r--r--include/trace/events/afs.h293
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--kernel/sched/wait_bit.c18
-rw-r--r--mm/filemap.c1
47 files changed, 6769 insertions, 5324 deletions
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
index 060da408923b..ba99b5ac4fd8 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.txt
@@ -91,8 +91,8 @@ Filesystems can be mounted anywhere by commands similar to the following:
91 mount -t afs "#root.cell." /afs/cambridge 91 mount -t afs "#root.cell." /afs/cambridge
92 92
93Where the initial character is either a hash or a percent symbol depending on 93Where the initial character is either a hash or a percent symbol depending on
94whether you definitely want a R/W volume (hash) or whether you'd prefer a R/O 94whether you definitely want a R/W volume (percent) or whether you'd prefer a
95volume, but are willing to use a R/W volume instead (percent). 95R/O volume, but are willing to use a R/W volume instead (hash).
96 96
97The name of the volume can be suffixes with ".backup" or ".readonly" to 97The name of the volume can be suffixes with ".backup" or ".readonly" to
98specify connection to only volumes of those types. 98specify connection to only volumes of those types.
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5669d3b8bd38..5d19ed07e99d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1233,18 +1233,6 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
1233 return NOTIFY_OK; 1233 return NOTIFY_OK;
1234} 1234}
1235 1235
1236static int wait_on_fp_mode_switch(atomic_t *p)
1237{
1238 /*
1239 * The FP mode for this task is currently being switched. That may
1240 * involve modifications to the format of this tasks FP context which
1241 * make it unsafe to proceed with execution for the moment. Instead,
1242 * schedule some other task.
1243 */
1244 schedule();
1245 return 0;
1246}
1247
1248static int enable_restore_fp_context(int msa) 1236static int enable_restore_fp_context(int msa)
1249{ 1237{
1250 int err, was_fpu_owner, prior_msa; 1238 int err, was_fpu_owner, prior_msa;
@@ -1254,7 +1242,7 @@ static int enable_restore_fp_context(int msa)
1254 * complete before proceeding. 1242 * complete before proceeding.
1255 */ 1243 */
1256 wait_on_atomic_t(&current->mm->context.fp_mode_switching, 1244 wait_on_atomic_t(&current->mm->context.fp_mode_switching,
1257 wait_on_fp_mode_switch, TASK_KILLABLE); 1245 atomic_t_wait, TASK_KILLABLE);
1258 1246
1259 if (!used_math()) { 1247 if (!used_math()) {
1260 /* First time FP context user. */ 1248 /* First time FP context user. */
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index d34e5096887a..053044201e31 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -263,12 +263,6 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_aux(struct drm_dp_aux *aux)
263 return aux_dev; 263 return aux_dev;
264} 264}
265 265
266static int auxdev_wait_atomic_t(atomic_t *p)
267{
268 schedule();
269 return 0;
270}
271
272void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) 266void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux)
273{ 267{
274 struct drm_dp_aux_dev *aux_dev; 268 struct drm_dp_aux_dev *aux_dev;
@@ -283,7 +277,7 @@ void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux)
283 mutex_unlock(&aux_idr_mutex); 277 mutex_unlock(&aux_idr_mutex);
284 278
285 atomic_dec(&aux_dev->usecount); 279 atomic_dec(&aux_dev->usecount);
286 wait_on_atomic_t(&aux_dev->usecount, auxdev_wait_atomic_t, 280 wait_on_atomic_t(&aux_dev->usecount, atomic_t_wait,
287 TASK_UNINTERRUPTIBLE); 281 TASK_UNINTERRUPTIBLE);
288 282
289 minor = aux_dev->index; 283 minor = aux_dev->index;
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
index 828904b7d468..54fc571b1102 100644
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
@@ -271,13 +271,7 @@ struct igt_wakeup {
271 u32 seqno; 271 u32 seqno;
272}; 272};
273 273
274static int wait_atomic(atomic_t *p) 274static int wait_atomic_timeout(atomic_t *p, unsigned int mode)
275{
276 schedule();
277 return 0;
278}
279
280static int wait_atomic_timeout(atomic_t *p)
281{ 275{
282 return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; 276 return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT;
283} 277}
@@ -348,7 +342,7 @@ static void igt_wake_all_sync(atomic_t *ready,
348 atomic_set(ready, 0); 342 atomic_set(ready, 0);
349 wake_up_all(wq); 343 wake_up_all(wq);
350 344
351 wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); 345 wait_on_atomic_t(set, atomic_t_wait, TASK_UNINTERRUPTIBLE);
352 atomic_set(ready, count); 346 atomic_set(ready, count);
353 atomic_set(done, count); 347 atomic_set(done, count);
354} 348}
diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c
index ba29fd4d4984..1baf78d3c02d 100644
--- a/drivers/media/platform/qcom/venus/hfi.c
+++ b/drivers/media/platform/qcom/venus/hfi.c
@@ -88,12 +88,6 @@ unlock:
88 return ret; 88 return ret;
89} 89}
90 90
91static int core_deinit_wait_atomic_t(atomic_t *p)
92{
93 schedule();
94 return 0;
95}
96
97int hfi_core_deinit(struct venus_core *core, bool blocking) 91int hfi_core_deinit(struct venus_core *core, bool blocking)
98{ 92{
99 int ret = 0, empty; 93 int ret = 0, empty;
@@ -112,7 +106,7 @@ int hfi_core_deinit(struct venus_core *core, bool blocking)
112 106
113 if (!empty) { 107 if (!empty) {
114 mutex_unlock(&core->lock); 108 mutex_unlock(&core->lock);
115 wait_on_atomic_t(&core->insts_count, core_deinit_wait_atomic_t, 109 wait_on_atomic_t(&core->insts_count, atomic_t_wait,
116 TASK_UNINTERRUPTIBLE); 110 TASK_UNINTERRUPTIBLE);
117 mutex_lock(&core->lock); 111 mutex_lock(&core->lock);
118 } 112 }
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 641148208e90..45b7fc405fa6 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
7 7
8kafs-objs := \ 8kafs-objs := \
9 $(afs-cache-y) \ 9 $(afs-cache-y) \
10 addr_list.o \
10 callback.o \ 11 callback.o \
11 cell.o \ 12 cell.o \
12 cmservice.o \ 13 cmservice.o \
@@ -19,14 +20,14 @@ kafs-objs := \
19 misc.o \ 20 misc.o \
20 mntpt.o \ 21 mntpt.o \
21 proc.o \ 22 proc.o \
23 rotate.o \
22 rxrpc.o \ 24 rxrpc.o \
23 security.o \ 25 security.o \
24 server.o \ 26 server.o \
27 server_list.o \
25 super.o \ 28 super.o \
26 netdevices.o \ 29 netdevices.o \
27 vlclient.o \ 30 vlclient.o \
28 vlocation.o \
29 vnode.o \
30 volume.o \ 31 volume.o \
31 write.o \ 32 write.o \
32 xattr.o 33 xattr.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000000..a537368ba0db
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,381 @@
1/* Server address list management
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/slab.h>
13#include <linux/ctype.h>
14#include <linux/dns_resolver.h>
15#include <linux/inet.h>
16#include <keys/rxrpc-type.h>
17#include "internal.h"
18#include "afs_fs.h"
19
20//#define AFS_MAX_ADDRESSES
21// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
22// sizeof(struct sockaddr_rxrpc)))
23#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
24
25/*
26 * Release an address list.
27 */
28void afs_put_addrlist(struct afs_addr_list *alist)
29{
30 if (alist && refcount_dec_and_test(&alist->usage))
31 call_rcu(&alist->rcu, (rcu_callback_t)kfree);
32}
33
34/*
35 * Allocate an address list.
36 */
37struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
38 unsigned short service,
39 unsigned short port)
40{
41 struct afs_addr_list *alist;
42 unsigned int i;
43
44 _enter("%u,%u,%u", nr, service, port);
45
46 alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
47 GFP_KERNEL);
48 if (!alist)
49 return NULL;
50
51 refcount_set(&alist->usage, 1);
52
53 for (i = 0; i < nr; i++) {
54 struct sockaddr_rxrpc *srx = &alist->addrs[i];
55 srx->srx_family = AF_RXRPC;
56 srx->srx_service = service;
57 srx->transport_type = SOCK_DGRAM;
58 srx->transport_len = sizeof(srx->transport.sin6);
59 srx->transport.sin6.sin6_family = AF_INET6;
60 srx->transport.sin6.sin6_port = htons(port);
61 }
62
63 return alist;
64}
65
66/*
67 * Parse a text string consisting of delimited addresses.
68 */
69struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
70 char delim,
71 unsigned short service,
72 unsigned short port)
73{
74 struct afs_addr_list *alist;
75 const char *p, *end = text + len;
76 unsigned int nr = 0;
77
78 _enter("%*.*s,%c", (int)len, (int)len, text, delim);
79
80 if (!len)
81 return ERR_PTR(-EDESTADDRREQ);
82
83 if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
84 delim = ',';
85
86 /* Count the addresses */
87 p = text;
88 do {
89 if (!*p)
90 return ERR_PTR(-EINVAL);
91 if (*p == delim)
92 continue;
93 nr++;
94 if (*p == '[') {
95 p++;
96 if (p == end)
97 return ERR_PTR(-EINVAL);
98 p = memchr(p, ']', end - p);
99 if (!p)
100 return ERR_PTR(-EINVAL);
101 p++;
102 if (p >= end)
103 break;
104 }
105
106 p = memchr(p, delim, end - p);
107 if (!p)
108 break;
109 p++;
110 } while (p < end);
111
112 _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
113 if (nr > AFS_MAX_ADDRESSES)
114 nr = AFS_MAX_ADDRESSES;
115
116 alist = afs_alloc_addrlist(nr, service, port);
117 if (!alist)
118 return ERR_PTR(-ENOMEM);
119
120 /* Extract the addresses */
121 p = text;
122 do {
123 struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
124 char tdelim = delim;
125
126 if (*p == delim) {
127 p++;
128 continue;
129 }
130
131 if (*p == '[') {
132 p++;
133 tdelim = ']';
134 }
135
136 if (in4_pton(p, end - p,
137 (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
138 tdelim, &p)) {
139 srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
140 srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
141 srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
142 } else if (in6_pton(p, end - p,
143 srx->transport.sin6.sin6_addr.s6_addr,
144 tdelim, &p)) {
145 /* Nothing to do */
146 } else {
147 goto bad_address;
148 }
149
150 if (tdelim == ']') {
151 if (p == end || *p != ']')
152 goto bad_address;
153 p++;
154 }
155
156 if (p < end) {
157 if (*p == '+') {
158 /* Port number specification "+1234" */
159 unsigned int xport = 0;
160 p++;
161 if (p >= end || !isdigit(*p))
162 goto bad_address;
163 do {
164 xport *= 10;
165 xport += *p - '0';
166 if (xport > 65535)
167 goto bad_address;
168 p++;
169 } while (p < end && isdigit(*p));
170 srx->transport.sin6.sin6_port = htons(xport);
171 } else if (*p == delim) {
172 p++;
173 } else {
174 goto bad_address;
175 }
176 }
177
178 alist->nr_addrs++;
179 } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
180
181 _leave(" = [nr %u]", alist->nr_addrs);
182 return alist;
183
184bad_address:
185 kfree(alist);
186 return ERR_PTR(-EINVAL);
187}
188
189/*
190 * Compare old and new address lists to see if there's been any change.
191 * - How to do this in better than O(Nlog(N)) time?
192 * - We don't really want to sort the address list, but would rather take the
193 * list as we got it so as not to undo record rotation by the DNS server.
194 */
195#if 0
196static int afs_cmp_addr_list(const struct afs_addr_list *a1,
197 const struct afs_addr_list *a2)
198{
199}
200#endif
201
202/*
203 * Perform a DNS query for VL servers and build a up an address list.
204 */
205struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
206{
207 struct afs_addr_list *alist;
208 char *vllist = NULL;
209 int ret;
210
211 _enter("%s", cell->name);
212
213 ret = dns_query("afsdb", cell->name, cell->name_len,
214 "ipv4", &vllist, _expiry);
215 if (ret < 0)
216 return ERR_PTR(ret);
217
218 alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
219 VL_SERVICE, AFS_VL_PORT);
220 if (IS_ERR(alist)) {
221 kfree(vllist);
222 if (alist != ERR_PTR(-ENOMEM))
223 pr_err("Failed to parse DNS data\n");
224 return alist;
225 }
226
227 kfree(vllist);
228 return alist;
229}
230
231/*
232 * Merge an IPv4 entry into a fileserver address list.
233 */
234void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
235{
236 struct sockaddr_in6 *a;
237 __be16 xport = htons(port);
238 int i;
239
240 for (i = 0; i < alist->nr_ipv4; i++) {
241 a = &alist->addrs[i].transport.sin6;
242 if (xdr == a->sin6_addr.s6_addr32[3] &&
243 xport == a->sin6_port)
244 return;
245 if (xdr == a->sin6_addr.s6_addr32[3] &&
246 xport < a->sin6_port)
247 break;
248 if (xdr < a->sin6_addr.s6_addr32[3])
249 break;
250 }
251
252 if (i < alist->nr_addrs)
253 memmove(alist->addrs + i + 1,
254 alist->addrs + i,
255 sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
256
257 a = &alist->addrs[i].transport.sin6;
258 a->sin6_port = xport;
259 a->sin6_addr.s6_addr32[0] = 0;
260 a->sin6_addr.s6_addr32[1] = 0;
261 a->sin6_addr.s6_addr32[2] = htonl(0xffff);
262 a->sin6_addr.s6_addr32[3] = xdr;
263 alist->nr_ipv4++;
264 alist->nr_addrs++;
265}
266
267/*
268 * Merge an IPv6 entry into a fileserver address list.
269 */
270void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
271{
272 struct sockaddr_in6 *a;
273 __be16 xport = htons(port);
274 int i, diff;
275
276 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
277 a = &alist->addrs[i].transport.sin6;
278 diff = memcmp(xdr, &a->sin6_addr, 16);
279 if (diff == 0 &&
280 xport == a->sin6_port)
281 return;
282 if (diff == 0 &&
283 xport < a->sin6_port)
284 break;
285 if (diff < 0)
286 break;
287 }
288
289 if (i < alist->nr_addrs)
290 memmove(alist->addrs + i + 1,
291 alist->addrs + i,
292 sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
293
294 a = &alist->addrs[i].transport.sin6;
295 a->sin6_port = xport;
296 a->sin6_addr.s6_addr32[0] = xdr[0];
297 a->sin6_addr.s6_addr32[1] = xdr[1];
298 a->sin6_addr.s6_addr32[2] = xdr[2];
299 a->sin6_addr.s6_addr32[3] = xdr[3];
300 alist->nr_addrs++;
301}
302
303/*
304 * Get an address to try.
305 */
306bool afs_iterate_addresses(struct afs_addr_cursor *ac)
307{
308 _enter("%hu+%hd", ac->start, (short)ac->index);
309
310 if (!ac->alist)
311 return false;
312
313 if (ac->begun) {
314 ac->index++;
315 if (ac->index == ac->alist->nr_addrs)
316 ac->index = 0;
317
318 if (ac->index == ac->start) {
319 ac->error = -EDESTADDRREQ;
320 return false;
321 }
322 }
323
324 ac->begun = true;
325 ac->responded = false;
326 ac->addr = &ac->alist->addrs[ac->index];
327 return true;
328}
329
330/*
331 * Release an address list cursor.
332 */
333int afs_end_cursor(struct afs_addr_cursor *ac)
334{
335 if (ac->responded && ac->index != ac->start)
336 WRITE_ONCE(ac->alist->index, ac->index);
337
338 afs_put_addrlist(ac->alist);
339 ac->alist = NULL;
340 return ac->error;
341}
342
343/*
344 * Set the address cursor for iterating over VL servers.
345 */
346int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
347{
348 struct afs_addr_list *alist;
349 int ret;
350
351 if (!rcu_access_pointer(cell->vl_addrs)) {
352 ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
353 TASK_INTERRUPTIBLE);
354 if (ret < 0)
355 return ret;
356
357 if (!rcu_access_pointer(cell->vl_addrs) &&
358 ktime_get_real_seconds() < cell->dns_expiry)
359 return cell->error;
360 }
361
362 read_lock(&cell->vl_addrs_lock);
363 alist = rcu_dereference_protected(cell->vl_addrs,
364 lockdep_is_held(&cell->vl_addrs_lock));
365 if (alist->nr_addrs > 0)
366 afs_get_addrlist(alist);
367 else
368 alist = NULL;
369 read_unlock(&cell->vl_addrs_lock);
370
371 if (!alist)
372 return -EDESTADDRREQ;
373
374 ac->alist = alist;
375 ac->addr = NULL;
376 ac->start = READ_ONCE(alist->index);
377 ac->index = ac->start;
378 ac->error = 0;
379 ac->begun = false;
380 return 0;
381}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 3c462ff6db63..b94d0edc2b78 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -14,11 +14,14 @@
14 14
15#include <linux/in.h> 15#include <linux/in.h>
16 16
17#define AFS_MAXCELLNAME 64 /* maximum length of a cell name */ 17#define AFS_MAXCELLNAME 64 /* Maximum length of a cell name */
18#define AFS_MAXVOLNAME 64 /* maximum length of a volume name */ 18#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */
19#define AFSNAMEMAX 256 /* maximum length of a filename plus NUL */ 19#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */
20#define AFSPATHMAX 1024 /* maximum length of a pathname plus NUL */ 20#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */
21#define AFSOPAQUEMAX 1024 /* maximum length of an opaque field */ 21#define AFS_MAXTYPES 3 /* Maximum number of volume types */
22#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */
23#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
24#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
22 25
23typedef unsigned afs_volid_t; 26typedef unsigned afs_volid_t;
24typedef unsigned afs_vnodeid_t; 27typedef unsigned afs_vnodeid_t;
@@ -72,6 +75,15 @@ struct afs_callback {
72 75
73#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ 76#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */
74 77
78struct afs_uuid {
79 __be32 time_low; /* low part of timestamp */
80 __be16 time_mid; /* mid part of timestamp */
81 __be16 time_hi_and_version; /* high part of timestamp and version */
82 __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
83 __s8 clock_seq_low; /* clock seq low */
84 __s8 node[6]; /* spatially unique node ID (MAC addr) */
85};
86
75/* 87/*
76 * AFS volume information 88 * AFS volume information
77 */ 89 */
@@ -124,7 +136,6 @@ struct afs_file_status {
124 afs_access_t caller_access; /* access rights for authenticated caller */ 136 afs_access_t caller_access; /* access rights for authenticated caller */
125 afs_access_t anon_access; /* access rights for unauthenticated caller */ 137 afs_access_t anon_access; /* access rights for unauthenticated caller */
126 umode_t mode; /* UNIX mode */ 138 umode_t mode; /* UNIX mode */
127 struct afs_fid parent; /* parent dir ID for non-dirs only */
128 time_t mtime_client; /* last time client changed data */ 139 time_t mtime_client; /* last time client changed data */
129 time_t mtime_server; /* last time server changed data */ 140 time_t mtime_server; /* last time server changed data */
130 s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ 141 s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
@@ -167,4 +178,16 @@ struct afs_volume_status {
167 178
168#define AFS_BLOCK_SIZE 1024 179#define AFS_BLOCK_SIZE 1024
169 180
181/*
182 * XDR encoding of UUID in AFS.
183 */
184struct afs_uuid__xdr {
185 __be32 time_low;
186 __be32 time_mid;
187 __be32 time_hi_and_version;
188 __be32 clock_seq_hi_and_reserved;
189 __be32 clock_seq_low;
190 __be32 node[6];
191};
192
170#endif /* AFS_H */ 193#endif /* AFS_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index eb647323d8f0..d47b6d01e4c0 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -37,9 +37,12 @@ enum AFS_FS_Operations {
37 FSLOOKUP = 161, /* AFS lookup file in directory */ 37 FSLOOKUP = 161, /* AFS lookup file in directory */
38 FSFETCHDATA64 = 65537, /* AFS Fetch file data */ 38 FSFETCHDATA64 = 65537, /* AFS Fetch file data */
39 FSSTOREDATA64 = 65538, /* AFS Store file data */ 39 FSSTOREDATA64 = 65538, /* AFS Store file data */
40 FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */
41 FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */
40}; 42};
41 43
42enum AFS_FS_Errors { 44enum AFS_FS_Errors {
45 VRESTARTING = -100, /* Server is restarting */
43 VSALVAGE = 101, /* volume needs salvaging */ 46 VSALVAGE = 101, /* volume needs salvaging */
44 VNOVNODE = 102, /* no such file/dir (vnode) */ 47 VNOVNODE = 102, /* no such file/dir (vnode) */
45 VNOVOL = 103, /* no such volume or volume unavailable */ 48 VNOVOL = 103, /* no such volume or volume unavailable */
@@ -51,6 +54,9 @@ enum AFS_FS_Errors {
51 VOVERQUOTA = 109, /* volume's maximum quota exceeded */ 54 VOVERQUOTA = 109, /* volume's maximum quota exceeded */
52 VBUSY = 110, /* volume is temporarily unavailable */ 55 VBUSY = 110, /* volume is temporarily unavailable */
53 VMOVED = 111, /* volume moved to new server - ask this FS where */ 56 VMOVED = 111, /* volume moved to new server - ask this FS where */
57 VIO = 112, /* I/O error in volume */
58 VSALVAGING = 113, /* Volume is being salvaged */
59 VRESTRICTED = 120, /* Volume is restricted from using */
54}; 60};
55 61
56#endif /* AFS_FS_H */ 62#endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 800f607ffaf5..e3c4688f573b 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -16,11 +16,17 @@
16 16
17#define AFS_VL_PORT 7003 /* volume location service port */ 17#define AFS_VL_PORT 7003 /* volume location service port */
18#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ 18#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
19#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
19 20
20enum AFSVL_Operations { 21enum AFSVL_Operations {
21 VLGETENTRYBYID = 503, /* AFS Get Cache Entry By ID operation ID */ 22 VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
22 VLGETENTRYBYNAME = 504, /* AFS Get Cache Entry By Name operation ID */ 23 VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */
23 VLPROBE = 514, /* AFS Probe Volume Location Service operation ID */ 24 VLPROBE = 514, /* AFS probe VL service */
25 VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */
26 VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */
27 VLGETADDRSU = 533, /* AFS Get addrs for fileserver */
28 YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */
29 VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */
24}; 30};
25 31
26enum AFSVL_Errors { 32enum AFSVL_Errors {
@@ -54,6 +60,19 @@ enum AFSVL_Errors {
54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ 60 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
55}; 61};
56 62
63enum {
64 YFS_SERVER_INDEX = 0,
65 YFS_SERVER_UUID = 1,
66 YFS_SERVER_ENDPOINT = 2,
67};
68
69enum {
70 YFS_ENDPOINT_IPV4 = 0,
71 YFS_ENDPOINT_IPV6 = 1,
72};
73
74#define YFS_MAXENDPOINTS 16
75
57/* 76/*
58 * maps to "struct vldbentry" in vvl-spec.pdf 77 * maps to "struct vldbentry" in vvl-spec.pdf
59 */ 78 */
@@ -74,11 +93,57 @@ struct afs_vldbentry {
74 struct in_addr addr; /* server address */ 93 struct in_addr addr; /* server address */
75 unsigned partition; /* partition ID on this server */ 94 unsigned partition; /* partition ID on this server */
76 unsigned flags; /* server specific flags */ 95 unsigned flags; /* server specific flags */
77#define AFS_VLSF_NEWREPSITE 0x0001 /* unused */ 96#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */
78#define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */ 97#define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */
79#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */ 98#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */
80#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */ 99#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */
100#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */
101#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */
81 } servers[8]; 102 } servers[8];
82}; 103};
83 104
105#define AFS_VLDB_MAXNAMELEN 65
106
107
108struct afs_ListAddrByAttributes__xdr {
109 __be32 Mask;
110#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */
111#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */
112#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */
113 __be32 ipaddr;
114 __be32 index;
115 __be32 spare;
116 struct afs_uuid__xdr uuid;
117};
118
119struct afs_uvldbentry__xdr {
120 __be32 name[AFS_VLDB_MAXNAMELEN];
121 __be32 nServers;
122 struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS];
123 __be32 serverUnique[AFS_NMAXNSERVERS];
124 __be32 serverPartition[AFS_NMAXNSERVERS];
125 __be32 serverFlags[AFS_NMAXNSERVERS];
126 __be32 volumeId[AFS_MAXTYPES];
127 __be32 cloneId;
128 __be32 flags;
129 __be32 spares1;
130 __be32 spares2;
131 __be32 spares3;
132 __be32 spares4;
133 __be32 spares5;
134 __be32 spares6;
135 __be32 spares7;
136 __be32 spares8;
137 __be32 spares9;
138};
139
140struct afs_address_list {
141 refcount_t usage;
142 unsigned int version;
143 unsigned int nr_addrs;
144 struct sockaddr_rxrpc addrs[];
145};
146
147extern void afs_put_address_list(struct afs_address_list *alist);
148
84#endif /* AFS_VL_H */ 149#endif /* AFS_VL_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 1fe855191261..f62ff71d28c9 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -14,19 +14,6 @@
14 14
15static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, 15static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
16 void *buffer, uint16_t buflen); 16 void *buffer, uint16_t buflen);
17static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
18 void *buffer, uint16_t buflen);
19static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
20 const void *buffer,
21 uint16_t buflen);
22
23static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
24 void *buffer, uint16_t buflen);
25static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
26 void *buffer, uint16_t buflen);
27static enum fscache_checkaux afs_vlocation_cache_check_aux(
28 void *cookie_netfs_data, const void *buffer, uint16_t buflen);
29
30static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, 17static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
31 void *buffer, uint16_t buflen); 18 void *buffer, uint16_t buflen);
32 19
@@ -42,23 +29,13 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
42 29
43struct fscache_netfs afs_cache_netfs = { 30struct fscache_netfs afs_cache_netfs = {
44 .name = "afs", 31 .name = "afs",
45 .version = 0, 32 .version = 1,
46}; 33};
47 34
48struct fscache_cookie_def afs_cell_cache_index_def = { 35struct fscache_cookie_def afs_cell_cache_index_def = {
49 .name = "AFS.cell", 36 .name = "AFS.cell",
50 .type = FSCACHE_COOKIE_TYPE_INDEX, 37 .type = FSCACHE_COOKIE_TYPE_INDEX,
51 .get_key = afs_cell_cache_get_key, 38 .get_key = afs_cell_cache_get_key,
52 .get_aux = afs_cell_cache_get_aux,
53 .check_aux = afs_cell_cache_check_aux,
54};
55
56struct fscache_cookie_def afs_vlocation_cache_index_def = {
57 .name = "AFS.vldb",
58 .type = FSCACHE_COOKIE_TYPE_INDEX,
59 .get_key = afs_vlocation_cache_get_key,
60 .get_aux = afs_vlocation_cache_get_aux,
61 .check_aux = afs_vlocation_cache_check_aux,
62}; 39};
63 40
64struct fscache_cookie_def afs_volume_cache_index_def = { 41struct fscache_cookie_def afs_volume_cache_index_def = {
@@ -95,150 +72,26 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
95 return klen; 72 return klen;
96} 73}
97 74
98/*
99 * provide new auxiliary cache data
100 */
101static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
102 void *buffer, uint16_t bufmax)
103{
104 const struct afs_cell *cell = cookie_netfs_data;
105 uint16_t dlen;
106
107 _enter("%p,%p,%u", cell, buffer, bufmax);
108
109 dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
110 dlen = min(dlen, bufmax);
111 dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
112
113 memcpy(buffer, cell->vl_addrs, dlen);
114 return dlen;
115}
116
117/*
118 * check that the auxiliary data indicates that the entry is still valid
119 */
120static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
121 const void *buffer,
122 uint16_t buflen)
123{
124 _leave(" = OKAY");
125 return FSCACHE_CHECKAUX_OKAY;
126}
127
128/*****************************************************************************/
129/*
130 * set the key for the index entry
131 */
132static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
133 void *buffer, uint16_t bufmax)
134{
135 const struct afs_vlocation *vlocation = cookie_netfs_data;
136 uint16_t klen;
137
138 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
139
140 klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
141 if (klen > bufmax)
142 return 0;
143
144 memcpy(buffer, vlocation->vldb.name, klen);
145
146 _leave(" = %u", klen);
147 return klen;
148}
149
150/*
151 * provide new auxiliary cache data
152 */
153static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
154 void *buffer, uint16_t bufmax)
155{
156 const struct afs_vlocation *vlocation = cookie_netfs_data;
157 uint16_t dlen;
158
159 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
160
161 dlen = sizeof(struct afs_cache_vlocation);
162 dlen -= offsetof(struct afs_cache_vlocation, nservers);
163 if (dlen > bufmax)
164 return 0;
165
166 memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
167
168 _leave(" = %u", dlen);
169 return dlen;
170}
171
172/*
173 * check that the auxiliary data indicates that the entry is still valid
174 */
175static
176enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
177 const void *buffer,
178 uint16_t buflen)
179{
180 const struct afs_cache_vlocation *cvldb;
181 struct afs_vlocation *vlocation = cookie_netfs_data;
182 uint16_t dlen;
183
184 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
185
186 /* check the size of the data is what we're expecting */
187 dlen = sizeof(struct afs_cache_vlocation);
188 dlen -= offsetof(struct afs_cache_vlocation, nservers);
189 if (dlen != buflen)
190 return FSCACHE_CHECKAUX_OBSOLETE;
191
192 cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
193
194 /* if what's on disk is more valid than what's in memory, then use the
195 * VL record from the cache */
196 if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
197 memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
198 vlocation->valid = 1;
199 _leave(" = SUCCESS [c->m]");
200 return FSCACHE_CHECKAUX_OKAY;
201 }
202
203 /* need to update the cache if the cached info differs */
204 if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
205 /* delete if the volume IDs for this name differ */
206 if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
207 sizeof(cvldb->vid)) != 0
208 ) {
209 _leave(" = OBSOLETE");
210 return FSCACHE_CHECKAUX_OBSOLETE;
211 }
212
213 _leave(" = UPDATE");
214 return FSCACHE_CHECKAUX_NEEDS_UPDATE;
215 }
216
217 _leave(" = OKAY");
218 return FSCACHE_CHECKAUX_OKAY;
219}
220
221/*****************************************************************************/ 75/*****************************************************************************/
222/* 76/*
223 * set the key for the volume index entry 77 * set the key for the volume index entry
224 */ 78 */
225static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, 79static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
226 void *buffer, uint16_t bufmax) 80 void *buffer, uint16_t bufmax)
227{ 81{
228 const struct afs_volume *volume = cookie_netfs_data; 82 const struct afs_volume *volume = cookie_netfs_data;
229 uint16_t klen; 83 struct {
84 u64 volid;
85 } __packed key;
230 86
231 _enter("{%u},%p,%u", volume->type, buffer, bufmax); 87 _enter("{%u},%p,%u", volume->type, buffer, bufmax);
232 88
233 klen = sizeof(volume->type); 89 if (bufmax < sizeof(key))
234 if (klen > bufmax)
235 return 0; 90 return 0;
236 91
237 memcpy(buffer, &volume->type, sizeof(volume->type)); 92 key.volid = volume->vid;
238 93 memcpy(buffer, &key, sizeof(key));
239 _leave(" = %u", klen); 94 return sizeof(key);
240 return klen;
241
242} 95}
243 96
244/*****************************************************************************/ 97/*****************************************************************************/
@@ -249,20 +102,25 @@ static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
249 void *buffer, uint16_t bufmax) 102 void *buffer, uint16_t bufmax)
250{ 103{
251 const struct afs_vnode *vnode = cookie_netfs_data; 104 const struct afs_vnode *vnode = cookie_netfs_data;
252 uint16_t klen; 105 struct {
106 u32 vnode_id[3];
107 } __packed key;
253 108
254 _enter("{%x,%x,%llx},%p,%u", 109 _enter("{%x,%x,%llx},%p,%u",
255 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, 110 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
256 buffer, bufmax); 111 buffer, bufmax);
257 112
258 klen = sizeof(vnode->fid.vnode); 113 /* Allow for a 96-bit key */
259 if (klen > bufmax) 114 memset(&key, 0, sizeof(key));
260 return 0; 115 key.vnode_id[0] = vnode->fid.vnode;
116 key.vnode_id[1] = 0;
117 key.vnode_id[2] = 0;
261 118
262 memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); 119 if (sizeof(key) > bufmax)
120 return 0;
263 121
264 _leave(" = %u", klen); 122 memcpy(buffer, &key, sizeof(key));
265 return klen; 123 return sizeof(key);
266} 124}
267 125
268/* 126/*
@@ -280,6 +138,11 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
280 *size = vnode->status.size; 138 *size = vnode->status.size;
281} 139}
282 140
141struct afs_vnode_cache_aux {
142 u64 data_version;
143 u32 fid_unique;
144} __packed;
145
283/* 146/*
284 * provide new auxiliary cache data 147 * provide new auxiliary cache data
285 */ 148 */
@@ -287,23 +150,21 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
287 void *buffer, uint16_t bufmax) 150 void *buffer, uint16_t bufmax)
288{ 151{
289 const struct afs_vnode *vnode = cookie_netfs_data; 152 const struct afs_vnode *vnode = cookie_netfs_data;
290 uint16_t dlen; 153 struct afs_vnode_cache_aux aux;
291 154
292 _enter("{%x,%x,%Lx},%p,%u", 155 _enter("{%x,%x,%Lx},%p,%u",
293 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, 156 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
294 buffer, bufmax); 157 buffer, bufmax);
295 158
296 dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); 159 memset(&aux, 0, sizeof(aux));
297 if (dlen > bufmax) 160 aux.data_version = vnode->status.data_version;
298 return 0; 161 aux.fid_unique = vnode->fid.unique;
299 162
300 memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); 163 if (bufmax < sizeof(aux))
301 buffer += sizeof(vnode->fid.unique); 164 return 0;
302 memcpy(buffer, &vnode->status.data_version,
303 sizeof(vnode->status.data_version));
304 165
305 _leave(" = %u", dlen); 166 memcpy(buffer, &aux, sizeof(aux));
306 return dlen; 167 return sizeof(aux);
307} 168}
308 169
309/* 170/*
@@ -314,43 +175,29 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
314 uint16_t buflen) 175 uint16_t buflen)
315{ 176{
316 struct afs_vnode *vnode = cookie_netfs_data; 177 struct afs_vnode *vnode = cookie_netfs_data;
317 uint16_t dlen; 178 struct afs_vnode_cache_aux aux;
318 179
319 _enter("{%x,%x,%llx},%p,%u", 180 _enter("{%x,%x,%llx},%p,%u",
320 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, 181 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
321 buffer, buflen); 182 buffer, buflen);
322 183
184 memcpy(&aux, buffer, sizeof(aux));
185
323 /* check the size of the data is what we're expecting */ 186 /* check the size of the data is what we're expecting */
324 dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); 187 if (buflen != sizeof(aux)) {
325 if (dlen != buflen) { 188 _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux));
326 _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
327 return FSCACHE_CHECKAUX_OBSOLETE; 189 return FSCACHE_CHECKAUX_OBSOLETE;
328 } 190 }
329 191
330 if (memcmp(buffer, 192 if (vnode->fid.unique != aux.fid_unique) {
331 &vnode->fid.unique,
332 sizeof(vnode->fid.unique)
333 ) != 0) {
334 unsigned unique;
335
336 memcpy(&unique, buffer, sizeof(unique));
337
338 _leave(" = OBSOLETE [uniq %x != %x]", 193 _leave(" = OBSOLETE [uniq %x != %x]",
339 unique, vnode->fid.unique); 194 aux.fid_unique, vnode->fid.unique);
340 return FSCACHE_CHECKAUX_OBSOLETE; 195 return FSCACHE_CHECKAUX_OBSOLETE;
341 } 196 }
342 197
343 if (memcmp(buffer + sizeof(vnode->fid.unique), 198 if (vnode->status.data_version != aux.data_version) {
344 &vnode->status.data_version,
345 sizeof(vnode->status.data_version)
346 ) != 0) {
347 afs_dataversion_t version;
348
349 memcpy(&version, buffer + sizeof(vnode->fid.unique),
350 sizeof(version));
351
352 _leave(" = OBSOLETE [vers %llx != %llx]", 199 _leave(" = OBSOLETE [vers %llx != %llx]",
353 version, vnode->status.data_version); 200 aux.data_version, vnode->status.data_version);
354 return FSCACHE_CHECKAUX_OBSOLETE; 201 return FSCACHE_CHECKAUX_OBSOLETE;
355 } 202 }
356 203
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 25d404d22cae..f4291b576054 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -20,118 +20,151 @@
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include "internal.h" 21#include "internal.h"
22 22
23#if 0
24unsigned afs_vnode_update_timeout = 10;
25#endif /* 0 */
26
27#define afs_breakring_space(server) \
28 CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \
29 ARRAY_SIZE((server)->cb_break))
30
31//static void afs_callback_updater(struct work_struct *);
32
33static struct workqueue_struct *afs_callback_update_worker;
34
35/* 23/*
36 * allow the fileserver to request callback state (re-)initialisation 24 * Set up an interest-in-callbacks record for a volume on a server and
25 * register it with the server.
26 * - Called with volume->server_sem held.
37 */ 27 */
38void afs_init_callback_state(struct afs_server *server) 28int afs_register_server_cb_interest(struct afs_vnode *vnode,
29 struct afs_server_entry *entry)
39{ 30{
40 struct afs_vnode *vnode; 31 struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
41 32 struct afs_server *server = entry->server;
42 _enter("{%p}", server); 33
34again:
35 vcbi = vnode->cb_interest;
36 if (vcbi) {
37 if (vcbi == cbi)
38 return 0;
39
40 if (cbi && vcbi->server == cbi->server) {
41 write_seqlock(&vnode->cb_lock);
42 vnode->cb_interest = afs_get_cb_interest(cbi);
43 write_sequnlock(&vnode->cb_lock);
44 afs_put_cb_interest(afs_v2net(vnode), cbi);
45 return 0;
46 }
43 47
44 spin_lock(&server->cb_lock); 48 if (!cbi && vcbi->server == server) {
49 afs_get_cb_interest(vcbi);
50 x = cmpxchg(&entry->cb_interest, cbi, vcbi);
51 if (x != cbi) {
52 cbi = x;
53 afs_put_cb_interest(afs_v2net(vnode), vcbi);
54 goto again;
55 }
56 return 0;
57 }
58 }
45 59
46 /* kill all the promises on record from this server */ 60 if (!cbi) {
47 while (!RB_EMPTY_ROOT(&server->cb_promises)) { 61 new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
48 vnode = rb_entry(server->cb_promises.rb_node, 62 if (!new)
49 struct afs_vnode, cb_promise); 63 return -ENOMEM;
50 _debug("UNPROMISE { vid=%x:%u uq=%u}", 64
51 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 65 refcount_set(&new->usage, 1);
52 rb_erase(&vnode->cb_promise, &server->cb_promises); 66 new->sb = vnode->vfs_inode.i_sb;
53 vnode->cb_promised = false; 67 new->vid = vnode->volume->vid;
68 new->server = afs_get_server(server);
69 INIT_LIST_HEAD(&new->cb_link);
70
71 write_lock(&server->cb_break_lock);
72 list_add_tail(&new->cb_link, &server->cb_interests);
73 write_unlock(&server->cb_break_lock);
74
75 x = cmpxchg(&entry->cb_interest, cbi, new);
76 if (x == cbi) {
77 cbi = new;
78 } else {
79 cbi = x;
80 afs_put_cb_interest(afs_v2net(vnode), new);
81 }
54 } 82 }
55 83
56 spin_unlock(&server->cb_lock); 84 ASSERT(cbi);
57 _leave(""); 85
86 /* Change the server the vnode is using. This entails scrubbing any
87 * interest the vnode had in the previous server it was using.
88 */
89 write_seqlock(&vnode->cb_lock);
90
91 vnode->cb_interest = afs_get_cb_interest(cbi);
92 vnode->cb_s_break = cbi->server->cb_s_break;
93 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
94
95 write_sequnlock(&vnode->cb_lock);
96 return 0;
58} 97}
59 98
60/* 99/*
61 * handle the data invalidation side of a callback being broken 100 * Set a vnode's interest on a server.
62 */ 101 */
63void afs_broken_callback_work(struct work_struct *work) 102void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi)
64{ 103{
65 struct afs_vnode *vnode = 104 struct afs_cb_interest *old_cbi = NULL;
66 container_of(work, struct afs_vnode, cb_broken_work);
67 105
68 _enter(""); 106 if (vnode->cb_interest == cbi)
69
70 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
71 return; 107 return;
72 108
73 /* we're only interested in dealing with a broken callback on *this* 109 write_seqlock(&vnode->cb_lock);
74 * vnode and only if no-one else has dealt with it yet */ 110 if (vnode->cb_interest != cbi) {
75 if (!mutex_trylock(&vnode->validate_lock)) 111 afs_get_cb_interest(cbi);
76 return; /* someone else is dealing with it */ 112 old_cbi = vnode->cb_interest;
77 113 vnode->cb_interest = cbi;
78 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
79 if (S_ISDIR(vnode->vfs_inode.i_mode))
80 afs_clear_permits(vnode);
81
82 if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
83 goto out;
84
85 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
86 goto out;
87
88 /* if the vnode's data version number changed then its contents
89 * are different */
90 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
91 afs_zap_data(vnode);
92 } 114 }
115 write_sequnlock(&vnode->cb_lock);
116 afs_put_cb_interest(afs_v2net(vnode), cbi);
117}
93 118
94out: 119/*
95 mutex_unlock(&vnode->validate_lock); 120 * Remove an interest on a server.
96 121 */
97 /* avoid the potential race whereby the mutex_trylock() in this 122void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
98 * function happens again between the clear_bit() and the 123{
99 * mutex_unlock() */ 124 if (cbi && refcount_dec_and_test(&cbi->usage)) {
100 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { 125 if (!list_empty(&cbi->cb_link)) {
101 _debug("requeue"); 126 write_lock(&cbi->server->cb_break_lock);
102 queue_work(afs_callback_update_worker, &vnode->cb_broken_work); 127 list_del_init(&cbi->cb_link);
128 write_unlock(&cbi->server->cb_break_lock);
129 afs_put_server(net, cbi->server);
130 }
131 kfree(cbi);
103 } 132 }
104 _leave(""); 133}
134
135/*
136 * allow the fileserver to request callback state (re-)initialisation
137 */
138void afs_init_callback_state(struct afs_server *server)
139{
140 if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
141 server->cb_s_break++;
105} 142}
106 143
107/* 144/*
108 * actually break a callback 145 * actually break a callback
109 */ 146 */
110static void afs_break_callback(struct afs_server *server, 147void afs_break_callback(struct afs_vnode *vnode)
111 struct afs_vnode *vnode)
112{ 148{
113 _enter(""); 149 _enter("");
114 150
115 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); 151 write_seqlock(&vnode->cb_lock);
152
153 if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
154 vnode->cb_break++;
155 afs_clear_permits(vnode);
116 156
117 if (vnode->cb_promised) {
118 spin_lock(&vnode->lock); 157 spin_lock(&vnode->lock);
119 158
120 _debug("break callback"); 159 _debug("break callback");
121 160
122 spin_lock(&server->cb_lock);
123 if (vnode->cb_promised) {
124 rb_erase(&vnode->cb_promise, &server->cb_promises);
125 vnode->cb_promised = false;
126 }
127 spin_unlock(&server->cb_lock);
128
129 queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
130 if (list_empty(&vnode->granted_locks) && 161 if (list_empty(&vnode->granted_locks) &&
131 !list_empty(&vnode->pending_locks)) 162 !list_empty(&vnode->pending_locks))
132 afs_lock_may_be_available(vnode); 163 afs_lock_may_be_available(vnode);
133 spin_unlock(&vnode->lock); 164 spin_unlock(&vnode->lock);
134 } 165 }
166
167 write_sequnlock(&vnode->cb_lock);
135} 168}
136 169
137/* 170/*
@@ -143,49 +176,31 @@ static void afs_break_callback(struct afs_server *server,
143static void afs_break_one_callback(struct afs_server *server, 176static void afs_break_one_callback(struct afs_server *server,
144 struct afs_fid *fid) 177 struct afs_fid *fid)
145{ 178{
179 struct afs_cb_interest *cbi;
180 struct afs_iget_data data;
146 struct afs_vnode *vnode; 181 struct afs_vnode *vnode;
147 struct rb_node *p; 182 struct inode *inode;
148
149 _debug("find");
150 spin_lock(&server->fs_lock);
151 p = server->fs_vnodes.rb_node;
152 while (p) {
153 vnode = rb_entry(p, struct afs_vnode, server_rb);
154 if (fid->vid < vnode->fid.vid)
155 p = p->rb_left;
156 else if (fid->vid > vnode->fid.vid)
157 p = p->rb_right;
158 else if (fid->vnode < vnode->fid.vnode)
159 p = p->rb_left;
160 else if (fid->vnode > vnode->fid.vnode)
161 p = p->rb_right;
162 else if (fid->unique < vnode->fid.unique)
163 p = p->rb_left;
164 else if (fid->unique > vnode->fid.unique)
165 p = p->rb_right;
166 else
167 goto found;
168 }
169
170 /* not found so we just ignore it (it may have moved to another
171 * server) */
172not_available:
173 _debug("not avail");
174 spin_unlock(&server->fs_lock);
175 _leave("");
176 return;
177 183
178found: 184 read_lock(&server->cb_break_lock);
179 _debug("found");
180 ASSERTCMP(server, ==, vnode->server);
181 185
182 if (!igrab(AFS_VNODE_TO_I(vnode))) 186 /* Step through all interested superblocks. There may be more than one
183 goto not_available; 187 * because of cell aliasing.
184 spin_unlock(&server->fs_lock); 188 */
189 list_for_each_entry(cbi, &server->cb_interests, cb_link) {
190 if (cbi->vid != fid->vid)
191 continue;
192
193 data.volume = NULL;
194 data.fid = *fid;
195 inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
196 if (inode) {
197 vnode = AFS_FS_I(inode);
198 afs_break_callback(vnode);
199 iput(inode);
200 }
201 }
185 202
186 afs_break_callback(server, vnode); 203 read_unlock(&server->cb_break_lock);
187 iput(&vnode->vfs_inode);
188 _leave("");
189} 204}
190 205
191/* 206/*
@@ -216,261 +231,14 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
216} 231}
217 232
218/* 233/*
219 * record the callback for breaking 234 * Clear the callback interests in a server list.
220 * - the caller must hold server->cb_lock
221 */ 235 */
222static void afs_do_give_up_callback(struct afs_server *server, 236void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist)
223 struct afs_vnode *vnode)
224{ 237{
225 struct afs_callback *cb; 238 int i;
226
227 _enter("%p,%p", server, vnode);
228
229 cb = &server->cb_break[server->cb_break_head];
230 cb->fid = vnode->fid;
231 cb->version = vnode->cb_version;
232 cb->expiry = vnode->cb_expiry;
233 cb->type = vnode->cb_type;
234 smp_wmb();
235 server->cb_break_head =
236 (server->cb_break_head + 1) &
237 (ARRAY_SIZE(server->cb_break) - 1);
238
239 /* defer the breaking of callbacks to try and collect as many as
240 * possible to ship in one operation */
241 switch (atomic_inc_return(&server->cb_break_n)) {
242 case 1 ... AFSCBMAX - 1:
243 queue_delayed_work(afs_callback_update_worker,
244 &server->cb_break_work, HZ * 2);
245 break;
246 case AFSCBMAX:
247 afs_flush_callback_breaks(server);
248 break;
249 default:
250 break;
251 }
252
253 ASSERT(server->cb_promises.rb_node != NULL);
254 rb_erase(&vnode->cb_promise, &server->cb_promises);
255 vnode->cb_promised = false;
256 _leave("");
257}
258
259/*
260 * discard the callback on a deleted item
261 */
262void afs_discard_callback_on_delete(struct afs_vnode *vnode)
263{
264 struct afs_server *server = vnode->server;
265 239
266 _enter("%d", vnode->cb_promised); 240 for (i = 0; i < slist->nr_servers; i++) {
267 241 afs_put_cb_interest(net, slist->servers[i].cb_interest);
268 if (!vnode->cb_promised) { 242 slist->servers[i].cb_interest = NULL;
269 _leave(" [not promised]");
270 return;
271 }
272
273 ASSERT(server != NULL);
274
275 spin_lock(&server->cb_lock);
276 if (vnode->cb_promised) {
277 ASSERT(server->cb_promises.rb_node != NULL);
278 rb_erase(&vnode->cb_promise, &server->cb_promises);
279 vnode->cb_promised = false;
280 } 243 }
281 spin_unlock(&server->cb_lock);
282 _leave("");
283}
284
285/*
286 * give up the callback registered for a vnode on the file server when the
287 * inode is being cleared
288 */
289void afs_give_up_callback(struct afs_vnode *vnode)
290{
291 struct afs_server *server = vnode->server;
292
293 DECLARE_WAITQUEUE(myself, current);
294
295 _enter("%d", vnode->cb_promised);
296
297 _debug("GIVE UP INODE %p", &vnode->vfs_inode);
298
299 if (!vnode->cb_promised) {
300 _leave(" [not promised]");
301 return;
302 }
303
304 ASSERT(server != NULL);
305
306 spin_lock(&server->cb_lock);
307 if (vnode->cb_promised && afs_breakring_space(server) == 0) {
308 add_wait_queue(&server->cb_break_waitq, &myself);
309 for (;;) {
310 set_current_state(TASK_UNINTERRUPTIBLE);
311 if (!vnode->cb_promised ||
312 afs_breakring_space(server) != 0)
313 break;
314 spin_unlock(&server->cb_lock);
315 schedule();
316 spin_lock(&server->cb_lock);
317 }
318 remove_wait_queue(&server->cb_break_waitq, &myself);
319 __set_current_state(TASK_RUNNING);
320 }
321
322 /* of course, it's always possible for the server to break this vnode's
323 * callback first... */
324 if (vnode->cb_promised)
325 afs_do_give_up_callback(server, vnode);
326
327 spin_unlock(&server->cb_lock);
328 _leave("");
329}
330
331/*
332 * dispatch a deferred give up callbacks operation
333 */
334void afs_dispatch_give_up_callbacks(struct work_struct *work)
335{
336 struct afs_server *server =
337 container_of(work, struct afs_server, cb_break_work.work);
338
339 _enter("");
340
341 /* tell the fileserver to discard the callback promises it has
342 * - in the event of ENOMEM or some other error, we just forget that we
343 * had callbacks entirely, and the server will call us later to break
344 * them
345 */
346 afs_fs_give_up_callbacks(server, true);
347}
348
349/*
350 * flush the outstanding callback breaks on a server
351 */
352void afs_flush_callback_breaks(struct afs_server *server)
353{
354 mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
355}
356
357#if 0
358/*
359 * update a bunch of callbacks
360 */
361static void afs_callback_updater(struct work_struct *work)
362{
363 struct afs_server *server;
364 struct afs_vnode *vnode, *xvnode;
365 time64_t now;
366 long timeout;
367 int ret;
368
369 server = container_of(work, struct afs_server, updater);
370
371 _enter("");
372
373 now = ktime_get_real_seconds();
374
375 /* find the first vnode to update */
376 spin_lock(&server->cb_lock);
377 for (;;) {
378 if (RB_EMPTY_ROOT(&server->cb_promises)) {
379 spin_unlock(&server->cb_lock);
380 _leave(" [nothing]");
381 return;
382 }
383
384 vnode = rb_entry(rb_first(&server->cb_promises),
385 struct afs_vnode, cb_promise);
386 if (atomic_read(&vnode->usage) > 0)
387 break;
388 rb_erase(&vnode->cb_promise, &server->cb_promises);
389 vnode->cb_promised = false;
390 }
391
392 timeout = vnode->update_at - now;
393 if (timeout > 0) {
394 queue_delayed_work(afs_vnode_update_worker,
395 &afs_vnode_update, timeout * HZ);
396 spin_unlock(&server->cb_lock);
397 _leave(" [nothing]");
398 return;
399 }
400
401 list_del_init(&vnode->update);
402 atomic_inc(&vnode->usage);
403 spin_unlock(&server->cb_lock);
404
405 /* we can now perform the update */
406 _debug("update %s", vnode->vldb.name);
407 vnode->state = AFS_VL_UPDATING;
408 vnode->upd_rej_cnt = 0;
409 vnode->upd_busy_cnt = 0;
410
411 ret = afs_vnode_update_record(vl, &vldb);
412 switch (ret) {
413 case 0:
414 afs_vnode_apply_update(vl, &vldb);
415 vnode->state = AFS_VL_UPDATING;
416 break;
417 case -ENOMEDIUM:
418 vnode->state = AFS_VL_VOLUME_DELETED;
419 break;
420 default:
421 vnode->state = AFS_VL_UNCERTAIN;
422 break;
423 }
424
425 /* and then reschedule */
426 _debug("reschedule");
427 vnode->update_at = ktime_get_real_seconds() +
428 afs_vnode_update_timeout;
429
430 spin_lock(&server->cb_lock);
431
432 if (!list_empty(&server->cb_promises)) {
433 /* next update in 10 minutes, but wait at least 1 second more
434 * than the newest record already queued so that we don't spam
435 * the VL server suddenly with lots of requests
436 */
437 xvnode = list_entry(server->cb_promises.prev,
438 struct afs_vnode, update);
439 if (vnode->update_at <= xvnode->update_at)
440 vnode->update_at = xvnode->update_at + 1;
441 xvnode = list_entry(server->cb_promises.next,
442 struct afs_vnode, update);
443 timeout = xvnode->update_at - now;
444 if (timeout < 0)
445 timeout = 0;
446 } else {
447 timeout = afs_vnode_update_timeout;
448 }
449
450 list_add_tail(&vnode->update, &server->cb_promises);
451
452 _debug("timeout %ld", timeout);
453 queue_delayed_work(afs_vnode_update_worker,
454 &afs_vnode_update, timeout * HZ);
455 spin_unlock(&server->cb_lock);
456 afs_put_vnode(vl);
457}
458#endif
459
460/*
461 * initialise the callback update process
462 */
463int __init afs_callback_update_init(void)
464{
465 afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
466 WQ_MEM_RECLAIM);
467 return afs_callback_update_worker ? 0 : -ENOMEM;
468}
469
470/*
471 * shut down the callback update process
472 */
473void afs_callback_update_kill(void)
474{
475 destroy_workqueue(afs_callback_update_worker);
476} 244}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index ca0a3cf93791..1858c91169e4 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,6 +1,6 @@
1/* AFS cell and server record management 1/* AFS cell and server record management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,213 +9,296 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/module.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/key.h> 13#include <linux/key.h>
15#include <linux/ctype.h> 14#include <linux/ctype.h>
16#include <linux/dns_resolver.h> 15#include <linux/dns_resolver.h>
17#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/inet.h>
18#include <keys/rxrpc-type.h> 18#include <keys/rxrpc-type.h>
19#include "internal.h" 19#include "internal.h"
20 20
21DECLARE_RWSEM(afs_proc_cells_sem); 21unsigned __read_mostly afs_cell_gc_delay = 10;
22LIST_HEAD(afs_proc_cells);
23 22
24static LIST_HEAD(afs_cells); 23static void afs_manage_cell(struct work_struct *);
25static DEFINE_RWLOCK(afs_cells_lock); 24
26static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static void afs_dec_cells_outstanding(struct afs_net *net)
27static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26{
28static struct afs_cell *afs_cell_root; 27 if (atomic_dec_and_test(&net->cells_outstanding))
28 wake_up_atomic_t(&net->cells_outstanding);
29}
29 30
30/* 31/*
31 * allocate a cell record and fill in its name, VL server address list and 32 * Set the cell timer to fire after a given delay, assuming it's not already
32 * allocate an anonymous key 33 * set for an earlier time.
33 */ 34 */
34static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, 35static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
35 char *vllist)
36{ 36{
37 struct afs_cell *cell; 37 if (net->live) {
38 struct key *key; 38 atomic_inc(&net->cells_outstanding);
39 char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; 39 if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
40 char *dvllist = NULL, *_vllist = NULL; 40 afs_dec_cells_outstanding(net);
41 char delimiter = ':'; 41 }
42 int ret; 42}
43 43
44 _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); 44/*
45 * Look up and get an activation reference on a cell record under RCU
46 * conditions. The caller must hold the RCU read lock.
47 */
48struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
49 const char *name, unsigned int namesz)
50{
51 struct afs_cell *cell = NULL;
52 struct rb_node *p;
53 int n, seq = 0, ret = 0;
45 54
46 BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ 55 _enter("%*.*s", namesz, namesz, name);
47 56
48 if (namelen > AFS_MAXCELLNAME) { 57 if (name && namesz == 0)
49 _leave(" = -ENAMETOOLONG"); 58 return ERR_PTR(-EINVAL);
59 if (namesz > AFS_MAXCELLNAME)
50 return ERR_PTR(-ENAMETOOLONG); 60 return ERR_PTR(-ENAMETOOLONG);
51 }
52 61
53 /* allocate and initialise a cell record */ 62 do {
54 cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); 63 /* Unfortunately, rbtree walking doesn't give reliable results
55 if (!cell) { 64 * under just the RCU read lock, so we have to check for
56 _leave(" = -ENOMEM"); 65 * changes.
57 return ERR_PTR(-ENOMEM); 66 */
58 } 67 if (cell)
68 afs_put_cell(net, cell);
69 cell = NULL;
70 ret = -ENOENT;
59 71
60 memcpy(cell->name, name, namelen); 72 read_seqbegin_or_lock(&net->cells_lock, &seq);
61 cell->name[namelen] = 0; 73
62 74 if (!name) {
63 atomic_set(&cell->usage, 1); 75 cell = rcu_dereference_raw(net->ws_cell);
64 INIT_LIST_HEAD(&cell->link); 76 if (cell) {
65 rwlock_init(&cell->servers_lock); 77 afs_get_cell(cell);
66 INIT_LIST_HEAD(&cell->servers); 78 continue;
67 init_rwsem(&cell->vl_sem); 79 }
68 INIT_LIST_HEAD(&cell->vl_list); 80 ret = -EDESTADDRREQ;
69 spin_lock_init(&cell->vl_lock); 81 continue;
70
71 /* if the ip address is invalid, try dns query */
72 if (!vllist || strlen(vllist) < 7) {
73 ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
74 if (ret < 0) {
75 if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY)
76 /* translate these errors into something
77 * userspace might understand */
78 ret = -EDESTADDRREQ;
79 _leave(" = %d", ret);
80 return ERR_PTR(ret);
81 } 82 }
82 _vllist = dvllist;
83 83
84 /* change the delimiter for user-space reply */ 84 p = rcu_dereference_raw(net->cells.rb_node);
85 delimiter = ','; 85 while (p) {
86 cell = rb_entry(p, struct afs_cell, net_node);
87
88 n = strncasecmp(cell->name, name,
89 min_t(size_t, cell->name_len, namesz));
90 if (n == 0)
91 n = cell->name_len - namesz;
92 if (n < 0) {
93 p = rcu_dereference_raw(p->rb_left);
94 } else if (n > 0) {
95 p = rcu_dereference_raw(p->rb_right);
96 } else {
97 if (atomic_inc_not_zero(&cell->usage)) {
98 ret = 0;
99 break;
100 }
101 /* We want to repeat the search, this time with
102 * the lock properly locked.
103 */
104 }
105 cell = NULL;
106 }
86 107
87 } else { 108 } while (need_seqretry(&net->cells_lock, seq));
88 _vllist = vllist;
89 }
90 109
91 /* fill in the VL server list from the rest of the string */ 110 done_seqretry(&net->cells_lock, seq);
92 do {
93 unsigned a, b, c, d;
94 111
95 next = strchr(_vllist, delimiter); 112 return ret == 0 ? cell : ERR_PTR(ret);
96 if (next) 113}
97 *next++ = 0;
98 114
99 if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) 115/*
100 goto bad_address; 116 * Set up a cell record and fill in its name, VL server address list and
117 * allocate an anonymous key
118 */
119static struct afs_cell *afs_alloc_cell(struct afs_net *net,
120 const char *name, unsigned int namelen,
121 const char *vllist)
122{
123 struct afs_cell *cell;
124 int i, ret;
101 125
102 if (a > 255 || b > 255 || c > 255 || d > 255) 126 ASSERT(name);
103 goto bad_address; 127 if (namelen == 0)
128 return ERR_PTR(-EINVAL);
129 if (namelen > AFS_MAXCELLNAME) {
130 _leave(" = -ENAMETOOLONG");
131 return ERR_PTR(-ENAMETOOLONG);
132 }
104 133
105 cell->vl_addrs[cell->vl_naddrs++].s_addr = 134 _enter("%*.*s,%s", namelen, namelen, name, vllist);
106 htonl((a << 24) | (b << 16) | (c << 8) | d);
107 135
108 } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); 136 cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
137 if (!cell) {
138 _leave(" = -ENOMEM");
139 return ERR_PTR(-ENOMEM);
140 }
109 141
110 /* create a key to represent an anonymous user */ 142 cell->net = net;
111 memcpy(keyname, "afs@", 4); 143 cell->name_len = namelen;
112 dp = keyname + 4; 144 for (i = 0; i < namelen; i++)
113 cp = cell->name; 145 cell->name[i] = tolower(name[i]);
114 do { 146
115 *dp++ = toupper(*cp); 147 atomic_set(&cell->usage, 2);
116 } while (*cp++); 148 INIT_WORK(&cell->manager, afs_manage_cell);
149 cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
150 (1 << AFS_CELL_FL_NO_LOOKUP_YET));
151 INIT_LIST_HEAD(&cell->proc_volumes);
152 rwlock_init(&cell->proc_lock);
153 rwlock_init(&cell->vl_addrs_lock);
154
155 /* Fill in the VL server list if we were given a list of addresses to
156 * use.
157 */
158 if (vllist) {
159 struct afs_addr_list *alist;
160
161 alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
162 VL_SERVICE, AFS_VL_PORT);
163 if (IS_ERR(alist)) {
164 ret = PTR_ERR(alist);
165 goto parse_failed;
166 }
117 167
118 key = rxrpc_get_null_key(keyname); 168 rcu_assign_pointer(cell->vl_addrs, alist);
119 if (IS_ERR(key)) { 169 cell->dns_expiry = TIME64_MAX;
120 _debug("no key");
121 ret = PTR_ERR(key);
122 goto error;
123 } 170 }
124 cell->anonymous_key = key;
125
126 _debug("anon key %p{%x}",
127 cell->anonymous_key, key_serial(cell->anonymous_key));
128 171
129 _leave(" = %p", cell); 172 _leave(" = %p", cell);
130 return cell; 173 return cell;
131 174
132bad_address: 175parse_failed:
133 printk(KERN_ERR "kAFS: bad VL server IP address\n"); 176 if (ret == -EINVAL)
134 ret = -EINVAL; 177 printk(KERN_ERR "kAFS: bad VL server IP address\n");
135error:
136 key_put(cell->anonymous_key);
137 kfree(dvllist);
138 kfree(cell); 178 kfree(cell);
139 _leave(" = %d", ret); 179 _leave(" = %d", ret);
140 return ERR_PTR(ret); 180 return ERR_PTR(ret);
141} 181}
142 182
143/* 183/*
144 * afs_cell_crate() - create a cell record 184 * afs_lookup_cell - Look up or create a cell record.
145 * @name: is the name of the cell. 185 * @net: The network namespace
146 * @namsesz: is the strlen of the cell name. 186 * @name: The name of the cell.
147 * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. 187 * @namesz: The strlen of the cell name.
148 * @retref: is T to return the cell reference when the cell exists. 188 * @vllist: A colon/comma separated list of numeric IP addresses or NULL.
189 * @excl: T if an error should be given if the cell name already exists.
190 *
191 * Look up a cell record by name and query the DNS for VL server addresses if
192 * needed. Note that that actual DNS query is punted off to the manager thread
193 * so that this function can return immediately if interrupted whilst allowing
194 * cell records to be shared even if not yet fully constructed.
149 */ 195 */
150struct afs_cell *afs_cell_create(const char *name, unsigned namesz, 196struct afs_cell *afs_lookup_cell(struct afs_net *net,
151 char *vllist, bool retref) 197 const char *name, unsigned int namesz,
198 const char *vllist, bool excl)
152{ 199{
153 struct afs_cell *cell; 200 struct afs_cell *cell, *candidate, *cursor;
154 int ret; 201 struct rb_node *parent, **pp;
155 202 int ret, n;
156 _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); 203
204 _enter("%s,%s", name, vllist);
205
206 if (!excl) {
207 rcu_read_lock();
208 cell = afs_lookup_cell_rcu(net, name, namesz);
209 rcu_read_unlock();
210 if (!IS_ERR(cell)) {
211 if (excl) {
212 afs_put_cell(net, cell);
213 return ERR_PTR(-EEXIST);
214 }
215 goto wait_for_cell;
216 }
217 }
157 218
158 down_write(&afs_cells_sem); 219 /* Assume we're probably going to create a cell and preallocate and
159 read_lock(&afs_cells_lock); 220 * mostly set up a candidate record. We can then use this to stash the
160 list_for_each_entry(cell, &afs_cells, link) { 221 * name, the net namespace and VL server addresses.
161 if (strncasecmp(cell->name, name, namesz) == 0) 222 *
162 goto duplicate_name; 223 * We also want to do this before we hold any locks as it may involve
224 * upcalling to userspace to make DNS queries.
225 */
226 candidate = afs_alloc_cell(net, name, namesz, vllist);
227 if (IS_ERR(candidate)) {
228 _leave(" = %ld", PTR_ERR(candidate));
229 return candidate;
163 } 230 }
164 read_unlock(&afs_cells_lock);
165 231
166 cell = afs_cell_alloc(name, namesz, vllist); 232 /* Find the insertion point and check to see if someone else added a
167 if (IS_ERR(cell)) { 233 * cell whilst we were allocating.
168 _leave(" = %ld", PTR_ERR(cell)); 234 */
169 up_write(&afs_cells_sem); 235 write_seqlock(&net->cells_lock);
170 return cell; 236
237 pp = &net->cells.rb_node;
238 parent = NULL;
239 while (*pp) {
240 parent = *pp;
241 cursor = rb_entry(parent, struct afs_cell, net_node);
242
243 n = strncasecmp(cursor->name, name,
244 min_t(size_t, cursor->name_len, namesz));
245 if (n == 0)
246 n = cursor->name_len - namesz;
247 if (n < 0)
248 pp = &(*pp)->rb_left;
249 else if (n > 0)
250 pp = &(*pp)->rb_right;
251 else
252 goto cell_already_exists;
171 } 253 }
172 254
173 /* add a proc directory for this cell */ 255 cell = candidate;
174 ret = afs_proc_cell_setup(cell); 256 candidate = NULL;
175 if (ret < 0) 257 rb_link_node_rcu(&cell->net_node, parent, pp);
176 goto error; 258 rb_insert_color(&cell->net_node, &net->cells);
259 atomic_inc(&net->cells_outstanding);
260 write_sequnlock(&net->cells_lock);
177 261
178#ifdef CONFIG_AFS_FSCACHE 262 queue_work(afs_wq, &cell->manager);
179 /* put it up for caching (this never returns an error) */
180 cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
181 &afs_cell_cache_index_def,
182 cell, true);
183#endif
184 263
185 /* add to the cell lists */ 264wait_for_cell:
186 write_lock(&afs_cells_lock); 265 _debug("wait_for_cell");
187 list_add_tail(&cell->link, &afs_cells); 266 ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
188 write_unlock(&afs_cells_lock); 267 smp_rmb();
189 268
190 down_write(&afs_proc_cells_sem); 269 switch (READ_ONCE(cell->state)) {
191 list_add_tail(&cell->proc_link, &afs_proc_cells); 270 case AFS_CELL_FAILED:
192 up_write(&afs_proc_cells_sem); 271 ret = cell->error;
193 up_write(&afs_cells_sem); 272 goto error;
273 default:
274 _debug("weird %u %d", cell->state, cell->error);
275 goto error;
276 case AFS_CELL_ACTIVE:
277 break;
278 }
194 279
195 _leave(" = %p", cell); 280 _leave(" = %p [cell]", cell);
196 return cell; 281 return cell;
197 282
283cell_already_exists:
284 _debug("cell exists");
285 cell = cursor;
286 if (excl) {
287 ret = -EEXIST;
288 } else {
289 afs_get_cell(cursor);
290 ret = 0;
291 }
292 write_sequnlock(&net->cells_lock);
293 kfree(candidate);
294 if (ret == 0)
295 goto wait_for_cell;
296 goto error_noput;
198error: 297error:
199 up_write(&afs_cells_sem); 298 afs_put_cell(net, cell);
200 key_put(cell->anonymous_key); 299error_noput:
201 kfree(cell); 300 _leave(" = %d [error]", ret);
202 _leave(" = %d", ret);
203 return ERR_PTR(ret); 301 return ERR_PTR(ret);
204
205duplicate_name:
206 if (retref && !IS_ERR(cell))
207 afs_get_cell(cell);
208
209 read_unlock(&afs_cells_lock);
210 up_write(&afs_cells_sem);
211
212 if (retref) {
213 _leave(" = %p", cell);
214 return cell;
215 }
216
217 _leave(" = -EEXIST");
218 return ERR_PTR(-EEXIST);
219} 302}
220 303
221/* 304/*
@@ -223,10 +306,11 @@ duplicate_name:
223 * - can be called with a module parameter string 306 * - can be called with a module parameter string
224 * - can be called from a write to /proc/fs/afs/rootcell 307 * - can be called from a write to /proc/fs/afs/rootcell
225 */ 308 */
226int afs_cell_init(char *rootcell) 309int afs_cell_init(struct afs_net *net, const char *rootcell)
227{ 310{
228 struct afs_cell *old_root, *new_root; 311 struct afs_cell *old_root, *new_root;
229 char *cp; 312 const char *cp, *vllist;
313 size_t len;
230 314
231 _enter(""); 315 _enter("");
232 316
@@ -239,222 +323,453 @@ int afs_cell_init(char *rootcell)
239 } 323 }
240 324
241 cp = strchr(rootcell, ':'); 325 cp = strchr(rootcell, ':');
242 if (!cp) 326 if (!cp) {
243 _debug("kAFS: no VL server IP addresses specified"); 327 _debug("kAFS: no VL server IP addresses specified");
244 else 328 vllist = NULL;
245 *cp++ = 0; 329 len = strlen(rootcell);
330 } else {
331 vllist = cp + 1;
332 len = cp - rootcell;
333 }
246 334
247 /* allocate a cell record for the root cell */ 335 /* allocate a cell record for the root cell */
248 new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); 336 new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
249 if (IS_ERR(new_root)) { 337 if (IS_ERR(new_root)) {
250 _leave(" = %ld", PTR_ERR(new_root)); 338 _leave(" = %ld", PTR_ERR(new_root));
251 return PTR_ERR(new_root); 339 return PTR_ERR(new_root);
252 } 340 }
253 341
342 set_bit(AFS_CELL_FL_NO_GC, &new_root->flags);
343 afs_get_cell(new_root);
344
254 /* install the new cell */ 345 /* install the new cell */
255 write_lock(&afs_cells_lock); 346 write_seqlock(&net->cells_lock);
256 old_root = afs_cell_root; 347 old_root = net->ws_cell;
257 afs_cell_root = new_root; 348 net->ws_cell = new_root;
258 write_unlock(&afs_cells_lock); 349 write_sequnlock(&net->cells_lock);
259 afs_put_cell(old_root);
260 350
351 afs_put_cell(net, old_root);
261 _leave(" = 0"); 352 _leave(" = 0");
262 return 0; 353 return 0;
263} 354}
264 355
265/* 356/*
266 * lookup a cell record 357 * Update a cell's VL server address list from the DNS.
267 */ 358 */
268struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, 359static void afs_update_cell(struct afs_cell *cell)
269 bool dns_cell)
270{ 360{
271 struct afs_cell *cell; 361 struct afs_addr_list *alist, *old;
272 362 time64_t now, expiry;
273 _enter("\"%*.*s\",", namesz, namesz, name ?: ""); 363
274 364 _enter("%s", cell->name);
275 down_read(&afs_cells_sem); 365
276 read_lock(&afs_cells_lock); 366 alist = afs_dns_query(cell, &expiry);
277 367 if (IS_ERR(alist)) {
278 if (name) { 368 switch (PTR_ERR(alist)) {
279 /* if the cell was named, look for it in the cell record list */ 369 case -ENODATA:
280 list_for_each_entry(cell, &afs_cells, link) { 370 /* The DNS said that the cell does not exist */
281 if (strncmp(cell->name, name, namesz) == 0) { 371 set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
282 afs_get_cell(cell); 372 clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
283 goto found; 373 cell->dns_expiry = ktime_get_real_seconds() + 61;
284 } 374 break;
375
376 case -EAGAIN:
377 case -ECONNREFUSED:
378 default:
379 set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
380 cell->dns_expiry = ktime_get_real_seconds() + 10;
381 break;
285 } 382 }
286 cell = ERR_PTR(-ENOENT); 383
287 if (dns_cell) 384 cell->error = -EDESTADDRREQ;
288 goto create_cell;
289 found:
290 ;
291 } else { 385 } else {
292 cell = afs_cell_root; 386 clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
293 if (!cell) { 387 clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
294 /* this should not happen unless user tries to mount 388
295 * when root cell is not set. Return an impossibly 389 /* Exclusion on changing vl_addrs is achieved by a
296 * bizarre errno to alert the user. Things like 390 * non-reentrant work item.
297 * ENOENT might be "more appropriate" but they happen 391 */
298 * for other reasons. 392 old = rcu_dereference_protected(cell->vl_addrs, true);
299 */ 393 rcu_assign_pointer(cell->vl_addrs, alist);
300 cell = ERR_PTR(-EDESTADDRREQ); 394 cell->dns_expiry = expiry;
301 } else {
302 afs_get_cell(cell);
303 }
304 395
396 if (old)
397 afs_put_addrlist(old);
305 } 398 }
306 399
307 read_unlock(&afs_cells_lock); 400 if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
308 up_read(&afs_cells_sem); 401 wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
309 _leave(" = %p", cell);
310 return cell;
311 402
312create_cell: 403 now = ktime_get_real_seconds();
313 read_unlock(&afs_cells_lock); 404 afs_set_cell_timer(cell->net, cell->dns_expiry - now);
314 up_read(&afs_cells_sem); 405 _leave("");
406}
315 407
316 cell = afs_cell_create(name, namesz, NULL, true); 408/*
409 * Destroy a cell record
410 */
411static void afs_cell_destroy(struct rcu_head *rcu)
412{
413 struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
317 414
318 _leave(" = %p", cell); 415 _enter("%p{%s}", cell, cell->name);
319 return cell; 416
417 ASSERTCMP(atomic_read(&cell->usage), ==, 0);
418
419 afs_put_addrlist(cell->vl_addrs);
420 key_put(cell->anonymous_key);
421 kfree(cell);
422
423 _leave(" [destroyed]");
320} 424}
321 425
322#if 0
323/* 426/*
324 * try and get a cell record 427 * Queue the cell manager.
325 */ 428 */
326struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) 429static void afs_queue_cell_manager(struct afs_net *net)
327{ 430{
328 write_lock(&afs_cells_lock); 431 int outstanding = atomic_inc_return(&net->cells_outstanding);
329 432
330 if (cell && !list_empty(&cell->link)) 433 _enter("%d", outstanding);
331 afs_get_cell(cell);
332 else
333 cell = NULL;
334 434
335 write_unlock(&afs_cells_lock); 435 if (!queue_work(afs_wq, &net->cells_manager))
336 return cell; 436 afs_dec_cells_outstanding(net);
337} 437}
338#endif /* 0 */
339 438
340/* 439/*
341 * destroy a cell record 440 * Cell management timer. We have an increment on cells_outstanding that we
441 * need to pass along to the work item.
342 */ 442 */
343void afs_put_cell(struct afs_cell *cell) 443void afs_cells_timer(struct timer_list *timer)
344{ 444{
345 if (!cell) 445 struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
346 return;
347 446
348 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); 447 _enter("");
448 if (!queue_work(afs_wq, &net->cells_manager))
449 afs_dec_cells_outstanding(net);
450}
349 451
350 ASSERTCMP(atomic_read(&cell->usage), >, 0); 452/*
453 * Get a reference on a cell record.
454 */
455struct afs_cell *afs_get_cell(struct afs_cell *cell)
456{
457 atomic_inc(&cell->usage);
458 return cell;
459}
351 460
352 /* to prevent a race, the decrement and the dequeue must be effectively 461/*
353 * atomic */ 462 * Drop a reference on a cell record.
354 write_lock(&afs_cells_lock); 463 */
464void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
465{
466 time64_t now, expire_delay;
355 467
356 if (likely(!atomic_dec_and_test(&cell->usage))) { 468 if (!cell)
357 write_unlock(&afs_cells_lock);
358 _leave("");
359 return; 469 return;
360 }
361 470
362 ASSERT(list_empty(&cell->servers)); 471 _enter("%s", cell->name);
363 ASSERT(list_empty(&cell->vl_list));
364 472
365 write_unlock(&afs_cells_lock); 473 now = ktime_get_real_seconds();
474 cell->last_inactive = now;
475 expire_delay = 0;
476 if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
477 !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
478 expire_delay = afs_cell_gc_delay;
366 479
367 wake_up(&afs_cells_freeable_wq); 480 if (atomic_dec_return(&cell->usage) > 1)
481 return;
368 482
369 _leave(" [unused]"); 483 /* 'cell' may now be garbage collected. */
484 afs_set_cell_timer(net, expire_delay);
370} 485}
371 486
372/* 487/*
373 * destroy a cell record 488 * Allocate a key to use as a placeholder for anonymous user security.
374 * - must be called with the afs_cells_sem write-locked
375 * - cell->link should have been broken by the caller
376 */ 489 */
377static void afs_cell_destroy(struct afs_cell *cell) 490static int afs_alloc_anon_key(struct afs_cell *cell)
378{ 491{
379 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); 492 struct key *key;
493 char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp;
380 494
381 ASSERTCMP(atomic_read(&cell->usage), >=, 0); 495 /* Create a key to represent an anonymous user. */
382 ASSERT(list_empty(&cell->link)); 496 memcpy(keyname, "afs@", 4);
497 dp = keyname + 4;
498 cp = cell->name;
499 do {
500 *dp++ = tolower(*cp);
501 } while (*cp++);
383 502
384 /* wait for everyone to stop using the cell */ 503 key = rxrpc_get_null_key(keyname);
385 if (atomic_read(&cell->usage) > 0) { 504 if (IS_ERR(key))
386 DECLARE_WAITQUEUE(myself, current); 505 return PTR_ERR(key);
387 506
388 _debug("wait for cell %s", cell->name); 507 cell->anonymous_key = key;
389 set_current_state(TASK_UNINTERRUPTIBLE);
390 add_wait_queue(&afs_cells_freeable_wq, &myself);
391 508
392 while (atomic_read(&cell->usage) > 0) { 509 _debug("anon key %p{%x}",
393 schedule(); 510 cell->anonymous_key, key_serial(cell->anonymous_key));
394 set_current_state(TASK_UNINTERRUPTIBLE); 511 return 0;
395 } 512}
396 513
397 remove_wait_queue(&afs_cells_freeable_wq, &myself); 514/*
398 set_current_state(TASK_RUNNING); 515 * Activate a cell.
516 */
517static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
518{
519 int ret;
520
521 if (!cell->anonymous_key) {
522 ret = afs_alloc_anon_key(cell);
523 if (ret < 0)
524 return ret;
399 } 525 }
400 526
401 _debug("cell dead"); 527#ifdef CONFIG_AFS_FSCACHE
402 ASSERTCMP(atomic_read(&cell->usage), ==, 0); 528 cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
403 ASSERT(list_empty(&cell->servers)); 529 &afs_cell_cache_index_def,
404 ASSERT(list_empty(&cell->vl_list)); 530 cell, true);
531#endif
532 ret = afs_proc_cell_setup(net, cell);
533 if (ret < 0)
534 return ret;
535 spin_lock(&net->proc_cells_lock);
536 list_add_tail(&cell->proc_link, &net->proc_cells);
537 spin_unlock(&net->proc_cells_lock);
538 return 0;
539}
405 540
406 afs_proc_cell_remove(cell); 541/*
542 * Deactivate a cell.
543 */
544static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
545{
546 _enter("%s", cell->name);
547
548 afs_proc_cell_remove(net, cell);
407 549
408 down_write(&afs_proc_cells_sem); 550 spin_lock(&net->proc_cells_lock);
409 list_del_init(&cell->proc_link); 551 list_del_init(&cell->proc_link);
410 up_write(&afs_proc_cells_sem); 552 spin_unlock(&net->proc_cells_lock);
411 553
412#ifdef CONFIG_AFS_FSCACHE 554#ifdef CONFIG_AFS_FSCACHE
413 fscache_relinquish_cookie(cell->cache, 0); 555 fscache_relinquish_cookie(cell->cache, 0);
556 cell->cache = NULL;
414#endif 557#endif
415 key_put(cell->anonymous_key);
416 kfree(cell);
417 558
418 _leave(" [destroyed]"); 559 _leave("");
419} 560}
420 561
421/* 562/*
422 * purge in-memory cell database on module unload or afs_init() failure 563 * Manage a cell record, initialising and destroying it, maintaining its DNS
423 * - the timeout daemon is stopped before calling this 564 * records.
424 */ 565 */
425void afs_cell_purge(void) 566static void afs_manage_cell(struct work_struct *work)
426{ 567{
427 struct afs_cell *cell; 568 struct afs_cell *cell = container_of(work, struct afs_cell, manager);
569 struct afs_net *net = cell->net;
570 bool deleted;
571 int ret, usage;
572
573 _enter("%s", cell->name);
574
575again:
576 _debug("state %u", cell->state);
577 switch (cell->state) {
578 case AFS_CELL_INACTIVE:
579 case AFS_CELL_FAILED:
580 write_seqlock(&net->cells_lock);
581 usage = 1;
582 deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0);
583 if (deleted)
584 rb_erase(&cell->net_node, &net->cells);
585 write_sequnlock(&net->cells_lock);
586 if (deleted)
587 goto final_destruction;
588 if (cell->state == AFS_CELL_FAILED)
589 goto done;
590 cell->state = AFS_CELL_UNSET;
591 goto again;
592
593 case AFS_CELL_UNSET:
594 cell->state = AFS_CELL_ACTIVATING;
595 goto again;
596
597 case AFS_CELL_ACTIVATING:
598 ret = afs_activate_cell(net, cell);
599 if (ret < 0)
600 goto activation_failed;
601
602 cell->state = AFS_CELL_ACTIVE;
603 smp_wmb();
604 clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
605 wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
606 goto again;
607
608 case AFS_CELL_ACTIVE:
609 if (atomic_read(&cell->usage) > 1) {
610 time64_t now = ktime_get_real_seconds();
611 if (cell->dns_expiry <= now && net->live)
612 afs_update_cell(cell);
613 goto done;
614 }
615 cell->state = AFS_CELL_DEACTIVATING;
616 goto again;
617
618 case AFS_CELL_DEACTIVATING:
619 set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
620 if (atomic_read(&cell->usage) > 1)
621 goto reverse_deactivation;
622 afs_deactivate_cell(net, cell);
623 cell->state = AFS_CELL_INACTIVE;
624 goto again;
625
626 default:
627 break;
628 }
629 _debug("bad state %u", cell->state);
630 BUG(); /* Unhandled state */
631
632activation_failed:
633 cell->error = ret;
634 afs_deactivate_cell(net, cell);
635
636 cell->state = AFS_CELL_FAILED;
637 smp_wmb();
638 if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
639 wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
640 goto again;
641
642reverse_deactivation:
643 cell->state = AFS_CELL_ACTIVE;
644 smp_wmb();
645 clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
646 wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
647 _leave(" [deact->act]");
648 return;
649
650done:
651 _leave(" [done %u]", cell->state);
652 return;
653
654final_destruction:
655 call_rcu(&cell->rcu, afs_cell_destroy);
656 afs_dec_cells_outstanding(net);
657 _leave(" [destruct %d]", atomic_read(&net->cells_outstanding));
658}
659
660/*
661 * Manage the records of cells known to a network namespace. This includes
662 * updating the DNS records and garbage collecting unused cells that were
663 * automatically added.
664 *
665 * Note that constructed cell records may only be removed from net->cells by
666 * this work item, so it is safe for this work item to stash a cursor pointing
667 * into the tree and then return to caller (provided it skips cells that are
668 * still under construction).
669 *
670 * Note also that we were given an increment on net->cells_outstanding by
671 * whoever queued us that we need to deal with before returning.
672 */
673void afs_manage_cells(struct work_struct *work)
674{
675 struct afs_net *net = container_of(work, struct afs_net, cells_manager);
676 struct rb_node *cursor;
677 time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
678 bool purging = !net->live;
428 679
429 _enter(""); 680 _enter("");
430 681
431 afs_put_cell(afs_cell_root); 682 /* Trawl the cell database looking for cells that have expired from
683 * lack of use and cells whose DNS results have expired and dispatch
684 * their managers.
685 */
686 read_seqlock_excl(&net->cells_lock);
432 687
433 down_write(&afs_cells_sem); 688 for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
689 struct afs_cell *cell =
690 rb_entry(cursor, struct afs_cell, net_node);
691 unsigned usage;
692 bool sched_cell = false;
434 693
435 while (!list_empty(&afs_cells)) { 694 usage = atomic_read(&cell->usage);
436 cell = NULL; 695 _debug("manage %s %u", cell->name, usage);
696
697 ASSERTCMP(usage, >=, 1);
437 698
438 /* remove the next cell from the front of the list */ 699 if (purging) {
439 write_lock(&afs_cells_lock); 700 if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
701 usage = atomic_dec_return(&cell->usage);
702 ASSERTCMP(usage, ==, 1);
703 }
704
705 if (usage == 1) {
706 time64_t expire_at = cell->last_inactive;
707
708 if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
709 !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
710 expire_at += afs_cell_gc_delay;
711 if (purging || expire_at <= now)
712 sched_cell = true;
713 else if (expire_at < next_manage)
714 next_manage = expire_at;
715 }
440 716
441 if (!list_empty(&afs_cells)) { 717 if (!purging) {
442 cell = list_entry(afs_cells.next, 718 if (cell->dns_expiry <= now)
443 struct afs_cell, link); 719 sched_cell = true;
444 list_del_init(&cell->link); 720 else if (cell->dns_expiry <= next_manage)
721 next_manage = cell->dns_expiry;
445 } 722 }
446 723
447 write_unlock(&afs_cells_lock); 724 if (sched_cell)
725 queue_work(afs_wq, &cell->manager);
726 }
727
728 read_sequnlock_excl(&net->cells_lock);
448 729
449 if (cell) { 730 /* Update the timer on the way out. We have to pass an increment on
450 _debug("PURGING CELL %s (%d)", 731 * cells_outstanding in the namespace that we are in to the timer or
451 cell->name, atomic_read(&cell->usage)); 732 * the work scheduler.
733 */
734 if (!purging && next_manage < TIME64_MAX) {
735 now = ktime_get_real_seconds();
452 736
453 /* now the cell should be left with no references */ 737 if (next_manage - now <= 0) {
454 afs_cell_destroy(cell); 738 if (queue_work(afs_wq, &net->cells_manager))
739 atomic_inc(&net->cells_outstanding);
740 } else {
741 afs_set_cell_timer(net, next_manage - now);
455 } 742 }
456 } 743 }
457 744
458 up_write(&afs_cells_sem); 745 afs_dec_cells_outstanding(net);
746 _leave(" [%d]", atomic_read(&net->cells_outstanding));
747}
748
749/*
750 * Purge in-memory cell database.
751 */
752void afs_cell_purge(struct afs_net *net)
753{
754 struct afs_cell *ws;
755
756 _enter("");
757
758 write_seqlock(&net->cells_lock);
759 ws = net->ws_cell;
760 net->ws_cell = NULL;
761 write_sequnlock(&net->cells_lock);
762 afs_put_cell(net, ws);
763
764 _debug("del timer");
765 if (del_timer_sync(&net->cells_timer))
766 atomic_dec(&net->cells_outstanding);
767
768 _debug("kick mgr");
769 afs_queue_cell_manager(net);
770
771 _debug("wait");
772 wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait,
773 TASK_UNINTERRUPTIBLE);
459 _leave(""); 774 _leave("");
460} 775}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 782d4d05a53b..41e277f57b20 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -41,7 +41,6 @@ static CM_NAME(CallBack);
41static const struct afs_call_type afs_SRXCBCallBack = { 41static const struct afs_call_type afs_SRXCBCallBack = {
42 .name = afs_SRXCBCallBack_name, 42 .name = afs_SRXCBCallBack_name,
43 .deliver = afs_deliver_cb_callback, 43 .deliver = afs_deliver_cb_callback,
44 .abort_to_error = afs_abort_to_error,
45 .destructor = afs_cm_destructor, 44 .destructor = afs_cm_destructor,
46 .work = SRXAFSCB_CallBack, 45 .work = SRXAFSCB_CallBack,
47}; 46};
@@ -53,7 +52,6 @@ static CM_NAME(InitCallBackState);
53static const struct afs_call_type afs_SRXCBInitCallBackState = { 52static const struct afs_call_type afs_SRXCBInitCallBackState = {
54 .name = afs_SRXCBInitCallBackState_name, 53 .name = afs_SRXCBInitCallBackState_name,
55 .deliver = afs_deliver_cb_init_call_back_state, 54 .deliver = afs_deliver_cb_init_call_back_state,
56 .abort_to_error = afs_abort_to_error,
57 .destructor = afs_cm_destructor, 55 .destructor = afs_cm_destructor,
58 .work = SRXAFSCB_InitCallBackState, 56 .work = SRXAFSCB_InitCallBackState,
59}; 57};
@@ -65,7 +63,6 @@ static CM_NAME(InitCallBackState3);
65static const struct afs_call_type afs_SRXCBInitCallBackState3 = { 63static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
66 .name = afs_SRXCBInitCallBackState3_name, 64 .name = afs_SRXCBInitCallBackState3_name,
67 .deliver = afs_deliver_cb_init_call_back_state3, 65 .deliver = afs_deliver_cb_init_call_back_state3,
68 .abort_to_error = afs_abort_to_error,
69 .destructor = afs_cm_destructor, 66 .destructor = afs_cm_destructor,
70 .work = SRXAFSCB_InitCallBackState, 67 .work = SRXAFSCB_InitCallBackState,
71}; 68};
@@ -77,7 +74,6 @@ static CM_NAME(Probe);
77static const struct afs_call_type afs_SRXCBProbe = { 74static const struct afs_call_type afs_SRXCBProbe = {
78 .name = afs_SRXCBProbe_name, 75 .name = afs_SRXCBProbe_name,
79 .deliver = afs_deliver_cb_probe, 76 .deliver = afs_deliver_cb_probe,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor, 77 .destructor = afs_cm_destructor,
82 .work = SRXAFSCB_Probe, 78 .work = SRXAFSCB_Probe,
83}; 79};
@@ -89,7 +85,6 @@ static CM_NAME(ProbeUuid);
89static const struct afs_call_type afs_SRXCBProbeUuid = { 85static const struct afs_call_type afs_SRXCBProbeUuid = {
90 .name = afs_SRXCBProbeUuid_name, 86 .name = afs_SRXCBProbeUuid_name,
91 .deliver = afs_deliver_cb_probe_uuid, 87 .deliver = afs_deliver_cb_probe_uuid,
92 .abort_to_error = afs_abort_to_error,
93 .destructor = afs_cm_destructor, 88 .destructor = afs_cm_destructor,
94 .work = SRXAFSCB_ProbeUuid, 89 .work = SRXAFSCB_ProbeUuid,
95}; 90};
@@ -101,7 +96,6 @@ static CM_NAME(TellMeAboutYourself);
101static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { 96static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
102 .name = afs_SRXCBTellMeAboutYourself_name, 97 .name = afs_SRXCBTellMeAboutYourself_name,
103 .deliver = afs_deliver_cb_tell_me_about_yourself, 98 .deliver = afs_deliver_cb_tell_me_about_yourself,
104 .abort_to_error = afs_abort_to_error,
105 .destructor = afs_cm_destructor, 99 .destructor = afs_cm_destructor,
106 .work = SRXAFSCB_TellMeAboutYourself, 100 .work = SRXAFSCB_TellMeAboutYourself,
107}; 101};
@@ -127,6 +121,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
127 case CBProbe: 121 case CBProbe:
128 call->type = &afs_SRXCBProbe; 122 call->type = &afs_SRXCBProbe;
129 return true; 123 return true;
124 case CBProbeUuid:
125 call->type = &afs_SRXCBProbeUuid;
126 return true;
130 case CBTellMeAboutYourself: 127 case CBTellMeAboutYourself:
131 call->type = &afs_SRXCBTellMeAboutYourself; 128 call->type = &afs_SRXCBTellMeAboutYourself;
132 return true; 129 return true;
@@ -147,18 +144,16 @@ static void afs_cm_destructor(struct afs_call *call)
147 * afs_deliver_cb_callback(). 144 * afs_deliver_cb_callback().
148 */ 145 */
149 if (call->unmarshall == 5) { 146 if (call->unmarshall == 5) {
150 ASSERT(call->server && call->count && call->request); 147 ASSERT(call->cm_server && call->count && call->request);
151 afs_break_callbacks(call->server, call->count, call->request); 148 afs_break_callbacks(call->cm_server, call->count, call->request);
152 } 149 }
153 150
154 afs_put_server(call->server);
155 call->server = NULL;
156 kfree(call->buffer); 151 kfree(call->buffer);
157 call->buffer = NULL; 152 call->buffer = NULL;
158} 153}
159 154
160/* 155/*
161 * allow the fileserver to see if the cache manager is still alive 156 * The server supplied a list of callbacks that it wanted to break.
162 */ 157 */
163static void SRXAFSCB_CallBack(struct work_struct *work) 158static void SRXAFSCB_CallBack(struct work_struct *work)
164{ 159{
@@ -173,7 +168,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
173 * yet */ 168 * yet */
174 afs_send_empty_reply(call); 169 afs_send_empty_reply(call);
175 170
176 afs_break_callbacks(call->server, call->count, call->request); 171 afs_break_callbacks(call->cm_server, call->count, call->request);
177 afs_put_call(call); 172 afs_put_call(call);
178 _leave(""); 173 _leave("");
179} 174}
@@ -193,7 +188,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
193 188
194 switch (call->unmarshall) { 189 switch (call->unmarshall) {
195 case 0: 190 case 0:
196 rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
197 call->offset = 0; 191 call->offset = 0;
198 call->unmarshall++; 192 call->unmarshall++;
199 193
@@ -286,14 +280,16 @@ static int afs_deliver_cb_callback(struct afs_call *call)
286 break; 280 break;
287 } 281 }
288 282
289 call->state = AFS_CALL_REPLYING; 283 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
284 return -EIO;
290 285
291 /* we'll need the file server record as that tells us which set of 286 /* we'll need the file server record as that tells us which set of
292 * vnodes to operate upon */ 287 * vnodes to operate upon */
293 server = afs_find_server(&srx); 288 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
289 server = afs_find_server(call->net, &srx);
294 if (!server) 290 if (!server)
295 return -ENOTCONN; 291 return -ENOTCONN;
296 call->server = server; 292 call->cm_server = server;
297 293
298 return afs_queue_call_work(call); 294 return afs_queue_call_work(call);
299} 295}
@@ -305,9 +301,9 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
305{ 301{
306 struct afs_call *call = container_of(work, struct afs_call, work); 302 struct afs_call *call = container_of(work, struct afs_call, work);
307 303
308 _enter("{%p}", call->server); 304 _enter("{%p}", call->cm_server);
309 305
310 afs_init_callback_state(call->server); 306 afs_init_callback_state(call->cm_server);
311 afs_send_empty_reply(call); 307 afs_send_empty_reply(call);
312 afs_put_call(call); 308 afs_put_call(call);
313 _leave(""); 309 _leave("");
@@ -324,21 +320,18 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
324 320
325 _enter(""); 321 _enter("");
326 322
327 rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); 323 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
328 324
329 ret = afs_extract_data(call, NULL, 0, false); 325 ret = afs_extract_data(call, NULL, 0, false);
330 if (ret < 0) 326 if (ret < 0)
331 return ret; 327 return ret;
332 328
333 /* no unmarshalling required */
334 call->state = AFS_CALL_REPLYING;
335
336 /* we'll need the file server record as that tells us which set of 329 /* we'll need the file server record as that tells us which set of
337 * vnodes to operate upon */ 330 * vnodes to operate upon */
338 server = afs_find_server(&srx); 331 server = afs_find_server(call->net, &srx);
339 if (!server) 332 if (!server)
340 return -ENOTCONN; 333 return -ENOTCONN;
341 call->server = server; 334 call->cm_server = server;
342 335
343 return afs_queue_call_work(call); 336 return afs_queue_call_work(call);
344} 337}
@@ -357,8 +350,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
357 350
358 _enter(""); 351 _enter("");
359 352
360 rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
361
362 _enter("{%u}", call->unmarshall); 353 _enter("{%u}", call->unmarshall);
363 354
364 switch (call->unmarshall) { 355 switch (call->unmarshall) {
@@ -402,15 +393,16 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
402 break; 393 break;
403 } 394 }
404 395
405 /* no unmarshalling required */ 396 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
406 call->state = AFS_CALL_REPLYING; 397 return -EIO;
407 398
408 /* we'll need the file server record as that tells us which set of 399 /* we'll need the file server record as that tells us which set of
409 * vnodes to operate upon */ 400 * vnodes to operate upon */
410 server = afs_find_server(&srx); 401 rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
402 server = afs_find_server(call->net, &srx);
411 if (!server) 403 if (!server)
412 return -ENOTCONN; 404 return -ENOTCONN;
413 call->server = server; 405 call->cm_server = server;
414 406
415 return afs_queue_call_work(call); 407 return afs_queue_call_work(call);
416} 408}
@@ -441,8 +433,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
441 if (ret < 0) 433 if (ret < 0)
442 return ret; 434 return ret;
443 435
444 /* no unmarshalling required */ 436 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
445 call->state = AFS_CALL_REPLYING; 437 return -EIO;
446 438
447 return afs_queue_call_work(call); 439 return afs_queue_call_work(call);
448} 440}
@@ -461,7 +453,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
461 453
462 _enter(""); 454 _enter("");
463 455
464 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) 456 if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
465 reply.match = htonl(0); 457 reply.match = htonl(0);
466 else 458 else
467 reply.match = htonl(1); 459 reply.match = htonl(1);
@@ -524,7 +516,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
524 break; 516 break;
525 } 517 }
526 518
527 call->state = AFS_CALL_REPLYING; 519 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
520 return -EIO;
528 521
529 return afs_queue_call_work(call); 522 return afs_queue_call_work(call);
530} 523}
@@ -568,13 +561,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
568 memset(&reply, 0, sizeof(reply)); 561 memset(&reply, 0, sizeof(reply));
569 reply.ia.nifs = htonl(nifs); 562 reply.ia.nifs = htonl(nifs);
570 563
571 reply.ia.uuid[0] = afs_uuid.time_low; 564 reply.ia.uuid[0] = call->net->uuid.time_low;
572 reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid)); 565 reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
573 reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version)); 566 reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version));
574 reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved); 567 reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved);
575 reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low); 568 reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low);
576 for (loop = 0; loop < 6; loop++) 569 for (loop = 0; loop < 6; loop++)
577 reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]); 570 reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
578 571
579 if (ifs) { 572 if (ifs) {
580 for (loop = 0; loop < nifs; loop++) { 573 for (loop = 0; loop < nifs; loop++) {
@@ -605,8 +598,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
605 if (ret < 0) 598 if (ret < 0)
606 return ret; 599 return ret;
607 600
608 /* no unmarshalling required */ 601 if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
609 call->state = AFS_CALL_REPLYING; 602 return -EIO;
610 603
611 return afs_queue_call_work(call); 604 return afs_queue_call_work(call);
612} 605}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 613a77058263..ab618d32554c 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -130,10 +130,11 @@ struct afs_lookup_cookie {
130/* 130/*
131 * check that a directory page is valid 131 * check that a directory page is valid
132 */ 132 */
133static inline bool afs_dir_check_page(struct inode *dir, struct page *page) 133bool afs_dir_check_page(struct inode *dir, struct page *page)
134{ 134{
135 struct afs_dir_page *dbuf; 135 struct afs_dir_page *dbuf;
136 loff_t latter; 136 struct afs_vnode *vnode = AFS_FS_I(dir);
137 loff_t latter, i_size, off;
137 int tmp, qty; 138 int tmp, qty;
138 139
139#if 0 140#if 0
@@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
150 } 151 }
151#endif 152#endif
152 153
153 /* determine how many magic numbers there should be in this page */ 154 /* Determine how many magic numbers there should be in this page, but
154 latter = dir->i_size - page_offset(page); 155 * we must take care because the directory may change size under us.
156 */
157 off = page_offset(page);
158 i_size = i_size_read(dir);
159 if (i_size <= off)
160 goto checked;
161
162 latter = i_size - off;
155 if (latter >= PAGE_SIZE) 163 if (latter >= PAGE_SIZE)
156 qty = PAGE_SIZE; 164 qty = PAGE_SIZE;
157 else 165 else
@@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
162 dbuf = page_address(page); 170 dbuf = page_address(page);
163 for (tmp = 0; tmp < qty; tmp++) { 171 for (tmp = 0; tmp < qty; tmp++) {
164 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 172 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
165 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 173 printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
166 __func__, dir->i_ino, tmp, qty, 174 __func__, dir->i_ino, tmp, qty,
167 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 175 ntohs(dbuf->blocks[tmp].pagehdr.magic));
176 trace_afs_dir_check_failed(vnode, off, i_size);
168 goto error; 177 goto error;
169 } 178 }
170 } 179 }
171 180
181checked:
172 SetPageChecked(page); 182 SetPageChecked(page);
173 return true; 183 return true;
174 184
@@ -183,6 +193,7 @@ error:
183static inline void afs_dir_put_page(struct page *page) 193static inline void afs_dir_put_page(struct page *page)
184{ 194{
185 kunmap(page); 195 kunmap(page);
196 unlock_page(page);
186 put_page(page); 197 put_page(page);
187} 198}
188 199
@@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
197 208
198 page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); 209 page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
199 if (!IS_ERR(page)) { 210 if (!IS_ERR(page)) {
211 lock_page(page);
200 kmap(page); 212 kmap(page);
201 if (unlikely(!PageChecked(page))) { 213 if (unlikely(!PageChecked(page))) {
202 if (PageError(page) || !afs_dir_check_page(dir, page)) 214 if (PageError(page))
203 goto fail; 215 goto fail;
204 } 216 }
205 } 217 }
@@ -384,8 +396,7 @@ out:
384 */ 396 */
385static int afs_readdir(struct file *file, struct dir_context *ctx) 397static int afs_readdir(struct file *file, struct dir_context *ctx)
386{ 398{
387 return afs_dir_iterate(file_inode(file), 399 return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file));
388 ctx, file->private_data);
389} 400}
390 401
391/* 402/*
@@ -553,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
553 dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; 564 dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
554 565
555 /* instantiate the dentry */ 566 /* instantiate the dentry */
556 inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL); 567 inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL);
557 key_put(key); 568 key_put(key);
558 if (IS_ERR(inode)) { 569 if (IS_ERR(inode)) {
559 _leave(" = %ld", PTR_ERR(inode)); 570 _leave(" = %ld", PTR_ERR(inode));
@@ -581,6 +592,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
581 struct afs_vnode *vnode, *dir; 592 struct afs_vnode *vnode, *dir;
582 struct afs_fid uninitialized_var(fid); 593 struct afs_fid uninitialized_var(fid);
583 struct dentry *parent; 594 struct dentry *parent;
595 struct inode *inode;
584 struct key *key; 596 struct key *key;
585 void *dir_version; 597 void *dir_version;
586 int ret; 598 int ret;
@@ -588,30 +600,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
588 if (flags & LOOKUP_RCU) 600 if (flags & LOOKUP_RCU)
589 return -ECHILD; 601 return -ECHILD;
590 602
591 vnode = AFS_FS_I(d_inode(dentry)); 603 if (d_really_is_positive(dentry)) {
592 604 vnode = AFS_FS_I(d_inode(dentry));
593 if (d_really_is_positive(dentry))
594 _enter("{v={%x:%u} n=%pd fl=%lx},", 605 _enter("{v={%x:%u} n=%pd fl=%lx},",
595 vnode->fid.vid, vnode->fid.vnode, dentry, 606 vnode->fid.vid, vnode->fid.vnode, dentry,
596 vnode->flags); 607 vnode->flags);
597 else 608 } else {
598 _enter("{neg n=%pd}", dentry); 609 _enter("{neg n=%pd}", dentry);
610 }
599 611
600 key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell); 612 key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
601 if (IS_ERR(key)) 613 if (IS_ERR(key))
602 key = NULL; 614 key = NULL;
603 615
616 if (d_really_is_positive(dentry)) {
617 inode = d_inode(dentry);
618 if (inode) {
619 vnode = AFS_FS_I(inode);
620 afs_validate(vnode, key);
621 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
622 goto out_bad;
623 }
624 }
625
604 /* lock down the parent dentry so we can peer at it */ 626 /* lock down the parent dentry so we can peer at it */
605 parent = dget_parent(dentry); 627 parent = dget_parent(dentry);
606 dir = AFS_FS_I(d_inode(parent)); 628 dir = AFS_FS_I(d_inode(parent));
607 629
608 /* validate the parent directory */ 630 /* validate the parent directory */
609 if (test_bit(AFS_VNODE_MODIFIED, &dir->flags)) 631 afs_validate(dir, key);
610 afs_validate(dir, key);
611 632
612 if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { 633 if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
613 _debug("%pd: parent dir deleted", dentry); 634 _debug("%pd: parent dir deleted", dentry);
614 goto out_bad; 635 goto out_bad_parent;
615 } 636 }
616 637
617 dir_version = (void *) (unsigned long) dir->status.data_version; 638 dir_version = (void *) (unsigned long) dir->status.data_version;
@@ -626,13 +647,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
626 case 0: 647 case 0:
627 /* the filename maps to something */ 648 /* the filename maps to something */
628 if (d_really_is_negative(dentry)) 649 if (d_really_is_negative(dentry))
629 goto out_bad; 650 goto out_bad_parent;
630 if (is_bad_inode(d_inode(dentry))) { 651 inode = d_inode(dentry);
652 if (is_bad_inode(inode)) {
631 printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", 653 printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
632 dentry); 654 dentry);
633 goto out_bad; 655 goto out_bad_parent;
634 } 656 }
635 657
658 vnode = AFS_FS_I(inode);
659
636 /* if the vnode ID has changed, then the dirent points to a 660 /* if the vnode ID has changed, then the dirent points to a
637 * different file */ 661 * different file */
638 if (fid.vnode != vnode->fid.vnode) { 662 if (fid.vnode != vnode->fid.vnode) {
@@ -649,10 +673,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
649 _debug("%pd: file deleted (uq %u -> %u I:%u)", 673 _debug("%pd: file deleted (uq %u -> %u I:%u)",
650 dentry, fid.unique, 674 dentry, fid.unique,
651 vnode->fid.unique, 675 vnode->fid.unique,
652 d_inode(dentry)->i_generation); 676 vnode->vfs_inode.i_generation);
653 spin_lock(&vnode->lock); 677 write_seqlock(&vnode->cb_lock);
654 set_bit(AFS_VNODE_DELETED, &vnode->flags); 678 set_bit(AFS_VNODE_DELETED, &vnode->flags);
655 spin_unlock(&vnode->lock); 679 write_sequnlock(&vnode->cb_lock);
656 goto not_found; 680 goto not_found;
657 } 681 }
658 goto out_valid; 682 goto out_valid;
@@ -667,7 +691,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
667 default: 691 default:
668 _debug("failed to iterate dir %pd: %d", 692 _debug("failed to iterate dir %pd: %d",
669 parent, ret); 693 parent, ret);
670 goto out_bad; 694 goto out_bad_parent;
671 } 695 }
672 696
673out_valid: 697out_valid:
@@ -683,9 +707,10 @@ not_found:
683 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 707 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
684 spin_unlock(&dentry->d_lock); 708 spin_unlock(&dentry->d_lock);
685 709
686out_bad: 710out_bad_parent:
687 _debug("dropping dentry %pd2", dentry); 711 _debug("dropping dentry %pd2", dentry);
688 dput(parent); 712 dput(parent);
713out_bad:
689 key_put(key); 714 key_put(key);
690 715
691 _leave(" = 0 [bad]"); 716 _leave(" = 0 [bad]");
@@ -727,20 +752,48 @@ static void afs_d_release(struct dentry *dentry)
727} 752}
728 753
729/* 754/*
755 * Create a new inode for create/mkdir/symlink
756 */
757static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
758 struct dentry *new_dentry,
759 struct afs_fid *newfid,
760 struct afs_file_status *newstatus,
761 struct afs_callback *newcb)
762{
763 struct inode *inode;
764
765 if (fc->ac.error < 0)
766 return;
767
768 inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
769 newfid, newstatus, newcb, fc->cbi);
770 if (IS_ERR(inode)) {
771 /* ENOMEM or EINTR at a really inconvenient time - just abandon
772 * the new directory on the server.
773 */
774 fc->ac.error = PTR_ERR(inode);
775 return;
776 }
777
778 d_instantiate(new_dentry, inode);
779 if (d_unhashed(new_dentry))
780 d_rehash(new_dentry);
781}
782
783/*
730 * create a directory on an AFS filesystem 784 * create a directory on an AFS filesystem
731 */ 785 */
732static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 786static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
733{ 787{
734 struct afs_file_status status; 788 struct afs_file_status newstatus;
735 struct afs_callback cb; 789 struct afs_fs_cursor fc;
736 struct afs_server *server; 790 struct afs_callback newcb;
737 struct afs_vnode *dvnode, *vnode; 791 struct afs_vnode *dvnode = AFS_FS_I(dir);
738 struct afs_fid fid; 792 struct afs_fid newfid;
739 struct inode *inode;
740 struct key *key; 793 struct key *key;
741 int ret; 794 int ret;
742 795
743 dvnode = AFS_FS_I(dir); 796 mode |= S_IFDIR;
744 797
745 _enter("{%x:%u},{%pd},%ho", 798 _enter("{%x:%u},{%pd},%ho",
746 dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); 799 dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -751,40 +804,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
751 goto error; 804 goto error;
752 } 805 }
753 806
754 mode |= S_IFDIR; 807 ret = -ERESTARTSYS;
755 ret = afs_vnode_create(dvnode, key, dentry->d_name.name, 808 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
756 mode, &fid, &status, &cb, &server); 809 while (afs_select_fileserver(&fc)) {
757 if (ret < 0) 810 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
758 goto mkdir_error; 811 afs_fs_create(&fc, dentry->d_name.name, mode,
812 &newfid, &newstatus, &newcb);
813 }
759 814
760 inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); 815 afs_check_for_remote_deletion(&fc, fc.vnode);
761 if (IS_ERR(inode)) { 816 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
762 /* ENOMEM at a really inconvenient time - just abandon the new 817 afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
763 * directory on the server */ 818 ret = afs_end_vnode_operation(&fc);
764 ret = PTR_ERR(inode); 819 if (ret < 0)
765 goto iget_error; 820 goto error_key;
766 } 821 }
767 822
768 /* apply the status report we've got for the new vnode */
769 vnode = AFS_FS_I(inode);
770 spin_lock(&vnode->lock);
771 vnode->update_cnt++;
772 spin_unlock(&vnode->lock);
773 afs_vnode_finalise_status_update(vnode, server);
774 afs_put_server(server);
775
776 d_instantiate(dentry, inode);
777 if (d_unhashed(dentry)) {
778 _debug("not hashed");
779 d_rehash(dentry);
780 }
781 key_put(key); 823 key_put(key);
782 _leave(" = 0"); 824 _leave(" = 0");
783 return 0; 825 return 0;
784 826
785iget_error: 827error_key:
786 afs_put_server(server);
787mkdir_error:
788 key_put(key); 828 key_put(key);
789error: 829error:
790 d_drop(dentry); 830 d_drop(dentry);
@@ -793,16 +833,29 @@ error:
793} 833}
794 834
795/* 835/*
836 * Remove a subdir from a directory.
837 */
838static void afs_dir_remove_subdir(struct dentry *dentry)
839{
840 if (d_really_is_positive(dentry)) {
841 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
842
843 clear_nlink(&vnode->vfs_inode);
844 set_bit(AFS_VNODE_DELETED, &vnode->flags);
845 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
846 }
847}
848
849/*
796 * remove a directory from an AFS filesystem 850 * remove a directory from an AFS filesystem
797 */ 851 */
798static int afs_rmdir(struct inode *dir, struct dentry *dentry) 852static int afs_rmdir(struct inode *dir, struct dentry *dentry)
799{ 853{
800 struct afs_vnode *dvnode, *vnode; 854 struct afs_fs_cursor fc;
855 struct afs_vnode *dvnode = AFS_FS_I(dir);
801 struct key *key; 856 struct key *key;
802 int ret; 857 int ret;
803 858
804 dvnode = AFS_FS_I(dir);
805
806 _enter("{%x:%u},{%pd}", 859 _enter("{%x:%u},{%pd}",
807 dvnode->fid.vid, dvnode->fid.vnode, dentry); 860 dvnode->fid.vid, dvnode->fid.vnode, dentry);
808 861
@@ -812,45 +865,69 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
812 goto error; 865 goto error;
813 } 866 }
814 867
815 ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true); 868 ret = -ERESTARTSYS;
816 if (ret < 0) 869 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
817 goto rmdir_error; 870 while (afs_select_fileserver(&fc)) {
871 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
872 afs_fs_remove(&fc, dentry->d_name.name, true);
873 }
818 874
819 if (d_really_is_positive(dentry)) { 875 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
820 vnode = AFS_FS_I(d_inode(dentry)); 876 ret = afs_end_vnode_operation(&fc);
821 clear_nlink(&vnode->vfs_inode); 877 if (ret == 0)
822 set_bit(AFS_VNODE_DELETED, &vnode->flags); 878 afs_dir_remove_subdir(dentry);
823 afs_discard_callback_on_delete(vnode);
824 } 879 }
825 880
826 key_put(key); 881 key_put(key);
827 _leave(" = 0");
828 return 0;
829
830rmdir_error:
831 key_put(key);
832error: 882error:
833 _leave(" = %d", ret);
834 return ret; 883 return ret;
835} 884}
836 885
837/* 886/*
838 * remove a file from an AFS filesystem 887 * Remove a link to a file or symlink from a directory.
888 *
889 * If the file was not deleted due to excess hard links, the fileserver will
890 * break the callback promise on the file - if it had one - before it returns
891 * to us, and if it was deleted, it won't
892 *
893 * However, if we didn't have a callback promise outstanding, or it was
894 * outstanding on a different server, then it won't break it either...
895 */
896static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
897{
898 int ret = 0;
899
900 if (d_really_is_positive(dentry)) {
901 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
902
903 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
904 kdebug("AFS_VNODE_DELETED");
905 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
906
907 ret = afs_validate(vnode, key);
908 if (ret == -ESTALE)
909 ret = 0;
910 _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
911 }
912
913 return ret;
914}
915
916/*
917 * Remove a file or symlink from an AFS filesystem.
839 */ 918 */
840static int afs_unlink(struct inode *dir, struct dentry *dentry) 919static int afs_unlink(struct inode *dir, struct dentry *dentry)
841{ 920{
842 struct afs_vnode *dvnode, *vnode; 921 struct afs_fs_cursor fc;
922 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
843 struct key *key; 923 struct key *key;
844 int ret; 924 int ret;
845 925
846 dvnode = AFS_FS_I(dir);
847
848 _enter("{%x:%u},{%pd}", 926 _enter("{%x:%u},{%pd}",
849 dvnode->fid.vid, dvnode->fid.vnode, dentry); 927 dvnode->fid.vid, dvnode->fid.vnode, dentry);
850 928
851 ret = -ENAMETOOLONG;
852 if (dentry->d_name.len >= AFSNAMEMAX) 929 if (dentry->d_name.len >= AFSNAMEMAX)
853 goto error; 930 return -ENAMETOOLONG;
854 931
855 key = afs_request_key(dvnode->volume->cell); 932 key = afs_request_key(dvnode->volume->cell);
856 if (IS_ERR(key)) { 933 if (IS_ERR(key)) {
@@ -858,44 +935,28 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
858 goto error; 935 goto error;
859 } 936 }
860 937
938 /* Try to make sure we have a callback promise on the victim. */
861 if (d_really_is_positive(dentry)) { 939 if (d_really_is_positive(dentry)) {
862 vnode = AFS_FS_I(d_inode(dentry)); 940 vnode = AFS_FS_I(d_inode(dentry));
863
864 /* make sure we have a callback promise on the victim */
865 ret = afs_validate(vnode, key); 941 ret = afs_validate(vnode, key);
866 if (ret < 0) 942 if (ret < 0)
867 goto error; 943 goto error_key;
868 } 944 }
869 945
870 ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false); 946 ret = -ERESTARTSYS;
871 if (ret < 0) 947 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
872 goto remove_error; 948 while (afs_select_fileserver(&fc)) {
949 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
950 afs_fs_remove(&fc, dentry->d_name.name, false);
951 }
873 952
874 if (d_really_is_positive(dentry)) { 953 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
875 /* if the file wasn't deleted due to excess hard links, the 954 ret = afs_end_vnode_operation(&fc);
876 * fileserver will break the callback promise on the file - if 955 if (ret == 0)
877 * it had one - before it returns to us, and if it was deleted, 956 ret = afs_dir_remove_link(dentry, key);
878 * it won't
879 *
880 * however, if we didn't have a callback promise outstanding,
881 * or it was outstanding on a different server, then it won't
882 * break it either...
883 */
884 vnode = AFS_FS_I(d_inode(dentry));
885 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
886 _debug("AFS_VNODE_DELETED");
887 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
888 _debug("AFS_VNODE_CB_BROKEN");
889 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
890 ret = afs_validate(vnode, key);
891 _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
892 } 957 }
893 958
894 key_put(key); 959error_key:
895 _leave(" = 0");
896 return 0;
897
898remove_error:
899 key_put(key); 960 key_put(key);
900error: 961error:
901 _leave(" = %d", ret); 962 _leave(" = %d", ret);
@@ -908,60 +969,50 @@ error:
908static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 969static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
909 bool excl) 970 bool excl)
910{ 971{
911 struct afs_file_status status; 972 struct afs_fs_cursor fc;
912 struct afs_callback cb; 973 struct afs_file_status newstatus;
913 struct afs_server *server; 974 struct afs_callback newcb;
914 struct afs_vnode *dvnode, *vnode; 975 struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir);
915 struct afs_fid fid; 976 struct afs_fid newfid;
916 struct inode *inode;
917 struct key *key; 977 struct key *key;
918 int ret; 978 int ret;
919 979
920 dvnode = AFS_FS_I(dir); 980 mode |= S_IFREG;
921 981
922 _enter("{%x:%u},{%pd},%ho,", 982 _enter("{%x:%u},{%pd},%ho,",
923 dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); 983 dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
924 984
985 ret = -ENAMETOOLONG;
986 if (dentry->d_name.len >= AFSNAMEMAX)
987 goto error;
988
925 key = afs_request_key(dvnode->volume->cell); 989 key = afs_request_key(dvnode->volume->cell);
926 if (IS_ERR(key)) { 990 if (IS_ERR(key)) {
927 ret = PTR_ERR(key); 991 ret = PTR_ERR(key);
928 goto error; 992 goto error;
929 } 993 }
930 994
931 mode |= S_IFREG; 995 ret = -ERESTARTSYS;
932 ret = afs_vnode_create(dvnode, key, dentry->d_name.name, 996 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
933 mode, &fid, &status, &cb, &server); 997 while (afs_select_fileserver(&fc)) {
934 if (ret < 0) 998 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
935 goto create_error; 999 afs_fs_create(&fc, dentry->d_name.name, mode,
1000 &newfid, &newstatus, &newcb);
1001 }
936 1002
937 inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); 1003 afs_check_for_remote_deletion(&fc, fc.vnode);
938 if (IS_ERR(inode)) { 1004 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
939 /* ENOMEM at a really inconvenient time - just abandon the new 1005 afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
940 * directory on the server */ 1006 ret = afs_end_vnode_operation(&fc);
941 ret = PTR_ERR(inode); 1007 if (ret < 0)
942 goto iget_error; 1008 goto error_key;
943 } 1009 }
944 1010
945 /* apply the status report we've got for the new vnode */
946 vnode = AFS_FS_I(inode);
947 spin_lock(&vnode->lock);
948 vnode->update_cnt++;
949 spin_unlock(&vnode->lock);
950 afs_vnode_finalise_status_update(vnode, server);
951 afs_put_server(server);
952
953 d_instantiate(dentry, inode);
954 if (d_unhashed(dentry)) {
955 _debug("not hashed");
956 d_rehash(dentry);
957 }
958 key_put(key); 1011 key_put(key);
959 _leave(" = 0"); 1012 _leave(" = 0");
960 return 0; 1013 return 0;
961 1014
962iget_error: 1015error_key:
963 afs_put_server(server);
964create_error:
965 key_put(key); 1016 key_put(key);
966error: 1017error:
967 d_drop(dentry); 1018 d_drop(dentry);
@@ -975,6 +1026,7 @@ error:
975static int afs_link(struct dentry *from, struct inode *dir, 1026static int afs_link(struct dentry *from, struct inode *dir,
976 struct dentry *dentry) 1027 struct dentry *dentry)
977{ 1028{
1029 struct afs_fs_cursor fc;
978 struct afs_vnode *dvnode, *vnode; 1030 struct afs_vnode *dvnode, *vnode;
979 struct key *key; 1031 struct key *key;
980 int ret; 1032 int ret;
@@ -987,23 +1039,45 @@ static int afs_link(struct dentry *from, struct inode *dir,
987 dvnode->fid.vid, dvnode->fid.vnode, 1039 dvnode->fid.vid, dvnode->fid.vnode,
988 dentry); 1040 dentry);
989 1041
1042 ret = -ENAMETOOLONG;
1043 if (dentry->d_name.len >= AFSNAMEMAX)
1044 goto error;
1045
990 key = afs_request_key(dvnode->volume->cell); 1046 key = afs_request_key(dvnode->volume->cell);
991 if (IS_ERR(key)) { 1047 if (IS_ERR(key)) {
992 ret = PTR_ERR(key); 1048 ret = PTR_ERR(key);
993 goto error; 1049 goto error;
994 } 1050 }
995 1051
996 ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name); 1052 ret = -ERESTARTSYS;
997 if (ret < 0) 1053 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
998 goto link_error; 1054 if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
1055 afs_end_vnode_operation(&fc);
1056 return -ERESTARTSYS;
1057 }
1058
1059 while (afs_select_fileserver(&fc)) {
1060 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
1061 fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
1062 afs_fs_link(&fc, vnode, dentry->d_name.name);
1063 }
1064
1065 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
1066 afs_vnode_commit_status(&fc, vnode, fc.cb_break_2);
1067 ihold(&vnode->vfs_inode);
1068 d_instantiate(dentry, &vnode->vfs_inode);
1069
1070 mutex_unlock(&vnode->io_lock);
1071 ret = afs_end_vnode_operation(&fc);
1072 if (ret < 0)
1073 goto error_key;
1074 }
999 1075
1000 ihold(&vnode->vfs_inode);
1001 d_instantiate(dentry, &vnode->vfs_inode);
1002 key_put(key); 1076 key_put(key);
1003 _leave(" = 0"); 1077 _leave(" = 0");
1004 return 0; 1078 return 0;
1005 1079
1006link_error: 1080error_key:
1007 key_put(key); 1081 key_put(key);
1008error: 1082error:
1009 d_drop(dentry); 1083 d_drop(dentry);
@@ -1017,20 +1091,21 @@ error:
1017static int afs_symlink(struct inode *dir, struct dentry *dentry, 1091static int afs_symlink(struct inode *dir, struct dentry *dentry,
1018 const char *content) 1092 const char *content)
1019{ 1093{
1020 struct afs_file_status status; 1094 struct afs_fs_cursor fc;
1021 struct afs_server *server; 1095 struct afs_file_status newstatus;
1022 struct afs_vnode *dvnode, *vnode; 1096 struct afs_vnode *dvnode = AFS_FS_I(dir);
1023 struct afs_fid fid; 1097 struct afs_fid newfid;
1024 struct inode *inode;
1025 struct key *key; 1098 struct key *key;
1026 int ret; 1099 int ret;
1027 1100
1028 dvnode = AFS_FS_I(dir);
1029
1030 _enter("{%x:%u},{%pd},%s", 1101 _enter("{%x:%u},{%pd},%s",
1031 dvnode->fid.vid, dvnode->fid.vnode, dentry, 1102 dvnode->fid.vid, dvnode->fid.vnode, dentry,
1032 content); 1103 content);
1033 1104
1105 ret = -ENAMETOOLONG;
1106 if (dentry->d_name.len >= AFSNAMEMAX)
1107 goto error;
1108
1034 ret = -EINVAL; 1109 ret = -EINVAL;
1035 if (strlen(content) >= AFSPATHMAX) 1110 if (strlen(content) >= AFSPATHMAX)
1036 goto error; 1111 goto error;
@@ -1041,39 +1116,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
1041 goto error; 1116 goto error;
1042 } 1117 }
1043 1118
1044 ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content, 1119 ret = -ERESTARTSYS;
1045 &fid, &status, &server); 1120 if (afs_begin_vnode_operation(&fc, dvnode, key)) {
1046 if (ret < 0) 1121 while (afs_select_fileserver(&fc)) {
1047 goto create_error; 1122 fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
1123 afs_fs_symlink(&fc, dentry->d_name.name, content,
1124 &newfid, &newstatus);
1125 }
1048 1126
1049 inode = afs_iget(dir->i_sb, key, &fid, &status, NULL); 1127 afs_check_for_remote_deletion(&fc, fc.vnode);
1050 if (IS_ERR(inode)) { 1128 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
1051 /* ENOMEM at a really inconvenient time - just abandon the new 1129 afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL);
1052 * directory on the server */ 1130 ret = afs_end_vnode_operation(&fc);
1053 ret = PTR_ERR(inode); 1131 if (ret < 0)
1054 goto iget_error; 1132 goto error_key;
1055 } 1133 }
1056 1134
1057 /* apply the status report we've got for the new vnode */
1058 vnode = AFS_FS_I(inode);
1059 spin_lock(&vnode->lock);
1060 vnode->update_cnt++;
1061 spin_unlock(&vnode->lock);
1062 afs_vnode_finalise_status_update(vnode, server);
1063 afs_put_server(server);
1064
1065 d_instantiate(dentry, inode);
1066 if (d_unhashed(dentry)) {
1067 _debug("not hashed");
1068 d_rehash(dentry);
1069 }
1070 key_put(key); 1135 key_put(key);
1071 _leave(" = 0"); 1136 _leave(" = 0");
1072 return 0; 1137 return 0;
1073 1138
1074iget_error: 1139error_key:
1075 afs_put_server(server);
1076create_error:
1077 key_put(key); 1140 key_put(key);
1078error: 1141error:
1079 d_drop(dentry); 1142 d_drop(dentry);
@@ -1088,6 +1151,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1088 struct inode *new_dir, struct dentry *new_dentry, 1151 struct inode *new_dir, struct dentry *new_dentry,
1089 unsigned int flags) 1152 unsigned int flags)
1090{ 1153{
1154 struct afs_fs_cursor fc;
1091 struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; 1155 struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
1092 struct key *key; 1156 struct key *key;
1093 int ret; 1157 int ret;
@@ -1111,16 +1175,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1111 goto error; 1175 goto error;
1112 } 1176 }
1113 1177
1114 ret = afs_vnode_rename(orig_dvnode, new_dvnode, key, 1178 ret = -ERESTARTSYS;
1115 old_dentry->d_name.name, 1179 if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
1116 new_dentry->d_name.name); 1180 if (orig_dvnode != new_dvnode) {
1117 if (ret < 0) 1181 if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
1118 goto rename_error; 1182 afs_end_vnode_operation(&fc);
1183 return -ERESTARTSYS;
1184 }
1185 }
1186 while (afs_select_fileserver(&fc)) {
1187 fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
1188 fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
1189 afs_fs_rename(&fc, old_dentry->d_name.name,
1190 new_dvnode, new_dentry->d_name.name);
1191 }
1192
1193 afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
1194 afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2);
1195 if (orig_dvnode != new_dvnode)
1196 mutex_unlock(&new_dvnode->io_lock);
1197 ret = afs_end_vnode_operation(&fc);
1198 if (ret < 0)
1199 goto error_key;
1200 }
1201
1119 key_put(key); 1202 key_put(key);
1120 _leave(" = 0"); 1203 _leave(" = 0");
1121 return 0; 1204 return 0;
1122 1205
1123rename_error: 1206error_key:
1124 key_put(key); 1207 key_put(key);
1125error: 1208error:
1126 d_drop(new_dentry); 1209 d_drop(new_dentry);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 510cba15fa56..a39192ced99e 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -19,11 +19,11 @@
19#include <linux/task_io_accounting_ops.h> 19#include <linux/task_io_accounting_ops.h>
20#include "internal.h" 20#include "internal.h"
21 21
22static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
22static int afs_readpage(struct file *file, struct page *page); 23static int afs_readpage(struct file *file, struct page *page);
23static void afs_invalidatepage(struct page *page, unsigned int offset, 24static void afs_invalidatepage(struct page *page, unsigned int offset,
24 unsigned int length); 25 unsigned int length);
25static int afs_releasepage(struct page *page, gfp_t gfp_flags); 26static int afs_releasepage(struct page *page, gfp_t gfp_flags);
26static int afs_launder_page(struct page *page);
27 27
28static int afs_readpages(struct file *filp, struct address_space *mapping, 28static int afs_readpages(struct file *filp, struct address_space *mapping,
29 struct list_head *pages, unsigned nr_pages); 29 struct list_head *pages, unsigned nr_pages);
@@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = {
35 .llseek = generic_file_llseek, 35 .llseek = generic_file_llseek,
36 .read_iter = generic_file_read_iter, 36 .read_iter = generic_file_read_iter,
37 .write_iter = afs_file_write, 37 .write_iter = afs_file_write,
38 .mmap = generic_file_readonly_mmap, 38 .mmap = afs_file_mmap,
39 .splice_read = generic_file_splice_read, 39 .splice_read = generic_file_splice_read,
40 .fsync = afs_fsync, 40 .fsync = afs_fsync,
41 .lock = afs_lock, 41 .lock = afs_lock,
@@ -62,12 +62,63 @@ const struct address_space_operations afs_fs_aops = {
62 .writepages = afs_writepages, 62 .writepages = afs_writepages,
63}; 63};
64 64
65static const struct vm_operations_struct afs_vm_ops = {
66 .fault = filemap_fault,
67 .map_pages = filemap_map_pages,
68 .page_mkwrite = afs_page_mkwrite,
69};
70
71/*
72 * Discard a pin on a writeback key.
73 */
74void afs_put_wb_key(struct afs_wb_key *wbk)
75{
76 if (refcount_dec_and_test(&wbk->usage)) {
77 key_put(wbk->key);
78 kfree(wbk);
79 }
80}
81
82/*
83 * Cache key for writeback.
84 */
85int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af)
86{
87 struct afs_wb_key *wbk, *p;
88
89 wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL);
90 if (!wbk)
91 return -ENOMEM;
92 refcount_set(&wbk->usage, 2);
93 wbk->key = af->key;
94
95 spin_lock(&vnode->wb_lock);
96 list_for_each_entry(p, &vnode->wb_keys, vnode_link) {
97 if (p->key == wbk->key)
98 goto found;
99 }
100
101 key_get(wbk->key);
102 list_add_tail(&wbk->vnode_link, &vnode->wb_keys);
103 spin_unlock(&vnode->wb_lock);
104 af->wb = wbk;
105 return 0;
106
107found:
108 refcount_inc(&p->usage);
109 spin_unlock(&vnode->wb_lock);
110 af->wb = p;
111 kfree(wbk);
112 return 0;
113}
114
65/* 115/*
66 * open an AFS file or directory and attach a key to it 116 * open an AFS file or directory and attach a key to it
67 */ 117 */
68int afs_open(struct inode *inode, struct file *file) 118int afs_open(struct inode *inode, struct file *file)
69{ 119{
70 struct afs_vnode *vnode = AFS_FS_I(inode); 120 struct afs_vnode *vnode = AFS_FS_I(inode);
121 struct afs_file *af;
71 struct key *key; 122 struct key *key;
72 int ret; 123 int ret;
73 124
@@ -75,19 +126,38 @@ int afs_open(struct inode *inode, struct file *file)
75 126
76 key = afs_request_key(vnode->volume->cell); 127 key = afs_request_key(vnode->volume->cell);
77 if (IS_ERR(key)) { 128 if (IS_ERR(key)) {
78 _leave(" = %ld [key]", PTR_ERR(key)); 129 ret = PTR_ERR(key);
79 return PTR_ERR(key); 130 goto error;
80 } 131 }
81 132
82 ret = afs_validate(vnode, key); 133 af = kzalloc(sizeof(*af), GFP_KERNEL);
83 if (ret < 0) { 134 if (!af) {
84 _leave(" = %d [val]", ret); 135 ret = -ENOMEM;
85 return ret; 136 goto error_key;
86 } 137 }
138 af->key = key;
139
140 ret = afs_validate(vnode, key);
141 if (ret < 0)
142 goto error_af;
87 143
88 file->private_data = key; 144 if (file->f_mode & FMODE_WRITE) {
145 ret = afs_cache_wb_key(vnode, af);
146 if (ret < 0)
147 goto error_af;
148 }
149
150 file->private_data = af;
89 _leave(" = 0"); 151 _leave(" = 0");
90 return 0; 152 return 0;
153
154error_af:
155 kfree(af);
156error_key:
157 key_put(key);
158error:
159 _leave(" = %d", ret);
160 return ret;
91} 161}
92 162
93/* 163/*
@@ -96,10 +166,16 @@ int afs_open(struct inode *inode, struct file *file)
96int afs_release(struct inode *inode, struct file *file) 166int afs_release(struct inode *inode, struct file *file)
97{ 167{
98 struct afs_vnode *vnode = AFS_FS_I(inode); 168 struct afs_vnode *vnode = AFS_FS_I(inode);
169 struct afs_file *af = file->private_data;
99 170
100 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); 171 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
101 172
102 key_put(file->private_data); 173 file->private_data = NULL;
174 if (af->wb)
175 afs_put_wb_key(af->wb);
176 key_put(af->key);
177 kfree(af);
178 afs_prune_wb_keys(vnode);
103 _leave(" = 0"); 179 _leave(" = 0");
104 return 0; 180 return 0;
105} 181}
@@ -138,6 +214,37 @@ static void afs_file_readpage_read_complete(struct page *page,
138#endif 214#endif
139 215
140/* 216/*
217 * Fetch file data from the volume.
218 */
219int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc)
220{
221 struct afs_fs_cursor fc;
222 int ret;
223
224 _enter("%s{%x:%u.%u},%x,,,",
225 vnode->volume->name,
226 vnode->fid.vid,
227 vnode->fid.vnode,
228 vnode->fid.unique,
229 key_serial(key));
230
231 ret = -ERESTARTSYS;
232 if (afs_begin_vnode_operation(&fc, vnode, key)) {
233 while (afs_select_fileserver(&fc)) {
234 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
235 afs_fs_fetch_data(&fc, desc);
236 }
237
238 afs_check_for_remote_deletion(&fc, fc.vnode);
239 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
240 ret = afs_end_vnode_operation(&fc);
241 }
242
243 _leave(" = %d", ret);
244 return ret;
245}
246
247/*
141 * read page from file, directory or symlink, given a key to use 248 * read page from file, directory or symlink, given a key to use
142 */ 249 */
143int afs_page_filler(void *data, struct page *page) 250int afs_page_filler(void *data, struct page *page)
@@ -199,8 +306,13 @@ int afs_page_filler(void *data, struct page *page)
199 306
200 /* read the contents of the file from the server into the 307 /* read the contents of the file from the server into the
201 * page */ 308 * page */
202 ret = afs_vnode_fetch_data(vnode, key, req); 309 ret = afs_fetch_data(vnode, key, req);
203 afs_put_read(req); 310 afs_put_read(req);
311
312 if (ret >= 0 && S_ISDIR(inode->i_mode) &&
313 !afs_dir_check_page(inode, page))
314 ret = -EIO;
315
204 if (ret < 0) { 316 if (ret < 0) {
205 if (ret == -ENOENT) { 317 if (ret == -ENOENT) {
206 _debug("got NOENT from server" 318 _debug("got NOENT from server"
@@ -259,12 +371,12 @@ static int afs_readpage(struct file *file, struct page *page)
259 int ret; 371 int ret;
260 372
261 if (file) { 373 if (file) {
262 key = file->private_data; 374 key = afs_file_key(file);
263 ASSERT(key != NULL); 375 ASSERT(key != NULL);
264 ret = afs_page_filler(key, page); 376 ret = afs_page_filler(key, page);
265 } else { 377 } else {
266 struct inode *inode = page->mapping->host; 378 struct inode *inode = page->mapping->host;
267 key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); 379 key = afs_request_key(AFS_FS_S(inode->i_sb)->cell);
268 if (IS_ERR(key)) { 380 if (IS_ERR(key)) {
269 ret = PTR_ERR(key); 381 ret = PTR_ERR(key);
270 } else { 382 } else {
@@ -281,7 +393,7 @@ static int afs_readpage(struct file *file, struct page *page)
281static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req) 393static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
282{ 394{
283#ifdef CONFIG_AFS_FSCACHE 395#ifdef CONFIG_AFS_FSCACHE
284 struct afs_vnode *vnode = call->reply; 396 struct afs_vnode *vnode = call->reply[0];
285#endif 397#endif
286 struct page *page = req->pages[req->index]; 398 struct page *page = req->pages[req->index];
287 399
@@ -310,7 +422,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
310 struct afs_read *req; 422 struct afs_read *req;
311 struct list_head *p; 423 struct list_head *p;
312 struct page *first, *page; 424 struct page *first, *page;
313 struct key *key = file->private_data; 425 struct key *key = afs_file_key(file);
314 pgoff_t index; 426 pgoff_t index;
315 int ret, n, i; 427 int ret, n, i;
316 428
@@ -369,7 +481,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
369 return 0; 481 return 0;
370 } 482 }
371 483
372 ret = afs_vnode_fetch_data(vnode, key, req); 484 ret = afs_fetch_data(vnode, key, req);
373 if (ret < 0) 485 if (ret < 0)
374 goto error; 486 goto error;
375 487
@@ -406,7 +518,7 @@ error:
406static int afs_readpages(struct file *file, struct address_space *mapping, 518static int afs_readpages(struct file *file, struct address_space *mapping,
407 struct list_head *pages, unsigned nr_pages) 519 struct list_head *pages, unsigned nr_pages)
408{ 520{
409 struct key *key = file->private_data; 521 struct key *key = afs_file_key(file);
410 struct afs_vnode *vnode; 522 struct afs_vnode *vnode;
411 int ret = 0; 523 int ret = 0;
412 524
@@ -464,16 +576,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
464} 576}
465 577
466/* 578/*
467 * write back a dirty page
468 */
469static int afs_launder_page(struct page *page)
470{
471 _enter("{%lu}", page->index);
472
473 return 0;
474}
475
476/*
477 * invalidate part or all of a page 579 * invalidate part or all of a page
478 * - release a page and clean up its private data if offset is 0 (indicating 580 * - release a page and clean up its private data if offset is 0 (indicating
479 * the entire page) 581 * the entire page)
@@ -481,7 +583,8 @@ static int afs_launder_page(struct page *page)
481static void afs_invalidatepage(struct page *page, unsigned int offset, 583static void afs_invalidatepage(struct page *page, unsigned int offset,
482 unsigned int length) 584 unsigned int length)
483{ 585{
484 struct afs_writeback *wb = (struct afs_writeback *) page_private(page); 586 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
587 unsigned long priv;
485 588
486 _enter("{%lu},%u,%u", page->index, offset, length); 589 _enter("{%lu},%u,%u", page->index, offset, length);
487 590
@@ -498,13 +601,11 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
498#endif 601#endif
499 602
500 if (PagePrivate(page)) { 603 if (PagePrivate(page)) {
501 if (wb && !PageWriteback(page)) { 604 priv = page_private(page);
502 set_page_private(page, 0); 605 trace_afs_page_dirty(vnode, tracepoint_string("inval"),
503 afs_put_writeback(wb); 606 page->index, priv);
504 } 607 set_page_private(page, 0);
505 608 ClearPagePrivate(page);
506 if (!page_private(page))
507 ClearPagePrivate(page);
508 } 609 }
509 } 610 }
510 611
@@ -517,8 +618,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
517 */ 618 */
518static int afs_releasepage(struct page *page, gfp_t gfp_flags) 619static int afs_releasepage(struct page *page, gfp_t gfp_flags)
519{ 620{
520 struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
521 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); 621 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
622 unsigned long priv;
522 623
523 _enter("{{%x:%u}[%lu],%lx},%x", 624 _enter("{{%x:%u}[%lu],%lx},%x",
524 vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, 625 vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
@@ -534,10 +635,10 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
534#endif 635#endif
535 636
536 if (PagePrivate(page)) { 637 if (PagePrivate(page)) {
537 if (wb) { 638 priv = page_private(page);
538 set_page_private(page, 0); 639 trace_afs_page_dirty(vnode, tracepoint_string("rel"),
539 afs_put_writeback(wb); 640 page->index, priv);
540 } 641 set_page_private(page, 0);
541 ClearPagePrivate(page); 642 ClearPagePrivate(page);
542 } 643 }
543 644
@@ -545,3 +646,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
545 _leave(" = T"); 646 _leave(" = T");
546 return 1; 647 return 1;
547} 648}
649
650/*
651 * Handle setting up a memory mapping on an AFS file.
652 */
653static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
654{
655 int ret;
656
657 ret = generic_file_mmap(file, vma);
658 if (ret == 0)
659 vma->vm_ops = &afs_vm_ops;
660 return ret;
661}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3191dff2c156..7571a5dfd5a3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -14,48 +14,17 @@
14#define AFS_LOCK_GRANTED 0 14#define AFS_LOCK_GRANTED 0
15#define AFS_LOCK_PENDING 1 15#define AFS_LOCK_PENDING 1
16 16
17struct workqueue_struct *afs_lock_manager;
18
17static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); 19static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
18static void afs_fl_release_private(struct file_lock *fl); 20static void afs_fl_release_private(struct file_lock *fl);
19 21
20static struct workqueue_struct *afs_lock_manager;
21static DEFINE_MUTEX(afs_lock_manager_mutex);
22
23static const struct file_lock_operations afs_lock_ops = { 22static const struct file_lock_operations afs_lock_ops = {
24 .fl_copy_lock = afs_fl_copy_lock, 23 .fl_copy_lock = afs_fl_copy_lock,
25 .fl_release_private = afs_fl_release_private, 24 .fl_release_private = afs_fl_release_private,
26}; 25};
27 26
28/* 27/*
29 * initialise the lock manager thread if it isn't already running
30 */
31static int afs_init_lock_manager(void)
32{
33 int ret;
34
35 ret = 0;
36 if (!afs_lock_manager) {
37 mutex_lock(&afs_lock_manager_mutex);
38 if (!afs_lock_manager) {
39 afs_lock_manager = alloc_workqueue("kafs_lockd",
40 WQ_MEM_RECLAIM, 0);
41 if (!afs_lock_manager)
42 ret = -ENOMEM;
43 }
44 mutex_unlock(&afs_lock_manager_mutex);
45 }
46 return ret;
47}
48
49/*
50 * destroy the lock manager thread if it's running
51 */
52void __exit afs_kill_lock_manager(void)
53{
54 if (afs_lock_manager)
55 destroy_workqueue(afs_lock_manager);
56}
57
58/*
59 * if the callback is broken on this vnode, then the lock may now be available 28 * if the callback is broken on this vnode, then the lock may now be available
60 */ 29 */
61void afs_lock_may_be_available(struct afs_vnode *vnode) 30void afs_lock_may_be_available(struct afs_vnode *vnode)
@@ -99,6 +68,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
99} 68}
100 69
101/* 70/*
71 * Get a lock on a file
72 */
73static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
74 afs_lock_type_t type)
75{
76 struct afs_fs_cursor fc;
77 int ret;
78
79 _enter("%s{%x:%u.%u},%x,%u",
80 vnode->volume->name,
81 vnode->fid.vid,
82 vnode->fid.vnode,
83 vnode->fid.unique,
84 key_serial(key), type);
85
86 ret = -ERESTARTSYS;
87 if (afs_begin_vnode_operation(&fc, vnode, key)) {
88 while (afs_select_fileserver(&fc)) {
89 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
90 afs_fs_set_lock(&fc, type);
91 }
92
93 afs_check_for_remote_deletion(&fc, fc.vnode);
94 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
95 ret = afs_end_vnode_operation(&fc);
96 }
97
98 _leave(" = %d", ret);
99 return ret;
100}
101
102/*
103 * Extend a lock on a file
104 */
105static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
106{
107 struct afs_fs_cursor fc;
108 int ret;
109
110 _enter("%s{%x:%u.%u},%x",
111 vnode->volume->name,
112 vnode->fid.vid,
113 vnode->fid.vnode,
114 vnode->fid.unique,
115 key_serial(key));
116
117 ret = -ERESTARTSYS;
118 if (afs_begin_vnode_operation(&fc, vnode, key)) {
119 while (afs_select_current_fileserver(&fc)) {
120 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
121 afs_fs_extend_lock(&fc);
122 }
123
124 afs_check_for_remote_deletion(&fc, fc.vnode);
125 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
126 ret = afs_end_vnode_operation(&fc);
127 }
128
129 _leave(" = %d", ret);
130 return ret;
131}
132
133/*
134 * Release a lock on a file
135 */
136static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
137{
138 struct afs_fs_cursor fc;
139 int ret;
140
141 _enter("%s{%x:%u.%u},%x",
142 vnode->volume->name,
143 vnode->fid.vid,
144 vnode->fid.vnode,
145 vnode->fid.unique,
146 key_serial(key));
147
148 ret = -ERESTARTSYS;
149 if (afs_begin_vnode_operation(&fc, vnode, key)) {
150 while (afs_select_current_fileserver(&fc)) {
151 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
152 afs_fs_release_lock(&fc);
153 }
154
155 afs_check_for_remote_deletion(&fc, fc.vnode);
156 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
157 ret = afs_end_vnode_operation(&fc);
158 }
159
160 _leave(" = %d", ret);
161 return ret;
162}
163
164/*
102 * do work for a lock, including: 165 * do work for a lock, including:
103 * - probing for a lock we're waiting on but didn't get immediately 166 * - probing for a lock we're waiting on but didn't get immediately
104 * - extending a lock that's close to timing out 167 * - extending a lock that's close to timing out
@@ -122,7 +185,7 @@ void afs_lock_work(struct work_struct *work)
122 185
123 /* attempt to release the server lock; if it fails, we just 186 /* attempt to release the server lock; if it fails, we just
124 * wait 5 minutes and it'll time out anyway */ 187 * wait 5 minutes and it'll time out anyway */
125 ret = afs_vnode_release_lock(vnode, vnode->unlock_key); 188 ret = afs_release_lock(vnode, vnode->unlock_key);
126 if (ret < 0) 189 if (ret < 0)
127 printk(KERN_WARNING "AFS:" 190 printk(KERN_WARNING "AFS:"
128 " Failed to release lock on {%x:%x} error %d\n", 191 " Failed to release lock on {%x:%x} error %d\n",
@@ -143,10 +206,10 @@ void afs_lock_work(struct work_struct *work)
143 BUG(); 206 BUG();
144 fl = list_entry(vnode->granted_locks.next, 207 fl = list_entry(vnode->granted_locks.next,
145 struct file_lock, fl_u.afs.link); 208 struct file_lock, fl_u.afs.link);
146 key = key_get(fl->fl_file->private_data); 209 key = key_get(afs_file_key(fl->fl_file));
147 spin_unlock(&vnode->lock); 210 spin_unlock(&vnode->lock);
148 211
149 ret = afs_vnode_extend_lock(vnode, key); 212 ret = afs_extend_lock(vnode, key);
150 clear_bit(AFS_VNODE_LOCKING, &vnode->flags); 213 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
151 key_put(key); 214 key_put(key);
152 switch (ret) { 215 switch (ret) {
@@ -177,12 +240,12 @@ void afs_lock_work(struct work_struct *work)
177 BUG(); 240 BUG();
178 fl = list_entry(vnode->pending_locks.next, 241 fl = list_entry(vnode->pending_locks.next,
179 struct file_lock, fl_u.afs.link); 242 struct file_lock, fl_u.afs.link);
180 key = key_get(fl->fl_file->private_data); 243 key = key_get(afs_file_key(fl->fl_file));
181 type = (fl->fl_type == F_RDLCK) ? 244 type = (fl->fl_type == F_RDLCK) ?
182 AFS_LOCK_READ : AFS_LOCK_WRITE; 245 AFS_LOCK_READ : AFS_LOCK_WRITE;
183 spin_unlock(&vnode->lock); 246 spin_unlock(&vnode->lock);
184 247
185 ret = afs_vnode_set_lock(vnode, key, type); 248 ret = afs_set_lock(vnode, key, type);
186 clear_bit(AFS_VNODE_LOCKING, &vnode->flags); 249 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
187 switch (ret) { 250 switch (ret) {
188 case -EWOULDBLOCK: 251 case -EWOULDBLOCK:
@@ -213,7 +276,7 @@ void afs_lock_work(struct work_struct *work)
213 clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); 276 clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
214 clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); 277 clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
215 spin_unlock(&vnode->lock); 278 spin_unlock(&vnode->lock);
216 afs_vnode_release_lock(vnode, key); 279 afs_release_lock(vnode, key);
217 if (!list_empty(&vnode->pending_locks)) 280 if (!list_empty(&vnode->pending_locks))
218 afs_lock_may_be_available(vnode); 281 afs_lock_may_be_available(vnode);
219 } 282 }
@@ -255,7 +318,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
255 struct inode *inode = file_inode(file); 318 struct inode *inode = file_inode(file);
256 struct afs_vnode *vnode = AFS_FS_I(inode); 319 struct afs_vnode *vnode = AFS_FS_I(inode);
257 afs_lock_type_t type; 320 afs_lock_type_t type;
258 struct key *key = file->private_data; 321 struct key *key = afs_file_key(file);
259 int ret; 322 int ret;
260 323
261 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); 324 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -264,10 +327,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
264 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) 327 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
265 return -EINVAL; 328 return -EINVAL;
266 329
267 ret = afs_init_lock_manager();
268 if (ret < 0)
269 return ret;
270
271 fl->fl_ops = &afs_lock_ops; 330 fl->fl_ops = &afs_lock_ops;
272 INIT_LIST_HEAD(&fl->fl_u.afs.link); 331 INIT_LIST_HEAD(&fl->fl_u.afs.link);
273 fl->fl_u.afs.state = AFS_LOCK_PENDING; 332 fl->fl_u.afs.state = AFS_LOCK_PENDING;
@@ -278,7 +337,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
278 337
279 /* make sure we've got a callback on this file and that our view of the 338 /* make sure we've got a callback on this file and that our view of the
280 * data version is up to date */ 339 * data version is up to date */
281 ret = afs_vnode_fetch_status(vnode, NULL, key); 340 ret = afs_validate(vnode, key);
282 if (ret < 0) 341 if (ret < 0)
283 goto error; 342 goto error;
284 343
@@ -315,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
315 set_bit(AFS_VNODE_LOCKING, &vnode->flags); 374 set_bit(AFS_VNODE_LOCKING, &vnode->flags);
316 spin_unlock(&vnode->lock); 375 spin_unlock(&vnode->lock);
317 376
318 ret = afs_vnode_set_lock(vnode, key, type); 377 ret = afs_set_lock(vnode, key, type);
319 clear_bit(AFS_VNODE_LOCKING, &vnode->flags); 378 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
320 switch (ret) { 379 switch (ret) {
321 case 0: 380 case 0:
@@ -418,7 +477,7 @@ given_lock:
418 /* again, make sure we've got a callback on this file and, again, make 477 /* again, make sure we've got a callback on this file and, again, make
419 * sure that our view of the data version is up to date (we ignore 478 * sure that our view of the data version is up to date (we ignore
420 * errors incurred here and deal with the consequences elsewhere) */ 479 * errors incurred here and deal with the consequences elsewhere) */
421 afs_vnode_fetch_status(vnode, NULL, key); 480 afs_validate(vnode, key);
422 481
423error: 482error:
424 spin_unlock(&inode->i_lock); 483 spin_unlock(&inode->i_lock);
@@ -441,7 +500,7 @@ vfs_rejected_lock:
441static int afs_do_unlk(struct file *file, struct file_lock *fl) 500static int afs_do_unlk(struct file *file, struct file_lock *fl)
442{ 501{
443 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); 502 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
444 struct key *key = file->private_data; 503 struct key *key = afs_file_key(file);
445 int ret; 504 int ret;
446 505
447 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); 506 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -476,7 +535,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
476static int afs_do_getlk(struct file *file, struct file_lock *fl) 535static int afs_do_getlk(struct file *file, struct file_lock *fl)
477{ 536{
478 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); 537 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
479 struct key *key = file->private_data; 538 struct key *key = afs_file_key(file);
480 int ret, lock_count; 539 int ret, lock_count;
481 540
482 _enter(""); 541 _enter("");
@@ -490,7 +549,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
490 posix_test_lock(file, fl); 549 posix_test_lock(file, fl);
491 if (fl->fl_type == F_UNLCK) { 550 if (fl->fl_type == F_UNLCK) {
492 /* no local locks; consult the server */ 551 /* no local locks; consult the server */
493 ret = afs_vnode_fetch_status(vnode, NULL, key); 552 ret = afs_fetch_status(vnode, key);
494 if (ret < 0) 553 if (ret < 0)
495 goto error; 554 goto error;
496 lock_count = vnode->status.lock_count; 555 lock_count = vnode->status.lock_count;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 19f76ae36982..b90ef39ae914 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,12 +16,19 @@
16#include "internal.h" 16#include "internal.h"
17#include "afs_fs.h" 17#include "afs_fs.h"
18 18
19static const struct afs_fid afs_zero_fid;
20
19/* 21/*
20 * We need somewhere to discard into in case the server helpfully returns more 22 * We need somewhere to discard into in case the server helpfully returns more
21 * than we asked for in FS.FetchData{,64}. 23 * than we asked for in FS.FetchData{,64}.
22 */ 24 */
23static u8 afs_discard_buffer[64]; 25static u8 afs_discard_buffer[64];
24 26
27static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
28{
29 call->cbi = afs_get_cb_interest(cbi);
30}
31
25/* 32/*
26 * decode an AFSFid block 33 * decode an AFSFid block
27 */ 34 */
@@ -47,14 +54,18 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
47 const __be32 *bp = *_bp; 54 const __be32 *bp = *_bp;
48 umode_t mode; 55 umode_t mode;
49 u64 data_version, size; 56 u64 data_version, size;
50 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ 57 bool changed = false;
51 kuid_t owner; 58 kuid_t owner;
52 kgid_t group; 59 kgid_t group;
53 60
61 if (vnode)
62 write_seqlock(&vnode->cb_lock);
63
54#define EXTRACT(DST) \ 64#define EXTRACT(DST) \
55 do { \ 65 do { \
56 u32 x = ntohl(*bp++); \ 66 u32 x = ntohl(*bp++); \
57 changed |= DST - x; \ 67 if (DST != x) \
68 changed |= true; \
58 DST = x; \ 69 DST = x; \
59 } while (0) 70 } while (0)
60 71
@@ -70,8 +81,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
70 EXTRACT(status->caller_access); /* call ticket dependent */ 81 EXTRACT(status->caller_access); /* call ticket dependent */
71 EXTRACT(status->anon_access); 82 EXTRACT(status->anon_access);
72 EXTRACT(status->mode); 83 EXTRACT(status->mode);
73 EXTRACT(status->parent.vnode); 84 bp++; /* parent.vnode */
74 EXTRACT(status->parent.unique); 85 bp++; /* parent.unique */
75 bp++; /* seg size */ 86 bp++; /* seg size */
76 status->mtime_client = ntohl(*bp++); 87 status->mtime_client = ntohl(*bp++);
77 status->mtime_server = ntohl(*bp++); 88 status->mtime_server = ntohl(*bp++);
@@ -95,7 +106,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
95 status->mtime_client, status->mtime_server); 106 status->mtime_client, status->mtime_server);
96 107
97 if (vnode) { 108 if (vnode) {
98 status->parent.vid = vnode->fid.vid;
99 if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { 109 if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
100 _debug("vnode changed"); 110 _debug("vnode changed");
101 i_size_write(&vnode->vfs_inode, size); 111 i_size_write(&vnode->vfs_inode, size);
@@ -127,25 +137,47 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
127 _debug("vnode modified %llx on {%x:%u}", 137 _debug("vnode modified %llx on {%x:%u}",
128 (unsigned long long) data_version, 138 (unsigned long long) data_version,
129 vnode->fid.vid, vnode->fid.vnode); 139 vnode->fid.vid, vnode->fid.vnode);
130 set_bit(AFS_VNODE_MODIFIED, &vnode->flags); 140 set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
131 set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); 141 set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
132 } 142 }
133 } else if (store_version) { 143 } else if (store_version) {
134 status->data_version = data_version; 144 status->data_version = data_version;
135 } 145 }
146
147 if (vnode)
148 write_sequnlock(&vnode->cb_lock);
136} 149}
137 150
138/* 151/*
139 * decode an AFSCallBack block 152 * decode an AFSCallBack block
140 */ 153 */
141static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) 154static void xdr_decode_AFSCallBack(struct afs_call *call,
155 struct afs_vnode *vnode,
156 const __be32 **_bp)
142{ 157{
158 struct afs_cb_interest *old, *cbi = call->cbi;
143 const __be32 *bp = *_bp; 159 const __be32 *bp = *_bp;
160 u32 cb_expiry;
161
162 write_seqlock(&vnode->cb_lock);
163
164 if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
165 vnode->cb_version = ntohl(*bp++);
166 cb_expiry = ntohl(*bp++);
167 vnode->cb_type = ntohl(*bp++);
168 vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
169 old = vnode->cb_interest;
170 if (old != call->cbi) {
171 vnode->cb_interest = cbi;
172 cbi = old;
173 }
174 set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
175 } else {
176 bp += 3;
177 }
144 178
145 vnode->cb_version = ntohl(*bp++); 179 write_sequnlock(&vnode->cb_lock);
146 vnode->cb_expiry = ntohl(*bp++); 180 call->cbi = cbi;
147 vnode->cb_type = ntohl(*bp++);
148 vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
149 *_bp = bp; 181 *_bp = bp;
150} 182}
151 183
@@ -243,22 +275,22 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
243 */ 275 */
244static int afs_deliver_fs_fetch_status(struct afs_call *call) 276static int afs_deliver_fs_fetch_status(struct afs_call *call)
245{ 277{
246 struct afs_vnode *vnode = call->reply; 278 struct afs_vnode *vnode = call->reply[0];
247 const __be32 *bp; 279 const __be32 *bp;
248 int ret; 280 int ret;
249 281
250 _enter("");
251
252 ret = afs_transfer_reply(call); 282 ret = afs_transfer_reply(call);
253 if (ret < 0) 283 if (ret < 0)
254 return ret; 284 return ret;
255 285
286 _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
287
256 /* unmarshall the reply once we've received all of it */ 288 /* unmarshall the reply once we've received all of it */
257 bp = call->buffer; 289 bp = call->buffer;
258 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 290 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
259 xdr_decode_AFSCallBack(&bp, vnode); 291 xdr_decode_AFSCallBack(call, vnode, &bp);
260 if (call->reply2) 292 if (call->reply[1])
261 xdr_decode_AFSVolSync(&bp, call->reply2); 293 xdr_decode_AFSVolSync(&bp, call->reply[1]);
262 294
263 _leave(" = 0 [done]"); 295 _leave(" = 0 [done]");
264 return 0; 296 return 0;
@@ -269,35 +301,33 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
269 */ 301 */
270static const struct afs_call_type afs_RXFSFetchStatus = { 302static const struct afs_call_type afs_RXFSFetchStatus = {
271 .name = "FS.FetchStatus", 303 .name = "FS.FetchStatus",
304 .op = afs_FS_FetchStatus,
272 .deliver = afs_deliver_fs_fetch_status, 305 .deliver = afs_deliver_fs_fetch_status,
273 .abort_to_error = afs_abort_to_error,
274 .destructor = afs_flat_call_destructor, 306 .destructor = afs_flat_call_destructor,
275}; 307};
276 308
277/* 309/*
278 * fetch the status information for a file 310 * fetch the status information for a file
279 */ 311 */
280int afs_fs_fetch_file_status(struct afs_server *server, 312int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync)
281 struct key *key,
282 struct afs_vnode *vnode,
283 struct afs_volsync *volsync,
284 bool async)
285{ 313{
314 struct afs_vnode *vnode = fc->vnode;
286 struct afs_call *call; 315 struct afs_call *call;
316 struct afs_net *net = afs_v2net(vnode);
287 __be32 *bp; 317 __be32 *bp;
288 318
289 _enter(",%x,{%x:%u},,", 319 _enter(",%x,{%x:%u},,",
290 key_serial(key), vnode->fid.vid, vnode->fid.vnode); 320 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
291 321
292 call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); 322 call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
293 if (!call) 323 if (!call) {
324 fc->ac.error = -ENOMEM;
294 return -ENOMEM; 325 return -ENOMEM;
326 }
295 327
296 call->key = key; 328 call->key = fc->key;
297 call->reply = vnode; 329 call->reply[0] = vnode;
298 call->reply2 = volsync; 330 call->reply[1] = volsync;
299 call->service_id = FS_SERVICE;
300 call->port = htons(AFS_FS_PORT);
301 331
302 /* marshall the parameters */ 332 /* marshall the parameters */
303 bp = call->request; 333 bp = call->request;
@@ -306,7 +336,10 @@ int afs_fs_fetch_file_status(struct afs_server *server,
306 bp[2] = htonl(vnode->fid.vnode); 336 bp[2] = htonl(vnode->fid.vnode);
307 bp[3] = htonl(vnode->fid.unique); 337 bp[3] = htonl(vnode->fid.unique);
308 338
309 return afs_make_call(&server->addr, call, GFP_NOFS, async); 339 call->cb_break = fc->cb_break;
340 afs_use_fs_server(call, fc->cbi);
341 trace_afs_make_fs_call(call, &vnode->fid);
342 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
310} 343}
311 344
312/* 345/*
@@ -314,8 +347,8 @@ int afs_fs_fetch_file_status(struct afs_server *server,
314 */ 347 */
315static int afs_deliver_fs_fetch_data(struct afs_call *call) 348static int afs_deliver_fs_fetch_data(struct afs_call *call)
316{ 349{
317 struct afs_vnode *vnode = call->reply; 350 struct afs_vnode *vnode = call->reply[0];
318 struct afs_read *req = call->reply3; 351 struct afs_read *req = call->reply[2];
319 const __be32 *bp; 352 const __be32 *bp;
320 unsigned int size; 353 unsigned int size;
321 void *buffer; 354 void *buffer;
@@ -431,9 +464,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
431 464
432 bp = call->buffer; 465 bp = call->buffer;
433 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 466 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
434 xdr_decode_AFSCallBack(&bp, vnode); 467 xdr_decode_AFSCallBack(call, vnode, &bp);
435 if (call->reply2) 468 if (call->reply[1])
436 xdr_decode_AFSVolSync(&bp, call->reply2); 469 xdr_decode_AFSVolSync(&bp, call->reply[1]);
437 470
438 call->offset = 0; 471 call->offset = 0;
439 call->unmarshall++; 472 call->unmarshall++;
@@ -457,7 +490,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
457 490
458static void afs_fetch_data_destructor(struct afs_call *call) 491static void afs_fetch_data_destructor(struct afs_call *call)
459{ 492{
460 struct afs_read *req = call->reply3; 493 struct afs_read *req = call->reply[2];
461 494
462 afs_put_read(req); 495 afs_put_read(req);
463 afs_flat_call_destructor(call); 496 afs_flat_call_destructor(call);
@@ -468,43 +501,38 @@ static void afs_fetch_data_destructor(struct afs_call *call)
468 */ 501 */
469static const struct afs_call_type afs_RXFSFetchData = { 502static const struct afs_call_type afs_RXFSFetchData = {
470 .name = "FS.FetchData", 503 .name = "FS.FetchData",
504 .op = afs_FS_FetchData,
471 .deliver = afs_deliver_fs_fetch_data, 505 .deliver = afs_deliver_fs_fetch_data,
472 .abort_to_error = afs_abort_to_error,
473 .destructor = afs_fetch_data_destructor, 506 .destructor = afs_fetch_data_destructor,
474}; 507};
475 508
476static const struct afs_call_type afs_RXFSFetchData64 = { 509static const struct afs_call_type afs_RXFSFetchData64 = {
477 .name = "FS.FetchData64", 510 .name = "FS.FetchData64",
511 .op = afs_FS_FetchData64,
478 .deliver = afs_deliver_fs_fetch_data, 512 .deliver = afs_deliver_fs_fetch_data,
479 .abort_to_error = afs_abort_to_error,
480 .destructor = afs_fetch_data_destructor, 513 .destructor = afs_fetch_data_destructor,
481}; 514};
482 515
483/* 516/*
484 * fetch data from a very large file 517 * fetch data from a very large file
485 */ 518 */
486static int afs_fs_fetch_data64(struct afs_server *server, 519static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
487 struct key *key,
488 struct afs_vnode *vnode,
489 struct afs_read *req,
490 bool async)
491{ 520{
521 struct afs_vnode *vnode = fc->vnode;
492 struct afs_call *call; 522 struct afs_call *call;
523 struct afs_net *net = afs_v2net(vnode);
493 __be32 *bp; 524 __be32 *bp;
494 525
495 _enter(""); 526 _enter("");
496 527
497 call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); 528 call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
498 if (!call) 529 if (!call)
499 return -ENOMEM; 530 return -ENOMEM;
500 531
501 call->key = key; 532 call->key = fc->key;
502 call->reply = vnode; 533 call->reply[0] = vnode;
503 call->reply2 = NULL; /* volsync */ 534 call->reply[1] = NULL; /* volsync */
504 call->reply3 = req; 535 call->reply[2] = req;
505 call->service_id = FS_SERVICE;
506 call->port = htons(AFS_FS_PORT);
507 call->operation_ID = FSFETCHDATA64;
508 536
509 /* marshall the parameters */ 537 /* marshall the parameters */
510 bp = call->request; 538 bp = call->request;
@@ -518,39 +546,37 @@ static int afs_fs_fetch_data64(struct afs_server *server,
518 bp[7] = htonl(lower_32_bits(req->len)); 546 bp[7] = htonl(lower_32_bits(req->len));
519 547
520 atomic_inc(&req->usage); 548 atomic_inc(&req->usage);
521 return afs_make_call(&server->addr, call, GFP_NOFS, async); 549 call->cb_break = fc->cb_break;
550 afs_use_fs_server(call, fc->cbi);
551 trace_afs_make_fs_call(call, &vnode->fid);
552 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
522} 553}
523 554
524/* 555/*
525 * fetch data from a file 556 * fetch data from a file
526 */ 557 */
527int afs_fs_fetch_data(struct afs_server *server, 558int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
528 struct key *key,
529 struct afs_vnode *vnode,
530 struct afs_read *req,
531 bool async)
532{ 559{
560 struct afs_vnode *vnode = fc->vnode;
533 struct afs_call *call; 561 struct afs_call *call;
562 struct afs_net *net = afs_v2net(vnode);
534 __be32 *bp; 563 __be32 *bp;
535 564
536 if (upper_32_bits(req->pos) || 565 if (upper_32_bits(req->pos) ||
537 upper_32_bits(req->len) || 566 upper_32_bits(req->len) ||
538 upper_32_bits(req->pos + req->len)) 567 upper_32_bits(req->pos + req->len))
539 return afs_fs_fetch_data64(server, key, vnode, req, async); 568 return afs_fs_fetch_data64(fc, req);
540 569
541 _enter(""); 570 _enter("");
542 571
543 call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); 572 call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
544 if (!call) 573 if (!call)
545 return -ENOMEM; 574 return -ENOMEM;
546 575
547 call->key = key; 576 call->key = fc->key;
548 call->reply = vnode; 577 call->reply[0] = vnode;
549 call->reply2 = NULL; /* volsync */ 578 call->reply[1] = NULL; /* volsync */
550 call->reply3 = req; 579 call->reply[2] = req;
551 call->service_id = FS_SERVICE;
552 call->port = htons(AFS_FS_PORT);
553 call->operation_ID = FSFETCHDATA;
554 580
555 /* marshall the parameters */ 581 /* marshall the parameters */
556 bp = call->request; 582 bp = call->request;
@@ -562,90 +588,10 @@ int afs_fs_fetch_data(struct afs_server *server,
562 bp[5] = htonl(lower_32_bits(req->len)); 588 bp[5] = htonl(lower_32_bits(req->len));
563 589
564 atomic_inc(&req->usage); 590 atomic_inc(&req->usage);
565 return afs_make_call(&server->addr, call, GFP_NOFS, async); 591 call->cb_break = fc->cb_break;
566} 592 afs_use_fs_server(call, fc->cbi);
567 593 trace_afs_make_fs_call(call, &vnode->fid);
568/* 594 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
569 * deliver reply data to an FS.GiveUpCallBacks
570 */
571static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
572{
573 _enter("");
574
575 /* shouldn't be any reply data */
576 return afs_extract_data(call, NULL, 0, false);
577}
578
579/*
580 * FS.GiveUpCallBacks operation type
581 */
582static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
583 .name = "FS.GiveUpCallBacks",
584 .deliver = afs_deliver_fs_give_up_callbacks,
585 .abort_to_error = afs_abort_to_error,
586 .destructor = afs_flat_call_destructor,
587};
588
589/*
590 * give up a set of callbacks
591 * - the callbacks are held in the server->cb_break ring
592 */
593int afs_fs_give_up_callbacks(struct afs_server *server,
594 bool async)
595{
596 struct afs_call *call;
597 size_t ncallbacks;
598 __be32 *bp, *tp;
599 int loop;
600
601 ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
602 ARRAY_SIZE(server->cb_break));
603
604 _enter("{%zu},", ncallbacks);
605
606 if (ncallbacks == 0)
607 return 0;
608 if (ncallbacks > AFSCBMAX)
609 ncallbacks = AFSCBMAX;
610
611 _debug("break %zu callbacks", ncallbacks);
612
613 call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
614 12 + ncallbacks * 6 * 4, 0);
615 if (!call)
616 return -ENOMEM;
617
618 call->service_id = FS_SERVICE;
619 call->port = htons(AFS_FS_PORT);
620
621 /* marshall the parameters */
622 bp = call->request;
623 tp = bp + 2 + ncallbacks * 3;
624 *bp++ = htonl(FSGIVEUPCALLBACKS);
625 *bp++ = htonl(ncallbacks);
626 *tp++ = htonl(ncallbacks);
627
628 atomic_sub(ncallbacks, &server->cb_break_n);
629 for (loop = ncallbacks; loop > 0; loop--) {
630 struct afs_callback *cb =
631 &server->cb_break[server->cb_break_tail];
632
633 *bp++ = htonl(cb->fid.vid);
634 *bp++ = htonl(cb->fid.vnode);
635 *bp++ = htonl(cb->fid.unique);
636 *tp++ = htonl(cb->version);
637 *tp++ = htonl(cb->expiry);
638 *tp++ = htonl(cb->type);
639 smp_mb();
640 server->cb_break_tail =
641 (server->cb_break_tail + 1) &
642 (ARRAY_SIZE(server->cb_break) - 1);
643 }
644
645 ASSERT(ncallbacks > 0);
646 wake_up_nr(&server->cb_break_waitq, ncallbacks);
647
648 return afs_make_call(&server->addr, call, GFP_NOFS, async);
649} 595}
650 596
651/* 597/*
@@ -653,7 +599,7 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
653 */ 599 */
654static int afs_deliver_fs_create_vnode(struct afs_call *call) 600static int afs_deliver_fs_create_vnode(struct afs_call *call)
655{ 601{
656 struct afs_vnode *vnode = call->reply; 602 struct afs_vnode *vnode = call->reply[0];
657 const __be32 *bp; 603 const __be32 *bp;
658 int ret; 604 int ret;
659 605
@@ -665,11 +611,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
665 611
666 /* unmarshall the reply once we've received all of it */ 612 /* unmarshall the reply once we've received all of it */
667 bp = call->buffer; 613 bp = call->buffer;
668 xdr_decode_AFSFid(&bp, call->reply2); 614 xdr_decode_AFSFid(&bp, call->reply[1]);
669 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); 615 xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
670 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 616 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
671 xdr_decode_AFSCallBack_raw(&bp, call->reply4); 617 xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
672 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 618 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
673 619
674 _leave(" = 0 [done]"); 620 _leave(" = 0 [done]");
675 return 0; 621 return 0;
@@ -678,27 +624,33 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
678/* 624/*
679 * FS.CreateFile and FS.MakeDir operation type 625 * FS.CreateFile and FS.MakeDir operation type
680 */ 626 */
681static const struct afs_call_type afs_RXFSCreateXXXX = { 627static const struct afs_call_type afs_RXFSCreateFile = {
682 .name = "FS.CreateXXXX", 628 .name = "FS.CreateFile",
629 .op = afs_FS_CreateFile,
630 .deliver = afs_deliver_fs_create_vnode,
631 .destructor = afs_flat_call_destructor,
632};
633
634static const struct afs_call_type afs_RXFSMakeDir = {
635 .name = "FS.MakeDir",
636 .op = afs_FS_MakeDir,
683 .deliver = afs_deliver_fs_create_vnode, 637 .deliver = afs_deliver_fs_create_vnode,
684 .abort_to_error = afs_abort_to_error,
685 .destructor = afs_flat_call_destructor, 638 .destructor = afs_flat_call_destructor,
686}; 639};
687 640
688/* 641/*
689 * create a file or make a directory 642 * create a file or make a directory
690 */ 643 */
691int afs_fs_create(struct afs_server *server, 644int afs_fs_create(struct afs_fs_cursor *fc,
692 struct key *key,
693 struct afs_vnode *vnode,
694 const char *name, 645 const char *name,
695 umode_t mode, 646 umode_t mode,
696 struct afs_fid *newfid, 647 struct afs_fid *newfid,
697 struct afs_file_status *newstatus, 648 struct afs_file_status *newstatus,
698 struct afs_callback *newcb, 649 struct afs_callback *newcb)
699 bool async)
700{ 650{
651 struct afs_vnode *vnode = fc->vnode;
701 struct afs_call *call; 652 struct afs_call *call;
653 struct afs_net *net = afs_v2net(vnode);
702 size_t namesz, reqsz, padsz; 654 size_t namesz, reqsz, padsz;
703 __be32 *bp; 655 __be32 *bp;
704 656
@@ -708,18 +660,17 @@ int afs_fs_create(struct afs_server *server,
708 padsz = (4 - (namesz & 3)) & 3; 660 padsz = (4 - (namesz & 3)) & 3;
709 reqsz = (5 * 4) + namesz + padsz + (6 * 4); 661 reqsz = (5 * 4) + namesz + padsz + (6 * 4);
710 662
711 call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz, 663 call = afs_alloc_flat_call(
712 (3 + 21 + 21 + 3 + 6) * 4); 664 net, S_ISDIR(mode) ? &afs_RXFSMakeDir : &afs_RXFSCreateFile,
665 reqsz, (3 + 21 + 21 + 3 + 6) * 4);
713 if (!call) 666 if (!call)
714 return -ENOMEM; 667 return -ENOMEM;
715 668
716 call->key = key; 669 call->key = fc->key;
717 call->reply = vnode; 670 call->reply[0] = vnode;
718 call->reply2 = newfid; 671 call->reply[1] = newfid;
719 call->reply3 = newstatus; 672 call->reply[2] = newstatus;
720 call->reply4 = newcb; 673 call->reply[3] = newcb;
721 call->service_id = FS_SERVICE;
722 call->port = htons(AFS_FS_PORT);
723 674
724 /* marshall the parameters */ 675 /* marshall the parameters */
725 bp = call->request; 676 bp = call->request;
@@ -741,7 +692,9 @@ int afs_fs_create(struct afs_server *server,
741 *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ 692 *bp++ = htonl(mode & S_IALLUGO); /* unix mode */
742 *bp++ = 0; /* segment size */ 693 *bp++ = 0; /* segment size */
743 694
744 return afs_make_call(&server->addr, call, GFP_NOFS, async); 695 afs_use_fs_server(call, fc->cbi);
696 trace_afs_make_fs_call(call, &vnode->fid);
697 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
745} 698}
746 699
747/* 700/*
@@ -749,7 +702,7 @@ int afs_fs_create(struct afs_server *server,
749 */ 702 */
750static int afs_deliver_fs_remove(struct afs_call *call) 703static int afs_deliver_fs_remove(struct afs_call *call)
751{ 704{
752 struct afs_vnode *vnode = call->reply; 705 struct afs_vnode *vnode = call->reply[0];
753 const __be32 *bp; 706 const __be32 *bp;
754 int ret; 707 int ret;
755 708
@@ -762,7 +715,7 @@ static int afs_deliver_fs_remove(struct afs_call *call)
762 /* unmarshall the reply once we've received all of it */ 715 /* unmarshall the reply once we've received all of it */
763 bp = call->buffer; 716 bp = call->buffer;
764 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 717 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
765 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 718 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
766 719
767 _leave(" = 0 [done]"); 720 _leave(" = 0 [done]");
768 return 0; 721 return 0;
@@ -771,24 +724,28 @@ static int afs_deliver_fs_remove(struct afs_call *call)
771/* 724/*
772 * FS.RemoveDir/FS.RemoveFile operation type 725 * FS.RemoveDir/FS.RemoveFile operation type
773 */ 726 */
774static const struct afs_call_type afs_RXFSRemoveXXXX = { 727static const struct afs_call_type afs_RXFSRemoveFile = {
775 .name = "FS.RemoveXXXX", 728 .name = "FS.RemoveFile",
729 .op = afs_FS_RemoveFile,
730 .deliver = afs_deliver_fs_remove,
731 .destructor = afs_flat_call_destructor,
732};
733
734static const struct afs_call_type afs_RXFSRemoveDir = {
735 .name = "FS.RemoveDir",
736 .op = afs_FS_RemoveDir,
776 .deliver = afs_deliver_fs_remove, 737 .deliver = afs_deliver_fs_remove,
777 .abort_to_error = afs_abort_to_error,
778 .destructor = afs_flat_call_destructor, 738 .destructor = afs_flat_call_destructor,
779}; 739};
780 740
781/* 741/*
782 * remove a file or directory 742 * remove a file or directory
783 */ 743 */
784int afs_fs_remove(struct afs_server *server, 744int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
785 struct key *key,
786 struct afs_vnode *vnode,
787 const char *name,
788 bool isdir,
789 bool async)
790{ 745{
746 struct afs_vnode *vnode = fc->vnode;
791 struct afs_call *call; 747 struct afs_call *call;
748 struct afs_net *net = afs_v2net(vnode);
792 size_t namesz, reqsz, padsz; 749 size_t namesz, reqsz, padsz;
793 __be32 *bp; 750 __be32 *bp;
794 751
@@ -798,14 +755,14 @@ int afs_fs_remove(struct afs_server *server,
798 padsz = (4 - (namesz & 3)) & 3; 755 padsz = (4 - (namesz & 3)) & 3;
799 reqsz = (5 * 4) + namesz + padsz; 756 reqsz = (5 * 4) + namesz + padsz;
800 757
801 call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4); 758 call = afs_alloc_flat_call(
759 net, isdir ? &afs_RXFSRemoveDir : &afs_RXFSRemoveFile,
760 reqsz, (21 + 6) * 4);
802 if (!call) 761 if (!call)
803 return -ENOMEM; 762 return -ENOMEM;
804 763
805 call->key = key; 764 call->key = fc->key;
806 call->reply = vnode; 765 call->reply[0] = vnode;
807 call->service_id = FS_SERVICE;
808 call->port = htons(AFS_FS_PORT);
809 766
810 /* marshall the parameters */ 767 /* marshall the parameters */
811 bp = call->request; 768 bp = call->request;
@@ -821,7 +778,9 @@ int afs_fs_remove(struct afs_server *server,
821 bp = (void *) bp + padsz; 778 bp = (void *) bp + padsz;
822 } 779 }
823 780
824 return afs_make_call(&server->addr, call, GFP_NOFS, async); 781 afs_use_fs_server(call, fc->cbi);
782 trace_afs_make_fs_call(call, &vnode->fid);
783 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
825} 784}
826 785
827/* 786/*
@@ -829,7 +788,7 @@ int afs_fs_remove(struct afs_server *server,
829 */ 788 */
830static int afs_deliver_fs_link(struct afs_call *call) 789static int afs_deliver_fs_link(struct afs_call *call)
831{ 790{
832 struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; 791 struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
833 const __be32 *bp; 792 const __be32 *bp;
834 int ret; 793 int ret;
835 794
@@ -843,7 +802,7 @@ static int afs_deliver_fs_link(struct afs_call *call)
843 bp = call->buffer; 802 bp = call->buffer;
844 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 803 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
845 xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); 804 xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
846 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 805 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
847 806
848 _leave(" = 0 [done]"); 807 _leave(" = 0 [done]");
849 return 0; 808 return 0;
@@ -854,22 +813,20 @@ static int afs_deliver_fs_link(struct afs_call *call)
854 */ 813 */
855static const struct afs_call_type afs_RXFSLink = { 814static const struct afs_call_type afs_RXFSLink = {
856 .name = "FS.Link", 815 .name = "FS.Link",
816 .op = afs_FS_Link,
857 .deliver = afs_deliver_fs_link, 817 .deliver = afs_deliver_fs_link,
858 .abort_to_error = afs_abort_to_error,
859 .destructor = afs_flat_call_destructor, 818 .destructor = afs_flat_call_destructor,
860}; 819};
861 820
862/* 821/*
863 * make a hard link 822 * make a hard link
864 */ 823 */
865int afs_fs_link(struct afs_server *server, 824int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
866 struct key *key, 825 const char *name)
867 struct afs_vnode *dvnode,
868 struct afs_vnode *vnode,
869 const char *name,
870 bool async)
871{ 826{
827 struct afs_vnode *dvnode = fc->vnode;
872 struct afs_call *call; 828 struct afs_call *call;
829 struct afs_net *net = afs_v2net(vnode);
873 size_t namesz, reqsz, padsz; 830 size_t namesz, reqsz, padsz;
874 __be32 *bp; 831 __be32 *bp;
875 832
@@ -879,15 +836,13 @@ int afs_fs_link(struct afs_server *server,
879 padsz = (4 - (namesz & 3)) & 3; 836 padsz = (4 - (namesz & 3)) & 3;
880 reqsz = (5 * 4) + namesz + padsz + (3 * 4); 837 reqsz = (5 * 4) + namesz + padsz + (3 * 4);
881 838
882 call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); 839 call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
883 if (!call) 840 if (!call)
884 return -ENOMEM; 841 return -ENOMEM;
885 842
886 call->key = key; 843 call->key = fc->key;
887 call->reply = dvnode; 844 call->reply[0] = dvnode;
888 call->reply2 = vnode; 845 call->reply[1] = vnode;
889 call->service_id = FS_SERVICE;
890 call->port = htons(AFS_FS_PORT);
891 846
892 /* marshall the parameters */ 847 /* marshall the parameters */
893 bp = call->request; 848 bp = call->request;
@@ -906,7 +861,9 @@ int afs_fs_link(struct afs_server *server,
906 *bp++ = htonl(vnode->fid.vnode); 861 *bp++ = htonl(vnode->fid.vnode);
907 *bp++ = htonl(vnode->fid.unique); 862 *bp++ = htonl(vnode->fid.unique);
908 863
909 return afs_make_call(&server->addr, call, GFP_NOFS, async); 864 afs_use_fs_server(call, fc->cbi);
865 trace_afs_make_fs_call(call, &vnode->fid);
866 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
910} 867}
911 868
912/* 869/*
@@ -914,7 +871,7 @@ int afs_fs_link(struct afs_server *server,
914 */ 871 */
915static int afs_deliver_fs_symlink(struct afs_call *call) 872static int afs_deliver_fs_symlink(struct afs_call *call)
916{ 873{
917 struct afs_vnode *vnode = call->reply; 874 struct afs_vnode *vnode = call->reply[0];
918 const __be32 *bp; 875 const __be32 *bp;
919 int ret; 876 int ret;
920 877
@@ -926,10 +883,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
926 883
927 /* unmarshall the reply once we've received all of it */ 884 /* unmarshall the reply once we've received all of it */
928 bp = call->buffer; 885 bp = call->buffer;
929 xdr_decode_AFSFid(&bp, call->reply2); 886 xdr_decode_AFSFid(&bp, call->reply[1]);
930 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); 887 xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
931 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 888 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
932 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 889 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
933 890
934 _leave(" = 0 [done]"); 891 _leave(" = 0 [done]");
935 return 0; 892 return 0;
@@ -940,24 +897,23 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
940 */ 897 */
941static const struct afs_call_type afs_RXFSSymlink = { 898static const struct afs_call_type afs_RXFSSymlink = {
942 .name = "FS.Symlink", 899 .name = "FS.Symlink",
900 .op = afs_FS_Symlink,
943 .deliver = afs_deliver_fs_symlink, 901 .deliver = afs_deliver_fs_symlink,
944 .abort_to_error = afs_abort_to_error,
945 .destructor = afs_flat_call_destructor, 902 .destructor = afs_flat_call_destructor,
946}; 903};
947 904
948/* 905/*
949 * create a symbolic link 906 * create a symbolic link
950 */ 907 */
951int afs_fs_symlink(struct afs_server *server, 908int afs_fs_symlink(struct afs_fs_cursor *fc,
952 struct key *key,
953 struct afs_vnode *vnode,
954 const char *name, 909 const char *name,
955 const char *contents, 910 const char *contents,
956 struct afs_fid *newfid, 911 struct afs_fid *newfid,
957 struct afs_file_status *newstatus, 912 struct afs_file_status *newstatus)
958 bool async)
959{ 913{
914 struct afs_vnode *vnode = fc->vnode;
960 struct afs_call *call; 915 struct afs_call *call;
916 struct afs_net *net = afs_v2net(vnode);
961 size_t namesz, reqsz, padsz, c_namesz, c_padsz; 917 size_t namesz, reqsz, padsz, c_namesz, c_padsz;
962 __be32 *bp; 918 __be32 *bp;
963 919
@@ -971,17 +927,15 @@ int afs_fs_symlink(struct afs_server *server,
971 927
972 reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); 928 reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
973 929
974 call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz, 930 call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz,
975 (3 + 21 + 21 + 6) * 4); 931 (3 + 21 + 21 + 6) * 4);
976 if (!call) 932 if (!call)
977 return -ENOMEM; 933 return -ENOMEM;
978 934
979 call->key = key; 935 call->key = fc->key;
980 call->reply = vnode; 936 call->reply[0] = vnode;
981 call->reply2 = newfid; 937 call->reply[1] = newfid;
982 call->reply3 = newstatus; 938 call->reply[2] = newstatus;
983 call->service_id = FS_SERVICE;
984 call->port = htons(AFS_FS_PORT);
985 939
986 /* marshall the parameters */ 940 /* marshall the parameters */
987 bp = call->request; 941 bp = call->request;
@@ -1010,7 +964,9 @@ int afs_fs_symlink(struct afs_server *server,
1010 *bp++ = htonl(S_IRWXUGO); /* unix mode */ 964 *bp++ = htonl(S_IRWXUGO); /* unix mode */
1011 *bp++ = 0; /* segment size */ 965 *bp++ = 0; /* segment size */
1012 966
1013 return afs_make_call(&server->addr, call, GFP_NOFS, async); 967 afs_use_fs_server(call, fc->cbi);
968 trace_afs_make_fs_call(call, &vnode->fid);
969 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1014} 970}
1015 971
1016/* 972/*
@@ -1018,7 +974,7 @@ int afs_fs_symlink(struct afs_server *server,
1018 */ 974 */
1019static int afs_deliver_fs_rename(struct afs_call *call) 975static int afs_deliver_fs_rename(struct afs_call *call)
1020{ 976{
1021 struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; 977 struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1];
1022 const __be32 *bp; 978 const __be32 *bp;
1023 int ret; 979 int ret;
1024 980
@@ -1034,7 +990,7 @@ static int afs_deliver_fs_rename(struct afs_call *call)
1034 if (new_dvnode != orig_dvnode) 990 if (new_dvnode != orig_dvnode)
1035 xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, 991 xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
1036 NULL); 992 NULL);
1037 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 993 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
1038 994
1039 _leave(" = 0 [done]"); 995 _leave(" = 0 [done]");
1040 return 0; 996 return 0;
@@ -1045,23 +1001,22 @@ static int afs_deliver_fs_rename(struct afs_call *call)
1045 */ 1001 */
1046static const struct afs_call_type afs_RXFSRename = { 1002static const struct afs_call_type afs_RXFSRename = {
1047 .name = "FS.Rename", 1003 .name = "FS.Rename",
1004 .op = afs_FS_Rename,
1048 .deliver = afs_deliver_fs_rename, 1005 .deliver = afs_deliver_fs_rename,
1049 .abort_to_error = afs_abort_to_error,
1050 .destructor = afs_flat_call_destructor, 1006 .destructor = afs_flat_call_destructor,
1051}; 1007};
1052 1008
1053/* 1009/*
1054 * create a symbolic link 1010 * create a symbolic link
1055 */ 1011 */
1056int afs_fs_rename(struct afs_server *server, 1012int afs_fs_rename(struct afs_fs_cursor *fc,
1057 struct key *key,
1058 struct afs_vnode *orig_dvnode,
1059 const char *orig_name, 1013 const char *orig_name,
1060 struct afs_vnode *new_dvnode, 1014 struct afs_vnode *new_dvnode,
1061 const char *new_name, 1015 const char *new_name)
1062 bool async)
1063{ 1016{
1017 struct afs_vnode *orig_dvnode = fc->vnode;
1064 struct afs_call *call; 1018 struct afs_call *call;
1019 struct afs_net *net = afs_v2net(orig_dvnode);
1065 size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; 1020 size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
1066 __be32 *bp; 1021 __be32 *bp;
1067 1022
@@ -1078,15 +1033,13 @@ int afs_fs_rename(struct afs_server *server,
1078 (3 * 4) + 1033 (3 * 4) +
1079 4 + n_namesz + n_padsz; 1034 4 + n_namesz + n_padsz;
1080 1035
1081 call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); 1036 call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
1082 if (!call) 1037 if (!call)
1083 return -ENOMEM; 1038 return -ENOMEM;
1084 1039
1085 call->key = key; 1040 call->key = fc->key;
1086 call->reply = orig_dvnode; 1041 call->reply[0] = orig_dvnode;
1087 call->reply2 = new_dvnode; 1042 call->reply[1] = new_dvnode;
1088 call->service_id = FS_SERVICE;
1089 call->port = htons(AFS_FS_PORT);
1090 1043
1091 /* marshall the parameters */ 1044 /* marshall the parameters */
1092 bp = call->request; 1045 bp = call->request;
@@ -1113,7 +1066,9 @@ int afs_fs_rename(struct afs_server *server,
1113 bp = (void *) bp + n_padsz; 1066 bp = (void *) bp + n_padsz;
1114 } 1067 }
1115 1068
1116 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1069 afs_use_fs_server(call, fc->cbi);
1070 trace_afs_make_fs_call(call, &orig_dvnode->fid);
1071 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1117} 1072}
1118 1073
1119/* 1074/*
@@ -1121,7 +1076,7 @@ int afs_fs_rename(struct afs_server *server,
1121 */ 1076 */
1122static int afs_deliver_fs_store_data(struct afs_call *call) 1077static int afs_deliver_fs_store_data(struct afs_call *call)
1123{ 1078{
1124 struct afs_vnode *vnode = call->reply; 1079 struct afs_vnode *vnode = call->reply[0];
1125 const __be32 *bp; 1080 const __be32 *bp;
1126 int ret; 1081 int ret;
1127 1082
@@ -1135,7 +1090,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
1135 bp = call->buffer; 1090 bp = call->buffer;
1136 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, 1091 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
1137 &call->store_version); 1092 &call->store_version);
1138 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 1093 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
1139 1094
1140 afs_pages_written_back(vnode, call); 1095 afs_pages_written_back(vnode, call);
1141 1096
@@ -1148,47 +1103,44 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
1148 */ 1103 */
1149static const struct afs_call_type afs_RXFSStoreData = { 1104static const struct afs_call_type afs_RXFSStoreData = {
1150 .name = "FS.StoreData", 1105 .name = "FS.StoreData",
1106 .op = afs_FS_StoreData,
1151 .deliver = afs_deliver_fs_store_data, 1107 .deliver = afs_deliver_fs_store_data,
1152 .abort_to_error = afs_abort_to_error,
1153 .destructor = afs_flat_call_destructor, 1108 .destructor = afs_flat_call_destructor,
1154}; 1109};
1155 1110
1156static const struct afs_call_type afs_RXFSStoreData64 = { 1111static const struct afs_call_type afs_RXFSStoreData64 = {
1157 .name = "FS.StoreData64", 1112 .name = "FS.StoreData64",
1113 .op = afs_FS_StoreData64,
1158 .deliver = afs_deliver_fs_store_data, 1114 .deliver = afs_deliver_fs_store_data,
1159 .abort_to_error = afs_abort_to_error,
1160 .destructor = afs_flat_call_destructor, 1115 .destructor = afs_flat_call_destructor,
1161}; 1116};
1162 1117
1163/* 1118/*
1164 * store a set of pages to a very large file 1119 * store a set of pages to a very large file
1165 */ 1120 */
1166static int afs_fs_store_data64(struct afs_server *server, 1121static int afs_fs_store_data64(struct afs_fs_cursor *fc,
1167 struct afs_writeback *wb, 1122 struct address_space *mapping,
1168 pgoff_t first, pgoff_t last, 1123 pgoff_t first, pgoff_t last,
1169 unsigned offset, unsigned to, 1124 unsigned offset, unsigned to,
1170 loff_t size, loff_t pos, loff_t i_size, 1125 loff_t size, loff_t pos, loff_t i_size)
1171 bool async)
1172{ 1126{
1173 struct afs_vnode *vnode = wb->vnode; 1127 struct afs_vnode *vnode = fc->vnode;
1174 struct afs_call *call; 1128 struct afs_call *call;
1129 struct afs_net *net = afs_v2net(vnode);
1175 __be32 *bp; 1130 __be32 *bp;
1176 1131
1177 _enter(",%x,{%x:%u},,", 1132 _enter(",%x,{%x:%u},,",
1178 key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); 1133 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
1179 1134
1180 call = afs_alloc_flat_call(&afs_RXFSStoreData64, 1135 call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
1181 (4 + 6 + 3 * 2) * 4, 1136 (4 + 6 + 3 * 2) * 4,
1182 (21 + 6) * 4); 1137 (21 + 6) * 4);
1183 if (!call) 1138 if (!call)
1184 return -ENOMEM; 1139 return -ENOMEM;
1185 1140
1186 call->wb = wb; 1141 call->key = fc->key;
1187 call->key = wb->key; 1142 call->mapping = mapping;
1188 call->reply = vnode; 1143 call->reply[0] = vnode;
1189 call->service_id = FS_SERVICE;
1190 call->port = htons(AFS_FS_PORT);
1191 call->mapping = vnode->vfs_inode.i_mapping;
1192 call->first = first; 1144 call->first = first;
1193 call->last = last; 1145 call->last = last;
1194 call->first_offset = offset; 1146 call->first_offset = offset;
@@ -1217,24 +1169,25 @@ static int afs_fs_store_data64(struct afs_server *server,
1217 *bp++ = htonl(i_size >> 32); 1169 *bp++ = htonl(i_size >> 32);
1218 *bp++ = htonl((u32) i_size); 1170 *bp++ = htonl((u32) i_size);
1219 1171
1220 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1172 trace_afs_make_fs_call(call, &vnode->fid);
1173 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1221} 1174}
1222 1175
1223/* 1176/*
1224 * store a set of pages 1177 * store a set of pages
1225 */ 1178 */
1226int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, 1179int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
1227 pgoff_t first, pgoff_t last, 1180 pgoff_t first, pgoff_t last,
1228 unsigned offset, unsigned to, 1181 unsigned offset, unsigned to)
1229 bool async)
1230{ 1182{
1231 struct afs_vnode *vnode = wb->vnode; 1183 struct afs_vnode *vnode = fc->vnode;
1232 struct afs_call *call; 1184 struct afs_call *call;
1185 struct afs_net *net = afs_v2net(vnode);
1233 loff_t size, pos, i_size; 1186 loff_t size, pos, i_size;
1234 __be32 *bp; 1187 __be32 *bp;
1235 1188
1236 _enter(",%x,{%x:%u},,", 1189 _enter(",%x,{%x:%u},,",
1237 key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); 1190 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
1238 1191
1239 size = (loff_t)to - (loff_t)offset; 1192 size = (loff_t)to - (loff_t)offset;
1240 if (first != last) 1193 if (first != last)
@@ -1251,21 +1204,18 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
1251 (unsigned long long) i_size); 1204 (unsigned long long) i_size);
1252 1205
1253 if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) 1206 if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
1254 return afs_fs_store_data64(server, wb, first, last, offset, to, 1207 return afs_fs_store_data64(fc, mapping, first, last, offset, to,
1255 size, pos, i_size, async); 1208 size, pos, i_size);
1256 1209
1257 call = afs_alloc_flat_call(&afs_RXFSStoreData, 1210 call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
1258 (4 + 6 + 3) * 4, 1211 (4 + 6 + 3) * 4,
1259 (21 + 6) * 4); 1212 (21 + 6) * 4);
1260 if (!call) 1213 if (!call)
1261 return -ENOMEM; 1214 return -ENOMEM;
1262 1215
1263 call->wb = wb; 1216 call->key = fc->key;
1264 call->key = wb->key; 1217 call->mapping = mapping;
1265 call->reply = vnode; 1218 call->reply[0] = vnode;
1266 call->service_id = FS_SERVICE;
1267 call->port = htons(AFS_FS_PORT);
1268 call->mapping = vnode->vfs_inode.i_mapping;
1269 call->first = first; 1219 call->first = first;
1270 call->last = last; 1220 call->last = last;
1271 call->first_offset = offset; 1221 call->first_offset = offset;
@@ -1291,7 +1241,9 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
1291 *bp++ = htonl(size); 1241 *bp++ = htonl(size);
1292 *bp++ = htonl(i_size); 1242 *bp++ = htonl(i_size);
1293 1243
1294 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1244 afs_use_fs_server(call, fc->cbi);
1245 trace_afs_make_fs_call(call, &vnode->fid);
1246 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1295} 1247}
1296 1248
1297/* 1249/*
@@ -1300,7 +1252,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
1300static int afs_deliver_fs_store_status(struct afs_call *call) 1252static int afs_deliver_fs_store_status(struct afs_call *call)
1301{ 1253{
1302 afs_dataversion_t *store_version; 1254 afs_dataversion_t *store_version;
1303 struct afs_vnode *vnode = call->reply; 1255 struct afs_vnode *vnode = call->reply[0];
1304 const __be32 *bp; 1256 const __be32 *bp;
1305 int ret; 1257 int ret;
1306 1258
@@ -1317,7 +1269,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
1317 1269
1318 bp = call->buffer; 1270 bp = call->buffer;
1319 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); 1271 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
1320 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 1272 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
1321 1273
1322 _leave(" = 0 [done]"); 1274 _leave(" = 0 [done]");
1323 return 0; 1275 return 0;
@@ -1328,22 +1280,22 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
1328 */ 1280 */
1329static const struct afs_call_type afs_RXFSStoreStatus = { 1281static const struct afs_call_type afs_RXFSStoreStatus = {
1330 .name = "FS.StoreStatus", 1282 .name = "FS.StoreStatus",
1283 .op = afs_FS_StoreStatus,
1331 .deliver = afs_deliver_fs_store_status, 1284 .deliver = afs_deliver_fs_store_status,
1332 .abort_to_error = afs_abort_to_error,
1333 .destructor = afs_flat_call_destructor, 1285 .destructor = afs_flat_call_destructor,
1334}; 1286};
1335 1287
1336static const struct afs_call_type afs_RXFSStoreData_as_Status = { 1288static const struct afs_call_type afs_RXFSStoreData_as_Status = {
1337 .name = "FS.StoreData", 1289 .name = "FS.StoreData",
1290 .op = afs_FS_StoreData,
1338 .deliver = afs_deliver_fs_store_status, 1291 .deliver = afs_deliver_fs_store_status,
1339 .abort_to_error = afs_abort_to_error,
1340 .destructor = afs_flat_call_destructor, 1292 .destructor = afs_flat_call_destructor,
1341}; 1293};
1342 1294
1343static const struct afs_call_type afs_RXFSStoreData64_as_Status = { 1295static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
1344 .name = "FS.StoreData64", 1296 .name = "FS.StoreData64",
1297 .op = afs_FS_StoreData64,
1345 .deliver = afs_deliver_fs_store_status, 1298 .deliver = afs_deliver_fs_store_status,
1346 .abort_to_error = afs_abort_to_error,
1347 .destructor = afs_flat_call_destructor, 1299 .destructor = afs_flat_call_destructor,
1348}; 1300};
1349 1301
@@ -1351,30 +1303,27 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
1351 * set the attributes on a very large file, using FS.StoreData rather than 1303 * set the attributes on a very large file, using FS.StoreData rather than
1352 * FS.StoreStatus so as to alter the file size also 1304 * FS.StoreStatus so as to alter the file size also
1353 */ 1305 */
1354static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, 1306static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
1355 struct afs_vnode *vnode, struct iattr *attr,
1356 bool async)
1357{ 1307{
1308 struct afs_vnode *vnode = fc->vnode;
1358 struct afs_call *call; 1309 struct afs_call *call;
1310 struct afs_net *net = afs_v2net(vnode);
1359 __be32 *bp; 1311 __be32 *bp;
1360 1312
1361 _enter(",%x,{%x:%u},,", 1313 _enter(",%x,{%x:%u},,",
1362 key_serial(key), vnode->fid.vid, vnode->fid.vnode); 1314 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
1363 1315
1364 ASSERT(attr->ia_valid & ATTR_SIZE); 1316 ASSERT(attr->ia_valid & ATTR_SIZE);
1365 1317
1366 call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status, 1318 call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status,
1367 (4 + 6 + 3 * 2) * 4, 1319 (4 + 6 + 3 * 2) * 4,
1368 (21 + 6) * 4); 1320 (21 + 6) * 4);
1369 if (!call) 1321 if (!call)
1370 return -ENOMEM; 1322 return -ENOMEM;
1371 1323
1372 call->key = key; 1324 call->key = fc->key;
1373 call->reply = vnode; 1325 call->reply[0] = vnode;
1374 call->service_id = FS_SERVICE;
1375 call->port = htons(AFS_FS_PORT);
1376 call->store_version = vnode->status.data_version + 1; 1326 call->store_version = vnode->status.data_version + 1;
1377 call->operation_ID = FSSTOREDATA;
1378 1327
1379 /* marshall the parameters */ 1328 /* marshall the parameters */
1380 bp = call->request; 1329 bp = call->request;
@@ -1392,40 +1341,38 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
1392 *bp++ = htonl(attr->ia_size >> 32); /* new file length */ 1341 *bp++ = htonl(attr->ia_size >> 32); /* new file length */
1393 *bp++ = htonl((u32) attr->ia_size); 1342 *bp++ = htonl((u32) attr->ia_size);
1394 1343
1395 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1344 afs_use_fs_server(call, fc->cbi);
1345 trace_afs_make_fs_call(call, &vnode->fid);
1346 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1396} 1347}
1397 1348
1398/* 1349/*
1399 * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus 1350 * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
1400 * so as to alter the file size also 1351 * so as to alter the file size also
1401 */ 1352 */
1402static int afs_fs_setattr_size(struct afs_server *server, struct key *key, 1353static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
1403 struct afs_vnode *vnode, struct iattr *attr,
1404 bool async)
1405{ 1354{
1355 struct afs_vnode *vnode = fc->vnode;
1406 struct afs_call *call; 1356 struct afs_call *call;
1357 struct afs_net *net = afs_v2net(vnode);
1407 __be32 *bp; 1358 __be32 *bp;
1408 1359
1409 _enter(",%x,{%x:%u},,", 1360 _enter(",%x,{%x:%u},,",
1410 key_serial(key), vnode->fid.vid, vnode->fid.vnode); 1361 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
1411 1362
1412 ASSERT(attr->ia_valid & ATTR_SIZE); 1363 ASSERT(attr->ia_valid & ATTR_SIZE);
1413 if (attr->ia_size >> 32) 1364 if (attr->ia_size >> 32)
1414 return afs_fs_setattr_size64(server, key, vnode, attr, 1365 return afs_fs_setattr_size64(fc, attr);
1415 async);
1416 1366
1417 call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, 1367 call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
1418 (4 + 6 + 3) * 4, 1368 (4 + 6 + 3) * 4,
1419 (21 + 6) * 4); 1369 (21 + 6) * 4);
1420 if (!call) 1370 if (!call)
1421 return -ENOMEM; 1371 return -ENOMEM;
1422 1372
1423 call->key = key; 1373 call->key = fc->key;
1424 call->reply = vnode; 1374 call->reply[0] = vnode;
1425 call->service_id = FS_SERVICE;
1426 call->port = htons(AFS_FS_PORT);
1427 call->store_version = vnode->status.data_version + 1; 1375 call->store_version = vnode->status.data_version + 1;
1428 call->operation_ID = FSSTOREDATA;
1429 1376
1430 /* marshall the parameters */ 1377 /* marshall the parameters */
1431 bp = call->request; 1378 bp = call->request;
@@ -1440,38 +1387,36 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
1440 *bp++ = 0; /* size of write */ 1387 *bp++ = 0; /* size of write */
1441 *bp++ = htonl(attr->ia_size); /* new file length */ 1388 *bp++ = htonl(attr->ia_size); /* new file length */
1442 1389
1443 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1390 afs_use_fs_server(call, fc->cbi);
1391 trace_afs_make_fs_call(call, &vnode->fid);
1392 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1444} 1393}
1445 1394
1446/* 1395/*
1447 * set the attributes on a file, using FS.StoreData if there's a change in file 1396 * set the attributes on a file, using FS.StoreData if there's a change in file
1448 * size, and FS.StoreStatus otherwise 1397 * size, and FS.StoreStatus otherwise
1449 */ 1398 */
1450int afs_fs_setattr(struct afs_server *server, struct key *key, 1399int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
1451 struct afs_vnode *vnode, struct iattr *attr,
1452 bool async)
1453{ 1400{
1401 struct afs_vnode *vnode = fc->vnode;
1454 struct afs_call *call; 1402 struct afs_call *call;
1403 struct afs_net *net = afs_v2net(vnode);
1455 __be32 *bp; 1404 __be32 *bp;
1456 1405
1457 if (attr->ia_valid & ATTR_SIZE) 1406 if (attr->ia_valid & ATTR_SIZE)
1458 return afs_fs_setattr_size(server, key, vnode, attr, 1407 return afs_fs_setattr_size(fc, attr);
1459 async);
1460 1408
1461 _enter(",%x,{%x:%u},,", 1409 _enter(",%x,{%x:%u},,",
1462 key_serial(key), vnode->fid.vid, vnode->fid.vnode); 1410 key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
1463 1411
1464 call = afs_alloc_flat_call(&afs_RXFSStoreStatus, 1412 call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
1465 (4 + 6) * 4, 1413 (4 + 6) * 4,
1466 (21 + 6) * 4); 1414 (21 + 6) * 4);
1467 if (!call) 1415 if (!call)
1468 return -ENOMEM; 1416 return -ENOMEM;
1469 1417
1470 call->key = key; 1418 call->key = fc->key;
1471 call->reply = vnode; 1419 call->reply[0] = vnode;
1472 call->service_id = FS_SERVICE;
1473 call->port = htons(AFS_FS_PORT);
1474 call->operation_ID = FSSTORESTATUS;
1475 1420
1476 /* marshall the parameters */ 1421 /* marshall the parameters */
1477 bp = call->request; 1422 bp = call->request;
@@ -1482,7 +1427,9 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
1482 1427
1483 xdr_encode_AFS_StoreStatus(&bp, attr); 1428 xdr_encode_AFS_StoreStatus(&bp, attr);
1484 1429
1485 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1430 afs_use_fs_server(call, fc->cbi);
1431 trace_afs_make_fs_call(call, &vnode->fid);
1432 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1486} 1433}
1487 1434
1488/* 1435/*
@@ -1510,7 +1457,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1510 return ret; 1457 return ret;
1511 1458
1512 bp = call->buffer; 1459 bp = call->buffer;
1513 xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); 1460 xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
1514 call->offset = 0; 1461 call->offset = 0;
1515 call->unmarshall++; 1462 call->unmarshall++;
1516 1463
@@ -1531,13 +1478,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1531 case 3: 1478 case 3:
1532 _debug("extract volname"); 1479 _debug("extract volname");
1533 if (call->count > 0) { 1480 if (call->count > 0) {
1534 ret = afs_extract_data(call, call->reply3, 1481 ret = afs_extract_data(call, call->reply[2],
1535 call->count, true); 1482 call->count, true);
1536 if (ret < 0) 1483 if (ret < 0)
1537 return ret; 1484 return ret;
1538 } 1485 }
1539 1486
1540 p = call->reply3; 1487 p = call->reply[2];
1541 p[call->count] = 0; 1488 p[call->count] = 0;
1542 _debug("volname '%s'", p); 1489 _debug("volname '%s'", p);
1543 1490
@@ -1578,13 +1525,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1578 case 6: 1525 case 6:
1579 _debug("extract offline"); 1526 _debug("extract offline");
1580 if (call->count > 0) { 1527 if (call->count > 0) {
1581 ret = afs_extract_data(call, call->reply3, 1528 ret = afs_extract_data(call, call->reply[2],
1582 call->count, true); 1529 call->count, true);
1583 if (ret < 0) 1530 if (ret < 0)
1584 return ret; 1531 return ret;
1585 } 1532 }
1586 1533
1587 p = call->reply3; 1534 p = call->reply[2];
1588 p[call->count] = 0; 1535 p[call->count] = 0;
1589 _debug("offline '%s'", p); 1536 _debug("offline '%s'", p);
1590 1537
@@ -1625,13 +1572,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1625 case 9: 1572 case 9:
1626 _debug("extract motd"); 1573 _debug("extract motd");
1627 if (call->count > 0) { 1574 if (call->count > 0) {
1628 ret = afs_extract_data(call, call->reply3, 1575 ret = afs_extract_data(call, call->reply[2],
1629 call->count, true); 1576 call->count, true);
1630 if (ret < 0) 1577 if (ret < 0)
1631 return ret; 1578 return ret;
1632 } 1579 }
1633 1580
1634 p = call->reply3; 1581 p = call->reply[2];
1635 p[call->count] = 0; 1582 p[call->count] = 0;
1636 _debug("motd '%s'", p); 1583 _debug("motd '%s'", p);
1637 1584
@@ -1662,8 +1609,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1662 */ 1609 */
1663static void afs_get_volume_status_call_destructor(struct afs_call *call) 1610static void afs_get_volume_status_call_destructor(struct afs_call *call)
1664{ 1611{
1665 kfree(call->reply3); 1612 kfree(call->reply[2]);
1666 call->reply3 = NULL; 1613 call->reply[2] = NULL;
1667 afs_flat_call_destructor(call); 1614 afs_flat_call_destructor(call);
1668} 1615}
1669 1616
@@ -1672,21 +1619,20 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call)
1672 */ 1619 */
1673static const struct afs_call_type afs_RXFSGetVolumeStatus = { 1620static const struct afs_call_type afs_RXFSGetVolumeStatus = {
1674 .name = "FS.GetVolumeStatus", 1621 .name = "FS.GetVolumeStatus",
1622 .op = afs_FS_GetVolumeStatus,
1675 .deliver = afs_deliver_fs_get_volume_status, 1623 .deliver = afs_deliver_fs_get_volume_status,
1676 .abort_to_error = afs_abort_to_error,
1677 .destructor = afs_get_volume_status_call_destructor, 1624 .destructor = afs_get_volume_status_call_destructor,
1678}; 1625};
1679 1626
1680/* 1627/*
1681 * fetch the status of a volume 1628 * fetch the status of a volume
1682 */ 1629 */
1683int afs_fs_get_volume_status(struct afs_server *server, 1630int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
1684 struct key *key, 1631 struct afs_volume_status *vs)
1685 struct afs_vnode *vnode,
1686 struct afs_volume_status *vs,
1687 bool async)
1688{ 1632{
1633 struct afs_vnode *vnode = fc->vnode;
1689 struct afs_call *call; 1634 struct afs_call *call;
1635 struct afs_net *net = afs_v2net(vnode);
1690 __be32 *bp; 1636 __be32 *bp;
1691 void *tmpbuf; 1637 void *tmpbuf;
1692 1638
@@ -1696,25 +1642,25 @@ int afs_fs_get_volume_status(struct afs_server *server,
1696 if (!tmpbuf) 1642 if (!tmpbuf)
1697 return -ENOMEM; 1643 return -ENOMEM;
1698 1644
1699 call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); 1645 call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
1700 if (!call) { 1646 if (!call) {
1701 kfree(tmpbuf); 1647 kfree(tmpbuf);
1702 return -ENOMEM; 1648 return -ENOMEM;
1703 } 1649 }
1704 1650
1705 call->key = key; 1651 call->key = fc->key;
1706 call->reply = vnode; 1652 call->reply[0] = vnode;
1707 call->reply2 = vs; 1653 call->reply[1] = vs;
1708 call->reply3 = tmpbuf; 1654 call->reply[2] = tmpbuf;
1709 call->service_id = FS_SERVICE;
1710 call->port = htons(AFS_FS_PORT);
1711 1655
1712 /* marshall the parameters */ 1656 /* marshall the parameters */
1713 bp = call->request; 1657 bp = call->request;
1714 bp[0] = htonl(FSGETVOLUMESTATUS); 1658 bp[0] = htonl(FSGETVOLUMESTATUS);
1715 bp[1] = htonl(vnode->fid.vid); 1659 bp[1] = htonl(vnode->fid.vid);
1716 1660
1717 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1661 afs_use_fs_server(call, fc->cbi);
1662 trace_afs_make_fs_call(call, &vnode->fid);
1663 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1718} 1664}
1719 1665
1720/* 1666/*
@@ -1733,7 +1679,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
1733 1679
1734 /* unmarshall the reply once we've received all of it */ 1680 /* unmarshall the reply once we've received all of it */
1735 bp = call->buffer; 1681 bp = call->buffer;
1736 /* xdr_decode_AFSVolSync(&bp, call->replyX); */ 1682 /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
1737 1683
1738 _leave(" = 0 [done]"); 1684 _leave(" = 0 [done]");
1739 return 0; 1685 return 0;
@@ -1744,8 +1690,8 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
1744 */ 1690 */
1745static const struct afs_call_type afs_RXFSSetLock = { 1691static const struct afs_call_type afs_RXFSSetLock = {
1746 .name = "FS.SetLock", 1692 .name = "FS.SetLock",
1693 .op = afs_FS_SetLock,
1747 .deliver = afs_deliver_fs_xxxx_lock, 1694 .deliver = afs_deliver_fs_xxxx_lock,
1748 .abort_to_error = afs_abort_to_error,
1749 .destructor = afs_flat_call_destructor, 1695 .destructor = afs_flat_call_destructor,
1750}; 1696};
1751 1697
@@ -1754,8 +1700,8 @@ static const struct afs_call_type afs_RXFSSetLock = {
1754 */ 1700 */
1755static const struct afs_call_type afs_RXFSExtendLock = { 1701static const struct afs_call_type afs_RXFSExtendLock = {
1756 .name = "FS.ExtendLock", 1702 .name = "FS.ExtendLock",
1703 .op = afs_FS_ExtendLock,
1757 .deliver = afs_deliver_fs_xxxx_lock, 1704 .deliver = afs_deliver_fs_xxxx_lock,
1758 .abort_to_error = afs_abort_to_error,
1759 .destructor = afs_flat_call_destructor, 1705 .destructor = afs_flat_call_destructor,
1760}; 1706};
1761 1707
@@ -1764,33 +1710,29 @@ static const struct afs_call_type afs_RXFSExtendLock = {
1764 */ 1710 */
1765static const struct afs_call_type afs_RXFSReleaseLock = { 1711static const struct afs_call_type afs_RXFSReleaseLock = {
1766 .name = "FS.ReleaseLock", 1712 .name = "FS.ReleaseLock",
1713 .op = afs_FS_ReleaseLock,
1767 .deliver = afs_deliver_fs_xxxx_lock, 1714 .deliver = afs_deliver_fs_xxxx_lock,
1768 .abort_to_error = afs_abort_to_error,
1769 .destructor = afs_flat_call_destructor, 1715 .destructor = afs_flat_call_destructor,
1770}; 1716};
1771 1717
1772/* 1718/*
1773 * get a lock on a file 1719 * Set a lock on a file
1774 */ 1720 */
1775int afs_fs_set_lock(struct afs_server *server, 1721int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
1776 struct key *key,
1777 struct afs_vnode *vnode,
1778 afs_lock_type_t type,
1779 bool async)
1780{ 1722{
1723 struct afs_vnode *vnode = fc->vnode;
1781 struct afs_call *call; 1724 struct afs_call *call;
1725 struct afs_net *net = afs_v2net(vnode);
1782 __be32 *bp; 1726 __be32 *bp;
1783 1727
1784 _enter(""); 1728 _enter("");
1785 1729
1786 call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4); 1730 call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
1787 if (!call) 1731 if (!call)
1788 return -ENOMEM; 1732 return -ENOMEM;
1789 1733
1790 call->key = key; 1734 call->key = fc->key;
1791 call->reply = vnode; 1735 call->reply[0] = vnode;
1792 call->service_id = FS_SERVICE;
1793 call->port = htons(AFS_FS_PORT);
1794 1736
1795 /* marshall the parameters */ 1737 /* marshall the parameters */
1796 bp = call->request; 1738 bp = call->request;
@@ -1800,30 +1742,29 @@ int afs_fs_set_lock(struct afs_server *server,
1800 *bp++ = htonl(vnode->fid.unique); 1742 *bp++ = htonl(vnode->fid.unique);
1801 *bp++ = htonl(type); 1743 *bp++ = htonl(type);
1802 1744
1803 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1745 afs_use_fs_server(call, fc->cbi);
1746 trace_afs_make_fs_call(call, &vnode->fid);
1747 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1804} 1748}
1805 1749
1806/* 1750/*
1807 * extend a lock on a file 1751 * extend a lock on a file
1808 */ 1752 */
1809int afs_fs_extend_lock(struct afs_server *server, 1753int afs_fs_extend_lock(struct afs_fs_cursor *fc)
1810 struct key *key,
1811 struct afs_vnode *vnode,
1812 bool async)
1813{ 1754{
1755 struct afs_vnode *vnode = fc->vnode;
1814 struct afs_call *call; 1756 struct afs_call *call;
1757 struct afs_net *net = afs_v2net(vnode);
1815 __be32 *bp; 1758 __be32 *bp;
1816 1759
1817 _enter(""); 1760 _enter("");
1818 1761
1819 call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4); 1762 call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
1820 if (!call) 1763 if (!call)
1821 return -ENOMEM; 1764 return -ENOMEM;
1822 1765
1823 call->key = key; 1766 call->key = fc->key;
1824 call->reply = vnode; 1767 call->reply[0] = vnode;
1825 call->service_id = FS_SERVICE;
1826 call->port = htons(AFS_FS_PORT);
1827 1768
1828 /* marshall the parameters */ 1769 /* marshall the parameters */
1829 bp = call->request; 1770 bp = call->request;
@@ -1832,30 +1773,29 @@ int afs_fs_extend_lock(struct afs_server *server,
1832 *bp++ = htonl(vnode->fid.vnode); 1773 *bp++ = htonl(vnode->fid.vnode);
1833 *bp++ = htonl(vnode->fid.unique); 1774 *bp++ = htonl(vnode->fid.unique);
1834 1775
1835 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1776 afs_use_fs_server(call, fc->cbi);
1777 trace_afs_make_fs_call(call, &vnode->fid);
1778 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1836} 1779}
1837 1780
1838/* 1781/*
1839 * release a lock on a file 1782 * release a lock on a file
1840 */ 1783 */
1841int afs_fs_release_lock(struct afs_server *server, 1784int afs_fs_release_lock(struct afs_fs_cursor *fc)
1842 struct key *key,
1843 struct afs_vnode *vnode,
1844 bool async)
1845{ 1785{
1786 struct afs_vnode *vnode = fc->vnode;
1846 struct afs_call *call; 1787 struct afs_call *call;
1788 struct afs_net *net = afs_v2net(vnode);
1847 __be32 *bp; 1789 __be32 *bp;
1848 1790
1849 _enter(""); 1791 _enter("");
1850 1792
1851 call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4); 1793 call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
1852 if (!call) 1794 if (!call)
1853 return -ENOMEM; 1795 return -ENOMEM;
1854 1796
1855 call->key = key; 1797 call->key = fc->key;
1856 call->reply = vnode; 1798 call->reply[0] = vnode;
1857 call->service_id = FS_SERVICE;
1858 call->port = htons(AFS_FS_PORT);
1859 1799
1860 /* marshall the parameters */ 1800 /* marshall the parameters */
1861 bp = call->request; 1801 bp = call->request;
@@ -1864,5 +1804,145 @@ int afs_fs_release_lock(struct afs_server *server,
1864 *bp++ = htonl(vnode->fid.vnode); 1804 *bp++ = htonl(vnode->fid.vnode);
1865 *bp++ = htonl(vnode->fid.unique); 1805 *bp++ = htonl(vnode->fid.unique);
1866 1806
1867 return afs_make_call(&server->addr, call, GFP_NOFS, async); 1807 afs_use_fs_server(call, fc->cbi);
1808 trace_afs_make_fs_call(call, &vnode->fid);
1809 return afs_make_call(&fc->ac, call, GFP_NOFS, false);
1810}
1811
1812/*
1813 * Deliver reply data to an FS.GiveUpAllCallBacks operation.
1814 */
1815static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
1816{
1817 return afs_transfer_reply(call);
1818}
1819
1820/*
1821 * FS.GiveUpAllCallBacks operation type
1822 */
1823static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
1824 .name = "FS.GiveUpAllCallBacks",
1825 .op = afs_FS_GiveUpAllCallBacks,
1826 .deliver = afs_deliver_fs_give_up_all_callbacks,
1827 .destructor = afs_flat_call_destructor,
1828};
1829
1830/*
1831 * Flush all the callbacks we have on a server.
1832 */
1833int afs_fs_give_up_all_callbacks(struct afs_net *net,
1834 struct afs_server *server,
1835 struct afs_addr_cursor *ac,
1836 struct key *key)
1837{
1838 struct afs_call *call;
1839 __be32 *bp;
1840
1841 _enter("");
1842
1843 call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0);
1844 if (!call)
1845 return -ENOMEM;
1846
1847 call->key = key;
1848
1849 /* marshall the parameters */
1850 bp = call->request;
1851 *bp++ = htonl(FSGIVEUPALLCALLBACKS);
1852
1853 /* Can't take a ref on server */
1854 return afs_make_call(ac, call, GFP_NOFS, false);
1855}
1856
1857/*
1858 * Deliver reply data to an FS.GetCapabilities operation.
1859 */
1860static int afs_deliver_fs_get_capabilities(struct afs_call *call)
1861{
1862 u32 count;
1863 int ret;
1864
1865 _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
1866
1867again:
1868 switch (call->unmarshall) {
1869 case 0:
1870 call->offset = 0;
1871 call->unmarshall++;
1872
1873 /* Extract the capabilities word count */
1874 case 1:
1875 ret = afs_extract_data(call, &call->tmp,
1876 1 * sizeof(__be32),
1877 true);
1878 if (ret < 0)
1879 return ret;
1880
1881 count = ntohl(call->tmp);
1882
1883 call->count = count;
1884 call->count2 = count;
1885 call->offset = 0;
1886 call->unmarshall++;
1887
1888 /* Extract capabilities words */
1889 case 2:
1890 count = min(call->count, 16U);
1891 ret = afs_extract_data(call, call->buffer,
1892 count * sizeof(__be32),
1893 call->count > 16);
1894 if (ret < 0)
1895 return ret;
1896
1897 /* TODO: Examine capabilities */
1898
1899 call->count -= count;
1900 if (call->count > 0)
1901 goto again;
1902 call->offset = 0;
1903 call->unmarshall++;
1904 break;
1905 }
1906
1907 _leave(" = 0 [done]");
1908 return 0;
1909}
1910
1911/*
1912 * FS.GetCapabilities operation type
1913 */
1914static const struct afs_call_type afs_RXFSGetCapabilities = {
1915 .name = "FS.GetCapabilities",
1916 .op = afs_FS_GetCapabilities,
1917 .deliver = afs_deliver_fs_get_capabilities,
1918 .destructor = afs_flat_call_destructor,
1919};
1920
1921/*
1922 * Probe a fileserver for the capabilities that it supports. This can
1923 * return up to 196 words.
1924 */
1925int afs_fs_get_capabilities(struct afs_net *net,
1926 struct afs_server *server,
1927 struct afs_addr_cursor *ac,
1928 struct key *key)
1929{
1930 struct afs_call *call;
1931 __be32 *bp;
1932
1933 _enter("");
1934
1935 call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
1936 if (!call)
1937 return -ENOMEM;
1938
1939 call->key = key;
1940
1941 /* marshall the parameters */
1942 bp = call->request;
1943 *bp++ = htonl(FSGETCAPABILITIES);
1944
1945 /* Can't take a ref on server */
1946 trace_afs_make_fs_call(call, NULL);
1947 return afs_make_call(ac, call, GFP_NOFS, false);
1868} 1948}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 342316a9e3e0..3415eb7484f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,11 +23,6 @@
23#include <linux/namei.h> 23#include <linux/namei.h>
24#include "internal.h" 24#include "internal.h"
25 25
26struct afs_iget_data {
27 struct afs_fid fid;
28 struct afs_volume *volume; /* volume on which resides */
29};
30
31static const struct inode_operations afs_symlink_inode_operations = { 26static const struct inode_operations afs_symlink_inode_operations = {
32 .get_link = page_get_link, 27 .get_link = page_get_link,
33 .listxattr = afs_listxattr, 28 .listxattr = afs_listxattr,
@@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = {
39static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) 34static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
40{ 35{
41 struct inode *inode = AFS_VNODE_TO_I(vnode); 36 struct inode *inode = AFS_VNODE_TO_I(vnode);
37 bool changed;
42 38
43 _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", 39 _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
44 vnode->status.type, 40 vnode->status.type,
@@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
47 vnode->status.data_version, 43 vnode->status.data_version,
48 vnode->status.mode); 44 vnode->status.mode);
49 45
46 read_seqlock_excl(&vnode->cb_lock);
47
50 switch (vnode->status.type) { 48 switch (vnode->status.type) {
51 case AFS_FTYPE_FILE: 49 case AFS_FTYPE_FILE:
52 inode->i_mode = S_IFREG | vnode->status.mode; 50 inode->i_mode = S_IFREG | vnode->status.mode;
@@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
63 if ((vnode->status.mode & 0777) == 0644) { 61 if ((vnode->status.mode & 0777) == 0644) {
64 inode->i_flags |= S_AUTOMOUNT; 62 inode->i_flags |= S_AUTOMOUNT;
65 63
66 spin_lock(&vnode->lock);
67 set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); 64 set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
68 spin_unlock(&vnode->lock);
69 65
70 inode->i_mode = S_IFDIR | 0555; 66 inode->i_mode = S_IFDIR | 0555;
71 inode->i_op = &afs_mntpt_inode_operations; 67 inode->i_op = &afs_mntpt_inode_operations;
@@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
78 break; 74 break;
79 default: 75 default:
80 printk("kAFS: AFS vnode with undefined type\n"); 76 printk("kAFS: AFS vnode with undefined type\n");
77 read_sequnlock_excl(&vnode->cb_lock);
81 return -EBADMSG; 78 return -EBADMSG;
82 } 79 }
83 80
84#ifdef CONFIG_AFS_FSCACHE 81 changed = (vnode->status.size != inode->i_size);
85 if (vnode->status.size != inode->i_size)
86 fscache_attr_changed(vnode->cache);
87#endif
88 82
89 set_nlink(inode, vnode->status.nlink); 83 set_nlink(inode, vnode->status.nlink);
90 inode->i_uid = vnode->status.owner; 84 inode->i_uid = vnode->status.owner;
@@ -97,13 +91,49 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
97 inode->i_generation = vnode->fid.unique; 91 inode->i_generation = vnode->fid.unique;
98 inode->i_version = vnode->status.data_version; 92 inode->i_version = vnode->status.data_version;
99 inode->i_mapping->a_ops = &afs_fs_aops; 93 inode->i_mapping->a_ops = &afs_fs_aops;
94
95 read_sequnlock_excl(&vnode->cb_lock);
96
97#ifdef CONFIG_AFS_FSCACHE
98 if (changed)
99 fscache_attr_changed(vnode->cache);
100#endif
100 return 0; 101 return 0;
101} 102}
102 103
103/* 104/*
105 * Fetch file status from the volume.
106 */
107int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
108{
109 struct afs_fs_cursor fc;
110 int ret;
111
112 _enter("%s,{%x:%u.%u,S=%lx}",
113 vnode->volume->name,
114 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
115 vnode->flags);
116
117 ret = -ERESTARTSYS;
118 if (afs_begin_vnode_operation(&fc, vnode, key)) {
119 while (afs_select_fileserver(&fc)) {
120 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
121 afs_fs_fetch_file_status(&fc, NULL);
122 }
123
124 afs_check_for_remote_deletion(&fc, fc.vnode);
125 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
126 ret = afs_end_vnode_operation(&fc);
127 }
128
129 _leave(" = %d", ret);
130 return ret;
131}
132
133/*
104 * iget5() comparator 134 * iget5() comparator
105 */ 135 */
106static int afs_iget5_test(struct inode *inode, void *opaque) 136int afs_iget5_test(struct inode *inode, void *opaque)
107{ 137{
108 struct afs_iget_data *data = opaque; 138 struct afs_iget_data *data = opaque;
109 139
@@ -204,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
204 */ 234 */
205struct inode *afs_iget(struct super_block *sb, struct key *key, 235struct inode *afs_iget(struct super_block *sb, struct key *key,
206 struct afs_fid *fid, struct afs_file_status *status, 236 struct afs_fid *fid, struct afs_file_status *status,
207 struct afs_callback *cb) 237 struct afs_callback *cb, struct afs_cb_interest *cbi)
208{ 238{
209 struct afs_iget_data data = { .fid = *fid }; 239 struct afs_iget_data data = { .fid = *fid };
210 struct afs_super_info *as; 240 struct afs_super_info *as;
@@ -237,8 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
237 267
238 if (!status) { 268 if (!status) {
239 /* it's a remotely extant inode */ 269 /* it's a remotely extant inode */
240 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); 270 ret = afs_fetch_status(vnode, key);
241 ret = afs_vnode_fetch_status(vnode, NULL, key);
242 if (ret < 0) 271 if (ret < 0)
243 goto bad_inode; 272 goto bad_inode;
244 } else { 273 } else {
@@ -249,16 +278,17 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
249 /* it's a symlink we just created (the fileserver 278 /* it's a symlink we just created (the fileserver
250 * didn't give us a callback) */ 279 * didn't give us a callback) */
251 vnode->cb_version = 0; 280 vnode->cb_version = 0;
252 vnode->cb_expiry = 0;
253 vnode->cb_type = 0; 281 vnode->cb_type = 0;
254 vnode->cb_expires = ktime_get_real_seconds(); 282 vnode->cb_expires_at = 0;
255 } else { 283 } else {
256 vnode->cb_version = cb->version; 284 vnode->cb_version = cb->version;
257 vnode->cb_expiry = cb->expiry;
258 vnode->cb_type = cb->type; 285 vnode->cb_type = cb->type;
259 vnode->cb_expires = vnode->cb_expiry + 286 vnode->cb_expires_at = cb->expiry;
260 ktime_get_real_seconds(); 287 vnode->cb_interest = afs_get_cb_interest(cbi);
288 set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
261 } 289 }
290
291 vnode->cb_expires_at += ktime_get_real_seconds();
262 } 292 }
263 293
264 /* set up caching before mapping the status, as map-status reads the 294 /* set up caching before mapping the status, as map-status reads the
@@ -320,25 +350,34 @@ void afs_zap_data(struct afs_vnode *vnode)
320 */ 350 */
321int afs_validate(struct afs_vnode *vnode, struct key *key) 351int afs_validate(struct afs_vnode *vnode, struct key *key)
322{ 352{
353 time64_t now = ktime_get_real_seconds();
354 bool valid = false;
323 int ret; 355 int ret;
324 356
325 _enter("{v={%x:%u} fl=%lx},%x", 357 _enter("{v={%x:%u} fl=%lx},%x",
326 vnode->fid.vid, vnode->fid.vnode, vnode->flags, 358 vnode->fid.vid, vnode->fid.vnode, vnode->flags,
327 key_serial(key)); 359 key_serial(key));
328 360
329 if (vnode->cb_promised && 361 /* Quickly check the callback state. Ideally, we'd use read_seqbegin
330 !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && 362 * here, but we have no way to pass the net namespace to the RCU
331 !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && 363 * cleanup for the server record.
332 !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { 364 */
333 if (vnode->cb_expires < ktime_get_real_seconds() + 10) { 365 read_seqlock_excl(&vnode->cb_lock);
334 _debug("callback expired"); 366
335 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); 367 if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
336 } else { 368 if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
337 goto valid; 369 vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
370 } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
371 !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
372 vnode->cb_expires_at - 10 > now) {
373 valid = true;
338 } 374 }
375 } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
376 valid = true;
339 } 377 }
340 378
341 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) 379 read_sequnlock_excl(&vnode->cb_lock);
380 if (valid)
342 goto valid; 381 goto valid;
343 382
344 mutex_lock(&vnode->validate_lock); 383 mutex_lock(&vnode->validate_lock);
@@ -347,12 +386,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
347 * a new promise - note that if the (parent) directory's metadata was 386 * a new promise - note that if the (parent) directory's metadata was
348 * changed then the security may be different and we may no longer have 387 * changed then the security may be different and we may no longer have
349 * access */ 388 * access */
350 if (!vnode->cb_promised || 389 if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
351 test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
352 _debug("not promised"); 390 _debug("not promised");
353 ret = afs_vnode_fetch_status(vnode, NULL, key); 391 ret = afs_fetch_status(vnode, key);
354 if (ret < 0) 392 if (ret < 0) {
393 if (ret == -ENOENT) {
394 set_bit(AFS_VNODE_DELETED, &vnode->flags);
395 ret = -ESTALE;
396 }
355 goto error_unlock; 397 goto error_unlock;
398 }
356 _debug("new promise [fl=%lx]", vnode->flags); 399 _debug("new promise [fl=%lx]", vnode->flags);
357 } 400 }
358 401
@@ -367,7 +410,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
367 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) 410 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
368 afs_zap_data(vnode); 411 afs_zap_data(vnode);
369 412
370 clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); 413 clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
371 mutex_unlock(&vnode->validate_lock); 414 mutex_unlock(&vnode->validate_lock);
372valid: 415valid:
373 _leave(" = 0"); 416 _leave(" = 0");
@@ -386,10 +429,17 @@ int afs_getattr(const struct path *path, struct kstat *stat,
386 u32 request_mask, unsigned int query_flags) 429 u32 request_mask, unsigned int query_flags)
387{ 430{
388 struct inode *inode = d_inode(path->dentry); 431 struct inode *inode = d_inode(path->dentry);
432 struct afs_vnode *vnode = AFS_FS_I(inode);
433 int seq = 0;
389 434
390 _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); 435 _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
391 436
392 generic_fillattr(inode, stat); 437 do {
438 read_seqbegin_or_lock(&vnode->cb_lock, &seq);
439 generic_fillattr(inode, stat);
440 } while (need_seqretry(&vnode->cb_lock, seq));
441
442 done_seqretry(&vnode->cb_lock, seq);
393 return 0; 443 return 0;
394} 444}
395 445
@@ -411,18 +461,14 @@ int afs_drop_inode(struct inode *inode)
411 */ 461 */
412void afs_evict_inode(struct inode *inode) 462void afs_evict_inode(struct inode *inode)
413{ 463{
414 struct afs_permits *permits;
415 struct afs_vnode *vnode; 464 struct afs_vnode *vnode;
416 465
417 vnode = AFS_FS_I(inode); 466 vnode = AFS_FS_I(inode);
418 467
419 _enter("{%x:%u.%d} v=%u x=%u t=%u }", 468 _enter("{%x:%u.%d}",
420 vnode->fid.vid, 469 vnode->fid.vid,
421 vnode->fid.vnode, 470 vnode->fid.vnode,
422 vnode->fid.unique, 471 vnode->fid.unique);
423 vnode->cb_version,
424 vnode->cb_expiry,
425 vnode->cb_type);
426 472
427 _debug("CLEAR INODE %p", inode); 473 _debug("CLEAR INODE %p", inode);
428 474
@@ -431,31 +477,24 @@ void afs_evict_inode(struct inode *inode)
431 truncate_inode_pages_final(&inode->i_data); 477 truncate_inode_pages_final(&inode->i_data);
432 clear_inode(inode); 478 clear_inode(inode);
433 479
434 afs_give_up_callback(vnode); 480 if (vnode->cb_interest) {
435 481 afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
436 if (vnode->server) { 482 vnode->cb_interest = NULL;
437 spin_lock(&vnode->server->fs_lock);
438 rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
439 spin_unlock(&vnode->server->fs_lock);
440 afs_put_server(vnode->server);
441 vnode->server = NULL;
442 } 483 }
443 484
444 ASSERT(list_empty(&vnode->writebacks)); 485 while (!list_empty(&vnode->wb_keys)) {
445 ASSERT(!vnode->cb_promised); 486 struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
487 struct afs_wb_key, vnode_link);
488 list_del(&wbk->vnode_link);
489 afs_put_wb_key(wbk);
490 }
446 491
447#ifdef CONFIG_AFS_FSCACHE 492#ifdef CONFIG_AFS_FSCACHE
448 fscache_relinquish_cookie(vnode->cache, 0); 493 fscache_relinquish_cookie(vnode->cache, 0);
449 vnode->cache = NULL; 494 vnode->cache = NULL;
450#endif 495#endif
451 496
452 mutex_lock(&vnode->permits_lock); 497 afs_put_permits(vnode->permit_cache);
453 permits = vnode->permits;
454 RCU_INIT_POINTER(vnode->permits, NULL);
455 mutex_unlock(&vnode->permits_lock);
456 if (permits)
457 call_rcu(&permits->rcu, afs_zap_permits);
458
459 _leave(""); 498 _leave("");
460} 499}
461 500
@@ -464,6 +503,7 @@ void afs_evict_inode(struct inode *inode)
464 */ 503 */
465int afs_setattr(struct dentry *dentry, struct iattr *attr) 504int afs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 505{
506 struct afs_fs_cursor fc;
467 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); 507 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
468 struct key *key; 508 struct key *key;
469 int ret; 509 int ret;
@@ -479,13 +519,11 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
479 } 519 }
480 520
481 /* flush any dirty data outstanding on a regular file */ 521 /* flush any dirty data outstanding on a regular file */
482 if (S_ISREG(vnode->vfs_inode.i_mode)) { 522 if (S_ISREG(vnode->vfs_inode.i_mode))
483 filemap_write_and_wait(vnode->vfs_inode.i_mapping); 523 filemap_write_and_wait(vnode->vfs_inode.i_mapping);
484 afs_writeback_all(vnode);
485 }
486 524
487 if (attr->ia_valid & ATTR_FILE) { 525 if (attr->ia_valid & ATTR_FILE) {
488 key = attr->ia_file->private_data; 526 key = afs_file_key(attr->ia_file);
489 } else { 527 } else {
490 key = afs_request_key(vnode->volume->cell); 528 key = afs_request_key(vnode->volume->cell);
491 if (IS_ERR(key)) { 529 if (IS_ERR(key)) {
@@ -494,7 +532,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
494 } 532 }
495 } 533 }
496 534
497 ret = afs_vnode_setattr(vnode, key, attr); 535 ret = -ERESTARTSYS;
536 if (afs_begin_vnode_operation(&fc, vnode, key)) {
537 while (afs_select_fileserver(&fc)) {
538 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
539 afs_fs_setattr(&fc, attr);
540 }
541
542 afs_check_for_remote_deletion(&fc, fc.vnode);
543 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
544 ret = afs_end_vnode_operation(&fc);
545 }
546
498 if (!(attr->ia_valid & ATTR_FILE)) 547 if (!(attr->ia_valid & ATTR_FILE))
499 key_put(key); 548 key_put(key);
500 549
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3f03f7888302..bd8dcee7e066 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
21#include <linux/fscache.h> 21#include <linux/fscache.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/uuid.h> 23#include <linux/uuid.h>
24#include <net/net_namespace.h>
24#include <net/af_rxrpc.h> 25#include <net/af_rxrpc.h>
25 26
26#include "afs.h" 27#include "afs.h"
@@ -31,16 +32,6 @@
31struct pagevec; 32struct pagevec;
32struct afs_call; 33struct afs_call;
33 34
34typedef enum {
35 AFS_VL_NEW, /* new, uninitialised record */
36 AFS_VL_CREATING, /* creating record */
37 AFS_VL_VALID, /* record is pending */
38 AFS_VL_NO_VOLUME, /* no such volume available */
39 AFS_VL_UPDATING, /* update in progress */
40 AFS_VL_VOLUME_DELETED, /* volume was deleted */
41 AFS_VL_UNCERTAIN, /* uncertain state (update failed) */
42} __attribute__((packed)) afs_vlocation_state_t;
43
44struct afs_mount_params { 35struct afs_mount_params {
45 bool rwpath; /* T if the parent should be considered R/W */ 36 bool rwpath; /* T if the parent should be considered R/W */
46 bool force; /* T to force cell type */ 37 bool force; /* T to force cell type */
@@ -48,20 +39,43 @@ struct afs_mount_params {
48 afs_voltype_t type; /* type of volume requested */ 39 afs_voltype_t type; /* type of volume requested */
49 int volnamesz; /* size of volume name */ 40 int volnamesz; /* size of volume name */
50 const char *volname; /* name of volume to mount */ 41 const char *volname; /* name of volume to mount */
42 struct afs_net *net; /* Network namespace in effect */
51 struct afs_cell *cell; /* cell in which to find volume */ 43 struct afs_cell *cell; /* cell in which to find volume */
52 struct afs_volume *volume; /* volume record */ 44 struct afs_volume *volume; /* volume record */
53 struct key *key; /* key to use for secure mounting */ 45 struct key *key; /* key to use for secure mounting */
54}; 46};
55 47
48struct afs_iget_data {
49 struct afs_fid fid;
50 struct afs_volume *volume; /* volume on which resides */
51};
52
56enum afs_call_state { 53enum afs_call_state {
57 AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ 54 AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */
58 AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ 55 AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */
59 AFS_CALL_AWAIT_OP_ID, /* awaiting op ID on incoming call */ 56 AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */
60 AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ 57 AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */
61 AFS_CALL_REPLYING, /* replying to incoming call */ 58 AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
62 AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ 59 AFS_CALL_SV_REPLYING, /* Server: Replying */
63 AFS_CALL_COMPLETE, /* Completed or failed */ 60 AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
61 AFS_CALL_COMPLETE, /* Completed or failed */
64}; 62};
63
64/*
65 * List of server addresses.
66 */
67struct afs_addr_list {
68 struct rcu_head rcu; /* Must be first */
69 refcount_t usage;
70 u32 version; /* Version */
71 unsigned short nr_addrs;
72 unsigned short index; /* Address currently in use */
73 unsigned short nr_ipv4; /* Number of IPv4 addresses */
74 unsigned long probed; /* Mask of servers that have been probed */
75 unsigned long yfs; /* Mask of servers that are YFS */
76 struct sockaddr_rxrpc addrs[];
77};
78
65/* 79/*
66 * a record of an in-progress RxRPC call 80 * a record of an in-progress RxRPC call
67 */ 81 */
@@ -72,25 +86,25 @@ struct afs_call {
72 struct work_struct work; /* actual work processor */ 86 struct work_struct work; /* actual work processor */
73 struct rxrpc_call *rxcall; /* RxRPC call handle */ 87 struct rxrpc_call *rxcall; /* RxRPC call handle */
74 struct key *key; /* security for this call */ 88 struct key *key; /* security for this call */
75 struct afs_server *server; /* server affected by incoming CM call */ 89 struct afs_net *net; /* The network namespace */
90 struct afs_server *cm_server; /* Server affected by incoming CM call */
91 struct afs_cb_interest *cbi; /* Callback interest for server used */
76 void *request; /* request data (first part) */ 92 void *request; /* request data (first part) */
77 struct address_space *mapping; /* page set */ 93 struct address_space *mapping; /* Pages being written from */
78 struct afs_writeback *wb; /* writeback being performed */
79 void *buffer; /* reply receive buffer */ 94 void *buffer; /* reply receive buffer */
80 void *reply; /* reply buffer (first part) */ 95 void *reply[4]; /* Where to put the reply */
81 void *reply2; /* reply buffer (second part) */
82 void *reply3; /* reply buffer (third part) */
83 void *reply4; /* reply buffer (fourth part) */
84 pgoff_t first; /* first page in mapping to deal with */ 96 pgoff_t first; /* first page in mapping to deal with */
85 pgoff_t last; /* last page in mapping to deal with */ 97 pgoff_t last; /* last page in mapping to deal with */
86 size_t offset; /* offset into received data store */ 98 size_t offset; /* offset into received data store */
87 atomic_t usage; 99 atomic_t usage;
88 enum afs_call_state state; 100 enum afs_call_state state;
101 spinlock_t state_lock;
89 int error; /* error code */ 102 int error; /* error code */
90 u32 abort_code; /* Remote abort ID or 0 */ 103 u32 abort_code; /* Remote abort ID or 0 */
91 unsigned request_size; /* size of request data */ 104 unsigned request_size; /* size of request data */
92 unsigned reply_max; /* maximum size of reply */ 105 unsigned reply_max; /* maximum size of reply */
93 unsigned first_offset; /* offset into mapping[first] */ 106 unsigned first_offset; /* offset into mapping[first] */
107 unsigned int cb_break; /* cb_break + cb_s_break before the call */
94 union { 108 union {
95 unsigned last_to; /* amount of mapping[last] */ 109 unsigned last_to; /* amount of mapping[last] */
96 unsigned count2; /* count used in unmarshalling */ 110 unsigned count2; /* count used in unmarshalling */
@@ -100,9 +114,9 @@ struct afs_call {
100 bool send_pages; /* T if data from mapping should be sent */ 114 bool send_pages; /* T if data from mapping should be sent */
101 bool need_attention; /* T if RxRPC poked us */ 115 bool need_attention; /* T if RxRPC poked us */
102 bool async; /* T if asynchronous */ 116 bool async; /* T if asynchronous */
117 bool ret_reply0; /* T if should return reply[0] on success */
103 bool upgrade; /* T to request service upgrade */ 118 bool upgrade; /* T to request service upgrade */
104 u16 service_id; /* RxRPC service ID to call */ 119 u16 service_id; /* Actual service ID (after upgrade) */
105 __be16 port; /* target UDP port */
106 u32 operation_ID; /* operation ID for an incoming call */ 120 u32 operation_ID; /* operation ID for an incoming call */
107 u32 count; /* count for use in unmarshalling */ 121 u32 count; /* count for use in unmarshalling */
108 __be32 tmp; /* place to extract temporary data */ 122 __be32 tmp; /* place to extract temporary data */
@@ -111,15 +125,13 @@ struct afs_call {
111 125
112struct afs_call_type { 126struct afs_call_type {
113 const char *name; 127 const char *name;
128 unsigned int op; /* Really enum afs_fs_operation */
114 129
115 /* deliver request or reply data to an call 130 /* deliver request or reply data to an call
116 * - returning an error will cause the call to be aborted 131 * - returning an error will cause the call to be aborted
117 */ 132 */
118 int (*deliver)(struct afs_call *call); 133 int (*deliver)(struct afs_call *call);
119 134
120 /* map an abort code to an error number */
121 int (*abort_to_error)(u32 abort_code);
122
123 /* clean up a call */ 135 /* clean up a call */
124 void (*destructor)(struct afs_call *call); 136 void (*destructor)(struct afs_call *call);
125 137
@@ -128,6 +140,30 @@ struct afs_call_type {
128}; 140};
129 141
130/* 142/*
143 * Key available for writeback on a file.
144 */
145struct afs_wb_key {
146 refcount_t usage;
147 struct key *key;
148 struct list_head vnode_link; /* Link in vnode->wb_keys */
149};
150
151/*
152 * AFS open file information record. Pointed to by file->private_data.
153 */
154struct afs_file {
155 struct key *key; /* The key this file was opened with */
156 struct afs_wb_key *wb; /* Writeback key record for this file */
157};
158
159static inline struct key *afs_file_key(struct file *file)
160{
161 struct afs_file *af = file->private_data;
162
163 return af->key;
164}
165
166/*
131 * Record of an outstanding read operation on a vnode. 167 * Record of an outstanding read operation on a vnode.
132 */ 168 */
133struct afs_read { 169struct afs_read {
@@ -143,38 +179,13 @@ struct afs_read {
143}; 179};
144 180
145/* 181/*
146 * record of an outstanding writeback on a vnode
147 */
148struct afs_writeback {
149 struct list_head link; /* link in vnode->writebacks */
150 struct work_struct writer; /* work item to perform the writeback */
151 struct afs_vnode *vnode; /* vnode to which this write applies */
152 struct key *key; /* owner of this write */
153 wait_queue_head_t waitq; /* completion and ready wait queue */
154 pgoff_t first; /* first page in batch */
155 pgoff_t point; /* last page in current store op */
156 pgoff_t last; /* last page in batch (inclusive) */
157 unsigned offset_first; /* offset into first page of start of write */
158 unsigned to_last; /* offset into last page of end of write */
159 int num_conflicts; /* count of conflicting writes in list */
160 int usage;
161 bool conflicts; /* T if has dependent conflicts */
162 enum {
163 AFS_WBACK_SYNCING, /* synchronisation being performed */
164 AFS_WBACK_PENDING, /* write pending */
165 AFS_WBACK_CONFLICTING, /* conflicting writes posted */
166 AFS_WBACK_WRITING, /* writing back */
167 AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
168 } state __attribute__((packed));
169};
170
171/*
172 * AFS superblock private data 182 * AFS superblock private data
173 * - there's one superblock per volume 183 * - there's one superblock per volume
174 */ 184 */
175struct afs_super_info { 185struct afs_super_info {
186 struct afs_net *net; /* Network namespace */
187 struct afs_cell *cell; /* The cell in which the volume resides */
176 struct afs_volume *volume; /* volume record */ 188 struct afs_volume *volume; /* volume record */
177 char rwparent; /* T if parent is R/W AFS volume */
178}; 189};
179 190
180static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) 191static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -185,149 +196,238 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
185extern struct file_system_type afs_fs_type; 196extern struct file_system_type afs_fs_type;
186 197
187/* 198/*
188 * entry in the cached cell catalogue 199 * AFS network namespace record.
189 */ 200 */
190struct afs_cache_cell { 201struct afs_net {
191 char name[AFS_MAXCELLNAME]; /* cell name (padded with NULs) */ 202 struct afs_uuid uuid;
192 struct in_addr vl_servers[15]; /* cached cell VL servers */ 203 bool live; /* F if this namespace is being removed */
204
205 /* AF_RXRPC I/O stuff */
206 struct socket *socket;
207 struct afs_call *spare_incoming_call;
208 struct work_struct charge_preallocation_work;
209 struct mutex socket_mutex;
210 atomic_t nr_outstanding_calls;
211 atomic_t nr_superblocks;
212
213 /* Cell database */
214 struct rb_root cells;
215 struct afs_cell *ws_cell;
216 struct work_struct cells_manager;
217 struct timer_list cells_timer;
218 atomic_t cells_outstanding;
219 seqlock_t cells_lock;
220
221 spinlock_t proc_cells_lock;
222 struct list_head proc_cells;
223
224 /* Known servers. Theoretically each fileserver can only be in one
225 * cell, but in practice, people create aliases and subsets and there's
226 * no easy way to distinguish them.
227 */
228 seqlock_t fs_lock; /* For fs_servers */
229 struct rb_root fs_servers; /* afs_server (by server UUID or address) */
230 struct list_head fs_updates; /* afs_server (by update_at) */
231 struct hlist_head fs_proc; /* procfs servers list */
232
233 struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
234 struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
235 seqlock_t fs_addr_lock; /* For fs_addresses[46] */
236
237 struct work_struct fs_manager;
238 struct timer_list fs_timer;
239 atomic_t servers_outstanding;
240
241 /* File locking renewal management */
242 struct mutex lock_manager_mutex;
243
244 /* Misc */
245 struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
246};
247
248extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns
249
250enum afs_cell_state {
251 AFS_CELL_UNSET,
252 AFS_CELL_ACTIVATING,
253 AFS_CELL_ACTIVE,
254 AFS_CELL_DEACTIVATING,
255 AFS_CELL_INACTIVE,
256 AFS_CELL_FAILED,
193}; 257};
194 258
195/* 259/*
196 * AFS cell record 260 * AFS cell record.
261 *
262 * This is a tricky concept to get right as it is possible to create aliases
263 * simply by pointing AFSDB/SRV records for two names at the same set of VL
264 * servers; it is also possible to do things like setting up two sets of VL
265 * servers, one of which provides a superset of the volumes provided by the
266 * other (for internal/external division, for example).
267 *
268 * Cells only exist in the sense that (a) a cell's name maps to a set of VL
269 * servers and (b) a cell's name is used by the client to select the key to use
270 * for authentication and encryption. The cell name is not typically used in
271 * the protocol.
272 *
273 * There is no easy way to determine if two cells are aliases or one is a
274 * subset of another.
197 */ 275 */
198struct afs_cell { 276struct afs_cell {
199 atomic_t usage; 277 union {
200 struct list_head link; /* main cell list link */ 278 struct rcu_head rcu;
279 struct rb_node net_node; /* Node in net->cells */
280 };
281 struct afs_net *net;
201 struct key *anonymous_key; /* anonymous user key for this cell */ 282 struct key *anonymous_key; /* anonymous user key for this cell */
283 struct work_struct manager; /* Manager for init/deinit/dns */
202 struct list_head proc_link; /* /proc cell list link */ 284 struct list_head proc_link; /* /proc cell list link */
203#ifdef CONFIG_AFS_FSCACHE 285#ifdef CONFIG_AFS_FSCACHE
204 struct fscache_cookie *cache; /* caching cookie */ 286 struct fscache_cookie *cache; /* caching cookie */
205#endif 287#endif
206 288 time64_t dns_expiry; /* Time AFSDB/SRV record expires */
207 /* server record management */ 289 time64_t last_inactive; /* Time of last drop of usage count */
208 rwlock_t servers_lock; /* active server list lock */ 290 atomic_t usage;
209 struct list_head servers; /* active server list */ 291 unsigned long flags;
210 292#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */
211 /* volume location record management */ 293#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
212 struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ 294#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
213 struct list_head vl_list; /* cell's active VL record list */ 295#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
214 spinlock_t vl_lock; /* vl_list lock */ 296#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */
215 unsigned short vl_naddrs; /* number of VL servers in addr list */ 297 enum afs_cell_state state;
216 unsigned short vl_curr_svix; /* current server index */ 298 short error;
217 struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ 299
218 300 /* Active fileserver interaction state. */
219 char name[0]; /* cell name - must go last */ 301 struct list_head proc_volumes; /* procfs volume list */
302 rwlock_t proc_lock;
303
304 /* VL server list. */
305 rwlock_t vl_addrs_lock; /* Lock on vl_addrs */
306 struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */
307 u8 name_len; /* Length of name */
308 char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
220}; 309};
221 310
222/* 311/*
223 * entry in the cached volume location catalogue 312 * Cached VLDB entry.
313 *
314 * This is pointed to by cell->vldb_entries, indexed by name.
224 */ 315 */
225struct afs_cache_vlocation { 316struct afs_vldb_entry {
226 /* volume name (lowercase, padded with NULs) */ 317 afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */
227 uint8_t name[AFS_MAXVOLNAME + 1];
228 318
229 uint8_t nservers; /* number of entries used in servers[] */ 319 unsigned long flags;
230 uint8_t vidmask; /* voltype mask for vid[] */ 320#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */
231 uint8_t srvtmask[8]; /* voltype masks for servers[] */ 321#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */
322#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */
323#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */
324#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */
325
326 uuid_t fs_server[AFS_NMAXNSERVERS];
327 u8 fs_mask[AFS_NMAXNSERVERS];
232#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ 328#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
233#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ 329#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
234#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ 330#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
235 331 short error;
236 afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */ 332 u8 nr_servers; /* Number of server records */
237 struct in_addr servers[8]; /* fileserver addresses */ 333 u8 name_len;
238 time_t rtime; /* last retrieval time */ 334 u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
239}; 335};
240 336
241/* 337/*
242 * volume -> vnode hash table entry 338 * Record of fileserver with which we're actively communicating.
243 */ 339 */
244struct afs_cache_vhash { 340struct afs_server {
245 afs_voltype_t vtype; /* which volume variation */ 341 struct rcu_head rcu;
246 uint8_t hash_bucket; /* which hash bucket this represents */ 342 union {
247} __attribute__((packed)); 343 uuid_t uuid; /* Server ID */
344 struct afs_uuid _uuid;
345 };
248 346
249/* 347 struct afs_addr_list __rcu *addresses;
250 * AFS volume location record 348 struct rb_node uuid_rb; /* Link in net->servers */
251 */ 349 struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
252struct afs_vlocation { 350 struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
351 struct hlist_node proc_link; /* Link in net->fs_proc */
352 struct afs_server *gc_next; /* Next server in manager's list */
353 time64_t put_time; /* Time at which last put */
354 time64_t update_at; /* Time at which to next update the record */
355 unsigned long flags;
356#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
357#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
358#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
359#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
360#define AFS_SERVER_FL_UPDATING 4
361#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */
362#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
253 atomic_t usage; 363 atomic_t usage;
254 time64_t time_of_death; /* time at which put reduced usage to 0 */ 364 u32 addr_version; /* Address list version */
255 struct list_head link; /* link in cell volume location list */ 365
256 struct list_head grave; /* link in master graveyard list */ 366 /* file service access */
257 struct list_head update; /* link in master update list */ 367 rwlock_t fs_lock; /* access lock */
258 struct afs_cell *cell; /* cell to which volume belongs */ 368
259#ifdef CONFIG_AFS_FSCACHE 369 /* callback promise management */
260 struct fscache_cookie *cache; /* caching cookie */ 370 struct list_head cb_interests; /* List of superblocks using this server */
261#endif 371 unsigned cb_s_break; /* Break-everything counter. */
262 struct afs_cache_vlocation vldb; /* volume information DB record */ 372 rwlock_t cb_break_lock; /* Volume finding lock */
263 struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
264 wait_queue_head_t waitq; /* status change waitqueue */
265 time64_t update_at; /* time at which record should be updated */
266 spinlock_t lock; /* access lock */
267 afs_vlocation_state_t state; /* volume location state */
268 unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
269 unsigned short upd_busy_cnt; /* EBUSY count during update */
270 bool valid; /* T if valid */
271}; 373};
272 374
273/* 375/*
274 * AFS fileserver record 376 * Interest by a superblock on a server.
275 */ 377 */
276struct afs_server { 378struct afs_cb_interest {
277 atomic_t usage; 379 struct list_head cb_link; /* Link in server->cb_interests */
278 time64_t time_of_death; /* time at which put reduced usage to 0 */ 380 struct afs_server *server; /* Server on which this interest resides */
279 struct in_addr addr; /* server address */ 381 struct super_block *sb; /* Superblock on which inodes reside */
280 struct afs_cell *cell; /* cell in which server resides */ 382 afs_volid_t vid; /* Volume ID to match */
281 struct list_head link; /* link in cell's server list */ 383 refcount_t usage;
282 struct list_head grave; /* link in master graveyard list */ 384};
283 struct rb_node master_rb; /* link in master by-addr tree */
284 struct rw_semaphore sem; /* access lock */
285 385
286 /* file service access */ 386/*
287 struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */ 387 * Replaceable server list.
288 unsigned long fs_act_jif; /* time at which last activity occurred */ 388 */
289 unsigned long fs_dead_jif; /* time at which no longer to be considered dead */ 389struct afs_server_entry {
290 spinlock_t fs_lock; /* access lock */ 390 struct afs_server *server;
291 int fs_state; /* 0 or reason FS currently marked dead (-errno) */ 391 struct afs_cb_interest *cb_interest;
392};
292 393
293 /* callback promise management */ 394struct afs_server_list {
294 struct rb_root cb_promises; /* vnode expiration list (ordered earliest first) */ 395 refcount_t usage;
295 struct delayed_work cb_updater; /* callback updater */ 396 unsigned short nr_servers;
296 struct delayed_work cb_break_work; /* collected break dispatcher */ 397 unsigned short index; /* Server currently in use */
297 wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */ 398 unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
298 spinlock_t cb_lock; /* access lock */ 399 unsigned int seq; /* Set to ->servers_seq when installed */
299 struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */ 400 struct afs_server_entry servers[];
300 atomic_t cb_break_n; /* number of pending breaks */
301 u8 cb_break_head; /* head of callback breaking ring */
302 u8 cb_break_tail; /* tail of callback breaking ring */
303}; 401};
304 402
305/* 403/*
306 * AFS volume access record 404 * Live AFS volume management.
307 */ 405 */
308struct afs_volume { 406struct afs_volume {
407 afs_volid_t vid; /* volume ID */
309 atomic_t usage; 408 atomic_t usage;
310 struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ 409 time64_t update_at; /* Time at which to next update */
311 struct afs_vlocation *vlocation; /* volume location */ 410 struct afs_cell *cell; /* Cell to which belongs (pins ref) */
411 struct list_head proc_link; /* Link in cell->vl_proc */
412 unsigned long flags;
413#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */
414#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
415#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
416#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
417#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
418#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
312#ifdef CONFIG_AFS_FSCACHE 419#ifdef CONFIG_AFS_FSCACHE
313 struct fscache_cookie *cache; /* caching cookie */ 420 struct fscache_cookie *cache; /* caching cookie */
314#endif 421#endif
315 afs_volid_t vid; /* volume ID */ 422 struct afs_server_list *servers; /* List of servers on which volume resides */
423 rwlock_t servers_lock; /* Lock for ->servers */
424 unsigned int servers_seq; /* Incremented each time ->servers changes */
425
316 afs_voltype_t type; /* type of volume */ 426 afs_voltype_t type; /* type of volume */
427 short error;
317 char type_force; /* force volume type (suppress R/O -> R/W) */ 428 char type_force; /* force volume type (suppress R/O -> R/W) */
318 unsigned short nservers; /* number of server slots filled */ 429 u8 name_len;
319 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ 430 u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
320 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
321 struct rw_semaphore server_sem; /* lock for accessing current server */
322};
323
324/*
325 * vnode catalogue entry
326 */
327struct afs_cache_vnode {
328 afs_vnodeid_t vnode_id; /* vnode ID */
329 unsigned vnode_unique; /* vnode ID uniquifier */
330 afs_dataversion_t data_version; /* data version */
331}; 431};
332 432
333/* 433/*
@@ -337,24 +437,20 @@ struct afs_vnode {
337 struct inode vfs_inode; /* the VFS's inode record */ 437 struct inode vfs_inode; /* the VFS's inode record */
338 438
339 struct afs_volume *volume; /* volume on which vnode resides */ 439 struct afs_volume *volume; /* volume on which vnode resides */
340 struct afs_server *server; /* server currently supplying this file */
341 struct afs_fid fid; /* the file identifier for this inode */ 440 struct afs_fid fid; /* the file identifier for this inode */
342 struct afs_file_status status; /* AFS status info for this file */ 441 struct afs_file_status status; /* AFS status info for this file */
343#ifdef CONFIG_AFS_FSCACHE 442#ifdef CONFIG_AFS_FSCACHE
344 struct fscache_cookie *cache; /* caching cookie */ 443 struct fscache_cookie *cache; /* caching cookie */
345#endif 444#endif
346 struct afs_permits *permits; /* cache of permits so far obtained */ 445 struct afs_permits *permit_cache; /* cache of permits so far obtained */
347 struct mutex permits_lock; /* lock for altering permits list */ 446 struct mutex io_lock; /* Lock for serialising I/O on this mutex */
348 struct mutex validate_lock; /* lock for validating this vnode */ 447 struct mutex validate_lock; /* lock for validating this vnode */
349 wait_queue_head_t update_waitq; /* status fetch waitqueue */ 448 spinlock_t wb_lock; /* lock for wb_keys */
350 int update_cnt; /* number of outstanding ops that will update the
351 * status */
352 spinlock_t writeback_lock; /* lock for writebacks */
353 spinlock_t lock; /* waitqueue/flags lock */ 449 spinlock_t lock; /* waitqueue/flags lock */
354 unsigned long flags; 450 unsigned long flags;
355#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ 451#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
356#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ 452#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
357#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */ 453#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */
358#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ 454#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
359#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ 455#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
360#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ 456#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
@@ -365,24 +461,21 @@ struct afs_vnode {
365#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ 461#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
366#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ 462#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
367 463
368 long acl_order; /* ACL check count (callback break count) */ 464 struct list_head wb_keys; /* List of keys available for writeback */
369
370 struct list_head writebacks; /* alterations in pagecache that need writing */
371 struct list_head pending_locks; /* locks waiting to be granted */ 465 struct list_head pending_locks; /* locks waiting to be granted */
372 struct list_head granted_locks; /* locks granted on this file */ 466 struct list_head granted_locks; /* locks granted on this file */
373 struct delayed_work lock_work; /* work to be done in locking */ 467 struct delayed_work lock_work; /* work to be done in locking */
374 struct key *unlock_key; /* key to be used in unlocking */ 468 struct key *unlock_key; /* key to be used in unlocking */
375 469
376 /* outstanding callback notification on this file */ 470 /* outstanding callback notification on this file */
377 struct rb_node server_rb; /* link in server->fs_vnodes */ 471 struct afs_cb_interest *cb_interest; /* Server on which this resides */
378 struct rb_node cb_promise; /* link in server->cb_promises */ 472 unsigned int cb_s_break; /* Mass break counter on ->server */
379 struct work_struct cb_broken_work; /* work to be done on callback break */ 473 unsigned int cb_break; /* Break counter on vnode */
380 time64_t cb_expires; /* time at which callback expires */ 474 seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
381 time64_t cb_expires_at; /* time used to order cb_promise */ 475
476 time64_t cb_expires_at; /* time at which callback expires */
382 unsigned cb_version; /* callback version */ 477 unsigned cb_version; /* callback version */
383 unsigned cb_expiry; /* callback expiry time */
384 afs_callback_type_t cb_type; /* type of callback */ 478 afs_callback_type_t cb_type; /* type of callback */
385 bool cb_promised; /* true if promise still holds */
386}; 479};
387 480
388/* 481/*
@@ -390,16 +483,21 @@ struct afs_vnode {
390 */ 483 */
391struct afs_permit { 484struct afs_permit {
392 struct key *key; /* RxRPC ticket holding a security context */ 485 struct key *key; /* RxRPC ticket holding a security context */
393 afs_access_t access_mask; /* access mask for this key */ 486 afs_access_t access; /* CallerAccess value for this key */
394}; 487};
395 488
396/* 489/*
397 * cache of security records from attempts to access a vnode 490 * Immutable cache of CallerAccess records from attempts to access vnodes.
491 * These may be shared between multiple vnodes.
398 */ 492 */
399struct afs_permits { 493struct afs_permits {
400 struct rcu_head rcu; /* disposal procedure */ 494 struct rcu_head rcu;
401 int count; /* number of records */ 495 struct hlist_node hash_node; /* Link in hash */
402 struct afs_permit permits[0]; /* the permits so far examined */ 496 unsigned long h; /* Hash value for this permit list */
497 refcount_t usage;
498 unsigned short nr_permits; /* Number of records */
499 bool invalidated; /* Invalidated due to key change */
500 struct afs_permit permits[]; /* List of permits sorted by key pointer */
403}; 501};
404 502
405/* 503/*
@@ -411,28 +509,78 @@ struct afs_interface {
411 unsigned mtu; /* MTU of interface */ 509 unsigned mtu; /* MTU of interface */
412}; 510};
413 511
414struct afs_uuid { 512/*
415 __be32 time_low; /* low part of timestamp */ 513 * Cursor for iterating over a server's address list.
416 __be16 time_mid; /* mid part of timestamp */ 514 */
417 __be16 time_hi_and_version; /* high part of timestamp and version */ 515struct afs_addr_cursor {
418 __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ 516 struct afs_addr_list *alist; /* Current address list (pins ref) */
419 __u8 clock_seq_low; /* clock seq low */ 517 struct sockaddr_rxrpc *addr;
420 __u8 node[6]; /* spatially unique node ID (MAC addr) */ 518 u32 abort_code;
519 unsigned short start; /* Starting point in alist->addrs[] */
520 unsigned short index; /* Wrapping offset from start to current addr */
521 short error;
522 bool begun; /* T if we've begun iteration */
523 bool responded; /* T if the current address responded */
524};
525
526/*
527 * Cursor for iterating over a set of fileservers.
528 */
529struct afs_fs_cursor {
530 struct afs_addr_cursor ac;
531 struct afs_vnode *vnode;
532 struct afs_server_list *server_list; /* Current server list (pins ref) */
533 struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
534 struct key *key; /* Key for the server */
535 unsigned int cb_break; /* cb_break + cb_s_break before the call */
536 unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
537 unsigned char start; /* Initial index in server list */
538 unsigned char index; /* Number of servers tried beyond start */
539 unsigned short flags;
540#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
541#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */
542#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */
543#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */
544#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
545#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
421}; 546};
422 547
548#include <trace/events/afs.h>
549
423/*****************************************************************************/ 550/*****************************************************************************/
424/* 551/*
552 * addr_list.c
553 */
554static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
555{
556 if (alist)
557 refcount_inc(&alist->usage);
558 return alist;
559}
560extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
561 unsigned short,
562 unsigned short);
563extern void afs_put_addrlist(struct afs_addr_list *);
564extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
565 unsigned short, unsigned short);
566extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
567extern bool afs_iterate_addresses(struct afs_addr_cursor *);
568extern int afs_end_cursor(struct afs_addr_cursor *);
569extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
570
571extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
572extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
573
574/*
425 * cache.c 575 * cache.c
426 */ 576 */
427#ifdef CONFIG_AFS_FSCACHE 577#ifdef CONFIG_AFS_FSCACHE
428extern struct fscache_netfs afs_cache_netfs; 578extern struct fscache_netfs afs_cache_netfs;
429extern struct fscache_cookie_def afs_cell_cache_index_def; 579extern struct fscache_cookie_def afs_cell_cache_index_def;
430extern struct fscache_cookie_def afs_vlocation_cache_index_def;
431extern struct fscache_cookie_def afs_volume_cache_index_def; 580extern struct fscache_cookie_def afs_volume_cache_index_def;
432extern struct fscache_cookie_def afs_vnode_cache_index_def; 581extern struct fscache_cookie_def afs_vnode_cache_index_def;
433#else 582#else
434#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) 583#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL)
435#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL)
436#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) 584#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL)
437#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) 585#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL)
438#endif 586#endif
@@ -441,29 +589,31 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
441 * callback.c 589 * callback.c
442 */ 590 */
443extern void afs_init_callback_state(struct afs_server *); 591extern void afs_init_callback_state(struct afs_server *);
444extern void afs_broken_callback_work(struct work_struct *); 592extern void afs_break_callback(struct afs_vnode *);
445extern void afs_break_callbacks(struct afs_server *, size_t, 593extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
446 struct afs_callback[]); 594
447extern void afs_discard_callback_on_delete(struct afs_vnode *); 595extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
448extern void afs_give_up_callback(struct afs_vnode *); 596extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
449extern void afs_dispatch_give_up_callbacks(struct work_struct *); 597extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
450extern void afs_flush_callback_breaks(struct afs_server *); 598
451extern int __init afs_callback_update_init(void); 599static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
452extern void afs_callback_update_kill(void); 600{
601 refcount_inc(&cbi->usage);
602 return cbi;
603}
453 604
454/* 605/*
455 * cell.c 606 * cell.c
456 */ 607 */
457extern struct rw_semaphore afs_proc_cells_sem; 608extern int afs_cell_init(struct afs_net *, const char *);
458extern struct list_head afs_proc_cells; 609extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
459 610extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
460#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) 611 const char *, bool);
461extern int afs_cell_init(char *); 612extern struct afs_cell *afs_get_cell(struct afs_cell *);
462extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); 613extern void afs_put_cell(struct afs_net *, struct afs_cell *);
463extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); 614extern void afs_manage_cells(struct work_struct *);
464extern struct afs_cell *afs_grab_cell(struct afs_cell *); 615extern void afs_cells_timer(struct timer_list *);
465extern void afs_put_cell(struct afs_cell *); 616extern void __net_exit afs_cell_purge(struct afs_net *);
466extern void afs_cell_purge(void);
467 617
468/* 618/*
469 * cmservice.c 619 * cmservice.c
@@ -473,6 +623,7 @@ extern bool afs_cm_incoming_call(struct afs_call *);
473/* 623/*
474 * dir.c 624 * dir.c
475 */ 625 */
626extern bool afs_dir_check_page(struct inode *, struct page *);
476extern const struct inode_operations afs_dir_inode_operations; 627extern const struct inode_operations afs_dir_inode_operations;
477extern const struct dentry_operations afs_fs_dentry_operations; 628extern const struct dentry_operations afs_fs_dentry_operations;
478extern const struct file_operations afs_dir_file_operations; 629extern const struct file_operations afs_dir_file_operations;
@@ -484,15 +635,19 @@ extern const struct address_space_operations afs_fs_aops;
484extern const struct inode_operations afs_file_inode_operations; 635extern const struct inode_operations afs_file_inode_operations;
485extern const struct file_operations afs_file_operations; 636extern const struct file_operations afs_file_operations;
486 637
638extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
639extern void afs_put_wb_key(struct afs_wb_key *);
487extern int afs_open(struct inode *, struct file *); 640extern int afs_open(struct inode *, struct file *);
488extern int afs_release(struct inode *, struct file *); 641extern int afs_release(struct inode *, struct file *);
642extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *);
489extern int afs_page_filler(void *, struct page *); 643extern int afs_page_filler(void *, struct page *);
490extern void afs_put_read(struct afs_read *); 644extern void afs_put_read(struct afs_read *);
491 645
492/* 646/*
493 * flock.c 647 * flock.c
494 */ 648 */
495extern void __exit afs_kill_lock_manager(void); 649extern struct workqueue_struct *afs_lock_manager;
650
496extern void afs_lock_work(struct work_struct *); 651extern void afs_lock_work(struct work_struct *);
497extern void afs_lock_may_be_available(struct afs_vnode *); 652extern void afs_lock_may_be_available(struct afs_vnode *);
498extern int afs_lock(struct file *, int, struct file_lock *); 653extern int afs_lock(struct file *, int, struct file_lock *);
@@ -501,48 +656,40 @@ extern int afs_flock(struct file *, int, struct file_lock *);
501/* 656/*
502 * fsclient.c 657 * fsclient.c
503 */ 658 */
504extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, 659extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *);
505 struct afs_vnode *, struct afs_volsync *, 660extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
506 bool); 661extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
507extern int afs_fs_give_up_callbacks(struct afs_server *, bool); 662extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
508extern int afs_fs_fetch_data(struct afs_server *, struct key *, 663 struct afs_fid *, struct afs_file_status *, struct afs_callback *);
509 struct afs_vnode *, struct afs_read *, bool); 664extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool);
510extern int afs_fs_create(struct afs_server *, struct key *, 665extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *);
511 struct afs_vnode *, const char *, umode_t, 666extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
512 struct afs_fid *, struct afs_file_status *, 667 struct afs_fid *, struct afs_file_status *);
513 struct afs_callback *, bool); 668extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
514extern int afs_fs_remove(struct afs_server *, struct key *, 669 struct afs_vnode *, const char *);
515 struct afs_vnode *, const char *, bool, bool); 670extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
516extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *, 671 pgoff_t, pgoff_t, unsigned, unsigned);
517 struct afs_vnode *, const char *, bool); 672extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
518extern int afs_fs_symlink(struct afs_server *, struct key *, 673extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
519 struct afs_vnode *, const char *, const char *, 674extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
520 struct afs_fid *, struct afs_file_status *, bool); 675extern int afs_fs_extend_lock(struct afs_fs_cursor *);
521extern int afs_fs_rename(struct afs_server *, struct key *, 676extern int afs_fs_release_lock(struct afs_fs_cursor *);
522 struct afs_vnode *, const char *, 677extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
523 struct afs_vnode *, const char *, bool); 678 struct afs_addr_cursor *, struct key *);
524extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, 679extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
525 pgoff_t, pgoff_t, unsigned, unsigned, bool); 680 struct afs_addr_cursor *, struct key *);
526extern int afs_fs_setattr(struct afs_server *, struct key *,
527 struct afs_vnode *, struct iattr *, bool);
528extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
529 struct afs_vnode *,
530 struct afs_volume_status *, bool);
531extern int afs_fs_set_lock(struct afs_server *, struct key *,
532 struct afs_vnode *, afs_lock_type_t, bool);
533extern int afs_fs_extend_lock(struct afs_server *, struct key *,
534 struct afs_vnode *, bool);
535extern int afs_fs_release_lock(struct afs_server *, struct key *,
536 struct afs_vnode *, bool);
537 681
538/* 682/*
539 * inode.c 683 * inode.c
540 */ 684 */
685extern int afs_fetch_status(struct afs_vnode *, struct key *);
686extern int afs_iget5_test(struct inode *, void *);
541extern struct inode *afs_iget_autocell(struct inode *, const char *, int, 687extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
542 struct key *); 688 struct key *);
543extern struct inode *afs_iget(struct super_block *, struct key *, 689extern struct inode *afs_iget(struct super_block *, struct key *,
544 struct afs_fid *, struct afs_file_status *, 690 struct afs_fid *, struct afs_file_status *,
545 struct afs_callback *); 691 struct afs_callback *,
692 struct afs_cb_interest *);
546extern void afs_zap_data(struct afs_vnode *); 693extern void afs_zap_data(struct afs_vnode *);
547extern int afs_validate(struct afs_vnode *, struct key *); 694extern int afs_validate(struct afs_vnode *, struct key *);
548extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); 695extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -554,7 +701,35 @@ extern int afs_drop_inode(struct inode *);
554 * main.c 701 * main.c
555 */ 702 */
556extern struct workqueue_struct *afs_wq; 703extern struct workqueue_struct *afs_wq;
557extern struct afs_uuid afs_uuid; 704
705static inline struct afs_net *afs_d2net(struct dentry *dentry)
706{
707 return &__afs_net;
708}
709
710static inline struct afs_net *afs_i2net(struct inode *inode)
711{
712 return &__afs_net;
713}
714
715static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
716{
717 return &__afs_net;
718}
719
720static inline struct afs_net *afs_sock2net(struct sock *sk)
721{
722 return &__afs_net;
723}
724
725static inline struct afs_net *afs_get_net(struct afs_net *net)
726{
727 return net;
728}
729
730static inline void afs_put_net(struct afs_net *net)
731{
732}
558 733
559/* 734/*
560 * misc.c 735 * misc.c
@@ -579,23 +754,33 @@ extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
579/* 754/*
580 * proc.c 755 * proc.c
581 */ 756 */
582extern int afs_proc_init(void); 757extern int __net_init afs_proc_init(struct afs_net *);
583extern void afs_proc_cleanup(void); 758extern void __net_exit afs_proc_cleanup(struct afs_net *);
584extern int afs_proc_cell_setup(struct afs_cell *); 759extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
585extern void afs_proc_cell_remove(struct afs_cell *); 760extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
761
762/*
763 * rotate.c
764 */
765extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *,
766 struct key *);
767extern bool afs_select_fileserver(struct afs_fs_cursor *);
768extern bool afs_select_current_fileserver(struct afs_fs_cursor *);
769extern int afs_end_vnode_operation(struct afs_fs_cursor *);
586 770
587/* 771/*
588 * rxrpc.c 772 * rxrpc.c
589 */ 773 */
590extern struct socket *afs_socket; 774extern struct workqueue_struct *afs_async_calls;
591extern atomic_t afs_outstanding_calls;
592 775
593extern int afs_open_socket(void); 776extern int __net_init afs_open_socket(struct afs_net *);
594extern void afs_close_socket(void); 777extern void __net_exit afs_close_socket(struct afs_net *);
778extern void afs_charge_preallocation(struct work_struct *);
595extern void afs_put_call(struct afs_call *); 779extern void afs_put_call(struct afs_call *);
596extern int afs_queue_call_work(struct afs_call *); 780extern int afs_queue_call_work(struct afs_call *);
597extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool); 781extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
598extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, 782extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
783 const struct afs_call_type *,
599 size_t, size_t); 784 size_t, size_t);
600extern void afs_flat_call_destructor(struct afs_call *); 785extern void afs_flat_call_destructor(struct afs_call *);
601extern void afs_send_empty_reply(struct afs_call *); 786extern void afs_send_empty_reply(struct afs_call *);
@@ -607,117 +792,135 @@ static inline int afs_transfer_reply(struct afs_call *call)
607 return afs_extract_data(call, call->buffer, call->reply_max, false); 792 return afs_extract_data(call, call->buffer, call->reply_max, false);
608} 793}
609 794
795static inline bool afs_check_call_state(struct afs_call *call,
796 enum afs_call_state state)
797{
798 return READ_ONCE(call->state) == state;
799}
800
801static inline bool afs_set_call_state(struct afs_call *call,
802 enum afs_call_state from,
803 enum afs_call_state to)
804{
805 bool ok = false;
806
807 spin_lock_bh(&call->state_lock);
808 if (call->state == from) {
809 call->state = to;
810 trace_afs_call_state(call, from, to, 0, 0);
811 ok = true;
812 }
813 spin_unlock_bh(&call->state_lock);
814 return ok;
815}
816
817static inline void afs_set_call_complete(struct afs_call *call,
818 int error, u32 remote_abort)
819{
820 enum afs_call_state state;
821 bool ok = false;
822
823 spin_lock_bh(&call->state_lock);
824 state = call->state;
825 if (state != AFS_CALL_COMPLETE) {
826 call->abort_code = remote_abort;
827 call->error = error;
828 call->state = AFS_CALL_COMPLETE;
829 trace_afs_call_state(call, state, AFS_CALL_COMPLETE,
830 error, remote_abort);
831 ok = true;
832 }
833 spin_unlock_bh(&call->state_lock);
834 if (ok)
835 trace_afs_call_done(call);
836}
837
610/* 838/*
611 * security.c 839 * security.c
612 */ 840 */
841extern void afs_put_permits(struct afs_permits *);
613extern void afs_clear_permits(struct afs_vnode *); 842extern void afs_clear_permits(struct afs_vnode *);
614extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 843extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
615extern void afs_zap_permits(struct rcu_head *); 844extern void afs_zap_permits(struct rcu_head *);
616extern struct key *afs_request_key(struct afs_cell *); 845extern struct key *afs_request_key(struct afs_cell *);
617extern int afs_permission(struct inode *, int); 846extern int afs_permission(struct inode *, int);
847extern void __exit afs_clean_up_permit_cache(void);
618 848
619/* 849/*
620 * server.c 850 * server.c
621 */ 851 */
622extern spinlock_t afs_server_peer_lock; 852extern spinlock_t afs_server_peer_lock;
623 853
624#define afs_get_server(S) \ 854static inline struct afs_server *afs_get_server(struct afs_server *server)
625do { \ 855{
626 _debug("GET SERVER %d", atomic_read(&(S)->usage)); \ 856 atomic_inc(&server->usage);
627 atomic_inc(&(S)->usage); \ 857 return server;
628} while(0) 858}
629 859
630extern struct afs_server *afs_lookup_server(struct afs_cell *, 860extern struct afs_server *afs_find_server(struct afs_net *,
631 const struct in_addr *); 861 const struct sockaddr_rxrpc *);
632extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); 862extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
633extern void afs_put_server(struct afs_server *); 863extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
634extern void __exit afs_purge_servers(void); 864extern void afs_put_server(struct afs_net *, struct afs_server *);
865extern void afs_manage_servers(struct work_struct *);
866extern void afs_servers_timer(struct timer_list *);
867extern void __net_exit afs_purge_servers(struct afs_net *);
868extern bool afs_probe_fileserver(struct afs_fs_cursor *);
869extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
635 870
636/* 871/*
637 * super.c 872 * server_list.c
638 */ 873 */
639extern int afs_fs_init(void); 874static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist)
640extern void afs_fs_exit(void); 875{
876 refcount_inc(&slist->usage);
877 return slist;
878}
641 879
642/* 880extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
643 * vlclient.c 881extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
644 */ 882 struct afs_vldb_entry *,
645extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, 883 u8);
646 const char *, struct afs_cache_vlocation *, 884extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
647 bool);
648extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
649 afs_volid_t, afs_voltype_t,
650 struct afs_cache_vlocation *, bool);
651 885
652/* 886/*
653 * vlocation.c 887 * super.c
654 */ 888 */
655#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0) 889extern int __init afs_fs_init(void);
656 890extern void __exit afs_fs_exit(void);
657extern int __init afs_vlocation_update_init(void);
658extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
659 struct key *,
660 const char *, size_t);
661extern void afs_put_vlocation(struct afs_vlocation *);
662extern void afs_vlocation_purge(void);
663 891
664/* 892/*
665 * vnode.c 893 * vlclient.c
666 */ 894 */
667static inline struct afs_vnode *AFS_FS_I(struct inode *inode) 895extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
668{ 896 struct afs_addr_cursor *,
669 return container_of(inode, struct afs_vnode, vfs_inode); 897 struct key *, const char *, int);
670} 898extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
671 899 struct key *, const uuid_t *);
672static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) 900extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
673{ 901extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
674 return &vnode->vfs_inode; 902 struct key *, const uuid_t *);
675}
676
677extern void afs_vnode_finalise_status_update(struct afs_vnode *,
678 struct afs_server *);
679extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
680 struct key *);
681extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
682 struct afs_read *);
683extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
684 umode_t, struct afs_fid *, struct afs_file_status *,
685 struct afs_callback *, struct afs_server **);
686extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
687 bool);
688extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
689 const char *);
690extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
691 const char *, struct afs_fid *,
692 struct afs_file_status *, struct afs_server **);
693extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
694 struct key *, const char *, const char *);
695extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
696 unsigned, unsigned);
697extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
698extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
699 struct afs_volume_status *);
700extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
701 afs_lock_type_t);
702extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
703extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
704 903
705/* 904/*
706 * volume.c 905 * volume.c
707 */ 906 */
708#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) 907static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
908{
909 if (volume)
910 atomic_inc(&volume->usage);
911 return volume;
912}
709 913
710extern void afs_put_volume(struct afs_volume *); 914extern struct afs_volume *afs_create_volume(struct afs_mount_params *);
711extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); 915extern void afs_activate_volume(struct afs_volume *);
712extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); 916extern void afs_deactivate_volume(struct afs_volume *);
713extern int afs_volume_release_fileserver(struct afs_vnode *, 917extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
714 struct afs_server *, int); 918extern int afs_check_volume_status(struct afs_volume *, struct key *);
715 919
716/* 920/*
717 * write.c 921 * write.c
718 */ 922 */
719extern int afs_set_page_dirty(struct page *); 923extern int afs_set_page_dirty(struct page *);
720extern void afs_put_writeback(struct afs_writeback *);
721extern int afs_write_begin(struct file *file, struct address_space *mapping, 924extern int afs_write_begin(struct file *file, struct address_space *mapping,
722 loff_t pos, unsigned len, unsigned flags, 925 loff_t pos, unsigned len, unsigned flags,
723 struct page **pagep, void **fsdata); 926 struct page **pagep, void **fsdata);
@@ -728,9 +931,11 @@ extern int afs_writepage(struct page *, struct writeback_control *);
728extern int afs_writepages(struct address_space *, struct writeback_control *); 931extern int afs_writepages(struct address_space *, struct writeback_control *);
729extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); 932extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
730extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); 933extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
731extern int afs_writeback_all(struct afs_vnode *);
732extern int afs_flush(struct file *, fl_owner_t); 934extern int afs_flush(struct file *, fl_owner_t);
733extern int afs_fsync(struct file *, loff_t, loff_t, int); 935extern int afs_fsync(struct file *, loff_t, loff_t, int);
936extern int afs_page_mkwrite(struct vm_fault *);
937extern void afs_prune_wb_keys(struct afs_vnode *);
938extern int afs_launder_page(struct page *);
734 939
735/* 940/*
736 * xattr.c 941 * xattr.c
@@ -738,12 +943,42 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int);
738extern const struct xattr_handler *afs_xattr_handlers[]; 943extern const struct xattr_handler *afs_xattr_handlers[];
739extern ssize_t afs_listxattr(struct dentry *, char *, size_t); 944extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
740 945
946
947/*
948 * Miscellaneous inline functions.
949 */
950static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
951{
952 return container_of(inode, struct afs_vnode, vfs_inode);
953}
954
955static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
956{
957 return &vnode->vfs_inode;
958}
959
960static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc,
961 struct afs_vnode *vnode,
962 unsigned int cb_break)
963{
964 if (fc->ac.error == 0)
965 afs_cache_permit(vnode, fc->key, cb_break);
966}
967
968static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
969 struct afs_vnode *vnode)
970{
971 if (fc->ac.error == -ENOENT) {
972 set_bit(AFS_VNODE_DELETED, &vnode->flags);
973 afs_break_callback(vnode);
974 }
975}
976
977
741/*****************************************************************************/ 978/*****************************************************************************/
742/* 979/*
743 * debug tracing 980 * debug tracing
744 */ 981 */
745#include <trace/events/afs.h>
746
747extern unsigned afs_debug; 982extern unsigned afs_debug;
748 983
749#define dbgprintk(FMT,...) \ 984#define dbgprintk(FMT,...) \
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 9944770849da..15a02a05ff40 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,57 +31,112 @@ static char *rootcell;
31module_param(rootcell, charp, 0); 31module_param(rootcell, charp, 0);
32MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); 32MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
33 33
34struct afs_uuid afs_uuid;
35struct workqueue_struct *afs_wq; 34struct workqueue_struct *afs_wq;
35struct afs_net __afs_net;
36 36
37/* 37/*
38 * initialise the AFS client FS module 38 * Initialise an AFS network namespace record.
39 */ 39 */
40static int __init afs_init(void) 40static int __net_init afs_net_init(struct afs_net *net)
41{ 41{
42 int ret; 42 int ret;
43 43
44 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); 44 net->live = true;
45 generate_random_uuid((unsigned char *)&net->uuid);
45 46
46 generate_random_uuid((unsigned char *)&afs_uuid); 47 INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
48 mutex_init(&net->socket_mutex);
47 49
48 /* create workqueue */ 50 net->cells = RB_ROOT;
49 ret = -ENOMEM; 51 seqlock_init(&net->cells_lock);
50 afs_wq = alloc_workqueue("afs", 0, 0); 52 INIT_WORK(&net->cells_manager, afs_manage_cells);
51 if (!afs_wq) 53 timer_setup(&net->cells_timer, afs_cells_timer, 0);
52 return ret;
53 54
54 /* register the /proc stuff */ 55 spin_lock_init(&net->proc_cells_lock);
55 ret = afs_proc_init(); 56 INIT_LIST_HEAD(&net->proc_cells);
56 if (ret < 0)
57 goto error_proc;
58 57
59#ifdef CONFIG_AFS_FSCACHE 58 seqlock_init(&net->fs_lock);
60 /* we want to be able to cache */ 59 net->fs_servers = RB_ROOT;
61 ret = fscache_register_netfs(&afs_cache_netfs); 60 INIT_LIST_HEAD(&net->fs_updates);
61 INIT_HLIST_HEAD(&net->fs_proc);
62
63 INIT_HLIST_HEAD(&net->fs_addresses4);
64 INIT_HLIST_HEAD(&net->fs_addresses6);
65 seqlock_init(&net->fs_addr_lock);
66
67 INIT_WORK(&net->fs_manager, afs_manage_servers);
68 timer_setup(&net->fs_timer, afs_servers_timer, 0);
69
70 /* Register the /proc stuff */
71 ret = afs_proc_init(net);
62 if (ret < 0) 72 if (ret < 0)
63 goto error_cache; 73 goto error_proc;
64#endif
65 74
66 /* initialise the cell DB */ 75 /* Initialise the cell DB */
67 ret = afs_cell_init(rootcell); 76 ret = afs_cell_init(net, rootcell);
68 if (ret < 0) 77 if (ret < 0)
69 goto error_cell_init; 78 goto error_cell_init;
70 79
71 /* initialise the VL update process */ 80 /* Create the RxRPC transport */
72 ret = afs_vlocation_update_init(); 81 ret = afs_open_socket(net);
73 if (ret < 0) 82 if (ret < 0)
74 goto error_vl_update_init; 83 goto error_open_socket;
75 84
76 /* initialise the callback update process */ 85 return 0;
77 ret = afs_callback_update_init(); 86
87error_open_socket:
88 net->live = false;
89 afs_cell_purge(net);
90 afs_purge_servers(net);
91error_cell_init:
92 net->live = false;
93 afs_proc_cleanup(net);
94error_proc:
95 net->live = false;
96 return ret;
97}
98
99/*
100 * Clean up and destroy an AFS network namespace record.
101 */
102static void __net_exit afs_net_exit(struct afs_net *net)
103{
104 net->live = false;
105 afs_cell_purge(net);
106 afs_purge_servers(net);
107 afs_close_socket(net);
108 afs_proc_cleanup(net);
109}
110
111/*
112 * initialise the AFS client FS module
113 */
114static int __init afs_init(void)
115{
116 int ret = -ENOMEM;
117
118 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
119
120 afs_wq = alloc_workqueue("afs", 0, 0);
121 if (!afs_wq)
122 goto error_afs_wq;
123 afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
124 if (!afs_async_calls)
125 goto error_async;
126 afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
127 if (!afs_lock_manager)
128 goto error_lockmgr;
129
130#ifdef CONFIG_AFS_FSCACHE
131 /* we want to be able to cache */
132 ret = fscache_register_netfs(&afs_cache_netfs);
78 if (ret < 0) 133 if (ret < 0)
79 goto error_callback_update_init; 134 goto error_cache;
135#endif
80 136
81 /* create the RxRPC transport */ 137 ret = afs_net_init(&__afs_net);
82 ret = afs_open_socket();
83 if (ret < 0) 138 if (ret < 0)
84 goto error_open_socket; 139 goto error_net;
85 140
86 /* register the filesystems */ 141 /* register the filesystems */
87 ret = afs_fs_init(); 142 ret = afs_fs_init();
@@ -91,21 +146,18 @@ static int __init afs_init(void)
91 return ret; 146 return ret;
92 147
93error_fs: 148error_fs:
94 afs_close_socket(); 149 afs_net_exit(&__afs_net);
95error_open_socket: 150error_net:
96 afs_callback_update_kill();
97error_callback_update_init:
98 afs_vlocation_purge();
99error_vl_update_init:
100 afs_cell_purge();
101error_cell_init:
102#ifdef CONFIG_AFS_FSCACHE 151#ifdef CONFIG_AFS_FSCACHE
103 fscache_unregister_netfs(&afs_cache_netfs); 152 fscache_unregister_netfs(&afs_cache_netfs);
104error_cache: 153error_cache:
105#endif 154#endif
106 afs_proc_cleanup(); 155 destroy_workqueue(afs_lock_manager);
107error_proc: 156error_lockmgr:
157 destroy_workqueue(afs_async_calls);
158error_async:
108 destroy_workqueue(afs_wq); 159 destroy_workqueue(afs_wq);
160error_afs_wq:
109 rcu_barrier(); 161 rcu_barrier();
110 printk(KERN_ERR "kAFS: failed to register: %d\n", ret); 162 printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
111 return ret; 163 return ret;
@@ -124,17 +176,14 @@ static void __exit afs_exit(void)
124 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); 176 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
125 177
126 afs_fs_exit(); 178 afs_fs_exit();
127 afs_kill_lock_manager(); 179 afs_net_exit(&__afs_net);
128 afs_close_socket();
129 afs_purge_servers();
130 afs_callback_update_kill();
131 afs_vlocation_purge();
132 destroy_workqueue(afs_wq);
133 afs_cell_purge();
134#ifdef CONFIG_AFS_FSCACHE 180#ifdef CONFIG_AFS_FSCACHE
135 fscache_unregister_netfs(&afs_cache_netfs); 181 fscache_unregister_netfs(&afs_cache_netfs);
136#endif 182#endif
137 afs_proc_cleanup(); 183 destroy_workqueue(afs_lock_manager);
184 destroy_workqueue(afs_async_calls);
185 destroy_workqueue(afs_wq);
186 afs_clean_up_permit_cache();
138 rcu_barrier(); 187 rcu_barrier();
139} 188}
140 189
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index c05f1f1c0d41..700a5fa7f4ec 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -21,12 +21,12 @@
21int afs_abort_to_error(u32 abort_code) 21int afs_abort_to_error(u32 abort_code)
22{ 22{
23 switch (abort_code) { 23 switch (abort_code) {
24 /* low errno codes inserted into abort namespace */ 24 /* Low errno codes inserted into abort namespace */
25 case 13: return -EACCES; 25 case 13: return -EACCES;
26 case 27: return -EFBIG; 26 case 27: return -EFBIG;
27 case 30: return -EROFS; 27 case 30: return -EROFS;
28 28
29 /* VICE "special error" codes; 101 - 111 */ 29 /* VICE "special error" codes; 101 - 111 */
30 case VSALVAGE: return -EIO; 30 case VSALVAGE: return -EIO;
31 case VNOVNODE: return -ENOENT; 31 case VNOVNODE: return -ENOENT;
32 case VNOVOL: return -ENOMEDIUM; 32 case VNOVOL: return -ENOMEDIUM;
@@ -39,7 +39,37 @@ int afs_abort_to_error(u32 abort_code)
39 case VBUSY: return -EBUSY; 39 case VBUSY: return -EBUSY;
40 case VMOVED: return -ENXIO; 40 case VMOVED: return -ENXIO;
41 41
42 /* Unified AFS error table; ET "uae" == 0x2f6df00 */ 42 /* Volume Location server errors */
43 case AFSVL_IDEXIST: return -EEXIST;
44 case AFSVL_IO: return -EREMOTEIO;
45 case AFSVL_NAMEEXIST: return -EEXIST;
46 case AFSVL_CREATEFAIL: return -EREMOTEIO;
47 case AFSVL_NOENT: return -ENOMEDIUM;
48 case AFSVL_EMPTY: return -ENOMEDIUM;
49 case AFSVL_ENTDELETED: return -ENOMEDIUM;
50 case AFSVL_BADNAME: return -EINVAL;
51 case AFSVL_BADINDEX: return -EINVAL;
52 case AFSVL_BADVOLTYPE: return -EINVAL;
53 case AFSVL_BADSERVER: return -EINVAL;
54 case AFSVL_BADPARTITION: return -EINVAL;
55 case AFSVL_REPSFULL: return -EFBIG;
56 case AFSVL_NOREPSERVER: return -ENOENT;
57 case AFSVL_DUPREPSERVER: return -EEXIST;
58 case AFSVL_RWNOTFOUND: return -ENOENT;
59 case AFSVL_BADREFCOUNT: return -EINVAL;
60 case AFSVL_SIZEEXCEEDED: return -EINVAL;
61 case AFSVL_BADENTRY: return -EINVAL;
62 case AFSVL_BADVOLIDBUMP: return -EINVAL;
63 case AFSVL_IDALREADYHASHED: return -EINVAL;
64 case AFSVL_ENTRYLOCKED: return -EBUSY;
65 case AFSVL_BADVOLOPER: return -EBADRQC;
66 case AFSVL_BADRELLOCKTYPE: return -EINVAL;
67 case AFSVL_RERELEASE: return -EREMOTEIO;
68 case AFSVL_BADSERVERFLAG: return -EINVAL;
69 case AFSVL_PERM: return -EACCES;
70 case AFSVL_NOMEM: return -EREMOTEIO;
71
72 /* Unified AFS error table; ET "uae" == 0x2f6df00 */
43 case 0x2f6df00: return -EPERM; 73 case 0x2f6df00: return -EPERM;
44 case 0x2f6df01: return -ENOENT; 74 case 0x2f6df01: return -ENOENT;
45 case 0x2f6df04: return -EIO; 75 case 0x2f6df04: return -EIO;
@@ -68,7 +98,7 @@ int afs_abort_to_error(u32 abort_code)
68 case 0x2f6df6c: return -ETIMEDOUT; 98 case 0x2f6df6c: return -ETIMEDOUT;
69 case 0x2f6df78: return -EDQUOT; 99 case 0x2f6df78: return -EDQUOT;
70 100
71 /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ 101 /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
72 case RXKADINCONSISTENCY: return -EPROTO; 102 case RXKADINCONSISTENCY: return -EPROTO;
73 case RXKADPACKETSHORT: return -EPROTO; 103 case RXKADPACKETSHORT: return -EPROTO;
74 case RXKADLEVELFAIL: return -EKEYREJECTED; 104 case RXKADLEVELFAIL: return -EKEYREJECTED;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 35efb9a31dd7..4508dd54f789 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,8 +17,15 @@
17#include <linux/uaccess.h> 17#include <linux/uaccess.h>
18#include "internal.h" 18#include "internal.h"
19 19
20static struct proc_dir_entry *proc_afs; 20static inline struct afs_net *afs_proc2net(struct file *f)
21{
22 return &__afs_net;
23}
21 24
25static inline struct afs_net *afs_seq2net(struct seq_file *m)
26{
27 return &__afs_net; // TODO: use seq_file_net(m)
28}
22 29
23static int afs_proc_cells_open(struct inode *inode, struct file *file); 30static int afs_proc_cells_open(struct inode *inode, struct file *file);
24static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos); 31static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
@@ -98,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
98 .release = seq_release, 105 .release = seq_release,
99}; 106};
100 107
101static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 108static int afs_proc_servers_open(struct inode *inode, struct file *file);
102static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos); 109static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
103static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, 110static void *afs_proc_servers_next(struct seq_file *p, void *v,
104 loff_t *pos); 111 loff_t *pos);
105static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); 112static void afs_proc_servers_stop(struct seq_file *p, void *v);
106static int afs_proc_cell_servers_show(struct seq_file *m, void *v); 113static int afs_proc_servers_show(struct seq_file *m, void *v);
107 114
108static const struct seq_operations afs_proc_cell_servers_ops = { 115static const struct seq_operations afs_proc_servers_ops = {
109 .start = afs_proc_cell_servers_start, 116 .start = afs_proc_servers_start,
110 .next = afs_proc_cell_servers_next, 117 .next = afs_proc_servers_next,
111 .stop = afs_proc_cell_servers_stop, 118 .stop = afs_proc_servers_stop,
112 .show = afs_proc_cell_servers_show, 119 .show = afs_proc_servers_show,
113}; 120};
114 121
115static const struct file_operations afs_proc_cell_servers_fops = { 122static const struct file_operations afs_proc_servers_fops = {
116 .open = afs_proc_cell_servers_open, 123 .open = afs_proc_servers_open,
117 .read = seq_read, 124 .read = seq_read,
118 .llseek = seq_lseek, 125 .llseek = seq_lseek,
119 .release = seq_release, 126 .release = seq_release,
@@ -122,23 +129,24 @@ static const struct file_operations afs_proc_cell_servers_fops = {
122/* 129/*
123 * initialise the /proc/fs/afs/ directory 130 * initialise the /proc/fs/afs/ directory
124 */ 131 */
125int afs_proc_init(void) 132int afs_proc_init(struct afs_net *net)
126{ 133{
127 _enter(""); 134 _enter("");
128 135
129 proc_afs = proc_mkdir("fs/afs", NULL); 136 net->proc_afs = proc_mkdir("fs/afs", NULL);
130 if (!proc_afs) 137 if (!net->proc_afs)
131 goto error_dir; 138 goto error_dir;
132 139
133 if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) || 140 if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
134 !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops)) 141 !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
142 !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops))
135 goto error_tree; 143 goto error_tree;
136 144
137 _leave(" = 0"); 145 _leave(" = 0");
138 return 0; 146 return 0;
139 147
140error_tree: 148error_tree:
141 remove_proc_subtree("fs/afs", NULL); 149 proc_remove(net->proc_afs);
142error_dir: 150error_dir:
143 _leave(" = -ENOMEM"); 151 _leave(" = -ENOMEM");
144 return -ENOMEM; 152 return -ENOMEM;
@@ -147,9 +155,10 @@ error_dir:
147/* 155/*
148 * clean up the /proc/fs/afs/ directory 156 * clean up the /proc/fs/afs/ directory
149 */ 157 */
150void afs_proc_cleanup(void) 158void afs_proc_cleanup(struct afs_net *net)
151{ 159{
152 remove_proc_subtree("fs/afs", NULL); 160 proc_remove(net->proc_afs);
161 net->proc_afs = NULL;
153} 162}
154 163
155/* 164/*
@@ -166,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
166 175
167 m = file->private_data; 176 m = file->private_data;
168 m->private = PDE_DATA(inode); 177 m->private = PDE_DATA(inode);
169
170 return 0; 178 return 0;
171} 179}
172 180
@@ -176,25 +184,28 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
176 */ 184 */
177static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) 185static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
178{ 186{
179 /* lock the list against modification */ 187 struct afs_net *net = afs_seq2net(m);
180 down_read(&afs_proc_cells_sem); 188
181 return seq_list_start_head(&afs_proc_cells, *_pos); 189 rcu_read_lock();
190 return seq_list_start_head(&net->proc_cells, *_pos);
182} 191}
183 192
184/* 193/*
185 * move to next cell in cells list 194 * move to next cell in cells list
186 */ 195 */
187static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) 196static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
188{ 197{
189 return seq_list_next(v, &afs_proc_cells, pos); 198 struct afs_net *net = afs_seq2net(m);
199
200 return seq_list_next(v, &net->proc_cells, pos);
190} 201}
191 202
192/* 203/*
193 * clean up after reading from the cells list 204 * clean up after reading from the cells list
194 */ 205 */
195static void afs_proc_cells_stop(struct seq_file *p, void *v) 206static void afs_proc_cells_stop(struct seq_file *m, void *v)
196{ 207{
197 up_read(&afs_proc_cells_sem); 208 rcu_read_unlock();
198} 209}
199 210
200/* 211/*
@@ -203,16 +214,16 @@ static void afs_proc_cells_stop(struct seq_file *p, void *v)
203static int afs_proc_cells_show(struct seq_file *m, void *v) 214static int afs_proc_cells_show(struct seq_file *m, void *v)
204{ 215{
205 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); 216 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
217 struct afs_net *net = afs_seq2net(m);
206 218
207 if (v == &afs_proc_cells) { 219 if (v == &net->proc_cells) {
208 /* display header on line 1 */ 220 /* display header on line 1 */
209 seq_puts(m, "USE NAME\n"); 221 seq_puts(m, "USE NAME\n");
210 return 0; 222 return 0;
211 } 223 }
212 224
213 /* display one cell per line on subsequent lines */ 225 /* display one cell per line on subsequent lines */
214 seq_printf(m, "%3d %s\n", 226 seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
215 atomic_read(&cell->usage), cell->name);
216 return 0; 227 return 0;
217} 228}
218 229
@@ -223,6 +234,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
223static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, 234static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
224 size_t size, loff_t *_pos) 235 size_t size, loff_t *_pos)
225{ 236{
237 struct afs_net *net = afs_proc2net(file);
226 char *kbuf, *name, *args; 238 char *kbuf, *name, *args;
227 int ret; 239 int ret;
228 240
@@ -264,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
264 if (strcmp(kbuf, "add") == 0) { 276 if (strcmp(kbuf, "add") == 0) {
265 struct afs_cell *cell; 277 struct afs_cell *cell;
266 278
267 cell = afs_cell_create(name, strlen(name), args, false); 279 cell = afs_lookup_cell(net, name, strlen(name), args, true);
268 if (IS_ERR(cell)) { 280 if (IS_ERR(cell)) {
269 ret = PTR_ERR(cell); 281 ret = PTR_ERR(cell);
270 goto done; 282 goto done;
271 } 283 }
272 284
273 afs_put_cell(cell); 285 set_bit(AFS_CELL_FL_NO_GC, &cell->flags);
274 printk("kAFS: Added new cell '%s'\n", name); 286 printk("kAFS: Added new cell '%s'\n", name);
275 } else { 287 } else {
276 goto inval; 288 goto inval;
@@ -303,6 +315,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
303 const char __user *buf, 315 const char __user *buf,
304 size_t size, loff_t *_pos) 316 size_t size, loff_t *_pos)
305{ 317{
318 struct afs_net *net = afs_proc2net(file);
306 char *kbuf, *s; 319 char *kbuf, *s;
307 int ret; 320 int ret;
308 321
@@ -322,7 +335,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
322 /* determine command to perform */ 335 /* determine command to perform */
323 _debug("rootcell=%s", kbuf); 336 _debug("rootcell=%s", kbuf);
324 337
325 ret = afs_cell_init(kbuf); 338 ret = afs_cell_init(net, kbuf);
326 if (ret >= 0) 339 if (ret >= 0)
327 ret = size; /* consume everything, always */ 340 ret = size; /* consume everything, always */
328 341
@@ -334,29 +347,27 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
334/* 347/*
335 * initialise /proc/fs/afs/<cell>/ 348 * initialise /proc/fs/afs/<cell>/
336 */ 349 */
337int afs_proc_cell_setup(struct afs_cell *cell) 350int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
338{ 351{
339 struct proc_dir_entry *dir; 352 struct proc_dir_entry *dir;
340 353
341 _enter("%p{%s}", cell, cell->name); 354 _enter("%p{%s},%p", cell, cell->name, net->proc_afs);
342 355
343 dir = proc_mkdir(cell->name, proc_afs); 356 dir = proc_mkdir(cell->name, net->proc_afs);
344 if (!dir) 357 if (!dir)
345 goto error_dir; 358 goto error_dir;
346 359
347 if (!proc_create_data("servers", 0, dir, 360 if (!proc_create_data("vlservers", 0, dir,
348 &afs_proc_cell_servers_fops, cell) || 361 &afs_proc_cell_vlservers_fops, cell) ||
349 !proc_create_data("vlservers", 0, dir,
350 &afs_proc_cell_vlservers_fops, cell) ||
351 !proc_create_data("volumes", 0, dir, 362 !proc_create_data("volumes", 0, dir,
352 &afs_proc_cell_volumes_fops, cell)) 363 &afs_proc_cell_volumes_fops, cell))
353 goto error_tree; 364 goto error_tree;
354 365
355 _leave(" = 0"); 366 _leave(" = 0");
356 return 0; 367 return 0;
357 368
358error_tree: 369error_tree:
359 remove_proc_subtree(cell->name, proc_afs); 370 remove_proc_subtree(cell->name, net->proc_afs);
360error_dir: 371error_dir:
361 _leave(" = -ENOMEM"); 372 _leave(" = -ENOMEM");
362 return -ENOMEM; 373 return -ENOMEM;
@@ -365,11 +376,11 @@ error_dir:
365/* 376/*
366 * remove /proc/fs/afs/<cell>/ 377 * remove /proc/fs/afs/<cell>/
367 */ 378 */
368void afs_proc_cell_remove(struct afs_cell *cell) 379void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
369{ 380{
370 _enter(""); 381 _enter("");
371 382
372 remove_proc_subtree(cell->name, proc_afs); 383 remove_proc_subtree(cell->name, net->proc_afs);
373 384
374 _leave(""); 385 _leave("");
375} 386}
@@ -407,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
407 418
408 _enter("cell=%p pos=%Ld", cell, *_pos); 419 _enter("cell=%p pos=%Ld", cell, *_pos);
409 420
410 /* lock the list against modification */ 421 read_lock(&cell->proc_lock);
411 down_read(&cell->vl_sem); 422 return seq_list_start_head(&cell->proc_volumes, *_pos);
412 return seq_list_start_head(&cell->vl_list, *_pos);
413} 423}
414 424
415/* 425/*
@@ -421,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
421 struct afs_cell *cell = p->private; 431 struct afs_cell *cell = p->private;
422 432
423 _enter("cell=%p pos=%Ld", cell, *_pos); 433 _enter("cell=%p pos=%Ld", cell, *_pos);
424 return seq_list_next(v, &cell->vl_list, _pos); 434 return seq_list_next(v, &cell->proc_volumes, _pos);
425} 435}
426 436
427/* 437/*
@@ -431,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
431{ 441{
432 struct afs_cell *cell = p->private; 442 struct afs_cell *cell = p->private;
433 443
434 up_read(&cell->vl_sem); 444 read_unlock(&cell->proc_lock);
435} 445}
436 446
437static const char afs_vlocation_states[][4] = { 447static const char afs_vol_types[3][3] = {
438 [AFS_VL_NEW] = "New", 448 [AFSVL_RWVOL] = "RW",
439 [AFS_VL_CREATING] = "Crt", 449 [AFSVL_ROVOL] = "RO",
440 [AFS_VL_VALID] = "Val", 450 [AFSVL_BACKVOL] = "BK",
441 [AFS_VL_NO_VOLUME] = "NoV",
442 [AFS_VL_UPDATING] = "Upd",
443 [AFS_VL_VOLUME_DELETED] = "Del",
444 [AFS_VL_UNCERTAIN] = "Unc",
445}; 451};
446 452
447/* 453/*
@@ -450,23 +456,17 @@ static const char afs_vlocation_states[][4] = {
450static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) 456static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
451{ 457{
452 struct afs_cell *cell = m->private; 458 struct afs_cell *cell = m->private;
453 struct afs_vlocation *vlocation = 459 struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
454 list_entry(v, struct afs_vlocation, link);
455 460
456 /* display header on line 1 */ 461 /* Display header on line 1 */
457 if (v == &cell->vl_list) { 462 if (v == &cell->proc_volumes) {
458 seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); 463 seq_puts(m, "USE VID TY\n");
459 return 0; 464 return 0;
460 } 465 }
461 466
462 /* display one cell per line on subsequent lines */ 467 seq_printf(m, "%3d %08x %s\n",
463 seq_printf(m, "%3d %s %08x %08x %08x %s\n", 468 atomic_read(&vol->usage), vol->vid,
464 atomic_read(&vlocation->usage), 469 afs_vol_types[vol->type]);
465 afs_vlocation_states[vlocation->state],
466 vlocation->vldb.vid[0],
467 vlocation->vldb.vid[1],
468 vlocation->vldb.vid[2],
469 vlocation->vldb.name);
470 470
471 return 0; 471 return 0;
472} 472}
@@ -501,23 +501,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
501 */ 501 */
502static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) 502static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
503{ 503{
504 struct afs_addr_list *alist;
504 struct afs_cell *cell = m->private; 505 struct afs_cell *cell = m->private;
505 loff_t pos = *_pos; 506 loff_t pos = *_pos;
506 507
507 _enter("cell=%p pos=%Ld", cell, *_pos); 508 rcu_read_lock();
508 509
509 /* lock the list against modification */ 510 alist = rcu_dereference(cell->vl_addrs);
510 down_read(&cell->vl_sem);
511 511
512 /* allow for the header line */ 512 /* allow for the header line */
513 if (!pos) 513 if (!pos)
514 return (void *) 1; 514 return (void *) 1;
515 pos--; 515 pos--;
516 516
517 if (pos >= cell->vl_naddrs) 517 if (!alist || pos >= alist->nr_addrs)
518 return NULL; 518 return NULL;
519 519
520 return &cell->vl_addrs[pos]; 520 return alist->addrs + pos;
521} 521}
522 522
523/* 523/*
@@ -526,17 +526,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
526static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, 526static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
527 loff_t *_pos) 527 loff_t *_pos)
528{ 528{
529 struct afs_addr_list *alist;
529 struct afs_cell *cell = p->private; 530 struct afs_cell *cell = p->private;
530 loff_t pos; 531 loff_t pos;
531 532
532 _enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos); 533 alist = rcu_dereference(cell->vl_addrs);
533 534
534 pos = *_pos; 535 pos = *_pos;
535 (*_pos)++; 536 (*_pos)++;
536 if (pos >= cell->vl_naddrs) 537 if (!alist || pos >= alist->nr_addrs)
537 return NULL; 538 return NULL;
538 539
539 return &cell->vl_addrs[pos]; 540 return alist->addrs + pos;
540} 541}
541 542
542/* 543/*
@@ -544,9 +545,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
544 */ 545 */
545static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) 546static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
546{ 547{
547 struct afs_cell *cell = p->private; 548 rcu_read_unlock();
548
549 up_read(&cell->vl_sem);
550} 549}
551 550
552/* 551/*
@@ -554,100 +553,76 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
554 */ 553 */
555static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) 554static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
556{ 555{
557 struct in_addr *addr = v; 556 struct sockaddr_rxrpc *addr = v;
558 557
559 /* display header on line 1 */ 558 /* display header on line 1 */
560 if (v == (struct in_addr *) 1) { 559 if (v == (void *)1) {
561 seq_puts(m, "ADDRESS\n"); 560 seq_puts(m, "ADDRESS\n");
562 return 0; 561 return 0;
563 } 562 }
564 563
565 /* display one cell per line on subsequent lines */ 564 /* display one cell per line on subsequent lines */
566 seq_printf(m, "%pI4\n", &addr->s_addr); 565 seq_printf(m, "%pISp\n", &addr->transport);
567 return 0; 566 return 0;
568} 567}
569 568
570/* 569/*
571 * open "/proc/fs/afs/<cell>/servers" which provides a summary of active 570 * open "/proc/fs/afs/servers" which provides a summary of active
572 * servers 571 * servers
573 */ 572 */
574static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) 573static int afs_proc_servers_open(struct inode *inode, struct file *file)
575{ 574{
576 struct afs_cell *cell; 575 return seq_open(file, &afs_proc_servers_ops);
577 struct seq_file *m;
578 int ret;
579
580 cell = PDE_DATA(inode);
581 if (!cell)
582 return -ENOENT;
583
584 ret = seq_open(file, &afs_proc_cell_servers_ops);
585 if (ret < 0)
586 return ret;
587
588 m = file->private_data;
589 m->private = cell;
590 return 0;
591} 576}
592 577
593/* 578/*
594 * set up the iterator to start reading from the cells list and return the 579 * Set up the iterator to start reading from the server list and return the
595 * first item 580 * first item.
596 */ 581 */
597static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) 582static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
598 __acquires(m->private->servers_lock)
599{ 583{
600 struct afs_cell *cell = m->private; 584 struct afs_net *net = afs_seq2net(m);
601
602 _enter("cell=%p pos=%Ld", cell, *_pos);
603 585
604 /* lock the list against modification */ 586 rcu_read_lock();
605 read_lock(&cell->servers_lock); 587 return seq_hlist_start_head_rcu(&net->fs_proc, *_pos);
606 return seq_list_start_head(&cell->servers, *_pos);
607} 588}
608 589
609/* 590/*
610 * move to next cell in cells list 591 * move to next cell in cells list
611 */ 592 */
612static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, 593static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
613 loff_t *_pos)
614{ 594{
615 struct afs_cell *cell = p->private; 595 struct afs_net *net = afs_seq2net(m);
616 596
617 _enter("cell=%p pos=%Ld", cell, *_pos); 597 return seq_hlist_next_rcu(v, &net->fs_proc, _pos);
618 return seq_list_next(v, &cell->servers, _pos);
619} 598}
620 599
621/* 600/*
622 * clean up after reading from the cells list 601 * clean up after reading from the cells list
623 */ 602 */
624static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) 603static void afs_proc_servers_stop(struct seq_file *p, void *v)
625 __releases(p->private->servers_lock)
626{ 604{
627 struct afs_cell *cell = p->private; 605 rcu_read_unlock();
628
629 read_unlock(&cell->servers_lock);
630} 606}
631 607
632/* 608/*
633 * display a header line followed by a load of volume lines 609 * display a header line followed by a load of volume lines
634 */ 610 */
635static int afs_proc_cell_servers_show(struct seq_file *m, void *v) 611static int afs_proc_servers_show(struct seq_file *m, void *v)
636{ 612{
637 struct afs_cell *cell = m->private; 613 struct afs_server *server;
638 struct afs_server *server = list_entry(v, struct afs_server, link); 614 struct afs_addr_list *alist;
639 char ipaddr[20];
640 615
641 /* display header on line 1 */ 616 if (v == SEQ_START_TOKEN) {
642 if (v == &cell->servers) { 617 seq_puts(m, "UUID USE ADDR\n");
643 seq_puts(m, "USE ADDR STATE\n");
644 return 0; 618 return 0;
645 } 619 }
646 620
647 /* display one cell per line on subsequent lines */ 621 server = list_entry(v, struct afs_server, proc_link);
648 sprintf(ipaddr, "%pI4", &server->addr); 622 alist = rcu_dereference(server->addresses);
649 seq_printf(m, "%3d %-15.15s %5d\n", 623 seq_printf(m, "%pU %3d %pISp\n",
650 atomic_read(&server->usage), ipaddr, server->fs_state); 624 &server->uuid,
651 625 atomic_read(&server->usage),
626 &alist->addrs[alist->index].transport);
652 return 0; 627 return 0;
653} 628}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
new file mode 100644
index 000000000000..e728ca1776c9
--- /dev/null
+++ b/fs/afs/rotate.c
@@ -0,0 +1,715 @@
1/* Handle fileserver selection and rotation.
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include <linux/fs.h>
15#include <linux/sched.h>
16#include <linux/delay.h>
17#include <linux/sched/signal.h>
18#include "internal.h"
19#include "afs_fs.h"
20
/*
 * Initialise a filesystem server cursor for iterating over FS servers.
 *
 * @fc:    The cursor to initialise; zero-filled in its entirety (including
 *         padding) so that all flags, indices and pointers start clear.
 * @vnode: Currently unused here; kept so callers can pass context and so the
 *         signature matches afs_begin_vnode_operation()'s usage.
 */
void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
	memset(fc, 0, sizeof(*fc));
}
28
/*
 * Begin an operation on the fileserver.
 *
 * Fileserver operations are serialised on the server by vnode, so we serialise
 * them here also using the io_lock.
 *
 * Returns true if the lock was obtained and the caller may proceed; false if
 * the wait for the lock was interrupted (fc->ac.error = -EINTR and the
 * cursor is marked stopped).
 */
bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
			       struct key *key)
{
	afs_init_fs_cursor(fc, vnode);
	fc->vnode = vnode;
	fc->key = key;
	/* SHRT_MAX is a sentinel meaning "no operation attempted yet";
	 * afs_select_fileserver() branches to its start state on seeing it. */
	fc->ac.error = SHRT_MAX;

	if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
		fc->ac.error = -EINTR;
		fc->flags |= AFS_FS_CURSOR_STOP;
		return false;
	}

	/* A held file lock is only valid on the server that granted it, so
	 * restrict server rotation to the current server in that case. */
	if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
	    test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
	return true;
}
54
/*
 * Begin iteration through a server list, starting with the vnode's last used
 * server if possible, or the last recorded good server if not.
 *
 * On success, fc->server_list holds a ref on the volume's current server list
 * and fc->start/fc->index are set to the chosen starting slot.  On failure
 * (-ESTALE with CUR_ONLY set) the server list ref is still held; it is
 * released later by afs_end_vnode_operation().
 */
static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
				   struct afs_vnode *vnode)
{
	struct afs_cb_interest *cbi;
	int i;

	/* Pin the volume's current server list so it can't be replaced and
	 * freed under us whilst we iterate. */
	read_lock(&vnode->volume->servers_lock);
	fc->server_list = afs_get_serverlist(vnode->volume->servers);
	read_unlock(&vnode->volume->servers_lock);

	cbi = vnode->cb_interest;
	if (cbi) {
		/* See if the vnode's preferred record is still available */
		for (i = 0; i < fc->server_list->nr_servers; i++) {
			if (fc->server_list->servers[i].cb_interest == cbi) {
				fc->start = i;
				goto found_interest;
			}
		}

		/* If we have a lock outstanding on a server that's no longer
		 * serving this vnode, then we can't switch to another server
		 * and have to return an error.
		 */
		if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
			fc->ac.error = -ESTALE;
			return false;
		}

		/* Note that the callback promise is effectively broken */
		write_seqlock(&vnode->cb_lock);
		ASSERTCMP(cbi, ==, vnode->cb_interest);
		vnode->cb_interest = NULL;
		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
			vnode->cb_break++;
		write_sequnlock(&vnode->cb_lock);

		afs_put_cb_interest(afs_v2net(vnode), cbi);
		cbi = NULL;
	} else {
		/* No preferred server: start from the list's recorded
		 * last-known-good index instead. */
		fc->start = READ_ONCE(fc->server_list->index);
	}

found_interest:
	fc->index = fc->start;
	return true;
}
106
107/*
108 * Post volume busy note.
109 */
110static void afs_busy(struct afs_volume *volume, u32 abort_code)
111{
112 const char *m;
113
114 switch (abort_code) {
115 case VOFFLINE: m = "offline"; break;
116 case VRESTARTING: m = "restarting"; break;
117 case VSALVAGING: m = "being salvaged"; break;
118 default: m = "busy"; break;
119 }
120
121 pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
122}
123
124/*
125 * Sleep and retry the operation to the same fileserver.
126 */
127static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
128{
129 msleep_interruptible(1000);
130 if (signal_pending(current)) {
131 fc->ac.error = -ERESTARTSYS;
132 return false;
133 }
134
135 return true;
136}
137
/*
 * Select the fileserver to use.  May be called multiple times to rotate
 * through the fileservers.
 *
 * Returns true with fc->ac set up when the caller should (re)issue the RPC to
 * the selected server/address; returns false when the operation has completed
 * or failed, with the outcome in fc->ac.error.
 */
bool afs_select_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_list *alist;
	struct afs_server *server;
	struct afs_vnode *vnode = fc->vnode;

	_enter("%u/%u,%u/%u,%d,%d",
	       fc->index, fc->start,
	       fc->ac.index, fc->ac.start,
	       fc->ac.error, fc->ac.abort_code);

	if (fc->flags & AFS_FS_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* Evaluate the result of the previous operation, if there was one. */
	switch (fc->ac.error) {
	case SHRT_MAX:
		/* Sentinel from afs_begin_vnode_operation(): nothing has been
		 * attempted yet, so begin iteration. */
		goto start;

	case 0:
	default:
		/* Success or local failure. Stop. */
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [okay/local %d]", fc->ac.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (fc->ac.abort_code) {
		case VNOVOL:
			/* This fileserver doesn't know about the volume.
			 * - May indicate that the VL is wrong - retry once and compare
			 *   the results.
			 * - May indicate that the fileserver couldn't attach to the vol.
			 */
			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}

			/* Mask this slot out so it isn't retried for this
			 * server list generation. */
			write_lock(&vnode->volume->servers_lock);
			fc->server_list->vnovol_mask |= 1 << fc->index;
			write_unlock(&vnode->volume->servers_lock);

			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
			if (fc->ac.error < 0)
				goto failed;

			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
				fc->ac.error = -ENOMEDIUM;
				goto failed;
			}

			/* If the server list didn't change, then assume that
			 * it's the fileserver having trouble.
			 */
			if (vnode->volume->servers == fc->server_list) {
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}

			/* Try again */
			fc->flags |= AFS_FS_CURSOR_VNOVOL;
			_leave(" = t [vnovol]");
			return true;

		case VSALVAGE: /* TODO: Should this return an error or iterate? */
		case VVOLEXISTS:
		case VNOSERVICE:
		case VONLINE:
		case VDISKFULL:
		case VOVERQUOTA:
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto next_server;

		case VOFFLINE:
			/* Log the transition once; BUSY and OFFLINE are
			 * mutually exclusive state bits. */
			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
				afs_busy(vnode->volume, fc->ac.abort_code);
				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
			}
			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
				fc->ac.error = -EADV;
				goto failed;
			}
			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
				fc->ac.error = -ESTALE;
				goto failed;
			}
			goto busy;

		case VSALVAGING:
		case VRESTARTING:
		case VBUSY:
			/* Retry after going round all the servers unless we
			 * have a file lock we need to maintain.
			 */
			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
				fc->ac.error = -EBUSY;
				goto failed;
			}
			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
				afs_busy(vnode->volume, fc->ac.abort_code);
				clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
			}
		busy:
			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
				if (!afs_sleep_and_retry(fc))
					goto failed;

				/* Retry with same server & address */
				_leave(" = t [vbusy]");
				return true;
			}

			fc->flags |= AFS_FS_CURSOR_VBUSY;
			goto next_server;

		case VMOVED:
			/* The volume migrated to another server.  We consider
			 * consider all locks and callbacks broken and request
			 * an update from the VLDB.
			 *
			 * We also limit the number of VMOVED hops we will
			 * honour, just in case someone sets up a loop.
			 */
			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}
			fc->flags |= AFS_FS_CURSOR_VMOVED;

			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
			if (fc->ac.error < 0)
				goto failed;

			/* If the server list didn't change, then the VLDB is
			 * out of sync with the fileservers.  This is hopefully
			 * a temporary condition, however, so we don't want to
			 * permanently block access to the file.
			 *
			 * TODO: Try other fileservers if we can.
			 *
			 * TODO: Retry a few times with sleeps.
			 */
			if (vnode->volume->servers == fc->server_list) {
				fc->ac.error = -ENOMEDIUM;
				goto failed;
			}

			goto restart_from_beginning;

		default:
			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto failed;
		}

	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Couldn't reach that address; move on to the server's next
		 * address. */
		_debug("no conn");
		goto iterate_address;
	}

restart_from_beginning:
	_debug("restart");
	/* Drop all cursor state and redo the volume/server lookup from
	 * scratch. */
	afs_end_cursor(&fc->ac);
	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
	fc->cbi = NULL;
	afs_put_serverlist(afs_v2net(vnode), fc->server_list);
	fc->server_list = NULL;
start:
	_debug("start");
	/* See if we need to do an update of the volume record.  Note that the
	 * volume may have moved or even have been deleted.
	 */
	fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
	if (fc->ac.error < 0)
		goto failed;

	if (!afs_start_fs_iteration(fc, vnode))
		goto failed;
	goto use_server;

next_server:
	_debug("next");
	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
	fc->cbi = NULL;
	/* Advance circularly through the server list, stopping once we come
	 * back round to the starting slot. */
	fc->index++;
	if (fc->index >= fc->server_list->nr_servers)
		fc->index = 0;
	if (fc->index != fc->start)
		goto use_server;

	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (fc->flags & AFS_FS_CURSOR_VBUSY)
		goto restart_from_beginning;

	fc->ac.error = -EDESTADDRREQ;
	goto failed;

use_server:
	_debug("use");
	/* We're starting on a different fileserver from the list.  We need to
	 * check it, create a callback intercept, find its address list and
	 * probe its capabilities before we use it.
	 */
	ASSERTCMP(fc->ac.alist, ==, NULL);
	server = fc->server_list->servers[fc->index].server;

	if (!afs_check_server_record(fc, server))
		goto failed;

	_debug("USING SERVER: %pU", &server->uuid);

	/* Make sure we've got a callback interest record for this server.  We
	 * have to link it in before we send the request as we can be sent a
	 * break request before we've finished decoding the reply and
	 * installing the vnode.
	 */
	fc->ac.error = afs_register_server_cb_interest(
		vnode, &fc->server_list->servers[fc->index]);
	if (fc->ac.error < 0)
		goto failed;

	fc->cbi = afs_get_cb_interest(vnode->cb_interest);

	/* fs_lock guards the address-list pointer, hence the _protected
	 * dereference rather than an RCU read-side section. */
	read_lock(&server->fs_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&server->fs_lock));
	afs_get_addrlist(alist);
	read_unlock(&server->fs_lock);


	/* Probe the current fileserver if we haven't done so yet. */
	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
		fc->ac.alist = afs_get_addrlist(alist);

		if (!afs_probe_fileserver(fc))
			goto failed;
	}

	if (!fc->ac.alist)
		fc->ac.alist = alist;
	else
		afs_put_addrlist(alist);

	fc->ac.addr = NULL;
	fc->ac.start = READ_ONCE(alist->index);
	fc->ac.index = fc->ac.start;
	fc->ac.error = 0;
	fc->ac.begun = false;
	goto iterate_address;

iterate_address:
	ASSERT(fc->ac.alist);
	_debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	if (afs_iterate_addresses(&fc->ac)) {
		_leave(" = t");
		return true;
	}

	afs_end_cursor(&fc->ac);
	goto next_server;

failed:
	fc->flags |= AFS_FS_CURSOR_STOP;
	_leave(" = f [failed %d]", fc->ac.error);
	return false;
}
427
/*
 * Select the same fileserver we used for a vnode before and only that
 * fileserver.  We use this when we have a lock on that file, which is backed
 * only by the fileserver we obtained it from.
 *
 * Returns true with fc->ac.alist holding a ref on the server's address list;
 * false with fc->ac.error = -ESTALE if we no longer know which server holds
 * the lock state.
 */
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_vnode *vnode = fc->vnode;
	struct afs_cb_interest *cbi = vnode->cb_interest;
	struct afs_addr_list *alist;

	_enter("");

	/* No callback interest means no record of the granting server. */
	if (!cbi) {
		fc->ac.error = -ESTALE;
		fc->flags |= AFS_FS_CURSOR_STOP;
		return false;
	}

	read_lock(&cbi->server->fs_lock);
	alist = afs_get_addrlist(cbi->server->addresses);
	read_unlock(&cbi->server->fs_lock);
	if (!alist) {
		fc->ac.error = -ESTALE;
		fc->flags |= AFS_FS_CURSOR_STOP;
		return false;
	}

	fc->ac.alist = alist;
	fc->ac.error = 0;
	return true;
}
460
461/*
462 * Tidy up a filesystem cursor and unlock the vnode.
463 */
464int afs_end_vnode_operation(struct afs_fs_cursor *fc)
465{
466 struct afs_net *net = afs_v2net(fc->vnode);
467 int ret;
468
469 mutex_unlock(&fc->vnode->io_lock);
470
471 afs_end_cursor(&fc->ac);
472 afs_put_cb_interest(net, fc->cbi);
473 afs_put_serverlist(net, fc->server_list);
474
475 ret = fc->ac.error;
476 if (ret == -ECONNABORTED)
477 afs_abort_to_error(fc->ac.abort_code);
478
479 return fc->ac.error;
480}
481
482#if 0
/*
 * Set a filesystem server cursor for using a specific FS server.
 *
 * NOTE(review): this function sits inside an "#if 0" block and is not
 * compiled; it still references the pre-rewrite data model (fc->server,
 * server->fs_state).  Kept only as a reference during the overhaul.
 */
int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
	afs_init_fs_cursor(fc, vnode);

	/* Reuse the existing callback interest's server if it is healthy
	 * (fs_state == 0); otherwise report its recorded error state. */
	read_seqlock_excl(&vnode->cb_lock);
	if (vnode->cb_interest) {
		if (vnode->cb_interest->server->fs_state == 0)
			fc->server = afs_get_server(vnode->cb_interest->server);
		else
			fc->ac.error = vnode->cb_interest->server->fs_state;
	} else {
		fc->ac.error = -ESTALE;
	}
	read_sequnlock_excl(&vnode->cb_lock);

	return fc->ac.error;
}
503
/*
 * pick a server to use to try accessing this volume
 * - returns with an elevated usage count on the server chosen
 *
 * NOTE(review): this function sits inside an "#if 0" block and is not
 * compiled; it references the pre-rewrite data model (volume->vlocation,
 * volume->nservers, server->fs_state).  Kept only as a reference.
 */
bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
	struct afs_volume *volume = vnode->volume;
	struct afs_server *server;
	int ret, state, loop;

	_enter("%s", volume->vlocation->vldb.name);

	/* stick with the server we're already using if we can */
	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
		fc->server = afs_get_server(vnode->cb_interest->server);
		goto set_server;
	}

	down_read(&volume->server_sem);

	/* handle the no-server case */
	if (volume->nservers == 0) {
		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
		up_read(&volume->server_sem);
		_leave(" = f [no servers %d]", fc->ac.error);
		return false;
	}

	/* basically, just search the list for the first live server and use
	 * that */
	ret = 0;
	for (loop = 0; loop < volume->nservers; loop++) {
		server = volume->servers[loop];
		state = server->fs_state;

		_debug("consider %d [%d]", loop, state);

		/* The cascading conditions below keep the "least bad" error
		 * seen so far: ENETUNREACH < EHOSTUNREACH < ECONNREFUSED <
		 * anything else, so the final error reflects the most
		 * specific failure encountered. */
		switch (state) {
		case 0:
			goto picked_server;

		case -ENETUNREACH:
			if (ret == 0)
				ret = state;
			break;

		case -EHOSTUNREACH:
			if (ret == 0 ||
			    ret == -ENETUNREACH)
				ret = state;
			break;

		case -ECONNREFUSED:
			if (ret == 0 ||
			    ret == -ENETUNREACH ||
			    ret == -EHOSTUNREACH)
				ret = state;
			break;

		default:
		case -EREMOTEIO:
			if (ret == 0 ||
			    ret == -ENETUNREACH ||
			    ret == -EHOSTUNREACH ||
			    ret == -ECONNREFUSED)
				ret = state;
			break;
		}
	}

error:
	fc->ac.error = ret;

	/* no available servers
	 * - TODO: handle the no active servers case better
	 */
	up_read(&volume->server_sem);
	_leave(" = f [%d]", fc->ac.error);
	return false;

picked_server:
	/* Found an apparently healthy server.  We need to register an interest
	 * in receiving callbacks before we talk to it.
	 */
	ret = afs_register_server_cb_interest(vnode,
					      &volume->cb_interests[loop], server);
	if (ret < 0)
		goto error;

	fc->server = afs_get_server(server);
	up_read(&volume->server_sem);
set_server:
	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
	fc->ac.addr = &fc->ac.alist->addrs[0];
	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
	return true;
}
602
/*
 * release a server after use
 * - releases the ref on the server struct that was acquired by picking
 * - records result of using a particular server to access a volume
 * - return true to try again, false if okay or to issue error
 * - the caller must release the server struct if result was false
 *
 * Note which false-paths drop fc->server themselves: the complete-rejection
 * and local-failure paths put the ref and NULL fc->server; the success path
 * leaves fc->server held for the caller (afs_end_fs_cursor) to release.
 */
bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
			   struct afs_vnode *vnode)
{
	struct afs_volume *volume = vnode->volume;
	struct afs_server *server = fc->server;
	unsigned loop;

	_enter("%s,%pIS,%d",
	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
	       fc->ac.error);

	switch (fc->ac.error) {
	/* success */
	case 0:
		/* NOTE(review): fs_state is cleared without fs_lock here,
		 * unlike the dead-marking path below — presumably a plain
		 * store to 0 is considered race-tolerant; confirm.
		 */
		server->fs_state = 0;
		_leave(" = f");
		return false;

	/* the fileserver denied all knowledge of the volume */
	case -ENOMEDIUM:
		down_write(&volume->server_sem);

		/* firstly, find where the server is in the active list (if it
		 * is) */
		for (loop = 0; loop < volume->nservers; loop++)
			if (volume->servers[loop] == server)
				goto present;

		/* no longer there - may have been discarded by another op */
		goto try_next_server_upw;

	present:
		/* Remove the server from the volume's list, closing the gap,
		 * and drop the list's ref on it (distinct from the cursor's
		 * ref, which is dropped on whichever exit path is taken).
		 */
		volume->nservers--;
		memmove(&volume->servers[loop],
			&volume->servers[loop + 1],
			sizeof(volume->servers[loop]) *
			(volume->nservers - loop));
		volume->servers[volume->nservers] = NULL;
		afs_put_server(afs_v2net(vnode), server);
		volume->rjservers++;

		if (volume->nservers > 0)
			/* another server might acknowledge its existence */
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *   no longer registered
		 */
		up_write(&volume->server_sem);
		afs_put_server(afs_v2net(vnode), server);	/* cursor's ref */
		fc->server = NULL;
		_leave(" = f [completely rejected]");
		return false;

	/* problem reaching the server */
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIME:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* mark the server as dead
		 * TODO: vary dead timeout depending on error
		 */
		/* Only the first error sticks; later failures don't overwrite
		 * the recorded state.
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			server->fs_state = fc->ac.error;
			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

	/* miscellaneous error */
	default:
	case -ENOMEM:
	case -ENONET:
		/* tell the caller to accept the result */
		afs_put_server(afs_v2net(vnode), server);
		fc->server = NULL;
		_leave(" = f [local failure]");
		return false;
	}

	/* tell the caller to loop around and try the next server */
	/* _upw variant additionally releases the write-locked server_sem
	 * taken in the -ENOMEDIUM case.
	 */
try_next_server_upw:
	up_write(&volume->server_sem);
try_next_server:
	afs_put_server(afs_v2net(vnode), server);
	_leave(" = t [try next server]");
	return true;
}
704
705/*
706 * Clean up a fileserver cursor.
707 */
708int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
709{
710 afs_end_cursor(&fc->ac);
711 afs_put_server(net, fc->server);
712 return fc->ac.error;
713}
714
715#endif
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bb1e2caa1720..ea1460b9b71a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -17,13 +17,10 @@
17#include "internal.h" 17#include "internal.h"
18#include "afs_cm.h" 18#include "afs_cm.h"
19 19
20struct socket *afs_socket; /* my RxRPC socket */ 20struct workqueue_struct *afs_async_calls;
21static struct workqueue_struct *afs_async_calls;
22static struct afs_call *afs_spare_incoming_call;
23atomic_t afs_outstanding_calls;
24 21
25static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); 22static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
26static int afs_wait_for_call_to_complete(struct afs_call *); 23static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
27static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); 24static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
28static void afs_process_async_call(struct work_struct *); 25static void afs_process_async_call(struct work_struct *);
29static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); 26static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
@@ -34,24 +31,13 @@ static int afs_deliver_cm_op_id(struct afs_call *);
34static const struct afs_call_type afs_RXCMxxxx = { 31static const struct afs_call_type afs_RXCMxxxx = {
35 .name = "CB.xxxx", 32 .name = "CB.xxxx",
36 .deliver = afs_deliver_cm_op_id, 33 .deliver = afs_deliver_cm_op_id,
37 .abort_to_error = afs_abort_to_error,
38}; 34};
39 35
40static void afs_charge_preallocation(struct work_struct *);
41
42static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
43
44static int afs_wait_atomic_t(atomic_t *p)
45{
46 schedule();
47 return 0;
48}
49
50/* 36/*
51 * open an RxRPC socket and bind it to be a server for callback notifications 37 * open an RxRPC socket and bind it to be a server for callback notifications
52 * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT 38 * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
53 */ 39 */
54int afs_open_socket(void) 40int afs_open_socket(struct afs_net *net)
55{ 41{
56 struct sockaddr_rxrpc srx; 42 struct sockaddr_rxrpc srx;
57 struct socket *socket; 43 struct socket *socket;
@@ -59,28 +45,26 @@ int afs_open_socket(void)
59 45
60 _enter(""); 46 _enter("");
61 47
62 ret = -ENOMEM; 48 ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket);
63 afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
64 if (!afs_async_calls)
65 goto error_0;
66
67 ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
68 if (ret < 0) 49 if (ret < 0)
69 goto error_1; 50 goto error_1;
70 51
71 socket->sk->sk_allocation = GFP_NOFS; 52 socket->sk->sk_allocation = GFP_NOFS;
72 53
73 /* bind the callback manager's address to make this a server socket */ 54 /* bind the callback manager's address to make this a server socket */
55 memset(&srx, 0, sizeof(srx));
74 srx.srx_family = AF_RXRPC; 56 srx.srx_family = AF_RXRPC;
75 srx.srx_service = CM_SERVICE; 57 srx.srx_service = CM_SERVICE;
76 srx.transport_type = SOCK_DGRAM; 58 srx.transport_type = SOCK_DGRAM;
77 srx.transport_len = sizeof(srx.transport.sin); 59 srx.transport_len = sizeof(srx.transport.sin6);
78 srx.transport.sin.sin_family = AF_INET; 60 srx.transport.sin6.sin6_family = AF_INET6;
79 srx.transport.sin.sin_port = htons(AFS_CM_PORT); 61 srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
80 memset(&srx.transport.sin.sin_addr, 0,
81 sizeof(srx.transport.sin.sin_addr));
82 62
83 ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); 63 ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
64 if (ret == -EADDRINUSE) {
65 srx.transport.sin6.sin6_port = 0;
66 ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
67 }
84 if (ret < 0) 68 if (ret < 0)
85 goto error_2; 69 goto error_2;
86 70
@@ -91,16 +75,14 @@ int afs_open_socket(void)
91 if (ret < 0) 75 if (ret < 0)
92 goto error_2; 76 goto error_2;
93 77
94 afs_socket = socket; 78 net->socket = socket;
95 afs_charge_preallocation(NULL); 79 afs_charge_preallocation(&net->charge_preallocation_work);
96 _leave(" = 0"); 80 _leave(" = 0");
97 return 0; 81 return 0;
98 82
99error_2: 83error_2:
100 sock_release(socket); 84 sock_release(socket);
101error_1: 85error_1:
102 destroy_workqueue(afs_async_calls);
103error_0:
104 _leave(" = %d", ret); 86 _leave(" = %d", ret);
105 return ret; 87 return ret;
106} 88}
@@ -108,36 +90,36 @@ error_0:
108/* 90/*
109 * close the RxRPC socket AFS was using 91 * close the RxRPC socket AFS was using
110 */ 92 */
111void afs_close_socket(void) 93void afs_close_socket(struct afs_net *net)
112{ 94{
113 _enter(""); 95 _enter("");
114 96
115 kernel_listen(afs_socket, 0); 97 kernel_listen(net->socket, 0);
116 flush_workqueue(afs_async_calls); 98 flush_workqueue(afs_async_calls);
117 99
118 if (afs_spare_incoming_call) { 100 if (net->spare_incoming_call) {
119 afs_put_call(afs_spare_incoming_call); 101 afs_put_call(net->spare_incoming_call);
120 afs_spare_incoming_call = NULL; 102 net->spare_incoming_call = NULL;
121 } 103 }
122 104
123 _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); 105 _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
124 wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, 106 wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait,
125 TASK_UNINTERRUPTIBLE); 107 TASK_UNINTERRUPTIBLE);
126 _debug("no outstanding calls"); 108 _debug("no outstanding calls");
127 109
128 kernel_sock_shutdown(afs_socket, SHUT_RDWR); 110 kernel_sock_shutdown(net->socket, SHUT_RDWR);
129 flush_workqueue(afs_async_calls); 111 flush_workqueue(afs_async_calls);
130 sock_release(afs_socket); 112 sock_release(net->socket);
131 113
132 _debug("dework"); 114 _debug("dework");
133 destroy_workqueue(afs_async_calls);
134 _leave(""); 115 _leave("");
135} 116}
136 117
137/* 118/*
138 * Allocate a call. 119 * Allocate a call.
139 */ 120 */
140static struct afs_call *afs_alloc_call(const struct afs_call_type *type, 121static struct afs_call *afs_alloc_call(struct afs_net *net,
122 const struct afs_call_type *type,
141 gfp_t gfp) 123 gfp_t gfp)
142{ 124{
143 struct afs_call *call; 125 struct afs_call *call;
@@ -148,11 +130,13 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
148 return NULL; 130 return NULL;
149 131
150 call->type = type; 132 call->type = type;
133 call->net = net;
151 atomic_set(&call->usage, 1); 134 atomic_set(&call->usage, 1);
152 INIT_WORK(&call->async_work, afs_process_async_call); 135 INIT_WORK(&call->async_work, afs_process_async_call);
153 init_waitqueue_head(&call->waitq); 136 init_waitqueue_head(&call->waitq);
137 spin_lock_init(&call->state_lock);
154 138
155 o = atomic_inc_return(&afs_outstanding_calls); 139 o = atomic_inc_return(&net->nr_outstanding_calls);
156 trace_afs_call(call, afs_call_trace_alloc, 1, o, 140 trace_afs_call(call, afs_call_trace_alloc, 1, o,
157 __builtin_return_address(0)); 141 __builtin_return_address(0));
158 return call; 142 return call;
@@ -163,8 +147,9 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
163 */ 147 */
164void afs_put_call(struct afs_call *call) 148void afs_put_call(struct afs_call *call)
165{ 149{
150 struct afs_net *net = call->net;
166 int n = atomic_dec_return(&call->usage); 151 int n = atomic_dec_return(&call->usage);
167 int o = atomic_read(&afs_outstanding_calls); 152 int o = atomic_read(&net->nr_outstanding_calls);
168 153
169 trace_afs_call(call, afs_call_trace_put, n + 1, o, 154 trace_afs_call(call, afs_call_trace_put, n + 1, o,
170 __builtin_return_address(0)); 155 __builtin_return_address(0));
@@ -175,20 +160,22 @@ void afs_put_call(struct afs_call *call)
175 ASSERT(call->type->name != NULL); 160 ASSERT(call->type->name != NULL);
176 161
177 if (call->rxcall) { 162 if (call->rxcall) {
178 rxrpc_kernel_end_call(afs_socket, call->rxcall); 163 rxrpc_kernel_end_call(net->socket, call->rxcall);
179 call->rxcall = NULL; 164 call->rxcall = NULL;
180 } 165 }
181 if (call->type->destructor) 166 if (call->type->destructor)
182 call->type->destructor(call); 167 call->type->destructor(call);
183 168
169 afs_put_server(call->net, call->cm_server);
170 afs_put_cb_interest(call->net, call->cbi);
184 kfree(call->request); 171 kfree(call->request);
185 kfree(call); 172 kfree(call);
186 173
187 o = atomic_dec_return(&afs_outstanding_calls); 174 o = atomic_dec_return(&net->nr_outstanding_calls);
188 trace_afs_call(call, afs_call_trace_free, 0, o, 175 trace_afs_call(call, afs_call_trace_free, 0, o,
189 __builtin_return_address(0)); 176 __builtin_return_address(0));
190 if (o == 0) 177 if (o == 0)
191 wake_up_atomic_t(&afs_outstanding_calls); 178 wake_up_atomic_t(&net->nr_outstanding_calls);
192 } 179 }
193} 180}
194 181
@@ -200,7 +187,7 @@ int afs_queue_call_work(struct afs_call *call)
200 int u = atomic_inc_return(&call->usage); 187 int u = atomic_inc_return(&call->usage);
201 188
202 trace_afs_call(call, afs_call_trace_work, u, 189 trace_afs_call(call, afs_call_trace_work, u,
203 atomic_read(&afs_outstanding_calls), 190 atomic_read(&call->net->nr_outstanding_calls),
204 __builtin_return_address(0)); 191 __builtin_return_address(0));
205 192
206 INIT_WORK(&call->work, call->type->work); 193 INIT_WORK(&call->work, call->type->work);
@@ -213,12 +200,13 @@ int afs_queue_call_work(struct afs_call *call)
213/* 200/*
214 * allocate a call with flat request and reply buffers 201 * allocate a call with flat request and reply buffers
215 */ 202 */
216struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, 203struct afs_call *afs_alloc_flat_call(struct afs_net *net,
204 const struct afs_call_type *type,
217 size_t request_size, size_t reply_max) 205 size_t request_size, size_t reply_max)
218{ 206{
219 struct afs_call *call; 207 struct afs_call *call;
220 208
221 call = afs_alloc_call(type, GFP_NOFS); 209 call = afs_alloc_call(net, type, GFP_NOFS);
222 if (!call) 210 if (!call)
223 goto nomem_call; 211 goto nomem_call;
224 212
@@ -236,6 +224,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
236 goto nomem_free; 224 goto nomem_free;
237 } 225 }
238 226
227 call->operation_ID = type->op;
239 init_waitqueue_head(&call->waitq); 228 init_waitqueue_head(&call->waitq);
240 return call; 229 return call;
241 230
@@ -300,8 +289,7 @@ static void afs_notify_end_request_tx(struct sock *sock,
300{ 289{
301 struct afs_call *call = (struct afs_call *)call_user_ID; 290 struct afs_call *call = (struct afs_call *)call_user_ID;
302 291
303 if (call->state == AFS_CALL_REQUESTING) 292 afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
304 call->state = AFS_CALL_AWAIT_REPLY;
305} 293}
306 294
307/* 295/*
@@ -319,11 +307,13 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
319 307
320 do { 308 do {
321 afs_load_bvec(call, msg, bv, first, last, offset); 309 afs_load_bvec(call, msg, bv, first, last, offset);
310 trace_afs_send_pages(call, msg, first, last, offset);
311
322 offset = 0; 312 offset = 0;
323 bytes = msg->msg_iter.count; 313 bytes = msg->msg_iter.count;
324 nr = msg->msg_iter.nr_segs; 314 nr = msg->msg_iter.nr_segs;
325 315
326 ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, 316 ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg,
327 bytes, afs_notify_end_request_tx); 317 bytes, afs_notify_end_request_tx);
328 for (loop = 0; loop < nr; loop++) 318 for (loop = 0; loop < nr; loop++)
329 put_page(bv[loop].bv_page); 319 put_page(bv[loop].bv_page);
@@ -333,63 +323,62 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
333 first += nr; 323 first += nr;
334 } while (first <= last); 324 } while (first <= last);
335 325
326 trace_afs_sent_pages(call, call->first, last, first, ret);
336 return ret; 327 return ret;
337} 328}
338 329
339/* 330/*
340 * initiate a call 331 * initiate a call
341 */ 332 */
342int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, 333long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
343 bool async) 334 gfp_t gfp, bool async)
344{ 335{
345 struct sockaddr_rxrpc srx; 336 struct sockaddr_rxrpc *srx = ac->addr;
346 struct rxrpc_call *rxcall; 337 struct rxrpc_call *rxcall;
347 struct msghdr msg; 338 struct msghdr msg;
348 struct kvec iov[1]; 339 struct kvec iov[1];
349 size_t offset; 340 size_t offset;
350 s64 tx_total_len; 341 s64 tx_total_len;
351 u32 abort_code;
352 int ret; 342 int ret;
353 343
354 _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); 344 _enter(",{%pISp},", &srx->transport);
355 345
356 ASSERT(call->type != NULL); 346 ASSERT(call->type != NULL);
357 ASSERT(call->type->name != NULL); 347 ASSERT(call->type->name != NULL);
358 348
359 _debug("____MAKE %p{%s,%x} [%d]____", 349 _debug("____MAKE %p{%s,%x} [%d]____",
360 call, call->type->name, key_serial(call->key), 350 call, call->type->name, key_serial(call->key),
361 atomic_read(&afs_outstanding_calls)); 351 atomic_read(&call->net->nr_outstanding_calls));
362 352
363 call->async = async; 353 call->async = async;
364 354
365 memset(&srx, 0, sizeof(srx));
366 srx.srx_family = AF_RXRPC;
367 srx.srx_service = call->service_id;
368 srx.transport_type = SOCK_DGRAM;
369 srx.transport_len = sizeof(srx.transport.sin);
370 srx.transport.sin.sin_family = AF_INET;
371 srx.transport.sin.sin_port = call->port;
372 memcpy(&srx.transport.sin.sin_addr, addr, 4);
373
374 /* Work out the length we're going to transmit. This is awkward for 355 /* Work out the length we're going to transmit. This is awkward for
375 * calls such as FS.StoreData where there's an extra injection of data 356 * calls such as FS.StoreData where there's an extra injection of data
376 * after the initial fixed part. 357 * after the initial fixed part.
377 */ 358 */
378 tx_total_len = call->request_size; 359 tx_total_len = call->request_size;
379 if (call->send_pages) { 360 if (call->send_pages) {
380 tx_total_len += call->last_to - call->first_offset; 361 if (call->last == call->first) {
381 tx_total_len += (call->last - call->first) * PAGE_SIZE; 362 tx_total_len += call->last_to - call->first_offset;
363 } else {
364 /* It looks mathematically like you should be able to
365 * combine the following lines with the ones above, but
366 * unsigned arithmetic is fun when it wraps...
367 */
368 tx_total_len += PAGE_SIZE - call->first_offset;
369 tx_total_len += call->last_to;
370 tx_total_len += (call->last - call->first - 1) * PAGE_SIZE;
371 }
382 } 372 }
383 373
384 /* create a call */ 374 /* create a call */
385 rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, 375 rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
386 (unsigned long)call, 376 (unsigned long)call,
387 tx_total_len, gfp, 377 tx_total_len, gfp,
388 (async ? 378 (async ?
389 afs_wake_up_async_call : 379 afs_wake_up_async_call :
390 afs_wake_up_call_waiter), 380 afs_wake_up_call_waiter),
391 call->upgrade); 381 call->upgrade);
392 call->key = NULL;
393 if (IS_ERR(rxcall)) { 382 if (IS_ERR(rxcall)) {
394 ret = PTR_ERR(rxcall); 383 ret = PTR_ERR(rxcall);
395 goto error_kill_call; 384 goto error_kill_call;
@@ -409,14 +398,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
409 msg.msg_controllen = 0; 398 msg.msg_controllen = 0;
410 msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); 399 msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
411 400
412 /* We have to change the state *before* sending the last packet as 401 ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
413 * rxrpc might give us the reply before it returns from sending the
414 * request. Further, if the send fails, we may already have been given
415 * a notification and may have collected it.
416 */
417 if (!call->send_pages)
418 call->state = AFS_CALL_AWAIT_REPLY;
419 ret = rxrpc_kernel_send_data(afs_socket, rxcall,
420 &msg, call->request_size, 402 &msg, call->request_size,
421 afs_notify_end_request_tx); 403 afs_notify_end_request_tx);
422 if (ret < 0) 404 if (ret < 0)
@@ -433,22 +415,26 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
433 if (call->async) 415 if (call->async)
434 return -EINPROGRESS; 416 return -EINPROGRESS;
435 417
436 return afs_wait_for_call_to_complete(call); 418 return afs_wait_for_call_to_complete(call, ac);
437 419
438error_do_abort: 420error_do_abort:
439 call->state = AFS_CALL_COMPLETE; 421 call->state = AFS_CALL_COMPLETE;
440 if (ret != -ECONNABORTED) { 422 if (ret != -ECONNABORTED) {
441 rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, 423 rxrpc_kernel_abort_call(call->net->socket, rxcall,
442 ret, "KSD"); 424 RX_USER_ABORT, ret, "KSD");
443 } else { 425 } else {
444 abort_code = 0;
445 offset = 0; 426 offset = 0;
446 rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, 427 rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
447 false, &abort_code, &call->service_id); 428 0, &offset, false, &call->abort_code,
448 ret = call->type->abort_to_error(abort_code); 429 &call->service_id);
430 ac->abort_code = call->abort_code;
431 ac->responded = true;
449 } 432 }
433 call->error = ret;
434 trace_afs_call_done(call);
450error_kill_call: 435error_kill_call:
451 afs_put_call(call); 436 afs_put_call(call);
437 ac->error = ret;
452 _leave(" = %d", ret); 438 _leave(" = %d", ret);
453 return ret; 439 return ret;
454} 440}
@@ -458,88 +444,98 @@ error_kill_call:
458 */ 444 */
459static void afs_deliver_to_call(struct afs_call *call) 445static void afs_deliver_to_call(struct afs_call *call)
460{ 446{
461 u32 abort_code; 447 enum afs_call_state state;
448 u32 abort_code, remote_abort = 0;
462 int ret; 449 int ret;
463 450
464 _enter("%s", call->type->name); 451 _enter("%s", call->type->name);
465 452
466 while (call->state == AFS_CALL_AWAIT_REPLY || 453 while (state = READ_ONCE(call->state),
467 call->state == AFS_CALL_AWAIT_OP_ID || 454 state == AFS_CALL_CL_AWAIT_REPLY ||
468 call->state == AFS_CALL_AWAIT_REQUEST || 455 state == AFS_CALL_SV_AWAIT_OP_ID ||
469 call->state == AFS_CALL_AWAIT_ACK 456 state == AFS_CALL_SV_AWAIT_REQUEST ||
457 state == AFS_CALL_SV_AWAIT_ACK
470 ) { 458 ) {
471 if (call->state == AFS_CALL_AWAIT_ACK) { 459 if (state == AFS_CALL_SV_AWAIT_ACK) {
472 size_t offset = 0; 460 size_t offset = 0;
473 ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, 461 ret = rxrpc_kernel_recv_data(call->net->socket,
462 call->rxcall,
474 NULL, 0, &offset, false, 463 NULL, 0, &offset, false,
475 &call->abort_code, 464 &remote_abort,
476 &call->service_id); 465 &call->service_id);
477 trace_afs_recv_data(call, 0, offset, false, ret); 466 trace_afs_recv_data(call, 0, offset, false, ret);
478 467
479 if (ret == -EINPROGRESS || ret == -EAGAIN) 468 if (ret == -EINPROGRESS || ret == -EAGAIN)
480 return; 469 return;
481 if (ret == 1 || ret < 0) { 470 if (ret < 0 || ret == 1) {
482 call->state = AFS_CALL_COMPLETE; 471 if (ret == 1)
483 goto done; 472 ret = 0;
473 goto call_complete;
484 } 474 }
485 return; 475 return;
486 } 476 }
487 477
488 ret = call->type->deliver(call); 478 ret = call->type->deliver(call);
479 state = READ_ONCE(call->state);
489 switch (ret) { 480 switch (ret) {
490 case 0: 481 case 0:
491 if (call->state == AFS_CALL_AWAIT_REPLY) 482 if (state == AFS_CALL_CL_PROC_REPLY)
492 call->state = AFS_CALL_COMPLETE; 483 goto call_complete;
484 ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
493 goto done; 485 goto done;
494 case -EINPROGRESS: 486 case -EINPROGRESS:
495 case -EAGAIN: 487 case -EAGAIN:
496 goto out; 488 goto out;
489 case -EIO:
497 case -ECONNABORTED: 490 case -ECONNABORTED:
498 goto call_complete; 491 ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
492 goto done;
499 case -ENOTCONN: 493 case -ENOTCONN:
500 abort_code = RX_CALL_DEAD; 494 abort_code = RX_CALL_DEAD;
501 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 495 rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
502 abort_code, ret, "KNC"); 496 abort_code, ret, "KNC");
503 goto save_error; 497 goto local_abort;
504 case -ENOTSUPP: 498 case -ENOTSUPP:
505 abort_code = RXGEN_OPCODE; 499 abort_code = RXGEN_OPCODE;
506 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 500 rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
507 abort_code, ret, "KIV"); 501 abort_code, ret, "KIV");
508 goto save_error; 502 goto local_abort;
509 case -ENODATA: 503 case -ENODATA:
510 case -EBADMSG: 504 case -EBADMSG:
511 case -EMSGSIZE: 505 case -EMSGSIZE:
512 default: 506 default:
513 abort_code = RXGEN_CC_UNMARSHAL; 507 abort_code = RXGEN_CC_UNMARSHAL;
514 if (call->state != AFS_CALL_AWAIT_REPLY) 508 if (state != AFS_CALL_CL_AWAIT_REPLY)
515 abort_code = RXGEN_SS_UNMARSHAL; 509 abort_code = RXGEN_SS_UNMARSHAL;
516 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 510 rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
517 abort_code, -EBADMSG, "KUM"); 511 abort_code, -EBADMSG, "KUM");
518 goto save_error; 512 goto local_abort;
519 } 513 }
520 } 514 }
521 515
522done: 516done:
523 if (call->state == AFS_CALL_COMPLETE && call->incoming) 517 if (state == AFS_CALL_COMPLETE && call->incoming)
524 afs_put_call(call); 518 afs_put_call(call);
525out: 519out:
526 _leave(""); 520 _leave("");
527 return; 521 return;
528 522
529save_error: 523local_abort:
530 call->error = ret; 524 abort_code = 0;
531call_complete: 525call_complete:
532 call->state = AFS_CALL_COMPLETE; 526 afs_set_call_complete(call, ret, remote_abort);
527 state = AFS_CALL_COMPLETE;
533 goto done; 528 goto done;
534} 529}
535 530
536/* 531/*
537 * wait synchronously for a call to complete 532 * wait synchronously for a call to complete
538 */ 533 */
539static int afs_wait_for_call_to_complete(struct afs_call *call) 534static long afs_wait_for_call_to_complete(struct afs_call *call,
535 struct afs_addr_cursor *ac)
540{ 536{
541 signed long rtt2, timeout; 537 signed long rtt2, timeout;
542 int ret; 538 long ret;
543 u64 rtt; 539 u64 rtt;
544 u32 life, last_life; 540 u32 life, last_life;
545 541
@@ -547,30 +543,31 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
547 543
548 _enter(""); 544 _enter("");
549 545
550 rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall); 546 rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
551 rtt2 = nsecs_to_jiffies64(rtt) * 2; 547 rtt2 = nsecs_to_jiffies64(rtt) * 2;
552 if (rtt2 < 2) 548 if (rtt2 < 2)
553 rtt2 = 2; 549 rtt2 = 2;
554 550
555 timeout = rtt2; 551 timeout = rtt2;
556 last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall); 552 last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
557 553
558 add_wait_queue(&call->waitq, &myself); 554 add_wait_queue(&call->waitq, &myself);
559 for (;;) { 555 for (;;) {
560 set_current_state(TASK_UNINTERRUPTIBLE); 556 set_current_state(TASK_UNINTERRUPTIBLE);
561 557
562 /* deliver any messages that are in the queue */ 558 /* deliver any messages that are in the queue */
563 if (call->state < AFS_CALL_COMPLETE && call->need_attention) { 559 if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
560 call->need_attention) {
564 call->need_attention = false; 561 call->need_attention = false;
565 __set_current_state(TASK_RUNNING); 562 __set_current_state(TASK_RUNNING);
566 afs_deliver_to_call(call); 563 afs_deliver_to_call(call);
567 continue; 564 continue;
568 } 565 }
569 566
570 if (call->state == AFS_CALL_COMPLETE) 567 if (afs_check_call_state(call, AFS_CALL_COMPLETE))
571 break; 568 break;
572 569
573 life = rxrpc_kernel_check_life(afs_socket, call->rxcall); 570 life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
574 if (timeout == 0 && 571 if (timeout == 0 &&
575 life == last_life && signal_pending(current)) 572 life == last_life && signal_pending(current))
576 break; 573 break;
@@ -587,16 +584,34 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
587 __set_current_state(TASK_RUNNING); 584 __set_current_state(TASK_RUNNING);
588 585
589 /* Kill off the call if it's still live. */ 586 /* Kill off the call if it's still live. */
590 if (call->state < AFS_CALL_COMPLETE) { 587 if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
591 _debug("call interrupted"); 588 _debug("call interrupted");
592 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 589 if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
593 RX_USER_ABORT, -EINTR, "KWI"); 590 RX_USER_ABORT, -EINTR, "KWI"))
591 afs_set_call_complete(call, -EINTR, 0);
592 }
593
594 spin_lock_bh(&call->state_lock);
595 ac->abort_code = call->abort_code;
596 ac->error = call->error;
597 spin_unlock_bh(&call->state_lock);
598
599 ret = ac->error;
600 switch (ret) {
601 case 0:
602 if (call->ret_reply0) {
603 ret = (long)call->reply[0];
604 call->reply[0] = NULL;
605 }
606 /* Fall through */
607 case -ECONNABORTED:
608 ac->responded = true;
609 break;
594 } 610 }
595 611
596 ret = call->error;
597 _debug("call complete"); 612 _debug("call complete");
598 afs_put_call(call); 613 afs_put_call(call);
599 _leave(" = %d", ret); 614 _leave(" = %p", (void *)ret);
600 return ret; 615 return ret;
601} 616}
602 617
@@ -627,7 +642,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
627 u = __atomic_add_unless(&call->usage, 1, 0); 642 u = __atomic_add_unless(&call->usage, 1, 0);
628 if (u != 0) { 643 if (u != 0) {
629 trace_afs_call(call, afs_call_trace_wake, u, 644 trace_afs_call(call, afs_call_trace_wake, u,
630 atomic_read(&afs_outstanding_calls), 645 atomic_read(&call->net->nr_outstanding_calls),
631 __builtin_return_address(0)); 646 __builtin_return_address(0));
632 647
633 if (!queue_work(afs_async_calls, &call->async_work)) 648 if (!queue_work(afs_async_calls, &call->async_work))
@@ -666,7 +681,7 @@ static void afs_process_async_call(struct work_struct *work)
666 } 681 }
667 682
668 if (call->state == AFS_CALL_COMPLETE) { 683 if (call->state == AFS_CALL_COMPLETE) {
669 call->reply = NULL; 684 call->reply[0] = NULL;
670 685
671 /* We have two refs to release - one from the alloc and one 686 /* We have two refs to release - one from the alloc and one
672 * queued with the work item - and we can't just deallocate the 687 * queued with the work item - and we can't just deallocate the
@@ -691,22 +706,24 @@ static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
691/* 706/*
692 * Charge the incoming call preallocation. 707 * Charge the incoming call preallocation.
693 */ 708 */
694static void afs_charge_preallocation(struct work_struct *work) 709void afs_charge_preallocation(struct work_struct *work)
695{ 710{
696 struct afs_call *call = afs_spare_incoming_call; 711 struct afs_net *net =
712 container_of(work, struct afs_net, charge_preallocation_work);
713 struct afs_call *call = net->spare_incoming_call;
697 714
698 for (;;) { 715 for (;;) {
699 if (!call) { 716 if (!call) {
700 call = afs_alloc_call(&afs_RXCMxxxx, GFP_KERNEL); 717 call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL);
701 if (!call) 718 if (!call)
702 break; 719 break;
703 720
704 call->async = true; 721 call->async = true;
705 call->state = AFS_CALL_AWAIT_OP_ID; 722 call->state = AFS_CALL_SV_AWAIT_OP_ID;
706 init_waitqueue_head(&call->waitq); 723 init_waitqueue_head(&call->waitq);
707 } 724 }
708 725
709 if (rxrpc_kernel_charge_accept(afs_socket, 726 if (rxrpc_kernel_charge_accept(net->socket,
710 afs_wake_up_async_call, 727 afs_wake_up_async_call,
711 afs_rx_attach, 728 afs_rx_attach,
712 (unsigned long)call, 729 (unsigned long)call,
@@ -714,7 +731,7 @@ static void afs_charge_preallocation(struct work_struct *work)
714 break; 731 break;
715 call = NULL; 732 call = NULL;
716 } 733 }
717 afs_spare_incoming_call = call; 734 net->spare_incoming_call = call;
718} 735}
719 736
720/* 737/*
@@ -735,7 +752,9 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
735static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, 752static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
736 unsigned long user_call_ID) 753 unsigned long user_call_ID)
737{ 754{
738 queue_work(afs_wq, &afs_charge_preallocation_work); 755 struct afs_net *net = afs_sock2net(sk);
756
757 queue_work(afs_wq, &net->charge_preallocation_work);
739} 758}
740 759
741/* 760/*
@@ -756,7 +775,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
756 return ret; 775 return ret;
757 776
758 call->operation_ID = ntohl(call->tmp); 777 call->operation_ID = ntohl(call->tmp);
759 call->state = AFS_CALL_AWAIT_REQUEST; 778 afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
760 call->offset = 0; 779 call->offset = 0;
761 780
762 /* ask the cache manager to route the call (it'll change the call type 781 /* ask the cache manager to route the call (it'll change the call type
@@ -781,8 +800,7 @@ static void afs_notify_end_reply_tx(struct sock *sock,
781{ 800{
782 struct afs_call *call = (struct afs_call *)call_user_ID; 801 struct afs_call *call = (struct afs_call *)call_user_ID;
783 802
784 if (call->state == AFS_CALL_REPLYING) 803 afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK);
785 call->state = AFS_CALL_AWAIT_ACK;
786} 804}
787 805
788/* 806/*
@@ -790,11 +808,12 @@ static void afs_notify_end_reply_tx(struct sock *sock,
790 */ 808 */
791void afs_send_empty_reply(struct afs_call *call) 809void afs_send_empty_reply(struct afs_call *call)
792{ 810{
811 struct afs_net *net = call->net;
793 struct msghdr msg; 812 struct msghdr msg;
794 813
795 _enter(""); 814 _enter("");
796 815
797 rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0); 816 rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0);
798 817
799 msg.msg_name = NULL; 818 msg.msg_name = NULL;
800 msg.msg_namelen = 0; 819 msg.msg_namelen = 0;
@@ -803,8 +822,7 @@ void afs_send_empty_reply(struct afs_call *call)
803 msg.msg_controllen = 0; 822 msg.msg_controllen = 0;
804 msg.msg_flags = 0; 823 msg.msg_flags = 0;
805 824
806 call->state = AFS_CALL_AWAIT_ACK; 825 switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
807 switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0,
808 afs_notify_end_reply_tx)) { 826 afs_notify_end_reply_tx)) {
809 case 0: 827 case 0:
810 _leave(" [replied]"); 828 _leave(" [replied]");
@@ -812,7 +830,7 @@ void afs_send_empty_reply(struct afs_call *call)
812 830
813 case -ENOMEM: 831 case -ENOMEM:
814 _debug("oom"); 832 _debug("oom");
815 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 833 rxrpc_kernel_abort_call(net->socket, call->rxcall,
816 RX_USER_ABORT, -ENOMEM, "KOO"); 834 RX_USER_ABORT, -ENOMEM, "KOO");
817 default: 835 default:
818 _leave(" [error]"); 836 _leave(" [error]");
@@ -825,13 +843,14 @@ void afs_send_empty_reply(struct afs_call *call)
825 */ 843 */
826void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) 844void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
827{ 845{
846 struct afs_net *net = call->net;
828 struct msghdr msg; 847 struct msghdr msg;
829 struct kvec iov[1]; 848 struct kvec iov[1];
830 int n; 849 int n;
831 850
832 _enter(""); 851 _enter("");
833 852
834 rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len); 853 rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len);
835 854
836 iov[0].iov_base = (void *) buf; 855 iov[0].iov_base = (void *) buf;
837 iov[0].iov_len = len; 856 iov[0].iov_len = len;
@@ -842,8 +861,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
842 msg.msg_controllen = 0; 861 msg.msg_controllen = 0;
843 msg.msg_flags = 0; 862 msg.msg_flags = 0;
844 863
845 call->state = AFS_CALL_AWAIT_ACK; 864 n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
846 n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len,
847 afs_notify_end_reply_tx); 865 afs_notify_end_reply_tx);
848 if (n >= 0) { 866 if (n >= 0) {
849 /* Success */ 867 /* Success */
@@ -853,7 +871,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
853 871
854 if (n == -ENOMEM) { 872 if (n == -ENOMEM) {
855 _debug("oom"); 873 _debug("oom");
856 rxrpc_kernel_abort_call(afs_socket, call->rxcall, 874 rxrpc_kernel_abort_call(net->socket, call->rxcall,
857 RX_USER_ABORT, -ENOMEM, "KOO"); 875 RX_USER_ABORT, -ENOMEM, "KOO");
858 } 876 }
859 _leave(" [error]"); 877 _leave(" [error]");
@@ -865,6 +883,9 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
865int afs_extract_data(struct afs_call *call, void *buf, size_t count, 883int afs_extract_data(struct afs_call *call, void *buf, size_t count,
866 bool want_more) 884 bool want_more)
867{ 885{
886 struct afs_net *net = call->net;
887 enum afs_call_state state;
888 u32 remote_abort;
868 int ret; 889 int ret;
869 890
870 _enter("{%s,%zu},,%zu,%d", 891 _enter("{%s,%zu},,%zu,%d",
@@ -872,32 +893,32 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
872 893
873 ASSERTCMP(call->offset, <=, count); 894 ASSERTCMP(call->offset, <=, count);
874 895
875 ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, 896 ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
876 buf, count, &call->offset, 897 buf, count, &call->offset,
877 want_more, &call->abort_code, 898 want_more, &remote_abort,
878 &call->service_id); 899 &call->service_id);
879 trace_afs_recv_data(call, count, call->offset, want_more, ret); 900 trace_afs_recv_data(call, count, call->offset, want_more, ret);
880 if (ret == 0 || ret == -EAGAIN) 901 if (ret == 0 || ret == -EAGAIN)
881 return ret; 902 return ret;
882 903
904 state = READ_ONCE(call->state);
883 if (ret == 1) { 905 if (ret == 1) {
884 switch (call->state) { 906 switch (state) {
885 case AFS_CALL_AWAIT_REPLY: 907 case AFS_CALL_CL_AWAIT_REPLY:
886 call->state = AFS_CALL_COMPLETE; 908 afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY);
887 break; 909 break;
888 case AFS_CALL_AWAIT_REQUEST: 910 case AFS_CALL_SV_AWAIT_REQUEST:
889 call->state = AFS_CALL_REPLYING; 911 afs_set_call_state(call, state, AFS_CALL_SV_REPLYING);
890 break; 912 break;
913 case AFS_CALL_COMPLETE:
914 kdebug("prem complete %d", call->error);
915 return -EIO;
891 default: 916 default:
892 break; 917 break;
893 } 918 }
894 return 0; 919 return 0;
895 } 920 }
896 921
897 if (ret == -ECONNABORTED) 922 afs_set_call_complete(call, ret, remote_abort);
898 call->error = call->type->abort_to_error(call->abort_code);
899 else
900 call->error = ret;
901 call->state = AFS_CALL_COMPLETE;
902 return ret; 923 return ret;
903} 924}
diff --git a/fs/afs/security.c b/fs/afs/security.c
index faca66227ecf..46a881a4d08f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -1,6 +1,6 @@
1/* AFS security handling 1/* AFS security handling
2 * 2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -14,9 +14,13 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/hashtable.h>
17#include <keys/rxrpc-type.h> 18#include <keys/rxrpc-type.h>
18#include "internal.h" 19#include "internal.h"
19 20
21static DEFINE_HASHTABLE(afs_permits_cache, 10);
22static DEFINE_SPINLOCK(afs_permits_lock);
23
20/* 24/*
21 * get a key 25 * get a key
22 */ 26 */
@@ -46,167 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell)
46} 50}
47 51
48/* 52/*
49 * dispose of a permits list 53 * Dispose of a list of permits.
50 */ 54 */
51void afs_zap_permits(struct rcu_head *rcu) 55static void afs_permits_rcu(struct rcu_head *rcu)
52{ 56{
53 struct afs_permits *permits = 57 struct afs_permits *permits =
54 container_of(rcu, struct afs_permits, rcu); 58 container_of(rcu, struct afs_permits, rcu);
55 int loop; 59 int i;
56
57 _enter("{%d}", permits->count);
58 60
59 for (loop = permits->count - 1; loop >= 0; loop--) 61 for (i = 0; i < permits->nr_permits; i++)
60 key_put(permits->permits[loop].key); 62 key_put(permits->permits[i].key);
61 kfree(permits); 63 kfree(permits);
62} 64}
63 65
64/* 66/*
65 * dispose of a permits list in which all the key pointers have been copied 67 * Discard a permission cache.
66 */ 68 */
67static void afs_dispose_of_permits(struct rcu_head *rcu) 69void afs_put_permits(struct afs_permits *permits)
68{ 70{
69 struct afs_permits *permits = 71 if (permits && refcount_dec_and_test(&permits->usage)) {
70 container_of(rcu, struct afs_permits, rcu); 72 spin_lock(&afs_permits_lock);
71 73 hash_del_rcu(&permits->hash_node);
72 _enter("{%d}", permits->count); 74 spin_unlock(&afs_permits_lock);
73 75 call_rcu(&permits->rcu, afs_permits_rcu);
74 kfree(permits); 76 }
75} 77}
76 78
77/* 79/*
78 * get the authorising vnode - this is the specified inode itself if it's a 80 * Clear a permit cache on callback break.
79 * directory or it's the parent directory if the specified inode is a file or
80 * symlink
81 * - the caller must release the ref on the inode
82 */ 81 */
83static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode, 82void afs_clear_permits(struct afs_vnode *vnode)
84 struct key *key)
85{ 83{
86 struct afs_vnode *auth_vnode; 84 struct afs_permits *permits;
87 struct inode *auth_inode;
88 85
89 _enter(""); 86 spin_lock(&vnode->lock);
87 permits = rcu_dereference_protected(vnode->permit_cache,
88 lockdep_is_held(&vnode->lock));
89 RCU_INIT_POINTER(vnode->permit_cache, NULL);
90 vnode->cb_break++;
91 spin_unlock(&vnode->lock);
90 92
91 if (S_ISDIR(vnode->vfs_inode.i_mode)) { 93 if (permits)
92 auth_inode = igrab(&vnode->vfs_inode); 94 afs_put_permits(permits);
93 ASSERT(auth_inode != NULL);
94 } else {
95 auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
96 &vnode->status.parent, NULL, NULL);
97 if (IS_ERR(auth_inode))
98 return ERR_CAST(auth_inode);
99 }
100
101 auth_vnode = AFS_FS_I(auth_inode);
102 _leave(" = {%x}", auth_vnode->fid.vnode);
103 return auth_vnode;
104} 95}
105 96
106/* 97/*
107 * clear the permit cache on a directory vnode 98 * Hash a list of permits. Use simple addition to make it easy to add an extra
99 * one at an as-yet indeterminate position in the list.
108 */ 100 */
109void afs_clear_permits(struct afs_vnode *vnode) 101static void afs_hash_permits(struct afs_permits *permits)
110{ 102{
111 struct afs_permits *permits; 103 unsigned long h = permits->nr_permits;
112 104 int i;
113 _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
114 105
115 mutex_lock(&vnode->permits_lock); 106 for (i = 0; i < permits->nr_permits; i++) {
116 permits = vnode->permits; 107 h += (unsigned long)permits->permits[i].key / sizeof(void *);
117 RCU_INIT_POINTER(vnode->permits, NULL); 108 h += permits->permits[i].access;
118 mutex_unlock(&vnode->permits_lock); 109 }
119 110
120 if (permits) 111 permits->h = h;
121 call_rcu(&permits->rcu, afs_zap_permits);
122 _leave("");
123} 112}
124 113
125/* 114/*
126 * add the result obtained for a vnode to its or its parent directory's cache 115 * Cache the CallerAccess result obtained from doing a fileserver operation
127 * for the key used to access it 116 * that returned a vnode status for a particular key. If a callback break
117 * occurs whilst the operation was in progress then we have to ditch the cache
118 * as the ACL *may* have changed.
128 */ 119 */
129void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) 120void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
121 unsigned int cb_break)
130{ 122{
131 struct afs_permits *permits, *xpermits; 123 struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
132 struct afs_permit *permit; 124 afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
133 struct afs_vnode *auth_vnode; 125 size_t size = 0;
134 int count, loop; 126 bool changed = false;
127 int i, j;
128
129 _enter("{%x:%u},%x,%x",
130 vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
131
132 rcu_read_lock();
133
134 /* Check for the common case first: We got back the same access as last
135 * time we tried and already have it recorded.
136 */
137 permits = rcu_dereference(vnode->permit_cache);
138 if (permits) {
139 if (!permits->invalidated) {
140 for (i = 0; i < permits->nr_permits; i++) {
141 if (permits->permits[i].key < key)
142 continue;
143 if (permits->permits[i].key > key)
144 break;
145 if (permits->permits[i].access != caller_access) {
146 changed = true;
147 break;
148 }
135 149
136 _enter("{%x:%u},%x,%lx", 150 if (cb_break != (vnode->cb_break +
137 vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); 151 vnode->cb_interest->server->cb_s_break)) {
152 changed = true;
153 break;
154 }
138 155
139 auth_vnode = afs_get_auth_inode(vnode, key); 156 /* The cache is still good. */
140 if (IS_ERR(auth_vnode)) { 157 rcu_read_unlock();
141 _leave(" [get error %ld]", PTR_ERR(auth_vnode)); 158 return;
142 return; 159 }
143 } 160 }
161
162 changed |= permits->invalidated;
163 size = permits->nr_permits;
144 164
145 mutex_lock(&auth_vnode->permits_lock); 165 /* If this set of permits is now wrong, clear the permits
166 * pointer so that no one tries to use the stale information.
167 */
168 if (changed) {
169 spin_lock(&vnode->lock);
170 if (permits != rcu_access_pointer(vnode->permit_cache))
171 goto someone_else_changed_it_unlock;
172 RCU_INIT_POINTER(vnode->permit_cache, NULL);
173 spin_unlock(&vnode->lock);
174
175 afs_put_permits(permits);
176 permits = NULL;
177 size = 0;
178 }
179 }
146 180
147 /* guard against a rename being detected whilst we waited for the 181 if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) {
148 * lock */ 182 rcu_read_unlock();
149 if (memcmp(&auth_vnode->fid, &vnode->status.parent, 183 goto someone_else_changed_it;
150 sizeof(struct afs_fid)) != 0) {
151 _debug("renamed");
152 goto out_unlock;
153 } 184 }
154 185
155 /* have to be careful as the directory's callback may be broken between 186 /* We need a ref on any permits list we want to copy as we'll have to
156 * us receiving the status we're trying to cache and us getting the 187 * drop the lock to do memory allocation.
157 * lock to update the cache for the status */ 188 */
158 if (auth_vnode->acl_order - acl_order > 0) { 189 if (permits && !refcount_inc_not_zero(&permits->usage)) {
159 _debug("ACL changed?"); 190 rcu_read_unlock();
160 goto out_unlock; 191 goto someone_else_changed_it;
161 } 192 }
162 193
163 /* always update the anonymous mask */ 194 rcu_read_unlock();
164 _debug("anon access %x", vnode->status.anon_access); 195
165 auth_vnode->status.anon_access = vnode->status.anon_access; 196 /* Speculatively create a new list with the revised permission set. We
166 if (key == vnode->volume->cell->anonymous_key) 197 * discard this if we find an extant match already in the hash, but
167 goto out_unlock; 198 * it's easier to compare with memcmp this way.
168 199 *
169 xpermits = auth_vnode->permits; 200 * We fill in the key pointers at this time, but we don't get the refs
170 count = 0; 201 * yet.
171 if (xpermits) { 202 */
172 /* see if the permit is already in the list 203 size++;
173 * - if it is then we just amend the list 204 new = kzalloc(sizeof(struct afs_permits) +
174 */ 205 sizeof(struct afs_permit) * size, GFP_NOFS);
175 count = xpermits->count; 206 if (!new)
176 permit = xpermits->permits; 207 return;
177 for (loop = count; loop > 0; loop--) { 208
178 if (permit->key == key) { 209 refcount_set(&new->usage, 1);
179 permit->access_mask = 210 new->nr_permits = size;
180 vnode->status.caller_access; 211 i = j = 0;
181 goto out_unlock; 212 if (permits) {
213 for (i = 0; i < permits->nr_permits; i++) {
214 if (j == i && permits->permits[i].key > key) {
215 new->permits[j].key = key;
216 new->permits[j].access = caller_access;
217 j++;
182 } 218 }
183 permit++; 219 new->permits[j].key = permits->permits[i].key;
220 new->permits[j].access = permits->permits[i].access;
221 j++;
222 }
223 }
224
225 if (j == i) {
226 new->permits[j].key = key;
227 new->permits[j].access = caller_access;
228 }
229
230 afs_hash_permits(new);
231
232 afs_put_permits(permits);
233
234 /* Now see if the permit list we want is actually already available */
235 spin_lock(&afs_permits_lock);
236
237 hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
238 if (xpermits->h != new->h ||
239 xpermits->invalidated ||
240 xpermits->nr_permits != new->nr_permits ||
241 memcmp(xpermits->permits, new->permits,
242 new->nr_permits * sizeof(struct afs_permit)) != 0)
243 continue;
244
245 if (refcount_inc_not_zero(&xpermits->usage)) {
246 replacement = xpermits;
247 goto found;
184 } 248 }
249
250 break;
185 } 251 }
186 252
187 permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1), 253 for (i = 0; i < new->nr_permits; i++)
188 GFP_NOFS); 254 key_get(new->permits[i].key);
189 if (!permits) 255 hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
190 goto out_unlock; 256 replacement = new;
191 257 new = NULL;
192 if (xpermits) 258
193 memcpy(permits->permits, xpermits->permits, 259found:
194 count * sizeof(struct afs_permit)); 260 spin_unlock(&afs_permits_lock);
195 261
196 _debug("key %x access %x", 262 kfree(new);
197 key_serial(key), vnode->status.caller_access); 263
198 permits->permits[count].access_mask = vnode->status.caller_access; 264 spin_lock(&vnode->lock);
199 permits->permits[count].key = key_get(key); 265 if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
200 permits->count = count + 1; 266 permits != rcu_access_pointer(vnode->permit_cache))
201 267 goto someone_else_changed_it_unlock;
202 rcu_assign_pointer(auth_vnode->permits, permits); 268 rcu_assign_pointer(vnode->permit_cache, replacement);
203 if (xpermits) 269 spin_unlock(&vnode->lock);
204 call_rcu(&xpermits->rcu, afs_dispose_of_permits); 270 afs_put_permits(permits);
205 271 return;
206out_unlock: 272
207 mutex_unlock(&auth_vnode->permits_lock); 273someone_else_changed_it_unlock:
208 iput(&auth_vnode->vfs_inode); 274 spin_unlock(&vnode->lock);
209 _leave(""); 275someone_else_changed_it:
276 /* Someone else changed the cache under us - don't recheck at this
277 * time.
278 */
279 return;
210} 280}
211 281
212/* 282/*
@@ -218,56 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
218 afs_access_t *_access) 288 afs_access_t *_access)
219{ 289{
220 struct afs_permits *permits; 290 struct afs_permits *permits;
221 struct afs_permit *permit; 291 bool valid = false;
222 struct afs_vnode *auth_vnode; 292 int i, ret;
223 bool valid;
224 int loop, ret;
225 293
226 _enter("{%x:%u},%x", 294 _enter("{%x:%u},%x",
227 vnode->fid.vid, vnode->fid.vnode, key_serial(key)); 295 vnode->fid.vid, vnode->fid.vnode, key_serial(key));
228 296
229 auth_vnode = afs_get_auth_inode(vnode, key); 297 permits = vnode->permit_cache;
230 if (IS_ERR(auth_vnode)) {
231 *_access = 0;
232 _leave(" = %ld", PTR_ERR(auth_vnode));
233 return PTR_ERR(auth_vnode);
234 }
235
236 ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
237 298
238 /* check the permits to see if we've got one yet */ 299 /* check the permits to see if we've got one yet */
239 if (key == auth_vnode->volume->cell->anonymous_key) { 300 if (key == vnode->volume->cell->anonymous_key) {
240 _debug("anon"); 301 _debug("anon");
241 *_access = auth_vnode->status.anon_access; 302 *_access = vnode->status.anon_access;
242 valid = true; 303 valid = true;
243 } else { 304 } else {
244 valid = false;
245 rcu_read_lock(); 305 rcu_read_lock();
246 permits = rcu_dereference(auth_vnode->permits); 306 permits = rcu_dereference(vnode->permit_cache);
247 if (permits) { 307 if (permits) {
248 permit = permits->permits; 308 for (i = 0; i < permits->nr_permits; i++) {
249 for (loop = permits->count; loop > 0; loop--) { 309 if (permits->permits[i].key < key)
250 if (permit->key == key) { 310 continue;
251 _debug("found in cache"); 311 if (permits->permits[i].key > key)
252 *_access = permit->access_mask;
253 valid = true;
254 break; 312 break;
255 } 313
256 permit++; 314 *_access = permits->permits[i].access;
315 valid = !permits->invalidated;
316 break;
257 } 317 }
258 } 318 }
259 rcu_read_unlock(); 319 rcu_read_unlock();
260 } 320 }
261 321
262 if (!valid) { 322 if (!valid) {
263 /* check the status on the file we're actually interested in 323 /* Check the status on the file we're actually interested in
264 * (the post-processing will cache the result on auth_vnode) */ 324 * (the post-processing will cache the result).
325 */
265 _debug("no valid permit"); 326 _debug("no valid permit");
266 327
267 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); 328 ret = afs_fetch_status(vnode, key);
268 ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
269 if (ret < 0) { 329 if (ret < 0) {
270 iput(&auth_vnode->vfs_inode);
271 *_access = 0; 330 *_access = 0;
272 _leave(" = %d", ret); 331 _leave(" = %d", ret);
273 return ret; 332 return ret;
@@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
275 *_access = vnode->status.caller_access; 334 *_access = vnode->status.caller_access;
276 } 335 }
277 336
278 iput(&auth_vnode->vfs_inode);
279 _leave(" = 0 [access %x]", *_access); 337 _leave(" = 0 [access %x]", *_access);
280 return 0; 338 return 0;
281} 339}
@@ -304,14 +362,9 @@ int afs_permission(struct inode *inode, int mask)
304 return PTR_ERR(key); 362 return PTR_ERR(key);
305 } 363 }
306 364
307 /* if the promise has expired, we need to check the server again */ 365 ret = afs_validate(vnode, key);
308 if (!vnode->cb_promised) { 366 if (ret < 0)
309 _debug("not promised"); 367 goto error;
310 ret = afs_vnode_fetch_status(vnode, NULL, key);
311 if (ret < 0)
312 goto error;
313 _debug("new promise [fl=%lx]", vnode->flags);
314 }
315 368
316 /* check the permits to see if we've got one yet */ 369 /* check the permits to see if we've got one yet */
317 ret = afs_check_permit(vnode, key, &access); 370 ret = afs_check_permit(vnode, key, &access);
@@ -365,3 +418,12 @@ error:
365 _leave(" = %d", ret); 418 _leave(" = %d", ret);
366 return ret; 419 return ret;
367} 420}
421
422void __exit afs_clean_up_permit_cache(void)
423{
424 int i;
425
426 for (i = 0; i < HASH_SIZE(afs_permits_cache); i++)
427 WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i]));
428
429}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c001b1f2455f..1880f1b6a9f1 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -11,317 +11,689 @@
11 11
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include "afs_fs.h"
14#include "internal.h" 15#include "internal.h"
15 16
16static unsigned afs_server_timeout = 10; /* server timeout in seconds */ 17static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
18static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
17 19
18static void afs_reap_server(struct work_struct *); 20static void afs_inc_servers_outstanding(struct afs_net *net)
21{
22 atomic_inc(&net->servers_outstanding);
23}
24
25static void afs_dec_servers_outstanding(struct afs_net *net)
26{
27 if (atomic_dec_and_test(&net->servers_outstanding))
28 wake_up_atomic_t(&net->servers_outstanding);
29}
30
31/*
32 * Find a server by one of its addresses.
33 */
34struct afs_server *afs_find_server(struct afs_net *net,
35 const struct sockaddr_rxrpc *srx)
36{
37 const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38 const struct afs_addr_list *alist;
39 struct afs_server *server = NULL;
40 unsigned int i;
41 bool ipv6 = true;
42 int seq = 0, diff;
43
44 if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45 srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46 srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47 ipv6 = false;
48
49 rcu_read_lock();
50
51 do {
52 if (server)
53 afs_put_server(net, server);
54 server = NULL;
55 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56
57 if (ipv6) {
58 hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59 alist = rcu_dereference(server->addresses);
60 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61 b = &alist->addrs[i].transport.sin6;
62 diff = (u16)a->sin6_port - (u16)b->sin6_port;
63 if (diff == 0)
64 diff = memcmp(&a->sin6_addr,
65 &b->sin6_addr,
66 sizeof(struct in6_addr));
67 if (diff == 0)
68 goto found;
69 if (diff < 0) {
70 // TODO: Sort the list
71 //if (i == alist->nr_ipv4)
72 // goto not_found;
73 break;
74 }
75 }
76 }
77 } else {
78 hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
79 alist = rcu_dereference(server->addresses);
80 for (i = 0; i < alist->nr_ipv4; i++) {
81 b = &alist->addrs[i].transport.sin6;
82 diff = (u16)a->sin6_port - (u16)b->sin6_port;
83 if (diff == 0)
84 diff = ((u32)a->sin6_addr.s6_addr32[3] -
85 (u32)b->sin6_addr.s6_addr32[3]);
86 if (diff == 0)
87 goto found;
88 if (diff < 0) {
89 // TODO: Sort the list
90 //if (i == 0)
91 // goto not_found;
92 break;
93 }
94 }
95 }
96 }
97
98 //not_found:
99 server = NULL;
100 found:
101 if (server && !atomic_inc_not_zero(&server->usage))
102 server = NULL;
103
104 } while (need_seqretry(&net->fs_addr_lock, seq));
19 105
20/* tree of all the servers, indexed by IP address */ 106 done_seqretry(&net->fs_addr_lock, seq);
21static struct rb_root afs_servers = RB_ROOT;
22static DEFINE_RWLOCK(afs_servers_lock);
23 107
24/* LRU list of all the servers not currently in use */ 108 rcu_read_unlock();
25static LIST_HEAD(afs_server_graveyard); 109 return server;
26static DEFINE_SPINLOCK(afs_server_graveyard_lock); 110}
27static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
28 111
29/* 112/*
30 * install a server record in the master tree 113 * Look up a server by its UUID
31 */ 114 */
32static int afs_install_server(struct afs_server *server) 115struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
33{ 116{
34 struct afs_server *xserver; 117 struct afs_server *server = NULL;
118 struct rb_node *p;
119 int diff, seq = 0;
120
121 _enter("%pU", uuid);
122
123 do {
124 /* Unfortunately, rbtree walking doesn't give reliable results
125 * under just the RCU read lock, so we have to check for
126 * changes.
127 */
128 if (server)
129 afs_put_server(net, server);
130 server = NULL;
131
132 read_seqbegin_or_lock(&net->fs_lock, &seq);
133
134 p = net->fs_servers.rb_node;
135 while (p) {
136 server = rb_entry(p, struct afs_server, uuid_rb);
137
138 diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
139 if (diff < 0) {
140 p = p->rb_left;
141 } else if (diff > 0) {
142 p = p->rb_right;
143 } else {
144 afs_get_server(server);
145 break;
146 }
147
148 server = NULL;
149 }
150 } while (need_seqretry(&net->fs_lock, seq));
151
152 done_seqretry(&net->fs_lock, seq);
153
154 _leave(" = %p", server);
155 return server;
156}
157
158/*
159 * Install a server record in the namespace tree
160 */
161static struct afs_server *afs_install_server(struct afs_net *net,
162 struct afs_server *candidate)
163{
164 const struct afs_addr_list *alist;
165 struct afs_server *server;
35 struct rb_node **pp, *p; 166 struct rb_node **pp, *p;
36 int ret; 167 int ret = -EEXIST, diff;
37 168
38 _enter("%p", server); 169 _enter("%p", candidate);
39 170
40 write_lock(&afs_servers_lock); 171 write_seqlock(&net->fs_lock);
41 172
42 ret = -EEXIST; 173 /* Firstly install the server in the UUID lookup tree */
43 pp = &afs_servers.rb_node; 174 pp = &net->fs_servers.rb_node;
44 p = NULL; 175 p = NULL;
45 while (*pp) { 176 while (*pp) {
46 p = *pp; 177 p = *pp;
47 _debug("- consider %p", p); 178 _debug("- consider %p", p);
48 xserver = rb_entry(p, struct afs_server, master_rb); 179 server = rb_entry(p, struct afs_server, uuid_rb);
49 if (server->addr.s_addr < xserver->addr.s_addr) 180 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
181 if (diff < 0)
50 pp = &(*pp)->rb_left; 182 pp = &(*pp)->rb_left;
51 else if (server->addr.s_addr > xserver->addr.s_addr) 183 else if (diff > 0)
52 pp = &(*pp)->rb_right; 184 pp = &(*pp)->rb_right;
53 else 185 else
54 goto error; 186 goto exists;
55 } 187 }
56 188
57 rb_link_node(&server->master_rb, p, pp); 189 server = candidate;
58 rb_insert_color(&server->master_rb, &afs_servers); 190 rb_link_node(&server->uuid_rb, p, pp);
191 rb_insert_color(&server->uuid_rb, &net->fs_servers);
192 hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
193
194 write_seqlock(&net->fs_addr_lock);
195 alist = rcu_dereference_protected(server->addresses,
196 lockdep_is_held(&net->fs_addr_lock.lock));
197
198 /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
199 * it in the IPv4 and/or IPv6 reverse-map lists.
200 *
201 * TODO: For speed we want to use something other than a flat list
202 * here; even sorting the list in terms of lowest address would help a
203 * bit, but anything we might want to do gets messy and memory
204 * intensive.
205 */
206 if (alist->nr_ipv4 > 0)
207 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
208 if (alist->nr_addrs > alist->nr_ipv4)
209 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
210
211 write_sequnlock(&net->fs_addr_lock);
59 ret = 0; 212 ret = 0;
60 213
61error: 214exists:
62 write_unlock(&afs_servers_lock); 215 afs_get_server(server);
63 return ret; 216 write_sequnlock(&net->fs_lock);
217 return server;
64} 218}
65 219
66/* 220/*
67 * allocate a new server record 221 * allocate a new server record
68 */ 222 */
69static struct afs_server *afs_alloc_server(struct afs_cell *cell, 223static struct afs_server *afs_alloc_server(struct afs_net *net,
70 const struct in_addr *addr) 224 const uuid_t *uuid,
225 struct afs_addr_list *alist)
71{ 226{
72 struct afs_server *server; 227 struct afs_server *server;
73 228
74 _enter(""); 229 _enter("");
75 230
76 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); 231 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
77 if (server) { 232 if (!server)
78 atomic_set(&server->usage, 1); 233 goto enomem;
79 server->cell = cell; 234
80 235 atomic_set(&server->usage, 1);
81 INIT_LIST_HEAD(&server->link); 236 RCU_INIT_POINTER(server->addresses, alist);
82 INIT_LIST_HEAD(&server->grave); 237 server->addr_version = alist->version;
83 init_rwsem(&server->sem); 238 server->uuid = *uuid;
84 spin_lock_init(&server->fs_lock); 239 server->flags = (1UL << AFS_SERVER_FL_NEW);
85 server->fs_vnodes = RB_ROOT; 240 server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
86 server->cb_promises = RB_ROOT; 241 rwlock_init(&server->fs_lock);
87 spin_lock_init(&server->cb_lock); 242 INIT_LIST_HEAD(&server->cb_interests);
88 init_waitqueue_head(&server->cb_break_waitq); 243 rwlock_init(&server->cb_break_lock);
89 INIT_DELAYED_WORK(&server->cb_break_work, 244
90 afs_dispatch_give_up_callbacks); 245 afs_inc_servers_outstanding(net);
91 246 _leave(" = %p", server);
92 memcpy(&server->addr, addr, sizeof(struct in_addr));
93 server->addr.s_addr = addr->s_addr;
94 _leave(" = %p{%d}", server, atomic_read(&server->usage));
95 } else {
96 _leave(" = NULL [nomem]");
97 }
98 return server; 247 return server;
248
249enomem:
250 _leave(" = NULL [nomem]");
251 return NULL;
252}
253
254/*
255 * Look up an address record for a server
256 */
257static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
258 struct key *key, const uuid_t *uuid)
259{
260 struct afs_addr_cursor ac;
261 struct afs_addr_list *alist;
262 int ret;
263
264 ret = afs_set_vl_cursor(&ac, cell);
265 if (ret < 0)
266 return ERR_PTR(ret);
267
268 while (afs_iterate_addresses(&ac)) {
269 if (test_bit(ac.index, &ac.alist->yfs))
270 alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
271 else
272 alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
273 switch (ac.error) {
274 case 0:
275 afs_end_cursor(&ac);
276 return alist;
277 case -ECONNABORTED:
278 ac.error = afs_abort_to_error(ac.abort_code);
279 goto error;
280 case -ENOMEM:
281 case -ENONET:
282 goto error;
283 case -ENETUNREACH:
284 case -EHOSTUNREACH:
285 case -ECONNREFUSED:
286 break;
287 default:
288 ac.error = -EIO;
289 goto error;
290 }
291 }
292
293error:
294 return ERR_PTR(afs_end_cursor(&ac));
99} 295}
100 296
101/* 297/*
102 * get an FS-server record for a cell 298 * Get or create a fileserver record.
103 */ 299 */
104struct afs_server *afs_lookup_server(struct afs_cell *cell, 300struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
105 const struct in_addr *addr) 301 const uuid_t *uuid)
106{ 302{
303 struct afs_addr_list *alist;
107 struct afs_server *server, *candidate; 304 struct afs_server *server, *candidate;
108 305
109 _enter("%p,%pI4", cell, &addr->s_addr); 306 _enter("%p,%pU", cell->net, uuid);
110 307
111 /* quick scan of the list to see if we already have the server */ 308 server = afs_find_server_by_uuid(cell->net, uuid);
112 read_lock(&cell->servers_lock); 309 if (server)
310 return server;
113 311
114 list_for_each_entry(server, &cell->servers, link) { 312 alist = afs_vl_lookup_addrs(cell, key, uuid);
115 if (server->addr.s_addr == addr->s_addr) 313 if (IS_ERR(alist))
116 goto found_server_quickly; 314 return ERR_CAST(alist);
117 }
118 read_unlock(&cell->servers_lock);
119 315
120 candidate = afs_alloc_server(cell, addr); 316 candidate = afs_alloc_server(cell->net, uuid, alist);
121 if (!candidate) { 317 if (!candidate) {
122 _leave(" = -ENOMEM"); 318 afs_put_addrlist(alist);
123 return ERR_PTR(-ENOMEM); 319 return ERR_PTR(-ENOMEM);
124 } 320 }
125 321
126 write_lock(&cell->servers_lock); 322 server = afs_install_server(cell->net, candidate);
127 323 if (server != candidate) {
128 /* check the cell's server list again */ 324 afs_put_addrlist(alist);
129 list_for_each_entry(server, &cell->servers, link) { 325 kfree(candidate);
130 if (server->addr.s_addr == addr->s_addr)
131 goto found_server;
132 } 326 }
133 327
134 _debug("new");
135 server = candidate;
136 if (afs_install_server(server) < 0)
137 goto server_in_two_cells;
138
139 afs_get_cell(cell);
140 list_add_tail(&server->link, &cell->servers);
141
142 write_unlock(&cell->servers_lock);
143 _leave(" = %p{%d}", server, atomic_read(&server->usage)); 328 _leave(" = %p{%d}", server, atomic_read(&server->usage));
144 return server; 329 return server;
330}
145 331
146 /* found a matching server quickly */ 332/*
147found_server_quickly: 333 * Set the server timer to fire after a given delay, assuming it's not already
148 _debug("found quickly"); 334 * set for an earlier time.
149 afs_get_server(server); 335 */
150 read_unlock(&cell->servers_lock); 336static void afs_set_server_timer(struct afs_net *net, time64_t delay)
151no_longer_unused: 337{
152 if (!list_empty(&server->grave)) { 338 if (net->live) {
153 spin_lock(&afs_server_graveyard_lock); 339 afs_inc_servers_outstanding(net);
154 list_del_init(&server->grave); 340 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
155 spin_unlock(&afs_server_graveyard_lock); 341 afs_dec_servers_outstanding(net);
156 } 342 }
157 _leave(" = %p{%d}", server, atomic_read(&server->usage)); 343}
158 return server;
159 344
160 /* found a matching server on the second pass */ 345/*
161found_server: 346 * Server management timer. We have an increment on fs_outstanding that we
162 _debug("found"); 347 * need to pass along to the work item.
163 afs_get_server(server); 348 */
164 write_unlock(&cell->servers_lock); 349void afs_servers_timer(struct timer_list *timer)
165 kfree(candidate); 350{
166 goto no_longer_unused; 351 struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
167 352
168 /* found a server that seems to be in two cells */ 353 _enter("");
169server_in_two_cells: 354 if (!queue_work(afs_wq, &net->fs_manager))
170 write_unlock(&cell->servers_lock); 355 afs_dec_servers_outstanding(net);
171 kfree(candidate);
172 printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
173 addr);
174 _leave(" = -EEXIST");
175 return ERR_PTR(-EEXIST);
176} 356}
177 357
178/* 358/*
179 * look up a server by its IP address 359 * Release a reference on a server record.
180 */ 360 */
181struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) 361void afs_put_server(struct afs_net *net, struct afs_server *server)
182{ 362{
183 struct afs_server *server = NULL; 363 unsigned int usage;
184 struct rb_node *p;
185 struct in_addr addr = srx->transport.sin.sin_addr;
186 364
187 _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); 365 if (!server)
366 return;
188 367
189 if (srx->transport.family != AF_INET) { 368 server->put_time = ktime_get_real_seconds();
190 WARN(true, "AFS does not yes support non-IPv4 addresses\n");
191 return NULL;
192 }
193 369
194 read_lock(&afs_servers_lock); 370 usage = atomic_dec_return(&server->usage);
195 371
196 p = afs_servers.rb_node; 372 _enter("{%u}", usage);
197 while (p) {
198 server = rb_entry(p, struct afs_server, master_rb);
199 373
200 _debug("- consider %p", p); 374 if (likely(usage > 0))
375 return;
201 376
202 if (addr.s_addr < server->addr.s_addr) { 377 afs_set_server_timer(net, afs_server_gc_delay);
203 p = p->rb_left; 378}
204 } else if (addr.s_addr > server->addr.s_addr) {
205 p = p->rb_right;
206 } else {
207 afs_get_server(server);
208 goto found;
209 }
210 }
211 379
212 server = NULL; 380static void afs_server_rcu(struct rcu_head *rcu)
213found: 381{
214 read_unlock(&afs_servers_lock); 382 struct afs_server *server = container_of(rcu, struct afs_server, rcu);
215 ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr); 383
216 _leave(" = %p", server); 384 afs_put_addrlist(server->addresses);
217 return server; 385 kfree(server);
218} 386}
219 387
220/* 388/*
221 * destroy a server record 389 * destroy a dead server
222 * - removes from the cell list
223 */ 390 */
224void afs_put_server(struct afs_server *server) 391static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
225{ 392{
226 if (!server) 393 struct afs_addr_list *alist = server->addresses;
227 return; 394 struct afs_addr_cursor ac = {
395 .alist = alist,
396 .addr = &alist->addrs[0],
397 .start = alist->index,
398 .index = alist->index,
399 .error = 0,
400 };
401 _enter("%p", server);
228 402
229 _enter("%p{%d}", server, atomic_read(&server->usage)); 403 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
404 call_rcu(&server->rcu, afs_server_rcu);
405 afs_dec_servers_outstanding(net);
406}
230 407
231 _debug("PUT SERVER %d", atomic_read(&server->usage)); 408/*
409 * Garbage collect any expired servers.
410 */
411static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
412{
413 struct afs_server *server;
414 bool deleted;
415 int usage;
416
417 while ((server = gc_list)) {
418 gc_list = server->gc_next;
419
420 write_seqlock(&net->fs_lock);
421 usage = 1;
422 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
423 if (deleted) {
424 rb_erase(&server->uuid_rb, &net->fs_servers);
425 hlist_del_rcu(&server->proc_link);
426 }
427 write_sequnlock(&net->fs_lock);
232 428
233 ASSERTCMP(atomic_read(&server->usage), >, 0); 429 if (deleted)
430 afs_destroy_server(net, server);
431 }
432}
234 433
235 if (likely(!atomic_dec_and_test(&server->usage))) { 434/*
236 _leave(""); 435 * Manage the records of servers known to be within a network namespace. This
237 return; 436 * includes garbage collecting unused servers.
437 *
438 * Note also that we were given an increment on net->servers_outstanding by
439 * whoever queued us that we need to deal with before returning.
440 */
441void afs_manage_servers(struct work_struct *work)
442{
443 struct afs_net *net = container_of(work, struct afs_net, fs_manager);
444 struct afs_server *gc_list = NULL;
445 struct rb_node *cursor;
446 time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
447 bool purging = !net->live;
448
449 _enter("");
450
451 /* Trawl the server list looking for servers that have expired from
452 * lack of use.
453 */
454 read_seqlock_excl(&net->fs_lock);
455
456 for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
457 struct afs_server *server =
458 rb_entry(cursor, struct afs_server, uuid_rb);
459 int usage = atomic_read(&server->usage);
460
461 _debug("manage %pU %u", &server->uuid, usage);
462
463 ASSERTCMP(usage, >=, 1);
464 ASSERTIFCMP(purging, usage, ==, 1);
465
466 if (usage == 1) {
467 time64_t expire_at = server->put_time;
468
469 if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
470 !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
471 expire_at += afs_server_gc_delay;
472 if (purging || expire_at <= now) {
473 server->gc_next = gc_list;
474 gc_list = server;
475 } else if (expire_at < next_manage) {
476 next_manage = expire_at;
477 }
478 }
238 } 479 }
239 480
240 afs_flush_callback_breaks(server); 481 read_sequnlock_excl(&net->fs_lock);
482
483 /* Update the timer on the way out. We have to pass an increment on
484 * servers_outstanding in the namespace that we are in to the timer or
485 * the work scheduler.
486 */
487 if (!purging && next_manage < TIME64_MAX) {
488 now = ktime_get_real_seconds();
241 489
242 spin_lock(&afs_server_graveyard_lock); 490 if (next_manage - now <= 0) {
243 if (atomic_read(&server->usage) == 0) { 491 if (queue_work(afs_wq, &net->fs_manager))
244 list_move_tail(&server->grave, &afs_server_graveyard); 492 afs_inc_servers_outstanding(net);
245 server->time_of_death = ktime_get_real_seconds(); 493 } else {
246 queue_delayed_work(afs_wq, &afs_server_reaper, 494 afs_set_server_timer(net, next_manage - now);
247 afs_server_timeout * HZ); 495 }
248 } 496 }
249 spin_unlock(&afs_server_graveyard_lock); 497
250 _leave(" [dead]"); 498 afs_gc_servers(net, gc_list);
499
500 afs_dec_servers_outstanding(net);
501 _leave(" [%d]", atomic_read(&net->servers_outstanding));
502}
503
504static void afs_queue_server_manager(struct afs_net *net)
505{
506 afs_inc_servers_outstanding(net);
507 if (!queue_work(afs_wq, &net->fs_manager))
508 afs_dec_servers_outstanding(net);
251} 509}
252 510
253/* 511/*
254 * destroy a dead server 512 * Purge list of servers.
255 */ 513 */
256static void afs_destroy_server(struct afs_server *server) 514void afs_purge_servers(struct afs_net *net)
257{ 515{
258 _enter("%p", server); 516 _enter("");
259 517
260 ASSERTIF(server->cb_break_head != server->cb_break_tail, 518 if (del_timer_sync(&net->fs_timer))
261 delayed_work_pending(&server->cb_break_work)); 519 atomic_dec(&net->servers_outstanding);
262 520
263 ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); 521 afs_queue_server_manager(net);
264 ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
265 ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
266 ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
267 522
268 afs_put_cell(server->cell); 523 _debug("wait");
269 kfree(server); 524 wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
525 TASK_UNINTERRUPTIBLE);
526 _leave("");
270} 527}
271 528
272/* 529/*
273 * reap dead server records 530 * Probe a fileserver to find its capabilities.
531 *
532 * TODO: Try service upgrade.
274 */ 533 */
275static void afs_reap_server(struct work_struct *work) 534static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
276{ 535{
277 LIST_HEAD(corpses); 536 _enter("");
278 struct afs_server *server;
279 unsigned long delay, expiry;
280 time64_t now;
281
282 now = ktime_get_real_seconds();
283 spin_lock(&afs_server_graveyard_lock);
284
285 while (!list_empty(&afs_server_graveyard)) {
286 server = list_entry(afs_server_graveyard.next,
287 struct afs_server, grave);
288 537
289 /* the queue is ordered most dead first */ 538 fc->ac.addr = NULL;
290 expiry = server->time_of_death + afs_server_timeout; 539 fc->ac.start = READ_ONCE(fc->ac.alist->index);
291 if (expiry > now) { 540 fc->ac.index = fc->ac.start;
292 delay = (expiry - now) * HZ; 541 fc->ac.error = 0;
293 mod_delayed_work(afs_wq, &afs_server_reaper, delay); 542 fc->ac.begun = false;
543
544 while (afs_iterate_addresses(&fc->ac)) {
545 afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
546 &fc->ac, fc->key);
547 switch (fc->ac.error) {
548 case 0:
549 afs_end_cursor(&fc->ac);
550 set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
551 return true;
552 case -ECONNABORTED:
553 fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
554 goto error;
555 case -ENOMEM:
556 case -ENONET:
557 goto error;
558 case -ENETUNREACH:
559 case -EHOSTUNREACH:
560 case -ECONNREFUSED:
561 case -ETIMEDOUT:
562 case -ETIME:
294 break; 563 break;
564 default:
565 fc->ac.error = -EIO;
566 goto error;
295 } 567 }
568 }
296 569
297 write_lock(&server->cell->servers_lock); 570error:
298 write_lock(&afs_servers_lock); 571 afs_end_cursor(&fc->ac);
299 if (atomic_read(&server->usage) > 0) { 572 return false;
300 list_del_init(&server->grave); 573}
301 } else { 574
302 list_move_tail(&server->grave, &corpses); 575/*
303 list_del_init(&server->link); 576 * If we haven't already, try probing the fileserver to get its capabilities.
304 rb_erase(&server->master_rb, &afs_servers); 577 * We try not to instigate parallel probes, but it's possible that the parallel
305 } 578 * probes will fail due to authentication failure when ours would succeed.
306 write_unlock(&afs_servers_lock); 579 *
307 write_unlock(&server->cell->servers_lock); 580 * TODO: Try sending an anonymous probe if an authenticated probe fails.
581 */
582bool afs_probe_fileserver(struct afs_fs_cursor *fc)
583{
584 bool success;
585 int ret, retries = 0;
586
587 _enter("");
588
589retry:
590 if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
591 _leave(" = t");
592 return true;
308 } 593 }
309 594
310 spin_unlock(&afs_server_graveyard_lock); 595 if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
596 success = afs_do_probe_fileserver(fc);
597 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
598 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
599 _leave(" = t");
600 return success;
601 }
602
603 _debug("wait");
604 ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
605 TASK_INTERRUPTIBLE);
606 if (ret == -ERESTARTSYS) {
607 fc->ac.error = ret;
608 _leave(" = f [%d]", ret);
609 return false;
610 }
311 611
312 /* now reap the corpses we've extracted */ 612 retries++;
313 while (!list_empty(&corpses)) { 613 if (retries == 4) {
314 server = list_entry(corpses.next, struct afs_server, grave); 614 fc->ac.error = -ESTALE;
315 list_del(&server->grave); 615 _leave(" = f [stale]");
316 afs_destroy_server(server); 616 return false;
317 } 617 }
618 _debug("retry");
619 goto retry;
318} 620}
319 621
320/* 622/*
321 * discard all the server records for rmmod 623 * Get an update for a server's address list.
322 */ 624 */
323void __exit afs_purge_servers(void) 625static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
324{ 626{
325 afs_server_timeout = 0; 627 struct afs_addr_list *alist, *discard;
326 mod_delayed_work(afs_wq, &afs_server_reaper, 0); 628
629 _enter("");
630
631 alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
632 &server->uuid);
633 if (IS_ERR(alist)) {
634 fc->ac.error = PTR_ERR(alist);
635 _leave(" = f [%d]", fc->ac.error);
636 return false;
637 }
638
639 discard = alist;
640 if (server->addr_version != alist->version) {
641 write_lock(&server->fs_lock);
642 discard = rcu_dereference_protected(server->addresses,
643 lockdep_is_held(&server->fs_lock));
644 rcu_assign_pointer(server->addresses, alist);
645 server->addr_version = alist->version;
646 write_unlock(&server->fs_lock);
647 }
648
649 server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
650 afs_put_addrlist(discard);
651 _leave(" = t");
652 return true;
653}
654
655/*
656 * See if a server's address list needs updating.
657 */
658bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
659{
660 time64_t now = ktime_get_real_seconds();
661 long diff;
662 bool success;
663 int ret, retries = 0;
664
665 _enter("");
666
667 ASSERT(server);
668
669retry:
670 diff = READ_ONCE(server->update_at) - now;
671 if (diff > 0) {
672 _leave(" = t [not now %ld]", diff);
673 return true;
674 }
675
676 if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
677 success = afs_update_server_record(fc, server);
678 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
679 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
680 _leave(" = %d", success);
681 return success;
682 }
683
684 ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
685 TASK_INTERRUPTIBLE);
686 if (ret == -ERESTARTSYS) {
687 fc->ac.error = ret;
688 _leave(" = f [intr]");
689 return false;
690 }
691
692 retries++;
693 if (retries == 4) {
694 _leave(" = f [stale]");
695 ret = -ESTALE;
696 return false;
697 }
698 goto retry;
327} 699}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
new file mode 100644
index 000000000000..26bad7032bba
--- /dev/null
+++ b/fs/afs/server_list.c
@@ -0,0 +1,153 @@
1/* AFS fileserver list management.
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include "internal.h"
15
16void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
17{
18 int i;
19
20 if (refcount_dec_and_test(&slist->usage)) {
21 for (i = 0; i < slist->nr_servers; i++) {
22 afs_put_cb_interest(net, slist->servers[i].cb_interest);
23 afs_put_server(net, slist->servers[i].server);
24 }
25 kfree(slist);
26 }
27}
28
29/*
30 * Build a server list from a VLDB record.
31 */
32struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
33 struct key *key,
34 struct afs_vldb_entry *vldb,
35 u8 type_mask)
36{
37 struct afs_server_list *slist;
38 struct afs_server *server;
39 int ret = -ENOMEM, nr_servers = 0, i, j;
40
41 for (i = 0; i < vldb->nr_servers; i++)
42 if (vldb->fs_mask[i] & type_mask)
43 nr_servers++;
44
45 slist = kzalloc(sizeof(struct afs_server_list) +
46 sizeof(struct afs_server_entry) * nr_servers,
47 GFP_KERNEL);
48 if (!slist)
49 goto error;
50
51 refcount_set(&slist->usage, 1);
52
53 /* Make sure a records exists for each server in the list. */
54 for (i = 0; i < vldb->nr_servers; i++) {
55 if (!(vldb->fs_mask[i] & type_mask))
56 continue;
57
58 server = afs_lookup_server(cell, key, &vldb->fs_server[i]);
59 if (IS_ERR(server)) {
60 ret = PTR_ERR(server);
61 if (ret == -ENOENT)
62 continue;
63 goto error_2;
64 }
65
66 /* Insertion-sort by server pointer */
67 for (j = 0; j < slist->nr_servers; j++)
68 if (slist->servers[j].server >= server)
69 break;
70 if (j < slist->nr_servers) {
71 if (slist->servers[j].server == server) {
72 afs_put_server(cell->net, server);
73 continue;
74 }
75
76 memmove(slist->servers + j + 1,
77 slist->servers + j,
78 (slist->nr_servers - j) * sizeof(struct afs_server_entry));
79 }
80
81 slist->servers[j].server = server;
82 slist->nr_servers++;
83 }
84
85 if (slist->nr_servers == 0) {
86 ret = -EDESTADDRREQ;
87 goto error_2;
88 }
89
90 return slist;
91
92error_2:
93 afs_put_serverlist(cell->net, slist);
94error:
95 return ERR_PTR(ret);
96}
97
98/*
99 * Copy the annotations from an old server list to its potential replacement.
100 */
101bool afs_annotate_server_list(struct afs_server_list *new,
102 struct afs_server_list *old)
103{
104 struct afs_server *cur;
105 int i, j;
106
107 if (old->nr_servers != new->nr_servers)
108 goto changed;
109
110 for (i = 0; i < old->nr_servers; i++)
111 if (old->servers[i].server != new->servers[i].server)
112 goto changed;
113
114 return false;
115
116changed:
117 /* Maintain the same current server as before if possible. */
118 cur = old->servers[old->index].server;
119 for (j = 0; j < new->nr_servers; j++) {
120 if (new->servers[j].server == cur) {
121 new->index = j;
122 break;
123 }
124 }
125
126 /* Keep the old callback interest records where possible so that we
127 * maintain callback interception.
128 */
129 i = 0;
130 j = 0;
131 while (i < old->nr_servers && j < new->nr_servers) {
132 if (new->servers[j].server == old->servers[i].server) {
133 struct afs_cb_interest *cbi = old->servers[i].cb_interest;
134 if (cbi) {
135 new->servers[j].cb_interest = cbi;
136 refcount_inc(&cbi->usage);
137 }
138 i++;
139 j++;
140 continue;
141 }
142
143 if (new->servers[j].server < old->servers[i].server) {
144 j++;
145 continue;
146 }
147
148 i++;
149 continue;
150 }
151
152 return true;
153}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 689173c0a682..875b5eb02242 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -25,11 +25,10 @@
25#include <linux/statfs.h> 25#include <linux/statfs.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/nsproxy.h> 27#include <linux/nsproxy.h>
28#include <linux/magic.h>
28#include <net/net_namespace.h> 29#include <net/net_namespace.h>
29#include "internal.h" 30#include "internal.h"
30 31
31#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
32
33static void afs_i_init_once(void *foo); 32static void afs_i_init_once(void *foo);
34static struct dentry *afs_mount(struct file_system_type *fs_type, 33static struct dentry *afs_mount(struct file_system_type *fs_type,
35 int flags, const char *dev_name, void *data); 34 int flags, const char *dev_name, void *data);
@@ -143,9 +142,9 @@ void __exit afs_fs_exit(void)
143 */ 142 */
144static int afs_show_devname(struct seq_file *m, struct dentry *root) 143static int afs_show_devname(struct seq_file *m, struct dentry *root)
145{ 144{
146 struct afs_super_info *as = root->d_sb->s_fs_info; 145 struct afs_super_info *as = AFS_FS_S(root->d_sb);
147 struct afs_volume *volume = as->volume; 146 struct afs_volume *volume = as->volume;
148 struct afs_cell *cell = volume->cell; 147 struct afs_cell *cell = as->cell;
149 const char *suf = ""; 148 const char *suf = "";
150 char pref = '%'; 149 char pref = '%';
151 150
@@ -163,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
163 break; 162 break;
164 } 163 }
165 164
166 seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf); 165 seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf);
167 return 0; 166 return 0;
168} 167}
169 168
@@ -201,12 +200,14 @@ static int afs_parse_options(struct afs_mount_params *params,
201 token = match_token(p, afs_options_list, args); 200 token = match_token(p, afs_options_list, args);
202 switch (token) { 201 switch (token) {
203 case afs_opt_cell: 202 case afs_opt_cell:
204 cell = afs_cell_lookup(args[0].from, 203 rcu_read_lock();
205 args[0].to - args[0].from, 204 cell = afs_lookup_cell_rcu(params->net,
206 false); 205 args[0].from,
206 args[0].to - args[0].from);
207 rcu_read_unlock();
207 if (IS_ERR(cell)) 208 if (IS_ERR(cell))
208 return PTR_ERR(cell); 209 return PTR_ERR(cell);
209 afs_put_cell(params->cell); 210 afs_put_cell(params->net, params->cell);
210 params->cell = cell; 211 params->cell = cell;
211 break; 212 break;
212 213
@@ -308,13 +309,14 @@ static int afs_parse_device_name(struct afs_mount_params *params,
308 309
309 /* lookup the cell record */ 310 /* lookup the cell record */
310 if (cellname || !params->cell) { 311 if (cellname || !params->cell) {
311 cell = afs_cell_lookup(cellname, cellnamesz, true); 312 cell = afs_lookup_cell(params->net, cellname, cellnamesz,
313 NULL, false);
312 if (IS_ERR(cell)) { 314 if (IS_ERR(cell)) {
313 printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", 315 printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
314 cellnamesz, cellnamesz, cellname ?: ""); 316 cellnamesz, cellnamesz, cellname ?: "");
315 return PTR_ERR(cell); 317 return PTR_ERR(cell);
316 } 318 }
317 afs_put_cell(params->cell); 319 afs_put_cell(params->net, params->cell);
318 params->cell = cell; 320 params->cell = cell;
319 } 321 }
320 322
@@ -332,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params,
332static int afs_test_super(struct super_block *sb, void *data) 334static int afs_test_super(struct super_block *sb, void *data)
333{ 335{
334 struct afs_super_info *as1 = data; 336 struct afs_super_info *as1 = data;
335 struct afs_super_info *as = sb->s_fs_info; 337 struct afs_super_info *as = AFS_FS_S(sb);
336 338
337 return as->volume == as1->volume; 339 return as->net == as1->net && as->volume->vid == as1->volume->vid;
338} 340}
339 341
340static int afs_set_super(struct super_block *sb, void *data) 342static int afs_set_super(struct super_block *sb, void *data)
341{ 343{
342 sb->s_fs_info = data; 344 struct afs_super_info *as = data;
345
346 sb->s_fs_info = as;
343 return set_anon_super(sb, NULL); 347 return set_anon_super(sb, NULL);
344} 348}
345 349
@@ -349,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data)
349static int afs_fill_super(struct super_block *sb, 353static int afs_fill_super(struct super_block *sb,
350 struct afs_mount_params *params) 354 struct afs_mount_params *params)
351{ 355{
352 struct afs_super_info *as = sb->s_fs_info; 356 struct afs_super_info *as = AFS_FS_S(sb);
353 struct afs_fid fid; 357 struct afs_fid fid;
354 struct inode *inode = NULL; 358 struct inode *inode = NULL;
355 int ret; 359 int ret;
@@ -366,13 +370,15 @@ static int afs_fill_super(struct super_block *sb,
366 if (ret) 370 if (ret)
367 return ret; 371 return ret;
368 sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; 372 sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
369 strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); 373 sprintf(sb->s_id, "%u", as->volume->vid);
374
375 afs_activate_volume(as->volume);
370 376
371 /* allocate the root inode and dentry */ 377 /* allocate the root inode and dentry */
372 fid.vid = as->volume->vid; 378 fid.vid = as->volume->vid;
373 fid.vnode = 1; 379 fid.vnode = 1;
374 fid.unique = 1; 380 fid.unique = 1;
375 inode = afs_iget(sb, params->key, &fid, NULL, NULL); 381 inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
376 if (IS_ERR(inode)) 382 if (IS_ERR(inode))
377 return PTR_ERR(inode); 383 return PTR_ERR(inode);
378 384
@@ -394,23 +400,45 @@ error:
394 return ret; 400 return ret;
395} 401}
396 402
403static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
404{
405 struct afs_super_info *as;
406
407 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
408 if (as) {
409 as->net = afs_get_net(params->net);
410 as->cell = afs_get_cell(params->cell);
411 }
412 return as;
413}
414
415static void afs_destroy_sbi(struct afs_super_info *as)
416{
417 if (as) {
418 afs_put_volume(as->cell, as->volume);
419 afs_put_cell(as->net, as->cell);
420 afs_put_net(as->net);
421 kfree(as);
422 }
423}
424
397/* 425/*
398 * get an AFS superblock 426 * get an AFS superblock
399 */ 427 */
400static struct dentry *afs_mount(struct file_system_type *fs_type, 428static struct dentry *afs_mount(struct file_system_type *fs_type,
401 int flags, const char *dev_name, void *options) 429 int flags, const char *dev_name, void *options)
402{ 430{
403 struct afs_mount_params params; 431 struct afs_mount_params params;
404 struct super_block *sb; 432 struct super_block *sb;
405 struct afs_volume *vol; 433 struct afs_volume *candidate;
406 struct key *key; 434 struct key *key;
407 char *new_opts = kstrdup(options, GFP_KERNEL);
408 struct afs_super_info *as; 435 struct afs_super_info *as;
409 int ret; 436 int ret;
410 437
411 _enter(",,%s,%p", dev_name, options); 438 _enter(",,%s,%p", dev_name, options);
412 439
413 memset(&params, 0, sizeof(params)); 440 memset(&params, 0, sizeof(params));
441 params.net = &__afs_net;
414 442
415 ret = -EINVAL; 443 ret = -EINVAL;
416 if (current->nsproxy->net_ns != &init_net) 444 if (current->nsproxy->net_ns != &init_net)
@@ -436,66 +464,75 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
436 } 464 }
437 params.key = key; 465 params.key = key;
438 466
439 /* parse the device name */
440 vol = afs_volume_lookup(&params);
441 if (IS_ERR(vol)) {
442 ret = PTR_ERR(vol);
443 goto error;
444 }
445
446 /* allocate a superblock info record */ 467 /* allocate a superblock info record */
447 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); 468 ret = -ENOMEM;
448 if (!as) { 469 as = afs_alloc_sbi(&params);
449 ret = -ENOMEM; 470 if (!as)
450 afs_put_volume(vol); 471 goto error_key;
451 goto error; 472
473 /* Assume we're going to need a volume record; at the very least we can
474 * use it to update the volume record if we have one already. This
475 * checks that the volume exists within the cell.
476 */
477 candidate = afs_create_volume(&params);
478 if (IS_ERR(candidate)) {
479 ret = PTR_ERR(candidate);
480 goto error_as;
452 } 481 }
453 as->volume = vol; 482
483 as->volume = candidate;
454 484
455 /* allocate a deviceless superblock */ 485 /* allocate a deviceless superblock */
456 sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); 486 sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
457 if (IS_ERR(sb)) { 487 if (IS_ERR(sb)) {
458 ret = PTR_ERR(sb); 488 ret = PTR_ERR(sb);
459 afs_put_volume(vol); 489 goto error_as;
460 kfree(as);
461 goto error;
462 } 490 }
463 491
464 if (!sb->s_root) { 492 if (!sb->s_root) {
465 /* initial superblock/root creation */ 493 /* initial superblock/root creation */
466 _debug("create"); 494 _debug("create");
467 ret = afs_fill_super(sb, &params); 495 ret = afs_fill_super(sb, &params);
468 if (ret < 0) { 496 if (ret < 0)
469 deactivate_locked_super(sb); 497 goto error_sb;
470 goto error; 498 as = NULL;
471 }
472 sb->s_flags |= MS_ACTIVE; 499 sb->s_flags |= MS_ACTIVE;
473 } else { 500 } else {
474 _debug("reuse"); 501 _debug("reuse");
475 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 502 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
476 afs_put_volume(vol); 503 afs_destroy_sbi(as);
477 kfree(as); 504 as = NULL;
478 } 505 }
479 506
480 afs_put_cell(params.cell); 507 afs_put_cell(params.net, params.cell);
481 kfree(new_opts); 508 key_put(params.key);
482 _leave(" = 0 [%p]", sb); 509 _leave(" = 0 [%p]", sb);
483 return dget(sb->s_root); 510 return dget(sb->s_root);
484 511
485error: 512error_sb:
486 afs_put_cell(params.cell); 513 deactivate_locked_super(sb);
514 goto error_key;
515error_as:
516 afs_destroy_sbi(as);
517error_key:
487 key_put(params.key); 518 key_put(params.key);
488 kfree(new_opts); 519error:
520 afs_put_cell(params.net, params.cell);
489 _leave(" = %d", ret); 521 _leave(" = %d", ret);
490 return ERR_PTR(ret); 522 return ERR_PTR(ret);
491} 523}
492 524
493static void afs_kill_super(struct super_block *sb) 525static void afs_kill_super(struct super_block *sb)
494{ 526{
495 struct afs_super_info *as = sb->s_fs_info; 527 struct afs_super_info *as = AFS_FS_S(sb);
528
529 /* Clear the callback interests (which will do ilookup5) before
530 * deactivating the superblock.
531 */
532 afs_clear_callback_interests(as->net, as->volume->servers);
496 kill_anon_super(sb); 533 kill_anon_super(sb);
497 afs_put_volume(as->volume); 534 afs_deactivate_volume(as->volume);
498 kfree(as); 535 afs_destroy_sbi(as);
499} 536}
500 537
501/* 538/*
@@ -507,16 +544,15 @@ static void afs_i_init_once(void *_vnode)
507 544
508 memset(vnode, 0, sizeof(*vnode)); 545 memset(vnode, 0, sizeof(*vnode));
509 inode_init_once(&vnode->vfs_inode); 546 inode_init_once(&vnode->vfs_inode);
510 init_waitqueue_head(&vnode->update_waitq); 547 mutex_init(&vnode->io_lock);
511 mutex_init(&vnode->permits_lock);
512 mutex_init(&vnode->validate_lock); 548 mutex_init(&vnode->validate_lock);
513 spin_lock_init(&vnode->writeback_lock); 549 spin_lock_init(&vnode->wb_lock);
514 spin_lock_init(&vnode->lock); 550 spin_lock_init(&vnode->lock);
515 INIT_LIST_HEAD(&vnode->writebacks); 551 INIT_LIST_HEAD(&vnode->wb_keys);
516 INIT_LIST_HEAD(&vnode->pending_locks); 552 INIT_LIST_HEAD(&vnode->pending_locks);
517 INIT_LIST_HEAD(&vnode->granted_locks); 553 INIT_LIST_HEAD(&vnode->granted_locks);
518 INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); 554 INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
519 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); 555 seqlock_init(&vnode->cb_lock);
520} 556}
521 557
522/* 558/*
@@ -536,9 +572,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
536 memset(&vnode->status, 0, sizeof(vnode->status)); 572 memset(&vnode->status, 0, sizeof(vnode->status));
537 573
538 vnode->volume = NULL; 574 vnode->volume = NULL;
539 vnode->update_cnt = 0;
540 vnode->flags = 1 << AFS_VNODE_UNSET; 575 vnode->flags = 1 << AFS_VNODE_UNSET;
541 vnode->cb_promised = false;
542 576
543 _leave(" = %p", &vnode->vfs_inode); 577 _leave(" = %p", &vnode->vfs_inode);
544 return &vnode->vfs_inode; 578 return &vnode->vfs_inode;
@@ -562,7 +596,7 @@ static void afs_destroy_inode(struct inode *inode)
562 596
563 _debug("DESTROY INODE %p", inode); 597 _debug("DESTROY INODE %p", inode);
564 598
565 ASSERTCMP(vnode->server, ==, NULL); 599 ASSERTCMP(vnode->cb_interest, ==, NULL);
566 600
567 call_rcu(&inode->i_rcu, afs_i_callback); 601 call_rcu(&inode->i_rcu, afs_i_callback);
568 atomic_dec(&afs_count_active_inodes); 602 atomic_dec(&afs_count_active_inodes);
@@ -573,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode)
573 */ 607 */
574static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) 608static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
575{ 609{
610 struct afs_fs_cursor fc;
576 struct afs_volume_status vs; 611 struct afs_volume_status vs;
577 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); 612 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
578 struct key *key; 613 struct key *key;
@@ -582,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
582 if (IS_ERR(key)) 617 if (IS_ERR(key))
583 return PTR_ERR(key); 618 return PTR_ERR(key);
584 619
585 ret = afs_vnode_get_volume_status(vnode, key, &vs); 620 ret = -ERESTARTSYS;
586 key_put(key); 621 if (afs_begin_vnode_operation(&fc, vnode, key)) {
587 if (ret < 0) { 622 fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
588 _leave(" = %d", ret); 623 while (afs_select_fileserver(&fc)) {
589 return ret; 624 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
625 afs_fs_get_volume_status(&fc, &vs);
626 }
627
628 afs_check_for_remote_deletion(&fc, fc.vnode);
629 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
630 ret = afs_end_vnode_operation(&fc);
590 } 631 }
591 632
592 buf->f_type = dentry->d_sb->s_magic; 633 key_put(key);
593 buf->f_bsize = AFS_BLOCK_SIZE;
594 buf->f_namelen = AFSNAMEMAX - 1;
595 634
596 if (vs.max_quota == 0) 635 if (ret == 0) {
597 buf->f_blocks = vs.part_max_blocks; 636 buf->f_type = dentry->d_sb->s_magic;
598 else 637 buf->f_bsize = AFS_BLOCK_SIZE;
599 buf->f_blocks = vs.max_quota; 638 buf->f_namelen = AFSNAMEMAX - 1;
600 buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; 639
601 return 0; 640 if (vs.max_quota == 0)
641 buf->f_blocks = vs.part_max_blocks;
642 else
643 buf->f_blocks = vs.max_quota;
644 buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
645 }
646
647 return ret;
602} 648}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index a5e4cc561b6c..e372f89fd36a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -12,58 +12,19 @@
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include "afs_fs.h"
15#include "internal.h" 16#include "internal.h"
16 17
17/* 18/*
18 * map volume locator abort codes to error codes 19 * Deliver reply data to a VL.GetEntryByNameU call.
19 */ 20 */
20static int afs_vl_abort_to_error(u32 abort_code) 21static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
21{ 22{
22 _enter("%u", abort_code); 23 struct afs_uvldbentry__xdr *uvldb;
23 24 struct afs_vldb_entry *entry;
24 switch (abort_code) { 25 bool new_only = false;
25 case AFSVL_IDEXIST: return -EEXIST;
26 case AFSVL_IO: return -EREMOTEIO;
27 case AFSVL_NAMEEXIST: return -EEXIST;
28 case AFSVL_CREATEFAIL: return -EREMOTEIO;
29 case AFSVL_NOENT: return -ENOMEDIUM;
30 case AFSVL_EMPTY: return -ENOMEDIUM;
31 case AFSVL_ENTDELETED: return -ENOMEDIUM;
32 case AFSVL_BADNAME: return -EINVAL;
33 case AFSVL_BADINDEX: return -EINVAL;
34 case AFSVL_BADVOLTYPE: return -EINVAL;
35 case AFSVL_BADSERVER: return -EINVAL;
36 case AFSVL_BADPARTITION: return -EINVAL;
37 case AFSVL_REPSFULL: return -EFBIG;
38 case AFSVL_NOREPSERVER: return -ENOENT;
39 case AFSVL_DUPREPSERVER: return -EEXIST;
40 case AFSVL_RWNOTFOUND: return -ENOENT;
41 case AFSVL_BADREFCOUNT: return -EINVAL;
42 case AFSVL_SIZEEXCEEDED: return -EINVAL;
43 case AFSVL_BADENTRY: return -EINVAL;
44 case AFSVL_BADVOLIDBUMP: return -EINVAL;
45 case AFSVL_IDALREADYHASHED: return -EINVAL;
46 case AFSVL_ENTRYLOCKED: return -EBUSY;
47 case AFSVL_BADVOLOPER: return -EBADRQC;
48 case AFSVL_BADRELLOCKTYPE: return -EINVAL;
49 case AFSVL_RERELEASE: return -EREMOTEIO;
50 case AFSVL_BADSERVERFLAG: return -EINVAL;
51 case AFSVL_PERM: return -EACCES;
52 case AFSVL_NOMEM: return -EREMOTEIO;
53 default:
54 return afs_abort_to_error(abort_code);
55 }
56}
57
58/*
59 * deliver reply data to a VL.GetEntryByXXX call
60 */
61static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
62{
63 struct afs_cache_vlocation *entry;
64 __be32 *bp;
65 u32 tmp; 26 u32 tmp;
66 int loop, ret; 27 int i, ret;
67 28
68 _enter(""); 29 _enter("");
69 30
@@ -72,144 +33,613 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
72 return ret; 33 return ret;
73 34
74 /* unmarshall the reply once we've received all of it */ 35 /* unmarshall the reply once we've received all of it */
75 entry = call->reply; 36 uvldb = call->buffer;
76 bp = call->buffer; 37 entry = call->reply[0];
77
78 for (loop = 0; loop < 64; loop++)
79 entry->name[loop] = ntohl(*bp++);
80 entry->name[loop] = 0;
81 bp++; /* final NUL */
82 38
83 bp++; /* type */ 39 for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
84 entry->nservers = ntohl(*bp++); 40 entry->name[i] = (u8)ntohl(uvldb->name[i]);
41 entry->name[i] = 0;
42 entry->name_len = strlen(entry->name);
85 43
86 for (loop = 0; loop < 8; loop++) 44 /* If there is a new replication site that we can use, ignore all the
87 entry->servers[loop].s_addr = *bp++; 45 * sites that aren't marked as new.
46 */
47 for (i = 0; i < AFS_NMAXNSERVERS; i++) {
48 tmp = ntohl(uvldb->serverFlags[i]);
49 if (!(tmp & AFS_VLSF_DONTUSE) &&
50 (tmp & AFS_VLSF_NEWREPSITE))
51 new_only = true;
52 }
88 53
89 bp += 8; /* partition IDs */ 54 for (i = 0; i < AFS_NMAXNSERVERS; i++) {
55 struct afs_uuid__xdr *xdr;
56 struct afs_uuid *uuid;
57 int j;
90 58
91 for (loop = 0; loop < 8; loop++) { 59 tmp = ntohl(uvldb->serverFlags[i]);
92 tmp = ntohl(*bp++); 60 if (tmp & AFS_VLSF_DONTUSE ||
93 entry->srvtmask[loop] = 0; 61 (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
62 continue;
94 if (tmp & AFS_VLSF_RWVOL) 63 if (tmp & AFS_VLSF_RWVOL)
95 entry->srvtmask[loop] |= AFS_VOL_VTM_RW; 64 entry->fs_mask[i] |= AFS_VOL_VTM_RW;
96 if (tmp & AFS_VLSF_ROVOL) 65 if (tmp & AFS_VLSF_ROVOL)
97 entry->srvtmask[loop] |= AFS_VOL_VTM_RO; 66 entry->fs_mask[i] |= AFS_VOL_VTM_RO;
98 if (tmp & AFS_VLSF_BACKVOL) 67 if (tmp & AFS_VLSF_BACKVOL)
99 entry->srvtmask[loop] |= AFS_VOL_VTM_BAK; 68 entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
100 } 69 if (!entry->fs_mask[i])
70 continue;
101 71
102 entry->vid[0] = ntohl(*bp++); 72 xdr = &uvldb->serverNumber[i];
103 entry->vid[1] = ntohl(*bp++); 73 uuid = (struct afs_uuid *)&entry->fs_server[i];
104 entry->vid[2] = ntohl(*bp++); 74 uuid->time_low = xdr->time_low;
75 uuid->time_mid = htons(ntohl(xdr->time_mid));
76 uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version));
77 uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved);
78 uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low);
79 for (j = 0; j < 6; j++)
80 uuid->node[j] = (u8)ntohl(xdr->node[j]);
105 81
106 bp++; /* clone ID */ 82 entry->nr_servers++;
83 }
84
85 for (i = 0; i < AFS_MAXTYPES; i++)
86 entry->vid[i] = ntohl(uvldb->volumeId[i]);
107 87
108 tmp = ntohl(*bp++); /* flags */ 88 tmp = ntohl(uvldb->flags);
109 entry->vidmask = 0;
110 if (tmp & AFS_VLF_RWEXISTS) 89 if (tmp & AFS_VLF_RWEXISTS)
111 entry->vidmask |= AFS_VOL_VTM_RW; 90 __set_bit(AFS_VLDB_HAS_RW, &entry->flags);
112 if (tmp & AFS_VLF_ROEXISTS) 91 if (tmp & AFS_VLF_ROEXISTS)
113 entry->vidmask |= AFS_VOL_VTM_RO; 92 __set_bit(AFS_VLDB_HAS_RO, &entry->flags);
114 if (tmp & AFS_VLF_BACKEXISTS) 93 if (tmp & AFS_VLF_BACKEXISTS)
115 entry->vidmask |= AFS_VOL_VTM_BAK; 94 __set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
116 if (!entry->vidmask)
117 return -EBADMSG;
118 95
96 if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
97 entry->error = -ENOMEDIUM;
98 __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
99 }
100
101 __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags);
119 _leave(" = 0 [done]"); 102 _leave(" = 0 [done]");
120 return 0; 103 return 0;
121} 104}
122 105
123/* 106static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
124 * VL.GetEntryByName operation type 107{
125 */ 108 kfree(call->reply[0]);
126static const struct afs_call_type afs_RXVLGetEntryByName = { 109 afs_flat_call_destructor(call);
127 .name = "VL.GetEntryByName", 110}
128 .deliver = afs_deliver_vl_get_entry_by_xxx,
129 .abort_to_error = afs_vl_abort_to_error,
130 .destructor = afs_flat_call_destructor,
131};
132 111
133/* 112/*
134 * VL.GetEntryById operation type 113 * VL.GetEntryByNameU operation type.
135 */ 114 */
136static const struct afs_call_type afs_RXVLGetEntryById = { 115static const struct afs_call_type afs_RXVLGetEntryByNameU = {
137 .name = "VL.GetEntryById", 116 .name = "VL.GetEntryByNameU",
138 .deliver = afs_deliver_vl_get_entry_by_xxx, 117 .op = afs_VL_GetEntryByNameU,
139 .abort_to_error = afs_vl_abort_to_error, 118 .deliver = afs_deliver_vl_get_entry_by_name_u,
140 .destructor = afs_flat_call_destructor, 119 .destructor = afs_destroy_vl_get_entry_by_name_u,
141}; 120};
142 121
143/* 122/*
144 * dispatch a get volume entry by name operation 123 * Dispatch a get volume entry by name or ID operation (uuid variant). If the
124 * volname is a decimal number then it's a volume ID not a volume name.
145 */ 125 */
146int afs_vl_get_entry_by_name(struct in_addr *addr, 126struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
147 struct key *key, 127 struct afs_addr_cursor *ac,
148 const char *volname, 128 struct key *key,
149 struct afs_cache_vlocation *entry, 129 const char *volname,
150 bool async) 130 int volnamesz)
151{ 131{
132 struct afs_vldb_entry *entry;
152 struct afs_call *call; 133 struct afs_call *call;
153 size_t volnamesz, reqsz, padsz; 134 size_t reqsz, padsz;
154 __be32 *bp; 135 __be32 *bp;
155 136
156 _enter(""); 137 _enter("");
157 138
158 volnamesz = strlen(volname);
159 padsz = (4 - (volnamesz & 3)) & 3; 139 padsz = (4 - (volnamesz & 3)) & 3;
160 reqsz = 8 + volnamesz + padsz; 140 reqsz = 8 + volnamesz + padsz;
161 141
162 call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384); 142 entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL);
163 if (!call) 143 if (!entry)
164 return -ENOMEM; 144 return ERR_PTR(-ENOMEM);
145
146 call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz,
147 sizeof(struct afs_uvldbentry__xdr));
148 if (!call) {
149 kfree(entry);
150 return ERR_PTR(-ENOMEM);
151 }
165 152
166 call->key = key; 153 call->key = key;
167 call->reply = entry; 154 call->reply[0] = entry;
168 call->service_id = VL_SERVICE; 155 call->ret_reply0 = true;
169 call->port = htons(AFS_VL_PORT);
170 156
171 /* marshall the parameters */ 157 /* Marshall the parameters */
172 bp = call->request; 158 bp = call->request;
173 *bp++ = htonl(VLGETENTRYBYNAME); 159 *bp++ = htonl(VLGETENTRYBYNAMEU);
174 *bp++ = htonl(volnamesz); 160 *bp++ = htonl(volnamesz);
175 memcpy(bp, volname, volnamesz); 161 memcpy(bp, volname, volnamesz);
176 if (padsz > 0) 162 if (padsz > 0)
177 memset((void *) bp + volnamesz, 0, padsz); 163 memset((void *)bp + volnamesz, 0, padsz);
178 164
179 /* initiate the call */ 165 trace_afs_make_vl_call(call);
180 return afs_make_call(addr, call, GFP_KERNEL, async); 166 return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
181} 167}
182 168
183/* 169/*
184 * dispatch a get volume entry by ID operation 170 * Deliver reply data to a VL.GetAddrsU call.
171 *
172 * GetAddrsU(IN ListAddrByAttributes *inaddr,
173 * OUT afsUUID *uuidp1,
174 * OUT uint32_t *uniquifier,
175 * OUT uint32_t *nentries,
176 * OUT bulkaddrs *blkaddrs);
185 */ 177 */
186int afs_vl_get_entry_by_id(struct in_addr *addr, 178static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
187 struct key *key,
188 afs_volid_t volid,
189 afs_voltype_t voltype,
190 struct afs_cache_vlocation *entry,
191 bool async)
192{ 179{
180 struct afs_addr_list *alist;
181 __be32 *bp;
182 u32 uniquifier, nentries, count;
183 int i, ret;
184
185 _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
186
187again:
188 switch (call->unmarshall) {
189 case 0:
190 call->offset = 0;
191 call->unmarshall++;
192
193 /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
194 case 1:
195 ret = afs_extract_data(call, call->buffer,
196 sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
197 true);
198 if (ret < 0)
199 return ret;
200
201 bp = call->buffer + sizeof(struct afs_uuid__xdr);
202 uniquifier = ntohl(*bp++);
203 nentries = ntohl(*bp++);
204 count = ntohl(*bp);
205
206 nentries = min(nentries, count);
207 alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
208 if (!alist)
209 return -ENOMEM;
210 alist->version = uniquifier;
211 call->reply[0] = alist;
212 call->count = count;
213 call->count2 = nentries;
214 call->offset = 0;
215 call->unmarshall++;
216
217 /* Extract entries */
218 case 2:
219 count = min(call->count, 4U);
220 ret = afs_extract_data(call, call->buffer,
221 count * sizeof(__be32),
222 call->count > 4);
223 if (ret < 0)
224 return ret;
225
226 alist = call->reply[0];
227 bp = call->buffer;
228 for (i = 0; i < count; i++)
229 if (alist->nr_addrs < call->count2)
230 afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
231
232 call->count -= count;
233 if (call->count > 0)
234 goto again;
235 call->offset = 0;
236 call->unmarshall++;
237 break;
238 }
239
240 _leave(" = 0 [done]");
241 return 0;
242}
243
244static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
245{
246 afs_put_server(call->net, (struct afs_server *)call->reply[0]);
247 kfree(call->reply[1]);
248 return afs_flat_call_destructor(call);
249}
250
251/*
252 * VL.GetAddrsU operation type.
253 */
254static const struct afs_call_type afs_RXVLGetAddrsU = {
255 .name = "VL.GetAddrsU",
256 .op = afs_VL_GetAddrsU,
257 .deliver = afs_deliver_vl_get_addrs_u,
258 .destructor = afs_vl_get_addrs_u_destructor,
259};
260
261/*
262 * Dispatch an operation to get the addresses for a server, where the server is
263 * nominated by UUID.
264 */
265struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
266 struct afs_addr_cursor *ac,
267 struct key *key,
268 const uuid_t *uuid)
269{
270 struct afs_ListAddrByAttributes__xdr *r;
271 const struct afs_uuid *u = (const struct afs_uuid *)uuid;
193 struct afs_call *call; 272 struct afs_call *call;
194 __be32 *bp; 273 __be32 *bp;
274 int i;
195 275
196 _enter(""); 276 _enter("");
197 277
198 call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384); 278 call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU,
279 sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr),
280 sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
281 if (!call)
282 return ERR_PTR(-ENOMEM);
283
284 call->key = key;
285 call->reply[0] = NULL;
286 call->ret_reply0 = true;
287
288 /* Marshall the parameters */
289 bp = call->request;
290 *bp++ = htonl(VLGETADDRSU);
291 r = (struct afs_ListAddrByAttributes__xdr *)bp;
292 r->Mask = htonl(AFS_VLADDR_UUID);
293 r->ipaddr = 0;
294 r->index = 0;
295 r->spare = 0;
296 r->uuid.time_low = u->time_low;
297 r->uuid.time_mid = htonl(ntohs(u->time_mid));
298 r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version));
299 r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved);
300 r->uuid.clock_seq_low = htonl(u->clock_seq_low);
301 for (i = 0; i < 6; i++)
302 r->uuid.node[i] = ntohl(u->node[i]);
303
304 trace_afs_make_vl_call(call);
305 return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
306}
307
308/*
309 * Deliver reply data to an VL.GetCapabilities operation.
310 */
311static int afs_deliver_vl_get_capabilities(struct afs_call *call)
312{
313 u32 count;
314 int ret;
315
316 _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
317
318again:
319 switch (call->unmarshall) {
320 case 0:
321 call->offset = 0;
322 call->unmarshall++;
323
324 /* Extract the capabilities word count */
325 case 1:
326 ret = afs_extract_data(call, &call->tmp,
327 1 * sizeof(__be32),
328 true);
329 if (ret < 0)
330 return ret;
331
332 count = ntohl(call->tmp);
333
334 call->count = count;
335 call->count2 = count;
336 call->offset = 0;
337 call->unmarshall++;
338
339 /* Extract capabilities words */
340 case 2:
341 count = min(call->count, 16U);
342 ret = afs_extract_data(call, call->buffer,
343 count * sizeof(__be32),
344 call->count > 16);
345 if (ret < 0)
346 return ret;
347
348 /* TODO: Examine capabilities */
349
350 call->count -= count;
351 if (call->count > 0)
352 goto again;
353 call->offset = 0;
354 call->unmarshall++;
355 break;
356 }
357
358 call->reply[0] = (void *)(unsigned long)call->service_id;
359
360 _leave(" = 0 [done]");
361 return 0;
362}
363
364/*
365 * VL.GetCapabilities operation type
366 */
367static const struct afs_call_type afs_RXVLGetCapabilities = {
368 .name = "VL.GetCapabilities",
369 .op = afs_VL_GetCapabilities,
370 .deliver = afs_deliver_vl_get_capabilities,
371 .destructor = afs_flat_call_destructor,
372};
373
374/*
375 * Probe a fileserver for the capabilities that it supports. This can
376 * return up to 196 words.
377 *
378 * We use this to probe for service upgrade to determine what the server at the
379 * other end supports.
380 */
381int afs_vl_get_capabilities(struct afs_net *net,
382 struct afs_addr_cursor *ac,
383 struct key *key)
384{
385 struct afs_call *call;
386 __be32 *bp;
387
388 _enter("");
389
390 call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
199 if (!call) 391 if (!call)
200 return -ENOMEM; 392 return -ENOMEM;
201 393
202 call->key = key; 394 call->key = key;
203 call->reply = entry; 395 call->upgrade = true; /* Let's see if this is a YFS server */
204 call->service_id = VL_SERVICE; 396 call->reply[0] = (void *)VLGETCAPABILITIES;
205 call->port = htons(AFS_VL_PORT); 397 call->ret_reply0 = true;
206 398
207 /* marshall the parameters */ 399 /* marshall the parameters */
208 bp = call->request; 400 bp = call->request;
209 *bp++ = htonl(VLGETENTRYBYID); 401 *bp++ = htonl(VLGETCAPABILITIES);
210 *bp++ = htonl(volid); 402
211 *bp = htonl(voltype); 403 /* Can't take a ref on server */
404 trace_afs_make_vl_call(call);
405 return afs_make_call(ac, call, GFP_KERNEL, false);
406}
407
408/*
409 * Deliver reply data to a YFSVL.GetEndpoints call.
410 *
411 * GetEndpoints(IN yfsServerAttributes *attr,
412 * OUT opr_uuid *uuid,
413 * OUT afs_int32 *uniquifier,
414 * OUT endpoints *fsEndpoints,
415 * OUT endpoints *volEndpoints)
416 */
417static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
418{
419 struct afs_addr_list *alist;
420 __be32 *bp;
421 u32 uniquifier, size;
422 int ret;
423
424 _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
425
426again:
427 switch (call->unmarshall) {
428 case 0:
429 call->offset = 0;
430 call->unmarshall = 1;
431
432 /* Extract the returned uuid, uniquifier, fsEndpoints count and
433 * either the first fsEndpoint type or the volEndpoints
434 * count if there are no fsEndpoints. */
435 case 1:
436 ret = afs_extract_data(call, call->buffer,
437 sizeof(uuid_t) +
438 3 * sizeof(__be32),
439 true);
440 if (ret < 0)
441 return ret;
442
443 bp = call->buffer + sizeof(uuid_t);
444 uniquifier = ntohl(*bp++);
445 call->count = ntohl(*bp++);
446 call->count2 = ntohl(*bp); /* Type or next count */
447
448 if (call->count > YFS_MAXENDPOINTS)
449 return -EBADMSG;
450
451 alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
452 if (!alist)
453 return -ENOMEM;
454 alist->version = uniquifier;
455 call->reply[0] = alist;
456 call->offset = 0;
457
458 if (call->count == 0)
459 goto extract_volendpoints;
460
461 call->unmarshall = 2;
462
463 /* Extract fsEndpoints[] entries */
464 case 2:
465 switch (call->count2) {
466 case YFS_ENDPOINT_IPV4:
467 size = sizeof(__be32) * (1 + 1 + 1);
468 break;
469 case YFS_ENDPOINT_IPV6:
470 size = sizeof(__be32) * (1 + 4 + 1);
471 break;
472 default:
473 return -EBADMSG;
474 }
475
476 size += sizeof(__be32);
477 ret = afs_extract_data(call, call->buffer, size, true);
478 if (ret < 0)
479 return ret;
480
481 alist = call->reply[0];
482 bp = call->buffer;
483 switch (call->count2) {
484 case YFS_ENDPOINT_IPV4:
485 if (ntohl(bp[0]) != sizeof(__be32) * 2)
486 return -EBADMSG;
487 afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
488 bp += 3;
489 break;
490 case YFS_ENDPOINT_IPV6:
491 if (ntohl(bp[0]) != sizeof(__be32) * 5)
492 return -EBADMSG;
493 afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
494 bp += 6;
495 break;
496 default:
497 return -EBADMSG;
498 }
499
500 /* Got either the type of the next entry or the count of
501 * volEndpoints if no more fsEndpoints.
502 */
503 call->count2 = htonl(*bp++);
504
505 call->offset = 0;
506 call->count--;
507 if (call->count > 0)
508 goto again;
509
510 extract_volendpoints:
511 /* Extract the list of volEndpoints. */
512 call->count = call->count2;
513 if (!call->count)
514 goto end;
515 if (call->count > YFS_MAXENDPOINTS)
516 return -EBADMSG;
517
518 call->unmarshall = 3;
519
520 /* Extract the type of volEndpoints[0]. Normally we would
521 * extract the type of the next endpoint when we extract the
522 * data of the current one, but this is the first...
523 */
524 case 3:
525 ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
526 if (ret < 0)
527 return ret;
528
529 bp = call->buffer;
530 call->count2 = htonl(*bp++);
531 call->offset = 0;
532 call->unmarshall = 4;
533
534 /* Extract volEndpoints[] entries */
535 case 4:
536 switch (call->count2) {
537 case YFS_ENDPOINT_IPV4:
538 size = sizeof(__be32) * (1 + 1 + 1);
539 break;
540 case YFS_ENDPOINT_IPV6:
541 size = sizeof(__be32) * (1 + 4 + 1);
542 break;
543 default:
544 return -EBADMSG;
545 }
546
547 if (call->count > 1)
548 size += sizeof(__be32);
549 ret = afs_extract_data(call, call->buffer, size, true);
550 if (ret < 0)
551 return ret;
552
553 bp = call->buffer;
554 switch (call->count2) {
555 case YFS_ENDPOINT_IPV4:
556 if (ntohl(bp[0]) != sizeof(__be32) * 2)
557 return -EBADMSG;
558 bp += 3;
559 break;
560 case YFS_ENDPOINT_IPV6:
561 if (ntohl(bp[0]) != sizeof(__be32) * 5)
562 return -EBADMSG;
563 bp += 6;
564 break;
565 default:
566 return -EBADMSG;
567 }
568
569 /* Got either the type of the next entry or the count of
570 * volEndpoints if no more fsEndpoints.
571 */
572 call->offset = 0;
573 call->count--;
574 if (call->count > 0) {
575 call->count2 = htonl(*bp++);
576 goto again;
577 }
578
579 end:
580 call->unmarshall = 5;
581
582 /* Done */
583 case 5:
584 ret = afs_extract_data(call, call->buffer, 0, false);
585 if (ret < 0)
586 return ret;
587 call->unmarshall = 6;
588
589 case 6:
590 break;
591 }
592
593 alist = call->reply[0];
594
595 /* Start with IPv6 if available. */
596 if (alist->nr_ipv4 < alist->nr_addrs)
597 alist->index = alist->nr_ipv4;
598
599 _leave(" = 0 [done]");
600 return 0;
601}
602
603/*
604 * YFSVL.GetEndpoints operation type.
605 */
606static const struct afs_call_type afs_YFSVLGetEndpoints = {
607 .name = "YFSVL.GetEndpoints",
608 .op = afs_YFSVL_GetEndpoints,
609 .deliver = afs_deliver_yfsvl_get_endpoints,
610 .destructor = afs_vl_get_addrs_u_destructor,
611};
612
613/*
614 * Dispatch an operation to get the addresses for a server, where the server is
615 * nominated by UUID.
616 */
617struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
618 struct afs_addr_cursor *ac,
619 struct key *key,
620 const uuid_t *uuid)
621{
622 struct afs_call *call;
623 __be32 *bp;
624
625 _enter("");
626
627 call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
628 sizeof(__be32) * 2 + sizeof(*uuid),
629 sizeof(struct in6_addr) + sizeof(__be32) * 3);
630 if (!call)
631 return ERR_PTR(-ENOMEM);
632
633 call->key = key;
634 call->reply[0] = NULL;
635 call->ret_reply0 = true;
636
637 /* Marshall the parameters */
638 bp = call->request;
639 *bp++ = htonl(YVLGETENDPOINTS);
640 *bp++ = htonl(YFS_SERVER_UUID);
641 memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
212 642
213 /* initiate the call */ 643 trace_afs_make_vl_call(call);
214 return afs_make_call(addr, call, GFP_KERNEL, async); 644 return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
215} 645}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
deleted file mode 100644
index 37b7c3b342a6..000000000000
--- a/fs/afs/vlocation.c
+++ /dev/null
@@ -1,720 +0,0 @@
1/* AFS volume location management
2 *
3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/init.h>
16#include <linux/sched.h>
17#include "internal.h"
18
19static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */
20static unsigned afs_vlocation_update_timeout = 10 * 60;
21
22static void afs_vlocation_reaper(struct work_struct *);
23static void afs_vlocation_updater(struct work_struct *);
24
25static LIST_HEAD(afs_vlocation_updates);
26static LIST_HEAD(afs_vlocation_graveyard);
27static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
28static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
29static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
30static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
31static struct workqueue_struct *afs_vlocation_update_worker;
32
33/*
34 * iterate through the VL servers in a cell until one of them admits knowing
35 * about the volume in question
36 */
37static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
38 struct key *key,
39 struct afs_cache_vlocation *vldb)
40{
41 struct afs_cell *cell = vl->cell;
42 struct in_addr addr;
43 int count, ret;
44
45 _enter("%s,%s", cell->name, vl->vldb.name);
46
47 down_write(&vl->cell->vl_sem);
48 ret = -ENOMEDIUM;
49 for (count = cell->vl_naddrs; count > 0; count--) {
50 addr = cell->vl_addrs[cell->vl_curr_svix];
51
52 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
53
54 /* attempt to access the VL server */
55 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
56 false);
57 switch (ret) {
58 case 0:
59 goto out;
60 case -ENOMEM:
61 case -ENONET:
62 case -ENETUNREACH:
63 case -EHOSTUNREACH:
64 case -ECONNREFUSED:
65 if (ret == -ENOMEM || ret == -ENONET)
66 goto out;
67 goto rotate;
68 case -ENOMEDIUM:
69 case -EKEYREJECTED:
70 case -EKEYEXPIRED:
71 goto out;
72 default:
73 ret = -EIO;
74 goto rotate;
75 }
76
77 /* rotate the server records upon lookup failure */
78 rotate:
79 cell->vl_curr_svix++;
80 cell->vl_curr_svix %= cell->vl_naddrs;
81 }
82
83out:
84 up_write(&vl->cell->vl_sem);
85 _leave(" = %d", ret);
86 return ret;
87}
88
89/*
90 * iterate through the VL servers in a cell until one of them admits knowing
91 * about the volume in question
92 */
93static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
94 struct key *key,
95 afs_volid_t volid,
96 afs_voltype_t voltype,
97 struct afs_cache_vlocation *vldb)
98{
99 struct afs_cell *cell = vl->cell;
100 struct in_addr addr;
101 int count, ret;
102
103 _enter("%s,%x,%d,", cell->name, volid, voltype);
104
105 down_write(&vl->cell->vl_sem);
106 ret = -ENOMEDIUM;
107 for (count = cell->vl_naddrs; count > 0; count--) {
108 addr = cell->vl_addrs[cell->vl_curr_svix];
109
110 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
111
112 /* attempt to access the VL server */
113 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
114 false);
115 switch (ret) {
116 case 0:
117 goto out;
118 case -ENOMEM:
119 case -ENONET:
120 case -ENETUNREACH:
121 case -EHOSTUNREACH:
122 case -ECONNREFUSED:
123 if (ret == -ENOMEM || ret == -ENONET)
124 goto out;
125 goto rotate;
126 case -EBUSY:
127 vl->upd_busy_cnt++;
128 if (vl->upd_busy_cnt <= 3) {
129 if (vl->upd_busy_cnt > 1) {
130 /* second+ BUSY - sleep a little bit */
131 set_current_state(TASK_UNINTERRUPTIBLE);
132 schedule_timeout(1);
133 }
134 continue;
135 }
136 break;
137 case -ENOMEDIUM:
138 vl->upd_rej_cnt++;
139 goto rotate;
140 default:
141 ret = -EIO;
142 goto rotate;
143 }
144
145 /* rotate the server records upon lookup failure */
146 rotate:
147 cell->vl_curr_svix++;
148 cell->vl_curr_svix %= cell->vl_naddrs;
149 vl->upd_busy_cnt = 0;
150 }
151
152out:
153 if (ret < 0 && vl->upd_rej_cnt > 0) {
154 printk(KERN_NOTICE "kAFS:"
155 " Active volume no longer valid '%s'\n",
156 vl->vldb.name);
157 vl->valid = 0;
158 ret = -ENOMEDIUM;
159 }
160
161 up_write(&vl->cell->vl_sem);
162 _leave(" = %d", ret);
163 return ret;
164}
165
166/*
167 * allocate a volume location record
168 */
169static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
170 const char *name,
171 size_t namesz)
172{
173 struct afs_vlocation *vl;
174
175 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
176 if (vl) {
177 vl->cell = cell;
178 vl->state = AFS_VL_NEW;
179 atomic_set(&vl->usage, 1);
180 INIT_LIST_HEAD(&vl->link);
181 INIT_LIST_HEAD(&vl->grave);
182 INIT_LIST_HEAD(&vl->update);
183 init_waitqueue_head(&vl->waitq);
184 spin_lock_init(&vl->lock);
185 memcpy(vl->vldb.name, name, namesz);
186 }
187
188 _leave(" = %p", vl);
189 return vl;
190}
191
/*
 * update record if we found it in the cache
 * - revalidates the cached VLDB entry against a VL server by querying on
 *   volume ID rather than name (the name may have changed server-side)
 * - returns 0 on success, -ENOMEDIUM if the volume no longer exists, or a
 *   negative error for network/server failures
 */
static int afs_vlocation_update_record(struct afs_vlocation *vl,
				       struct key *key,
				       struct afs_cache_vlocation *vldb)
{
	afs_voltype_t voltype;
	afs_volid_t vid;
	int ret;

	/* try to look up a cached volume in the cell VL databases by ID */
	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vl->vldb.name,
	       vl->vldb.vidmask,
	       ntohl(vl->vldb.servers[0].s_addr),
	       vl->vldb.srvtmask[0],
	       ntohl(vl->vldb.servers[1].s_addr),
	       vl->vldb.srvtmask[1],
	       ntohl(vl->vldb.servers[2].s_addr),
	       vl->vldb.srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vl->vldb.vid[0],
	       vl->vldb.vid[1],
	       vl->vldb.vid[2]);

	/* pick the volume ID to query by: vid[] slots 0/1/2 hold the RW, RO
	 * and backup IDs respectively, and vidmask says which are present */
	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
		vid = vl->vldb.vid[0];
		voltype = AFSVL_RWVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
		vid = vl->vldb.vid[1];
		voltype = AFSVL_ROVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
		vid = vl->vldb.vid[2];
		voltype = AFSVL_BACKVOL;
	} else {
		/* a cached record should always have at least one type bit
		 * set; the assignments after BUG() are unreachable and exist
		 * only to placate uninitialised-variable analysis */
		BUG();
		vid = 0;
		voltype = 0;
	}

	/* contact the server to make sure the volume is still available
	 * - TODO: need to handle disconnected operation here
	 */
	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
	switch (ret) {
		/* net error */
	default:
		printk(KERN_WARNING "kAFS:"
		       " failed to update volume '%s' (%x) up in '%s': %d\n",
		       vl->vldb.name, vid, vl->cell->name, ret);
		_leave(" = %d", ret);
		return ret;

		/* pulled from local cache into memory */
	case 0:
		_leave(" = 0");
		return 0;

		/* uh oh... looks like the volume got deleted */
	case -ENOMEDIUM:
		printk(KERN_ERR "kAFS:"
		       " volume '%s' (%x) does not exist '%s'\n",
		       vl->vldb.name, vid, vl->cell->name);

		/* TODO: make existing record unavailable */
		_leave(" = %d", ret);
		return ret;
	}
}
263
/*
 * apply the update to a VL record
 * - overwrites the cached VLDB entry wholesale with the freshly fetched one
 *   and pokes the fscache copy so the on-disk cache is refreshed too
 */
static void afs_vlocation_apply_update(struct afs_vlocation *vl,
				       struct afs_cache_vlocation *vldb)
{
	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vldb->name, vldb->vidmask,
	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);

	/* warn if the volume was renamed on the server since we cached it */
	if (strcmp(vldb->name, vl->vldb.name) != 0)
		printk(KERN_NOTICE "kAFS:"
		       " name of volume '%s' changed to '%s' on server\n",
		       vl->vldb.name, vldb->name);

	vl->vldb = *vldb;

#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif
}
290
/*
 * fill in a volume location record, consulting the cache and the VL server
 * both
 * - called with the record in AFS_VL_CREATING state; on success the caller
 *   marks it valid
 * - returns 0 on success or a negative error code
 */
static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
					struct key *key)
{
	struct afs_cache_vlocation vldb;
	int ret;

	_enter("");

	ASSERTCMP(vl->valid, ==, 0);

	memset(&vldb, 0, sizeof(vldb));

	/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
	/* the cookie retrieval callback is expected to set vl->valid if a
	 * cached copy is found - NOTE(review): confirm against the cache
	 * index definition */
	vl->cache = fscache_acquire_cookie(vl->cell->cache,
					   &afs_vlocation_cache_index_def, vl,
					   true);
#endif

	if (vl->valid) {
		/* try to update a known volume in the cell VL databases by
		 * ID as the name may have changed */
		_debug("found in cache");
		ret = afs_vlocation_update_record(vl, key, &vldb);
	} else {
		/* try to look up an unknown volume in the cell VL databases by
		 * name */
		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
		if (ret < 0) {
			printk("kAFS: failed to locate '%s' in cell '%s'\n",
			       vl->vldb.name, vl->cell->name);
			return ret;
		}
	}

	/* fold the result of either lookup back into the in-memory record */
	afs_vlocation_apply_update(vl, &vldb);
	_leave(" = 0");
	return 0;
}
334
/*
 * queue a vlocation record for updates
 * - appends the record to the global update list, spacing its update time at
 *   least one second after the youngest record already queued
 * - kicks the delayed updater work item if the queue was previously empty
 */
static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
{
	struct afs_vlocation *xvl;

	/* wait at least 10 minutes before updating... */
	vl->update_at = ktime_get_real_seconds() +
			afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* ... but wait at least 1 second more than the newest record
		 * already queued so that we don't spam the VL server suddenly
		 * with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
	} else {
		/* queue was empty, so the worker isn't scheduled yet; arm it
		 * to fire when this record's update falls due */
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update,
				   afs_vlocation_update_timeout * HZ);
	}

	/* the list is kept in ascending update_at order, so append */
	list_add_tail(&vl->update, &afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
}
366
/*
 * lookup volume location
 * - iterate through the VL servers in a cell until one of them admits knowing
 *   about the volume in question
 * - lookup in the local cache if not able to find on the VL server
 * - insert/update in the local cache if did get a VL response
 * - returns the record with a reference held for the caller, or an ERR_PTR
 */
struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
					   struct key *key,
					   const char *name,
					   size_t namesz)
{
	struct afs_vlocation *vl;
	int ret;

	_enter("{%s},{%x},%*.*s,%zu",
	       cell->name, key_serial(key),
	       (int) namesz, (int) namesz, name, namesz);

	/* the name must fit in the record's fixed buffer with a NUL */
	if (namesz >= sizeof(vl->vldb.name)) {
		_leave(" = -ENAMETOOLONG");
		return ERR_PTR(-ENAMETOOLONG);
	}

	/* see if we have an in-memory copy first */
	down_write(&cell->vl_sem);
	spin_lock(&cell->vl_lock);
	list_for_each_entry(vl, &cell->vl_list, link) {
		/* length check: the stored name must end exactly where the
		 * sought name does */
		if (vl->vldb.name[namesz] != '\0')
			continue;
		if (memcmp(vl->vldb.name, name, namesz) == 0)
			goto found_in_memory;
	}
	spin_unlock(&cell->vl_lock);

	/* not in the cell's in-memory lists - create a new record */
	vl = afs_vlocation_alloc(cell, name, namesz);
	if (!vl) {
		up_write(&cell->vl_sem);
		return ERR_PTR(-ENOMEM);
	}

	/* the record pins its cell for its lifetime */
	afs_get_cell(cell);

	list_add_tail(&vl->link, &cell->vl_list);
	vl->state = AFS_VL_CREATING;
	up_write(&cell->vl_sem);

fill_in_record:
	/* consult the disk cache and/or the VL servers */
	ret = afs_vlocation_fill_in_record(vl, key);
	if (ret < 0)
		goto error_abandon;
	spin_lock(&vl->lock);
	vl->state = AFS_VL_VALID;
	spin_unlock(&vl->lock);
	/* rouse anyone else waiting for this record to become usable */
	wake_up(&vl->waitq);

	/* update volume entry in local cache */
#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif

	/* schedule for regular updates */
	afs_vlocation_queue_for_updates(vl);
	goto success;

found_in_memory:
	/* found in memory - take the reference whilst still holding vl_lock
	 * so the record can't be destroyed under us */
	_debug("found in memory");
	atomic_inc(&vl->usage);
	spin_unlock(&cell->vl_lock);
	/* resurrect the record if it was awaiting reaping */
	if (!list_empty(&vl->grave)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		list_del_init(&vl->grave);
		spin_unlock(&afs_vlocation_graveyard_lock);
	}
	up_write(&cell->vl_sem);

	/* see if it was an abandoned record that we might try filling in */
	spin_lock(&vl->lock);
	while (vl->state != AFS_VL_VALID) {
		afs_vlocation_state_t state = vl->state;

		_debug("invalid [state %d]", state);

		/* an abandoned (NEW) or volume-less record may be retried by
		 * this caller */
		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
			vl->state = AFS_VL_CREATING;
			spin_unlock(&vl->lock);
			goto fill_in_record;
		}

		/* must now wait for creation or update by someone else to
		 * complete */
		_debug("wait");

		spin_unlock(&vl->lock);
		ret = wait_event_interruptible(vl->waitq,
					       vl->state == AFS_VL_NEW ||
					       vl->state == AFS_VL_VALID ||
					       vl->state == AFS_VL_NO_VOLUME);
		if (ret < 0)
			goto error;
		/* retake the lock and re-examine the state from the top */
		spin_lock(&vl->lock);
	}
	spin_unlock(&vl->lock);

success:
	_leave(" = %p", vl);
	return vl;

error_abandon:
	/* reset to NEW so that a later caller may retry the fill-in */
	spin_lock(&vl->lock);
	vl->state = AFS_VL_NEW;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);
error:
	ASSERT(vl != NULL);
	afs_put_vlocation(vl);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}
488
/*
 * finish using a volume location record
 * - when the last reference is dropped, the record is moved to the graveyard
 *   for deferred destruction by the reaper rather than freed immediately, so
 *   that a subsequent lookup can cheaply resurrect it
 */
void afs_put_vlocation(struct afs_vlocation *vl)
{
	if (!vl)
		return;

	_enter("%s", vl->vldb.name);

	ASSERTCMP(atomic_read(&vl->usage), >, 0);

	if (likely(!atomic_dec_and_test(&vl->usage))) {
		_leave("");
		return;
	}

	spin_lock(&afs_vlocation_graveyard_lock);
	/* recheck under the lock: a concurrent lookup may have taken a new
	 * reference between our decrement and here */
	if (atomic_read(&vl->usage) == 0) {
		_debug("buried");
		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
		vl->time_of_death = ktime_get_real_seconds();
		/* arm the reaper to run once the grace period has expired */
		queue_delayed_work(afs_wq, &afs_vlocation_reap,
				   afs_vlocation_timeout * HZ);

		/* suspend updates on this record */
		if (!list_empty(&vl->update)) {
			spin_lock(&afs_vlocation_updates_lock);
			list_del_init(&vl->update);
			spin_unlock(&afs_vlocation_updates_lock);
		}
	}
	spin_unlock(&afs_vlocation_graveyard_lock);
	_leave(" [killed?]");
}
524
525/*
526 * destroy a dead volume location record
527 */
528static void afs_vlocation_destroy(struct afs_vlocation *vl)
529{
530 _enter("%p", vl);
531
532#ifdef CONFIG_AFS_FSCACHE
533 fscache_relinquish_cookie(vl->cache, 0);
534#endif
535 afs_put_cell(vl->cell);
536 kfree(vl);
537}
538
/*
 * reap dead volume location records
 * - walks the graveyard (ordered oldest-death first), destroying records
 *   whose grace period has expired and rescuing any that picked up a new
 *   reference in the meantime
 * - reschedules itself for the next not-yet-expired record
 */
static void afs_vlocation_reaper(struct work_struct *work)
{
	LIST_HEAD(corpses);
	struct afs_vlocation *vl;
	unsigned long delay, expiry;
	time64_t now;

	_enter("");

	now = ktime_get_real_seconds();
	spin_lock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);

		_debug("check %p", vl);

		/* the queue is ordered most dead first */
		expiry = vl->time_of_death + afs_vlocation_timeout;
		if (expiry > now) {
			/* not due yet - rearm the work item and stop, since
			 * everything after this record is even younger */
			delay = (expiry - now) * HZ;
			_debug("delay %lu", delay);
			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
			break;
		}

		/* take the cell's lock before deciding, as a lookup may be
		 * resurrecting this record concurrently */
		spin_lock(&vl->cell->vl_lock);
		if (atomic_read(&vl->usage) > 0) {
			/* resurrected - just drop it from the graveyard */
			_debug("no reap");
			list_del_init(&vl->grave);
		} else {
			/* move to the private corpse list and unlink from the
			 * cell so it can be destroyed outside the locks */
			_debug("reap");
			list_move_tail(&vl->grave, &corpses);
			list_del_init(&vl->link);
		}
		spin_unlock(&vl->cell->vl_lock);
	}

	spin_unlock(&afs_vlocation_graveyard_lock);

	/* now reap the corpses we've extracted */
	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);
	}

	_leave("");
}
592
593/*
594 * initialise the VL update process
595 */
596int __init afs_vlocation_update_init(void)
597{
598 afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
599 WQ_MEM_RECLAIM, 0);
600 return afs_vlocation_update_worker ? 0 : -ENOMEM;
601}
602
/*
 * discard all the volume location records for rmmod
 * - zeroing the timeout makes the reaper treat every graveyard record as
 *   immediately expired when it is flushed below
 */
void afs_vlocation_purge(void)
{
	afs_vlocation_timeout = 0;

	spin_lock(&afs_vlocation_updates_lock);
	/* detach the list head from the update ring in one go; any records
	 * still chained together are abandoned rather than walked
	 * - NOTE(review): their ->update links then form a headless ring, but
	 *   afs_put_vlocation's list_del_init() handles that harmlessly */
	list_del_init(&afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
	/* force any pending updater run to happen now, then tear down its
	 * workqueue (destroy_workqueue flushes outstanding work) */
	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
	destroy_workqueue(afs_vlocation_update_worker);

	/* make the reaper sweep the graveyard immediately */
	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
}
618
/*
 * update a volume location
 * - work function: takes the oldest due record off the global update list,
 *   revalidates it against the VL servers, then requeues it and reschedules
 *   itself for the next record
 */
static void afs_vlocation_updater(struct work_struct *work)
{
	struct afs_cache_vlocation vldb;
	struct afs_vlocation *vl, *xvl;
	time64_t now;
	long timeout;
	int ret;

	_enter("");

	now = ktime_get_real_seconds();

	/* find a record to update */
	spin_lock(&afs_vlocation_updates_lock);
	for (;;) {
		if (list_empty(&afs_vlocation_updates)) {
			spin_unlock(&afs_vlocation_updates_lock);
			_leave(" [nothing]");
			return;
		}

		/* discard dead records from the head of the queue until a
		 * live one is found */
		vl = list_entry(afs_vlocation_updates.next,
				struct afs_vlocation, update);
		if (atomic_read(&vl->usage) > 0)
			break;
		list_del_init(&vl->update);
	}

	/* if the oldest record isn't due yet, rearm the timer and stop */
	timeout = vl->update_at - now;
	if (timeout > 0) {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update, timeout * HZ);
		spin_unlock(&afs_vlocation_updates_lock);
		_leave(" [nothing]");
		return;
	}

	/* dequeue and pin the record whilst we work on it */
	list_del_init(&vl->update);
	atomic_inc(&vl->usage);
	spin_unlock(&afs_vlocation_updates_lock);

	/* we can now perform the update */
	_debug("update %s", vl->vldb.name);
	/* NOTE(review): state and counters are set here without vl->lock,
	 * unlike the transitions below - confirm this is benign */
	vl->state = AFS_VL_UPDATING;
	vl->upd_rej_cnt = 0;
	vl->upd_busy_cnt = 0;

	/* NULL key: the update is performed anonymously */
	ret = afs_vlocation_update_record(vl, NULL, &vldb);
	spin_lock(&vl->lock);
	switch (ret) {
	case 0:
		afs_vlocation_apply_update(vl, &vldb);
		vl->state = AFS_VL_VALID;
		break;
	case -ENOMEDIUM:
		vl->state = AFS_VL_VOLUME_DELETED;
		break;
	default:
		vl->state = AFS_VL_UNCERTAIN;
		break;
	}
	spin_unlock(&vl->lock);
	/* wake anyone blocked on the record's state changing */
	wake_up(&vl->waitq);

	/* and then reschedule */
	_debug("reschedule");
	vl->update_at = ktime_get_real_seconds() +
			afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* next update in 10 minutes, but wait at least 1 second more
		 * than the newest record already queued so that we don't spam
		 * the VL server suddenly with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
		/* rearm for the oldest record still on the queue */
		xvl = list_entry(afs_vlocation_updates.next,
				 struct afs_vlocation, update);
		timeout = xvl->update_at - now;
		if (timeout < 0)
			timeout = 0;
	} else {
		timeout = afs_vlocation_update_timeout;
	}

	ASSERT(list_empty(&vl->update));

	list_add_tail(&vl->update, &afs_vlocation_updates);

	_debug("timeout %ld", timeout);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, timeout * HZ);
	spin_unlock(&afs_vlocation_updates_lock);
	/* drop the pin taken above */
	afs_put_vlocation(vl);
}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
deleted file mode 100644
index dcb956143c86..000000000000
--- a/fs/afs/vnode.c
+++ /dev/null
@@ -1,1025 +0,0 @@
1/* AFS vnode management
2 *
3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/fs.h>
16#include <linux/sched.h>
17#include "internal.h"
18
#if 0
/* Debug-only helpers, compiled out: recursively dump a server's
 * callback-promise RB-tree and validate each node's parent linkage,
 * returning true if corruption was found. */
static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
				   int depth, char lr)
{
	struct afs_vnode *vnode;
	bool bad = false;

	if (!node)
		return false;

	if (node->rb_left)
		bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');

	vnode = rb_entry(node, struct afs_vnode, cb_promise);
	_debug("%c %*.*s%c%p {%d}",
	       rb_is_red(node) ? 'R' : 'B',
	       depth, depth, "", lr,
	       vnode, vnode->cb_expires_at);
	/* a node whose parent pointer disagrees with the caller's view of the
	 * tree indicates corruption */
	if (rb_parent(node) != parent) {
		printk("BAD: %p != %p\n", rb_parent(node), parent);
		bad = true;
	}

	if (node->rb_right)
		bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');

	return bad;
}

/* Dump the whole cb_promises tree for a server and BUG on corruption. */
static noinline void dump_tree(const char *name, struct afs_server *server)
{
	_enter("%s", name);
	if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
		BUG();
}
#endif
55
/*
 * insert a vnode into the backing server's vnode tree
 * - moves the vnode from its old server's tree (if any) to the new server's
 *   fs_vnodes RB-tree, keyed on FID (vid, vnode, unique) order
 * - takes a reference on the new server and drops the old server's
 */
static void afs_install_vnode(struct afs_vnode *vnode,
			      struct afs_server *server)
{
	struct afs_server *old_server = vnode->server;
	struct afs_vnode *xvnode;
	struct rb_node *parent, **p;

	_enter("%p,%p", vnode, server);

	/* detach from the previous server's tree first */
	if (old_server) {
		spin_lock(&old_server->fs_lock);
		rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
		spin_unlock(&old_server->fs_lock);
	}

	/* swap the server reference before re-inserting */
	afs_get_server(server);
	vnode->server = server;
	afs_put_server(old_server);

	/* insert into the server's vnode tree in FID order */
	spin_lock(&server->fs_lock);

	parent = NULL;
	p = &server->fs_vnodes.rb_node;
	while (*p) {
		parent = *p;
		xvnode = rb_entry(parent, struct afs_vnode, server_rb);
		/* compare FIDs lexicographically: vid, then vnode, then
		 * unique */
		if (vnode->fid.vid < xvnode->fid.vid)
			p = &(*p)->rb_left;
		else if (vnode->fid.vid > xvnode->fid.vid)
			p = &(*p)->rb_right;
		else if (vnode->fid.vnode < xvnode->fid.vnode)
			p = &(*p)->rb_left;
		else if (vnode->fid.vnode > xvnode->fid.vnode)
			p = &(*p)->rb_right;
		else if (vnode->fid.unique < xvnode->fid.unique)
			p = &(*p)->rb_left;
		else if (vnode->fid.unique > xvnode->fid.unique)
			p = &(*p)->rb_right;
		else
			BUG(); /* can't happen unless afs_iget() malfunctions */
	}

	rb_link_node(&vnode->server_rb, parent, p);
	rb_insert_color(&vnode->server_rb, &server->fs_vnodes);

	spin_unlock(&server->fs_lock);
	_leave("");
}
108
/*
 * insert a vnode into the promising server's update/expiration tree
 * - records the server's callback promise by (re)inserting the vnode in the
 *   server's cb_promises tree, ordered by expiry time
 * - caller must hold vnode->lock
 */
static void afs_vnode_note_promise(struct afs_vnode *vnode,
				   struct afs_server *server)
{
	struct afs_server *old_server;
	struct afs_vnode *xvnode;
	struct rb_node *parent, **p;

	_enter("%p,%p", vnode, server);

	ASSERT(server != NULL);

	old_server = vnode->server;
	if (vnode->cb_promised) {
		/* same server and unchanged expiry means the existing tree
		 * position is still correct */
		if (server == old_server &&
		    vnode->cb_expires == vnode->cb_expires_at) {
			_leave(" [no change]");
			return;
		}

		/* remove the stale entry; cb_promised is rechecked under
		 * cb_lock as it may have been cleared concurrently */
		spin_lock(&old_server->cb_lock);
		if (vnode->cb_promised) {
			_debug("delete");
			rb_erase(&vnode->cb_promise, &old_server->cb_promises);
			vnode->cb_promised = false;
		}
		spin_unlock(&old_server->cb_lock);
	}

	/* rehome the vnode if the promise came from a different server */
	if (vnode->server != server)
		afs_install_vnode(vnode, server);

	vnode->cb_expires_at = vnode->cb_expires;
	_debug("PROMISE on %p {%lu}",
	       vnode, (unsigned long) vnode->cb_expires_at);

	/* abuse an RB-tree to hold the expiration order (we may have multiple
	 * items with the same expiration time) */
	spin_lock(&server->cb_lock);

	parent = NULL;
	p = &server->cb_promises.rb_node;
	while (*p) {
		parent = *p;
		xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
		/* ties go right, so equal expiry times are permitted */
		if (vnode->cb_expires_at < xvnode->cb_expires_at)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(&vnode->cb_promise, parent, p);
	rb_insert_color(&vnode->cb_promise, &server->cb_promises);
	vnode->cb_promised = true;

	spin_unlock(&server->cb_lock);
	_leave("");
}
170
/*
 * handle remote file deletion by discarding the callback promise
 * - marks the vnode deleted, removes it from the server's promise and vnode
 *   trees and drops the server reference
 * - caller must hold vnode->lock (per the lock taken by callers in this file)
 */
static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
{
	struct afs_server *server;

	_enter("{%p}", vnode->server);

	set_bit(AFS_VNODE_DELETED, &vnode->flags);

	server = vnode->server;
	if (server) {
		if (vnode->cb_promised) {
			/* recheck under cb_lock - the promise may be broken
			 * concurrently */
			spin_lock(&server->cb_lock);
			if (vnode->cb_promised) {
				rb_erase(&vnode->cb_promise,
					 &server->cb_promises);
				vnode->cb_promised = false;
			}
			spin_unlock(&server->cb_lock);
		}

		/* unhook from the server's FID-keyed vnode tree */
		spin_lock(&server->fs_lock);
		rb_erase(&vnode->server_rb, &server->fs_vnodes);
		spin_unlock(&server->fs_lock);

		vnode->server = NULL;
		afs_put_server(server);
	} else {
		/* a vnode with no server cannot hold a promise */
		ASSERT(!vnode->cb_promised);
	}

	_leave("");
}
206
207/*
208 * finish off updating the recorded status of a file after a successful
209 * operation completion
210 * - starts callback expiry timer
211 * - adds to server's callback list
212 */
213void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
214 struct afs_server *server)
215{
216 struct afs_server *oldserver = NULL;
217
218 _enter("%p,%p", vnode, server);
219
220 spin_lock(&vnode->lock);
221 clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
222 afs_vnode_note_promise(vnode, server);
223 vnode->update_cnt--;
224 ASSERTCMP(vnode->update_cnt, >=, 0);
225 spin_unlock(&vnode->lock);
226
227 wake_up_all(&vnode->update_waitq);
228 afs_put_server(oldserver);
229 _leave("");
230}
231
/*
 * finish off updating the recorded status of a file after an operation failed
 * - clears the broken-callback flag, handles server-side deletion, then
 *   decrements the outstanding-update count and wakes any waiters
 */
static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
{
	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret);

	spin_lock(&vnode->lock);

	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);

	if (ret == -ENOENT) {
		/* the file was deleted on the server */
		_debug("got NOENT from server - marking file deleted");
		afs_vnode_deleted_remotely(vnode);
	}

	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);

	/* waiters recheck the DELETED and CB_BROKEN flags themselves */
	wake_up_all(&vnode->update_waitq);
	_leave("");
}
256
/*
 * fetch file status from the volume
 * - don't issue a fetch if:
 *   - the changed bit is not set and there's a valid callback
 *   - there are any outstanding ops that will fetch the status
 * - TODO implement local caching
 * - returns 0 on success, -ENOENT if the file was deleted remotely, or a
 *   negative error from server selection or the fetch RPC
 */
int afs_vnode_fetch_status(struct afs_vnode *vnode,
			   struct afs_vnode *auth_vnode, struct key *key)
{
	struct afs_server *server;
	unsigned long acl_order;
	int ret;

	DECLARE_WAITQUEUE(myself, current);

	_enter("%s,{%x:%u.%u}",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);

	/* fast path: an intact callback promise means the cached status is
	 * still authoritative (re-verified under the lock below) */
	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
	    vnode->cb_promised) {
		_leave(" [unchanged]");
		return 0;
	}

	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
		_leave(" [deleted]");
		return -ENOENT;
	}

	/* snapshot the auth vnode's ACL generation so that a concurrent ACL
	 * change can be detected when caching the permit afterwards */
	acl_order = 0;
	if (auth_vnode)
		acl_order = auth_vnode->acl_order;

	spin_lock(&vnode->lock);

	/* recheck the fast-path condition now that we hold the lock */
	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
	    vnode->cb_promised) {
		spin_unlock(&vnode->lock);
		_leave(" [unchanged]");
		return 0;
	}

	ASSERTCMP(vnode->update_cnt, >=, 0);

	if (vnode->update_cnt > 0) {
		/* someone else started a fetch */
		_debug("wait on fetch %d", vnode->update_cnt);

		set_current_state(TASK_UNINTERRUPTIBLE);
		ASSERT(myself.func != NULL);
		add_wait_queue(&vnode->update_waitq, &myself);

		/* wait for the status to be updated */
		for (;;) {
			/* a repaired callback or remote deletion ends the
			 * wait */
			if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
				break;
			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
				break;

			/* check to see if it got updated and invalidated all
			 * before we saw it */
			if (vnode->update_cnt == 0) {
				remove_wait_queue(&vnode->update_waitq,
						  &myself);
				set_current_state(TASK_RUNNING);
				/* lock is still held here; fall through to
				 * issue our own fetch */
				goto get_anyway;
			}

			spin_unlock(&vnode->lock);

			schedule();
			set_current_state(TASK_UNINTERRUPTIBLE);

			spin_lock(&vnode->lock);
		}

		remove_wait_queue(&vnode->update_waitq, &myself);
		spin_unlock(&vnode->lock);
		set_current_state(TASK_RUNNING);

		/* report the outcome of the other thread's fetch */
		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
			-ENOENT : 0;
	}

get_anyway:
	/* okay... we're going to have to initiate the op */
	vnode->update_cnt++;

	spin_unlock(&vnode->lock);

	/* merge AFS status fetches and clear outstanding callback on this
	 * vnode */
	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %p{%08x}",
		       server, ntohl(server->addr.s_addr));

		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
					       false);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		_debug("adjust");
		if (auth_vnode)
			afs_cache_permit(vnode, key, acl_order);
		afs_vnode_finalise_status_update(vnode, server);
		afs_put_server(server);
	} else {
		_debug("failed [%d]", ret);
		afs_vnode_status_update_failed(vnode, ret);
	}

	ASSERTCMP(vnode->update_cnt, >=, 0);

	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
	return ret;

no_server:
	/* undo the update count taken at get_anyway */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
	return PTR_ERR(server);
}
390
/*
 * fetch file data from the volume
 * - TODO implement caching
 * - the fetch also refreshes the vnode's status, so update_cnt is raised for
 *   the duration of the operation
 */
int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
			 struct afs_read *desc)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x,,,",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key));

	/* this op will fetch the status */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	/* merge in AFS status fetches and clear outstanding callback on this
	 * vnode */
	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_fetch_data(server, key, vnode, desc,
					false);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		/* finalise consumes the update count; the extra server ref
		 * from pick_fileserver is dropped here */
		afs_vnode_finalise_status_update(vnode, server);
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(vnode, ret);
	}

	_leave(" = %d", ret);
	return ret;

no_server:
	/* no usable server: undo the update count taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	return PTR_ERR(server);
}
446
/*
 * make a file or a directory
 * - on success, *_server is set to the server used (reference passed to the
 *   caller); on failure it is set to NULL
 * - newfid/newstatus/newcb receive the created object's identity, status and
 *   callback as returned by the server
 */
int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
		     const char *name, umode_t mode, struct afs_fid *newfid,
		     struct afs_file_status *newstatus,
		     struct afs_callback *newcb, struct afs_server **_server)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x,%s,,",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key),
	       name);

	/* this op will fetch the status on the directory we're creating in */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
				    newstatus, newcb, false);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		/* hand the server reference to the caller */
		*_server = server;
	} else {
		afs_vnode_status_update_failed(vnode, ret);
		*_server = NULL;
	}

	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
	return ret;

no_server:
	/* no usable server: undo the update count taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
	return PTR_ERR(server);
}
504
/*
 * remove a file or directory
 * - vnode is the directory containing the object; isdir selects rmdir vs
 *   unlink semantics on the server
 */
int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
		     bool isdir)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x,%s",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key),
	       name);

	/* this op will fetch the status on the directory we're removing from */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_remove(server, key, vnode, name, isdir,
				    false);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(vnode, ret);
	}

	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
	return ret;

no_server:
	/* no usable server: undo the update count taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
	return PTR_ERR(server);
}
559
/*
 * create a hard link
 * - dvnode is the directory to link into, vnode is the existing target; the
 *   operation refreshes the status of both, so both update counts are raised
 */
int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
		   struct key *key, const char *name)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
	       dvnode->volume->vlocation->vldb.name,
	       dvnode->fid.vid,
	       dvnode->fid.vnode,
	       dvnode->fid.unique,
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key),
	       name);

	/* this op will fetch the status on both the target vnode and the
	 * directory we're linking into */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);
	spin_lock(&dvnode->lock);
	dvnode->update_cnt++;
	spin_unlock(&dvnode->lock);

	do {
		/* pick a server to query - server selection is driven by the
		 * directory's volume */
		server = afs_volume_pick_fileserver(dvnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_link(server, key, dvnode, vnode, name,
				  false);

	} while (!afs_volume_release_fileserver(dvnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		afs_vnode_finalise_status_update(dvnode, server);
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(vnode, ret);
		afs_vnode_status_update_failed(dvnode, ret);
	}

	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
	return ret;

no_server:
	/* no usable server: undo both update counts taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	spin_lock(&dvnode->lock);
	dvnode->update_cnt--;
	ASSERTCMP(dvnode->update_cnt, >=, 0);
	spin_unlock(&dvnode->lock);
	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
	return PTR_ERR(server);
}
627
/*
 * create a symbolic link
 * - content is the link target string stored in the new symlink
 * - on success, *_server is set to the server used (reference passed to the
 *   caller); on failure it is set to NULL
 */
int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
		      const char *name, const char *content,
		      struct afs_fid *newfid,
		      struct afs_file_status *newstatus,
		      struct afs_server **_server)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x,%s,%s,,,",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key),
	       name, content);

	/* this op will fetch the status on the directory we're creating in */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_symlink(server, key, vnode, name, content,
				     newfid, newstatus, false);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		/* hand the server reference to the caller */
		*_server = server;
	} else {
		afs_vnode_status_update_failed(vnode, ret);
		*_server = NULL;
	}

	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
	return ret;

no_server:
	/* no usable server: undo the update count taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
	return PTR_ERR(server);
}
686
/*
 * rename a file
 * - both directories' statuses are refreshed by the operation, so both
 *   update counts are raised (once only if they are the same directory)
 */
int afs_vnode_rename(struct afs_vnode *orig_dvnode,
		     struct afs_vnode *new_dvnode,
		     struct key *key,
		     const char *orig_name,
		     const char *new_name)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
	       orig_dvnode->volume->vlocation->vldb.name,
	       orig_dvnode->fid.vid,
	       orig_dvnode->fid.vnode,
	       orig_dvnode->fid.unique,
	       new_dvnode->volume->vlocation->vldb.name,
	       new_dvnode->fid.vid,
	       new_dvnode->fid.vnode,
	       new_dvnode->fid.unique,
	       key_serial(key),
	       orig_name,
	       new_name);

	/* this op will fetch the status on both the directories we're dealing
	 * with */
	spin_lock(&orig_dvnode->lock);
	orig_dvnode->update_cnt++;
	spin_unlock(&orig_dvnode->lock);
	if (new_dvnode != orig_dvnode) {
		spin_lock(&new_dvnode->lock);
		new_dvnode->update_cnt++;
		spin_unlock(&new_dvnode->lock);
	}

	do {
		/* pick a server to query - selection is driven by the source
		 * directory's volume */
		server = afs_volume_pick_fileserver(orig_dvnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
				    new_dvnode, new_name, false);

	} while (!afs_volume_release_fileserver(orig_dvnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(orig_dvnode, server);
		if (new_dvnode != orig_dvnode)
			afs_vnode_finalise_status_update(new_dvnode, server);
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(orig_dvnode, ret);
		if (new_dvnode != orig_dvnode)
			afs_vnode_status_update_failed(new_dvnode, ret);
	}

	_leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
	return ret;

no_server:
	/* no usable server: undo the update count(s) taken above */
	spin_lock(&orig_dvnode->lock);
	orig_dvnode->update_cnt--;
	ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
	spin_unlock(&orig_dvnode->lock);
	if (new_dvnode != orig_dvnode) {
		spin_lock(&new_dvnode->lock);
		new_dvnode->update_cnt--;
		ASSERTCMP(new_dvnode->update_cnt, >=, 0);
		spin_unlock(&new_dvnode->lock);
	}
	_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
	return PTR_ERR(server);
}
765
/*
 * write to a file
 * - sends the page range [@first, @last] described by writeback record @wb to
 *   a fileserver, writing from byte @offset of the first page up to byte @to
 *   of the last page
 * - authenticated with the key stored in the writeback record (wb->key)
 * - returns 0 on success or a negative errno
 */
int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
			 unsigned offset, unsigned to)
{
	struct afs_server *server;
	struct afs_vnode *vnode = wb->vnode;
	int ret;

	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(wb->key),
	       first, last, offset, to);

	/* this op will fetch the status */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_store_data(server, wb, first, last, offset, to,
					false);

		/* loop until the release helper says the result is final */
	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		/* drop the ref taken by afs_volume_pick_fileserver() */
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(vnode, ret);
	}

	_leave(" = %d", ret);
	return ret;

no_server:
	/* no usable server: undo the update_cnt increment taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	return PTR_ERR(server);
}
820
/*
 * set the attributes on a file
 * - asks a fileserver to apply the attribute changes in @attr to the file
 *   represented by @vnode, authenticated with @key
 * - returns 0 on success or a negative errno
 */
int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
		      struct iattr *attr)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key));

	/* this op will fetch the status */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_setattr(server, key, vnode, attr, false);

		/* loop until the release helper says the result is final */
	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		/* drop the ref taken by afs_volume_pick_fileserver() */
		afs_put_server(server);
	} else {
		afs_vnode_status_update_failed(vnode, ret);
	}

	_leave(" = %d", ret);
	return ret;

no_server:
	/* no usable server: undo the update_cnt increment taken above */
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	ASSERTCMP(vnode->update_cnt, >=, 0);
	spin_unlock(&vnode->lock);
	return PTR_ERR(server);
}
872
873/*
874 * get the status of a volume
875 */
876int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
877 struct afs_volume_status *vs)
878{
879 struct afs_server *server;
880 int ret;
881
882 _enter("%s{%x:%u.%u},%x,",
883 vnode->volume->vlocation->vldb.name,
884 vnode->fid.vid,
885 vnode->fid.vnode,
886 vnode->fid.unique,
887 key_serial(key));
888
889 do {
890 /* pick a server to query */
891 server = afs_volume_pick_fileserver(vnode);
892 if (IS_ERR(server))
893 goto no_server;
894
895 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
896
897 ret = afs_fs_get_volume_status(server, key, vnode, vs, false);
898
899 } while (!afs_volume_release_fileserver(vnode, server, ret));
900
901 /* adjust the flags */
902 if (ret == 0)
903 afs_put_server(server);
904
905 _leave(" = %d", ret);
906 return ret;
907
908no_server:
909 return PTR_ERR(server);
910}
911
912/*
913 * get a lock on a file
914 */
915int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
916 afs_lock_type_t type)
917{
918 struct afs_server *server;
919 int ret;
920
921 _enter("%s{%x:%u.%u},%x,%u",
922 vnode->volume->vlocation->vldb.name,
923 vnode->fid.vid,
924 vnode->fid.vnode,
925 vnode->fid.unique,
926 key_serial(key), type);
927
928 do {
929 /* pick a server to query */
930 server = afs_volume_pick_fileserver(vnode);
931 if (IS_ERR(server))
932 goto no_server;
933
934 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
935
936 ret = afs_fs_set_lock(server, key, vnode, type, false);
937
938 } while (!afs_volume_release_fileserver(vnode, server, ret));
939
940 /* adjust the flags */
941 if (ret == 0)
942 afs_put_server(server);
943
944 _leave(" = %d", ret);
945 return ret;
946
947no_server:
948 return PTR_ERR(server);
949}
950
951/*
952 * extend a lock on a file
953 */
954int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
955{
956 struct afs_server *server;
957 int ret;
958
959 _enter("%s{%x:%u.%u},%x",
960 vnode->volume->vlocation->vldb.name,
961 vnode->fid.vid,
962 vnode->fid.vnode,
963 vnode->fid.unique,
964 key_serial(key));
965
966 do {
967 /* pick a server to query */
968 server = afs_volume_pick_fileserver(vnode);
969 if (IS_ERR(server))
970 goto no_server;
971
972 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
973
974 ret = afs_fs_extend_lock(server, key, vnode, false);
975
976 } while (!afs_volume_release_fileserver(vnode, server, ret));
977
978 /* adjust the flags */
979 if (ret == 0)
980 afs_put_server(server);
981
982 _leave(" = %d", ret);
983 return ret;
984
985no_server:
986 return PTR_ERR(server);
987}
988
989/*
990 * release a lock on a file
991 */
992int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
993{
994 struct afs_server *server;
995 int ret;
996
997 _enter("%s{%x:%u.%u},%x",
998 vnode->volume->vlocation->vldb.name,
999 vnode->fid.vid,
1000 vnode->fid.vnode,
1001 vnode->fid.unique,
1002 key_serial(key));
1003
1004 do {
1005 /* pick a server to query */
1006 server = afs_volume_pick_fileserver(vnode);
1007 if (IS_ERR(server))
1008 goto no_server;
1009
1010 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
1011
1012 ret = afs_fs_release_lock(server, key, vnode, false);
1013
1014 } while (!afs_volume_release_fileserver(vnode, server, ret));
1015
1016 /* adjust the flags */
1017 if (ret == 0)
1018 afs_put_server(server);
1019
1020 _leave(" = %d", ret);
1021 return ret;
1022
1023no_server:
1024 return PTR_ERR(server);
1025}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index db73d6dad02b..684c48293353 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -10,19 +10,167 @@
10 */ 10 */
11 11
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h> 13#include <linux/slab.h>
16#include <linux/fs.h>
17#include <linux/pagemap.h>
18#include <linux/sched.h>
19#include "internal.h" 14#include "internal.h"
20 15
21static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; 16unsigned __read_mostly afs_volume_gc_delay = 10;
17unsigned __read_mostly afs_volume_record_life = 60 * 60;
18
19static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
20
21/*
22 * Allocate a volume record and load it up from a vldb record.
23 */
24static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
25 struct afs_vldb_entry *vldb,
26 unsigned long type_mask)
27{
28 struct afs_server_list *slist;
29 struct afs_server *server;
30 struct afs_volume *volume;
31 int ret = -ENOMEM, nr_servers = 0, i, j;
32
33 for (i = 0; i < vldb->nr_servers; i++)
34 if (vldb->fs_mask[i] & type_mask)
35 nr_servers++;
36
37 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
38 if (!volume)
39 goto error_0;
40
41 volume->vid = vldb->vid[params->type];
42 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
43 volume->cell = afs_get_cell(params->cell);
44 volume->type = params->type;
45 volume->type_force = params->force;
46 volume->name_len = vldb->name_len;
47
48 atomic_set(&volume->usage, 1);
49 INIT_LIST_HEAD(&volume->proc_link);
50 rwlock_init(&volume->servers_lock);
51 memcpy(volume->name, vldb->name, vldb->name_len + 1);
52
53 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
54 if (IS_ERR(slist)) {
55 ret = PTR_ERR(slist);
56 goto error_1;
57 }
58
59 refcount_set(&slist->usage, 1);
60 volume->servers = slist;
61
62 /* Make sure a records exists for each server this volume occupies. */
63 for (i = 0; i < nr_servers; i++) {
64 if (!(vldb->fs_mask[i] & type_mask))
65 continue;
66
67 server = afs_lookup_server(params->cell, params->key,
68 &vldb->fs_server[i]);
69 if (IS_ERR(server)) {
70 ret = PTR_ERR(server);
71 if (ret == -ENOENT)
72 continue;
73 goto error_2;
74 }
75
76 /* Insertion-sort by server pointer */
77 for (j = 0; j < slist->nr_servers; j++)
78 if (slist->servers[j].server >= server)
79 break;
80 if (j < slist->nr_servers) {
81 if (slist->servers[j].server == server) {
82 afs_put_server(params->net, server);
83 continue;
84 }
85
86 memmove(slist->servers + j + 1,
87 slist->servers + j,
88 (slist->nr_servers - j) * sizeof(struct afs_server_entry));
89 }
90
91 slist->servers[j].server = server;
92 slist->nr_servers++;
93 }
94
95 if (slist->nr_servers == 0) {
96 ret = -EDESTADDRREQ;
97 goto error_2;
98 }
99
100 return volume;
101
102error_2:
103 afs_put_serverlist(params->net, slist);
104error_1:
105 kfree(volume);
106error_0:
107 return ERR_PTR(ret);
108}
22 109
23/* 110/*
24 * lookup a volume by name 111 * Look up a VLDB record for a volume.
25 * - this can be one of the following: 112 */
113static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
114 struct key *key,
115 const char *volname,
116 size_t volnamesz)
117{
118 struct afs_addr_cursor ac;
119 struct afs_vldb_entry *vldb;
120 int ret;
121
122 ret = afs_set_vl_cursor(&ac, cell);
123 if (ret < 0)
124 return ERR_PTR(ret);
125
126 while (afs_iterate_addresses(&ac)) {
127 if (!test_bit(ac.index, &ac.alist->probed)) {
128 ret = afs_vl_get_capabilities(cell->net, &ac, key);
129 switch (ret) {
130 case VL_SERVICE:
131 clear_bit(ac.index, &ac.alist->yfs);
132 set_bit(ac.index, &ac.alist->probed);
133 ac.addr->srx_service = ret;
134 break;
135 case YFS_VL_SERVICE:
136 set_bit(ac.index, &ac.alist->yfs);
137 set_bit(ac.index, &ac.alist->probed);
138 ac.addr->srx_service = ret;
139 break;
140 }
141 }
142
143 vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
144 volname, volnamesz);
145 switch (ac.error) {
146 case 0:
147 afs_end_cursor(&ac);
148 return vldb;
149 case -ECONNABORTED:
150 ac.error = afs_abort_to_error(ac.abort_code);
151 goto error;
152 case -ENOMEM:
153 case -ENONET:
154 goto error;
155 case -ENETUNREACH:
156 case -EHOSTUNREACH:
157 case -ECONNREFUSED:
158 break;
159 default:
160 ac.error = -EIO;
161 goto error;
162 }
163 }
164
165error:
166 return ERR_PTR(afs_end_cursor(&ac));
167}
168
169/*
170 * Look up a volume in the VL server and create a candidate volume record for
171 * it.
172 *
173 * The volume name can be one of the following:
26 * "%[cell:]volume[.]" R/W volume 174 * "%[cell:]volume[.]" R/W volume
27 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 175 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
28 * or R/W (rwparent=1) volume 176 * or R/W (rwparent=1) volume
@@ -42,353 +190,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
42 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 190 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
43 * explicitly told otherwise 191 * explicitly told otherwise
44 */ 192 */
45struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) 193struct afs_volume *afs_create_volume(struct afs_mount_params *params)
46{ 194{
47 struct afs_vlocation *vlocation = NULL; 195 struct afs_vldb_entry *vldb;
48 struct afs_volume *volume = NULL; 196 struct afs_volume *volume;
49 struct afs_server *server = NULL; 197 unsigned long type_mask = 1UL << params->type;
50 char srvtmask;
51 int ret, loop;
52
53 _enter("{%*.*s,%d}",
54 params->volnamesz, params->volnamesz, params->volname, params->rwpath);
55
56 /* lookup the volume location record */
57 vlocation = afs_vlocation_lookup(params->cell, params->key,
58 params->volname, params->volnamesz);
59 if (IS_ERR(vlocation)) {
60 ret = PTR_ERR(vlocation);
61 vlocation = NULL;
62 goto error;
63 }
64 198
65 /* make the final decision on the type we want */ 199 vldb = afs_vl_lookup_vldb(params->cell, params->key,
66 ret = -ENOMEDIUM; 200 params->volname, params->volnamesz);
67 if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) 201 if (IS_ERR(vldb))
68 goto error; 202 return ERR_CAST(vldb);
69 203
70 srvtmask = 0; 204 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
71 for (loop = 0; loop < vlocation->vldb.nservers; loop++) 205 volume = ERR_PTR(vldb->error);
72 srvtmask |= vlocation->vldb.srvtmask[loop]; 206 goto error;
207 }
73 208
209 /* Make the final decision on the type we want */
210 volume = ERR_PTR(-ENOMEDIUM);
74 if (params->force) { 211 if (params->force) {
75 if (!(srvtmask & (1 << params->type))) 212 if (!(vldb->flags & type_mask))
76 goto error; 213 goto error;
77 } else if (srvtmask & AFS_VOL_VTM_RO) { 214 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
78 params->type = AFSVL_ROVOL; 215 params->type = AFSVL_ROVOL;
79 } else if (srvtmask & AFS_VOL_VTM_RW) { 216 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
80 params->type = AFSVL_RWVOL; 217 params->type = AFSVL_RWVOL;
81 } else { 218 } else {
82 goto error; 219 goto error;
83 } 220 }
84 221
85 down_write(&params->cell->vl_sem); 222 type_mask = 1UL << params->type;
223 volume = afs_alloc_volume(params, vldb, type_mask);
86 224
87 /* is the volume already active? */ 225error:
88 if (vlocation->vols[params->type]) { 226 kfree(vldb);
89 /* yes - re-use it */ 227 return volume;
90 volume = vlocation->vols[params->type]; 228}
91 afs_get_volume(volume);
92 goto success;
93 }
94 229
95 /* create a new volume record */ 230/*
96 _debug("creating new volume record"); 231 * Destroy a volume record
232 */
233static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
234{
235 _enter("%p", volume);
97 236
98 ret = -ENOMEM; 237#ifdef CONFIG_AFS_FSCACHE
99 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 238 ASSERTCMP(volume->cache, ==, NULL);
100 if (!volume) 239#endif
101 goto error_up;
102 240
103 atomic_set(&volume->usage, 1); 241 afs_put_serverlist(net, volume->servers);
104 volume->type = params->type; 242 afs_put_cell(net, volume->cell);
105 volume->type_force = params->force; 243 kfree(volume);
106 volume->cell = params->cell;
107 volume->vid = vlocation->vldb.vid[params->type];
108
109 init_rwsem(&volume->server_sem);
110
111 /* look up all the applicable server records */
112 for (loop = 0; loop < 8; loop++) {
113 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
114 server = afs_lookup_server(
115 volume->cell, &vlocation->vldb.servers[loop]);
116 if (IS_ERR(server)) {
117 ret = PTR_ERR(server);
118 goto error_discard;
119 }
120 244
121 volume->servers[volume->nservers] = server; 245 _leave(" [destroyed]");
122 volume->nservers++; 246}
123 } 247
248/*
249 * Drop a reference on a volume record.
250 */
251void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
252{
253 if (volume) {
254 _enter("%s", volume->name);
255
256 if (atomic_dec_and_test(&volume->usage))
257 afs_destroy_volume(cell->net, volume);
124 } 258 }
259}
125 260
126 /* attach the cache and volume location */ 261/*
262 * Activate a volume.
263 */
264void afs_activate_volume(struct afs_volume *volume)
265{
127#ifdef CONFIG_AFS_FSCACHE 266#ifdef CONFIG_AFS_FSCACHE
128 volume->cache = fscache_acquire_cookie(vlocation->cache, 267 volume->cache = fscache_acquire_cookie(volume->cell->cache,
129 &afs_volume_cache_index_def, 268 &afs_volume_cache_index_def,
130 volume, true); 269 volume, true);
131#endif 270#endif
132 afs_get_vlocation(vlocation);
133 volume->vlocation = vlocation;
134
135 vlocation->vols[volume->type] = volume;
136
137success:
138 _debug("kAFS selected %s volume %08x",
139 afs_voltypes[volume->type], volume->vid);
140 up_write(&params->cell->vl_sem);
141 afs_put_vlocation(vlocation);
142 _leave(" = %p", volume);
143 return volume;
144
145 /* clean up */
146error_up:
147 up_write(&params->cell->vl_sem);
148error:
149 afs_put_vlocation(vlocation);
150 _leave(" = %d", ret);
151 return ERR_PTR(ret);
152
153error_discard:
154 up_write(&params->cell->vl_sem);
155
156 for (loop = volume->nservers - 1; loop >= 0; loop--)
157 afs_put_server(volume->servers[loop]);
158 271
159 kfree(volume); 272 write_lock(&volume->cell->proc_lock);
160 goto error; 273 list_add_tail(&volume->proc_link, &volume->cell->proc_volumes);
274 write_unlock(&volume->cell->proc_lock);
161} 275}
162 276
163/* 277/*
164 * destroy a volume record 278 * Deactivate a volume.
165 */ 279 */
166void afs_put_volume(struct afs_volume *volume) 280void afs_deactivate_volume(struct afs_volume *volume)
167{ 281{
168 struct afs_vlocation *vlocation; 282 _enter("%s", volume->name);
169 int loop;
170
171 if (!volume)
172 return;
173
174 _enter("%p", volume);
175 283
176 ASSERTCMP(atomic_read(&volume->usage), >, 0); 284 write_lock(&volume->cell->proc_lock);
285 list_del_init(&volume->proc_link);
286 write_unlock(&volume->cell->proc_lock);
177 287
178 vlocation = volume->vlocation;
179
180 /* to prevent a race, the decrement and the dequeue must be effectively
181 * atomic */
182 down_write(&vlocation->cell->vl_sem);
183
184 if (likely(!atomic_dec_and_test(&volume->usage))) {
185 up_write(&vlocation->cell->vl_sem);
186 _leave("");
187 return;
188 }
189
190 vlocation->vols[volume->type] = NULL;
191
192 up_write(&vlocation->cell->vl_sem);
193
194 /* finish cleaning up the volume */
195#ifdef CONFIG_AFS_FSCACHE 288#ifdef CONFIG_AFS_FSCACHE
196 fscache_relinquish_cookie(volume->cache, 0); 289 fscache_relinquish_cookie(volume->cache,
290 test_bit(AFS_VOLUME_DELETED, &volume->flags));
291 volume->cache = NULL;
197#endif 292#endif
198 afs_put_vlocation(vlocation);
199
200 for (loop = volume->nservers - 1; loop >= 0; loop--)
201 afs_put_server(volume->servers[loop]);
202
203 kfree(volume);
204 293
205 _leave(" [destroyed]"); 294 _leave("");
206} 295}
207 296
208/* 297/*
209 * pick a server to use to try accessing this volume 298 * Query the VL service to update the volume status.
210 * - returns with an elevated usage count on the server chosen
211 */ 299 */
212struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) 300static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
213{ 301{
214 struct afs_volume *volume = vnode->volume; 302 struct afs_server_list *new, *old, *discard;
215 struct afs_server *server; 303 struct afs_vldb_entry *vldb;
216 int ret, state, loop; 304 char idbuf[16];
305 int ret, idsz;
306
307 _enter("");
308
309 /* We look up an ID by passing it as a decimal string in the
310 * operation's name parameter.
311 */
312 idsz = sprintf(idbuf, "%u", volume->vid);
217 313
218 _enter("%s", volume->vlocation->vldb.name); 314 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
315 if (IS_ERR(vldb)) {
316 ret = PTR_ERR(vldb);
317 goto error;
318 }
219 319
220 /* stick with the server we're already using if we can */ 320 /* See if the volume got renamed. */
221 if (vnode->server && vnode->server->fs_state == 0) { 321 if (vldb->name_len != volume->name_len ||
222 afs_get_server(vnode->server); 322 memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
223 _leave(" = %p [current]", vnode->server); 323 /* TODO: Use RCU'd string. */
224 return vnode->server; 324 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
325 volume->name_len = vldb->name_len;
225 } 326 }
226 327
227 down_read(&volume->server_sem); 328 /* See if the volume's server list got updated. */
329 new = afs_alloc_server_list(volume->cell, key,
330 vldb, (1 << volume->type));
331 if (IS_ERR(new)) {
332 ret = PTR_ERR(new);
333 goto error_vldb;
334 }
228 335
229 /* handle the no-server case */ 336 write_lock(&volume->servers_lock);
230 if (volume->nservers == 0) { 337
231 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; 338 discard = new;
232 up_read(&volume->server_sem); 339 old = volume->servers;
233 _leave(" = %d [no servers]", ret); 340 if (afs_annotate_server_list(new, old)) {
234 return ERR_PTR(ret); 341 new->seq = volume->servers_seq + 1;
342 volume->servers = new;
343 smp_wmb();
344 volume->servers_seq++;
345 discard = old;
235 } 346 }
236 347
237 /* basically, just search the list for the first live server and use 348 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
238 * that */ 349 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
350 write_unlock(&volume->servers_lock);
239 ret = 0; 351 ret = 0;
240 for (loop = 0; loop < volume->nservers; loop++) {
241 server = volume->servers[loop];
242 state = server->fs_state;
243 352
244 _debug("consider %d [%d]", loop, state); 353 afs_put_serverlist(volume->cell->net, discard);
354error_vldb:
355 kfree(vldb);
356error:
357 _leave(" = %d", ret);
358 return ret;
359}
245 360
246 switch (state) { 361/*
247 /* found an apparently healthy server */ 362 * Make sure the volume record is up to date.
248 case 0: 363 */
249 afs_get_server(server); 364int afs_check_volume_status(struct afs_volume *volume, struct key *key)
250 up_read(&volume->server_sem); 365{
251 _leave(" = %p (picked %08x)", 366 time64_t now = ktime_get_real_seconds();
252 server, ntohl(server->addr.s_addr)); 367 int ret, retries = 0;
253 return server;
254 368
255 case -ENETUNREACH: 369 _enter("");
256 if (ret == 0)
257 ret = state;
258 break;
259 370
260 case -EHOSTUNREACH: 371 if (volume->update_at <= now)
261 if (ret == 0 || 372 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
262 ret == -ENETUNREACH)
263 ret = state;
264 break;
265 373
266 case -ECONNREFUSED: 374retry:
267 if (ret == 0 || 375 if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) &&
268 ret == -ENETUNREACH || 376 !test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
269 ret == -EHOSTUNREACH) 377 _leave(" = 0");
270 ret = state; 378 return 0;
271 break; 379 }
272 380
273 default: 381 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
274 case -EREMOTEIO: 382 ret = afs_update_volume_status(volume, key);
275 if (ret == 0 || 383 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
276 ret == -ENETUNREACH || 384 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
277 ret == -EHOSTUNREACH || 385 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
278 ret == -ECONNREFUSED) 386 _leave(" = %d", ret);
279 ret = state; 387 return ret;
280 break;
281 }
282 } 388 }
283 389
284 /* no available servers 390 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
285 * - TODO: handle the no active servers case better 391 _leave(" = 0 [no wait]");
286 */ 392 return 0;
287 up_read(&volume->server_sem); 393 }
288 _leave(" = %d", ret);
289 return ERR_PTR(ret);
290}
291 394
292/* 395 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
293 * release a server after use 396 if (ret == -ERESTARTSYS) {
294 * - releases the ref on the server struct that was acquired by picking 397 _leave(" = %d", ret);
295 * - records result of using a particular server to access a volume 398 return ret;
296 * - return 0 to try again, 1 if okay or to issue error
297 * - the caller must release the server struct if result was 0
298 */
299int afs_volume_release_fileserver(struct afs_vnode *vnode,
300 struct afs_server *server,
301 int result)
302{
303 struct afs_volume *volume = vnode->volume;
304 unsigned loop;
305
306 _enter("%s,%08x,%d",
307 volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
308 result);
309
310 switch (result) {
311 /* success */
312 case 0:
313 server->fs_act_jif = jiffies;
314 server->fs_state = 0;
315 _leave("");
316 return 1;
317
318 /* the fileserver denied all knowledge of the volume */
319 case -ENOMEDIUM:
320 server->fs_act_jif = jiffies;
321 down_write(&volume->server_sem);
322
323 /* firstly, find where the server is in the active list (if it
324 * is) */
325 for (loop = 0; loop < volume->nservers; loop++)
326 if (volume->servers[loop] == server)
327 goto present;
328
329 /* no longer there - may have been discarded by another op */
330 goto try_next_server_upw;
331
332 present:
333 volume->nservers--;
334 memmove(&volume->servers[loop],
335 &volume->servers[loop + 1],
336 sizeof(volume->servers[loop]) *
337 (volume->nservers - loop));
338 volume->servers[volume->nservers] = NULL;
339 afs_put_server(server);
340 volume->rjservers++;
341
342 if (volume->nservers > 0)
343 /* another server might acknowledge its existence */
344 goto try_next_server_upw;
345
346 /* handle the case where all the fileservers have rejected the
347 * volume
348 * - TODO: try asking the fileservers for volume information
349 * - TODO: contact the VL server again to see if the volume is
350 * no longer registered
351 */
352 up_write(&volume->server_sem);
353 afs_put_server(server);
354 _leave(" [completely rejected]");
355 return 1;
356
357 /* problem reaching the server */
358 case -ENETUNREACH:
359 case -EHOSTUNREACH:
360 case -ECONNREFUSED:
361 case -ETIME:
362 case -ETIMEDOUT:
363 case -EREMOTEIO:
364 /* mark the server as dead
365 * TODO: vary dead timeout depending on error
366 */
367 spin_lock(&server->fs_lock);
368 if (!server->fs_state) {
369 server->fs_dead_jif = jiffies + HZ * 10;
370 server->fs_state = result;
371 printk("kAFS: SERVER DEAD state=%d\n", result);
372 }
373 spin_unlock(&server->fs_lock);
374 goto try_next_server;
375
376 /* miscellaneous error */
377 default:
378 server->fs_act_jif = jiffies;
379 case -ENOMEM:
380 case -ENONET:
381 /* tell the caller to accept the result */
382 afs_put_server(server);
383 _leave(" [local failure]");
384 return 1;
385 } 399 }
386 400
387 /* tell the caller to loop around and try the next server */ 401 retries++;
388try_next_server_upw: 402 if (retries == 4) {
389 up_write(&volume->server_sem); 403 _leave(" = -ESTALE");
390try_next_server: 404 return -ESTALE;
391 afs_put_server(server); 405 }
392 _leave(" [try next server]"); 406 goto retry;
393 return 0;
394} 407}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 11dd0526b96b..18e46e31523c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -8,6 +8,7 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11
11#include <linux/backing-dev.h> 12#include <linux/backing-dev.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
@@ -16,9 +17,6 @@
16#include <linux/pagevec.h> 17#include <linux/pagevec.h>
17#include "internal.h" 18#include "internal.h"
18 19
19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
20 struct page *page);
21
22/* 20/*
23 * mark a page as having been made dirty and thus needing writeback 21 * mark a page as having been made dirty and thus needing writeback
24 */ 22 */
@@ -29,58 +27,6 @@ int afs_set_page_dirty(struct page *page)
29} 27}
30 28
31/* 29/*
32 * unlink a writeback record because its usage has reached zero
33 * - must be called with the wb->vnode->writeback_lock held
34 */
35static void afs_unlink_writeback(struct afs_writeback *wb)
36{
37 struct afs_writeback *front;
38 struct afs_vnode *vnode = wb->vnode;
39
40 list_del_init(&wb->link);
41 if (!list_empty(&vnode->writebacks)) {
42 /* if an fsync rises to the front of the queue then wake it
43 * up */
44 front = list_entry(vnode->writebacks.next,
45 struct afs_writeback, link);
46 if (front->state == AFS_WBACK_SYNCING) {
47 _debug("wake up sync");
48 front->state = AFS_WBACK_COMPLETE;
49 wake_up(&front->waitq);
50 }
51 }
52}
53
54/*
55 * free a writeback record
56 */
57static void afs_free_writeback(struct afs_writeback *wb)
58{
59 _enter("");
60 key_put(wb->key);
61 kfree(wb);
62}
63
64/*
65 * dispose of a reference to a writeback record
66 */
67void afs_put_writeback(struct afs_writeback *wb)
68{
69 struct afs_vnode *vnode = wb->vnode;
70
71 _enter("{%d}", wb->usage);
72
73 spin_lock(&vnode->writeback_lock);
74 if (--wb->usage == 0)
75 afs_unlink_writeback(wb);
76 else
77 wb = NULL;
78 spin_unlock(&vnode->writeback_lock);
79 if (wb)
80 afs_free_writeback(wb);
81}
82
83/*
84 * partly or wholly fill a page that's under preparation for writing 30 * partly or wholly fill a page that's under preparation for writing
85 */ 31 */
86static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 32static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
@@ -103,7 +49,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
103 req->pages[0] = page; 49 req->pages[0] = page;
104 get_page(page); 50 get_page(page);
105 51
106 ret = afs_vnode_fetch_data(vnode, key, req); 52 ret = afs_fetch_data(vnode, key, req);
107 afs_put_read(req); 53 afs_put_read(req);
108 if (ret < 0) { 54 if (ret < 0) {
109 if (ret == -ENOENT) { 55 if (ret == -ENOENT) {
@@ -125,42 +71,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
125 loff_t pos, unsigned len, unsigned flags, 71 loff_t pos, unsigned len, unsigned flags,
126 struct page **pagep, void **fsdata) 72 struct page **pagep, void **fsdata)
127{ 73{
128 struct afs_writeback *candidate, *wb;
129 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 74 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
130 struct page *page; 75 struct page *page;
131 struct key *key = file->private_data; 76 struct key *key = afs_file_key(file);
132 unsigned from = pos & (PAGE_SIZE - 1); 77 unsigned long priv;
133 unsigned to = from + len; 78 unsigned f, from = pos & (PAGE_SIZE - 1);
79 unsigned t, to = from + len;
134 pgoff_t index = pos >> PAGE_SHIFT; 80 pgoff_t index = pos >> PAGE_SHIFT;
135 int ret; 81 int ret;
136 82
137 _enter("{%x:%u},{%lx},%u,%u", 83 _enter("{%x:%u},{%lx},%u,%u",
138 vnode->fid.vid, vnode->fid.vnode, index, from, to); 84 vnode->fid.vid, vnode->fid.vnode, index, from, to);
139 85
140 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); 86 /* We want to store information about how much of a page is altered in
141 if (!candidate) 87 * page->private.
142 return -ENOMEM; 88 */
143 candidate->vnode = vnode; 89 BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
144 candidate->first = candidate->last = index;
145 candidate->offset_first = from;
146 candidate->to_last = to;
147 INIT_LIST_HEAD(&candidate->link);
148 candidate->usage = 1;
149 candidate->state = AFS_WBACK_PENDING;
150 init_waitqueue_head(&candidate->waitq);
151 90
152 page = grab_cache_page_write_begin(mapping, index, flags); 91 page = grab_cache_page_write_begin(mapping, index, flags);
153 if (!page) { 92 if (!page)
154 kfree(candidate);
155 return -ENOMEM; 93 return -ENOMEM;
156 }
157 94
158 if (!PageUptodate(page) && len != PAGE_SIZE) { 95 if (!PageUptodate(page) && len != PAGE_SIZE) {
159 ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); 96 ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
160 if (ret < 0) { 97 if (ret < 0) {
161 unlock_page(page); 98 unlock_page(page);
162 put_page(page); 99 put_page(page);
163 kfree(candidate);
164 _leave(" = %d [prep]", ret); 100 _leave(" = %d [prep]", ret);
165 return ret; 101 return ret;
166 } 102 }
@@ -171,79 +107,54 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
171 *pagep = page; 107 *pagep = page;
172 108
173try_again: 109try_again:
174 spin_lock(&vnode->writeback_lock); 110 /* See if this page is already partially written in a way that we can
175 111 * merge the new write with.
176 /* see if this page is already pending a writeback under a suitable key 112 */
177 * - if so we can just join onto that one */ 113 t = f = 0;
178 wb = (struct afs_writeback *) page_private(page); 114 if (PagePrivate(page)) {
179 if (wb) { 115 priv = page_private(page);
180 if (wb->key == key && wb->state == AFS_WBACK_PENDING) 116 f = priv & AFS_PRIV_MAX;
181 goto subsume_in_current_wb; 117 t = priv >> AFS_PRIV_SHIFT;
182 goto flush_conflicting_wb; 118 ASSERTCMP(f, <=, t);
183 } 119 }
184 120
185 if (index > 0) { 121 if (f != t) {
186 /* see if we can find an already pending writeback that we can 122 if (to < f || from > t)
187 * append this page to */ 123 goto flush_conflicting_write;
188 list_for_each_entry(wb, &vnode->writebacks, link) { 124 if (from < f)
189 if (wb->last == index - 1 && wb->key == key && 125 f = from;
190 wb->state == AFS_WBACK_PENDING) 126 if (to > t)
191 goto append_to_previous_wb; 127 t = to;
192 } 128 } else {
129 f = from;
130 t = to;
193 } 131 }
194 132
195 list_add_tail(&candidate->link, &vnode->writebacks); 133 priv = (unsigned long)t << AFS_PRIV_SHIFT;
196 candidate->key = key_get(key); 134 priv |= f;
197 spin_unlock(&vnode->writeback_lock); 135 trace_afs_page_dirty(vnode, tracepoint_string("begin"),
198 SetPagePrivate(page); 136 page->index, priv);
199 set_page_private(page, (unsigned long) candidate);
200 _leave(" = 0 [new]");
201 return 0;
202
203subsume_in_current_wb:
204 _debug("subsume");
205 ASSERTRANGE(wb->first, <=, index, <=, wb->last);
206 if (index == wb->first && from < wb->offset_first)
207 wb->offset_first = from;
208 if (index == wb->last && to > wb->to_last)
209 wb->to_last = to;
210 spin_unlock(&vnode->writeback_lock);
211 kfree(candidate);
212 _leave(" = 0 [sub]");
213 return 0;
214
215append_to_previous_wb:
216 _debug("append into %lx-%lx", wb->first, wb->last);
217 wb->usage++;
218 wb->last++;
219 wb->to_last = to;
220 spin_unlock(&vnode->writeback_lock);
221 SetPagePrivate(page); 137 SetPagePrivate(page);
222 set_page_private(page, (unsigned long) wb); 138 set_page_private(page, priv);
223 kfree(candidate); 139 _leave(" = 0");
224 _leave(" = 0 [app]");
225 return 0; 140 return 0;
226 141
227 /* the page is currently bound to another context, so if it's dirty we 142 /* The previous write and this write aren't adjacent or overlapping, so
228 * need to flush it before we can use the new context */ 143 * flush the page out.
229flush_conflicting_wb: 144 */
145flush_conflicting_write:
230 _debug("flush conflict"); 146 _debug("flush conflict");
231 if (wb->state == AFS_WBACK_PENDING) 147 ret = write_one_page(page);
232 wb->state = AFS_WBACK_CONFLICTING; 148 if (ret < 0) {
233 spin_unlock(&vnode->writeback_lock); 149 _leave(" = %d", ret);
234 if (clear_page_dirty_for_io(page)) { 150 return ret;
235 ret = afs_write_back_from_locked_page(wb, page);
236 if (ret < 0) {
237 afs_put_writeback(candidate);
238 _leave(" = %d", ret);
239 return ret;
240 }
241 } 151 }
242 152
243 /* the page holds a ref on the writeback record */ 153 ret = lock_page_killable(page);
244 afs_put_writeback(wb); 154 if (ret < 0) {
245 set_page_private(page, 0); 155 _leave(" = %d", ret);
246 ClearPagePrivate(page); 156 return ret;
157 }
247 goto try_again; 158 goto try_again;
248} 159}
249 160
@@ -255,7 +166,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
255 struct page *page, void *fsdata) 166 struct page *page, void *fsdata)
256{ 167{
257 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 168 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
258 struct key *key = file->private_data; 169 struct key *key = afs_file_key(file);
259 loff_t i_size, maybe_i_size; 170 loff_t i_size, maybe_i_size;
260 int ret; 171 int ret;
261 172
@@ -266,11 +177,11 @@ int afs_write_end(struct file *file, struct address_space *mapping,
266 177
267 i_size = i_size_read(&vnode->vfs_inode); 178 i_size = i_size_read(&vnode->vfs_inode);
268 if (maybe_i_size > i_size) { 179 if (maybe_i_size > i_size) {
269 spin_lock(&vnode->writeback_lock); 180 spin_lock(&vnode->wb_lock);
270 i_size = i_size_read(&vnode->vfs_inode); 181 i_size = i_size_read(&vnode->vfs_inode);
271 if (maybe_i_size > i_size) 182 if (maybe_i_size > i_size)
272 i_size_write(&vnode->vfs_inode, maybe_i_size); 183 i_size_write(&vnode->vfs_inode, maybe_i_size);
273 spin_unlock(&vnode->writeback_lock); 184 spin_unlock(&vnode->wb_lock);
274 } 185 }
275 186
276 if (!PageUptodate(page)) { 187 if (!PageUptodate(page)) {
@@ -299,9 +210,10 @@ int afs_write_end(struct file *file, struct address_space *mapping,
299/* 210/*
300 * kill all the pages in the given range 211 * kill all the pages in the given range
301 */ 212 */
302static void afs_kill_pages(struct afs_vnode *vnode, bool error, 213static void afs_kill_pages(struct address_space *mapping,
303 pgoff_t first, pgoff_t last) 214 pgoff_t first, pgoff_t last)
304{ 215{
216 struct afs_vnode *vnode = AFS_FS_I(mapping->host);
305 struct pagevec pv; 217 struct pagevec pv;
306 unsigned count, loop; 218 unsigned count, loop;
307 219
@@ -316,37 +228,157 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
316 count = last - first + 1; 228 count = last - first + 1;
317 if (count > PAGEVEC_SIZE) 229 if (count > PAGEVEC_SIZE)
318 count = PAGEVEC_SIZE; 230 count = PAGEVEC_SIZE;
319 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, 231 pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
320 first, count, pv.pages);
321 ASSERTCMP(pv.nr, ==, count); 232 ASSERTCMP(pv.nr, ==, count);
322 233
323 for (loop = 0; loop < count; loop++) { 234 for (loop = 0; loop < count; loop++) {
324 struct page *page = pv.pages[loop]; 235 struct page *page = pv.pages[loop];
325 ClearPageUptodate(page); 236 ClearPageUptodate(page);
326 if (error) 237 SetPageError(page);
327 SetPageError(page); 238 end_page_writeback(page);
328 if (PageWriteback(page))
329 end_page_writeback(page);
330 if (page->index >= first) 239 if (page->index >= first)
331 first = page->index + 1; 240 first = page->index + 1;
241 lock_page(page);
242 generic_error_remove_page(mapping, page);
332 } 243 }
333 244
334 __pagevec_release(&pv); 245 __pagevec_release(&pv);
335 } while (first < last); 246 } while (first <= last);
336 247
337 _leave(""); 248 _leave("");
338} 249}
339 250
340/* 251/*
341 * synchronously write back the locked page and any subsequent non-locked dirty 252 * Redirty all the pages in a given range.
342 * pages also covered by the same writeback record 253 */
254static void afs_redirty_pages(struct writeback_control *wbc,
255 struct address_space *mapping,
256 pgoff_t first, pgoff_t last)
257{
258 struct afs_vnode *vnode = AFS_FS_I(mapping->host);
259 struct pagevec pv;
260 unsigned count, loop;
261
262 _enter("{%x:%u},%lx-%lx",
263 vnode->fid.vid, vnode->fid.vnode, first, last);
264
265 pagevec_init(&pv);
266
267 do {
268 _debug("redirty %lx-%lx", first, last);
269
270 count = last - first + 1;
271 if (count > PAGEVEC_SIZE)
272 count = PAGEVEC_SIZE;
273 pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
274 ASSERTCMP(pv.nr, ==, count);
275
276 for (loop = 0; loop < count; loop++) {
277 struct page *page = pv.pages[loop];
278
279 redirty_page_for_writepage(wbc, page);
280 end_page_writeback(page);
281 if (page->index >= first)
282 first = page->index + 1;
283 }
284
285 __pagevec_release(&pv);
286 } while (first <= last);
287
288 _leave("");
289}
290
291/*
292 * write to a file
293 */
294static int afs_store_data(struct address_space *mapping,
295 pgoff_t first, pgoff_t last,
296 unsigned offset, unsigned to)
297{
298 struct afs_vnode *vnode = AFS_FS_I(mapping->host);
299 struct afs_fs_cursor fc;
300 struct afs_wb_key *wbk = NULL;
301 struct list_head *p;
302 int ret = -ENOKEY, ret2;
303
304 _enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
305 vnode->volume->name,
306 vnode->fid.vid,
307 vnode->fid.vnode,
308 vnode->fid.unique,
309 first, last, offset, to);
310
311 spin_lock(&vnode->wb_lock);
312 p = vnode->wb_keys.next;
313
314 /* Iterate through the list looking for a valid key to use. */
315try_next_key:
316 while (p != &vnode->wb_keys) {
317 wbk = list_entry(p, struct afs_wb_key, vnode_link);
318 _debug("wbk %u", key_serial(wbk->key));
319 ret2 = key_validate(wbk->key);
320 if (ret2 == 0)
321 goto found_key;
322 if (ret == -ENOKEY)
323 ret = ret2;
324 p = p->next;
325 }
326
327 spin_unlock(&vnode->wb_lock);
328 afs_put_wb_key(wbk);
329 _leave(" = %d [no keys]", ret);
330 return ret;
331
332found_key:
333 refcount_inc(&wbk->usage);
334 spin_unlock(&vnode->wb_lock);
335
336 _debug("USE WB KEY %u", key_serial(wbk->key));
337
338 ret = -ERESTARTSYS;
339 if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
340 while (afs_select_fileserver(&fc)) {
341 fc.cb_break = vnode->cb_break + vnode->cb_s_break;
342 afs_fs_store_data(&fc, mapping, first, last, offset, to);
343 }
344
345 afs_check_for_remote_deletion(&fc, fc.vnode);
346 afs_vnode_commit_status(&fc, vnode, fc.cb_break);
347 ret = afs_end_vnode_operation(&fc);
348 }
349
350 switch (ret) {
351 case -EACCES:
352 case -EPERM:
353 case -ENOKEY:
354 case -EKEYEXPIRED:
355 case -EKEYREJECTED:
356 case -EKEYREVOKED:
357 _debug("next");
358 spin_lock(&vnode->wb_lock);
359 p = wbk->vnode_link.next;
360 afs_put_wb_key(wbk);
361 goto try_next_key;
362 }
363
364 afs_put_wb_key(wbk);
365 _leave(" = %d", ret);
366 return ret;
367}
368
369/*
370 * Synchronously write back the locked page and any subsequent non-locked dirty
371 * pages.
343 */ 372 */
344static int afs_write_back_from_locked_page(struct afs_writeback *wb, 373static int afs_write_back_from_locked_page(struct address_space *mapping,
345 struct page *primary_page) 374 struct writeback_control *wbc,
375 struct page *primary_page,
376 pgoff_t final_page)
346{ 377{
378 struct afs_vnode *vnode = AFS_FS_I(mapping->host);
347 struct page *pages[8], *page; 379 struct page *pages[8], *page;
348 unsigned long count; 380 unsigned long count, priv;
349 unsigned n, offset, to; 381 unsigned n, offset, to, f, t;
350 pgoff_t start, first, last; 382 pgoff_t start, first, last;
351 int loop, ret; 383 int loop, ret;
352 384
@@ -356,20 +388,33 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
356 if (test_set_page_writeback(primary_page)) 388 if (test_set_page_writeback(primary_page))
357 BUG(); 389 BUG();
358 390
359 /* find all consecutive lockable dirty pages, stopping when we find a 391 /* Find all consecutive lockable dirty pages that have contiguous
360 * page that is not immediately lockable, is not dirty or is missing, 392 * written regions, stopping when we find a page that is not
361 * or we reach the end of the range */ 393 * immediately lockable, is not dirty or is missing, or we reach the
394 * end of the range.
395 */
362 start = primary_page->index; 396 start = primary_page->index;
363 if (start >= wb->last) 397 priv = page_private(primary_page);
398 offset = priv & AFS_PRIV_MAX;
399 to = priv >> AFS_PRIV_SHIFT;
400 trace_afs_page_dirty(vnode, tracepoint_string("store"),
401 primary_page->index, priv);
402
403 WARN_ON(offset == to);
404 if (offset == to)
405 trace_afs_page_dirty(vnode, tracepoint_string("WARN"),
406 primary_page->index, priv);
407
408 if (start >= final_page || to < PAGE_SIZE)
364 goto no_more; 409 goto no_more;
410
365 start++; 411 start++;
366 do { 412 do {
367 _debug("more %lx [%lx]", start, count); 413 _debug("more %lx [%lx]", start, count);
368 n = wb->last - start + 1; 414 n = final_page - start + 1;
369 if (n > ARRAY_SIZE(pages)) 415 if (n > ARRAY_SIZE(pages))
370 n = ARRAY_SIZE(pages); 416 n = ARRAY_SIZE(pages);
371 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, 417 n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages);
372 start, n, pages);
373 _debug("fgpc %u", n); 418 _debug("fgpc %u", n);
374 if (n == 0) 419 if (n == 0)
375 goto no_more; 420 goto no_more;
@@ -381,16 +426,30 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
381 } 426 }
382 427
383 for (loop = 0; loop < n; loop++) { 428 for (loop = 0; loop < n; loop++) {
429 if (to != PAGE_SIZE)
430 break;
384 page = pages[loop]; 431 page = pages[loop];
385 if (page->index > wb->last) 432 if (page->index > final_page)
386 break; 433 break;
387 if (!trylock_page(page)) 434 if (!trylock_page(page))
388 break; 435 break;
389 if (!PageDirty(page) || 436 if (!PageDirty(page) || PageWriteback(page)) {
390 page_private(page) != (unsigned long) wb) {
391 unlock_page(page); 437 unlock_page(page);
392 break; 438 break;
393 } 439 }
440
441 priv = page_private(page);
442 f = priv & AFS_PRIV_MAX;
443 t = priv >> AFS_PRIV_SHIFT;
444 if (f != 0) {
445 unlock_page(page);
446 break;
447 }
448 to = t;
449
450 trace_afs_page_dirty(vnode, tracepoint_string("store+"),
451 page->index, priv);
452
394 if (!clear_page_dirty_for_io(page)) 453 if (!clear_page_dirty_for_io(page))
395 BUG(); 454 BUG();
396 if (test_set_page_writeback(page)) 455 if (test_set_page_writeback(page))
@@ -406,50 +465,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
406 } 465 }
407 466
408 start += loop; 467 start += loop;
409 } while (start <= wb->last && count < 65536); 468 } while (start <= final_page && count < 65536);
410 469
411no_more: 470no_more:
412 /* we now have a contiguous set of dirty pages, each with writeback set 471 /* We now have a contiguous set of dirty pages, each with writeback
413 * and the dirty mark cleared; the first page is locked and must remain 472 * set; the first page is still locked at this point, but all the rest
414 * so, all the rest are unlocked */ 473 * have been unlocked.
474 */
475 unlock_page(primary_page);
476
415 first = primary_page->index; 477 first = primary_page->index;
416 last = first + count - 1; 478 last = first + count - 1;
417 479
418 offset = (first == wb->first) ? wb->offset_first : 0;
419 to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
420
421 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); 480 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
422 481
423 ret = afs_vnode_store_data(wb, first, last, offset, to); 482 ret = afs_store_data(mapping, first, last, offset, to);
424 if (ret < 0) { 483 switch (ret) {
425 switch (ret) { 484 case 0:
426 case -EDQUOT:
427 case -ENOSPC:
428 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC);
429 break;
430 case -EROFS:
431 case -EIO:
432 case -EREMOTEIO:
433 case -EFBIG:
434 case -ENOENT:
435 case -ENOMEDIUM:
436 case -ENXIO:
437 afs_kill_pages(wb->vnode, true, first, last);
438 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO);
439 break;
440 case -EACCES:
441 case -EPERM:
442 case -ENOKEY:
443 case -EKEYEXPIRED:
444 case -EKEYREJECTED:
445 case -EKEYREVOKED:
446 afs_kill_pages(wb->vnode, false, first, last);
447 break;
448 default:
449 break;
450 }
451 } else {
452 ret = count; 485 ret = count;
486 break;
487
488 default:
489 pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
490 /* Fall through */
491 case -EACCES:
492 case -EPERM:
493 case -ENOKEY:
494 case -EKEYEXPIRED:
495 case -EKEYREJECTED:
496 case -EKEYREVOKED:
497 afs_redirty_pages(wbc, mapping, first, last);
498 mapping_set_error(mapping, ret);
499 break;
500
501 case -EDQUOT:
502 case -ENOSPC:
503 afs_redirty_pages(wbc, mapping, first, last);
504 mapping_set_error(mapping, -ENOSPC);
505 break;
506
507 case -EROFS:
508 case -EIO:
509 case -EREMOTEIO:
510 case -EFBIG:
511 case -ENOENT:
512 case -ENOMEDIUM:
513 case -ENXIO:
514 afs_kill_pages(mapping, first, last);
515 mapping_set_error(mapping, ret);
516 break;
453 } 517 }
454 518
455 _leave(" = %d", ret); 519 _leave(" = %d", ret);
@@ -462,16 +526,12 @@ no_more:
462 */ 526 */
463int afs_writepage(struct page *page, struct writeback_control *wbc) 527int afs_writepage(struct page *page, struct writeback_control *wbc)
464{ 528{
465 struct afs_writeback *wb;
466 int ret; 529 int ret;
467 530
468 _enter("{%lx},", page->index); 531 _enter("{%lx},", page->index);
469 532
470 wb = (struct afs_writeback *) page_private(page); 533 ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
471 ASSERT(wb != NULL); 534 wbc->range_end >> PAGE_SHIFT);
472
473 ret = afs_write_back_from_locked_page(wb, page);
474 unlock_page(page);
475 if (ret < 0) { 535 if (ret < 0) {
476 _leave(" = %d", ret); 536 _leave(" = %d", ret);
477 return 0; 537 return 0;
@@ -490,7 +550,6 @@ static int afs_writepages_region(struct address_space *mapping,
490 struct writeback_control *wbc, 550 struct writeback_control *wbc,
491 pgoff_t index, pgoff_t end, pgoff_t *_next) 551 pgoff_t index, pgoff_t end, pgoff_t *_next)
492{ 552{
493 struct afs_writeback *wb;
494 struct page *page; 553 struct page *page;
495 int ret, n; 554 int ret, n;
496 555
@@ -509,7 +568,12 @@ static int afs_writepages_region(struct address_space *mapping,
509 * (changing page->mapping to NULL), or even swizzled back from 568 * (changing page->mapping to NULL), or even swizzled back from
510 * swapper_space to tmpfs file mapping 569 * swapper_space to tmpfs file mapping
511 */ 570 */
512 lock_page(page); 571 ret = lock_page_killable(page);
572 if (ret < 0) {
573 put_page(page);
574 _leave(" = %d", ret);
575 return ret;
576 }
513 577
514 if (page->mapping != mapping || !PageDirty(page)) { 578 if (page->mapping != mapping || !PageDirty(page)) {
515 unlock_page(page); 579 unlock_page(page);
@@ -525,17 +589,9 @@ static int afs_writepages_region(struct address_space *mapping,
525 continue; 589 continue;
526 } 590 }
527 591
528 wb = (struct afs_writeback *) page_private(page);
529 ASSERT(wb != NULL);
530
531 spin_lock(&wb->vnode->writeback_lock);
532 wb->state = AFS_WBACK_WRITING;
533 spin_unlock(&wb->vnode->writeback_lock);
534
535 if (!clear_page_dirty_for_io(page)) 592 if (!clear_page_dirty_for_io(page))
536 BUG(); 593 BUG();
537 ret = afs_write_back_from_locked_page(wb, page); 594 ret = afs_write_back_from_locked_page(mapping, wbc, page, end);
538 unlock_page(page);
539 put_page(page); 595 put_page(page);
540 if (ret < 0) { 596 if (ret < 0) {
541 _leave(" = %d", ret); 597 _leave(" = %d", ret);
@@ -591,17 +647,14 @@ int afs_writepages(struct address_space *mapping,
591 */ 647 */
592void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) 648void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
593{ 649{
594 struct afs_writeback *wb = call->wb;
595 struct pagevec pv; 650 struct pagevec pv;
651 unsigned long priv;
596 unsigned count, loop; 652 unsigned count, loop;
597 pgoff_t first = call->first, last = call->last; 653 pgoff_t first = call->first, last = call->last;
598 bool free_wb;
599 654
600 _enter("{%x:%u},{%lx-%lx}", 655 _enter("{%x:%u},{%lx-%lx}",
601 vnode->fid.vid, vnode->fid.vnode, first, last); 656 vnode->fid.vid, vnode->fid.vnode, first, last);
602 657
603 ASSERT(wb != NULL);
604
605 pagevec_init(&pv); 658 pagevec_init(&pv);
606 659
607 do { 660 do {
@@ -610,35 +663,22 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
610 count = last - first + 1; 663 count = last - first + 1;
611 if (count > PAGEVEC_SIZE) 664 if (count > PAGEVEC_SIZE)
612 count = PAGEVEC_SIZE; 665 count = PAGEVEC_SIZE;
613 pv.nr = find_get_pages_contig(call->mapping, first, count, 666 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
614 pv.pages); 667 first, count, pv.pages);
615 ASSERTCMP(pv.nr, ==, count); 668 ASSERTCMP(pv.nr, ==, count);
616 669
617 spin_lock(&vnode->writeback_lock);
618 for (loop = 0; loop < count; loop++) { 670 for (loop = 0; loop < count; loop++) {
619 struct page *page = pv.pages[loop]; 671 priv = page_private(pv.pages[loop]);
620 end_page_writeback(page); 672 trace_afs_page_dirty(vnode, tracepoint_string("clear"),
621 if (page_private(page) == (unsigned long) wb) { 673 pv.pages[loop]->index, priv);
622 set_page_private(page, 0); 674 set_page_private(pv.pages[loop], 0);
623 ClearPagePrivate(page); 675 end_page_writeback(pv.pages[loop]);
624 wb->usage--;
625 }
626 }
627 free_wb = false;
628 if (wb->usage == 0) {
629 afs_unlink_writeback(wb);
630 free_wb = true;
631 } 676 }
632 spin_unlock(&vnode->writeback_lock);
633 first += count; 677 first += count;
634 if (free_wb) {
635 afs_free_writeback(wb);
636 wb = NULL;
637 }
638
639 __pagevec_release(&pv); 678 __pagevec_release(&pv);
640 } while (first <= last); 679 } while (first <= last);
641 680
681 afs_prune_wb_keys(vnode);
642 _leave(""); 682 _leave("");
643} 683}
644 684
@@ -670,28 +710,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
670} 710}
671 711
672/* 712/*
673 * flush the vnode to the fileserver
674 */
675int afs_writeback_all(struct afs_vnode *vnode)
676{
677 struct address_space *mapping = vnode->vfs_inode.i_mapping;
678 struct writeback_control wbc = {
679 .sync_mode = WB_SYNC_ALL,
680 .nr_to_write = LONG_MAX,
681 .range_cyclic = 1,
682 };
683 int ret;
684
685 _enter("");
686
687 ret = mapping->a_ops->writepages(mapping, &wbc);
688 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
689
690 _leave(" = %d", ret);
691 return ret;
692}
693
694/*
695 * flush any dirty pages for this process, and check for write errors. 713 * flush any dirty pages for this process, and check for write errors.
696 * - the return status from this call provides a reliable indication of 714 * - the return status from this call provides a reliable indication of
697 * whether any write errors occurred for this process. 715 * whether any write errors occurred for this process.
@@ -699,61 +717,13 @@ int afs_writeback_all(struct afs_vnode *vnode)
699int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 717int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
700{ 718{
701 struct inode *inode = file_inode(file); 719 struct inode *inode = file_inode(file);
702 struct afs_writeback *wb, *xwb;
703 struct afs_vnode *vnode = AFS_FS_I(inode); 720 struct afs_vnode *vnode = AFS_FS_I(inode);
704 int ret;
705 721
706 _enter("{%x:%u},{n=%pD},%d", 722 _enter("{%x:%u},{n=%pD},%d",
707 vnode->fid.vid, vnode->fid.vnode, file, 723 vnode->fid.vid, vnode->fid.vnode, file,
708 datasync); 724 datasync);
709 725
710 ret = file_write_and_wait_range(file, start, end); 726 return file_write_and_wait_range(file, start, end);
711 if (ret)
712 return ret;
713 inode_lock(inode);
714
715 /* use a writeback record as a marker in the queue - when this reaches
716 * the front of the queue, all the outstanding writes are either
717 * completed or rejected */
718 wb = kzalloc(sizeof(*wb), GFP_KERNEL);
719 if (!wb) {
720 ret = -ENOMEM;
721 goto out;
722 }
723 wb->vnode = vnode;
724 wb->first = 0;
725 wb->last = -1;
726 wb->offset_first = 0;
727 wb->to_last = PAGE_SIZE;
728 wb->usage = 1;
729 wb->state = AFS_WBACK_SYNCING;
730 init_waitqueue_head(&wb->waitq);
731
732 spin_lock(&vnode->writeback_lock);
733 list_for_each_entry(xwb, &vnode->writebacks, link) {
734 if (xwb->state == AFS_WBACK_PENDING)
735 xwb->state = AFS_WBACK_CONFLICTING;
736 }
737 list_add_tail(&wb->link, &vnode->writebacks);
738 spin_unlock(&vnode->writeback_lock);
739
740 /* push all the outstanding writebacks to the server */
741 ret = afs_writeback_all(vnode);
742 if (ret < 0) {
743 afs_put_writeback(wb);
744 _leave(" = %d [wb]", ret);
745 goto out;
746 }
747
748 /* wait for the preceding writes to actually complete */
749 ret = wait_event_interruptible(wb->waitq,
750 wb->state == AFS_WBACK_COMPLETE ||
751 vnode->writebacks.next == &wb->link);
752 afs_put_writeback(wb);
753 _leave(" = %d", ret);
754out:
755 inode_unlock(inode);
756 return ret;
757} 727}
758 728
759/* 729/*
@@ -774,19 +744,114 @@ int afs_flush(struct file *file, fl_owner_t id)
774 * notification that a previously read-only page is about to become writable 744 * notification that a previously read-only page is about to become writable
775 * - if it returns an error, the caller will deliver a bus error signal 745 * - if it returns an error, the caller will deliver a bus error signal
776 */ 746 */
777int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) 747int afs_page_mkwrite(struct vm_fault *vmf)
778{ 748{
779 struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); 749 struct file *file = vmf->vma->vm_file;
750 struct inode *inode = file_inode(file);
751 struct afs_vnode *vnode = AFS_FS_I(inode);
752 unsigned long priv;
780 753
781 _enter("{{%x:%u}},{%lx}", 754 _enter("{{%x:%u}},{%lx}",
782 vnode->fid.vid, vnode->fid.vnode, page->index); 755 vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
756
757 sb_start_pagefault(inode->i_sb);
783 758
784 /* wait for the page to be written to the cache before we allow it to 759 /* Wait for the page to be written to the cache before we allow it to
785 * be modified */ 760 * be modified. We then assume the entire page will need writing back.
761 */
786#ifdef CONFIG_AFS_FSCACHE 762#ifdef CONFIG_AFS_FSCACHE
787 fscache_wait_on_page_write(vnode->cache, page); 763 fscache_wait_on_page_write(vnode->cache, vmf->page);
788#endif 764#endif
789 765
790 _leave(" = 0"); 766 if (PageWriteback(vmf->page) &&
791 return 0; 767 wait_on_page_bit_killable(vmf->page, PG_writeback) < 0)
768 return VM_FAULT_RETRY;
769
770 if (lock_page_killable(vmf->page) < 0)
771 return VM_FAULT_RETRY;
772
773 /* We mustn't change page->private until writeback is complete as that
774 * details the portion of the page we need to write back and we might
775 * need to redirty the page if there's a problem.
776 */
777 wait_on_page_writeback(vmf->page);
778
779 priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
780 priv |= 0; /* From */
781 trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
782 vmf->page->index, priv);
783 SetPagePrivate(vmf->page);
784 set_page_private(vmf->page, priv);
785
786 sb_end_pagefault(inode->i_sb);
787 return VM_FAULT_LOCKED;
788}
789
790/*
791 * Prune the keys cached for writeback. The caller must hold vnode->wb_lock.
792 */
793void afs_prune_wb_keys(struct afs_vnode *vnode)
794{
795 LIST_HEAD(graveyard);
796 struct afs_wb_key *wbk, *tmp;
797
798 /* Discard unused keys */
799 spin_lock(&vnode->wb_lock);
800
801 if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
802 !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
803 list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
804 if (refcount_read(&wbk->usage) == 1)
805 list_move(&wbk->vnode_link, &graveyard);
806 }
807 }
808
809 spin_unlock(&vnode->wb_lock);
810
811 while (!list_empty(&graveyard)) {
812 wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
813 list_del(&wbk->vnode_link);
814 afs_put_wb_key(wbk);
815 }
816}
817
818/*
819 * Clean up a page during invalidation.
820 */
821int afs_launder_page(struct page *page)
822{
823 struct address_space *mapping = page->mapping;
824 struct afs_vnode *vnode = AFS_FS_I(mapping->host);
825 unsigned long priv;
826 unsigned int f, t;
827 int ret = 0;
828
829 _enter("{%lx}", page->index);
830
831 priv = page_private(page);
832 if (clear_page_dirty_for_io(page)) {
833 f = 0;
834 t = PAGE_SIZE;
835 if (PagePrivate(page)) {
836 f = priv & AFS_PRIV_MAX;
837 t = priv >> AFS_PRIV_SHIFT;
838 }
839
840 trace_afs_page_dirty(vnode, tracepoint_string("launder"),
841 page->index, priv);
842 ret = afs_store_data(mapping, page->index, page->index, t, f);
843 }
844
845 trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
846 page->index, priv);
847 set_page_private(page, 0);
848 ClearPagePrivate(page);
849
850#ifdef CONFIG_AFS_FSCACHE
851 if (PageFsCache(page)) {
852 fscache_wait_on_page_write(vnode->cache, page);
853 fscache_uncache_page(vnode->cache, page);
854 }
855#endif
856 return ret;
792} 857}
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 2830e4f48d85..cfcc674e64a5 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler,
45 struct afs_cell *cell = vnode->volume->cell; 45 struct afs_cell *cell = vnode->volume->cell;
46 size_t namelen; 46 size_t namelen;
47 47
48 namelen = strlen(cell->name); 48 namelen = cell->name_len;
49 if (size == 0) 49 if (size == 0)
50 return namelen; 50 return namelen;
51 if (namelen > size) 51 if (namelen > size)
@@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler,
96 void *buffer, size_t size) 96 void *buffer, size_t size)
97{ 97{
98 struct afs_vnode *vnode = AFS_FS_I(inode); 98 struct afs_vnode *vnode = AFS_FS_I(inode);
99 const char *volname = vnode->volume->vlocation->vldb.name; 99 const char *volname = vnode->volume->name;
100 size_t namelen; 100 size_t namelen;
101 101
102 namelen = strlen(volname); 102 namelen = strlen(volname);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 673ac4e01dd0..7208ecef7088 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3992,16 +3992,9 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3992 btrfs_put_block_group(bg); 3992 btrfs_put_block_group(bg);
3993} 3993}
3994 3994
3995static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
3996{
3997 schedule();
3998 return 0;
3999}
4000
4001void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) 3995void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
4002{ 3996{
4003 wait_on_atomic_t(&bg->nocow_writers, 3997 wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait,
4004 btrfs_wait_nocow_writers_atomic_t,
4005 TASK_UNINTERRUPTIBLE); 3998 TASK_UNINTERRUPTIBLE);
4006} 3999}
4007 4000
@@ -6530,12 +6523,6 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6530 btrfs_put_block_group(bg); 6523 btrfs_put_block_group(bg);
6531} 6524}
6532 6525
6533static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
6534{
6535 schedule();
6536 return 0;
6537}
6538
6539void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) 6526void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6540{ 6527{
6541 struct btrfs_space_info *space_info = bg->space_info; 6528 struct btrfs_space_info *space_info = bg->space_info;
@@ -6558,8 +6545,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6558 down_write(&space_info->groups_sem); 6545 down_write(&space_info->groups_sem);
6559 up_write(&space_info->groups_sem); 6546 up_write(&space_info->groups_sem);
6560 6547
6561 wait_on_atomic_t(&bg->reservations, 6548 wait_on_atomic_t(&bg->reservations, atomic_t_wait,
6562 btrfs_wait_bg_reservations_atomic_t,
6563 TASK_UNINTERRUPTIBLE); 6549 TASK_UNINTERRUPTIBLE);
6564} 6550}
6565 6551
@@ -11059,12 +11045,6 @@ int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
11059 return 1; 11045 return 1;
11060} 11046}
11061 11047
11062static int wait_snapshotting_atomic_t(atomic_t *a)
11063{
11064 schedule();
11065 return 0;
11066}
11067
11068void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) 11048void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11069{ 11049{
11070 while (true) { 11050 while (true) {
@@ -11073,8 +11053,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11073 ret = btrfs_start_write_no_snapshotting(root); 11053 ret = btrfs_start_write_no_snapshotting(root);
11074 if (ret) 11054 if (ret)
11075 break; 11055 break;
11076 wait_on_atomic_t(&root->will_be_snapshotted, 11056 wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait,
11077 wait_snapshotting_atomic_t,
11078 TASK_UNINTERRUPTIBLE); 11057 TASK_UNINTERRUPTIBLE);
11079 } 11058 }
11080} 11059}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 40d61077bead..ff84258132bb 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -558,7 +558,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
558 * have completed. 558 * have completed.
559 */ 559 */
560 if (!atomic_dec_and_test(&cookie->n_active)) 560 if (!atomic_dec_and_test(&cookie->n_active))
561 wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, 561 wait_on_atomic_t(&cookie->n_active, atomic_t_wait,
562 TASK_UNINTERRUPTIBLE); 562 TASK_UNINTERRUPTIBLE);
563 563
564 /* Make sure any pending writes are cancelled. */ 564 /* Make sure any pending writes are cancelled. */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 97ec45110957..0ff4b49a0037 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); 97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
98} 98}
99 99
100extern int fscache_wait_atomic_t(atomic_t *);
101
102/* 100/*
103 * object.c 101 * object.c
104 */ 102 */
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index b39d487ccfb0..249968dcbf5c 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -195,12 +195,3 @@ static void __exit fscache_exit(void)
195} 195}
196 196
197module_exit(fscache_exit); 197module_exit(fscache_exit);
198
199/*
200 * wait_on_atomic_t() sleep function for uninterruptible waiting
201 */
202int fscache_wait_atomic_t(atomic_t *p)
203{
204 schedule();
205 return 0;
206}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 134d9f560240..1629056aa2c9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -85,9 +85,9 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
85} 85}
86EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); 86EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
87 87
88int nfs_wait_atomic_killable(atomic_t *p) 88int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode)
89{ 89{
90 return nfs_wait_killable(TASK_KILLABLE); 90 return nfs_wait_killable(mode);
91} 91}
92 92
93/** 93/**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f9a4a5524bd5..5ab17fd4700a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -388,7 +388,7 @@ extern void nfs_evict_inode(struct inode *);
388void nfs_zap_acl_cache(struct inode *inode); 388void nfs_zap_acl_cache(struct inode *inode);
389extern bool nfs_check_cache_invalid(struct inode *, unsigned long); 389extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
390extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); 390extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
391extern int nfs_wait_atomic_killable(atomic_t *p); 391extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
392 392
393/* super.c */ 393/* super.c */
394extern const struct super_operations nfs_sops; 394extern const struct super_operations nfs_sops;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 2cabbcf2f28e..e87279e49ba3 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -129,19 +129,13 @@ static struct kobj_attribute ocfs2_attr_filecheck_set =
129 ocfs2_filecheck_show, 129 ocfs2_filecheck_show,
130 ocfs2_filecheck_store); 130 ocfs2_filecheck_store);
131 131
132static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
133{
134 schedule();
135 return 0;
136}
137
138static void 132static void
139ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) 133ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
140{ 134{
141 struct ocfs2_filecheck_entry *p; 135 struct ocfs2_filecheck_entry *p;
142 136
143 if (!atomic_dec_and_test(&entry->fs_count)) 137 if (!atomic_dec_and_test(&entry->fs_count))
144 wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait, 138 wait_on_atomic_t(&entry->fs_count, atomic_t_wait,
145 TASK_UNINTERRUPTIBLE); 139 TASK_UNINTERRUPTIBLE);
146 140
147 spin_lock(&entry->fs_fcheck->fc_lock); 141 spin_lock(&entry->fs_fcheck->fc_lock);
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index af0d495430d7..61b39eaf7cad 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -26,6 +26,8 @@ struct wait_bit_queue_entry {
26 { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } 26 { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
27 27
28typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); 28typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
29typedef int wait_atomic_t_action_f(atomic_t *counter, unsigned int mode);
30
29void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); 31void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
30int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); 32int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
31int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); 33int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
@@ -34,7 +36,7 @@ void wake_up_atomic_t(atomic_t *p);
34int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); 36int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
35int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); 37int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
36int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); 38int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
37int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); 39int out_of_line_wait_on_atomic_t(atomic_t *p, wait_atomic_t_action_f action, unsigned int mode);
38struct wait_queue_head *bit_waitqueue(void *word, int bit); 40struct wait_queue_head *bit_waitqueue(void *word, int bit);
39extern void __init wait_bit_init(void); 41extern void __init wait_bit_init(void);
40 42
@@ -51,10 +53,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
51 }, \ 53 }, \
52 } 54 }
53 55
54extern int bit_wait(struct wait_bit_key *key, int bit); 56extern int bit_wait(struct wait_bit_key *key, int mode);
55extern int bit_wait_io(struct wait_bit_key *key, int bit); 57extern int bit_wait_io(struct wait_bit_key *key, int mode);
56extern int bit_wait_timeout(struct wait_bit_key *key, int bit); 58extern int bit_wait_timeout(struct wait_bit_key *key, int mode);
57extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); 59extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode);
60extern int atomic_t_wait(atomic_t *counter, unsigned int mode);
58 61
59/** 62/**
60 * wait_on_bit - wait for a bit to be cleared 63 * wait_on_bit - wait for a bit to be cleared
@@ -251,7 +254,7 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
251 * outside of the target 'word'. 254 * outside of the target 'word'.
252 */ 255 */
253static inline 256static inline
254int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) 257int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode)
255{ 258{
256 might_sleep(); 259 might_sleep();
257 if (atomic_read(val) == 0) 260 if (atomic_read(val) == 0)
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8b95c16b7045..6b59c63a8e51 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -30,6 +30,38 @@ enum afs_call_trace {
30 afs_call_trace_work, 30 afs_call_trace_work,
31}; 31};
32 32
33enum afs_fs_operation {
34 afs_FS_FetchData = 130, /* AFS Fetch file data */
35 afs_FS_FetchStatus = 132, /* AFS Fetch file status */
36 afs_FS_StoreData = 133, /* AFS Store file data */
37 afs_FS_StoreStatus = 135, /* AFS Store file status */
38 afs_FS_RemoveFile = 136, /* AFS Remove a file */
39 afs_FS_CreateFile = 137, /* AFS Create a file */
40 afs_FS_Rename = 138, /* AFS Rename or move a file or directory */
41 afs_FS_Symlink = 139, /* AFS Create a symbolic link */
42 afs_FS_Link = 140, /* AFS Create a hard link */
43 afs_FS_MakeDir = 141, /* AFS Create a directory */
44 afs_FS_RemoveDir = 142, /* AFS Remove a directory */
45 afs_FS_GetVolumeInfo = 148, /* AFS Get information about a volume */
46 afs_FS_GetVolumeStatus = 149, /* AFS Get volume status information */
47 afs_FS_GetRootVolume = 151, /* AFS Get root volume name */
48 afs_FS_SetLock = 156, /* AFS Request a file lock */
49 afs_FS_ExtendLock = 157, /* AFS Extend a file lock */
50 afs_FS_ReleaseLock = 158, /* AFS Release a file lock */
51 afs_FS_Lookup = 161, /* AFS lookup file in directory */
52 afs_FS_FetchData64 = 65537, /* AFS Fetch file data */
53 afs_FS_StoreData64 = 65538, /* AFS Store file data */
54 afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */
55 afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */
56};
57
58enum afs_vl_operation {
59 afs_VL_GetEntryByNameU = 527, /* AFS Get Vol Entry By Name operation ID */
60 afs_VL_GetAddrsU = 533, /* AFS Get FS server addresses */
61 afs_YFSVL_GetEndpoints = 64002, /* YFS Get FS & Vol server addresses */
62 afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */
63};
64
33#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ 65#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
34 66
35/* 67/*
@@ -42,6 +74,37 @@ enum afs_call_trace {
42 EM(afs_call_trace_wake, "WAKE ") \ 74 EM(afs_call_trace_wake, "WAKE ") \
43 E_(afs_call_trace_work, "WORK ") 75 E_(afs_call_trace_work, "WORK ")
44 76
77#define afs_fs_operations \
78 EM(afs_FS_FetchData, "FS.FetchData") \
79 EM(afs_FS_FetchStatus, "FS.FetchStatus") \
80 EM(afs_FS_StoreData, "FS.StoreData") \
81 EM(afs_FS_StoreStatus, "FS.StoreStatus") \
82 EM(afs_FS_RemoveFile, "FS.RemoveFile") \
83 EM(afs_FS_CreateFile, "FS.CreateFile") \
84 EM(afs_FS_Rename, "FS.Rename") \
85 EM(afs_FS_Symlink, "FS.Symlink") \
86 EM(afs_FS_Link, "FS.Link") \
87 EM(afs_FS_MakeDir, "FS.MakeDir") \
88 EM(afs_FS_RemoveDir, "FS.RemoveDir") \
89 EM(afs_FS_GetVolumeInfo, "FS.GetVolumeInfo") \
90 EM(afs_FS_GetVolumeStatus, "FS.GetVolumeStatus") \
91 EM(afs_FS_GetRootVolume, "FS.GetRootVolume") \
92 EM(afs_FS_SetLock, "FS.SetLock") \
93 EM(afs_FS_ExtendLock, "FS.ExtendLock") \
94 EM(afs_FS_ReleaseLock, "FS.ReleaseLock") \
95 EM(afs_FS_Lookup, "FS.Lookup") \
96 EM(afs_FS_FetchData64, "FS.FetchData64") \
97 EM(afs_FS_StoreData64, "FS.StoreData64") \
98 EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \
99 E_(afs_FS_GetCapabilities, "FS.GetCapabilities")
100
101#define afs_vl_operations \
102 EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \
103 EM(afs_VL_GetAddrsU, "VL.GetAddrsU") \
104 EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \
105 E_(afs_VL_GetCapabilities, "VL.GetCapabilities")
106
107
45/* 108/*
46 * Export enum symbols via userspace. 109 * Export enum symbols via userspace.
47 */ 110 */
@@ -51,6 +114,8 @@ enum afs_call_trace {
51#define E_(a, b) TRACE_DEFINE_ENUM(a); 114#define E_(a, b) TRACE_DEFINE_ENUM(a);
52 115
53afs_call_traces; 116afs_call_traces;
117afs_fs_operations;
118afs_vl_operations;
54 119
55/* 120/*
56 * Now redefine the EM() and E_() macros to map the enums to the strings that 121 * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -178,6 +243,234 @@ TRACE_EVENT(afs_call,
178 __entry->where) 243 __entry->where)
179 ); 244 );
180 245
246TRACE_EVENT(afs_make_fs_call,
247 TP_PROTO(struct afs_call *call, const struct afs_fid *fid),
248
249 TP_ARGS(call, fid),
250
251 TP_STRUCT__entry(
252 __field(struct afs_call *, call )
253 __field(enum afs_fs_operation, op )
254 __field_struct(struct afs_fid, fid )
255 ),
256
257 TP_fast_assign(
258 __entry->call = call;
259 __entry->op = call->operation_ID;
260 if (fid) {
261 __entry->fid = *fid;
262 } else {
263 __entry->fid.vid = 0;
264 __entry->fid.vnode = 0;
265 __entry->fid.unique = 0;
266 }
267 ),
268
269 TP_printk("c=%p %06x:%06x:%06x %s",
270 __entry->call,
271 __entry->fid.vid,
272 __entry->fid.vnode,
273 __entry->fid.unique,
274 __print_symbolic(__entry->op, afs_fs_operations))
275 );
276
277TRACE_EVENT(afs_make_vl_call,
278 TP_PROTO(struct afs_call *call),
279
280 TP_ARGS(call),
281
282 TP_STRUCT__entry(
283 __field(struct afs_call *, call )
284 __field(enum afs_vl_operation, op )
285 ),
286
287 TP_fast_assign(
288 __entry->call = call;
289 __entry->op = call->operation_ID;
290 ),
291
292 TP_printk("c=%p %s",
293 __entry->call,
294 __print_symbolic(__entry->op, afs_vl_operations))
295 );
296
297TRACE_EVENT(afs_call_done,
298 TP_PROTO(struct afs_call *call),
299
300 TP_ARGS(call),
301
302 TP_STRUCT__entry(
303 __field(struct afs_call *, call )
304 __field(struct rxrpc_call *, rx_call )
305 __field(int, ret )
306 __field(u32, abort_code )
307 ),
308
309 TP_fast_assign(
310 __entry->call = call;
311 __entry->rx_call = call->rxcall;
312 __entry->ret = call->error;
313 __entry->abort_code = call->abort_code;
314 ),
315
316 TP_printk(" c=%p ret=%d ab=%d [%p]",
317 __entry->call,
318 __entry->ret,
319 __entry->abort_code,
320 __entry->rx_call)
321 );
322
323TRACE_EVENT(afs_send_pages,
324 TP_PROTO(struct afs_call *call, struct msghdr *msg,
325 pgoff_t first, pgoff_t last, unsigned int offset),
326
327 TP_ARGS(call, msg, first, last, offset),
328
329 TP_STRUCT__entry(
330 __field(struct afs_call *, call )
331 __field(pgoff_t, first )
332 __field(pgoff_t, last )
333 __field(unsigned int, nr )
334 __field(unsigned int, bytes )
335 __field(unsigned int, offset )
336 __field(unsigned int, flags )
337 ),
338
339 TP_fast_assign(
340 __entry->call = call;
341 __entry->first = first;
342 __entry->last = last;
343 __entry->nr = msg->msg_iter.nr_segs;
344 __entry->bytes = msg->msg_iter.count;
345 __entry->offset = offset;
346 __entry->flags = msg->msg_flags;
347 ),
348
349 TP_printk(" c=%p %lx-%lx-%lx b=%x o=%x f=%x",
350 __entry->call,
351 __entry->first, __entry->first + __entry->nr - 1, __entry->last,
352 __entry->bytes, __entry->offset,
353 __entry->flags)
354 );
355
356TRACE_EVENT(afs_sent_pages,
357 TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last,
358 pgoff_t cursor, int ret),
359
360 TP_ARGS(call, first, last, cursor, ret),
361
362 TP_STRUCT__entry(
363 __field(struct afs_call *, call )
364 __field(pgoff_t, first )
365 __field(pgoff_t, last )
366 __field(pgoff_t, cursor )
367 __field(int, ret )
368 ),
369
370 TP_fast_assign(
371 __entry->call = call;
372 __entry->first = first;
373 __entry->last = last;
374 __entry->cursor = cursor;
375 __entry->ret = ret;
376 ),
377
378 TP_printk(" c=%p %lx-%lx c=%lx r=%d",
379 __entry->call,
380 __entry->first, __entry->last,
381 __entry->cursor, __entry->ret)
382 );
383
384TRACE_EVENT(afs_dir_check_failed,
385 TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
386
387 TP_ARGS(vnode, off, i_size),
388
389 TP_STRUCT__entry(
390 __field(struct afs_vnode *, vnode )
391 __field(loff_t, off )
392 __field(loff_t, i_size )
393 ),
394
395 TP_fast_assign(
396 __entry->vnode = vnode;
397 __entry->off = off;
398 __entry->i_size = i_size;
399 ),
400
401 TP_printk("vn=%p %llx/%llx",
402 __entry->vnode, __entry->off, __entry->i_size)
403 );
404
405/*
406 * We use page->private to hold the amount of the page that we've written to,
407 * splitting the field into two parts. However, we need to represent a range
408 * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system.
409 */
410#if PAGE_SIZE > 32768
411#define AFS_PRIV_MAX 0xffffffff
412#define AFS_PRIV_SHIFT 32
413#else
414#define AFS_PRIV_MAX 0xffff
415#define AFS_PRIV_SHIFT 16
416#endif
417
418TRACE_EVENT(afs_page_dirty,
419 TP_PROTO(struct afs_vnode *vnode, const char *where,
420 pgoff_t page, unsigned long priv),
421
422 TP_ARGS(vnode, where, page, priv),
423
424 TP_STRUCT__entry(
425 __field(struct afs_vnode *, vnode )
426 __field(const char *, where )
427 __field(pgoff_t, page )
428 __field(unsigned long, priv )
429 ),
430
431 TP_fast_assign(
432 __entry->vnode = vnode;
433 __entry->where = where;
434 __entry->page = page;
435 __entry->priv = priv;
436 ),
437
438 TP_printk("vn=%p %lx %s %lu-%lu",
439 __entry->vnode, __entry->page, __entry->where,
440 __entry->priv & AFS_PRIV_MAX,
441 __entry->priv >> AFS_PRIV_SHIFT)
442 );
443
444TRACE_EVENT(afs_call_state,
445 TP_PROTO(struct afs_call *call,
446 enum afs_call_state from,
447 enum afs_call_state to,
448 int ret, u32 remote_abort),
449
450 TP_ARGS(call, from, to, ret, remote_abort),
451
452 TP_STRUCT__entry(
453 __field(struct afs_call *, call )
454 __field(enum afs_call_state, from )
455 __field(enum afs_call_state, to )
456 __field(int, ret )
457 __field(u32, abort )
458 ),
459
460 TP_fast_assign(
461 __entry->call = call;
462 __entry->from = from;
463 __entry->to = to;
464 __entry->ret = ret;
465 __entry->abort = remote_abort;
466 ),
467
468 TP_printk("c=%p %u->%u r=%d ab=%d",
469 __entry->call,
470 __entry->from, __entry->to,
471 __entry->ret, __entry->abort)
472 );
473
181#endif /* _TRACE_AFS_H */ 474#endif /* _TRACE_AFS_H */
182 475
183/* This part must be outside protection */ 476/* This part must be outside protection */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index aa50113ebe5b..1a6fee974116 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -47,6 +47,7 @@
47#define OPENPROM_SUPER_MAGIC 0x9fa1 47#define OPENPROM_SUPER_MAGIC 0x9fa1
48#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ 48#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */
49#define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ 49#define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */
50#define AFS_FS_MAGIC 0x6B414653
50 51
51#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ 52#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */
52 /* used by file system utilities that 53 /* used by file system utilities that
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index f8159698aa4d..84cb3acd9260 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -183,7 +183,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
183 */ 183 */
184static __sched 184static __sched
185int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, 185int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
186 int (*action)(atomic_t *), unsigned mode) 186 wait_atomic_t_action_f action, unsigned int mode)
187{ 187{
188 atomic_t *val; 188 atomic_t *val;
189 int ret = 0; 189 int ret = 0;
@@ -193,7 +193,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
193 val = wbq_entry->key.flags; 193 val = wbq_entry->key.flags;
194 if (atomic_read(val) == 0) 194 if (atomic_read(val) == 0)
195 break; 195 break;
196 ret = (*action)(val); 196 ret = (*action)(val, mode);
197 } while (!ret && atomic_read(val) != 0); 197 } while (!ret && atomic_read(val) != 0);
198 finish_wait(wq_head, &wbq_entry->wq_entry); 198 finish_wait(wq_head, &wbq_entry->wq_entry);
199 return ret; 199 return ret;
@@ -210,8 +210,9 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
210 }, \ 210 }, \
211 } 211 }
212 212
213__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), 213__sched int out_of_line_wait_on_atomic_t(atomic_t *p,
214 unsigned mode) 214 wait_atomic_t_action_f action,
215 unsigned int mode)
215{ 216{
216 struct wait_queue_head *wq_head = atomic_t_waitqueue(p); 217 struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
217 DEFINE_WAIT_ATOMIC_T(wq_entry, p); 218 DEFINE_WAIT_ATOMIC_T(wq_entry, p);
@@ -220,6 +221,15 @@ __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
220} 221}
221EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); 222EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
222 223
224__sched int atomic_t_wait(atomic_t *counter, unsigned int mode)
225{
226 schedule();
227 if (signal_pending_state(mode, current))
228 return -EINTR;
229 return 0;
230}
231EXPORT_SYMBOL(atomic_t_wait);
232
223/** 233/**
224 * wake_up_atomic_t - Wake up a waiter on a atomic_t 234 * wake_up_atomic_t - Wake up a waiter on a atomic_t
225 * @p: The atomic_t being waited on, a kernel virtual address 235 * @p: The atomic_t being waited on, a kernel virtual address
diff --git a/mm/filemap.c b/mm/filemap.c
index 923fc2ebd74a..ee83baaf855d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1139,6 +1139,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
1139 wait_queue_head_t *q = page_waitqueue(page); 1139 wait_queue_head_t *q = page_waitqueue(page);
1140 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); 1140 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
1141} 1141}
1142EXPORT_SYMBOL(wait_on_page_bit_killable);
1142 1143
1143/** 1144/**
1144 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue 1145 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue