diff options
author | Jeff Layton <jlayton@redhat.com> | 2013-02-04 11:57:27 -0500 |
---|---|---|
committer | J. Bruce Fields <bfields@redhat.com> | 2013-02-08 16:02:26 -0500 |
commit | 01a7decf75930925322c5efc87af0b5e58eb8650 (patch) | |
tree | cdd3420f451edc0568f4318b1d2338d491679068 | |
parent | 4c190e2f913f038c9c91ee63b59cd037260ba353 (diff) |
nfsd: keep a checksum of the first 256 bytes of request
Now that we're allowing more DRC entries, it becomes a lot easier to hit
problems with XID collisions. In order to mitigate those, calculate a
checksum of up to the first 256 bytes of each request coming in and store
that in the cache entry, along with the total length of the request.
This initially used crc32, but Chuck Lever and Jim Rees pointed out that
crc32 is probably more heavyweight than we really need for generating
these checksums, and recommended looking at using the same routines that
are used to generate checksums for IP packets.
On an x86_64 KVM guest, measurements with ftrace showed ~800ns to use
csum_partial vs ~1750ns for crc32. The difference probably isn't
terribly significant, but for now we may as well use csum_partial.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Stones-thrown-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
-rw-r--r-- | fs/nfsd/cache.h | 5 | ||||
-rw-r--r-- | fs/nfsd/nfscache.c | 47 |
2 files changed, 49 insertions, 3 deletions
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 9c7232b45103..87fd1410b737 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h | |||
@@ -29,6 +29,8 @@ struct svc_cacherep { | |||
29 | u32 c_prot; | 29 | u32 c_prot; |
30 | u32 c_proc; | 30 | u32 c_proc; |
31 | u32 c_vers; | 31 | u32 c_vers; |
32 | unsigned int c_len; | ||
33 | __wsum c_csum; | ||
32 | unsigned long c_timestamp; | 34 | unsigned long c_timestamp; |
33 | union { | 35 | union { |
34 | struct kvec u_vec; | 36 | struct kvec u_vec; |
@@ -73,6 +75,9 @@ enum { | |||
73 | /* Cache entries expire after this time period */ | 75 | /* Cache entries expire after this time period */ |
74 | #define RC_EXPIRE (120 * HZ) | 76 | #define RC_EXPIRE (120 * HZ) |
75 | 77 | ||
78 | /* Checksum this amount of the request */ | ||
79 | #define RC_CSUMLEN (256U) | ||
80 | |||
76 | int nfsd_reply_cache_init(void); | 81 | int nfsd_reply_cache_init(void); |
77 | void nfsd_reply_cache_shutdown(void); | 82 | void nfsd_reply_cache_shutdown(void); |
78 | int nfsd_cache_lookup(struct svc_rqst *); | 83 | int nfsd_cache_lookup(struct svc_rqst *); |
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index f7544698e6e6..40db57eb2b06 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/sunrpc/addr.h> | 12 | #include <linux/sunrpc/addr.h> |
13 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
14 | #include <net/checksum.h> | ||
14 | 15 | ||
15 | #include "nfsd.h" | 16 | #include "nfsd.h" |
16 | #include "cache.h" | 17 | #include "cache.h" |
@@ -130,6 +131,7 @@ int nfsd_reply_cache_init(void) | |||
130 | INIT_LIST_HEAD(&lru_head); | 131 | INIT_LIST_HEAD(&lru_head); |
131 | max_drc_entries = nfsd_cache_size_limit(); | 132 | max_drc_entries = nfsd_cache_size_limit(); |
132 | num_drc_entries = 0; | 133 | num_drc_entries = 0; |
134 | |||
133 | return 0; | 135 | return 0; |
134 | out_nomem: | 136 | out_nomem: |
135 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); | 137 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); |
@@ -238,12 +240,45 @@ nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
238 | } | 240 | } |
239 | 241 | ||
240 | /* | 242 | /* |
243 | * Walk an xdr_buf and get a checksum for at most the first RC_CSUMLEN bytes | ||
244 | */ | ||
245 | static __wsum | ||
246 | nfsd_cache_csum(struct svc_rqst *rqstp) | ||
247 | { | ||
248 | int idx; | ||
249 | unsigned int base; | ||
250 | __wsum csum; | ||
251 | struct xdr_buf *buf = &rqstp->rq_arg; | ||
252 | const unsigned char *p = buf->head[0].iov_base; | ||
253 | size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, | ||
254 | RC_CSUMLEN); | ||
255 | size_t len = min(buf->head[0].iov_len, csum_len); | ||
256 | |||
257 | /* rq_arg.head first */ | ||
258 | csum = csum_partial(p, len, 0); | ||
259 | csum_len -= len; | ||
260 | |||
261 | /* Continue into page array */ | ||
262 | idx = buf->page_base / PAGE_SIZE; | ||
263 | base = buf->page_base & ~PAGE_MASK; | ||
264 | while (csum_len) { | ||
265 | p = page_address(buf->pages[idx]) + base; | ||
266 | len = min(PAGE_SIZE - base, csum_len); | ||
267 | csum = csum_partial(p, len, csum); | ||
268 | csum_len -= len; | ||
269 | base = 0; | ||
270 | ++idx; | ||
271 | } | ||
272 | return csum; | ||
273 | } | ||
274 | |||
275 | /* | ||
241 | * Search the request hash for an entry that matches the given rqstp. | 276 | * Search the request hash for an entry that matches the given rqstp. |
242 | * Must be called with cache_lock held. Returns the found entry or | 277 | * Must be called with cache_lock held. Returns the found entry or |
243 | * NULL on failure. | 278 | * NULL on failure. |
244 | */ | 279 | */ |
245 | static struct svc_cacherep * | 280 | static struct svc_cacherep * |
246 | nfsd_cache_search(struct svc_rqst *rqstp) | 281 | nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) |
247 | { | 282 | { |
248 | struct svc_cacherep *rp; | 283 | struct svc_cacherep *rp; |
249 | struct hlist_node *hn; | 284 | struct hlist_node *hn; |
@@ -257,6 +292,7 @@ nfsd_cache_search(struct svc_rqst *rqstp) | |||
257 | hlist_for_each_entry(rp, hn, rh, c_hash) { | 292 | hlist_for_each_entry(rp, hn, rh, c_hash) { |
258 | if (xid == rp->c_xid && proc == rp->c_proc && | 293 | if (xid == rp->c_xid && proc == rp->c_proc && |
259 | proto == rp->c_prot && vers == rp->c_vers && | 294 | proto == rp->c_prot && vers == rp->c_vers && |
295 | rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && | ||
260 | rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && | 296 | rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && |
261 | rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) | 297 | rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) |
262 | return rp; | 298 | return rp; |
@@ -277,6 +313,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
277 | u32 proto = rqstp->rq_prot, | 313 | u32 proto = rqstp->rq_prot, |
278 | vers = rqstp->rq_vers, | 314 | vers = rqstp->rq_vers, |
279 | proc = rqstp->rq_proc; | 315 | proc = rqstp->rq_proc; |
316 | __wsum csum; | ||
280 | unsigned long age; | 317 | unsigned long age; |
281 | int type = rqstp->rq_cachetype; | 318 | int type = rqstp->rq_cachetype; |
282 | int rtn; | 319 | int rtn; |
@@ -287,10 +324,12 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
287 | return RC_DOIT; | 324 | return RC_DOIT; |
288 | } | 325 | } |
289 | 326 | ||
327 | csum = nfsd_cache_csum(rqstp); | ||
328 | |||
290 | spin_lock(&cache_lock); | 329 | spin_lock(&cache_lock); |
291 | rtn = RC_DOIT; | 330 | rtn = RC_DOIT; |
292 | 331 | ||
293 | rp = nfsd_cache_search(rqstp); | 332 | rp = nfsd_cache_search(rqstp, csum); |
294 | if (rp) | 333 | if (rp) |
295 | goto found_entry; | 334 | goto found_entry; |
296 | 335 | ||
@@ -318,7 +357,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
318 | * Must search again just in case someone inserted one | 357 | * Must search again just in case someone inserted one |
319 | * after we dropped the lock above. | 358 | * after we dropped the lock above. |
320 | */ | 359 | */ |
321 | found = nfsd_cache_search(rqstp); | 360 | found = nfsd_cache_search(rqstp, csum); |
322 | if (found) { | 361 | if (found) { |
323 | nfsd_reply_cache_free_locked(rp); | 362 | nfsd_reply_cache_free_locked(rp); |
324 | rp = found; | 363 | rp = found; |
@@ -344,6 +383,8 @@ setup_entry: | |||
344 | rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); | 383 | rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); |
345 | rp->c_prot = proto; | 384 | rp->c_prot = proto; |
346 | rp->c_vers = vers; | 385 | rp->c_vers = vers; |
386 | rp->c_len = rqstp->rq_arg.len; | ||
387 | rp->c_csum = csum; | ||
347 | 388 | ||
348 | hash_refile(rp); | 389 | hash_refile(rp); |
349 | lru_put_end(rp); | 390 | lru_put_end(rp); |