aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig12
-rw-r--r--fs/Makefile1
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/ecryptfs/Makefile7
-rw-r--r--fs/ecryptfs/crypto.c1659
-rw-r--r--fs/ecryptfs/debug.c123
-rw-r--r--fs/ecryptfs/dentry.c87
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h482
-rw-r--r--fs/ecryptfs/file.c440
-rw-r--r--fs/ecryptfs/inode.c1079
-rw-r--r--fs/ecryptfs/keystore.c1061
-rw-r--r--fs/ecryptfs/main.c831
-rw-r--r--fs/ecryptfs/mmap.c788
-rw-r--r--fs/ecryptfs/super.c198
-rw-r--r--fs/lockd/clntlock.c54
-rw-r--r--fs/lockd/clntproc.c17
-rw-r--r--fs/lockd/host.c325
-rw-r--r--fs/lockd/mon.c65
-rw-r--r--fs/lockd/svc.c19
-rw-r--r--fs/lockd/svc4proc.c29
-rw-r--r--fs/lockd/svclock.c197
-rw-r--r--fs/lockd/svcproc.c27
-rw-r--r--fs/lockd/svcshare.c20
-rw-r--r--fs/lockd/svcsubs.c164
-rw-r--r--fs/nfsd/export.c149
-rw-r--r--fs/nfsd/nfs2acl.c3
-rw-r--r--fs/nfsd/nfs3acl.c3
-rw-r--r--fs/nfsd/nfs3proc.c18
-rw-r--r--fs/nfsd/nfs3xdr.c56
-rw-r--r--fs/nfsd/nfs4acl.c711
-rw-r--r--fs/nfsd/nfs4proc.c32
-rw-r--r--fs/nfsd/nfs4xdr.c231
-rw-r--r--fs/nfsd/nfsctl.c49
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c19
-rw-r--r--fs/nfsd/nfsxdr.c43
-rw-r--r--fs/nfsd/vfs.c86
-rw-r--r--fs/reiserfs/inode.c2
38 files changed, 8173 insertions, 935 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 68f4561423ff..674cfbb83a95 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -995,6 +995,18 @@ config AFFS_FS
995 To compile this file system support as a module, choose M here: the 995 To compile this file system support as a module, choose M here: the
996 module will be called affs. If unsure, say N. 996 module will be called affs. If unsure, say N.
997 997
998config ECRYPT_FS
999 tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
1000 depends on EXPERIMENTAL && KEYS && CRYPTO
1001 help
1002 Encrypted filesystem that operates on the VFS layer. See
1003 <file:Documentation/ecryptfs.txt> to learn more about
1004 eCryptfs. Userspace components are required and can be
1005 obtained from <http://ecryptfs.sf.net>.
1006
1007 To compile this file system support as a module, choose M here: the
1008 module will be called ecryptfs.
1009
998config HFS_FS 1010config HFS_FS
999 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 1011 tristate "Apple Macintosh file system support (EXPERIMENTAL)"
1000 depends on BLOCK && EXPERIMENTAL 1012 depends on BLOCK && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index 819b2a93bebe..fd24d67a7cdb 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_BFS_FS) += bfs/
75obj-$(CONFIG_ISO9660_FS) += isofs/ 75obj-$(CONFIG_ISO9660_FS) += isofs/
76obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ 76obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
77obj-$(CONFIG_HFS_FS) += hfs/ 77obj-$(CONFIG_HFS_FS) += hfs/
78obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
78obj-$(CONFIG_VXFS_FS) += freevxfs/ 79obj-$(CONFIG_VXFS_FS) += freevxfs/
79obj-$(CONFIG_NFS_FS) += nfs/ 80obj-$(CONFIG_NFS_FS) += nfs/
80obj-$(CONFIG_EXPORTFS) += exportfs/ 81obj-$(CONFIG_EXPORTFS) += exportfs/
diff --git a/fs/dcache.c b/fs/dcache.c
index fc2faa44f8d1..2355bddad8de 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -291,9 +291,9 @@ struct dentry * dget_locked(struct dentry *dentry)
291 * it can be unhashed only if it has no children, or if it is the root 291 * it can be unhashed only if it has no children, or if it is the root
292 * of a filesystem. 292 * of a filesystem.
293 * 293 *
294 * If the inode has a DCACHE_DISCONNECTED alias, then prefer 294 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
295 * any other hashed alias over that one unless @want_discon is set, 295 * any other hashed alias over that one unless @want_discon is set,
296 * in which case only return a DCACHE_DISCONNECTED alias. 296 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
297 */ 297 */
298 298
299static struct dentry * __d_find_alias(struct inode *inode, int want_discon) 299static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
@@ -309,7 +309,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
309 prefetch(next); 309 prefetch(next);
310 alias = list_entry(tmp, struct dentry, d_alias); 310 alias = list_entry(tmp, struct dentry, d_alias);
311 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 311 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
312 if (alias->d_flags & DCACHE_DISCONNECTED) 312 if (IS_ROOT(alias) &&
313 (alias->d_flags & DCACHE_DISCONNECTED))
313 discon_alias = alias; 314 discon_alias = alias;
314 else if (!want_discon) { 315 else if (!want_discon) {
315 __dget_locked(alias); 316 __dget_locked(alias);
@@ -1004,7 +1005,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1004{ 1005{
1005 struct dentry *new = NULL; 1006 struct dentry *new = NULL;
1006 1007
1007 if (inode) { 1008 if (inode && S_ISDIR(inode->i_mode)) {
1008 spin_lock(&dcache_lock); 1009 spin_lock(&dcache_lock);
1009 new = __d_find_alias(inode, 1); 1010 new = __d_find_alias(inode, 1);
1010 if (new) { 1011 if (new) {
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
new file mode 100644
index 000000000000..ca6562451eeb
--- /dev/null
+++ b/fs/ecryptfs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the Linux 2.6 eCryptfs
3#
4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
new file mode 100644
index 000000000000..ed35a9712fa1
--- /dev/null
+++ b/fs/ecryptfs/crypto.c
@@ -0,0 +1,1659 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/mount.h>
28#include <linux/pagemap.h>
29#include <linux/random.h>
30#include <linux/compiler.h>
31#include <linux/key.h>
32#include <linux/namei.h>
33#include <linux/crypto.h>
34#include <linux/file.h>
35#include <linux/scatterlist.h>
36#include "ecryptfs_kernel.h"
37
38static int
39ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
40 struct page *dst_page, int dst_offset,
41 struct page *src_page, int src_offset, int size,
42 unsigned char *iv);
43static int
44ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
45 struct page *dst_page, int dst_offset,
46 struct page *src_page, int src_offset, int size,
47 unsigned char *iv);
48
49/**
50 * ecryptfs_to_hex
51 * @dst: Buffer to take hex character representation of contents of
52 * src; must be at least of size (src_size * 2)
53 * @src: Buffer to be converted to a hex string respresentation
54 * @src_size: number of bytes to convert
55 */
56void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
57{
58 int x;
59
60 for (x = 0; x < src_size; x++)
61 sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
62}
63
64/**
65 * ecryptfs_from_hex
66 * @dst: Buffer to take the bytes from src hex; must be at least of
67 * size (src_size / 2)
68 * @src: Buffer to be converted from a hex string respresentation to raw value
69 * @dst_size: size of dst buffer, or number of hex characters pairs to convert
70 */
71void ecryptfs_from_hex(char *dst, char *src, int dst_size)
72{
73 int x;
74 char tmp[3] = { 0, };
75
76 for (x = 0; x < dst_size; x++) {
77 tmp[0] = src[x * 2];
78 tmp[1] = src[x * 2 + 1];
79 dst[x] = (unsigned char)simple_strtol(tmp, NULL, 16);
80 }
81}
82
83/**
84 * ecryptfs_calculate_md5 - calculates the md5 of @src
85 * @dst: Pointer to 16 bytes of allocated memory
86 * @crypt_stat: Pointer to crypt_stat struct for the current inode
87 * @src: Data to be md5'd
88 * @len: Length of @src
89 *
90 * Uses the allocated crypto context that crypt_stat references to
91 * generate the MD5 sum of the contents of src.
92 */
93static int ecryptfs_calculate_md5(char *dst,
94 struct ecryptfs_crypt_stat *crypt_stat,
95 char *src, int len)
96{
97 int rc = 0;
98 struct scatterlist sg;
99
100 mutex_lock(&crypt_stat->cs_md5_tfm_mutex);
101 sg_init_one(&sg, (u8 *)src, len);
102 if (!crypt_stat->md5_tfm) {
103 crypt_stat->md5_tfm =
104 crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
105 if (!crypt_stat->md5_tfm) {
106 rc = -ENOMEM;
107 ecryptfs_printk(KERN_ERR, "Error attempting to "
108 "allocate crypto context\n");
109 goto out;
110 }
111 }
112 crypto_digest_init(crypt_stat->md5_tfm);
113 crypto_digest_update(crypt_stat->md5_tfm, &sg, 1);
114 crypto_digest_final(crypt_stat->md5_tfm, dst);
115 mutex_unlock(&crypt_stat->cs_md5_tfm_mutex);
116out:
117 return rc;
118}
119
120/**
121 * ecryptfs_derive_iv
122 * @iv: destination for the derived iv vale
123 * @crypt_stat: Pointer to crypt_stat struct for the current inode
124 * @offset: Offset of the page whose's iv we are to derive
125 *
126 * Generate the initialization vector from the given root IV and page
127 * offset.
128 *
129 * Returns zero on success; non-zero on error.
130 */
131static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
132 pgoff_t offset)
133{
134 int rc = 0;
135 char dst[MD5_DIGEST_SIZE];
136 char src[ECRYPTFS_MAX_IV_BYTES + 16];
137
138 if (unlikely(ecryptfs_verbosity > 0)) {
139 ecryptfs_printk(KERN_DEBUG, "root iv:\n");
140 ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes);
141 }
142 /* TODO: It is probably secure to just cast the least
143 * significant bits of the root IV into an unsigned long and
144 * add the offset to that rather than go through all this
145 * hashing business. -Halcrow */
146 memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
147 memset((src + crypt_stat->iv_bytes), 0, 16);
148 snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset);
149 if (unlikely(ecryptfs_verbosity > 0)) {
150 ecryptfs_printk(KERN_DEBUG, "source:\n");
151 ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
152 }
153 rc = ecryptfs_calculate_md5(dst, crypt_stat, src,
154 (crypt_stat->iv_bytes + 16));
155 if (rc) {
156 ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
157 "MD5 while generating IV for a page\n");
158 goto out;
159 }
160 memcpy(iv, dst, crypt_stat->iv_bytes);
161 if (unlikely(ecryptfs_verbosity > 0)) {
162 ecryptfs_printk(KERN_DEBUG, "derived iv:\n");
163 ecryptfs_dump_hex(iv, crypt_stat->iv_bytes);
164 }
165out:
166 return rc;
167}
168
169/**
170 * ecryptfs_init_crypt_stat
171 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
172 *
173 * Initialize the crypt_stat structure.
174 */
175void
176ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
177{
178 memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
179 mutex_init(&crypt_stat->cs_mutex);
180 mutex_init(&crypt_stat->cs_tfm_mutex);
181 mutex_init(&crypt_stat->cs_md5_tfm_mutex);
182 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
183}
184
185/**
186 * ecryptfs_destruct_crypt_stat
187 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
188 *
189 * Releases all memory associated with a crypt_stat struct.
190 */
191void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
192{
193 if (crypt_stat->tfm)
194 crypto_free_tfm(crypt_stat->tfm);
195 if (crypt_stat->md5_tfm)
196 crypto_free_tfm(crypt_stat->md5_tfm);
197 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
198}
199
200void ecryptfs_destruct_mount_crypt_stat(
201 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
202{
203 if (mount_crypt_stat->global_auth_tok_key)
204 key_put(mount_crypt_stat->global_auth_tok_key);
205 if (mount_crypt_stat->global_key_tfm)
206 crypto_free_tfm(mount_crypt_stat->global_key_tfm);
207 memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
208}
209
210/**
211 * virt_to_scatterlist
212 * @addr: Virtual address
213 * @size: Size of data; should be an even multiple of the block size
214 * @sg: Pointer to scatterlist array; set to NULL to obtain only
215 * the number of scatterlist structs required in array
216 * @sg_size: Max array size
217 *
218 * Fills in a scatterlist array with page references for a passed
219 * virtual address.
220 *
221 * Returns the number of scatterlist structs in array used
222 */
223int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
224 int sg_size)
225{
226 int i = 0;
227 struct page *pg;
228 int offset;
229 int remainder_of_page;
230
231 while (size > 0 && i < sg_size) {
232 pg = virt_to_page(addr);
233 offset = offset_in_page(addr);
234 if (sg) {
235 sg[i].page = pg;
236 sg[i].offset = offset;
237 }
238 remainder_of_page = PAGE_CACHE_SIZE - offset;
239 if (size >= remainder_of_page) {
240 if (sg)
241 sg[i].length = remainder_of_page;
242 addr += remainder_of_page;
243 size -= remainder_of_page;
244 } else {
245 if (sg)
246 sg[i].length = size;
247 addr += size;
248 size = 0;
249 }
250 i++;
251 }
252 if (size > 0)
253 return -ENOMEM;
254 return i;
255}
256
257/**
258 * encrypt_scatterlist
259 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
260 * @dest_sg: Destination of encrypted data
261 * @src_sg: Data to be encrypted
262 * @size: Length of data to be encrypted
263 * @iv: iv to use during encryption
264 *
265 * Returns the number of bytes encrypted; negative value on error
266 */
267static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
268 struct scatterlist *dest_sg,
269 struct scatterlist *src_sg, int size,
270 unsigned char *iv)
271{
272 int rc = 0;
273
274 BUG_ON(!crypt_stat || !crypt_stat->tfm
275 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
276 ECRYPTFS_STRUCT_INITIALIZED));
277 if (unlikely(ecryptfs_verbosity > 0)) {
278 ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
279 crypt_stat->key_size);
280 ecryptfs_dump_hex(crypt_stat->key,
281 crypt_stat->key_size);
282 }
283 /* Consider doing this once, when the file is opened */
284 mutex_lock(&crypt_stat->cs_tfm_mutex);
285 rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
286 crypt_stat->key_size);
287 if (rc) {
288 ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
289 rc);
290 mutex_unlock(&crypt_stat->cs_tfm_mutex);
291 rc = -EINVAL;
292 goto out;
293 }
294 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size);
295 crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv);
296 mutex_unlock(&crypt_stat->cs_tfm_mutex);
297out:
298 return rc;
299}
300
301static void
302ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx,
303 int *byte_offset,
304 struct ecryptfs_crypt_stat *crypt_stat,
305 unsigned long extent_num)
306{
307 unsigned long lower_extent_num;
308 int extents_occupied_by_headers_at_front;
309 int bytes_occupied_by_headers_at_front;
310 int extent_offset;
311 int extents_per_page;
312
313 bytes_occupied_by_headers_at_front =
314 ( crypt_stat->header_extent_size
315 * crypt_stat->num_header_extents_at_front );
316 extents_occupied_by_headers_at_front =
317 ( bytes_occupied_by_headers_at_front
318 / crypt_stat->extent_size );
319 lower_extent_num = extents_occupied_by_headers_at_front + extent_num;
320 extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
321 (*lower_page_idx) = lower_extent_num / extents_per_page;
322 extent_offset = lower_extent_num % extents_per_page;
323 (*byte_offset) = extent_offset * crypt_stat->extent_size;
324 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = "
325 "[%d]\n", crypt_stat->header_extent_size);
326 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->"
327 "num_header_extents_at_front = [%d]\n",
328 crypt_stat->num_header_extents_at_front);
329 ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_"
330 "front = [%d]\n", extents_occupied_by_headers_at_front);
331 ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n",
332 lower_extent_num);
333 ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n",
334 extents_per_page);
335 ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n",
336 (*lower_page_idx));
337 ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n",
338 extent_offset);
339 ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n",
340 (*byte_offset));
341}
342
343static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx,
344 struct page *lower_page,
345 struct inode *lower_inode,
346 int byte_offset_in_page, int bytes_to_write)
347{
348 int rc = 0;
349
350 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
351 rc = ecryptfs_commit_lower_page(lower_page, lower_inode,
352 ctx->param.lower_file,
353 byte_offset_in_page,
354 bytes_to_write);
355 if (rc) {
356 ecryptfs_printk(KERN_ERR, "Error calling lower "
357 "commit; rc = [%d]\n", rc);
358 goto out;
359 }
360 } else {
361 rc = ecryptfs_writepage_and_release_lower_page(lower_page,
362 lower_inode,
363 ctx->param.wbc);
364 if (rc) {
365 ecryptfs_printk(KERN_ERR, "Error calling lower "
366 "writepage(); rc = [%d]\n", rc);
367 goto out;
368 }
369 }
370out:
371 return rc;
372}
373
374static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
375 struct page **lower_page,
376 struct inode *lower_inode,
377 unsigned long lower_page_idx,
378 int byte_offset_in_page)
379{
380 int rc = 0;
381
382 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
383 /* TODO: Limit this to only the data extents that are
384 * needed */
385 rc = ecryptfs_get_lower_page(lower_page, lower_inode,
386 ctx->param.lower_file,
387 lower_page_idx,
388 byte_offset_in_page,
389 (PAGE_CACHE_SIZE
390 - byte_offset_in_page));
391 if (rc) {
392 ecryptfs_printk(
393 KERN_ERR, "Error attempting to grab, map, "
394 "and prepare_write lower page with index "
395 "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc);
396 goto out;
397 }
398 } else {
399 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL,
400 lower_inode,
401 lower_page_idx);
402 if (rc) {
403 ecryptfs_printk(
404 KERN_ERR, "Error attempting to grab and map "
405 "lower page with index [0x%.16x]; rc = [%d]\n",
406 lower_page_idx, rc);
407 goto out;
408 }
409 }
410out:
411 return rc;
412}
413
414/**
415 * ecryptfs_encrypt_page
416 * @ctx: The context of the page
417 *
418 * Encrypt an eCryptfs page. This is done on a per-extent basis. Note
419 * that eCryptfs pages may straddle the lower pages -- for instance,
420 * if the file was created on a machine with an 8K page size
421 * (resulting in an 8K header), and then the file is copied onto a
422 * host with a 32K page size, then when reading page 0 of the eCryptfs
423 * file, 24K of page 0 of the lower file will be read and decrypted,
424 * and then 8K of page 1 of the lower file will be read and decrypted.
425 *
426 * The actual operations performed on each page depends on the
427 * contents of the ecryptfs_page_crypt_context struct.
428 *
429 * Returns zero on success; negative on error
430 */
431int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
432{
433 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
434 unsigned long base_extent;
435 unsigned long extent_offset = 0;
436 unsigned long lower_page_idx = 0;
437 unsigned long prior_lower_page_idx = 0;
438 struct page *lower_page;
439 struct inode *lower_inode;
440 struct ecryptfs_inode_info *inode_info;
441 struct ecryptfs_crypt_stat *crypt_stat;
442 int rc = 0;
443 int lower_byte_offset = 0;
444 int orig_byte_offset = 0;
445 int num_extents_per_page;
446#define ECRYPTFS_PAGE_STATE_UNREAD 0
447#define ECRYPTFS_PAGE_STATE_READ 1
448#define ECRYPTFS_PAGE_STATE_MODIFIED 2
449#define ECRYPTFS_PAGE_STATE_WRITTEN 3
450 int page_state;
451
452 lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
453 inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
454 crypt_stat = &inode_info->crypt_stat;
455 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
456 rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
457 ctx->param.lower_file);
458 if (rc)
459 ecryptfs_printk(KERN_ERR, "Error attempting to copy "
460 "page at index [0x%.16x]\n",
461 ctx->page->index);
462 goto out;
463 }
464 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
465 base_extent = (ctx->page->index * num_extents_per_page);
466 page_state = ECRYPTFS_PAGE_STATE_UNREAD;
467 while (extent_offset < num_extents_per_page) {
468 ecryptfs_extent_to_lwr_pg_idx_and_offset(
469 &lower_page_idx, &lower_byte_offset, crypt_stat,
470 (base_extent + extent_offset));
471 if (prior_lower_page_idx != lower_page_idx
472 && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) {
473 rc = ecryptfs_write_out_page(ctx, lower_page,
474 lower_inode,
475 orig_byte_offset,
476 (PAGE_CACHE_SIZE
477 - orig_byte_offset));
478 if (rc) {
479 ecryptfs_printk(KERN_ERR, "Error attempting "
480 "to write out page; rc = [%d]"
481 "\n", rc);
482 goto out;
483 }
484 page_state = ECRYPTFS_PAGE_STATE_WRITTEN;
485 }
486 if (page_state == ECRYPTFS_PAGE_STATE_UNREAD
487 || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) {
488 rc = ecryptfs_read_in_page(ctx, &lower_page,
489 lower_inode, lower_page_idx,
490 lower_byte_offset);
491 if (rc) {
492 ecryptfs_printk(KERN_ERR, "Error attempting "
493 "to read in lower page with "
494 "index [0x%.16x]; rc = [%d]\n",
495 lower_page_idx, rc);
496 goto out;
497 }
498 orig_byte_offset = lower_byte_offset;
499 prior_lower_page_idx = lower_page_idx;
500 page_state = ECRYPTFS_PAGE_STATE_READ;
501 }
502 BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED
503 || page_state == ECRYPTFS_PAGE_STATE_READ));
504 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
505 (base_extent + extent_offset));
506 if (rc) {
507 ecryptfs_printk(KERN_ERR, "Error attempting to "
508 "derive IV for extent [0x%.16x]; "
509 "rc = [%d]\n",
510 (base_extent + extent_offset), rc);
511 goto out;
512 }
513 if (unlikely(ecryptfs_verbosity > 0)) {
514 ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
515 "with iv:\n");
516 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
517 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
518 "encryption:\n");
519 ecryptfs_dump_hex((char *)
520 (page_address(ctx->page)
521 + (extent_offset
522 * crypt_stat->extent_size)), 8);
523 }
524 rc = ecryptfs_encrypt_page_offset(
525 crypt_stat, lower_page, lower_byte_offset, ctx->page,
526 (extent_offset * crypt_stat->extent_size),
527 crypt_stat->extent_size, extent_iv);
528 ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
529 "rc = [%d]\n",
530 (base_extent + extent_offset), rc);
531 if (unlikely(ecryptfs_verbosity > 0)) {
532 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
533 "encryption:\n");
534 ecryptfs_dump_hex((char *)(page_address(lower_page)
535 + lower_byte_offset), 8);
536 }
537 page_state = ECRYPTFS_PAGE_STATE_MODIFIED;
538 extent_offset++;
539 }
540 BUG_ON(orig_byte_offset != 0);
541 rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0,
542 (lower_byte_offset
543 + crypt_stat->extent_size));
544 if (rc) {
545 ecryptfs_printk(KERN_ERR, "Error attempting to write out "
546 "page; rc = [%d]\n", rc);
547 goto out;
548 }
549out:
550 return rc;
551}
552
553/**
554 * ecryptfs_decrypt_page
555 * @file: The ecryptfs file
556 * @page: The page in ecryptfs to decrypt
557 *
558 * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
559 * that eCryptfs pages may straddle the lower pages -- for instance,
560 * if the file was created on a machine with an 8K page size
561 * (resulting in an 8K header), and then the file is copied onto a
562 * host with a 32K page size, then when reading page 0 of the eCryptfs
563 * file, 24K of page 0 of the lower file will be read and decrypted,
564 * and then 8K of page 1 of the lower file will be read and decrypted.
565 *
566 * Returns zero on success; negative on error
567 */
568int ecryptfs_decrypt_page(struct file *file, struct page *page)
569{
570 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
571 unsigned long base_extent;
572 unsigned long extent_offset = 0;
573 unsigned long lower_page_idx = 0;
574 unsigned long prior_lower_page_idx = 0;
575 struct page *lower_page;
576 char *lower_page_virt = NULL;
577 struct inode *lower_inode;
578 struct ecryptfs_crypt_stat *crypt_stat;
579 int rc = 0;
580 int byte_offset;
581 int num_extents_per_page;
582 int page_state;
583
584 crypt_stat = &(ecryptfs_inode_to_private(
585 page->mapping->host)->crypt_stat);
586 lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
587 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
588 rc = ecryptfs_do_readpage(file, page, page->index);
589 if (rc)
590 ecryptfs_printk(KERN_ERR, "Error attempting to copy "
591 "page at index [0x%.16x]\n",
592 page->index);
593 goto out;
594 }
595 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
596 base_extent = (page->index * num_extents_per_page);
597 lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
598 SLAB_KERNEL);
599 if (!lower_page_virt) {
600 rc = -ENOMEM;
601 ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
602 "lower page(s)\n");
603 goto out;
604 }
605 lower_page = virt_to_page(lower_page_virt);
606 page_state = ECRYPTFS_PAGE_STATE_UNREAD;
607 while (extent_offset < num_extents_per_page) {
608 ecryptfs_extent_to_lwr_pg_idx_and_offset(
609 &lower_page_idx, &byte_offset, crypt_stat,
610 (base_extent + extent_offset));
611 if (prior_lower_page_idx != lower_page_idx
612 || page_state == ECRYPTFS_PAGE_STATE_UNREAD) {
613 rc = ecryptfs_do_readpage(file, lower_page,
614 lower_page_idx);
615 if (rc) {
616 ecryptfs_printk(KERN_ERR, "Error reading "
617 "lower encrypted page; rc = "
618 "[%d]\n", rc);
619 goto out;
620 }
621 prior_lower_page_idx = lower_page_idx;
622 page_state = ECRYPTFS_PAGE_STATE_READ;
623 }
624 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
625 (base_extent + extent_offset));
626 if (rc) {
627 ecryptfs_printk(KERN_ERR, "Error attempting to "
628 "derive IV for extent [0x%.16x]; rc = "
629 "[%d]\n",
630 (base_extent + extent_offset), rc);
631 goto out;
632 }
633 if (unlikely(ecryptfs_verbosity > 0)) {
634 ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
635 "with iv:\n");
636 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
637 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
638 "decryption:\n");
639 ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
640 }
641 rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
642 (extent_offset
643 * crypt_stat->extent_size),
644 lower_page, byte_offset,
645 crypt_stat->extent_size,
646 extent_iv);
647 if (rc != crypt_stat->extent_size) {
648 ecryptfs_printk(KERN_ERR, "Error attempting to "
649 "decrypt extent [0x%.16x]\n",
650 (base_extent + extent_offset));
651 goto out;
652 }
653 rc = 0;
654 if (unlikely(ecryptfs_verbosity > 0)) {
655 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
656 "decryption:\n");
657 ecryptfs_dump_hex((char *)(page_address(page)
658 + byte_offset), 8);
659 }
660 extent_offset++;
661 }
662out:
663 if (lower_page_virt)
664 kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
665 return rc;
666}
667
668/**
669 * decrypt_scatterlist
670 *
671 * Returns the number of bytes decrypted; negative value on error
672 */
673static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
674 struct scatterlist *dest_sg,
675 struct scatterlist *src_sg, int size,
676 unsigned char *iv)
677{
678 int rc = 0;
679
680 /* Consider doing this once, when the file is opened */
681 mutex_lock(&crypt_stat->cs_tfm_mutex);
682 rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
683 crypt_stat->key_size);
684 if (rc) {
685 ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
686 rc);
687 mutex_unlock(&crypt_stat->cs_tfm_mutex);
688 rc = -EINVAL;
689 goto out;
690 }
691 ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
692 rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
693 iv);
694 mutex_unlock(&crypt_stat->cs_tfm_mutex);
695 if (rc) {
696 ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
697 rc);
698 goto out;
699 }
700 rc = size;
701out:
702 return rc;
703}
704
705/**
706 * ecryptfs_encrypt_page_offset
707 *
708 * Returns the number of bytes encrypted
709 */
710static int
711ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
712 struct page *dst_page, int dst_offset,
713 struct page *src_page, int src_offset, int size,
714 unsigned char *iv)
715{
716 struct scatterlist src_sg, dst_sg;
717
718 src_sg.page = src_page;
719 src_sg.offset = src_offset;
720 src_sg.length = size;
721 dst_sg.page = dst_page;
722 dst_sg.offset = dst_offset;
723 dst_sg.length = size;
724 return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
725}
726
727/**
728 * ecryptfs_decrypt_page_offset
729 *
730 * Returns the number of bytes decrypted
731 */
732static int
733ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
734 struct page *dst_page, int dst_offset,
735 struct page *src_page, int src_offset, int size,
736 unsigned char *iv)
737{
738 struct scatterlist src_sg, dst_sg;
739
740 src_sg.page = src_page;
741 src_sg.offset = src_offset;
742 src_sg.length = size;
743 dst_sg.page = dst_page;
744 dst_sg.offset = dst_offset;
745 dst_sg.length = size;
746 return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
747}
748
749#define ECRYPTFS_MAX_SCATTERLIST_LEN 4
750
751/**
752 * ecryptfs_init_crypt_ctx
753 * @crypt_stat: Uninitilized crypt stats structure
754 *
755 * Initialize the crypto context.
756 *
757 * TODO: Performance: Keep a cache of initialized cipher contexts;
758 * only init if needed
759 */
760int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
761{
762 int rc = -EINVAL;
763
764 if (!crypt_stat->cipher) {
765 ecryptfs_printk(KERN_ERR, "No cipher specified\n");
766 goto out;
767 }
768 ecryptfs_printk(KERN_DEBUG,
769 "Initializing cipher [%s]; strlen = [%d]; "
770 "key_size_bits = [%d]\n",
771 crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
772 crypt_stat->key_size << 3);
773 if (crypt_stat->tfm) {
774 rc = 0;
775 goto out;
776 }
777 mutex_lock(&crypt_stat->cs_tfm_mutex);
778 crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
779 ECRYPTFS_DEFAULT_CHAINING_MODE
780 | CRYPTO_TFM_REQ_WEAK_KEY);
781 mutex_unlock(&crypt_stat->cs_tfm_mutex);
782 if (!crypt_stat->tfm) {
783 ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
784 "Error initializing cipher [%s]\n",
785 crypt_stat->cipher);
786 goto out;
787 }
788 rc = 0;
789out:
790 return rc;
791}
792
793static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat)
794{
795 int extent_size_tmp;
796
797 crypt_stat->extent_mask = 0xFFFFFFFF;
798 crypt_stat->extent_shift = 0;
799 if (crypt_stat->extent_size == 0)
800 return;
801 extent_size_tmp = crypt_stat->extent_size;
802 while ((extent_size_tmp & 0x01) == 0) {
803 extent_size_tmp >>= 1;
804 crypt_stat->extent_mask <<= 1;
805 crypt_stat->extent_shift++;
806 }
807}
808
809void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
810{
811 /* Default values; may be overwritten as we are parsing the
812 * packets. */
813 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
814 set_extent_mask_and_shift(crypt_stat);
815 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
816 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
817 crypt_stat->header_extent_size =
818 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
819 } else
820 crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
821 crypt_stat->num_header_extents_at_front = 1;
822}
823
824/**
825 * ecryptfs_compute_root_iv
826 * @crypt_stats
827 *
828 * On error, sets the root IV to all 0's.
829 */
830int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
831{
832 int rc = 0;
833 char dst[MD5_DIGEST_SIZE];
834
835 BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
836 BUG_ON(crypt_stat->iv_bytes <= 0);
837 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) {
838 rc = -EINVAL;
839 ecryptfs_printk(KERN_WARNING, "Session key not valid; "
840 "cannot generate root IV\n");
841 goto out;
842 }
843 rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
844 crypt_stat->key_size);
845 if (rc) {
846 ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
847 "MD5 while generating root IV\n");
848 goto out;
849 }
850 memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
851out:
852 if (rc) {
853 memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
854 ECRYPTFS_SET_FLAG(crypt_stat->flags,
855 ECRYPTFS_SECURITY_WARNING);
856 }
857 return rc;
858}
859
860static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
861{
862 get_random_bytes(crypt_stat->key, crypt_stat->key_size);
863 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
864 ecryptfs_compute_root_iv(crypt_stat);
865 if (unlikely(ecryptfs_verbosity > 0)) {
866 ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
867 ecryptfs_dump_hex(crypt_stat->key,
868 crypt_stat->key_size);
869 }
870}
871
872/**
873 * ecryptfs_set_default_crypt_stat_vals
874 * @crypt_stat
875 *
876 * Default values in the event that policy does not override them.
877 */
878static void ecryptfs_set_default_crypt_stat_vals(
879 struct ecryptfs_crypt_stat *crypt_stat,
880 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
881{
882 ecryptfs_set_default_sizes(crypt_stat);
883 strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
884 crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
885 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
886 crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
887 crypt_stat->mount_crypt_stat = mount_crypt_stat;
888}
889
890/**
891 * ecryptfs_new_file_context
892 * @ecryptfs_dentry
893 *
894 * If the crypto context for the file has not yet been established,
895 * this is where we do that. Establishing a new crypto context
896 * involves the following decisions:
897 * - What cipher to use?
898 * - What set of authentication tokens to use?
899 * Here we just worry about getting enough information into the
900 * authentication tokens so that we know that they are available.
901 * We associate the available authentication tokens with the new file
902 * via the set of signatures in the crypt_stat struct. Later, when
903 * the headers are actually written out, we may again defer to
904 * userspace to perform the encryption of the session key; for the
905 * foreseeable future, this will be the case with public key packets.
906 *
907 * Returns zero on success; non-zero otherwise
908 */
909/* Associate an authentication token(s) with the file */
910int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
911{
912 int rc = 0;
913 struct ecryptfs_crypt_stat *crypt_stat =
914 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
915 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
916 &ecryptfs_superblock_to_private(
917 ecryptfs_dentry->d_sb)->mount_crypt_stat;
918 int cipher_name_len;
919
920 ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
921 /* See if there are mount crypt options */
922 if (mount_crypt_stat->global_auth_tok) {
923 ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
924 "file using mount_crypt_stat\n");
925 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
926 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
927 memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
928 mount_crypt_stat->global_auth_tok_sig,
929 ECRYPTFS_SIG_SIZE_HEX);
930 cipher_name_len =
931 strlen(mount_crypt_stat->global_default_cipher_name);
932 memcpy(crypt_stat->cipher,
933 mount_crypt_stat->global_default_cipher_name,
934 cipher_name_len);
935 crypt_stat->cipher[cipher_name_len] = '\0';
936 crypt_stat->key_size =
937 mount_crypt_stat->global_default_cipher_key_size;
938 ecryptfs_generate_new_key(crypt_stat);
939 } else
940 /* We should not encounter this scenario since we
941 * should detect lack of global_auth_tok at mount time
942 * TODO: Applies to 0.1 release only; remove in future
943 * release */
944 BUG();
945 rc = ecryptfs_init_crypt_ctx(crypt_stat);
946 if (rc)
947 ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
948 "context for cipher [%s]: rc = [%d]\n",
949 crypt_stat->cipher, rc);
950 return rc;
951}
952
953/**
954 * contains_ecryptfs_marker - check for the ecryptfs marker
955 * @data: The data block in which to check
956 *
957 * Returns one if marker found; zero if not found
958 */
959int contains_ecryptfs_marker(char *data)
960{
961 u32 m_1, m_2;
962
963 memcpy(&m_1, data, 4);
964 m_1 = be32_to_cpu(m_1);
965 memcpy(&m_2, (data + 4), 4);
966 m_2 = be32_to_cpu(m_2);
967 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
968 return 1;
969 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
970 "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
971 MAGIC_ECRYPTFS_MARKER);
972 ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
973 "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
974 return 0;
975}
976
977struct ecryptfs_flag_map_elem {
978 u32 file_flag;
979 u32 local_flag;
980};
981
982/* Add support for additional flags by adding elements here. */
983static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
984 {0x00000001, ECRYPTFS_ENABLE_HMAC},
985 {0x00000002, ECRYPTFS_ENCRYPTED}
986};
987
988/**
989 * ecryptfs_process_flags
990 * @crypt_stat
991 * @page_virt: Source data to be parsed
992 * @bytes_read: Updated with the number of bytes read
993 *
994 * Returns zero on success; non-zero if the flag set is invalid
995 */
996static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
997 char *page_virt, int *bytes_read)
998{
999 int rc = 0;
1000 int i;
1001 u32 flags;
1002
1003 memcpy(&flags, page_virt, 4);
1004 flags = be32_to_cpu(flags);
1005 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1006 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1007 if (flags & ecryptfs_flag_map[i].file_flag) {
1008 ECRYPTFS_SET_FLAG(crypt_stat->flags,
1009 ecryptfs_flag_map[i].local_flag);
1010 } else
1011 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
1012 ecryptfs_flag_map[i].local_flag);
1013 /* Version is in top 8 bits of the 32-bit flag vector */
1014 crypt_stat->file_version = ((flags >> 24) & 0xFF);
1015 (*bytes_read) = 4;
1016 return rc;
1017}
1018
1019/**
1020 * write_ecryptfs_marker
1021 * @page_virt: The pointer to in a page to begin writing the marker
1022 * @written: Number of bytes written
1023 *
1024 * Marker = 0x3c81b7f5
1025 */
1026static void write_ecryptfs_marker(char *page_virt, size_t *written)
1027{
1028 u32 m_1, m_2;
1029
1030 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1031 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1032 m_1 = cpu_to_be32(m_1);
1033 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1034 m_2 = cpu_to_be32(m_2);
1035 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1036 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1037 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1038}
1039
1040static void
1041write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1042 size_t *written)
1043{
1044 u32 flags = 0;
1045 int i;
1046
1047 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1048 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1049 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1050 ecryptfs_flag_map[i].local_flag))
1051 flags |= ecryptfs_flag_map[i].file_flag;
1052 /* Version is in top 8 bits of the 32-bit flag vector */
1053 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1054 flags = cpu_to_be32(flags);
1055 memcpy(page_virt, &flags, 4);
1056 (*written) = 4;
1057}
1058
1059struct ecryptfs_cipher_code_str_map_elem {
1060 char cipher_str[16];
1061 u16 cipher_code;
1062};
1063
1064/* Add support for additional ciphers by adding elements here. The
1065 * cipher_code is whatever OpenPGP applicatoins use to identify the
1066 * ciphers. List in order of probability. */
1067static struct ecryptfs_cipher_code_str_map_elem
1068ecryptfs_cipher_code_str_map[] = {
1069 {"aes",RFC2440_CIPHER_AES_128 },
1070 {"blowfish", RFC2440_CIPHER_BLOWFISH},
1071 {"des3_ede", RFC2440_CIPHER_DES3_EDE},
1072 {"cast5", RFC2440_CIPHER_CAST_5},
1073 {"twofish", RFC2440_CIPHER_TWOFISH},
1074 {"cast6", RFC2440_CIPHER_CAST_6},
1075 {"aes", RFC2440_CIPHER_AES_192},
1076 {"aes", RFC2440_CIPHER_AES_256}
1077};
1078
1079/**
1080 * ecryptfs_code_for_cipher_string
1081 * @str: The string representing the cipher name
1082 *
1083 * Returns zero on no match, or the cipher code on match
1084 */
1085u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
1086{
1087 int i;
1088 u16 code = 0;
1089 struct ecryptfs_cipher_code_str_map_elem *map =
1090 ecryptfs_cipher_code_str_map;
1091
1092 if (strcmp(crypt_stat->cipher, "aes") == 0) {
1093 switch (crypt_stat->key_size) {
1094 case 16:
1095 code = RFC2440_CIPHER_AES_128;
1096 break;
1097 case 24:
1098 code = RFC2440_CIPHER_AES_192;
1099 break;
1100 case 32:
1101 code = RFC2440_CIPHER_AES_256;
1102 }
1103 } else {
1104 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
1105 if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
1106 code = map[i].cipher_code;
1107 break;
1108 }
1109 }
1110 return code;
1111}
1112
1113/**
1114 * ecryptfs_cipher_code_to_string
1115 * @str: Destination to write out the cipher name
1116 * @cipher_code: The code to convert to cipher name string
1117 *
1118 * Returns zero on success
1119 */
1120int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
1121{
1122 int rc = 0;
1123 int i;
1124
1125 str[0] = '\0';
1126 for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
1127 if (cipher_code == ecryptfs_cipher_code_str_map[i].cipher_code)
1128 strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str);
1129 if (str[0] == '\0') {
1130 ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: "
1131 "[%d]\n", cipher_code);
1132 rc = -EINVAL;
1133 }
1134 return rc;
1135}
1136
1137/**
1138 * ecryptfs_read_header_region
1139 * @data
1140 * @dentry
1141 * @nd
1142 *
1143 * Returns zero on success; non-zero otherwise
1144 */
1145int ecryptfs_read_header_region(char *data, struct dentry *dentry,
1146 struct vfsmount *mnt)
1147{
1148 struct file *file;
1149 mm_segment_t oldfs;
1150 int rc;
1151
1152 mnt = mntget(mnt);
1153 file = dentry_open(dentry, mnt, O_RDONLY);
1154 if (IS_ERR(file)) {
1155 ecryptfs_printk(KERN_DEBUG, "Error opening file to "
1156 "read header region\n");
1157 mntput(mnt);
1158 rc = PTR_ERR(file);
1159 goto out;
1160 }
1161 file->f_pos = 0;
1162 oldfs = get_fs();
1163 set_fs(get_ds());
1164 /* For releases 0.1 and 0.2, all of the header information
1165 * fits in the first data extent-sized region. */
1166 rc = file->f_op->read(file, (char __user *)data,
1167 ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
1168 set_fs(oldfs);
1169 fput(file);
1170 rc = 0;
1171out:
1172 return rc;
1173}
1174
1175static void
1176write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
1177 size_t *written)
1178{
1179 u32 header_extent_size;
1180 u16 num_header_extents_at_front;
1181
1182 header_extent_size = (u32)crypt_stat->header_extent_size;
1183 num_header_extents_at_front =
1184 (u16)crypt_stat->num_header_extents_at_front;
1185 header_extent_size = cpu_to_be32(header_extent_size);
1186 memcpy(virt, &header_extent_size, 4);
1187 virt += 4;
1188 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
1189 memcpy(virt, &num_header_extents_at_front, 2);
1190 (*written) = 6;
1191}
1192
1193struct kmem_cache *ecryptfs_header_cache_0;
1194struct kmem_cache *ecryptfs_header_cache_1;
1195struct kmem_cache *ecryptfs_header_cache_2;
1196
1197/**
1198 * ecryptfs_write_headers_virt
1199 * @page_virt
1200 * @crypt_stat
1201 * @ecryptfs_dentry
1202 *
1203 * Format version: 1
1204 *
1205 * Header Extent:
1206 * Octets 0-7: Unencrypted file size (big-endian)
1207 * Octets 8-15: eCryptfs special marker
1208 * Octets 16-19: Flags
1209 * Octet 16: File format version number (between 0 and 255)
1210 * Octets 17-18: Reserved
1211 * Octet 19: Bit 1 (lsb): Reserved
1212 * Bit 2: Encrypted?
1213 * Bits 3-8: Reserved
1214 * Octets 20-23: Header extent size (big-endian)
1215 * Octets 24-25: Number of header extents at front of file
1216 * (big-endian)
1217 * Octet 26: Begin RFC 2440 authentication token packet set
1218 * Data Extent 0:
1219 * Lower data (CBC encrypted)
1220 * Data Extent 1:
1221 * Lower data (CBC encrypted)
1222 * ...
1223 *
1224 * Returns zero on success
1225 */
1226int ecryptfs_write_headers_virt(char *page_virt,
1227 struct ecryptfs_crypt_stat *crypt_stat,
1228 struct dentry *ecryptfs_dentry)
1229{
1230 int rc;
1231 size_t written;
1232 size_t offset;
1233
1234 offset = ECRYPTFS_FILE_SIZE_BYTES;
1235 write_ecryptfs_marker((page_virt + offset), &written);
1236 offset += written;
1237 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
1238 offset += written;
1239 write_header_metadata((page_virt + offset), crypt_stat, &written);
1240 offset += written;
1241 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
1242 ecryptfs_dentry, &written,
1243 PAGE_CACHE_SIZE - offset);
1244 if (rc)
1245 ecryptfs_printk(KERN_WARNING, "Error generating key packet "
1246 "set; rc = [%d]\n", rc);
1247 return rc;
1248}
1249
1250/**
1251 * ecryptfs_write_headers
1252 * @lower_file: The lower file struct, which was returned from dentry_open
1253 *
1254 * Write the file headers out. This will likely involve a userspace
1255 * callout, in which the session key is encrypted with one or more
1256 * public keys and/or the passphrase necessary to do the encryption is
1257 * retrieved via a prompt. Exactly what happens at this point should
1258 * be policy-dependent.
1259 *
1260 * Returns zero on success; non-zero on error
1261 */
1262int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
1263 struct file *lower_file)
1264{
1265 mm_segment_t oldfs;
1266 struct ecryptfs_crypt_stat *crypt_stat;
1267 char *page_virt;
1268 int current_header_page;
1269 int header_pages;
1270 int rc = 0;
1271
1272 crypt_stat = &ecryptfs_inode_to_private(
1273 ecryptfs_dentry->d_inode)->crypt_stat;
1274 if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1275 ECRYPTFS_ENCRYPTED))) {
1276 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
1277 ECRYPTFS_KEY_VALID)) {
1278 ecryptfs_printk(KERN_DEBUG, "Key is "
1279 "invalid; bailing out\n");
1280 rc = -EINVAL;
1281 goto out;
1282 }
1283 } else {
1284 rc = -EINVAL;
1285 ecryptfs_printk(KERN_WARNING,
1286 "Called with crypt_stat->encrypted == 0\n");
1287 goto out;
1288 }
1289 /* Released in this function */
1290 page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
1291 if (!page_virt) {
1292 ecryptfs_printk(KERN_ERR, "Out of memory\n");
1293 rc = -ENOMEM;
1294 goto out;
1295 }
1296 memset(page_virt, 0, PAGE_CACHE_SIZE);
1297 rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
1298 ecryptfs_dentry);
1299 if (unlikely(rc)) {
1300 ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
1301 memset(page_virt, 0, PAGE_CACHE_SIZE);
1302 goto out_free;
1303 }
1304 ecryptfs_printk(KERN_DEBUG,
1305 "Writing key packet set to underlying file\n");
1306 lower_file->f_pos = 0;
1307 oldfs = get_fs();
1308 set_fs(get_ds());
1309 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
1310 "write() w/ header page; lower_file->f_pos = "
1311 "[0x%.16x]\n", lower_file->f_pos);
1312 lower_file->f_op->write(lower_file, (char __user *)page_virt,
1313 PAGE_CACHE_SIZE, &lower_file->f_pos);
1314 header_pages = ((crypt_stat->header_extent_size
1315 * crypt_stat->num_header_extents_at_front)
1316 / PAGE_CACHE_SIZE);
1317 memset(page_virt, 0, PAGE_CACHE_SIZE);
1318 current_header_page = 1;
1319 while (current_header_page < header_pages) {
1320 ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
1321 "write() w/ zero'd page; lower_file->f_pos = "
1322 "[0x%.16x]\n", lower_file->f_pos);
1323 lower_file->f_op->write(lower_file, (char __user *)page_virt,
1324 PAGE_CACHE_SIZE, &lower_file->f_pos);
1325 current_header_page++;
1326 }
1327 set_fs(oldfs);
1328 ecryptfs_printk(KERN_DEBUG,
1329 "Done writing key packet set to underlying file.\n");
1330out_free:
1331 kmem_cache_free(ecryptfs_header_cache_0, page_virt);
1332out:
1333 return rc;
1334}
1335
1336static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1337 char *virt, int *bytes_read)
1338{
1339 int rc = 0;
1340 u32 header_extent_size;
1341 u16 num_header_extents_at_front;
1342
1343 memcpy(&header_extent_size, virt, 4);
1344 header_extent_size = be32_to_cpu(header_extent_size);
1345 virt += 4;
1346 memcpy(&num_header_extents_at_front, virt, 2);
1347 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1348 crypt_stat->header_extent_size = (int)header_extent_size;
1349 crypt_stat->num_header_extents_at_front =
1350 (int)num_header_extents_at_front;
1351 (*bytes_read) = 6;
1352 if ((crypt_stat->header_extent_size
1353 * crypt_stat->num_header_extents_at_front)
1354 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
1355 rc = -EINVAL;
1356 ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
1357 "[%d]\n", crypt_stat->header_extent_size);
1358 }
1359 return rc;
1360}
1361
1362/**
1363 * set_default_header_data
1364 *
1365 * For version 0 file format; this function is only for backwards
1366 * compatibility for files created with the prior versions of
1367 * eCryptfs.
1368 */
1369static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1370{
1371 crypt_stat->header_extent_size = 4096;
1372 crypt_stat->num_header_extents_at_front = 1;
1373}
1374
1375/**
1376 * ecryptfs_read_headers_virt
1377 *
1378 * Read/parse the header data. The header format is detailed in the
1379 * comment block for the ecryptfs_write_headers_virt() function.
1380 *
1381 * Returns zero on success
1382 */
1383static int ecryptfs_read_headers_virt(char *page_virt,
1384 struct ecryptfs_crypt_stat *crypt_stat,
1385 struct dentry *ecryptfs_dentry)
1386{
1387 int rc = 0;
1388 int offset;
1389 int bytes_read;
1390
1391 ecryptfs_set_default_sizes(crypt_stat);
1392 crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
1393 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1394 offset = ECRYPTFS_FILE_SIZE_BYTES;
1395 rc = contains_ecryptfs_marker(page_virt + offset);
1396 if (rc == 0) {
1397 rc = -EINVAL;
1398 goto out;
1399 }
1400 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1401 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
1402 &bytes_read);
1403 if (rc) {
1404 ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
1405 goto out;
1406 }
1407 if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
1408 ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
1409 "file version [%d] is supported by this "
1410 "version of eCryptfs\n",
1411 crypt_stat->file_version,
1412 ECRYPTFS_SUPPORTED_FILE_VERSION);
1413 rc = -EINVAL;
1414 goto out;
1415 }
1416 offset += bytes_read;
1417 if (crypt_stat->file_version >= 1) {
1418 rc = parse_header_metadata(crypt_stat, (page_virt + offset),
1419 &bytes_read);
1420 if (rc) {
1421 ecryptfs_printk(KERN_WARNING, "Error reading header "
1422 "metadata; rc = [%d]\n", rc);
1423 }
1424 offset += bytes_read;
1425 } else
1426 set_default_header_data(crypt_stat);
1427 rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset),
1428 ecryptfs_dentry);
1429out:
1430 return rc;
1431}
1432
1433/**
1434 * ecryptfs_read_headers
1435 *
1436 * Returns zero if valid headers found and parsed; non-zero otherwise
1437 */
1438int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
1439 struct file *lower_file)
1440{
1441 int rc = 0;
1442 char *page_virt = NULL;
1443 mm_segment_t oldfs;
1444 ssize_t bytes_read;
1445 struct ecryptfs_crypt_stat *crypt_stat =
1446 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
1447
1448 /* Read the first page from the underlying file */
1449 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
1450 if (!page_virt) {
1451 rc = -ENOMEM;
1452 ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
1453 goto out;
1454 }
1455 lower_file->f_pos = 0;
1456 oldfs = get_fs();
1457 set_fs(get_ds());
1458 bytes_read = lower_file->f_op->read(lower_file,
1459 (char __user *)page_virt,
1460 ECRYPTFS_DEFAULT_EXTENT_SIZE,
1461 &lower_file->f_pos);
1462 set_fs(oldfs);
1463 if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
1464 rc = -EINVAL;
1465 goto out;
1466 }
1467 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1468 ecryptfs_dentry);
1469 if (rc) {
1470 ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
1471 "found\n");
1472 rc = -EINVAL;
1473 }
1474out:
1475 if (page_virt) {
1476 memset(page_virt, 0, PAGE_CACHE_SIZE);
1477 kmem_cache_free(ecryptfs_header_cache_1, page_virt);
1478 }
1479 return rc;
1480}
1481
1482/**
1483 * ecryptfs_encode_filename - converts a plaintext file name to cipher text
1484 * @crypt_stat: The crypt_stat struct associated with the file anem to encode
1485 * @name: The plaintext name
1486 * @length: The length of the plaintext
1487 * @encoded_name: The encypted name
1488 *
1489 * Encrypts and encodes a filename into something that constitutes a
1490 * valid filename for a filesystem, with printable characters.
1491 *
1492 * We assume that we have a properly initialized crypto context,
1493 * pointed to by crypt_stat->tfm.
1494 *
1495 * TODO: Implement filename decoding and decryption here, in place of
1496 * memcpy. We are keeping the framework around for now to (1)
1497 * facilitate testing of the components needed to implement filename
1498 * encryption and (2) to provide a code base from which other
1499 * developers in the community can easily implement this feature.
1500 *
1501 * Returns the length of encoded filename; negative if error
1502 */
1503int
1504ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
1505 const char *name, int length, char **encoded_name)
1506{
1507 int error = 0;
1508
1509 (*encoded_name) = kmalloc(length + 2, GFP_KERNEL);
1510 if (!(*encoded_name)) {
1511 error = -ENOMEM;
1512 goto out;
1513 }
1514 /* TODO: Filename encryption is a scheduled feature for a
1515 * future version of eCryptfs. This function is here only for
1516 * the purpose of providing a framework for other developers
1517 * to easily implement filename encryption. Hint: Replace this
1518 * memcpy() with a call to encrypt and encode the
1519 * filename, the set the length accordingly. */
1520 memcpy((void *)(*encoded_name), (void *)name, length);
1521 (*encoded_name)[length] = '\0';
1522 error = length + 1;
1523out:
1524 return error;
1525}
1526
1527/**
1528 * ecryptfs_decode_filename - converts the cipher text name to plaintext
1529 * @crypt_stat: The crypt_stat struct associated with the file
1530 * @name: The filename in cipher text
1531 * @length: The length of the cipher text name
1532 * @decrypted_name: The plaintext name
1533 *
1534 * Decodes and decrypts the filename.
1535 *
1536 * We assume that we have a properly initialized crypto context,
1537 * pointed to by crypt_stat->tfm.
1538 *
1539 * TODO: Implement filename decoding and decryption here, in place of
1540 * memcpy. We are keeping the framework around for now to (1)
1541 * facilitate testing of the components needed to implement filename
1542 * encryption and (2) to provide a code base from which other
1543 * developers in the community can easily implement this feature.
1544 *
1545 * Returns the length of decoded filename; negative if error
1546 */
1547int
1548ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
1549 const char *name, int length, char **decrypted_name)
1550{
1551 int error = 0;
1552
1553 (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
1554 if (!(*decrypted_name)) {
1555 error = -ENOMEM;
1556 goto out;
1557 }
1558 /* TODO: Filename encryption is a scheduled feature for a
1559 * future version of eCryptfs. This function is here only for
1560 * the purpose of providing a framework for other developers
1561 * to easily implement filename encryption. Hint: Replace this
1562 * memcpy() with a call to decode and decrypt the
1563 * filename, the set the length accordingly. */
1564 memcpy((void *)(*decrypted_name), (void *)name, length);
1565 (*decrypted_name)[length + 1] = '\0'; /* Only for convenience
1566 * in printing out the
1567 * string in debug
1568 * messages */
1569 error = length;
1570out:
1571 return error;
1572}
1573
1574/**
1575 * ecryptfs_process_cipher - Perform cipher initialization.
1576 * @tfm: Crypto context set by this function
1577 * @key_tfm: Crypto context for key material, set by this function
1578 * @cipher_name: Name of the cipher.
1579 * @key_size: Size of the key in bytes.
1580 *
1581 * Returns zero on success. Any crypto_tfm structs allocated here
1582 * should be released by other functions, such as on a superblock put
1583 * event, regardless of whether this function succeeds for fails.
1584 */
1585int
1586ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
1587 char *cipher_name, size_t key_size)
1588{
1589 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1590 int rc;
1591
1592 *tfm = *key_tfm = NULL;
1593 if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
1594 rc = -EINVAL;
1595 printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
1596 "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
1597 goto out;
1598 }
1599 *tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
1600 | CRYPTO_TFM_REQ_WEAK_KEY));
1601 if (!(*tfm)) {
1602 rc = -EINVAL;
1603 printk(KERN_ERR "Unable to allocate crypto cipher with name "
1604 "[%s]\n", cipher_name);
1605 goto out;
1606 }
1607 *key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
1608 if (!(*key_tfm)) {
1609 rc = -EINVAL;
1610 printk(KERN_ERR "Unable to allocate crypto cipher with name "
1611 "[%s]\n", cipher_name);
1612 goto out;
1613 }
1614 if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
1615 rc = -EINVAL;
1616 printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
1617 "supported by cipher [%s] is [%d]\n", key_size,
1618 cipher_name, crypto_tfm_alg_min_keysize(*tfm));
1619 goto out;
1620 }
1621 if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
1622 rc = -EINVAL;
1623 printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
1624 "supported by cipher [%s] is [%d]\n", key_size,
1625 cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
1626 goto out;
1627 }
1628 if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
1629 rc = -EINVAL;
1630 printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
1631 "supported by cipher [%s] is [%d]\n", key_size,
1632 cipher_name, crypto_tfm_alg_min_keysize(*tfm));
1633 goto out;
1634 }
1635 if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
1636 rc = -EINVAL;
1637 printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
1638 "supported by cipher [%s] is [%d]\n", key_size,
1639 cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
1640 goto out;
1641 }
1642 get_random_bytes(dummy_key, key_size);
1643 rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
1644 if (rc) {
1645 printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
1646 "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
1647 rc = -EINVAL;
1648 goto out;
1649 }
1650 rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
1651 if (rc) {
1652 printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
1653 "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
1654 rc = -EINVAL;
1655 goto out;
1656 }
1657out:
1658 return rc;
1659}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
new file mode 100644
index 000000000000..61f8e894284f
--- /dev/null
+++ b/fs/ecryptfs/debug.c
@@ -0,0 +1,123 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * Functions only useful for debugging.
4 *
5 * Copyright (C) 2006 International Business Machines Corp.
6 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24#include "ecryptfs_kernel.h"
25
26/**
27 * ecryptfs_dump_auth_tok - debug function to print auth toks
28 *
29 * This function will print the contents of an ecryptfs authentication
30 * token.
31 */
32void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
33{
34 char salt[ECRYPTFS_SALT_SIZE * 2 + 1];
35 char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
36
37 ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
38 auth_tok);
39 if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) {
40 ecryptfs_printk(KERN_DEBUG, " * private key type\n");
41 ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
42 "IN ECRYPTFS VERSION 0.1)\n");
43 } else {
44 ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
45 ecryptfs_to_hex(salt, auth_tok->token.password.salt,
46 ECRYPTFS_SALT_SIZE);
47 salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
48 ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
49 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
50 ECRYPTFS_PERSISTENT_PASSWORD)) {
51 ecryptfs_printk(KERN_DEBUG, " * persistent\n");
52 }
53 memcpy(sig, auth_tok->token.password.signature,
54 ECRYPTFS_SIG_SIZE_HEX);
55 sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
56 ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig);
57 }
58 ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n",
59 auth_tok->session_key.flags);
60 if (auth_tok->session_key.flags
61 & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT)
62 ecryptfs_printk(KERN_DEBUG,
63 " * Userspace decrypt request set\n");
64 if (auth_tok->session_key.flags
65 & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT)
66 ecryptfs_printk(KERN_DEBUG,
67 " * Userspace encrypt request set\n");
68 if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) {
69 ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n");
70 ecryptfs_printk(KERN_DEBUG,
71 " * session_key.decrypted_key_size = [0x%x]\n",
72 auth_tok->session_key.decrypted_key_size);
73 ecryptfs_printk(KERN_DEBUG, " * Decrypted session key "
74 "dump:\n");
75 if (ecryptfs_verbosity > 0)
76 ecryptfs_dump_hex(auth_tok->session_key.decrypted_key,
77 ECRYPTFS_DEFAULT_KEY_BYTES);
78 }
79 if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) {
80 ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n");
81 ecryptfs_printk(KERN_DEBUG,
82 " * session_key.encrypted_key_size = [0x%x]\n",
83 auth_tok->session_key.encrypted_key_size);
84 ecryptfs_printk(KERN_DEBUG, " * Encrypted session key "
85 "dump:\n");
86 if (ecryptfs_verbosity > 0)
87 ecryptfs_dump_hex(auth_tok->session_key.encrypted_key,
88 auth_tok->session_key.
89 encrypted_key_size);
90 }
91}
92
93/**
94 * ecryptfs_dump_hex - debug hex printer
95 * @data: string of bytes to be printed
96 * @bytes: number of bytes to print
97 *
98 * Dump hexadecimal representation of char array
99 */
100void ecryptfs_dump_hex(char *data, int bytes)
101{
102 int i = 0;
103 int add_newline = 1;
104
105 if (ecryptfs_verbosity < 1)
106 return;
107 if (bytes != 0) {
108 printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]);
109 i++;
110 }
111 while (i < bytes) {
112 printk("0x%.2x.", (unsigned char)data[i]);
113 i++;
114 if (i % 16 == 0) {
115 printk("\n");
116 add_newline = 0;
117 } else
118 add_newline = 1;
119 }
120 if (add_newline)
121 printk("\n");
122}
123
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
new file mode 100644
index 000000000000..f0d2a433242b
--- /dev/null
+++ b/fs/ecryptfs/dentry.c
@@ -0,0 +1,87 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA.
23 */
24
25#include <linux/dcache.h>
26#include <linux/namei.h>
27#include "ecryptfs_kernel.h"
28
29/**
30 * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
31 * @dentry: The ecryptfs dentry
32 * @nd: The associated nameidata
33 *
34 * Called when the VFS needs to revalidate a dentry. This
35 * is called whenever a name lookup finds a dentry in the
36 * dcache. Most filesystems leave this as NULL, because all their
37 * dentries in the dcache are valid.
38 *
39 * Returns 1 if valid, 0 otherwise.
40 *
41 */
42static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
43{
44 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
45 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
46 struct dentry *dentry_save;
47 struct vfsmount *vfsmount_save;
48 int rc = 1;
49
50 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
51 goto out;
52 dentry_save = nd->dentry;
53 vfsmount_save = nd->mnt;
54 nd->dentry = lower_dentry;
55 nd->mnt = lower_mnt;
56 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
57 nd->dentry = dentry_save;
58 nd->mnt = vfsmount_save;
59out:
60 return rc;
61}
62
63struct kmem_cache *ecryptfs_dentry_info_cache;
64
65/**
66 * ecryptfs_d_release
67 * @dentry: The ecryptfs dentry
68 *
69 * Called when a dentry is really deallocated.
70 */
71static void ecryptfs_d_release(struct dentry *dentry)
72{
73 struct dentry *lower_dentry;
74
75 lower_dentry = ecryptfs_dentry_to_lower(dentry);
76 if (ecryptfs_dentry_to_private(dentry))
77 kmem_cache_free(ecryptfs_dentry_info_cache,
78 ecryptfs_dentry_to_private(dentry));
79 if (lower_dentry)
80 dput(lower_dentry);
81 return;
82}
83
84struct dentry_operations ecryptfs_dops = {
85 .d_revalidate = ecryptfs_d_revalidate,
86 .d_release = ecryptfs_d_release,
87};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
new file mode 100644
index 000000000000..872c9958531a
--- /dev/null
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -0,0 +1,482 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * Kernel declarations.
4 *
5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2006 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#ifndef ECRYPTFS_KERNEL_H
27#define ECRYPTFS_KERNEL_H
28
29#include <keys/user-type.h>
30#include <linux/fs.h>
31#include <linux/scatterlist.h>
32
33/* Version verification for shared data structures w/ userspace */
34#define ECRYPTFS_VERSION_MAJOR 0x00
35#define ECRYPTFS_VERSION_MINOR 0x04
36#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
37/* These flags indicate which features are supported by the kernel
38 * module; userspace tools such as the mount helper read
39 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
40 * how to behave. */
41#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
42#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
43#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
44#define ECRYPTFS_VERSIONING_POLICY 0x00000008
45#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
46 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
47
48#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
49#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
50#define ECRYPTFS_SALT_SIZE 8
51#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
52/* The original signature size is only for what is stored on disk; all
53 * in-memory representations are expanded hex, so it better adapted to
54 * be passed around or referenced on the command line */
55#define ECRYPTFS_SIG_SIZE 8
56#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
57#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
58#define ECRYPTFS_MAX_KEY_BYTES 64
59#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
60#define ECRYPTFS_DEFAULT_IV_BYTES 16
61#define ECRYPTFS_FILE_VERSION 0x01
62#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
63#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
64#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
65
66#define RFC2440_CIPHER_DES3_EDE 0x02
67#define RFC2440_CIPHER_CAST_5 0x03
68#define RFC2440_CIPHER_BLOWFISH 0x04
69#define RFC2440_CIPHER_AES_128 0x07
70#define RFC2440_CIPHER_AES_192 0x08
71#define RFC2440_CIPHER_AES_256 0x09
72#define RFC2440_CIPHER_TWOFISH 0x0a
73#define RFC2440_CIPHER_CAST_6 0x0b
74
75#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) (flag_bit_vector |= (flag))
76#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) (flag_bit_vector &= ~(flag))
77#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) (flag_bit_vector & (flag))
78
79/**
80 * For convenience, we may need to pass around the encrypted session
81 * key between kernel and userspace because the authentication token
82 * may not be extractable. For example, the TPM may not release the
83 * private key, instead requiring the encrypted data and returning the
84 * decrypted data.
85 */
86struct ecryptfs_session_key {
87#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
88#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
89#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
90#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
91 u32 flags;
92 u32 encrypted_key_size;
93 u32 decrypted_key_size;
94 u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
95 u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
96};
97
98struct ecryptfs_password {
99 u32 password_bytes;
100 s32 hash_algo;
101 u32 hash_iterations;
102 u32 session_key_encryption_key_bytes;
103#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
104#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
105 u32 flags;
106 /* Iterated-hash concatenation of salt and passphrase */
107 u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
108 u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
109 /* Always in expanded hex */
110 u8 salt[ECRYPTFS_SALT_SIZE];
111};
112
113enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
114
115/* May be a password or a private key */
116struct ecryptfs_auth_tok {
117 u16 version; /* 8-bit major and 8-bit minor */
118 u16 token_type;
119 u32 flags;
120 struct ecryptfs_session_key session_key;
121 u8 reserved[32];
122 union {
123 struct ecryptfs_password password;
124 /* Private key is in future eCryptfs releases */
125 } token;
126} __attribute__ ((packed));
127
128void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
129extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
130extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
131
132struct ecryptfs_key_record {
133 unsigned char type;
134 size_t enc_key_size;
135 unsigned char sig[ECRYPTFS_SIG_SIZE];
136 unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
137};
138
139struct ecryptfs_auth_tok_list {
140 struct ecryptfs_auth_tok *auth_tok;
141 struct list_head list;
142};
143
144struct ecryptfs_crypt_stat;
145struct ecryptfs_mount_crypt_stat;
146
147struct ecryptfs_page_crypt_context {
148 struct page *page;
149#define ECRYPTFS_PREPARE_COMMIT_MODE 0
150#define ECRYPTFS_WRITEPAGE_MODE 1
151 unsigned int mode;
152 union {
153 struct file *lower_file;
154 struct writeback_control *wbc;
155 } param;
156};
157
158static inline struct ecryptfs_auth_tok *
159ecryptfs_get_key_payload_data(struct key *key)
160{
161 return (struct ecryptfs_auth_tok *)
162 (((struct user_key_payload*)key->payload.data)->data);
163}
164
165#define ECRYPTFS_SUPER_MAGIC 0xf15f
166#define ECRYPTFS_MAX_KEYSET_SIZE 1024
167#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
168#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
169#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
170#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
171#define ECRYPTFS_SALT_BYTES 2
172#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
173#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
174#define ECRYPTFS_FILE_SIZE_BYTES 8
175#define ECRYPTFS_DEFAULT_CIPHER "aes"
176#define ECRYPTFS_DEFAULT_KEY_BYTES 16
177#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
178#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
179#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
180#define MD5_DIGEST_SIZE 16
181
182/**
183 * This is the primary struct associated with each encrypted file.
184 *
185 * TODO: cache align/pack?
186 */
187struct ecryptfs_crypt_stat {
188#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
189#define ECRYPTFS_POLICY_APPLIED 0x00000002
190#define ECRYPTFS_NEW_FILE 0x00000004
191#define ECRYPTFS_ENCRYPTED 0x00000008
192#define ECRYPTFS_SECURITY_WARNING 0x00000010
193#define ECRYPTFS_ENABLE_HMAC 0x00000020
194#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
195#define ECRYPTFS_KEY_VALID 0x00000080
196 u32 flags;
197 unsigned int file_version;
198 size_t iv_bytes;
199 size_t num_keysigs;
200 size_t header_extent_size;
201 size_t num_header_extents_at_front;
202 size_t extent_size; /* Data extent size; default is 4096 */
203 size_t key_size;
204 size_t extent_shift;
205 unsigned int extent_mask;
206 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
207 struct crypto_tfm *tfm;
208 struct crypto_tfm *md5_tfm; /* Crypto context for generating
209 * the initialization vectors */
210 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
211 unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
212 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
213 unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
214 struct mutex cs_tfm_mutex;
215 struct mutex cs_md5_tfm_mutex;
216 struct mutex cs_mutex;
217};
218
219/* inode private data. */
220struct ecryptfs_inode_info {
221 struct inode vfs_inode;
222 struct inode *wii_inode;
223 struct ecryptfs_crypt_stat crypt_stat;
224};
225
226/* dentry private data. Each dentry must keep track of a lower
227 * vfsmount too. */
228struct ecryptfs_dentry_info {
229 struct dentry *wdi_dentry;
230 struct vfsmount *lower_mnt;
231 struct ecryptfs_crypt_stat *crypt_stat;
232};
233
234/**
235 * This struct is to enable a mount-wide passphrase/salt combo. This
236 * is more or less a stopgap to provide similar functionality to other
237 * crypto filesystems like EncFS or CFS until full policy support is
238 * implemented in eCryptfs.
239 */
240struct ecryptfs_mount_crypt_stat {
241 /* Pointers to memory we do not own, do not free these */
242#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
243 u32 flags;
244 struct ecryptfs_auth_tok *global_auth_tok;
245 struct key *global_auth_tok_key;
246 size_t global_default_cipher_key_size;
247 struct crypto_tfm *global_key_tfm;
248 struct mutex global_key_tfm_mutex;
249 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
250 + 1];
251 unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
252};
253
254/* superblock private data. */
255struct ecryptfs_sb_info {
256 struct super_block *wsi_sb;
257 struct ecryptfs_mount_crypt_stat mount_crypt_stat;
258};
259
260/* file private data. */
261struct ecryptfs_file_info {
262 struct file *wfi_file;
263 struct ecryptfs_crypt_stat *crypt_stat;
264};
265
266/* auth_tok <=> encrypted_session_key mappings */
267struct ecryptfs_auth_tok_list_item {
268 unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES];
269 struct list_head list;
270 struct ecryptfs_auth_tok auth_tok;
271};
272
273static inline struct ecryptfs_file_info *
274ecryptfs_file_to_private(struct file *file)
275{
276 return (struct ecryptfs_file_info *)file->private_data;
277}
278
279static inline void
280ecryptfs_set_file_private(struct file *file,
281 struct ecryptfs_file_info *file_info)
282{
283 file->private_data = file_info;
284}
285
286static inline struct file *ecryptfs_file_to_lower(struct file *file)
287{
288 return ((struct ecryptfs_file_info *)file->private_data)->wfi_file;
289}
290
291static inline void
292ecryptfs_set_file_lower(struct file *file, struct file *lower_file)
293{
294 ((struct ecryptfs_file_info *)file->private_data)->wfi_file =
295 lower_file;
296}
297
298static inline struct ecryptfs_inode_info *
299ecryptfs_inode_to_private(struct inode *inode)
300{
301 return container_of(inode, struct ecryptfs_inode_info, vfs_inode);
302}
303
304static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode)
305{
306 return ecryptfs_inode_to_private(inode)->wii_inode;
307}
308
309static inline void
310ecryptfs_set_inode_lower(struct inode *inode, struct inode *lower_inode)
311{
312 ecryptfs_inode_to_private(inode)->wii_inode = lower_inode;
313}
314
315static inline struct ecryptfs_sb_info *
316ecryptfs_superblock_to_private(struct super_block *sb)
317{
318 return (struct ecryptfs_sb_info *)sb->s_fs_info;
319}
320
321static inline void
322ecryptfs_set_superblock_private(struct super_block *sb,
323 struct ecryptfs_sb_info *sb_info)
324{
325 sb->s_fs_info = sb_info;
326}
327
328static inline struct super_block *
329ecryptfs_superblock_to_lower(struct super_block *sb)
330{
331 return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb;
332}
333
334static inline void
335ecryptfs_set_superblock_lower(struct super_block *sb,
336 struct super_block *lower_sb)
337{
338 ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb;
339}
340
341static inline struct ecryptfs_dentry_info *
342ecryptfs_dentry_to_private(struct dentry *dentry)
343{
344 return (struct ecryptfs_dentry_info *)dentry->d_fsdata;
345}
346
347static inline void
348ecryptfs_set_dentry_private(struct dentry *dentry,
349 struct ecryptfs_dentry_info *dentry_info)
350{
351 dentry->d_fsdata = dentry_info;
352}
353
354static inline struct dentry *
355ecryptfs_dentry_to_lower(struct dentry *dentry)
356{
357 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry;
358}
359
360static inline void
361ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
362{
363 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry =
364 lower_dentry;
365}
366
367static inline struct vfsmount *
368ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
369{
370 return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt;
371}
372
373static inline void
374ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
375{
376 ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt =
377 lower_mnt;
378}
379
380#define ecryptfs_printk(type, fmt, arg...) \
381 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg);
382void __ecryptfs_printk(const char *fmt, ...);
383
384extern const struct file_operations ecryptfs_main_fops;
385extern const struct file_operations ecryptfs_dir_fops;
386extern struct inode_operations ecryptfs_main_iops;
387extern struct inode_operations ecryptfs_dir_iops;
388extern struct inode_operations ecryptfs_symlink_iops;
389extern struct super_operations ecryptfs_sops;
390extern struct dentry_operations ecryptfs_dops;
391extern struct address_space_operations ecryptfs_aops;
392extern int ecryptfs_verbosity;
393
394extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
395extern struct kmem_cache *ecryptfs_file_info_cache;
396extern struct kmem_cache *ecryptfs_dentry_info_cache;
397extern struct kmem_cache *ecryptfs_inode_info_cache;
398extern struct kmem_cache *ecryptfs_sb_info_cache;
399extern struct kmem_cache *ecryptfs_header_cache_0;
400extern struct kmem_cache *ecryptfs_header_cache_1;
401extern struct kmem_cache *ecryptfs_header_cache_2;
402extern struct kmem_cache *ecryptfs_lower_page_cache;
403
404int ecryptfs_interpose(struct dentry *hidden_dentry,
405 struct dentry *this_dentry, struct super_block *sb,
406 int flag);
407int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
408int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
409 const char *name, int length,
410 char **decrypted_name);
411int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
412 const char *name, int length,
413 char **encoded_name);
414struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
415void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src);
416void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src);
417void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src);
418void ecryptfs_dump_hex(char *data, int bytes);
419int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
420 int sg_size);
421int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
422void ecryptfs_rotate_iv(unsigned char *iv);
423void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
424void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
425void ecryptfs_destruct_mount_crypt_stat(
426 struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
427int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
428int ecryptfs_write_inode_size_to_header(struct file *lower_file,
429 struct inode *lower_inode,
430 struct inode *inode);
431int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
432 struct file *lower_file,
433 unsigned long lower_page_index, int byte_offset,
434 int region_bytes);
435int
436ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
437 struct file *lower_file, int byte_offset,
438 int region_size);
439int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
440 struct file *lower_file);
441int ecryptfs_do_readpage(struct file *file, struct page *page,
442 pgoff_t lower_page_index);
443int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
444 char **lower_virt,
445 struct inode *lower_inode,
446 unsigned long lower_page_index);
447int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
448 struct inode *lower_inode,
449 struct writeback_control *wbc);
450int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
451int ecryptfs_decrypt_page(struct file *file, struct page *page);
452int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
453 struct file *lower_file);
454int ecryptfs_write_headers_virt(char *page_virt,
455 struct ecryptfs_crypt_stat *crypt_stat,
456 struct dentry *ecryptfs_dentry);
457int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
458 struct file *lower_file);
459int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
460int contains_ecryptfs_marker(char *data);
461int ecryptfs_read_header_region(char *data, struct dentry *dentry,
462 struct vfsmount *mnt);
463u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
464int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
465void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
466int ecryptfs_generate_key_packet_set(char *dest_base,
467 struct ecryptfs_crypt_stat *crypt_stat,
468 struct dentry *ecryptfs_dentry,
469 size_t *len, size_t max);
470int process_request_key_err(long err_code);
471int
472ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
473 unsigned char *src, struct dentry *ecryptfs_dentry);
474int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
475int
476ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
477 char *cipher_name, size_t key_size);
478int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
479int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
480void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
481
482#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
new file mode 100644
index 000000000000..c8550c9f9cd2
--- /dev/null
+++ b/fs/ecryptfs/file.c
@@ -0,0 +1,440 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/file.h>
27#include <linux/poll.h>
28#include <linux/mount.h>
29#include <linux/pagemap.h>
30#include <linux/security.h>
31#include <linux/smp_lock.h>
32#include <linux/compat.h>
33#include "ecryptfs_kernel.h"
34
35/**
36 * ecryptfs_llseek
37 * @file: File we are seeking in
38 * @offset: The offset to seek to
39 * @origin: 2 - offset from i_size; 1 - offset from f_pos
40 *
41 * Returns the position we have seeked to, or negative on error
42 */
43static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin)
44{
45 loff_t rv;
46 loff_t new_end_pos;
47 int rc;
48 int expanding_file = 0;
49 struct inode *inode = file->f_mapping->host;
50
51 /* If our offset is past the end of our file, we're going to
52 * need to grow it so we have a valid length of 0's */
53 new_end_pos = offset;
54 switch (origin) {
55 case 2:
56 new_end_pos += i_size_read(inode);
57 expanding_file = 1;
58 break;
59 case 1:
60 new_end_pos += file->f_pos;
61 if (new_end_pos > i_size_read(inode)) {
62 ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
63 "> i_size_read(inode)(=[0x%.16x])\n",
64 new_end_pos, i_size_read(inode));
65 expanding_file = 1;
66 }
67 break;
68 default:
69 if (new_end_pos > i_size_read(inode)) {
70 ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
71 "> i_size_read(inode)(=[0x%.16x])\n",
72 new_end_pos, i_size_read(inode));
73 expanding_file = 1;
74 }
75 }
76 ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos);
77 if (expanding_file) {
78 rc = ecryptfs_truncate(file->f_dentry, new_end_pos);
79 if (rc) {
80 rv = rc;
81 ecryptfs_printk(KERN_ERR, "Error on attempt to "
82 "truncate to (higher) offset [0x%.16x];"
83 " rc = [%d]\n", new_end_pos, rc);
84 goto out;
85 }
86 }
87 rv = generic_file_llseek(file, offset, origin);
88out:
89 return rv;
90}
91
92/**
93 * ecryptfs_read_update_atime
94 *
95 * generic_file_read updates the atime of upper layer inode. But, it
96 * doesn't give us a chance to update the atime of the lower layer
97 * inode. This function is a wrapper to generic_file_read. It
98 * updates the atime of the lower level inode if generic_file_read
99 * returns without any errors. This is to be used only for file reads.
100 * The function to be used for directory reads is ecryptfs_read.
101 */
102static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
103 const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos)
105{
106 int rc;
107 struct dentry *lower_dentry;
108 struct vfsmount *lower_vfsmount;
109 struct file *file = iocb->ki_filp;
110
111 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
112 /*
113 * Even though this is a async interface, we need to wait
114 * for IO to finish to update atime
115 */
116 if (-EIOCBQUEUED == rc)
117 rc = wait_on_sync_kiocb(iocb);
118 if (rc >= 0) {
119 lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry);
120 lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry);
121 touch_atime(lower_vfsmount, lower_dentry);
122 }
123 return rc;
124}
125
126struct ecryptfs_getdents_callback {
127 void *dirent;
128 struct dentry *dentry;
129 filldir_t filldir;
130 int err;
131 int filldir_called;
132 int entries_written;
133};
134
135/* Inspired by generic filldir in fs/readir.c */
136static int
137ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset,
138 u64 ino, unsigned int d_type)
139{
140 struct ecryptfs_crypt_stat *crypt_stat;
141 struct ecryptfs_getdents_callback *buf =
142 (struct ecryptfs_getdents_callback *)dirent;
143 int rc;
144 int decoded_length;
145 char *decoded_name;
146
147 crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat;
148 buf->filldir_called++;
149 decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen,
150 &decoded_name);
151 if (decoded_length < 0) {
152 rc = decoded_length;
153 goto out;
154 }
155 rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset,
156 ino, d_type);
157 kfree(decoded_name);
158 if (rc >= 0)
159 buf->entries_written++;
160out:
161 return rc;
162}
163
164/**
165 * ecryptfs_readdir
166 * @file: The ecryptfs file struct
167 * @dirent: Directory entry
168 * @filldir: The filldir callback function
169 */
170static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
171{
172 int rc;
173 struct file *lower_file;
174 struct inode *inode;
175 struct ecryptfs_getdents_callback buf;
176
177 lower_file = ecryptfs_file_to_lower(file);
178 lower_file->f_pos = file->f_pos;
179 inode = file->f_dentry->d_inode;
180 memset(&buf, 0, sizeof(buf));
181 buf.dirent = dirent;
182 buf.dentry = file->f_dentry;
183 buf.filldir = filldir;
184retry:
185 buf.filldir_called = 0;
186 buf.entries_written = 0;
187 buf.err = 0;
188 rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf);
189 if (buf.err)
190 rc = buf.err;
191 if (buf.filldir_called && !buf.entries_written)
192 goto retry;
193 file->f_pos = lower_file->f_pos;
194 if (rc >= 0)
195 ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
196 return rc;
197}
198
199struct kmem_cache *ecryptfs_file_info_cache;
200
201/**
202 * ecryptfs_open
203 * @inode: inode speciying file to open
204 * @file: Structure to return filled in
205 *
206 * Opens the file specified by inode.
207 *
208 * Returns zero on success; non-zero otherwise
209 */
210static int ecryptfs_open(struct inode *inode, struct file *file)
211{
212 int rc = 0;
213 struct ecryptfs_crypt_stat *crypt_stat = NULL;
214 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
215 struct dentry *ecryptfs_dentry = file->f_dentry;
216 /* Private value of ecryptfs_dentry allocated in
217 * ecryptfs_lookup() */
218 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
219 struct inode *lower_inode = NULL;
220 struct file *lower_file = NULL;
221 struct vfsmount *lower_mnt;
222 struct ecryptfs_file_info *file_info;
223 int lower_flags;
224
225 /* Released in ecryptfs_release or end of function if failure */
226 file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
227 ecryptfs_set_file_private(file, file_info);
228 if (!file_info) {
229 ecryptfs_printk(KERN_ERR,
230 "Error attempting to allocate memory\n");
231 rc = -ENOMEM;
232 goto out;
233 }
234 memset(file_info, 0, sizeof(*file_info));
235 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
236 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
237 mount_crypt_stat = &ecryptfs_superblock_to_private(
238 ecryptfs_dentry->d_sb)->mount_crypt_stat;
239 mutex_lock(&crypt_stat->cs_mutex);
240 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) {
241 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
242 /* Policy code enabled in future release */
243 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED);
244 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
245 }
246 mutex_unlock(&crypt_stat->cs_mutex);
247 /* This mntget & dget is undone via fput when the file is released */
248 dget(lower_dentry);
249 lower_flags = file->f_flags;
250 if ((lower_flags & O_ACCMODE) == O_WRONLY)
251 lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
252 if (file->f_flags & O_APPEND)
253 lower_flags &= ~O_APPEND;
254 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
255 mntget(lower_mnt);
256 /* Corresponding fput() in ecryptfs_release() */
257 lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags);
258 if (IS_ERR(lower_file)) {
259 rc = PTR_ERR(lower_file);
260 ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
261 goto out_puts;
262 }
263 ecryptfs_set_file_lower(file, lower_file);
264 /* Isn't this check the same as the one in lookup? */
265 lower_inode = lower_dentry->d_inode;
266 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
267 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
268 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
269 rc = 0;
270 goto out;
271 }
272 mutex_lock(&crypt_stat->cs_mutex);
273 if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
274 if (!(mount_crypt_stat->flags
275 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
276 rc = -EIO;
277 printk(KERN_WARNING "Attempt to read file that is "
278 "not in a valid eCryptfs format, and plaintext "
279 "passthrough mode is not enabled; returning "
280 "-EIO\n");
281 mutex_unlock(&crypt_stat->cs_mutex);
282 goto out_puts;
283 }
284 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
285 rc = 0;
286 mutex_unlock(&crypt_stat->cs_mutex);
287 goto out;
288 } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
289 ECRYPTFS_POLICY_APPLIED)
290 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
291 ECRYPTFS_KEY_VALID)) {
292 rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file);
293 if (rc) {
294 ecryptfs_printk(KERN_DEBUG,
295 "Valid headers not found\n");
296 if (!(mount_crypt_stat->flags
297 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
298 rc = -EIO;
299 printk(KERN_WARNING "Attempt to read file that "
300 "is not in a valid eCryptfs format, "
301 "and plaintext passthrough mode is not "
302 "enabled; returning -EIO\n");
303 mutex_unlock(&crypt_stat->cs_mutex);
304 goto out_puts;
305 }
306 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
307 ECRYPTFS_ENCRYPTED);
308 rc = 0;
309 mutex_unlock(&crypt_stat->cs_mutex);
310 goto out;
311 }
312 }
313 mutex_unlock(&crypt_stat->cs_mutex);
314 ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
315 "size: [0x%.16x]\n", inode, inode->i_ino,
316 i_size_read(inode));
317 ecryptfs_set_file_lower(file, lower_file);
318 goto out;
319out_puts:
320 mntput(lower_mnt);
321 dput(lower_dentry);
322 kmem_cache_free(ecryptfs_file_info_cache,
323 ecryptfs_file_to_private(file));
324out:
325 return rc;
326}
327
328static int ecryptfs_flush(struct file *file, fl_owner_t td)
329{
330 int rc = 0;
331 struct file *lower_file = NULL;
332
333 lower_file = ecryptfs_file_to_lower(file);
334 if (lower_file->f_op && lower_file->f_op->flush)
335 rc = lower_file->f_op->flush(lower_file, td);
336 return rc;
337}
338
339static int ecryptfs_release(struct inode *inode, struct file *file)
340{
341 struct file *lower_file = ecryptfs_file_to_lower(file);
342 struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
343 struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
344
345 fput(lower_file);
346 inode->i_blocks = lower_inode->i_blocks;
347 kmem_cache_free(ecryptfs_file_info_cache, file_info);
348 return 0;
349}
350
351static int
352ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
353{
354 struct file *lower_file = ecryptfs_file_to_lower(file);
355 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
356 struct inode *lower_inode = lower_dentry->d_inode;
357 int rc = -EINVAL;
358
359 if (lower_inode->i_fop->fsync) {
360 mutex_lock(&lower_inode->i_mutex);
361 rc = lower_inode->i_fop->fsync(lower_file, lower_dentry,
362 datasync);
363 mutex_unlock(&lower_inode->i_mutex);
364 }
365 return rc;
366}
367
368static int ecryptfs_fasync(int fd, struct file *file, int flag)
369{
370 int rc = 0;
371 struct file *lower_file = NULL;
372
373 lower_file = ecryptfs_file_to_lower(file);
374 if (lower_file->f_op && lower_file->f_op->fasync)
375 rc = lower_file->f_op->fasync(fd, lower_file, flag);
376 return rc;
377}
378
379static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
380 size_t count, read_actor_t actor, void *target)
381{
382 struct file *lower_file = NULL;
383 int rc = -EINVAL;
384
385 lower_file = ecryptfs_file_to_lower(file);
386 if (lower_file->f_op && lower_file->f_op->sendfile)
387 rc = lower_file->f_op->sendfile(lower_file, ppos, count,
388 actor, target);
389
390 return rc;
391}
392
393static int ecryptfs_ioctl(struct inode *inode, struct file *file,
394 unsigned int cmd, unsigned long arg);
395
396const struct file_operations ecryptfs_dir_fops = {
397 .readdir = ecryptfs_readdir,
398 .ioctl = ecryptfs_ioctl,
399 .mmap = generic_file_mmap,
400 .open = ecryptfs_open,
401 .flush = ecryptfs_flush,
402 .release = ecryptfs_release,
403 .fsync = ecryptfs_fsync,
404 .fasync = ecryptfs_fasync,
405 .sendfile = ecryptfs_sendfile,
406};
407
408const struct file_operations ecryptfs_main_fops = {
409 .llseek = ecryptfs_llseek,
410 .read = do_sync_read,
411 .aio_read = ecryptfs_read_update_atime,
412 .write = do_sync_write,
413 .aio_write = generic_file_aio_write,
414 .readdir = ecryptfs_readdir,
415 .ioctl = ecryptfs_ioctl,
416 .mmap = generic_file_mmap,
417 .open = ecryptfs_open,
418 .flush = ecryptfs_flush,
419 .release = ecryptfs_release,
420 .fsync = ecryptfs_fsync,
421 .fasync = ecryptfs_fasync,
422 .sendfile = ecryptfs_sendfile,
423};
424
425static int
426ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
427 unsigned long arg)
428{
429 int rc = 0;
430 struct file *lower_file = NULL;
431
432 if (ecryptfs_file_to_private(file))
433 lower_file = ecryptfs_file_to_lower(file);
434 if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
435 rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
436 lower_file, cmd, arg);
437 else
438 rc = -ENOTTY;
439 return rc;
440}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
new file mode 100644
index 000000000000..efdd2b7b62d7
--- /dev/null
+++ b/fs/ecryptfs/inode.c
@@ -0,0 +1,1079 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2004 Erez Zadok
5 * Copyright (C) 2001-2004 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompsion <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/file.h>
27#include <linux/vmalloc.h>
28#include <linux/pagemap.h>
29#include <linux/dcache.h>
30#include <linux/namei.h>
31#include <linux/mount.h>
32#include <linux/crypto.h>
33#include "ecryptfs_kernel.h"
34
35static struct dentry *lock_parent(struct dentry *dentry)
36{
37 struct dentry *dir;
38
39 dir = dget(dentry->d_parent);
40 mutex_lock(&(dir->d_inode->i_mutex));
41 return dir;
42}
43
44static void unlock_parent(struct dentry *dentry)
45{
46 mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
47 dput(dentry->d_parent);
48}
49
50static void unlock_dir(struct dentry *dir)
51{
52 mutex_unlock(&dir->d_inode->i_mutex);
53 dput(dir);
54}
55
56void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src)
57{
58 i_size_write(dst, i_size_read((struct inode *)src));
59 dst->i_blocks = src->i_blocks;
60}
61
62void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src)
63{
64 dest->i_atime = src->i_atime;
65}
66
67static void ecryptfs_copy_attr_times(struct inode *dest,
68 const struct inode *src)
69{
70 dest->i_atime = src->i_atime;
71 dest->i_mtime = src->i_mtime;
72 dest->i_ctime = src->i_ctime;
73}
74
75static void ecryptfs_copy_attr_timesizes(struct inode *dest,
76 const struct inode *src)
77{
78 dest->i_atime = src->i_atime;
79 dest->i_mtime = src->i_mtime;
80 dest->i_ctime = src->i_ctime;
81 ecryptfs_copy_inode_size(dest, src);
82}
83
84void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
85{
86 dest->i_mode = src->i_mode;
87 dest->i_nlink = src->i_nlink;
88 dest->i_uid = src->i_uid;
89 dest->i_gid = src->i_gid;
90 dest->i_rdev = src->i_rdev;
91 dest->i_atime = src->i_atime;
92 dest->i_mtime = src->i_mtime;
93 dest->i_ctime = src->i_ctime;
94 dest->i_blkbits = src->i_blkbits;
95 dest->i_flags = src->i_flags;
96}
97
98/**
99 * ecryptfs_create_underlying_file
100 * @lower_dir_inode: inode of the parent in the lower fs of the new file
101 * @lower_dentry: New file's dentry in the lower fs
102 * @ecryptfs_dentry: New file's dentry in ecryptfs
103 * @mode: The mode of the new file
104 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
105 *
106 * Creates the file in the lower file system.
107 *
108 * Returns zero on success; non-zero on error condition
109 */
110static int
111ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
112 struct dentry *dentry, int mode,
113 struct nameidata *nd)
114{
115 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
116 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
117 struct dentry *dentry_save;
118 struct vfsmount *vfsmount_save;
119 int rc;
120
121 dentry_save = nd->dentry;
122 vfsmount_save = nd->mnt;
123 nd->dentry = lower_dentry;
124 nd->mnt = lower_mnt;
125 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
126 nd->dentry = dentry_save;
127 nd->mnt = vfsmount_save;
128 return rc;
129}
130
131/**
132 * ecryptfs_do_create
133 * @directory_inode: inode of the new file's dentry's parent in ecryptfs
134 * @ecryptfs_dentry: New file's dentry in ecryptfs
135 * @mode: The mode of the new file
136 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
137 *
138 * Creates the underlying file and the eCryptfs inode which will link to
139 * it. It will also update the eCryptfs directory inode to mimic the
140 * stat of the lower directory inode.
141 *
142 * Returns zero on success; non-zero on error condition
143 */
144static int
145ecryptfs_do_create(struct inode *directory_inode,
146 struct dentry *ecryptfs_dentry, int mode,
147 struct nameidata *nd)
148{
149 int rc;
150 struct dentry *lower_dentry;
151 struct dentry *lower_dir_dentry;
152
153 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
154 lower_dir_dentry = lock_parent(lower_dentry);
155 if (unlikely(IS_ERR(lower_dir_dentry))) {
156 ecryptfs_printk(KERN_ERR, "Error locking directory of "
157 "dentry\n");
158 rc = PTR_ERR(lower_dir_dentry);
159 goto out;
160 }
161 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
162 ecryptfs_dentry, mode, nd);
163 if (unlikely(rc)) {
164 ecryptfs_printk(KERN_ERR,
165 "Failure to create underlying file\n");
166 goto out_lock;
167 }
168 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
169 directory_inode->i_sb, 0);
170 if (rc) {
171 ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
172 goto out_lock;
173 }
174 ecryptfs_copy_attr_timesizes(directory_inode,
175 lower_dir_dentry->d_inode);
176out_lock:
177 unlock_dir(lower_dir_dentry);
178out:
179 return rc;
180}
181
182/**
183 * grow_file
184 * @ecryptfs_dentry: the ecryptfs dentry
185 * @lower_file: The lower file
186 * @inode: The ecryptfs inode
187 * @lower_inode: The lower inode
188 *
189 * This is the code which will grow the file to its correct size.
190 */
191static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
192 struct inode *inode, struct inode *lower_inode)
193{
194 int rc = 0;
195 struct file fake_file;
196 struct ecryptfs_file_info tmp_file_info;
197
198 memset(&fake_file, 0, sizeof(fake_file));
199 fake_file.f_dentry = ecryptfs_dentry;
200 memset(&tmp_file_info, 0, sizeof(tmp_file_info));
201 ecryptfs_set_file_private(&fake_file, &tmp_file_info);
202 ecryptfs_set_file_lower(&fake_file, lower_file);
203 rc = ecryptfs_fill_zeros(&fake_file, 1);
204 if (rc) {
205 ECRYPTFS_SET_FLAG(
206 ecryptfs_inode_to_private(inode)->crypt_stat.flags,
207 ECRYPTFS_SECURITY_WARNING);
208 ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
209 "in file; rc = [%d]\n", rc);
210 goto out;
211 }
212 i_size_write(inode, 0);
213 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
214 ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags,
215 ECRYPTFS_NEW_FILE);
216out:
217 return rc;
218}
219
220/**
221 * ecryptfs_initialize_file
222 *
223 * Cause the file to be changed from a basic empty file to an ecryptfs
224 * file with a header and first data page.
225 *
226 * Returns zero on success
227 */
228static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
229{
230 int rc = 0;
231 int lower_flags;
232 struct ecryptfs_crypt_stat *crypt_stat;
233 struct dentry *lower_dentry;
234 struct dentry *tlower_dentry = NULL;
235 struct file *lower_file;
236 struct inode *inode, *lower_inode;
237 struct vfsmount *lower_mnt;
238
239 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
240 ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
241 lower_dentry->d_name.name);
242 inode = ecryptfs_dentry->d_inode;
243 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
244 tlower_dentry = dget(lower_dentry);
245 if (!tlower_dentry) {
246 rc = -ENOMEM;
247 ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
248 goto out;
249 }
250 lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
251#if BITS_PER_LONG != 32
252 lower_flags |= O_LARGEFILE;
253#endif
254 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
255 mntget(lower_mnt);
256 /* Corresponding fput() at end of this function */
257 lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
258 if (IS_ERR(lower_file)) {
259 rc = PTR_ERR(lower_file);
260 ecryptfs_printk(KERN_ERR,
261 "Error opening dentry; rc = [%i]\n", rc);
262 goto out;
263 }
264 /* fput(lower_file) should handle the puts if we do this */
265 lower_file->f_dentry = tlower_dentry;
266 lower_file->f_vfsmnt = lower_mnt;
267 lower_inode = tlower_dentry->d_inode;
268 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
269 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
270 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
271 goto out_fput;
272 }
273 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
274 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
275 rc = ecryptfs_new_file_context(ecryptfs_dentry);
276 if (rc) {
277 ecryptfs_printk(KERN_DEBUG, "Error creating new file "
278 "context\n");
279 goto out_fput;
280 }
281 rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file);
282 if (rc) {
283 ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
284 goto out_fput;
285 }
286 rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
287out_fput:
288 fput(lower_file);
289out:
290 return rc;
291}
292
293/**
294 * ecryptfs_create
295 * @dir: The inode of the directory in which to create the file.
296 * @dentry: The eCryptfs dentry
297 * @mode: The mode of the new file.
298 * @nd: nameidata
299 *
300 * Creates a new file.
301 *
302 * Returns zero on success; non-zero on error condition
303 */
304static int
305ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
306 int mode, struct nameidata *nd)
307{
308 int rc;
309
310 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
311 if (unlikely(rc)) {
312 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
313 "lower filesystem\n");
314 goto out;
315 }
316 /* At this point, a file exists on "disk"; we need to make sure
317 * that this on disk file is prepared to be an ecryptfs file */
318 rc = ecryptfs_initialize_file(ecryptfs_dentry);
319out:
320 return rc;
321}
322
323/**
324 * ecryptfs_lookup
325 * @dir: inode
326 * @dentry: The dentry
327 * @nd: nameidata, may be NULL
328 *
329 * Find a file on disk. If the file does not exist, then we'll add it to the
330 * dentry cache and continue on to read it from the disk.
331 */
332static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
333 struct nameidata *nd)
334{
335 int rc = 0;
336 struct dentry *lower_dir_dentry;
337 struct dentry *lower_dentry;
338 struct vfsmount *lower_mnt;
339 struct dentry *tlower_dentry = NULL;
340 char *encoded_name;
341 unsigned int encoded_namelen;
342 struct ecryptfs_crypt_stat *crypt_stat = NULL;
343 char *page_virt = NULL;
344 struct inode *lower_inode;
345 u64 file_size;
346
347 lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
348 dentry->d_op = &ecryptfs_dops;
349 if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
350 || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, "..")))
351 goto out_drop;
352 encoded_namelen = ecryptfs_encode_filename(crypt_stat,
353 dentry->d_name.name,
354 dentry->d_name.len,
355 &encoded_name);
356 if (encoded_namelen < 0) {
357 rc = encoded_namelen;
358 goto out_drop;
359 }
360 ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
361 "= [%d]\n", encoded_name, encoded_namelen);
362 lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
363 encoded_namelen - 1);
364 kfree(encoded_name);
365 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
366 if (IS_ERR(lower_dentry)) {
367 ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
368 rc = PTR_ERR(lower_dentry);
369 goto out_drop;
370 }
371 ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
372 "d_name.name = [%s]\n", lower_dentry,
373 lower_dentry->d_name.name);
374 lower_inode = lower_dentry->d_inode;
375 ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode);
376 BUG_ON(!atomic_read(&lower_dentry->d_count));
377 ecryptfs_set_dentry_private(dentry,
378 kmem_cache_alloc(ecryptfs_dentry_info_cache,
379 SLAB_KERNEL));
380 if (!ecryptfs_dentry_to_private(dentry)) {
381 rc = -ENOMEM;
382 ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
383 "to allocate ecryptfs_dentry_info struct\n");
384 goto out_dput;
385 }
386 ecryptfs_set_dentry_lower(dentry, lower_dentry);
387 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
388 if (!lower_dentry->d_inode) {
389 /* We want to add because we couldn't find in lower */
390 d_add(dentry, NULL);
391 goto out;
392 }
393 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
394 if (rc) {
395 ecryptfs_printk(KERN_ERR, "Error interposing\n");
396 goto out_dput;
397 }
398 if (S_ISDIR(lower_inode->i_mode)) {
399 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
400 goto out;
401 }
402 if (S_ISLNK(lower_inode->i_mode)) {
403 ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
404 goto out;
405 }
406 if (!nd) {
407 ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
408 "as we *think* we are about to unlink\n");
409 goto out;
410 }
411 tlower_dentry = dget(lower_dentry);
412 if (!tlower_dentry || IS_ERR(tlower_dentry)) {
413 rc = -ENOMEM;
414 ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n");
415 goto out_dput;
416 }
417 /* Released in this function */
418 page_virt =
419 (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
420 SLAB_USER);
421 if (!page_virt) {
422 rc = -ENOMEM;
423 ecryptfs_printk(KERN_ERR,
424 "Cannot ecryptfs_kmalloc a page\n");
425 goto out_dput;
426 }
427 memset(page_virt, 0, PAGE_CACHE_SIZE);
428 rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt);
429 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
430 if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
431 ecryptfs_set_default_sizes(crypt_stat);
432 if (rc) {
433 rc = 0;
434 ecryptfs_printk(KERN_WARNING, "Error reading header region;"
435 " assuming unencrypted\n");
436 } else {
437 if (!contains_ecryptfs_marker(page_virt
438 + ECRYPTFS_FILE_SIZE_BYTES)) {
439 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
440 goto out;
441 }
442 memcpy(&file_size, page_virt, sizeof(file_size));
443 file_size = be64_to_cpu(file_size);
444 i_size_write(dentry->d_inode, (loff_t)file_size);
445 }
446 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
447 goto out;
448
449out_dput:
450 dput(lower_dentry);
451 if (tlower_dentry)
452 dput(tlower_dentry);
453out_drop:
454 d_drop(dentry);
455out:
456 return ERR_PTR(rc);
457}
458
459static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
460 struct dentry *new_dentry)
461{
462 struct dentry *lower_old_dentry;
463 struct dentry *lower_new_dentry;
464 struct dentry *lower_dir_dentry;
465 u64 file_size_save;
466 int rc;
467
468 file_size_save = i_size_read(old_dentry->d_inode);
469 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
470 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
471 dget(lower_old_dentry);
472 dget(lower_new_dentry);
473 lower_dir_dentry = lock_parent(lower_new_dentry);
474 rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
475 lower_new_dentry);
476 if (rc || !lower_new_dentry->d_inode)
477 goto out_lock;
478 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
479 if (rc)
480 goto out_lock;
481 ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode);
482 old_dentry->d_inode->i_nlink =
483 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
484 i_size_write(new_dentry->d_inode, file_size_save);
485out_lock:
486 unlock_dir(lower_dir_dentry);
487 dput(lower_new_dentry);
488 dput(lower_old_dentry);
489 if (!new_dentry->d_inode)
490 d_drop(new_dentry);
491 return rc;
492}
493
494static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
495{
496 int rc = 0;
497 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
498 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
499
500 lock_parent(lower_dentry);
501 rc = vfs_unlink(lower_dir_inode, lower_dentry);
502 if (rc) {
503 ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n");
504 goto out_unlock;
505 }
506 ecryptfs_copy_attr_times(dir, lower_dir_inode);
507 dentry->d_inode->i_nlink =
508 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
509 dentry->d_inode->i_ctime = dir->i_ctime;
510out_unlock:
511 unlock_parent(lower_dentry);
512 return rc;
513}
514
515static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
516 const char *symname)
517{
518 int rc;
519 struct dentry *lower_dentry;
520 struct dentry *lower_dir_dentry;
521 umode_t mode;
522 char *encoded_symname;
523 unsigned int encoded_symlen;
524 struct ecryptfs_crypt_stat *crypt_stat = NULL;
525
526 lower_dentry = ecryptfs_dentry_to_lower(dentry);
527 dget(lower_dentry);
528 lower_dir_dentry = lock_parent(lower_dentry);
529 mode = S_IALLUGO;
530 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
531 strlen(symname),
532 &encoded_symname);
533 if (encoded_symlen < 0) {
534 rc = encoded_symlen;
535 goto out_lock;
536 }
537 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
538 encoded_symname, mode);
539 kfree(encoded_symname);
540 if (rc || !lower_dentry->d_inode)
541 goto out_lock;
542 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
543 if (rc)
544 goto out_lock;
545 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
546out_lock:
547 unlock_dir(lower_dir_dentry);
548 dput(lower_dentry);
549 if (!dentry->d_inode)
550 d_drop(dentry);
551 return rc;
552}
553
554static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
555{
556 int rc;
557 struct dentry *lower_dentry;
558 struct dentry *lower_dir_dentry;
559
560 lower_dentry = ecryptfs_dentry_to_lower(dentry);
561 lower_dir_dentry = lock_parent(lower_dentry);
562 rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
563 if (rc || !lower_dentry->d_inode)
564 goto out;
565 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
566 if (rc)
567 goto out;
568 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
569 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
570out:
571 unlock_dir(lower_dir_dentry);
572 if (!dentry->d_inode)
573 d_drop(dentry);
574 return rc;
575}
576
577static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
578{
579 int rc = 0;
580 struct dentry *tdentry = NULL;
581 struct dentry *lower_dentry;
582 struct dentry *tlower_dentry = NULL;
583 struct dentry *lower_dir_dentry;
584
585 lower_dentry = ecryptfs_dentry_to_lower(dentry);
586 if (!(tdentry = dget(dentry))) {
587 rc = -EINVAL;
588 ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n",
589 dentry);
590 goto out;
591 }
592 lower_dir_dentry = lock_parent(lower_dentry);
593 if (!(tlower_dentry = dget(lower_dentry))) {
594 rc = -EINVAL;
595 ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry "
596 "[%p]\n", lower_dentry);
597 goto out;
598 }
599 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
600 if (!rc) {
601 d_delete(tlower_dentry);
602 tlower_dentry = NULL;
603 }
604 ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
605 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
606 unlock_dir(lower_dir_dentry);
607 if (!rc)
608 d_drop(dentry);
609out:
610 if (tdentry)
611 dput(tdentry);
612 if (tlower_dentry)
613 dput(tlower_dentry);
614 return rc;
615}
616
617static int
618ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
619{
620 int rc;
621 struct dentry *lower_dentry;
622 struct dentry *lower_dir_dentry;
623
624 lower_dentry = ecryptfs_dentry_to_lower(dentry);
625 lower_dir_dentry = lock_parent(lower_dentry);
626 rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
627 if (rc || !lower_dentry->d_inode)
628 goto out;
629 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
630 if (rc)
631 goto out;
632 ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
633out:
634 unlock_dir(lower_dir_dentry);
635 if (!dentry->d_inode)
636 d_drop(dentry);
637 return rc;
638}
639
640static int
641ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
642 struct inode *new_dir, struct dentry *new_dentry)
643{
644 int rc;
645 struct dentry *lower_old_dentry;
646 struct dentry *lower_new_dentry;
647 struct dentry *lower_old_dir_dentry;
648 struct dentry *lower_new_dir_dentry;
649
650 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
651 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
652 dget(lower_old_dentry);
653 dget(lower_new_dentry);
654 lower_old_dir_dentry = dget_parent(lower_old_dentry);
655 lower_new_dir_dentry = dget_parent(lower_new_dentry);
656 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
657 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
658 lower_new_dir_dentry->d_inode, lower_new_dentry);
659 if (rc)
660 goto out_lock;
661 ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
662 if (new_dir != old_dir)
663 ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
664out_lock:
665 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
666 dput(lower_new_dentry);
667 dput(lower_old_dentry);
668 return rc;
669}
670
671static int
672ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
673{
674 int rc;
675 struct dentry *lower_dentry;
676 char *decoded_name;
677 char *lower_buf;
678 mm_segment_t old_fs;
679 struct ecryptfs_crypt_stat *crypt_stat;
680
681 lower_dentry = ecryptfs_dentry_to_lower(dentry);
682 if (!lower_dentry->d_inode->i_op ||
683 !lower_dentry->d_inode->i_op->readlink) {
684 rc = -EINVAL;
685 goto out;
686 }
687 /* Released in this function */
688 lower_buf = kmalloc(bufsiz, GFP_KERNEL);
689 if (lower_buf == NULL) {
690 ecryptfs_printk(KERN_ERR, "Out of memory\n");
691 rc = -ENOMEM;
692 goto out;
693 }
694 old_fs = get_fs();
695 set_fs(get_ds());
696 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
697 "lower_dentry->d_name.name = [%s]\n",
698 lower_dentry->d_name.name);
699 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
700 (char __user *)lower_buf,
701 bufsiz);
702 set_fs(old_fs);
703 if (rc >= 0) {
704 crypt_stat = NULL;
705 rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc,
706 &decoded_name);
707 if (rc == -ENOMEM)
708 goto out_free_lower_buf;
709 if (rc > 0) {
710 ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes "
711 "to userspace: [%*s]\n", rc,
712 decoded_name);
713 if (copy_to_user(buf, decoded_name, rc))
714 rc = -EFAULT;
715 }
716 kfree(decoded_name);
717 ecryptfs_copy_attr_atime(dentry->d_inode,
718 lower_dentry->d_inode);
719 }
720out_free_lower_buf:
721 kfree(lower_buf);
722out:
723 return rc;
724}
725
726static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
727{
728 char *buf;
729 int len = PAGE_SIZE, rc;
730 mm_segment_t old_fs;
731
732 /* Released in ecryptfs_put_link(); only release here on error */
733 buf = kmalloc(len, GFP_KERNEL);
734 if (!buf) {
735 rc = -ENOMEM;
736 goto out;
737 }
738 old_fs = get_fs();
739 set_fs(get_ds());
740 ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
741 "dentry->d_name.name = [%s]\n", dentry->d_name.name);
742 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
743 buf[rc] = '\0';
744 set_fs(old_fs);
745 if (rc < 0)
746 goto out_free;
747 rc = 0;
748 nd_set_link(nd, buf);
749 goto out;
750out_free:
751 kfree(buf);
752out:
753 return ERR_PTR(rc);
754}
755
756static void
757ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
758{
759 /* Free the char* */
760 kfree(nd_get_link(nd));
761}
762
763/**
764 * upper_size_to_lower_size
765 * @crypt_stat: Crypt_stat associated with file
766 * @upper_size: Size of the upper file
767 *
768 * Calculate the requried size of the lower file based on the
769 * specified size of the upper file. This calculation is based on the
770 * number of headers in the underlying file and the extent size.
771 *
772 * Returns Calculated size of the lower file.
773 */
774static loff_t
775upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
776 loff_t upper_size)
777{
778 loff_t lower_size;
779
780 lower_size = ( crypt_stat->header_extent_size
781 * crypt_stat->num_header_extents_at_front );
782 if (upper_size != 0) {
783 loff_t num_extents;
784
785 num_extents = upper_size >> crypt_stat->extent_shift;
786 if (upper_size & ~crypt_stat->extent_mask)
787 num_extents++;
788 lower_size += (num_extents * crypt_stat->extent_size);
789 }
790 return lower_size;
791}
792
793/**
794 * ecryptfs_truncate
795 * @dentry: The ecryptfs layer dentry
796 * @new_length: The length to expand the file to
797 *
798 * Function to handle truncations modifying the size of the file. Note
799 * that the file sizes are interpolated. When expanding, we are simply
800 * writing strings of 0's out. When truncating, we need to modify the
801 * underlying file size according to the page index interpolations.
802 *
803 * Returns zero on success; non-zero otherwise
804 */
805int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
806{
807 int rc = 0;
808 struct inode *inode = dentry->d_inode;
809 struct dentry *lower_dentry;
810 struct vfsmount *lower_mnt;
811 struct file fake_ecryptfs_file, *lower_file = NULL;
812 struct ecryptfs_crypt_stat *crypt_stat;
813 loff_t i_size = i_size_read(inode);
814 loff_t lower_size_before_truncate;
815 loff_t lower_size_after_truncate;
816
817 if (unlikely((new_length == i_size)))
818 goto out;
819 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
820 /* Set up a fake ecryptfs file, this is used to interface with
821 * the file in the underlying filesystem so that the
822 * truncation has an effect there as well. */
823 memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
824 fake_ecryptfs_file.f_dentry = dentry;
825 /* Released at out_free: label */
826 ecryptfs_set_file_private(&fake_ecryptfs_file,
827 kmem_cache_alloc(ecryptfs_file_info_cache,
828 SLAB_KERNEL));
829 if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
830 rc = -ENOMEM;
831 goto out;
832 }
833 lower_dentry = ecryptfs_dentry_to_lower(dentry);
834 /* This dget & mntget is released through fput at out_fput: */
835 dget(lower_dentry);
836 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
837 mntget(lower_mnt);
838 lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
839 if (unlikely(IS_ERR(lower_file))) {
840 rc = PTR_ERR(lower_file);
841 goto out_free;
842 }
843 ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
844 /* Switch on growing or shrinking file */
845 if (new_length > i_size) {
846 rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length);
847 if (rc) {
848 ecryptfs_printk(KERN_ERR,
849 "Problem with fill_zeros\n");
850 goto out_fput;
851 }
852 i_size_write(inode, new_length);
853 rc = ecryptfs_write_inode_size_to_header(lower_file,
854 lower_dentry->d_inode,
855 inode);
856 if (rc) {
857 ecryptfs_printk(KERN_ERR,
858 "Problem with ecryptfs_write"
859 "_inode_size\n");
860 goto out_fput;
861 }
862 } else { /* new_length < i_size_read(inode) */
863 vmtruncate(inode, new_length);
864 ecryptfs_write_inode_size_to_header(lower_file,
865 lower_dentry->d_inode,
866 inode);
867 /* We are reducing the size of the ecryptfs file, and need to
868 * know if we need to reduce the size of the lower file. */
869 lower_size_before_truncate =
870 upper_size_to_lower_size(crypt_stat, i_size);
871 lower_size_after_truncate =
872 upper_size_to_lower_size(crypt_stat, new_length);
873 if (lower_size_after_truncate < lower_size_before_truncate)
874 vmtruncate(lower_dentry->d_inode,
875 lower_size_after_truncate);
876 }
877 /* Update the access times */
878 lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
879 = CURRENT_TIME;
880 mark_inode_dirty_sync(inode);
881out_fput:
882 fput(lower_file);
883out_free:
884 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
885 kmem_cache_free(ecryptfs_file_info_cache,
886 ecryptfs_file_to_private(&fake_ecryptfs_file));
887out:
888 return rc;
889}
890
891static int
892ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
893{
894 int rc;
895
896 if (nd) {
897 struct vfsmount *vfsmnt_save = nd->mnt;
898 struct dentry *dentry_save = nd->dentry;
899
900 nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry);
901 nd->dentry = ecryptfs_dentry_to_lower(nd->dentry);
902 rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
903 nd->mnt = vfsmnt_save;
904 nd->dentry = dentry_save;
905 } else
906 rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
907 return rc;
908}
909
910/**
911 * ecryptfs_setattr
912 * @dentry: dentry handle to the inode to modify
913 * @ia: Structure with flags of what to change and values
914 *
915 * Updates the metadata of an inode. If the update is to the size
916 * i.e. truncation, then ecryptfs_truncate will handle the size modification
917 * of both the ecryptfs inode and the lower inode.
918 *
919 * All other metadata changes will be passed right to the lower filesystem,
920 * and we will just update our inode to look like the lower.
921 */
922static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
923{
924 int rc = 0;
925 struct dentry *lower_dentry;
926 struct inode *inode;
927 struct inode *lower_inode;
928 struct ecryptfs_crypt_stat *crypt_stat;
929
930 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
931 lower_dentry = ecryptfs_dentry_to_lower(dentry);
932 inode = dentry->d_inode;
933 lower_inode = ecryptfs_inode_to_lower(inode);
934 if (ia->ia_valid & ATTR_SIZE) {
935 ecryptfs_printk(KERN_DEBUG,
936 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
937 ia->ia_valid, ATTR_SIZE);
938 rc = ecryptfs_truncate(dentry, ia->ia_size);
939 /* ecryptfs_truncate handles resizing of the lower file */
940 ia->ia_valid &= ~ATTR_SIZE;
941 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
942 ia->ia_valid);
943 if (rc < 0)
944 goto out;
945 }
946 rc = notify_change(lower_dentry, ia);
947out:
948 ecryptfs_copy_attr_all(inode, lower_inode);
949 return rc;
950}
951
952static int
953ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
954 size_t size, int flags)
955{
956 int rc = 0;
957 struct dentry *lower_dentry;
958
959 lower_dentry = ecryptfs_dentry_to_lower(dentry);
960 if (!lower_dentry->d_inode->i_op->setxattr) {
961 rc = -ENOSYS;
962 goto out;
963 }
964 mutex_lock(&lower_dentry->d_inode->i_mutex);
965 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
966 size, flags);
967 mutex_unlock(&lower_dentry->d_inode->i_mutex);
968out:
969 return rc;
970}
971
972static ssize_t
973ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
974 size_t size)
975{
976 int rc = 0;
977 struct dentry *lower_dentry;
978
979 lower_dentry = ecryptfs_dentry_to_lower(dentry);
980 if (!lower_dentry->d_inode->i_op->getxattr) {
981 rc = -ENOSYS;
982 goto out;
983 }
984 mutex_lock(&lower_dentry->d_inode->i_mutex);
985 rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
986 size);
987 mutex_unlock(&lower_dentry->d_inode->i_mutex);
988out:
989 return rc;
990}
991
992static ssize_t
993ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
994{
995 int rc = 0;
996 struct dentry *lower_dentry;
997
998 lower_dentry = ecryptfs_dentry_to_lower(dentry);
999 if (!lower_dentry->d_inode->i_op->listxattr) {
1000 rc = -ENOSYS;
1001 goto out;
1002 }
1003 mutex_lock(&lower_dentry->d_inode->i_mutex);
1004 rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
1005 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1006out:
1007 return rc;
1008}
1009
1010static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
1011{
1012 int rc = 0;
1013 struct dentry *lower_dentry;
1014
1015 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1016 if (!lower_dentry->d_inode->i_op->removexattr) {
1017 rc = -ENOSYS;
1018 goto out;
1019 }
1020 mutex_lock(&lower_dentry->d_inode->i_mutex);
1021 rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
1022 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1023out:
1024 return rc;
1025}
1026
1027int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
1028{
1029 if ((ecryptfs_inode_to_lower(inode)
1030 == (struct inode *)candidate_lower_inode))
1031 return 1;
1032 else
1033 return 0;
1034}
1035
1036int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
1037{
1038 ecryptfs_init_inode(inode, (struct inode *)lower_inode);
1039 return 0;
1040}
1041
1042struct inode_operations ecryptfs_symlink_iops = {
1043 .readlink = ecryptfs_readlink,
1044 .follow_link = ecryptfs_follow_link,
1045 .put_link = ecryptfs_put_link,
1046 .permission = ecryptfs_permission,
1047 .setattr = ecryptfs_setattr,
1048 .setxattr = ecryptfs_setxattr,
1049 .getxattr = ecryptfs_getxattr,
1050 .listxattr = ecryptfs_listxattr,
1051 .removexattr = ecryptfs_removexattr
1052};
1053
1054struct inode_operations ecryptfs_dir_iops = {
1055 .create = ecryptfs_create,
1056 .lookup = ecryptfs_lookup,
1057 .link = ecryptfs_link,
1058 .unlink = ecryptfs_unlink,
1059 .symlink = ecryptfs_symlink,
1060 .mkdir = ecryptfs_mkdir,
1061 .rmdir = ecryptfs_rmdir,
1062 .mknod = ecryptfs_mknod,
1063 .rename = ecryptfs_rename,
1064 .permission = ecryptfs_permission,
1065 .setattr = ecryptfs_setattr,
1066 .setxattr = ecryptfs_setxattr,
1067 .getxattr = ecryptfs_getxattr,
1068 .listxattr = ecryptfs_listxattr,
1069 .removexattr = ecryptfs_removexattr
1070};
1071
1072struct inode_operations ecryptfs_main_iops = {
1073 .permission = ecryptfs_permission,
1074 .setattr = ecryptfs_setattr,
1075 .setxattr = ecryptfs_setxattr,
1076 .getxattr = ecryptfs_getxattr,
1077 .listxattr = ecryptfs_listxattr,
1078 .removexattr = ecryptfs_removexattr
1079};
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
new file mode 100644
index 000000000000..ba454785a0c5
--- /dev/null
+++ b/fs/ecryptfs/keystore.c
@@ -0,0 +1,1061 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * In-kernel key management code. Includes functions to parse and
4 * write authentication token-related packets with the underlying
5 * file.
6 *
7 * Copyright (C) 2004-2006 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
9 * Michael C. Thompson <mcthomps@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2 of the
14 * License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 * 02111-1307, USA.
25 */
26
27#include <linux/string.h>
28#include <linux/sched.h>
29#include <linux/syscalls.h>
30#include <linux/pagemap.h>
31#include <linux/key.h>
32#include <linux/random.h>
33#include <linux/crypto.h>
34#include <linux/scatterlist.h>
35#include "ecryptfs_kernel.h"
36
37/**
38 * request_key returned an error instead of a valid key address;
39 * determine the type of error, make appropriate log entries, and
40 * return an error code.
41 */
42int process_request_key_err(long err_code)
43{
44 int rc = 0;
45
46 switch (err_code) {
47 case ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT;
50 break;
51 case EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME;
54 break;
55 case EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL;
58 break;
59 default:
60 ecryptfs_printk(KERN_WARNING, "Unknown error code: "
61 "[0x%.16x]\n", err_code);
62 rc = -EINVAL;
63 }
64 return rc;
65}
66
67static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
68{
69 struct list_head *walker;
70 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
71
72 walker = auth_tok_list_head->next;
73 while (walker != auth_tok_list_head) {
74 auth_tok_list_item =
75 list_entry(walker, struct ecryptfs_auth_tok_list_item,
76 list);
77 walker = auth_tok_list_item->list.next;
78 memset(auth_tok_list_item, 0,
79 sizeof(struct ecryptfs_auth_tok_list_item));
80 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
81 auth_tok_list_item);
82 }
83}
84
85struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
86
87/**
88 * parse_packet_length
89 * @data: Pointer to memory containing length at offset
90 * @size: This function writes the decoded size to this memory
91 * address; zero on error
92 * @length_size: The number of bytes occupied by the encoded length
93 *
94 * Returns Zero on success
95 */
96static int parse_packet_length(unsigned char *data, size_t *size,
97 size_t *length_size)
98{
99 int rc = 0;
100
101 (*length_size) = 0;
102 (*size) = 0;
103 if (data[0] < 192) {
104 /* One-byte length */
105 (*size) = data[0];
106 (*length_size) = 1;
107 } else if (data[0] < 224) {
108 /* Two-byte length */
109 (*size) = ((data[0] - 192) * 256);
110 (*size) += (data[1] + 192);
111 (*length_size) = 2;
112 } else if (data[0] == 255) {
113 /* Five-byte length; we're not supposed to see this */
114 ecryptfs_printk(KERN_ERR, "Five-byte packet length not "
115 "supported\n");
116 rc = -EINVAL;
117 goto out;
118 } else {
119 ecryptfs_printk(KERN_ERR, "Error parsing packet length\n");
120 rc = -EINVAL;
121 goto out;
122 }
123out:
124 return rc;
125}
126
127/**
128 * write_packet_length
129 * @dest: The byte array target into which to write the
130 * length. Must have at least 5 bytes allocated.
131 * @size: The length to write.
132 * @packet_size_length: The number of bytes used to encode the
133 * packet length is written to this address.
134 *
135 * Returns zero on success; non-zero on error.
136 */
137static int write_packet_length(char *dest, size_t size,
138 size_t *packet_size_length)
139{
140 int rc = 0;
141
142 if (size < 192) {
143 dest[0] = size;
144 (*packet_size_length) = 1;
145 } else if (size < 65536) {
146 dest[0] = (((size - 192) / 256) + 192);
147 dest[1] = ((size - 192) % 256);
148 (*packet_size_length) = 2;
149 } else {
150 rc = -EINVAL;
151 ecryptfs_printk(KERN_WARNING,
152 "Unsupported packet size: [%d]\n", size);
153 }
154 return rc;
155}
156
157/**
158 * parse_tag_3_packet
159 * @crypt_stat: The cryptographic context to modify based on packet
160 * contents.
161 * @data: The raw bytes of the packet.
162 * @auth_tok_list: eCryptfs parses packets into authentication tokens;
163 * a new authentication token will be placed at the end
164 * of this list for this packet.
165 * @new_auth_tok: Pointer to a pointer to memory that this function
166 * allocates; sets the memory address of the pointer to
167 * NULL on error. This object is added to the
168 * auth_tok_list.
169 * @packet_size: This function writes the size of the parsed packet
170 * into this memory location; zero on error.
171 * @max_packet_size: maximum number of bytes to parse
172 *
173 * Returns zero on success; non-zero on error.
174 */
175static int
176parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
177 unsigned char *data, struct list_head *auth_tok_list,
178 struct ecryptfs_auth_tok **new_auth_tok,
179 size_t *packet_size, size_t max_packet_size)
180{
181 int rc = 0;
182 size_t body_size;
183 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
184 size_t length_size;
185
186 (*packet_size) = 0;
187 (*new_auth_tok) = NULL;
188
189 /* we check that:
190 * one byte for the Tag 3 ID flag
191 * two bytes for the body size
192 * do not exceed the maximum_packet_size
193 */
194 if (unlikely((*packet_size) + 3 > max_packet_size)) {
195 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
196 rc = -EINVAL;
197 goto out;
198 }
199
200 /* check for Tag 3 identifyer - one byte */
201 if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
202 ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
203 ECRYPTFS_TAG_3_PACKET_TYPE);
204 rc = -EINVAL;
205 goto out;
206 }
207 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
208 * at end of function upon failure */
209 auth_tok_list_item =
210 kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
211 if (!auth_tok_list_item) {
212 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
213 rc = -ENOMEM;
214 goto out;
215 }
216 memset(auth_tok_list_item, 0,
217 sizeof(struct ecryptfs_auth_tok_list_item));
218 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
219
220 /* check for body size - one to two bytes */
221 rc = parse_packet_length(&data[(*packet_size)], &body_size,
222 &length_size);
223 if (rc) {
224 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
225 "rc = [%d]\n", rc);
226 goto out_free;
227 }
228 if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) {
229 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
230 body_size);
231 rc = -EINVAL;
232 goto out_free;
233 }
234 (*packet_size) += length_size;
235
236 /* now we know the length of the remainting Tag 3 packet size:
237 * 5 fix bytes for: version string, cipher, S2K ID, hash algo,
238 * number of hash iterations
239 * ECRYPTFS_SALT_SIZE bytes for salt
240 * body_size bytes minus the stuff above is the encrypted key size
241 */
242 if (unlikely((*packet_size) + body_size > max_packet_size)) {
243 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
244 rc = -EINVAL;
245 goto out_free;
246 }
247
248 /* There are 5 characters of additional information in the
249 * packet */
250 (*new_auth_tok)->session_key.encrypted_key_size =
251 body_size - (0x05 + ECRYPTFS_SALT_SIZE);
252 ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
253 (*new_auth_tok)->session_key.encrypted_key_size);
254
255 /* Version 4 (from RFC2440) - one byte */
256 if (unlikely(data[(*packet_size)++] != 0x04)) {
257 ecryptfs_printk(KERN_DEBUG, "Unknown version number "
258 "[%d]\n", data[(*packet_size) - 1]);
259 rc = -EINVAL;
260 goto out_free;
261 }
262
263 /* cipher - one byte */
264 ecryptfs_cipher_code_to_string(crypt_stat->cipher,
265 (u16)data[(*packet_size)]);
266 /* A little extra work to differentiate among the AES key
267 * sizes; see RFC2440 */
268 switch(data[(*packet_size)++]) {
269 case RFC2440_CIPHER_AES_192:
270 crypt_stat->key_size = 24;
271 break;
272 default:
273 crypt_stat->key_size =
274 (*new_auth_tok)->session_key.encrypted_key_size;
275 }
276 ecryptfs_init_crypt_ctx(crypt_stat);
277 /* S2K identifier 3 (from RFC2440) */
278 if (unlikely(data[(*packet_size)++] != 0x03)) {
279 ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently "
280 "supported\n");
281 rc = -ENOSYS;
282 goto out_free;
283 }
284
285 /* TODO: finish the hash mapping */
286 /* hash algorithm - one byte */
287 switch (data[(*packet_size)++]) {
288 case 0x01: /* See RFC2440 for these numbers and their mappings */
289 /* Choose MD5 */
290 /* salt - ECRYPTFS_SALT_SIZE bytes */
291 memcpy((*new_auth_tok)->token.password.salt,
292 &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
293 (*packet_size) += ECRYPTFS_SALT_SIZE;
294
295 /* This conversion was taken straight from RFC2440 */
296 /* number of hash iterations - one byte */
297 (*new_auth_tok)->token.password.hash_iterations =
298 ((u32) 16 + (data[(*packet_size)] & 15))
299 << ((data[(*packet_size)] >> 4) + 6);
300 (*packet_size)++;
301
302 /* encrypted session key -
303 * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */
304 memcpy((*new_auth_tok)->session_key.encrypted_key,
305 &data[(*packet_size)],
306 (*new_auth_tok)->session_key.encrypted_key_size);
307 (*packet_size) +=
308 (*new_auth_tok)->session_key.encrypted_key_size;
309 (*new_auth_tok)->session_key.flags &=
310 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
311 (*new_auth_tok)->session_key.flags |=
312 ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
313 (*new_auth_tok)->token.password.hash_algo = 0x01;
314 break;
315 default:
316 ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
317 "[%d]\n", data[(*packet_size) - 1]);
318 rc = -ENOSYS;
319 goto out_free;
320 }
321 (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
322 /* TODO: Parametarize; we might actually want userspace to
323 * decrypt the session key. */
324 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
325 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
326 ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
327 ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
328 list_add(&auth_tok_list_item->list, auth_tok_list);
329 goto out;
330out_free:
331 (*new_auth_tok) = NULL;
332 memset(auth_tok_list_item, 0,
333 sizeof(struct ecryptfs_auth_tok_list_item));
334 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
335 auth_tok_list_item);
336out:
337 if (rc)
338 (*packet_size) = 0;
339 return rc;
340}
341
342/**
343 * parse_tag_11_packet
344 * @data: The raw bytes of the packet
345 * @contents: This function writes the data contents of the literal
346 * packet into this memory location
347 * @max_contents_bytes: The maximum number of bytes that this function
348 * is allowed to write into contents
349 * @tag_11_contents_size: This function writes the size of the parsed
350 * contents into this memory location; zero on
351 * error
352 * @packet_size: This function writes the size of the parsed packet
353 * into this memory location; zero on error
354 * @max_packet_size: maximum number of bytes to parse
355 *
356 * Returns zero on success; non-zero on error.
357 */
358static int
359parse_tag_11_packet(unsigned char *data, unsigned char *contents,
360 size_t max_contents_bytes, size_t *tag_11_contents_size,
361 size_t *packet_size, size_t max_packet_size)
362{
363 int rc = 0;
364 size_t body_size;
365 size_t length_size;
366
367 (*packet_size) = 0;
368 (*tag_11_contents_size) = 0;
369
370 /* check that:
371 * one byte for the Tag 11 ID flag
372 * two bytes for the Tag 11 length
373 * do not exceed the maximum_packet_size
374 */
375 if (unlikely((*packet_size) + 3 > max_packet_size)) {
376 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
377 rc = -EINVAL;
378 goto out;
379 }
380
381 /* check for Tag 11 identifyer - one byte */
382 if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
383 ecryptfs_printk(KERN_WARNING,
384 "Invalid tag 11 packet format\n");
385 rc = -EINVAL;
386 goto out;
387 }
388
389 /* get Tag 11 content length - one or two bytes */
390 rc = parse_packet_length(&data[(*packet_size)], &body_size,
391 &length_size);
392 if (rc) {
393 ecryptfs_printk(KERN_WARNING,
394 "Invalid tag 11 packet format\n");
395 goto out;
396 }
397 (*packet_size) += length_size;
398
399 if (body_size < 13) {
400 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
401 body_size);
402 rc = -EINVAL;
403 goto out;
404 }
405 /* We have 13 bytes of surrounding packet values */
406 (*tag_11_contents_size) = (body_size - 13);
407
408 /* now we know the length of the remainting Tag 11 packet size:
409 * 14 fix bytes for: special flag one, special flag two,
410 * 12 skipped bytes
411 * body_size bytes minus the stuff above is the Tag 11 content
412 */
413 /* FIXME why is the body size one byte smaller than the actual
414 * size of the body?
415 * this seems to be an error here as well as in
416 * write_tag_11_packet() */
417 if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
418 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
419 rc = -EINVAL;
420 goto out;
421 }
422
423 /* special flag one - one byte */
424 if (data[(*packet_size)++] != 0x62) {
425 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
426 rc = -EINVAL;
427 goto out;
428 }
429
430 /* special flag two - one byte */
431 if (data[(*packet_size)++] != 0x08) {
432 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
433 rc = -EINVAL;
434 goto out;
435 }
436
437 /* skip the next 12 bytes */
438 (*packet_size) += 12; /* We don't care about the filename or
439 * the timestamp */
440
441 /* get the Tag 11 contents - tag_11_contents_size bytes */
442 memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
443 (*packet_size) += (*tag_11_contents_size);
444
445out:
446 if (rc) {
447 (*packet_size) = 0;
448 (*tag_11_contents_size) = 0;
449 }
450 return rc;
451}
452
453/**
454 * decrypt_session_key - Decrypt the session key with the given auth_tok.
455 *
456 * Returns Zero on success; non-zero error otherwise.
457 */
458static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
459 struct ecryptfs_crypt_stat *crypt_stat)
460{
461 int rc = 0;
462 struct ecryptfs_password *password_s_ptr;
463 struct crypto_tfm *tfm = NULL;
464 struct scatterlist src_sg[2], dst_sg[2];
465 struct mutex *tfm_mutex = NULL;
466 /* TODO: Use virt_to_scatterlist for these */
467 char *encrypted_session_key;
468 char *session_key;
469
470 password_s_ptr = &auth_tok->token.password;
471 if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
472 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
473 ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
474 "set; skipping key generation\n");
475 ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
476 ":\n",
477 password_s_ptr->session_key_encryption_key_bytes);
478 if (ecryptfs_verbosity > 0)
479 ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key,
480 password_s_ptr->
481 session_key_encryption_key_bytes);
482 if (!strcmp(crypt_stat->cipher,
483 crypt_stat->mount_crypt_stat->global_default_cipher_name)
484 && crypt_stat->mount_crypt_stat->global_key_tfm) {
485 tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
486 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
487 } else {
488 tfm = crypto_alloc_tfm(crypt_stat->cipher,
489 CRYPTO_TFM_REQ_WEAK_KEY);
490 if (!tfm) {
491 printk(KERN_ERR "Error allocating crypto context\n");
492 rc = -ENOMEM;
493 goto out;
494 }
495 }
496 if (password_s_ptr->session_key_encryption_key_bytes
497 < crypto_tfm_alg_min_keysize(tfm)) {
498 printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
499 "minimum keysize for selected cipher is [%d] bytes.\n",
500 password_s_ptr->session_key_encryption_key_bytes,
501 crypto_tfm_alg_min_keysize(tfm));
502 rc = -EINVAL;
503 goto out;
504 }
505 if (tfm_mutex)
506 mutex_lock(tfm_mutex);
507 crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key,
508 crypt_stat->key_size);
509 /* TODO: virt_to_scatterlist */
510 encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
511 if (!encrypted_session_key) {
512 ecryptfs_printk(KERN_ERR, "Out of memory\n");
513 rc = -ENOMEM;
514 goto out_free_tfm;
515 }
516 session_key = (char *)__get_free_page(GFP_KERNEL);
517 if (!session_key) {
518 kfree(encrypted_session_key);
519 ecryptfs_printk(KERN_ERR, "Out of memory\n");
520 rc = -ENOMEM;
521 goto out_free_tfm;
522 }
523 memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
524 auth_tok->session_key.encrypted_key_size);
525 src_sg[0].page = virt_to_page(encrypted_session_key);
526 src_sg[0].offset = 0;
527 BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
528 src_sg[0].length = auth_tok->session_key.encrypted_key_size;
529 dst_sg[0].page = virt_to_page(session_key);
530 dst_sg[0].offset = 0;
531 auth_tok->session_key.decrypted_key_size =
532 auth_tok->session_key.encrypted_key_size;
533 dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
534 /* TODO: Handle error condition */
535 crypto_cipher_decrypt(tfm, dst_sg, src_sg,
536 auth_tok->session_key.encrypted_key_size);
537 auth_tok->session_key.decrypted_key_size =
538 auth_tok->session_key.encrypted_key_size;
539 memcpy(auth_tok->session_key.decrypted_key, session_key,
540 auth_tok->session_key.decrypted_key_size);
541 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
542 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
543 auth_tok->session_key.decrypted_key_size);
544 ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
545 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
546 if (ecryptfs_verbosity > 0)
547 ecryptfs_dump_hex(crypt_stat->key,
548 crypt_stat->key_size);
549 memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
550 free_page((unsigned long)encrypted_session_key);
551 memset(session_key, 0, PAGE_CACHE_SIZE);
552 free_page((unsigned long)session_key);
553out_free_tfm:
554 if (tfm_mutex)
555 mutex_unlock(tfm_mutex);
556 else
557 crypto_free_tfm(tfm);
558out:
559 return rc;
560}
561
562/**
563 * ecryptfs_parse_packet_set
564 * @dest: The header page in memory
565 * @version: Version of file format, to guide parsing behavior
566 *
567 * Get crypt_stat to have the file's session key if the requisite key
568 * is available to decrypt the session key.
569 *
570 * Returns Zero if a valid authentication token was retrieved and
571 * processed; negative value for file not encrypted or for error
572 * conditions.
573 */
574int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
575 unsigned char *src,
576 struct dentry *ecryptfs_dentry)
577{
578 size_t i = 0;
579 int rc = 0;
580 size_t found_auth_tok = 0;
581 size_t next_packet_is_auth_tok_packet;
582 char sig[ECRYPTFS_SIG_SIZE_HEX];
583 struct list_head auth_tok_list;
584 struct list_head *walker;
585 struct ecryptfs_auth_tok *chosen_auth_tok = NULL;
586 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
587 &ecryptfs_superblock_to_private(
588 ecryptfs_dentry->d_sb)->mount_crypt_stat;
589 struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
590 size_t packet_size;
591 struct ecryptfs_auth_tok *new_auth_tok;
592 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
593 size_t tag_11_contents_size;
594 size_t tag_11_packet_size;
595
596 INIT_LIST_HEAD(&auth_tok_list);
597 /* Parse the header to find as many packets as we can, these will be
598 * added the our &auth_tok_list */
599 next_packet_is_auth_tok_packet = 1;
600 while (next_packet_is_auth_tok_packet) {
601 size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
602
603 switch (src[i]) {
604 case ECRYPTFS_TAG_3_PACKET_TYPE:
605 rc = parse_tag_3_packet(crypt_stat,
606 (unsigned char *)&src[i],
607 &auth_tok_list, &new_auth_tok,
608 &packet_size, max_packet_size);
609 if (rc) {
610 ecryptfs_printk(KERN_ERR, "Error parsing "
611 "tag 3 packet\n");
612 rc = -EIO;
613 goto out_wipe_list;
614 }
615 i += packet_size;
616 rc = parse_tag_11_packet((unsigned char *)&src[i],
617 sig_tmp_space,
618 ECRYPTFS_SIG_SIZE,
619 &tag_11_contents_size,
620 &tag_11_packet_size,
621 max_packet_size);
622 if (rc) {
623 ecryptfs_printk(KERN_ERR, "No valid "
624 "(ecryptfs-specific) literal "
625 "packet containing "
626 "authentication token "
627 "signature found after "
628 "tag 3 packet\n");
629 rc = -EIO;
630 goto out_wipe_list;
631 }
632 i += tag_11_packet_size;
633 if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
634 ecryptfs_printk(KERN_ERR, "Expected "
635 "signature of size [%d]; "
636 "read size [%d]\n",
637 ECRYPTFS_SIG_SIZE,
638 tag_11_contents_size);
639 rc = -EIO;
640 goto out_wipe_list;
641 }
642 ecryptfs_to_hex(new_auth_tok->token.password.signature,
643 sig_tmp_space, tag_11_contents_size);
644 new_auth_tok->token.password.signature[
645 ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
646 ECRYPTFS_SET_FLAG(crypt_stat->flags,
647 ECRYPTFS_ENCRYPTED);
648 break;
649 case ECRYPTFS_TAG_11_PACKET_TYPE:
650 ecryptfs_printk(KERN_WARNING, "Invalid packet set "
651 "(Tag 11 not allowed by itself)\n");
652 rc = -EIO;
653 goto out_wipe_list;
654 break;
655 default:
656 ecryptfs_printk(KERN_DEBUG, "No packet at offset "
657 "[%d] of the file header; hex value of "
658 "character is [0x%.2x]\n", i, src[i]);
659 next_packet_is_auth_tok_packet = 0;
660 }
661 }
662 if (list_empty(&auth_tok_list)) {
663 rc = -EINVAL; /* Do not support non-encrypted files in
664 * the 0.1 release */
665 goto out;
666 }
667 /* If we have a global auth tok, then we should try to use
668 * it */
669 if (mount_crypt_stat->global_auth_tok) {
670 memcpy(sig, mount_crypt_stat->global_auth_tok_sig,
671 ECRYPTFS_SIG_SIZE_HEX);
672 chosen_auth_tok = mount_crypt_stat->global_auth_tok;
673 } else
674 BUG(); /* We should always have a global auth tok in
675 * the 0.1 release */
676 /* Scan list to see if our chosen_auth_tok works */
677 list_for_each(walker, &auth_tok_list) {
678 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
679 auth_tok_list_item =
680 list_entry(walker, struct ecryptfs_auth_tok_list_item,
681 list);
682 candidate_auth_tok = &auth_tok_list_item->auth_tok;
683 if (unlikely(ecryptfs_verbosity > 0)) {
684 ecryptfs_printk(KERN_DEBUG,
685 "Considering cadidate auth tok:\n");
686 ecryptfs_dump_auth_tok(candidate_auth_tok);
687 }
688 /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */
689 if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD
690 && !strncmp(candidate_auth_tok->token.password.signature,
691 sig, ECRYPTFS_SIG_SIZE_HEX)) {
692 found_auth_tok = 1;
693 goto leave_list;
694 /* TODO: Transfer the common salt into the
695 * crypt_stat salt */
696 }
697 }
698leave_list:
699 if (!found_auth_tok) {
700 ecryptfs_printk(KERN_ERR, "Could not find authentication "
701 "token on temporary list for sig [%.*s]\n",
702 ECRYPTFS_SIG_SIZE_HEX, sig);
703 rc = -EIO;
704 goto out_wipe_list;
705 } else {
706 memcpy(&(candidate_auth_tok->token.password),
707 &(chosen_auth_tok->token.password),
708 sizeof(struct ecryptfs_password));
709 rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
710 if (rc) {
711 ecryptfs_printk(KERN_ERR, "Error decrypting the "
712 "session key\n");
713 goto out_wipe_list;
714 }
715 rc = ecryptfs_compute_root_iv(crypt_stat);
716 if (rc) {
717 ecryptfs_printk(KERN_ERR, "Error computing "
718 "the root IV\n");
719 goto out_wipe_list;
720 }
721 }
722 rc = ecryptfs_init_crypt_ctx(crypt_stat);
723 if (rc) {
724 ecryptfs_printk(KERN_ERR, "Error initializing crypto "
725 "context for cipher [%s]; rc = [%d]\n",
726 crypt_stat->cipher, rc);
727 }
728out_wipe_list:
729 wipe_auth_tok_list(&auth_tok_list);
730out:
731 return rc;
732}
733
734/**
735 * write_tag_11_packet
736 * @dest: Target into which Tag 11 packet is to be written
737 * @max: Maximum packet length
738 * @contents: Byte array of contents to copy in
739 * @contents_length: Number of bytes in contents
740 * @packet_length: Length of the Tag 11 packet written; zero on error
741 *
742 * Returns zero on success; non-zero on error.
743 */
744static int
745write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
746 size_t *packet_length)
747{
748 int rc = 0;
749 size_t packet_size_length;
750
751 (*packet_length) = 0;
752 if ((13 + contents_length) > max) {
753 rc = -EINVAL;
754 ecryptfs_printk(KERN_ERR, "Packet length larger than "
755 "maximum allowable\n");
756 goto out;
757 }
758 /* General packet header */
759 /* Packet tag */
760 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
761 /* Packet length */
762 rc = write_packet_length(&dest[(*packet_length)],
763 (13 + contents_length), &packet_size_length);
764 if (rc) {
765 ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet "
766 "header; cannot generate packet length\n");
767 goto out;
768 }
769 (*packet_length) += packet_size_length;
770 /* Tag 11 specific */
771 /* One-octet field that describes how the data is formatted */
772 dest[(*packet_length)++] = 0x62; /* binary data */
773 /* One-octet filename length followed by filename */
774 dest[(*packet_length)++] = 8;
775 memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
776 (*packet_length) += 8;
777 /* Four-octet number indicating modification date */
778 memset(&dest[(*packet_length)], 0x00, 4);
779 (*packet_length) += 4;
780 /* Remainder is literal data */
781 memcpy(&dest[(*packet_length)], contents, contents_length);
782 (*packet_length) += contents_length;
783 out:
784 if (rc)
785 (*packet_length) = 0;
786 return rc;
787}
788
789/**
790 * write_tag_3_packet
791 * @dest: Buffer into which to write the packet
792 * @max: Maximum number of bytes that can be written
793 * @auth_tok: Authentication token
794 * @crypt_stat: The cryptographic context
795 * @key_rec: encrypted key
796 * @packet_size: This function will write the number of bytes that end
797 * up constituting the packet; set to zero on error
798 *
799 * Returns zero on success; non-zero on error.
800 */
801static int
802write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
803 struct ecryptfs_crypt_stat *crypt_stat,
804 struct ecryptfs_key_record *key_rec, size_t *packet_size)
805{
806 int rc = 0;
807
808 size_t i;
809 size_t signature_is_valid = 0;
810 size_t encrypted_session_key_valid = 0;
811 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
812 struct scatterlist dest_sg[2];
813 struct scatterlist src_sg[2];
814 struct crypto_tfm *tfm = NULL;
815 struct mutex *tfm_mutex = NULL;
816 size_t key_rec_size;
817 size_t packet_size_length;
818 size_t cipher_code;
819
820 (*packet_size) = 0;
821 /* Check for a valid signature on the auth_tok */
822 for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
823 signature_is_valid |= auth_tok->token.password.signature[i];
824 if (!signature_is_valid)
825 BUG();
826 ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
827 ECRYPTFS_SIG_SIZE);
828 encrypted_session_key_valid = 0;
829 for (i = 0; i < crypt_stat->key_size; i++)
830 encrypted_session_key_valid |=
831 auth_tok->session_key.encrypted_key[i];
832 if (encrypted_session_key_valid) {
833 memcpy((*key_rec).enc_key,
834 auth_tok->session_key.encrypted_key,
835 auth_tok->session_key.encrypted_key_size);
836 goto encrypted_session_key_set;
837 }
838 if (auth_tok->session_key.encrypted_key_size == 0)
839 auth_tok->session_key.encrypted_key_size =
840 crypt_stat->key_size;
841 if (crypt_stat->key_size == 24
842 && strcmp("aes", crypt_stat->cipher) == 0) {
843 memset((crypt_stat->key + 24), 0, 8);
844 auth_tok->session_key.encrypted_key_size = 32;
845 }
846 (*key_rec).enc_key_size =
847 auth_tok->session_key.encrypted_key_size;
848 if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
849 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) {
850 ecryptfs_printk(KERN_DEBUG, "Using previously generated "
851 "session key encryption key of size [%d]\n",
852 auth_tok->token.password.
853 session_key_encryption_key_bytes);
854 memcpy(session_key_encryption_key,
855 auth_tok->token.password.session_key_encryption_key,
856 crypt_stat->key_size);
857 ecryptfs_printk(KERN_DEBUG,
858 "Cached session key " "encryption key: \n");
859 if (ecryptfs_verbosity > 0)
860 ecryptfs_dump_hex(session_key_encryption_key, 16);
861 }
862 if (unlikely(ecryptfs_verbosity > 0)) {
863 ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
864 ecryptfs_dump_hex(session_key_encryption_key, 16);
865 }
866 rc = virt_to_scatterlist(crypt_stat->key,
867 (*key_rec).enc_key_size, src_sg, 2);
868 if (!rc) {
869 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
870 "for crypt_stat session key\n");
871 rc = -ENOMEM;
872 goto out;
873 }
874 rc = virt_to_scatterlist((*key_rec).enc_key,
875 (*key_rec).enc_key_size, dest_sg, 2);
876 if (!rc) {
877 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
878 "for crypt_stat encrypted session key\n");
879 rc = -ENOMEM;
880 goto out;
881 }
882 if (!strcmp(crypt_stat->cipher,
883 crypt_stat->mount_crypt_stat->global_default_cipher_name)
884 && crypt_stat->mount_crypt_stat->global_key_tfm) {
885 tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
886 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
887 } else
888 tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
889 if (!tfm) {
890 ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
891 "context for cipher [%s]\n",
892 crypt_stat->cipher);
893 rc = -EINVAL;
894 goto out;
895 }
896 if (tfm_mutex)
897 mutex_lock(tfm_mutex);
898 rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
899 crypt_stat->key_size);
900 if (rc < 0) {
901 if (tfm_mutex)
902 mutex_unlock(tfm_mutex);
903 ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
904 "context\n");
905 goto out;
906 }
907 rc = 0;
908 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
909 crypt_stat->key_size);
910 crypto_cipher_encrypt(tfm, dest_sg, src_sg,
911 (*key_rec).enc_key_size);
912 if (tfm_mutex)
913 mutex_unlock(tfm_mutex);
914 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
915 if (ecryptfs_verbosity > 0)
916 ecryptfs_dump_hex((*key_rec).enc_key,
917 (*key_rec).enc_key_size);
918encrypted_session_key_set:
919 /* Now we have a valid key_rec. Append it to the
920 * key_rec set. */
921 key_rec_size = (sizeof(struct ecryptfs_key_record)
922 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
923 + ((*key_rec).enc_key_size));
924 /* TODO: Include a packet size limit as a parameter to this
925 * function once we have multi-packet headers (for versions
926 * later than 0.1 */
927 if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
928 ecryptfs_printk(KERN_ERR, "Keyset too large\n");
929 rc = -EINVAL;
930 goto out;
931 }
932 /* TODO: Packet size limit */
933 /* We have 5 bytes of surrounding packet data */
934 if ((0x05 + ECRYPTFS_SALT_SIZE
935 + (*key_rec).enc_key_size) >= max) {
936 ecryptfs_printk(KERN_ERR, "Authentication token is too "
937 "large\n");
938 rc = -EINVAL;
939 goto out;
940 }
941 /* This format is inspired by OpenPGP; see RFC 2440
942 * packet tag 3 */
943 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
944 /* ver+cipher+s2k+hash+salt+iter+enc_key */
945 rc = write_packet_length(&dest[(*packet_size)],
946 (0x05 + ECRYPTFS_SALT_SIZE
947 + (*key_rec).enc_key_size),
948 &packet_size_length);
949 if (rc) {
950 ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
951 "header; cannot generate packet length\n");
952 goto out;
953 }
954 (*packet_size) += packet_size_length;
955 dest[(*packet_size)++] = 0x04; /* version 4 */
956 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
957 if (cipher_code == 0) {
958 ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
959 "cipher [%s]\n", crypt_stat->cipher);
960 rc = -EINVAL;
961 goto out;
962 }
963 dest[(*packet_size)++] = cipher_code;
964 dest[(*packet_size)++] = 0x03; /* S2K */
965 dest[(*packet_size)++] = 0x01; /* MD5 (TODO: parameterize) */
966 memcpy(&dest[(*packet_size)], auth_tok->token.password.salt,
967 ECRYPTFS_SALT_SIZE);
968 (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */
969 dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */
970 memcpy(&dest[(*packet_size)], (*key_rec).enc_key,
971 (*key_rec).enc_key_size);
972 (*packet_size) += (*key_rec).enc_key_size;
973out:
974 if (tfm && !tfm_mutex)
975 crypto_free_tfm(tfm);
976 if (rc)
977 (*packet_size) = 0;
978 return rc;
979}
980
981/**
982 * ecryptfs_generate_key_packet_set
983 * @dest: Virtual address from which to write the key record set
984 * @crypt_stat: The cryptographic context from which the
985 * authentication tokens will be retrieved
986 * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
987 * for the global parameters
988 * @len: The amount written
989 * @max: The maximum amount of data allowed to be written
990 *
991 * Generates a key packet set and writes it to the virtual address
992 * passed in.
993 *
994 * Returns zero on success; non-zero on error.
995 */
996int
997ecryptfs_generate_key_packet_set(char *dest_base,
998 struct ecryptfs_crypt_stat *crypt_stat,
999 struct dentry *ecryptfs_dentry, size_t *len,
1000 size_t max)
1001{
1002 int rc = 0;
1003 struct ecryptfs_auth_tok *auth_tok;
1004 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1005 &ecryptfs_superblock_to_private(
1006 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1007 size_t written;
1008 struct ecryptfs_key_record key_rec;
1009
1010 (*len) = 0;
1011 if (mount_crypt_stat->global_auth_tok) {
1012 auth_tok = mount_crypt_stat->global_auth_tok;
1013 if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
1014 rc = write_tag_3_packet((dest_base + (*len)),
1015 max, auth_tok,
1016 crypt_stat, &key_rec,
1017 &written);
1018 if (rc) {
1019 ecryptfs_printk(KERN_WARNING, "Error "
1020 "writing tag 3 packet\n");
1021 goto out;
1022 }
1023 (*len) += written;
1024 /* Write auth tok signature packet */
1025 rc = write_tag_11_packet(
1026 (dest_base + (*len)),
1027 (max - (*len)),
1028 key_rec.sig, ECRYPTFS_SIG_SIZE, &written);
1029 if (rc) {
1030 ecryptfs_printk(KERN_ERR, "Error writing "
1031 "auth tok signature packet\n");
1032 goto out;
1033 }
1034 (*len) += written;
1035 } else {
1036 ecryptfs_printk(KERN_WARNING, "Unsupported "
1037 "authentication token type\n");
1038 rc = -EINVAL;
1039 goto out;
1040 }
1041 if (rc) {
1042 ecryptfs_printk(KERN_WARNING, "Error writing "
1043 "authentication token packet with sig "
1044 "= [%s]\n",
1045 mount_crypt_stat->global_auth_tok_sig);
1046 rc = -EIO;
1047 goto out;
1048 }
1049 } else
1050 BUG();
1051 if (likely((max - (*len)) > 0)) {
1052 dest_base[(*len)] = 0x00;
1053 } else {
1054 ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
1055 rc = -EIO;
1056 }
1057out:
1058 if (rc)
1059 (*len) = 0;
1060 return rc;
1061}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
new file mode 100644
index 000000000000..7a11b8ae6644
--- /dev/null
+++ b/fs/ecryptfs/main.c
@@ -0,0 +1,831 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/dcache.h>
27#include <linux/file.h>
28#include <linux/module.h>
29#include <linux/namei.h>
30#include <linux/skbuff.h>
31#include <linux/crypto.h>
32#include <linux/netlink.h>
33#include <linux/mount.h>
34#include <linux/dcache.h>
35#include <linux/pagemap.h>
36#include <linux/key.h>
37#include <linux/parser.h>
38#include "ecryptfs_kernel.h"
39
40/**
41 * Module parameter that defines the ecryptfs_verbosity level.
42 */
43int ecryptfs_verbosity = 0;
44
45module_param(ecryptfs_verbosity, int, 0);
46MODULE_PARM_DESC(ecryptfs_verbosity,
47 "Initial verbosity level (0 or 1; defaults to "
48 "0, which is Quiet)");
49
50void __ecryptfs_printk(const char *fmt, ...)
51{
52 va_list args;
53 va_start(args, fmt);
54 if (fmt[1] == '7') { /* KERN_DEBUG */
55 if (ecryptfs_verbosity >= 1)
56 vprintk(fmt, args);
57 } else
58 vprintk(fmt, args);
59 va_end(args);
60}
61
62/**
63 * ecryptfs_interpose
64 * @lower_dentry: Existing dentry in the lower filesystem
65 * @dentry: ecryptfs' dentry
66 * @sb: ecryptfs's super_block
67 * @flag: If set to true, then d_add is called, else d_instantiate is called
68 *
69 * Interposes upper and lower dentries.
70 *
71 * Returns zero on success; non-zero otherwise
72 */
73int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
74 struct super_block *sb, int flag)
75{
76 struct inode *lower_inode;
77 struct inode *inode;
78 int rc = 0;
79
80 lower_inode = lower_dentry->d_inode;
81 if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) {
82 rc = -EXDEV;
83 goto out;
84 }
85 if (!igrab(lower_inode)) {
86 rc = -ESTALE;
87 goto out;
88 }
89 inode = iget5_locked(sb, (unsigned long)lower_inode,
90 ecryptfs_inode_test, ecryptfs_inode_set,
91 lower_inode);
92 if (!inode) {
93 rc = -EACCES;
94 iput(lower_inode);
95 goto out;
96 }
97 if (inode->i_state & I_NEW)
98 unlock_new_inode(inode);
99 else
100 iput(lower_inode);
101 if (S_ISLNK(lower_inode->i_mode))
102 inode->i_op = &ecryptfs_symlink_iops;
103 else if (S_ISDIR(lower_inode->i_mode))
104 inode->i_op = &ecryptfs_dir_iops;
105 if (S_ISDIR(lower_inode->i_mode))
106 inode->i_fop = &ecryptfs_dir_fops;
107 /* TODO: Is there a better way to identify if the inode is
108 * special? */
109 if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
110 S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
111 init_special_inode(inode, lower_inode->i_mode,
112 lower_inode->i_rdev);
113 dentry->d_op = &ecryptfs_dops;
114 if (flag)
115 d_add(dentry, inode);
116 else
117 d_instantiate(dentry, inode);
118 ecryptfs_copy_attr_all(inode, lower_inode);
119 /* This size will be overwritten for real files w/ headers and
120 * other metadata */
121 ecryptfs_copy_inode_size(inode, lower_inode);
122out:
123 return rc;
124}
125
126enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug,
127 ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher,
128 ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes,
129 ecryptfs_opt_passthrough, ecryptfs_opt_err };
130
131static match_table_t tokens = {
132 {ecryptfs_opt_sig, "sig=%s"},
133 {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"},
134 {ecryptfs_opt_debug, "debug=%u"},
135 {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"},
136 {ecryptfs_opt_cipher, "cipher=%s"},
137 {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"},
138 {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"},
139 {ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
140 {ecryptfs_opt_err, NULL}
141};
142
143/**
144 * ecryptfs_verify_version
145 * @version: The version number to confirm
146 *
147 * Returns zero on good version; non-zero otherwise
148 */
149static int ecryptfs_verify_version(u16 version)
150{
151 int rc = 0;
152 unsigned char major;
153 unsigned char minor;
154
155 major = ((version >> 8) & 0xFF);
156 minor = (version & 0xFF);
157 if (major != ECRYPTFS_VERSION_MAJOR) {
158 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
159 "Expected [%d]; got [%d]\n",
160 ECRYPTFS_VERSION_MAJOR, major);
161 rc = -EINVAL;
162 goto out;
163 }
164 if (minor != ECRYPTFS_VERSION_MINOR) {
165 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
166 "Expected [%d]; got [%d]\n",
167 ECRYPTFS_VERSION_MINOR, minor);
168 rc = -EINVAL;
169 goto out;
170 }
171out:
172 return rc;
173}
174
175/**
176 * ecryptfs_parse_options
177 * @sb: The ecryptfs super block
178 * @options: The options pased to the kernel
179 *
180 * Parse mount options:
181 * debug=N - ecryptfs_verbosity level for debug output
182 * sig=XXX - description(signature) of the key to use
183 *
184 * Returns the dentry object of the lower-level (lower/interposed)
185 * directory; We want to mount our stackable file system on top of
186 * that lower directory.
187 *
188 * The signature of the key to use must be the description of a key
189 * already in the keyring. Mounting will fail if the key can not be
190 * found.
191 *
192 * Returns zero on success; non-zero on error
193 */
194static int ecryptfs_parse_options(struct super_block *sb, char *options)
195{
196 char *p;
197 int rc = 0;
198 int sig_set = 0;
199 int cipher_name_set = 0;
200 int cipher_key_bytes;
201 int cipher_key_bytes_set = 0;
202 struct key *auth_tok_key = NULL;
203 struct ecryptfs_auth_tok *auth_tok = NULL;
204 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
205 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
206 substring_t args[MAX_OPT_ARGS];
207 int token;
208 char *sig_src;
209 char *sig_dst;
210 char *debug_src;
211 char *cipher_name_dst;
212 char *cipher_name_src;
213 char *cipher_key_bytes_src;
214 struct crypto_tfm *tmp_tfm;
215 int cipher_name_len;
216
217 if (!options) {
218 rc = -EINVAL;
219 goto out;
220 }
221 while ((p = strsep(&options, ",")) != NULL) {
222 if (!*p)
223 continue;
224 token = match_token(p, tokens, args);
225 switch (token) {
226 case ecryptfs_opt_sig:
227 case ecryptfs_opt_ecryptfs_sig:
228 sig_src = args[0].from;
229 sig_dst =
230 mount_crypt_stat->global_auth_tok_sig;
231 memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX);
232 sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0';
233 ecryptfs_printk(KERN_DEBUG,
234 "The mount_crypt_stat "
235 "global_auth_tok_sig set to: "
236 "[%s]\n", sig_dst);
237 sig_set = 1;
238 break;
239 case ecryptfs_opt_debug:
240 case ecryptfs_opt_ecryptfs_debug:
241 debug_src = args[0].from;
242 ecryptfs_verbosity =
243 (int)simple_strtol(debug_src, &debug_src,
244 0);
245 ecryptfs_printk(KERN_DEBUG,
246 "Verbosity set to [%d]" "\n",
247 ecryptfs_verbosity);
248 break;
249 case ecryptfs_opt_cipher:
250 case ecryptfs_opt_ecryptfs_cipher:
251 cipher_name_src = args[0].from;
252 cipher_name_dst =
253 mount_crypt_stat->
254 global_default_cipher_name;
255 strncpy(cipher_name_dst, cipher_name_src,
256 ECRYPTFS_MAX_CIPHER_NAME_SIZE);
257 ecryptfs_printk(KERN_DEBUG,
258 "The mount_crypt_stat "
259 "global_default_cipher_name set to: "
260 "[%s]\n", cipher_name_dst);
261 cipher_name_set = 1;
262 break;
263 case ecryptfs_opt_ecryptfs_key_bytes:
264 cipher_key_bytes_src = args[0].from;
265 cipher_key_bytes =
266 (int)simple_strtol(cipher_key_bytes_src,
267 &cipher_key_bytes_src, 0);
268 mount_crypt_stat->global_default_cipher_key_size =
269 cipher_key_bytes;
270 ecryptfs_printk(KERN_DEBUG,
271 "The mount_crypt_stat "
272 "global_default_cipher_key_size "
273 "set to: [%d]\n", mount_crypt_stat->
274 global_default_cipher_key_size);
275 cipher_key_bytes_set = 1;
276 break;
277 case ecryptfs_opt_passthrough:
278 mount_crypt_stat->flags |=
279 ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED;
280 break;
281 case ecryptfs_opt_err:
282 default:
283 ecryptfs_printk(KERN_WARNING,
284 "eCryptfs: unrecognized option '%s'\n",
285 p);
286 }
287 }
288 /* Do not support lack of mount-wide signature in 0.1
289 * release */
290 if (!sig_set) {
291 rc = -EINVAL;
292 ecryptfs_printk(KERN_ERR, "You must supply a valid "
293 "passphrase auth tok signature as a mount "
294 "parameter; see the eCryptfs README\n");
295 goto out;
296 }
297 if (!cipher_name_set) {
298 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
299 if (unlikely(cipher_name_len
300 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
301 rc = -EINVAL;
302 BUG();
303 goto out;
304 }
305 memcpy(mount_crypt_stat->global_default_cipher_name,
306 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
307 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
308 = '\0';
309 }
310 if (!cipher_key_bytes_set) {
311 mount_crypt_stat->global_default_cipher_key_size =
312 ECRYPTFS_DEFAULT_KEY_BYTES;
313 ecryptfs_printk(KERN_DEBUG, "Cipher key size was not "
314 "specified. Defaulting to [%d]\n",
315 mount_crypt_stat->
316 global_default_cipher_key_size);
317 }
318 rc = ecryptfs_process_cipher(
319 &tmp_tfm,
320 &mount_crypt_stat->global_key_tfm,
321 mount_crypt_stat->global_default_cipher_name,
322 mount_crypt_stat->global_default_cipher_key_size);
323 if (tmp_tfm)
324 crypto_free_tfm(tmp_tfm);
325 if (rc) {
326 printk(KERN_ERR "Error attempting to initialize cipher [%s] "
327 "with key size [%Zd] bytes; rc = [%d]\n",
328 mount_crypt_stat->global_default_cipher_name,
329 mount_crypt_stat->global_default_cipher_key_size, rc);
330 rc = -EINVAL;
331 goto out;
332 }
333 mutex_init(&mount_crypt_stat->global_key_tfm_mutex);
334 ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: "
335 "[%s]\n", mount_crypt_stat->global_auth_tok_sig);
336 /* The reference to this key is held until umount is done The
337 * call to key_put is done in ecryptfs_put_super() */
338 auth_tok_key = request_key(&key_type_user,
339 mount_crypt_stat->global_auth_tok_sig,
340 NULL);
341 if (!auth_tok_key || IS_ERR(auth_tok_key)) {
342 ecryptfs_printk(KERN_ERR, "Could not find key with "
343 "description: [%s]\n",
344 mount_crypt_stat->global_auth_tok_sig);
345 process_request_key_err(PTR_ERR(auth_tok_key));
346 rc = -EINVAL;
347 goto out;
348 }
349 auth_tok = ecryptfs_get_key_payload_data(auth_tok_key);
350 if (ecryptfs_verify_version(auth_tok->version)) {
351 ecryptfs_printk(KERN_ERR, "Data structure version mismatch. "
352 "Userspace tools must match eCryptfs kernel "
353 "module with major version [%d] and minor "
354 "version [%d]\n", ECRYPTFS_VERSION_MAJOR,
355 ECRYPTFS_VERSION_MINOR);
356 rc = -EINVAL;
357 goto out;
358 }
359 if (auth_tok->token_type != ECRYPTFS_PASSWORD) {
360 ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
361 "returned from key\n");
362 rc = -EINVAL;
363 goto out;
364 }
365 mount_crypt_stat->global_auth_tok_key = auth_tok_key;
366 mount_crypt_stat->global_auth_tok = auth_tok;
367out:
368 return rc;
369}
370
371struct kmem_cache *ecryptfs_sb_info_cache;
372
373/**
374 * ecryptfs_fill_super
375 * @sb: The ecryptfs super block
376 * @raw_data: The options passed to mount
377 * @silent: Not used but required by function prototype
378 *
379 * Sets up what we can of the sb, rest is done in ecryptfs_read_super
380 *
381 * Returns zero on success; non-zero otherwise
382 */
383static int
384ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
385{
386 int rc = 0;
387
388 /* Released in ecryptfs_put_super() */
389 ecryptfs_set_superblock_private(sb,
390 kmem_cache_alloc(ecryptfs_sb_info_cache,
391 SLAB_KERNEL));
392 if (!ecryptfs_superblock_to_private(sb)) {
393 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
394 rc = -ENOMEM;
395 goto out;
396 }
397 memset(ecryptfs_superblock_to_private(sb), 0,
398 sizeof(struct ecryptfs_sb_info));
399 sb->s_op = &ecryptfs_sops;
400 /* Released through deactivate_super(sb) from get_sb_nodev */
401 sb->s_root = d_alloc(NULL, &(const struct qstr) {
402 .hash = 0,.name = "/",.len = 1});
403 if (!sb->s_root) {
404 ecryptfs_printk(KERN_ERR, "d_alloc failed\n");
405 rc = -ENOMEM;
406 goto out;
407 }
408 sb->s_root->d_op = &ecryptfs_dops;
409 sb->s_root->d_sb = sb;
410 sb->s_root->d_parent = sb->s_root;
411 /* Released in d_release when dput(sb->s_root) is called */
412 /* through deactivate_super(sb) from get_sb_nodev() */
413 ecryptfs_set_dentry_private(sb->s_root,
414 kmem_cache_alloc(ecryptfs_dentry_info_cache,
415 SLAB_KERNEL));
416 if (!ecryptfs_dentry_to_private(sb->s_root)) {
417 ecryptfs_printk(KERN_ERR,
418 "dentry_info_cache alloc failed\n");
419 rc = -ENOMEM;
420 goto out;
421 }
422 memset(ecryptfs_dentry_to_private(sb->s_root), 0,
423 sizeof(struct ecryptfs_dentry_info));
424 rc = 0;
425out:
426 /* Should be able to rely on deactivate_super called from
427 * get_sb_nodev */
428 return rc;
429}
430
431/**
432 * ecryptfs_read_super
433 * @sb: The ecryptfs super block
434 * @dev_name: The path to mount over
435 *
436 * Read the super block of the lower filesystem, and use
437 * ecryptfs_interpose to create our initial inode and super block
438 * struct.
439 */
440static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
441{
442 int rc;
443 struct nameidata nd;
444 struct dentry *lower_root;
445 struct vfsmount *lower_mnt;
446
447 memset(&nd, 0, sizeof(struct nameidata));
448 rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
449 if (rc) {
450 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
451 goto out_free;
452 }
453 lower_root = nd.dentry;
454 if (!lower_root->d_inode) {
455 ecryptfs_printk(KERN_WARNING,
456 "No directory to interpose on\n");
457 rc = -ENOENT;
458 goto out_free;
459 }
460 lower_mnt = nd.mnt;
461 ecryptfs_set_superblock_lower(sb, lower_root->d_sb);
462 sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
463 ecryptfs_set_dentry_lower(sb->s_root, lower_root);
464 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
465 if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0)))
466 goto out_free;
467 rc = 0;
468 goto out;
469out_free:
470 path_release(&nd);
471out:
472 return rc;
473}
474
475/**
476 * ecryptfs_get_sb
477 * @fs_type
478 * @flags
479 * @dev_name: The path to mount over
480 * @raw_data: The options passed into the kernel
481 *
482 * The whole ecryptfs_get_sb process is broken into 4 functions:
483 * ecryptfs_parse_options(): handle options passed to ecryptfs, if any
484 * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block
485 * with as much information as it can before needing
486 * the lower filesystem.
487 * ecryptfs_read_super(): this accesses the lower filesystem and uses
488 * ecryptfs_interpolate to perform most of the linking
489 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs
490 */
491static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
492 const char *dev_name, void *raw_data,
493 struct vfsmount *mnt)
494{
495 int rc;
496 struct super_block *sb;
497
498 rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt);
499 if (rc < 0) {
500 printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc);
501 goto out;
502 }
503 sb = mnt->mnt_sb;
504 rc = ecryptfs_parse_options(sb, raw_data);
505 if (rc) {
506 printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc);
507 goto out_abort;
508 }
509 rc = ecryptfs_read_super(sb, dev_name);
510 if (rc) {
511 printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc);
512 goto out_abort;
513 }
514 goto out;
515out_abort:
516 dput(sb->s_root);
517 up_write(&sb->s_umount);
518 deactivate_super(sb);
519out:
520 return rc;
521}
522
523/**
524 * ecryptfs_kill_block_super
525 * @sb: The ecryptfs super block
526 *
527 * Used to bring the superblock down and free the private data.
528 * Private data is free'd in ecryptfs_put_super()
529 */
530static void ecryptfs_kill_block_super(struct super_block *sb)
531{
532 generic_shutdown_super(sb);
533}
534
535static struct file_system_type ecryptfs_fs_type = {
536 .owner = THIS_MODULE,
537 .name = "ecryptfs",
538 .get_sb = ecryptfs_get_sb,
539 .kill_sb = ecryptfs_kill_block_super,
540 .fs_flags = 0
541};
542
543/**
544 * inode_info_init_once
545 *
546 * Initializes the ecryptfs_inode_info_cache when it is created
547 */
548static void
549inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
550{
551 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
552
553 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
554 SLAB_CTOR_CONSTRUCTOR)
555 inode_init_once(&ei->vfs_inode);
556}
557
558static struct ecryptfs_cache_info {
559 kmem_cache_t **cache;
560 const char *name;
561 size_t size;
562 void (*ctor)(void*, struct kmem_cache *, unsigned long);
563} ecryptfs_cache_infos[] = {
564 {
565 .cache = &ecryptfs_auth_tok_list_item_cache,
566 .name = "ecryptfs_auth_tok_list_item",
567 .size = sizeof(struct ecryptfs_auth_tok_list_item),
568 },
569 {
570 .cache = &ecryptfs_file_info_cache,
571 .name = "ecryptfs_file_cache",
572 .size = sizeof(struct ecryptfs_file_info),
573 },
574 {
575 .cache = &ecryptfs_dentry_info_cache,
576 .name = "ecryptfs_dentry_info_cache",
577 .size = sizeof(struct ecryptfs_dentry_info),
578 },
579 {
580 .cache = &ecryptfs_inode_info_cache,
581 .name = "ecryptfs_inode_cache",
582 .size = sizeof(struct ecryptfs_inode_info),
583 .ctor = inode_info_init_once,
584 },
585 {
586 .cache = &ecryptfs_sb_info_cache,
587 .name = "ecryptfs_sb_cache",
588 .size = sizeof(struct ecryptfs_sb_info),
589 },
590 {
591 .cache = &ecryptfs_header_cache_0,
592 .name = "ecryptfs_headers_0",
593 .size = PAGE_CACHE_SIZE,
594 },
595 {
596 .cache = &ecryptfs_header_cache_1,
597 .name = "ecryptfs_headers_1",
598 .size = PAGE_CACHE_SIZE,
599 },
600 {
601 .cache = &ecryptfs_header_cache_2,
602 .name = "ecryptfs_headers_2",
603 .size = PAGE_CACHE_SIZE,
604 },
605 {
606 .cache = &ecryptfs_lower_page_cache,
607 .name = "ecryptfs_lower_page_cache",
608 .size = PAGE_CACHE_SIZE,
609 },
610};
611
612static void ecryptfs_free_kmem_caches(void)
613{
614 int i;
615
616 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
617 struct ecryptfs_cache_info *info;
618
619 info = &ecryptfs_cache_infos[i];
620 if (*(info->cache))
621 kmem_cache_destroy(*(info->cache));
622 }
623}
624
625/**
626 * ecryptfs_init_kmem_caches
627 *
628 * Returns zero on success; non-zero otherwise
629 */
630static int ecryptfs_init_kmem_caches(void)
631{
632 int i;
633
634 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
635 struct ecryptfs_cache_info *info;
636
637 info = &ecryptfs_cache_infos[i];
638 *(info->cache) = kmem_cache_create(info->name, info->size,
639 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL);
640 if (!*(info->cache)) {
641 ecryptfs_free_kmem_caches();
642 ecryptfs_printk(KERN_WARNING, "%s: "
643 "kmem_cache_create failed\n",
644 info->name);
645 return -ENOMEM;
646 }
647 }
648 return 0;
649}
650
651struct ecryptfs_obj {
652 char *name;
653 struct list_head slot_list;
654 struct kobject kobj;
655};
656
657struct ecryptfs_attribute {
658 struct attribute attr;
659 ssize_t(*show) (struct ecryptfs_obj *, char *);
660 ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t);
661};
662
663static ssize_t
664ecryptfs_attr_store(struct kobject *kobj,
665 struct attribute *attr, const char *buf, size_t len)
666{
667 struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
668 kobj);
669 struct ecryptfs_attribute *attribute =
670 container_of(attr, struct ecryptfs_attribute, attr);
671
672 return (attribute->store ? attribute->store(obj, buf, len) : 0);
673}
674
675static ssize_t
676ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
677{
678 struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
679 kobj);
680 struct ecryptfs_attribute *attribute =
681 container_of(attr, struct ecryptfs_attribute, attr);
682
683 return (attribute->show ? attribute->show(obj, buf) : 0);
684}
685
686static struct sysfs_ops ecryptfs_sysfs_ops = {
687 .show = ecryptfs_attr_show,
688 .store = ecryptfs_attr_store
689};
690
691static struct kobj_type ecryptfs_ktype = {
692 .sysfs_ops = &ecryptfs_sysfs_ops
693};
694
695static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL);
696
697static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
698{
699 return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
700}
701
702static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
703
704struct ecryptfs_version_str_map_elem {
705 u32 flag;
706 char *str;
707} ecryptfs_version_str_map[] = {
708 {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
709 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
710 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
711 {ECRYPTFS_VERSIONING_POLICY, "policy"}
712};
713
714static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
715{
716 int i;
717 int remaining = PAGE_SIZE;
718 int total_written = 0;
719
720 buff[0] = '\0';
721 for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) {
722 int entry_size;
723
724 if (!(ECRYPTFS_VERSIONING_MASK
725 & ecryptfs_version_str_map[i].flag))
726 continue;
727 entry_size = strlen(ecryptfs_version_str_map[i].str);
728 if ((entry_size + 2) > remaining)
729 goto out;
730 memcpy(buff, ecryptfs_version_str_map[i].str, entry_size);
731 buff[entry_size++] = '\n';
732 buff[entry_size] = '\0';
733 buff += entry_size;
734 total_written += entry_size;
735 remaining -= entry_size;
736 }
737out:
738 return total_written;
739}
740
741static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str);
742
743static int do_sysfs_registration(void)
744{
745 int rc;
746
747 if ((rc = subsystem_register(&ecryptfs_subsys))) {
748 printk(KERN_ERR
749 "Unable to register ecryptfs sysfs subsystem\n");
750 goto out;
751 }
752 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
753 &sysfs_attr_version.attr);
754 if (rc) {
755 printk(KERN_ERR
756 "Unable to create ecryptfs version attribute\n");
757 subsystem_unregister(&ecryptfs_subsys);
758 goto out;
759 }
760 rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
761 &sysfs_attr_version_str.attr);
762 if (rc) {
763 printk(KERN_ERR
764 "Unable to create ecryptfs version_str attribute\n");
765 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
766 &sysfs_attr_version.attr);
767 subsystem_unregister(&ecryptfs_subsys);
768 goto out;
769 }
770out:
771 return rc;
772}
773
774static int __init ecryptfs_init(void)
775{
776 int rc;
777
778 if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) {
779 rc = -EINVAL;
780 ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
781 "larger than the host's page size, and so "
782 "eCryptfs cannot run on this system. The "
783 "default eCryptfs extent size is [%d] bytes; "
784 "the page size is [%d] bytes.\n",
785 ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
786 goto out;
787 }
788 rc = ecryptfs_init_kmem_caches();
789 if (rc) {
790 printk(KERN_ERR
791 "Failed to allocate one or more kmem_cache objects\n");
792 goto out;
793 }
794 rc = register_filesystem(&ecryptfs_fs_type);
795 if (rc) {
796 printk(KERN_ERR "Failed to register filesystem\n");
797 ecryptfs_free_kmem_caches();
798 goto out;
799 }
800 kset_set_kset_s(&ecryptfs_subsys, fs_subsys);
801 sysfs_attr_version.attr.owner = THIS_MODULE;
802 sysfs_attr_version_str.attr.owner = THIS_MODULE;
803 rc = do_sysfs_registration();
804 if (rc) {
805 printk(KERN_ERR "sysfs registration failed\n");
806 unregister_filesystem(&ecryptfs_fs_type);
807 ecryptfs_free_kmem_caches();
808 goto out;
809 }
810out:
811 return rc;
812}
813
814static void __exit ecryptfs_exit(void)
815{
816 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
817 &sysfs_attr_version.attr);
818 sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
819 &sysfs_attr_version_str.attr);
820 subsystem_unregister(&ecryptfs_subsys);
821 unregister_filesystem(&ecryptfs_fs_type);
822 ecryptfs_free_kmem_caches();
823}
824
825MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>");
826MODULE_DESCRIPTION("eCryptfs");
827
828MODULE_LICENSE("GPL");
829
830module_init(ecryptfs_init)
831module_exit(ecryptfs_exit)
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
new file mode 100644
index 000000000000..924dd90a4cf5
--- /dev/null
+++ b/fs/ecryptfs/mmap.c
@@ -0,0 +1,788 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 * This is where eCryptfs coordinates the symmetric encryption and
4 * decryption of the file data as it passes between the lower
5 * encrypted file and the upper decrypted file.
6 *
7 * Copyright (C) 1997-2003 Erez Zadok
8 * Copyright (C) 2001-2003 Stony Brook University
9 * Copyright (C) 2004-2006 International Business Machines Corp.
10 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 * 02111-1307, USA.
26 */
27
28#include <linux/pagemap.h>
29#include <linux/writeback.h>
30#include <linux/page-flags.h>
31#include <linux/mount.h>
32#include <linux/file.h>
33#include <linux/crypto.h>
34#include <linux/scatterlist.h>
35#include "ecryptfs_kernel.h"
36
37struct kmem_cache *ecryptfs_lower_page_cache;
38
39/**
40 * ecryptfs_get1page
41 *
42 * Get one page from cache or lower f/s, return error otherwise.
43 *
44 * Returns unlocked and up-to-date page (if ok), with increased
45 * refcnt.
46 */
47static struct page *ecryptfs_get1page(struct file *file, int index)
48{
49 struct page *page;
50 struct dentry *dentry;
51 struct inode *inode;
52 struct address_space *mapping;
53
54 dentry = file->f_dentry;
55 inode = dentry->d_inode;
56 mapping = inode->i_mapping;
57 page = read_cache_page(mapping, index,
58 (filler_t *)mapping->a_ops->readpage,
59 (void *)file);
60 if (IS_ERR(page))
61 goto out;
62 wait_on_page_locked(page);
63out:
64 return page;
65}
66
67static
68int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
69
70/**
71 * ecryptfs_fill_zeros
72 * @file: The ecryptfs file
73 * @new_length: The new length of the data in the underlying file;
74 * everything between the prior end of the file and the
75 * new end of the file will be filled with zero's.
76 * new_length must be greater than current length
77 *
78 * Function for handling lseek-ing past the end of the file.
79 *
80 * This function does not support shrinking, only growing a file.
81 *
82 * Returns zero on success; non-zero otherwise.
83 */
84int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
85{
86 int rc = 0;
87 struct dentry *dentry = file->f_dentry;
88 struct inode *inode = dentry->d_inode;
89 pgoff_t old_end_page_index = 0;
90 pgoff_t index = old_end_page_index;
91 int old_end_pos_in_page = -1;
92 pgoff_t new_end_page_index;
93 int new_end_pos_in_page;
94 loff_t cur_length = i_size_read(inode);
95
96 if (cur_length != 0) {
97 index = old_end_page_index =
98 ((cur_length - 1) >> PAGE_CACHE_SHIFT);
99 old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK);
100 }
101 new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
102 new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
103 ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; "
104 "old_end_pos_in_page = [%d]; "
105 "new_end_page_index = [0x%.16x]; "
106 "new_end_pos_in_page = [%d]\n",
107 old_end_page_index, old_end_pos_in_page,
108 new_end_page_index, new_end_pos_in_page);
109 if (old_end_page_index == new_end_page_index) {
110 /* Start and end are in the same page; we just need to
111 * set a portion of the existing page to zero's */
112 rc = write_zeros(file, index, (old_end_pos_in_page + 1),
113 (new_end_pos_in_page - old_end_pos_in_page));
114 if (rc)
115 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
116 "index=[0x%.16x], "
117 "old_end_pos_in_page=[d], "
118 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
119 "=[%d]"
120 ")=[d]) returned [%d]\n", file, index,
121 old_end_pos_in_page,
122 new_end_pos_in_page,
123 (PAGE_CACHE_SIZE - new_end_pos_in_page),
124 rc);
125 goto out;
126 }
127 /* Fill the remainder of the previous last page with zeros */
128 rc = write_zeros(file, index, (old_end_pos_in_page + 1),
129 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
130 if (rc) {
131 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
132 "index=[0x%.16x], old_end_pos_in_page=[d], "
133 "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
134 "returned [%d]\n", file, index,
135 old_end_pos_in_page,
136 (PAGE_CACHE_SIZE - old_end_pos_in_page), rc);
137 goto out;
138 }
139 index++;
140 while (index < new_end_page_index) {
141 /* Fill all intermediate pages with zeros */
142 rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE);
143 if (rc) {
144 ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
145 "index=[0x%.16x], "
146 "old_end_pos_in_page=[d], "
147 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
148 "=[%d]"
149 ")=[d]) returned [%d]\n", file, index,
150 old_end_pos_in_page,
151 new_end_pos_in_page,
152 (PAGE_CACHE_SIZE - new_end_pos_in_page),
153 rc);
154 goto out;
155 }
156 index++;
157 }
158 /* Fill the portion at the beginning of the last new page with
159 * zero's */
160 rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1));
161 if (rc) {
162 ecryptfs_printk(KERN_ERR, "write_zeros(file="
163 "[%p], index=[0x%.16x], 0, "
164 "new_end_pos_in_page=[%d]"
165 "returned [%d]\n", file, index,
166 new_end_pos_in_page, rc);
167 goto out;
168 }
169out:
170 return rc;
171}
172
173/**
174 * ecryptfs_writepage
175 * @page: Page that is locked before this call is made
176 *
177 * Returns zero on success; non-zero otherwise
178 */
179static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
180{
181 struct ecryptfs_page_crypt_context ctx;
182 int rc;
183
184 ctx.page = page;
185 ctx.mode = ECRYPTFS_WRITEPAGE_MODE;
186 ctx.param.wbc = wbc;
187 rc = ecryptfs_encrypt_page(&ctx);
188 if (rc) {
189 ecryptfs_printk(KERN_WARNING, "Error encrypting "
190 "page (upper index [0x%.16x])\n", page->index);
191 ClearPageUptodate(page);
192 goto out;
193 }
194 SetPageUptodate(page);
195 unlock_page(page);
196out:
197 return rc;
198}
199
200/**
201 * Reads the data from the lower file file at index lower_page_index
202 * and copies that data into page.
203 *
204 * @param page Page to fill
205 * @param lower_page_index Index of the page in the lower file to get
206 */
207int ecryptfs_do_readpage(struct file *file, struct page *page,
208 pgoff_t lower_page_index)
209{
210 int rc;
211 struct dentry *dentry;
212 struct file *lower_file;
213 struct dentry *lower_dentry;
214 struct inode *inode;
215 struct inode *lower_inode;
216 char *page_data;
217 struct page *lower_page = NULL;
218 char *lower_page_data;
219 const struct address_space_operations *lower_a_ops;
220
221 dentry = file->f_dentry;
222 lower_file = ecryptfs_file_to_lower(file);
223 lower_dentry = ecryptfs_dentry_to_lower(dentry);
224 inode = dentry->d_inode;
225 lower_inode = ecryptfs_inode_to_lower(inode);
226 lower_a_ops = lower_inode->i_mapping->a_ops;
227 lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index,
228 (filler_t *)lower_a_ops->readpage,
229 (void *)lower_file);
230 if (IS_ERR(lower_page)) {
231 rc = PTR_ERR(lower_page);
232 lower_page = NULL;
233 ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
234 goto out;
235 }
236 wait_on_page_locked(lower_page);
237 page_data = (char *)kmap(page);
238 if (!page_data) {
239 rc = -ENOMEM;
240 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
241 goto out;
242 }
243 lower_page_data = (char *)kmap(lower_page);
244 if (!lower_page_data) {
245 rc = -ENOMEM;
246 ecryptfs_printk(KERN_ERR, "Error mapping page\n");
247 kunmap(page);
248 goto out;
249 }
250 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
251 kunmap(lower_page);
252 kunmap(page);
253 rc = 0;
254out:
255 if (likely(lower_page))
256 page_cache_release(lower_page);
257 if (rc == 0)
258 SetPageUptodate(page);
259 else
260 ClearPageUptodate(page);
261 return rc;
262}
263
264/**
265 * ecryptfs_readpage
266 * @file: This is an ecryptfs file
267 * @page: ecryptfs associated page to stick the read data into
268 *
269 * Read in a page, decrypting if necessary.
270 *
271 * Returns zero on success; non-zero on error.
272 */
273static int ecryptfs_readpage(struct file *file, struct page *page)
274{
275 int rc = 0;
276 struct ecryptfs_crypt_stat *crypt_stat;
277
278 BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode));
279 crypt_stat =
280 &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
281 if (!crypt_stat
282 || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
283 || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
284 ecryptfs_printk(KERN_DEBUG,
285 "Passing through unencrypted page\n");
286 rc = ecryptfs_do_readpage(file, page, page->index);
287 if (rc) {
288 ecryptfs_printk(KERN_ERR, "Error reading page; rc = "
289 "[%d]\n", rc);
290 goto out;
291 }
292 } else {
293 rc = ecryptfs_decrypt_page(file, page);
294 if (rc) {
295
296 ecryptfs_printk(KERN_ERR, "Error decrypting page; "
297 "rc = [%d]\n", rc);
298 goto out;
299 }
300 }
301 SetPageUptodate(page);
302out:
303 if (rc)
304 ClearPageUptodate(page);
305 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
306 page->index);
307 unlock_page(page);
308 return rc;
309}
310
311static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
312{
313 struct inode *inode = page->mapping->host;
314 int end_byte_in_page;
315 int rc = 0;
316 char *page_virt;
317
318 if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) {
319 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
320 if (to > end_byte_in_page)
321 end_byte_in_page = to;
322 page_virt = kmap(page);
323 if (!page_virt) {
324 rc = -ENOMEM;
325 ecryptfs_printk(KERN_WARNING,
326 "Could not map page\n");
327 goto out;
328 }
329 memset((page_virt + end_byte_in_page), 0,
330 (PAGE_CACHE_SIZE - end_byte_in_page));
331 kunmap(page);
332 }
333out:
334 return rc;
335}
336
337static int ecryptfs_prepare_write(struct file *file, struct page *page,
338 unsigned from, unsigned to)
339{
340 int rc = 0;
341
342 kmap(page);
343 if (from == 0 && to == PAGE_CACHE_SIZE)
344 goto out; /* If we are writing a full page, it will be
345 up to date. */
346 if (!PageUptodate(page))
347 rc = ecryptfs_do_readpage(file, page, page->index);
348out:
349 return rc;
350}
351
352int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
353 char **lower_virt,
354 struct inode *lower_inode,
355 unsigned long lower_page_index)
356{
357 int rc = 0;
358
359 (*lower_page) = grab_cache_page(lower_inode->i_mapping,
360 lower_page_index);
361 if (!(*lower_page)) {
362 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
363 "lower_page_index = [0x%.16x] failed\n",
364 lower_page_index);
365 rc = -EINVAL;
366 goto out;
367 }
368 if (lower_virt)
369 (*lower_virt) = kmap((*lower_page));
370 else
371 kmap((*lower_page));
372out:
373 return rc;
374}
375
376int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
377 struct inode *lower_inode,
378 struct writeback_control *wbc)
379{
380 int rc = 0;
381
382 rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
383 if (rc) {
384 ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
385 "rc = [%d]\n", rc);
386 goto out;
387 }
388 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
389 page_cache_release(lower_page);
390out:
391 return rc;
392}
393
394static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
395{
396 kunmap(lower_page);
397 ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
398 "[0x%.16x]\n", lower_page->index);
399 unlock_page(lower_page);
400 page_cache_release(lower_page);
401}
402
403/**
404 * ecryptfs_write_inode_size_to_header
405 *
406 * Writes the lower file size to the first 8 bytes of the header.
407 *
408 * Returns zero on success; non-zero on error.
409 */
410int
411ecryptfs_write_inode_size_to_header(struct file *lower_file,
412 struct inode *lower_inode,
413 struct inode *inode)
414{
415 int rc = 0;
416 struct page *header_page;
417 char *header_virt;
418 const struct address_space_operations *lower_a_ops;
419 u64 file_size;
420
421 rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt,
422 lower_inode, 0);
423 if (rc) {
424 ecryptfs_printk(KERN_ERR, "grab_cache_page for header page "
425 "failed\n");
426 goto out;
427 }
428 lower_a_ops = lower_inode->i_mapping->a_ops;
429 rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
430 file_size = (u64)i_size_read(inode);
431 ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
432 file_size = cpu_to_be64(file_size);
433 memcpy(header_virt, &file_size, sizeof(u64));
434 rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
435 if (rc < 0)
436 ecryptfs_printk(KERN_ERR, "Error commiting header page "
437 "write\n");
438 ecryptfs_unmap_and_release_lower_page(header_page);
439 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
440 mark_inode_dirty_sync(inode);
441out:
442 return rc;
443}
444
445int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
446 struct file *lower_file,
447 unsigned long lower_page_index, int byte_offset,
448 int region_bytes)
449{
450 int rc = 0;
451
452 rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode,
453 lower_page_index);
454 if (rc) {
455 ecryptfs_printk(KERN_ERR, "Error attempting to grab and map "
456 "lower page with index [0x%.16x]\n",
457 lower_page_index);
458 goto out;
459 }
460 rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
461 (*lower_page),
462 byte_offset,
463 region_bytes);
464 if (rc) {
465 ecryptfs_printk(KERN_ERR, "prepare_write for "
466 "lower_page_index = [0x%.16x] failed; rc = "
467 "[%d]\n", lower_page_index, rc);
468 }
469out:
470 if (rc && (*lower_page)) {
471 ecryptfs_unmap_and_release_lower_page(*lower_page);
472 (*lower_page) = NULL;
473 }
474 return rc;
475}
476
477/**
478 * ecryptfs_commit_lower_page
479 *
480 * Returns zero on success; non-zero on error
481 */
482int
483ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
484 struct file *lower_file, int byte_offset,
485 int region_size)
486{
487 int rc = 0;
488
489 rc = lower_inode->i_mapping->a_ops->commit_write(
490 lower_file, lower_page, byte_offset, region_size);
491 if (rc < 0) {
492 ecryptfs_printk(KERN_ERR,
493 "Error committing write; rc = [%d]\n", rc);
494 } else
495 rc = 0;
496 ecryptfs_unmap_and_release_lower_page(lower_page);
497 return rc;
498}
499
500/**
501 * ecryptfs_copy_page_to_lower
502 *
503 * Used for plaintext pass-through; no page index interpolation
504 * required.
505 */
506int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
507 struct file *lower_file)
508{
509 int rc = 0;
510 struct page *lower_page;
511
512 rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
513 page->index, 0, PAGE_CACHE_SIZE);
514 if (rc) {
515 ecryptfs_printk(KERN_ERR, "Error attempting to get page "
516 "at index [0x%.16x]\n", page->index);
517 goto out;
518 }
519 /* TODO: aops */
520 memcpy((char *)page_address(lower_page), page_address(page),
521 PAGE_CACHE_SIZE);
522 rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
523 0, PAGE_CACHE_SIZE);
524 if (rc)
525 ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
526 "at index [0x%.16x]\n", page->index);
527out:
528 return rc;
529}
530
531static int
532process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
533 struct file *file, struct inode *inode)
534{
535 struct page *header_page;
536 const struct address_space_operations *lower_a_ops;
537 struct inode *lower_inode;
538 struct file *lower_file;
539 char *header_virt;
540 int rc = 0;
541 int current_header_page = 0;
542 int header_pages;
543 int more_header_data_to_be_written = 1;
544
545 lower_inode = ecryptfs_inode_to_lower(inode);
546 lower_file = ecryptfs_file_to_lower(file);
547 lower_a_ops = lower_inode->i_mapping->a_ops;
548 header_pages = ((crypt_stat->header_extent_size
549 * crypt_stat->num_header_extents_at_front)
550 / PAGE_CACHE_SIZE);
551 BUG_ON(header_pages < 1);
552 while (current_header_page < header_pages) {
553 rc = ecryptfs_grab_and_map_lower_page(&header_page,
554 &header_virt,
555 lower_inode,
556 current_header_page);
557 if (rc) {
558 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
559 "header page [%d] failed; rc = [%d]\n",
560 current_header_page, rc);
561 goto out;
562 }
563 rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
564 PAGE_CACHE_SIZE);
565 if (rc) {
566 ecryptfs_printk(KERN_ERR, "Error preparing to write "
567 "header page out; rc = [%d]\n", rc);
568 goto out;
569 }
570 memset(header_virt, 0, PAGE_CACHE_SIZE);
571 if (more_header_data_to_be_written) {
572 rc = ecryptfs_write_headers_virt(header_virt,
573 crypt_stat,
574 file->f_dentry);
575 if (rc) {
576 ecryptfs_printk(KERN_WARNING, "Error "
577 "generating header; rc = "
578 "[%d]\n", rc);
579 rc = -EIO;
580 memset(header_virt, 0, PAGE_CACHE_SIZE);
581 ecryptfs_unmap_and_release_lower_page(
582 header_page);
583 goto out;
584 }
585 if (current_header_page == 0)
586 memset(header_virt, 0, 8);
587 more_header_data_to_be_written = 0;
588 }
589 rc = lower_a_ops->commit_write(lower_file, header_page, 0,
590 PAGE_CACHE_SIZE);
591 ecryptfs_unmap_and_release_lower_page(header_page);
592 if (rc < 0) {
593 ecryptfs_printk(KERN_ERR,
594 "Error commiting header page write; "
595 "rc = [%d]\n", rc);
596 break;
597 }
598 current_header_page++;
599 }
600 if (rc >= 0) {
601 rc = 0;
602 ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
603 "[0x%.16x]\n", lower_inode->i_blocks);
604 i_size_write(inode, 0);
605 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
606 mark_inode_dirty_sync(inode);
607 }
608 ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
609 "crypt_stat at memory location [%p]\n", crypt_stat);
610 ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
611out:
612 return rc;
613}
614
615/**
616 * ecryptfs_commit_write
617 * @file: The eCryptfs file object
618 * @page: The eCryptfs page
619 * @from: Ignored (we rotate the page IV on each write)
620 * @to: Ignored
621 *
622 * This is where we encrypt the data and pass the encrypted data to
623 * the lower filesystem. In OpenPGP-compatible mode, we operate on
624 * entire underlying packets.
625 */
626static int ecryptfs_commit_write(struct file *file, struct page *page,
627 unsigned from, unsigned to)
628{
629 struct ecryptfs_page_crypt_context ctx;
630 loff_t pos;
631 struct inode *inode;
632 struct inode *lower_inode;
633 struct file *lower_file;
634 struct ecryptfs_crypt_stat *crypt_stat;
635 int rc;
636
637 inode = page->mapping->host;
638 lower_inode = ecryptfs_inode_to_lower(inode);
639 lower_file = ecryptfs_file_to_lower(file);
640 mutex_lock(&lower_inode->i_mutex);
641 crypt_stat =
642 &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
643 if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
644 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
645 "crypt_stat at memory location [%p]\n", crypt_stat);
646 rc = process_new_file(crypt_stat, file, inode);
647 if (rc) {
648 ecryptfs_printk(KERN_ERR, "Error processing new "
649 "file; rc = [%d]\n", rc);
650 goto out;
651 }
652 } else
653 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
654 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
655 "(page w/ index = [0x%.16x], to = [%d])\n", page->index,
656 to);
657 rc = fill_zeros_to_end_of_page(page, to);
658 if (rc) {
659 ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
660 "zeros in page with index = [0x%.16x]\n",
661 page->index);
662 goto out;
663 }
664 ctx.page = page;
665 ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
666 ctx.param.lower_file = lower_file;
667 rc = ecryptfs_encrypt_page(&ctx);
668 if (rc) {
669 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
670 "index [0x%.16x])\n", page->index);
671 goto out;
672 }
673 rc = 0;
674 inode->i_blocks = lower_inode->i_blocks;
675 pos = (page->index << PAGE_CACHE_SHIFT) + to;
676 if (pos > i_size_read(inode)) {
677 i_size_write(inode, pos);
678 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
679 "[0x%.16x]\n", i_size_read(inode));
680 }
681 ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
682 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
683 mark_inode_dirty_sync(inode);
684out:
685 kunmap(page); /* mapped in prior call (prepare_write) */
686 if (rc < 0)
687 ClearPageUptodate(page);
688 else
689 SetPageUptodate(page);
690 mutex_unlock(&lower_inode->i_mutex);
691 return rc;
692}
693
694/**
695 * write_zeros
696 * @file: The ecryptfs file
697 * @index: The index in which we are writing
698 * @start: The position after the last block of data
699 * @num_zeros: The number of zeros to write
700 *
701 * Write a specified number of zero's to a page.
702 *
703 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
704 */
705static
706int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
707{
708 int rc = 0;
709 struct page *tmp_page;
710
711 tmp_page = ecryptfs_get1page(file, index);
712 if (IS_ERR(tmp_page)) {
713 ecryptfs_printk(KERN_ERR, "Error getting page at index "
714 "[0x%.16x]\n", index);
715 rc = PTR_ERR(tmp_page);
716 goto out;
717 }
718 kmap(tmp_page);
719 rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
720 if (rc) {
721 ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
722 "to remainder of page at index [0x%.16x]\n",
723 index);
724 kunmap(tmp_page);
725 page_cache_release(tmp_page);
726 goto out;
727 }
728 memset(((char *)page_address(tmp_page) + start), 0, num_zeros);
729 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
730 if (rc < 0) {
731 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
732 "to remainder of page at index [0x%.16x]\n",
733 index);
734 kunmap(tmp_page);
735 page_cache_release(tmp_page);
736 goto out;
737 }
738 rc = 0;
739 kunmap(tmp_page);
740 page_cache_release(tmp_page);
741out:
742 return rc;
743}
744
745static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
746{
747 int rc = 0;
748 struct inode *inode;
749 struct inode *lower_inode;
750
751 inode = (struct inode *)mapping->host;
752 lower_inode = ecryptfs_inode_to_lower(inode);
753 if (lower_inode->i_mapping->a_ops->bmap)
754 rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
755 block);
756 return rc;
757}
758
759static void ecryptfs_sync_page(struct page *page)
760{
761 struct inode *inode;
762 struct inode *lower_inode;
763 struct page *lower_page;
764
765 inode = page->mapping->host;
766 lower_inode = ecryptfs_inode_to_lower(inode);
767 /* NOTE: Recently swapped with grab_cache_page(), since
768 * sync_page() just makes sure that pending I/O gets done. */
769 lower_page = find_lock_page(lower_inode->i_mapping, page->index);
770 if (!lower_page) {
771 ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
772 return;
773 }
774 lower_page->mapping->a_ops->sync_page(lower_page);
775 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
776 lower_page->index);
777 unlock_page(lower_page);
778 page_cache_release(lower_page);
779}
780
781struct address_space_operations ecryptfs_aops = {
782 .writepage = ecryptfs_writepage,
783 .readpage = ecryptfs_readpage,
784 .prepare_write = ecryptfs_prepare_write,
785 .commit_write = ecryptfs_commit_write,
786 .bmap = ecryptfs_bmap,
787 .sync_page = ecryptfs_sync_page,
788};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
new file mode 100644
index 000000000000..c337c0410fb1
--- /dev/null
+++ b/fs/ecryptfs/super.c
@@ -0,0 +1,198 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 1997-2003 Erez Zadok
5 * Copyright (C) 2001-2003 Stony Brook University
6 * Copyright (C) 2004-2006 International Business Machines Corp.
7 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
8 * Michael C. Thompson <mcthomps@us.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 * 02111-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/mount.h>
28#include <linux/key.h>
29#include <linux/seq_file.h>
30#include <linux/crypto.h>
31#include "ecryptfs_kernel.h"
32
33struct kmem_cache *ecryptfs_inode_info_cache;
34
35/**
36 * ecryptfs_alloc_inode - allocate an ecryptfs inode
37 * @sb: Pointer to the ecryptfs super block
38 *
39 * Called to bring an inode into existence.
40 *
41 * Only handle allocation, setting up structures should be done in
42 * ecryptfs_read_inode. This is because the kernel, between now and
43 * then, will 0 out the private data pointer.
44 *
45 * Returns a pointer to a newly allocated inode, NULL otherwise
46 */
47static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
48{
49 struct ecryptfs_inode_info *ecryptfs_inode;
50 struct inode *inode = NULL;
51
52 ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
53 SLAB_KERNEL);
54 if (unlikely(!ecryptfs_inode))
55 goto out;
56 ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
57 inode = &ecryptfs_inode->vfs_inode;
58out:
59 return inode;
60}
61
62/**
63 * ecryptfs_destroy_inode
64 * @inode: The ecryptfs inode
65 *
66 * This is used during the final destruction of the inode.
67 * All allocation of memory related to the inode, including allocated
68 * memory in the crypt_stat struct, will be released here.
69 * There should be no chance that this deallocation will be missed.
70 */
71static void ecryptfs_destroy_inode(struct inode *inode)
72{
73 struct ecryptfs_inode_info *inode_info;
74
75 inode_info = ecryptfs_inode_to_private(inode);
76 ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat);
77 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
78}
79
80/**
81 * ecryptfs_init_inode
82 * @inode: The ecryptfs inode
83 *
84 * Set up the ecryptfs inode.
85 */
86void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
87{
88 ecryptfs_set_inode_lower(inode, lower_inode);
89 inode->i_ino = lower_inode->i_ino;
90 inode->i_version++;
91 inode->i_op = &ecryptfs_main_iops;
92 inode->i_fop = &ecryptfs_main_fops;
93 inode->i_mapping->a_ops = &ecryptfs_aops;
94}
95
96/**
97 * ecryptfs_put_super
98 * @sb: Pointer to the ecryptfs super block
99 *
100 * Final actions when unmounting a file system.
101 * This will handle deallocation and release of our private data.
102 */
103static void ecryptfs_put_super(struct super_block *sb)
104{
105 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
106
107 ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat);
108 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
109 ecryptfs_set_superblock_private(sb, NULL);
110}
111
112/**
113 * ecryptfs_statfs
114 * @sb: The ecryptfs super block
115 * @buf: The struct kstatfs to fill in with stats
116 *
117 * Get the filesystem statistics. Currently, we let this pass right through
118 * to the lower filesystem and take no action ourselves.
119 */
120static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
121{
122 return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
123}
124
125/**
126 * ecryptfs_clear_inode
127 * @inode - The ecryptfs inode
128 *
129 * Called by iput() when the inode reference count reached zero
130 * and the inode is not hashed anywhere. Used to clear anything
131 * that needs to be, before the inode is completely destroyed and put
132 * on the inode free list. We use this to drop out reference to the
133 * lower inode.
134 */
135static void ecryptfs_clear_inode(struct inode *inode)
136{
137 iput(ecryptfs_inode_to_lower(inode));
138}
139
140/**
141 * ecryptfs_umount_begin
142 *
143 * Called in do_umount().
144 */
145static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
146{
147 struct vfsmount *lower_mnt =
148 ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
149 struct super_block *lower_sb;
150
151 mntput(lower_mnt);
152 lower_sb = lower_mnt->mnt_sb;
153 if (lower_sb->s_op->umount_begin)
154 lower_sb->s_op->umount_begin(lower_mnt, flags);
155}
156
157/**
158 * ecryptfs_show_options
159 *
160 * Prints the directory we are currently mounted over.
161 * Returns zero on success; non-zero otherwise
162 */
163static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
164{
165 struct super_block *sb = mnt->mnt_sb;
166 struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root);
167 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root);
168 char *tmp_page;
169 char *path;
170 int rc = 0;
171
172 tmp_page = (char *)__get_free_page(GFP_KERNEL);
173 if (!tmp_page) {
174 rc = -ENOMEM;
175 goto out;
176 }
177 path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE);
178 if (IS_ERR(path)) {
179 rc = PTR_ERR(path);
180 goto out;
181 }
182 seq_printf(m, ",dir=%s", path);
183 free_page((unsigned long)tmp_page);
184out:
185 return rc;
186}
187
188struct super_operations ecryptfs_sops = {
189 .alloc_inode = ecryptfs_alloc_inode,
190 .destroy_inode = ecryptfs_destroy_inode,
191 .drop_inode = generic_delete_inode,
192 .put_super = ecryptfs_put_super,
193 .statfs = ecryptfs_statfs,
194 .remount_fs = NULL,
195 .clear_inode = ecryptfs_clear_inode,
196 .umount_begin = ecryptfs_umount_begin,
197 .show_options = ecryptfs_show_options
198};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 87e1d03e8267..e8c7765419e8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -144,42 +144,12 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
144 */ 144 */
145 145
146/* 146/*
147 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
148 * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
149 */
150static void nlmclnt_prepare_reclaim(struct nlm_host *host)
151{
152 down_write(&host->h_rwsem);
153 host->h_monitored = 0;
154 host->h_state++;
155 host->h_nextrebind = 0;
156 nlm_rebind_host(host);
157
158 /*
159 * Mark the locks for reclaiming.
160 */
161 list_splice_init(&host->h_granted, &host->h_reclaim);
162
163 dprintk("NLM: reclaiming locks for host %s\n", host->h_name);
164}
165
166static void nlmclnt_finish_reclaim(struct nlm_host *host)
167{
168 host->h_reclaiming = 0;
169 up_write(&host->h_rwsem);
170 dprintk("NLM: done reclaiming locks for host %s", host->h_name);
171}
172
173/*
174 * Reclaim all locks on server host. We do this by spawning a separate 147 * Reclaim all locks on server host. We do this by spawning a separate
175 * reclaimer thread. 148 * reclaimer thread.
176 */ 149 */
177void 150void
178nlmclnt_recovery(struct nlm_host *host, u32 newstate) 151nlmclnt_recovery(struct nlm_host *host)
179{ 152{
180 if (host->h_nsmstate == newstate)
181 return;
182 host->h_nsmstate = newstate;
183 if (!host->h_reclaiming++) { 153 if (!host->h_reclaiming++) {
184 nlm_get_host(host); 154 nlm_get_host(host);
185 __module_get(THIS_MODULE); 155 __module_get(THIS_MODULE);
@@ -199,18 +169,30 @@ reclaimer(void *ptr)
199 daemonize("%s-reclaim", host->h_name); 169 daemonize("%s-reclaim", host->h_name);
200 allow_signal(SIGKILL); 170 allow_signal(SIGKILL);
201 171
172 down_write(&host->h_rwsem);
173
202 /* This one ensures that our parent doesn't terminate while the 174 /* This one ensures that our parent doesn't terminate while the
203 * reclaim is in progress */ 175 * reclaim is in progress */
204 lock_kernel(); 176 lock_kernel();
205 lockd_up(0); /* note: this cannot fail as lockd is already running */ 177 lockd_up(0); /* note: this cannot fail as lockd is already running */
206 178
207 nlmclnt_prepare_reclaim(host); 179 dprintk("lockd: reclaiming locks for host %s", host->h_name);
208 /* First, reclaim all locks that have been marked. */ 180
209restart: 181restart:
210 nsmstate = host->h_nsmstate; 182 nsmstate = host->h_nsmstate;
183
184 /* Force a portmap getport - the peer's lockd will
185 * most likely end up on a different port.
186 */
187 host->h_nextrebind = jiffies;
188 nlm_rebind_host(host);
189
190 /* First, reclaim all locks that have been granted. */
191 list_splice_init(&host->h_granted, &host->h_reclaim);
211 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { 192 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
212 list_del_init(&fl->fl_u.nfs_fl.list); 193 list_del_init(&fl->fl_u.nfs_fl.list);
213 194
195 /* Why are we leaking memory here? --okir */
214 if (signalled()) 196 if (signalled())
215 continue; 197 continue;
216 if (nlmclnt_reclaim(host, fl) != 0) 198 if (nlmclnt_reclaim(host, fl) != 0)
@@ -218,11 +200,13 @@ restart:
218 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); 200 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
219 if (host->h_nsmstate != nsmstate) { 201 if (host->h_nsmstate != nsmstate) {
220 /* Argh! The server rebooted again! */ 202 /* Argh! The server rebooted again! */
221 list_splice_init(&host->h_granted, &host->h_reclaim);
222 goto restart; 203 goto restart;
223 } 204 }
224 } 205 }
225 nlmclnt_finish_reclaim(host); 206
207 host->h_reclaiming = 0;
208 up_write(&host->h_rwsem);
209 dprintk("NLM: done reclaiming locks for host %s", host->h_name);
226 210
227 /* Now, wake up all processes that sleep on a blocked lock */ 211 /* Now, wake up all processes that sleep on a blocked lock */
228 list_for_each_entry(block, &nlm_blocked, b_list) { 212 list_for_each_entry(block, &nlm_blocked, b_list) {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 0116729cec5f..3d84f600b633 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -36,14 +36,14 @@ static const struct rpc_call_ops nlmclnt_cancel_ops;
36/* 36/*
37 * Cookie counter for NLM requests 37 * Cookie counter for NLM requests
38 */ 38 */
39static u32 nlm_cookie = 0x1234; 39static atomic_t nlm_cookie = ATOMIC_INIT(0x1234);
40 40
41static inline void nlmclnt_next_cookie(struct nlm_cookie *c) 41void nlmclnt_next_cookie(struct nlm_cookie *c)
42{ 42{
43 memcpy(c->data, &nlm_cookie, 4); 43 u32 cookie = atomic_inc_return(&nlm_cookie);
44 memset(c->data+4, 0, 4); 44
45 memcpy(c->data, &cookie, 4);
45 c->len=4; 46 c->len=4;
46 nlm_cookie++;
47} 47}
48 48
49static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) 49static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
@@ -153,6 +153,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
153{ 153{
154 struct rpc_clnt *client = NFS_CLIENT(inode); 154 struct rpc_clnt *client = NFS_CLIENT(inode);
155 struct sockaddr_in addr; 155 struct sockaddr_in addr;
156 struct nfs_server *nfssrv = NFS_SERVER(inode);
156 struct nlm_host *host; 157 struct nlm_host *host;
157 struct nlm_rqst *call; 158 struct nlm_rqst *call;
158 sigset_t oldset; 159 sigset_t oldset;
@@ -166,7 +167,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
166 } 167 }
167 168
168 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); 169 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
169 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers); 170 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers,
171 nfssrv->nfs_client->cl_hostname,
172 strlen(nfssrv->nfs_client->cl_hostname));
170 if (host == NULL) 173 if (host == NULL)
171 return -ENOLCK; 174 return -ENOLCK;
172 175
@@ -499,7 +502,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
499 unsigned char fl_flags = fl->fl_flags; 502 unsigned char fl_flags = fl->fl_flags;
500 int status = -ENOLCK; 503 int status = -ENOLCK;
501 504
502 if (!host->h_monitored && nsm_monitor(host) < 0) { 505 if (nsm_monitor(host) < 0) {
503 printk(KERN_NOTICE "lockd: failed to monitor %s\n", 506 printk(KERN_NOTICE "lockd: failed to monitor %s\n",
504 host->h_name); 507 host->h_name);
505 goto out; 508 goto out;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a0d0b58ce7a4..fb24a9730345 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -27,46 +27,60 @@
27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) 27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) 28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
29 29
30static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; 30static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
31static unsigned long next_gc; 31static unsigned long next_gc;
32static int nrhosts; 32static int nrhosts;
33static DEFINE_MUTEX(nlm_host_mutex); 33static DEFINE_MUTEX(nlm_host_mutex);
34 34
35 35
36static void nlm_gc_hosts(void); 36static void nlm_gc_hosts(void);
37static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
38 const char *, int, int);
37 39
38/* 40/*
39 * Find an NLM server handle in the cache. If there is none, create it. 41 * Find an NLM server handle in the cache. If there is none, create it.
40 */ 42 */
41struct nlm_host * 43struct nlm_host *
42nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version) 44nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
45 const char *hostname, int hostname_len)
43{ 46{
44 return nlm_lookup_host(0, sin, proto, version); 47 return nlm_lookup_host(0, sin, proto, version,
48 hostname, hostname_len);
45} 49}
46 50
47/* 51/*
48 * Find an NLM client handle in the cache. If there is none, create it. 52 * Find an NLM client handle in the cache. If there is none, create it.
49 */ 53 */
50struct nlm_host * 54struct nlm_host *
51nlmsvc_lookup_host(struct svc_rqst *rqstp) 55nlmsvc_lookup_host(struct svc_rqst *rqstp,
56 const char *hostname, int hostname_len)
52{ 57{
53 return nlm_lookup_host(1, &rqstp->rq_addr, 58 return nlm_lookup_host(1, &rqstp->rq_addr,
54 rqstp->rq_prot, rqstp->rq_vers); 59 rqstp->rq_prot, rqstp->rq_vers,
60 hostname, hostname_len);
55} 61}
56 62
57/* 63/*
58 * Common host lookup routine for server & client 64 * Common host lookup routine for server & client
59 */ 65 */
60struct nlm_host * 66struct nlm_host *
61nlm_lookup_host(int server, struct sockaddr_in *sin, 67nlm_lookup_host(int server, const struct sockaddr_in *sin,
62 int proto, int version) 68 int proto, int version,
69 const char *hostname,
70 int hostname_len)
63{ 71{
64 struct nlm_host *host, **hp; 72 struct hlist_head *chain;
65 u32 addr; 73 struct hlist_node *pos;
74 struct nlm_host *host;
75 struct nsm_handle *nsm = NULL;
66 int hash; 76 int hash;
67 77
68 dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n", 78 dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
69 (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version); 79 NIPQUAD(sin->sin_addr.s_addr), proto, version,
80 server? "server" : "client",
81 hostname_len,
82 hostname? hostname : "<none>");
83
70 84
71 hash = NLM_ADDRHASH(sin->sin_addr.s_addr); 85 hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
72 86
@@ -76,7 +90,22 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
76 if (time_after_eq(jiffies, next_gc)) 90 if (time_after_eq(jiffies, next_gc))
77 nlm_gc_hosts(); 91 nlm_gc_hosts();
78 92
79 for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { 93 /* We may keep several nlm_host objects for a peer, because each
94 * nlm_host is identified by
95 * (address, protocol, version, server/client)
96 * We could probably simplify this a little by putting all those
97 * different NLM rpc_clients into one single nlm_host object.
98 * This would allow us to have one nlm_host per address.
99 */
100 chain = &nlm_hosts[hash];
101 hlist_for_each_entry(host, pos, chain, h_hash) {
102 if (!nlm_cmp_addr(&host->h_addr, sin))
103 continue;
104
105 /* See if we have an NSM handle for this client */
106 if (!nsm)
107 nsm = host->h_nsmhandle;
108
80 if (host->h_proto != proto) 109 if (host->h_proto != proto)
81 continue; 110 continue;
82 if (host->h_version != version) 111 if (host->h_version != version)
@@ -84,28 +113,30 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
84 if (host->h_server != server) 113 if (host->h_server != server)
85 continue; 114 continue;
86 115
87 if (nlm_cmp_addr(&host->h_addr, sin)) { 116 /* Move to head of hash chain. */
88 if (hp != nlm_hosts + hash) { 117 hlist_del(&host->h_hash);
89 *hp = host->h_next; 118 hlist_add_head(&host->h_hash, chain);
90 host->h_next = nlm_hosts[hash];
91 nlm_hosts[hash] = host;
92 }
93 nlm_get_host(host);
94 mutex_unlock(&nlm_host_mutex);
95 return host;
96 }
97 }
98 119
99 /* Ooops, no host found, create it */ 120 nlm_get_host(host);
100 dprintk("lockd: creating host entry\n"); 121 goto out;
122 }
123 if (nsm)
124 atomic_inc(&nsm->sm_count);
101 125
102 host = kzalloc(sizeof(*host), GFP_KERNEL); 126 host = NULL;
103 if (!host)
104 goto nohost;
105 127
106 addr = sin->sin_addr.s_addr; 128 /* Sadly, the host isn't in our hash table yet. See if
107 sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr)); 129 * we have an NSM handle for it. If not, create one.
130 */
131 if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
132 goto out;
108 133
134 host = kzalloc(sizeof(*host), GFP_KERNEL);
135 if (!host) {
136 nsm_release(nsm);
137 goto out;
138 }
139 host->h_name = nsm->sm_name;
109 host->h_addr = *sin; 140 host->h_addr = *sin;
110 host->h_addr.sin_port = 0; /* ouch! */ 141 host->h_addr.sin_port = 0; /* ouch! */
111 host->h_version = version; 142 host->h_version = version;
@@ -119,9 +150,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
119 init_rwsem(&host->h_rwsem); 150 init_rwsem(&host->h_rwsem);
120 host->h_state = 0; /* pseudo NSM state */ 151 host->h_state = 0; /* pseudo NSM state */
121 host->h_nsmstate = 0; /* real NSM state */ 152 host->h_nsmstate = 0; /* real NSM state */
153 host->h_nsmhandle = nsm;
122 host->h_server = server; 154 host->h_server = server;
123 host->h_next = nlm_hosts[hash]; 155 hlist_add_head(&host->h_hash, chain);
124 nlm_hosts[hash] = host;
125 INIT_LIST_HEAD(&host->h_lockowners); 156 INIT_LIST_HEAD(&host->h_lockowners);
126 spin_lock_init(&host->h_lock); 157 spin_lock_init(&host->h_lock);
127 INIT_LIST_HEAD(&host->h_granted); 158 INIT_LIST_HEAD(&host->h_granted);
@@ -130,35 +161,39 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
130 if (++nrhosts > NLM_HOST_MAX) 161 if (++nrhosts > NLM_HOST_MAX)
131 next_gc = 0; 162 next_gc = 0;
132 163
133nohost: 164out:
134 mutex_unlock(&nlm_host_mutex); 165 mutex_unlock(&nlm_host_mutex);
135 return host; 166 return host;
136} 167}
137 168
138struct nlm_host * 169/*
139nlm_find_client(void) 170 * Destroy a host
171 */
172static void
173nlm_destroy_host(struct nlm_host *host)
140{ 174{
141 /* find a nlm_host for a client for which h_killed == 0. 175 struct rpc_clnt *clnt;
142 * and return it 176
177 BUG_ON(!list_empty(&host->h_lockowners));
178 BUG_ON(atomic_read(&host->h_count));
179
180 /*
181 * Release NSM handle and unmonitor host.
143 */ 182 */
144 int hash; 183 nsm_unmonitor(host);
145 mutex_lock(&nlm_host_mutex); 184
146 for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) { 185 if ((clnt = host->h_rpcclnt) != NULL) {
147 struct nlm_host *host, **hp; 186 if (atomic_read(&clnt->cl_users)) {
148 for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { 187 printk(KERN_WARNING
149 if (host->h_server && 188 "lockd: active RPC handle\n");
150 host->h_killed == 0) { 189 clnt->cl_dead = 1;
151 nlm_get_host(host); 190 } else {
152 mutex_unlock(&nlm_host_mutex); 191 rpc_destroy_client(host->h_rpcclnt);
153 return host;
154 }
155 } 192 }
156 } 193 }
157 mutex_unlock(&nlm_host_mutex); 194 kfree(host);
158 return NULL;
159} 195}
160 196
161
162/* 197/*
163 * Create the NLM RPC client for an NLM peer 198 * Create the NLM RPC client for an NLM peer
164 */ 199 */
@@ -260,22 +295,82 @@ void nlm_release_host(struct nlm_host *host)
260} 295}
261 296
262/* 297/*
298 * We were notified that the host indicated by address &sin
299 * has rebooted.
300 * Release all resources held by that peer.
301 */
302void nlm_host_rebooted(const struct sockaddr_in *sin,
303 const char *hostname, int hostname_len,
304 u32 new_state)
305{
306 struct hlist_head *chain;
307 struct hlist_node *pos;
308 struct nsm_handle *nsm;
309 struct nlm_host *host;
310
311 dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n",
312 hostname, NIPQUAD(sin->sin_addr));
313
314 /* Find the NSM handle for this peer */
315 if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0)))
316 return;
317
318 /* When reclaiming locks on this peer, make sure that
319 * we set up a new notification */
320 nsm->sm_monitored = 0;
321
322 /* Mark all hosts tied to this NSM state as having rebooted.
323 * We run the loop repeatedly, because we drop the host table
324 * lock for this.
325 * To avoid processing a host several times, we match the nsmstate.
326 */
327again: mutex_lock(&nlm_host_mutex);
328 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
329 hlist_for_each_entry(host, pos, chain, h_hash) {
330 if (host->h_nsmhandle == nsm
331 && host->h_nsmstate != new_state) {
332 host->h_nsmstate = new_state;
333 host->h_state++;
334
335 nlm_get_host(host);
336 mutex_unlock(&nlm_host_mutex);
337
338 if (host->h_server) {
339 /* We're server for this guy, just ditch
340 * all the locks he held. */
341 nlmsvc_free_host_resources(host);
342 } else {
343 /* He's the server, initiate lock recovery. */
344 nlmclnt_recovery(host);
345 }
346
347 nlm_release_host(host);
348 goto again;
349 }
350 }
351 }
352
353 mutex_unlock(&nlm_host_mutex);
354}
355
356/*
263 * Shut down the hosts module. 357 * Shut down the hosts module.
264 * Note that this routine is called only at server shutdown time. 358 * Note that this routine is called only at server shutdown time.
265 */ 359 */
266void 360void
267nlm_shutdown_hosts(void) 361nlm_shutdown_hosts(void)
268{ 362{
363 struct hlist_head *chain;
364 struct hlist_node *pos;
269 struct nlm_host *host; 365 struct nlm_host *host;
270 int i;
271 366
272 dprintk("lockd: shutting down host module\n"); 367 dprintk("lockd: shutting down host module\n");
273 mutex_lock(&nlm_host_mutex); 368 mutex_lock(&nlm_host_mutex);
274 369
275 /* First, make all hosts eligible for gc */ 370 /* First, make all hosts eligible for gc */
276 dprintk("lockd: nuking all hosts...\n"); 371 dprintk("lockd: nuking all hosts...\n");
277 for (i = 0; i < NLM_HOST_NRHASH; i++) { 372 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
278 for (host = nlm_hosts[i]; host; host = host->h_next) 373 hlist_for_each_entry(host, pos, chain, h_hash)
279 host->h_expires = jiffies - 1; 374 host->h_expires = jiffies - 1;
280 } 375 }
281 376
@@ -287,8 +382,8 @@ nlm_shutdown_hosts(void)
287 if (nrhosts) { 382 if (nrhosts) {
288 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 383 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
289 dprintk("lockd: %d hosts left:\n", nrhosts); 384 dprintk("lockd: %d hosts left:\n", nrhosts);
290 for (i = 0; i < NLM_HOST_NRHASH; i++) { 385 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
291 for (host = nlm_hosts[i]; host; host = host->h_next) { 386 hlist_for_each_entry(host, pos, chain, h_hash) {
292 dprintk(" %s (cnt %d use %d exp %ld)\n", 387 dprintk(" %s (cnt %d use %d exp %ld)\n",
293 host->h_name, atomic_read(&host->h_count), 388 host->h_name, atomic_read(&host->h_count),
294 host->h_inuse, host->h_expires); 389 host->h_inuse, host->h_expires);
@@ -305,45 +400,32 @@ nlm_shutdown_hosts(void)
305static void 400static void
306nlm_gc_hosts(void) 401nlm_gc_hosts(void)
307{ 402{
308 struct nlm_host **q, *host; 403 struct hlist_head *chain;
309 struct rpc_clnt *clnt; 404 struct hlist_node *pos, *next;
310 int i; 405 struct nlm_host *host;
311 406
312 dprintk("lockd: host garbage collection\n"); 407 dprintk("lockd: host garbage collection\n");
313 for (i = 0; i < NLM_HOST_NRHASH; i++) { 408 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
314 for (host = nlm_hosts[i]; host; host = host->h_next) 409 hlist_for_each_entry(host, pos, chain, h_hash)
315 host->h_inuse = 0; 410 host->h_inuse = 0;
316 } 411 }
317 412
318 /* Mark all hosts that hold locks, blocks or shares */ 413 /* Mark all hosts that hold locks, blocks or shares */
319 nlmsvc_mark_resources(); 414 nlmsvc_mark_resources();
320 415
321 for (i = 0; i < NLM_HOST_NRHASH; i++) { 416 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
322 q = &nlm_hosts[i]; 417 hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
323 while ((host = *q) != NULL) {
324 if (atomic_read(&host->h_count) || host->h_inuse 418 if (atomic_read(&host->h_count) || host->h_inuse
325 || time_before(jiffies, host->h_expires)) { 419 || time_before(jiffies, host->h_expires)) {
326 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", 420 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
327 host->h_name, atomic_read(&host->h_count), 421 host->h_name, atomic_read(&host->h_count),
328 host->h_inuse, host->h_expires); 422 host->h_inuse, host->h_expires);
329 q = &host->h_next;
330 continue; 423 continue;
331 } 424 }
332 dprintk("lockd: delete host %s\n", host->h_name); 425 dprintk("lockd: delete host %s\n", host->h_name);
333 *q = host->h_next; 426 hlist_del_init(&host->h_hash);
334 /* Don't unmonitor hosts that have been invalidated */ 427
335 if (host->h_monitored && !host->h_killed) 428 nlm_destroy_host(host);
336 nsm_unmonitor(host);
337 if ((clnt = host->h_rpcclnt) != NULL) {
338 if (atomic_read(&clnt->cl_users)) {
339 printk(KERN_WARNING
340 "lockd: active RPC handle\n");
341 clnt->cl_dead = 1;
342 } else {
343 rpc_destroy_client(host->h_rpcclnt);
344 }
345 }
346 kfree(host);
347 nrhosts--; 429 nrhosts--;
348 } 430 }
349 } 431 }
@@ -351,3 +433,88 @@ nlm_gc_hosts(void)
351 next_gc = jiffies + NLM_HOST_COLLECT; 433 next_gc = jiffies + NLM_HOST_COLLECT;
352} 434}
353 435
436
437/*
438 * Manage NSM handles
439 */
440static LIST_HEAD(nsm_handles);
441static DEFINE_MUTEX(nsm_mutex);
442
443static struct nsm_handle *
444__nsm_find(const struct sockaddr_in *sin,
445 const char *hostname, int hostname_len,
446 int create)
447{
448 struct nsm_handle *nsm = NULL;
449 struct list_head *pos;
450
451 if (!sin)
452 return NULL;
453
454 if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
455 if (printk_ratelimit()) {
456 printk(KERN_WARNING "Invalid hostname \"%.*s\" "
457 "in NFS lock request\n",
458 hostname_len, hostname);
459 }
460 return NULL;
461 }
462
463 mutex_lock(&nsm_mutex);
464 list_for_each(pos, &nsm_handles) {
465 nsm = list_entry(pos, struct nsm_handle, sm_link);
466
467 if (hostname && nsm_use_hostnames) {
468 if (strlen(nsm->sm_name) != hostname_len
469 || memcmp(nsm->sm_name, hostname, hostname_len))
470 continue;
471 } else if (!nlm_cmp_addr(&nsm->sm_addr, sin))
472 continue;
473 atomic_inc(&nsm->sm_count);
474 goto out;
475 }
476
477 if (!create) {
478 nsm = NULL;
479 goto out;
480 }
481
482 nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
483 if (nsm != NULL) {
484 nsm->sm_addr = *sin;
485 nsm->sm_name = (char *) (nsm + 1);
486 memcpy(nsm->sm_name, hostname, hostname_len);
487 nsm->sm_name[hostname_len] = '\0';
488 atomic_set(&nsm->sm_count, 1);
489
490 list_add(&nsm->sm_link, &nsm_handles);
491 }
492
493out:
494 mutex_unlock(&nsm_mutex);
495 return nsm;
496}
497
498struct nsm_handle *
499nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
500{
501 return __nsm_find(sin, hostname, hostname_len, 1);
502}
503
504/*
505 * Release an NSM handle
506 */
507void
508nsm_release(struct nsm_handle *nsm)
509{
510 if (!nsm)
511 return;
512 if (atomic_dec_and_test(&nsm->sm_count)) {
513 mutex_lock(&nsm_mutex);
514 if (atomic_read(&nsm->sm_count) == 0) {
515 list_del(&nsm->sm_link);
516 kfree(nsm);
517 }
518 mutex_unlock(&nsm_mutex);
519 }
520}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a816b920d431..e0179f8c327f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -24,13 +24,13 @@ static struct rpc_program nsm_program;
24/* 24/*
25 * Local NSM state 25 * Local NSM state
26 */ 26 */
27u32 nsm_local_state; 27int nsm_local_state;
28 28
29/* 29/*
30 * Common procedure for SM_MON/SM_UNMON calls 30 * Common procedure for SM_MON/SM_UNMON calls
31 */ 31 */
32static int 32static int
33nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) 33nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
34{ 34{
35 struct rpc_clnt *clnt; 35 struct rpc_clnt *clnt;
36 int status; 36 int status;
@@ -46,10 +46,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
46 goto out; 46 goto out;
47 } 47 }
48 48
49 args.addr = host->h_addr.sin_addr.s_addr; 49 memset(&args, 0, sizeof(args));
50 args.proto= (host->h_proto<<1) | host->h_server; 50 args.mon_name = nsm->sm_name;
51 args.addr = nsm->sm_addr.sin_addr.s_addr;
51 args.prog = NLM_PROGRAM; 52 args.prog = NLM_PROGRAM;
52 args.vers = host->h_version; 53 args.vers = 3;
53 args.proc = NLMPROC_NSM_NOTIFY; 54 args.proc = NLMPROC_NSM_NOTIFY;
54 memset(res, 0, sizeof(*res)); 55 memset(res, 0, sizeof(*res));
55 56
@@ -70,17 +71,22 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
70int 71int
71nsm_monitor(struct nlm_host *host) 72nsm_monitor(struct nlm_host *host)
72{ 73{
74 struct nsm_handle *nsm = host->h_nsmhandle;
73 struct nsm_res res; 75 struct nsm_res res;
74 int status; 76 int status;
75 77
76 dprintk("lockd: nsm_monitor(%s)\n", host->h_name); 78 dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
79 BUG_ON(nsm == NULL);
77 80
78 status = nsm_mon_unmon(host, SM_MON, &res); 81 if (nsm->sm_monitored)
82 return 0;
83
84 status = nsm_mon_unmon(nsm, SM_MON, &res);
79 85
80 if (status < 0 || res.status != 0) 86 if (status < 0 || res.status != 0)
81 printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); 87 printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
82 else 88 else
83 host->h_monitored = 1; 89 nsm->sm_monitored = 1;
84 return status; 90 return status;
85} 91}
86 92
@@ -90,16 +96,26 @@ nsm_monitor(struct nlm_host *host)
90int 96int
91nsm_unmonitor(struct nlm_host *host) 97nsm_unmonitor(struct nlm_host *host)
92{ 98{
99 struct nsm_handle *nsm = host->h_nsmhandle;
93 struct nsm_res res; 100 struct nsm_res res;
94 int status; 101 int status = 0;
95 102
96 dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); 103 if (nsm == NULL)
97 104 return 0;
98 status = nsm_mon_unmon(host, SM_UNMON, &res); 105 host->h_nsmhandle = NULL;
99 if (status < 0) 106
100 printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name); 107 if (atomic_read(&nsm->sm_count) == 1
101 else 108 && nsm->sm_monitored && !nsm->sm_sticky) {
102 host->h_monitored = 0; 109 dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
110
111 status = nsm_mon_unmon(nsm, SM_UNMON, &res);
112 if (status < 0)
113 printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
114 host->h_name);
115 else
116 nsm->sm_monitored = 0;
117 }
118 nsm_release(nsm);
103 return status; 119 return status;
104} 120}
105 121
@@ -135,7 +151,7 @@ nsm_create(void)
135static u32 * 151static u32 *
136xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) 152xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
137{ 153{
138 char buffer[20]; 154 char buffer[20], *name;
139 155
140 /* 156 /*
141 * Use the dotted-quad IP address of the remote host as 157 * Use the dotted-quad IP address of the remote host as
@@ -143,8 +159,13 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
143 * hostname first for whatever remote hostname it receives, 159 * hostname first for whatever remote hostname it receives,
144 * so this works alright. 160 * so this works alright.
145 */ 161 */
146 sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); 162 if (nsm_use_hostnames) {
147 if (!(p = xdr_encode_string(p, buffer)) 163 name = argp->mon_name;
164 } else {
165 sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
166 name = buffer;
167 }
168 if (!(p = xdr_encode_string(p, name))
148 || !(p = xdr_encode_string(p, utsname()->nodename))) 169 || !(p = xdr_encode_string(p, utsname()->nodename)))
149 return ERR_PTR(-EIO); 170 return ERR_PTR(-EIO);
150 *p++ = htonl(argp->prog); 171 *p++ = htonl(argp->prog);
@@ -160,9 +181,11 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
160 p = xdr_encode_common(rqstp, p, argp); 181 p = xdr_encode_common(rqstp, p, argp);
161 if (IS_ERR(p)) 182 if (IS_ERR(p))
162 return PTR_ERR(p); 183 return PTR_ERR(p);
184
185 /* Surprise - there may even be room for an IPv6 address now */
163 *p++ = argp->addr; 186 *p++ = argp->addr;
164 *p++ = argp->vers; 187 *p++ = 0;
165 *p++ = argp->proto; 188 *p++ = 0;
166 *p++ = 0; 189 *p++ = 0;
167 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); 190 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
168 return 0; 191 return 0;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 3cc369e5693f..634139232aaf 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -33,6 +33,7 @@
33#include <linux/sunrpc/svcsock.h> 33#include <linux/sunrpc/svcsock.h>
34#include <net/ip.h> 34#include <net/ip.h>
35#include <linux/lockd/lockd.h> 35#include <linux/lockd/lockd.h>
36#include <linux/lockd/sm_inter.h>
36#include <linux/nfs.h> 37#include <linux/nfs.h>
37 38
38#define NLMDBG_FACILITY NLMDBG_SVC 39#define NLMDBG_FACILITY NLMDBG_SVC
@@ -61,6 +62,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
61static unsigned long nlm_grace_period; 62static unsigned long nlm_grace_period;
62static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; 63static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO;
63static int nlm_udpport, nlm_tcpport; 64static int nlm_udpport, nlm_tcpport;
65int nsm_use_hostnames = 0;
64 66
65/* 67/*
66 * Constants needed for the sysctl interface. 68 * Constants needed for the sysctl interface.
@@ -395,6 +397,22 @@ static ctl_table nlm_sysctls[] = {
395 .extra1 = (int *) &nlm_port_min, 397 .extra1 = (int *) &nlm_port_min,
396 .extra2 = (int *) &nlm_port_max, 398 .extra2 = (int *) &nlm_port_max,
397 }, 399 },
400 {
401 .ctl_name = CTL_UNNUMBERED,
402 .procname = "nsm_use_hostnames",
403 .data = &nsm_use_hostnames,
404 .maxlen = sizeof(int),
405 .mode = 0644,
406 .proc_handler = &proc_dointvec,
407 },
408 {
409 .ctl_name = CTL_UNNUMBERED,
410 .procname = "nsm_local_state",
411 .data = &nsm_local_state,
412 .maxlen = sizeof(int),
413 .mode = 0644,
414 .proc_handler = &proc_dointvec,
415 },
398 { .ctl_name = 0 } 416 { .ctl_name = 0 }
399}; 417};
400 418
@@ -483,6 +501,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int,
483 &nlm_udpport, 0644); 501 &nlm_udpport, 0644);
484module_param_call(nlm_tcpport, param_set_port, param_get_int, 502module_param_call(nlm_tcpport, param_set_port, param_get_int,
485 &nlm_tcpport, 0644); 503 &nlm_tcpport, 0644);
504module_param(nsm_use_hostnames, bool, 0644);
486 505
487/* 506/*
488 * Initialising and terminating the module. 507 * Initialising and terminating the module.
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a2dd9ccb9b32..fa370f6eb07b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -38,8 +38,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
38 return nlm_lck_denied_nolocks; 38 return nlm_lck_denied_nolocks;
39 39
40 /* Obtain host handle */ 40 /* Obtain host handle */
41 if (!(host = nlmsvc_lookup_host(rqstp)) 41 if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
42 || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) 42 || (argp->monitor && nsm_monitor(host) < 0))
43 goto no_locks; 43 goto no_locks;
44 *hostp = host; 44 *hostp = host;
45 45
@@ -260,7 +260,9 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a
260 struct nlm_rqst *call; 260 struct nlm_rqst *call;
261 int stat; 261 int stat;
262 262
263 host = nlmsvc_lookup_host(rqstp); 263 host = nlmsvc_lookup_host(rqstp,
264 argp->lock.caller,
265 argp->lock.len);
264 if (host == NULL) 266 if (host == NULL)
265 return rpc_system_err; 267 return rpc_system_err;
266 268
@@ -420,10 +422,6 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
420 void *resp) 422 void *resp)
421{ 423{
422 struct sockaddr_in saddr = rqstp->rq_addr; 424 struct sockaddr_in saddr = rqstp->rq_addr;
423 int vers = argp->vers;
424 int prot = argp->proto >> 1;
425
426 struct nlm_host *host;
427 425
428 dprintk("lockd: SM_NOTIFY called\n"); 426 dprintk("lockd: SM_NOTIFY called\n");
429 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 427 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -438,21 +436,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
438 /* Obtain the host pointer for this NFS server and try to 436 /* Obtain the host pointer for this NFS server and try to
439 * reclaim all locks we hold on this server. 437 * reclaim all locks we hold on this server.
440 */ 438 */
439 memset(&saddr, 0, sizeof(saddr));
441 saddr.sin_addr.s_addr = argp->addr; 440 saddr.sin_addr.s_addr = argp->addr;
441 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
442 442
443 if ((argp->proto & 1)==0) {
444 if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
445 nlmclnt_recovery(host, argp->state);
446 nlm_release_host(host);
447 }
448 } else {
449 /* If we run on an NFS server, delete all locks held by the client */
450
451 if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
452 nlmsvc_free_host_resources(host);
453 nlm_release_host(host);
454 }
455 }
456 return rpc_success; 443 return rpc_success;
457} 444}
458 445
@@ -468,7 +455,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
468 455
469 dprintk("lockd: GRANTED_RES called\n"); 456 dprintk("lockd: GRANTED_RES called\n");
470 457
471 nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); 458 nlmsvc_grant_reply(&argp->cookie, argp->status);
472 return rpc_success; 459 return rpc_success;
473} 460}
474 461
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 93c00ee7189d..814c6064c9e0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -40,7 +40,7 @@
40 40
41static void nlmsvc_release_block(struct nlm_block *block); 41static void nlmsvc_release_block(struct nlm_block *block);
42static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); 42static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
43static int nlmsvc_remove_block(struct nlm_block *block); 43static void nlmsvc_remove_block(struct nlm_block *block);
44 44
45static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); 45static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
46static void nlmsvc_freegrantargs(struct nlm_rqst *call); 46static void nlmsvc_freegrantargs(struct nlm_rqst *call);
@@ -49,7 +49,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
49/* 49/*
50 * The list of blocked locks to retry 50 * The list of blocked locks to retry
51 */ 51 */
52static struct nlm_block * nlm_blocked; 52static LIST_HEAD(nlm_blocked);
53 53
54/* 54/*
55 * Insert a blocked lock into the global list 55 * Insert a blocked lock into the global list
@@ -57,48 +57,44 @@ static struct nlm_block * nlm_blocked;
57static void 57static void
58nlmsvc_insert_block(struct nlm_block *block, unsigned long when) 58nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
59{ 59{
60 struct nlm_block **bp, *b; 60 struct nlm_block *b;
61 struct list_head *pos;
61 62
62 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); 63 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
63 kref_get(&block->b_count); 64 if (list_empty(&block->b_list)) {
64 if (block->b_queued) 65 kref_get(&block->b_count);
65 nlmsvc_remove_block(block); 66 } else {
66 bp = &nlm_blocked; 67 list_del_init(&block->b_list);
68 }
69
70 pos = &nlm_blocked;
67 if (when != NLM_NEVER) { 71 if (when != NLM_NEVER) {
68 if ((when += jiffies) == NLM_NEVER) 72 if ((when += jiffies) == NLM_NEVER)
69 when ++; 73 when ++;
70 while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) 74 list_for_each(pos, &nlm_blocked) {
71 bp = &b->b_next; 75 b = list_entry(pos, struct nlm_block, b_list);
72 } else 76 if (time_after(b->b_when,when) || b->b_when == NLM_NEVER)
73 while ((b = *bp) != 0) 77 break;
74 bp = &b->b_next; 78 }
79 /* On normal exit from the loop, pos == &nlm_blocked,
80 * so we will be adding to the end of the list - good
81 */
82 }
75 83
76 block->b_queued = 1; 84 list_add_tail(&block->b_list, pos);
77 block->b_when = when; 85 block->b_when = when;
78 block->b_next = b;
79 *bp = block;
80} 86}
81 87
82/* 88/*
83 * Remove a block from the global list 89 * Remove a block from the global list
84 */ 90 */
85static int 91static inline void
86nlmsvc_remove_block(struct nlm_block *block) 92nlmsvc_remove_block(struct nlm_block *block)
87{ 93{
88 struct nlm_block **bp, *b; 94 if (!list_empty(&block->b_list)) {
89 95 list_del_init(&block->b_list);
90 if (!block->b_queued) 96 nlmsvc_release_block(block);
91 return 1;
92 for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) {
93 if (b == block) {
94 *bp = block->b_next;
95 block->b_queued = 0;
96 nlmsvc_release_block(block);
97 return 1;
98 }
99 } 97 }
100
101 return 0;
102} 98}
103 99
104/* 100/*
@@ -107,14 +103,14 @@ nlmsvc_remove_block(struct nlm_block *block)
107static struct nlm_block * 103static struct nlm_block *
108nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) 104nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
109{ 105{
110 struct nlm_block **head, *block; 106 struct nlm_block *block;
111 struct file_lock *fl; 107 struct file_lock *fl;
112 108
113 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", 109 dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
114 file, lock->fl.fl_pid, 110 file, lock->fl.fl_pid,
115 (long long)lock->fl.fl_start, 111 (long long)lock->fl.fl_start,
116 (long long)lock->fl.fl_end, lock->fl.fl_type); 112 (long long)lock->fl.fl_end, lock->fl.fl_type);
117 for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { 113 list_for_each_entry(block, &nlm_blocked, b_list) {
118 fl = &block->b_call->a_args.lock.fl; 114 fl = &block->b_call->a_args.lock.fl;
119 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", 115 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
120 block->b_file, fl->fl_pid, 116 block->b_file, fl->fl_pid,
@@ -143,20 +139,20 @@ static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
143 * Find a block with a given NLM cookie. 139 * Find a block with a given NLM cookie.
144 */ 140 */
145static inline struct nlm_block * 141static inline struct nlm_block *
146nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) 142nlmsvc_find_block(struct nlm_cookie *cookie)
147{ 143{
148 struct nlm_block *block; 144 struct nlm_block *block;
149 145
150 for (block = nlm_blocked; block; block = block->b_next) { 146 list_for_each_entry(block, &nlm_blocked, b_list) {
151 dprintk("cookie: head of blocked queue %p, block %p\n", 147 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
152 nlm_blocked, block); 148 goto found;
153 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
154 && nlm_cmp_addr(sin, &block->b_host->h_addr))
155 break;
156 } 149 }
157 150
158 if (block != NULL) 151 return NULL;
159 kref_get(&block->b_count); 152
153found:
154 dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
155 kref_get(&block->b_count);
160 return block; 156 return block;
161} 157}
162 158
@@ -169,6 +165,11 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
169 * request, but (as I found out later) that's because some implementations 165 * request, but (as I found out later) that's because some implementations
170 * do just this. Never mind the standards comittees, they support our 166 * do just this. Never mind the standards comittees, they support our
171 * logging industries. 167 * logging industries.
168 *
169 * 10 years later: I hope we can safely ignore these old and broken
170 * clients by now. Let's fix this so we can uniquely identify an incoming
171 * GRANTED_RES message by cookie, without having to rely on the client's IP
172 * address. --okir
172 */ 173 */
173static inline struct nlm_block * 174static inline struct nlm_block *
174nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, 175nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
@@ -179,7 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
179 struct nlm_rqst *call = NULL; 180 struct nlm_rqst *call = NULL;
180 181
181 /* Create host handle for callback */ 182 /* Create host handle for callback */
182 host = nlmsvc_lookup_host(rqstp); 183 host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
183 if (host == NULL) 184 if (host == NULL)
184 return NULL; 185 return NULL;
185 186
@@ -192,6 +193,8 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
192 if (block == NULL) 193 if (block == NULL)
193 goto failed; 194 goto failed;
194 kref_init(&block->b_count); 195 kref_init(&block->b_count);
196 INIT_LIST_HEAD(&block->b_list);
197 INIT_LIST_HEAD(&block->b_flist);
195 198
196 if (!nlmsvc_setgrantargs(call, lock)) 199 if (!nlmsvc_setgrantargs(call, lock))
197 goto failed_free; 200 goto failed_free;
@@ -199,7 +202,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
199 /* Set notifier function for VFS, and init args */ 202 /* Set notifier function for VFS, and init args */
200 call->a_args.lock.fl.fl_flags |= FL_SLEEP; 203 call->a_args.lock.fl.fl_flags |= FL_SLEEP;
201 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; 204 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
202 call->a_args.cookie = *cookie; /* see above */ 205 nlmclnt_next_cookie(&call->a_args.cookie);
203 206
204 dprintk("lockd: created block %p...\n", block); 207 dprintk("lockd: created block %p...\n", block);
205 208
@@ -210,8 +213,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
210 file->f_count++; 213 file->f_count++;
211 214
212 /* Add to file's list of blocks */ 215 /* Add to file's list of blocks */
213 block->b_fnext = file->f_blocks; 216 list_add(&block->b_flist, &file->f_blocks);
214 file->f_blocks = block;
215 217
216 /* Set up RPC arguments for callback */ 218 /* Set up RPC arguments for callback */
217 block->b_call = call; 219 block->b_call = call;
@@ -248,19 +250,13 @@ static void nlmsvc_free_block(struct kref *kref)
248{ 250{
249 struct nlm_block *block = container_of(kref, struct nlm_block, b_count); 251 struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
250 struct nlm_file *file = block->b_file; 252 struct nlm_file *file = block->b_file;
251 struct nlm_block **bp;
252 253
253 dprintk("lockd: freeing block %p...\n", block); 254 dprintk("lockd: freeing block %p...\n", block);
254 255
255 down(&file->f_sema);
256 /* Remove block from file's list of blocks */ 256 /* Remove block from file's list of blocks */
257 for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { 257 mutex_lock(&file->f_mutex);
258 if (*bp == block) { 258 list_del_init(&block->b_flist);
259 *bp = block->b_fnext; 259 mutex_unlock(&file->f_mutex);
260 break;
261 }
262 }
263 up(&file->f_sema);
264 260
265 nlmsvc_freegrantargs(block->b_call); 261 nlmsvc_freegrantargs(block->b_call);
266 nlm_release_call(block->b_call); 262 nlm_release_call(block->b_call);
@@ -274,47 +270,32 @@ static void nlmsvc_release_block(struct nlm_block *block)
274 kref_put(&block->b_count, nlmsvc_free_block); 270 kref_put(&block->b_count, nlmsvc_free_block);
275} 271}
276 272
277static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file) 273/*
278{ 274 * Loop over all blocks and delete blocks held by
279 struct nlm_block *block; 275 * a matching host.
280 276 */
281 down(&file->f_sema); 277void nlmsvc_traverse_blocks(struct nlm_host *host,
282 for (block = file->f_blocks; block != NULL; block = block->b_fnext) 278 struct nlm_file *file,
283 block->b_host->h_inuse = 1; 279 nlm_host_match_fn_t match)
284 up(&file->f_sema);
285}
286
287static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
288{ 280{
289 struct nlm_block *block; 281 struct nlm_block *block, *next;
290 282
291restart: 283restart:
292 down(&file->f_sema); 284 mutex_lock(&file->f_mutex);
293 for (block = file->f_blocks; block != NULL; block = block->b_fnext) { 285 list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
294 if (host != NULL && host != block->b_host) 286 if (!match(block->b_host, host))
295 continue; 287 continue;
296 if (!block->b_queued) 288 /* Do not destroy blocks that are not on
289 * the global retry list - why? */
290 if (list_empty(&block->b_list))
297 continue; 291 continue;
298 kref_get(&block->b_count); 292 kref_get(&block->b_count);
299 up(&file->f_sema); 293 mutex_unlock(&file->f_mutex);
300 nlmsvc_unlink_block(block); 294 nlmsvc_unlink_block(block);
301 nlmsvc_release_block(block); 295 nlmsvc_release_block(block);
302 goto restart; 296 goto restart;
303 } 297 }
304 up(&file->f_sema); 298 mutex_unlock(&file->f_mutex);
305}
306
307/*
308 * Loop over all blocks and perform the action specified.
309 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
310 */
311void
312nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
313{
314 if (action == NLM_ACT_MARK)
315 nlmsvc_act_mark(host, file);
316 else
317 nlmsvc_act_unlock(host, file);
318} 299}
319 300
320/* 301/*
@@ -373,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
373 lock->fl.fl_flags &= ~FL_SLEEP; 354 lock->fl.fl_flags &= ~FL_SLEEP;
374again: 355again:
375 /* Lock file against concurrent access */ 356 /* Lock file against concurrent access */
376 down(&file->f_sema); 357 mutex_lock(&file->f_mutex);
377 /* Get existing block (in case client is busy-waiting) */ 358 /* Get existing block (in case client is busy-waiting) */
378 block = nlmsvc_lookup_block(file, lock); 359 block = nlmsvc_lookup_block(file, lock);
379 if (block == NULL) { 360 if (block == NULL) {
@@ -411,10 +392,10 @@ again:
411 392
412 /* If we don't have a block, create and initialize it. Then 393 /* If we don't have a block, create and initialize it. Then
413 * retry because we may have slept in kmalloc. */ 394 * retry because we may have slept in kmalloc. */
414 /* We have to release f_sema as nlmsvc_create_block may try to 395 /* We have to release f_mutex as nlmsvc_create_block may try to
415 * to claim it while doing host garbage collection */ 396 * to claim it while doing host garbage collection */
416 if (newblock == NULL) { 397 if (newblock == NULL) {
417 up(&file->f_sema); 398 mutex_unlock(&file->f_mutex);
418 dprintk("lockd: blocking on this lock (allocating).\n"); 399 dprintk("lockd: blocking on this lock (allocating).\n");
419 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) 400 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
420 return nlm_lck_denied_nolocks; 401 return nlm_lck_denied_nolocks;
@@ -424,7 +405,7 @@ again:
424 /* Append to list of blocked */ 405 /* Append to list of blocked */
425 nlmsvc_insert_block(newblock, NLM_NEVER); 406 nlmsvc_insert_block(newblock, NLM_NEVER);
426out: 407out:
427 up(&file->f_sema); 408 mutex_unlock(&file->f_mutex);
428 nlmsvc_release_block(newblock); 409 nlmsvc_release_block(newblock);
429 nlmsvc_release_block(block); 410 nlmsvc_release_block(block);
430 dprintk("lockd: nlmsvc_lock returned %u\n", ret); 411 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
@@ -451,6 +432,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
451 (long long)conflock->fl.fl_start, 432 (long long)conflock->fl.fl_start,
452 (long long)conflock->fl.fl_end); 433 (long long)conflock->fl.fl_end);
453 conflock->caller = "somehost"; /* FIXME */ 434 conflock->caller = "somehost"; /* FIXME */
435 conflock->len = strlen(conflock->caller);
454 conflock->oh.len = 0; /* don't return OH info */ 436 conflock->oh.len = 0; /* don't return OH info */
455 conflock->svid = conflock->fl.fl_pid; 437 conflock->svid = conflock->fl.fl_pid;
456 return nlm_lck_denied; 438 return nlm_lck_denied;
@@ -507,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
507 (long long)lock->fl.fl_start, 489 (long long)lock->fl.fl_start,
508 (long long)lock->fl.fl_end); 490 (long long)lock->fl.fl_end);
509 491
510 down(&file->f_sema); 492 mutex_lock(&file->f_mutex);
511 block = nlmsvc_lookup_block(file, lock); 493 block = nlmsvc_lookup_block(file, lock);
512 up(&file->f_sema); 494 mutex_unlock(&file->f_mutex);
513 if (block != NULL) { 495 if (block != NULL) {
514 status = nlmsvc_unlink_block(block); 496 status = nlmsvc_unlink_block(block);
515 nlmsvc_release_block(block); 497 nlmsvc_release_block(block);
@@ -527,10 +509,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
527static void 509static void
528nlmsvc_notify_blocked(struct file_lock *fl) 510nlmsvc_notify_blocked(struct file_lock *fl)
529{ 511{
530 struct nlm_block **bp, *block; 512 struct nlm_block *block;
531 513
532 dprintk("lockd: VFS unblock notification for block %p\n", fl); 514 dprintk("lockd: VFS unblock notification for block %p\n", fl);
533 for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { 515 list_for_each_entry(block, &nlm_blocked, b_list) {
534 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 516 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
535 nlmsvc_insert_block(block, 0); 517 nlmsvc_insert_block(block, 0);
536 svc_wake_up(block->b_daemon); 518 svc_wake_up(block->b_daemon);
@@ -663,17 +645,14 @@ static const struct rpc_call_ops nlmsvc_grant_ops = {
663 * block. 645 * block.
664 */ 646 */
665void 647void
666nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) 648nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status)
667{ 649{
668 struct nlm_block *block; 650 struct nlm_block *block;
669 struct nlm_file *file;
670 651
671 dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", 652 dprintk("grant_reply: looking for cookie %x, s=%d \n",
672 *(unsigned int *)(cookie->data), 653 *(unsigned int *)(cookie->data), status);
673 ntohl(rqstp->rq_addr.sin_addr.s_addr), status); 654 if (!(block = nlmsvc_find_block(cookie)))
674 if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr)))
675 return; 655 return;
676 file = block->b_file;
677 656
678 if (block) { 657 if (block) {
679 if (status == NLM_LCK_DENIED_GRACE_PERIOD) { 658 if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
@@ -696,16 +675,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
696unsigned long 675unsigned long
697nlmsvc_retry_blocked(void) 676nlmsvc_retry_blocked(void)
698{ 677{
699 struct nlm_block *block; 678 unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
679 struct nlm_block *block;
680
681 while (!list_empty(&nlm_blocked)) {
682 block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
700 683
701 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
702 nlm_blocked,
703 nlm_blocked? nlm_blocked->b_when : 0);
704 while ((block = nlm_blocked) != 0) {
705 if (block->b_when == NLM_NEVER) 684 if (block->b_when == NLM_NEVER)
706 break; 685 break;
707 if (time_after(block->b_when,jiffies)) 686 if (time_after(block->b_when,jiffies)) {
687 timeout = block->b_when - jiffies;
708 break; 688 break;
689 }
690
709 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", 691 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
710 block, block->b_when); 692 block, block->b_when);
711 kref_get(&block->b_count); 693 kref_get(&block->b_count);
@@ -713,8 +695,5 @@ nlmsvc_retry_blocked(void)
713 nlmsvc_release_block(block); 695 nlmsvc_release_block(block);
714 } 696 }
715 697
716 if ((block = nlm_blocked) && block->b_when != NLM_NEVER) 698 return timeout;
717 return (block->b_when - jiffies);
718
719 return MAX_SCHEDULE_TIMEOUT;
720} 699}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index dbb66a3b5cd9..75b2c81bcb93 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -66,8 +66,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
66 return nlm_lck_denied_nolocks; 66 return nlm_lck_denied_nolocks;
67 67
68 /* Obtain host handle */ 68 /* Obtain host handle */
69 if (!(host = nlmsvc_lookup_host(rqstp)) 69 if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
70 || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) 70 || (argp->monitor && nsm_monitor(host) < 0))
71 goto no_locks; 71 goto no_locks;
72 *hostp = host; 72 *hostp = host;
73 73
@@ -287,7 +287,9 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar
287 struct nlm_rqst *call; 287 struct nlm_rqst *call;
288 int stat; 288 int stat;
289 289
290 host = nlmsvc_lookup_host(rqstp); 290 host = nlmsvc_lookup_host(rqstp,
291 argp->lock.caller,
292 argp->lock.len);
291 if (host == NULL) 293 if (host == NULL)
292 return rpc_system_err; 294 return rpc_system_err;
293 295
@@ -449,9 +451,6 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
449 void *resp) 451 void *resp)
450{ 452{
451 struct sockaddr_in saddr = rqstp->rq_addr; 453 struct sockaddr_in saddr = rqstp->rq_addr;
452 int vers = argp->vers;
453 int prot = argp->proto >> 1;
454 struct nlm_host *host;
455 454
456 dprintk("lockd: SM_NOTIFY called\n"); 455 dprintk("lockd: SM_NOTIFY called\n");
457 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 456 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -466,19 +465,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
466 /* Obtain the host pointer for this NFS server and try to 465 /* Obtain the host pointer for this NFS server and try to
467 * reclaim all locks we hold on this server. 466 * reclaim all locks we hold on this server.
468 */ 467 */
468 memset(&saddr, 0, sizeof(saddr));
469 saddr.sin_addr.s_addr = argp->addr; 469 saddr.sin_addr.s_addr = argp->addr;
470 if ((argp->proto & 1)==0) { 470 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
471 if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
472 nlmclnt_recovery(host, argp->state);
473 nlm_release_host(host);
474 }
475 } else {
476 /* If we run on an NFS server, delete all locks held by the client */
477 if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
478 nlmsvc_free_host_resources(host);
479 nlm_release_host(host);
480 }
481 }
482 471
483 return rpc_success; 472 return rpc_success;
484} 473}
@@ -495,7 +484,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
495 484
496 dprintk("lockd: GRANTED_RES called\n"); 485 dprintk("lockd: GRANTED_RES called\n");
497 486
498 nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); 487 nlmsvc_grant_reply(&argp->cookie, argp->status);
499 return rpc_success; 488 return rpc_success;
500} 489}
501 490
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 27288c83da96..b9926ce8782e 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -85,24 +85,20 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
85} 85}
86 86
87/* 87/*
88 * Traverse all shares for a given file (and host). 88 * Traverse all shares for a given file, and delete
89 * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. 89 * those owned by the given (type of) host
90 */ 90 */
91void 91void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file,
92nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) 92 nlm_host_match_fn_t match)
93{ 93{
94 struct nlm_share *share, **shpp; 94 struct nlm_share *share, **shpp;
95 95
96 shpp = &file->f_shares; 96 shpp = &file->f_shares;
97 while ((share = *shpp) != NULL) { 97 while ((share = *shpp) != NULL) {
98 if (action == NLM_ACT_MARK) 98 if (match(share->s_host, host)) {
99 share->s_host->h_inuse = 1; 99 *shpp = share->s_next;
100 else if (action == NLM_ACT_UNLOCK) { 100 kfree(share);
101 if (host == NULL || host == share->s_host) { 101 continue;
102 *shpp = share->s_next;
103 kfree(share);
104 continue;
105 }
106 } 102 }
107 shpp = &share->s_next; 103 shpp = &share->s_next;
108 } 104 }
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index a92dd98f8401..514f5f20701e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -25,9 +25,9 @@
25/* 25/*
26 * Global file hash table 26 * Global file hash table
27 */ 27 */
28#define FILE_HASH_BITS 5 28#define FILE_HASH_BITS 7
29#define FILE_NRHASH (1<<FILE_HASH_BITS) 29#define FILE_NRHASH (1<<FILE_HASH_BITS)
30static struct nlm_file * nlm_files[FILE_NRHASH]; 30static struct hlist_head nlm_files[FILE_NRHASH];
31static DEFINE_MUTEX(nlm_file_mutex); 31static DEFINE_MUTEX(nlm_file_mutex);
32 32
33#ifdef NFSD_DEBUG 33#ifdef NFSD_DEBUG
@@ -82,6 +82,7 @@ u32
82nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, 82nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
83 struct nfs_fh *f) 83 struct nfs_fh *f)
84{ 84{
85 struct hlist_node *pos;
85 struct nlm_file *file; 86 struct nlm_file *file;
86 unsigned int hash; 87 unsigned int hash;
87 u32 nfserr; 88 u32 nfserr;
@@ -93,7 +94,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
93 /* Lock file table */ 94 /* Lock file table */
94 mutex_lock(&nlm_file_mutex); 95 mutex_lock(&nlm_file_mutex);
95 96
96 for (file = nlm_files[hash]; file; file = file->f_next) 97 hlist_for_each_entry(file, pos, &nlm_files[hash], f_list)
97 if (!nfs_compare_fh(&file->f_handle, f)) 98 if (!nfs_compare_fh(&file->f_handle, f))
98 goto found; 99 goto found;
99 100
@@ -105,8 +106,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
105 goto out_unlock; 106 goto out_unlock;
106 107
107 memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); 108 memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
108 file->f_hash = hash; 109 mutex_init(&file->f_mutex);
109 init_MUTEX(&file->f_sema); 110 INIT_HLIST_NODE(&file->f_list);
111 INIT_LIST_HEAD(&file->f_blocks);
110 112
111 /* Open the file. Note that this must not sleep for too long, else 113 /* Open the file. Note that this must not sleep for too long, else
112 * we would lock up lockd:-) So no NFS re-exports, folks. 114 * we would lock up lockd:-) So no NFS re-exports, folks.
@@ -115,12 +117,11 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
115 * the file. 117 * the file.
116 */ 118 */
117 if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { 119 if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
118 dprintk("lockd: open failed (nfserr %d)\n", ntohl(nfserr)); 120 dprintk("lockd: open failed (error %d)\n", nfserr);
119 goto out_free; 121 goto out_free;
120 } 122 }
121 123
122 file->f_next = nlm_files[hash]; 124 hlist_add_head(&file->f_list, &nlm_files[hash]);
123 nlm_files[hash] = file;
124 125
125found: 126found:
126 dprintk("lockd: found file %p (count %d)\n", file, file->f_count); 127 dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
@@ -149,22 +150,14 @@ out_free:
149static inline void 150static inline void
150nlm_delete_file(struct nlm_file *file) 151nlm_delete_file(struct nlm_file *file)
151{ 152{
152 struct nlm_file **fp, *f;
153
154 nlm_debug_print_file("closing file", file); 153 nlm_debug_print_file("closing file", file);
155 154 if (!hlist_unhashed(&file->f_list)) {
156 fp = nlm_files + file->f_hash; 155 hlist_del(&file->f_list);
157 while ((f = *fp) != NULL) { 156 nlmsvc_ops->fclose(file->f_file);
158 if (f == file) { 157 kfree(file);
159 *fp = file->f_next; 158 } else {
160 nlmsvc_ops->fclose(file->f_file); 159 printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
161 kfree(file);
162 return;
163 }
164 fp = &f->f_next;
165 } 160 }
166
167 printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
168} 161}
169 162
170/* 163/*
@@ -172,7 +165,8 @@ nlm_delete_file(struct nlm_file *file)
172 * action. 165 * action.
173 */ 166 */
174static int 167static int
175nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) 168nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
169 nlm_host_match_fn_t match)
176{ 170{
177 struct inode *inode = nlmsvc_file_inode(file); 171 struct inode *inode = nlmsvc_file_inode(file);
178 struct file_lock *fl; 172 struct file_lock *fl;
@@ -186,17 +180,11 @@ again:
186 180
187 /* update current lock count */ 181 /* update current lock count */
188 file->f_locks++; 182 file->f_locks++;
183
189 lockhost = (struct nlm_host *) fl->fl_owner; 184 lockhost = (struct nlm_host *) fl->fl_owner;
190 if (action == NLM_ACT_MARK) 185 if (match(lockhost, host)) {
191 lockhost->h_inuse = 1;
192 else if (action == NLM_ACT_CHECK)
193 return 1;
194 else if (action == NLM_ACT_UNLOCK) {
195 struct file_lock lock = *fl; 186 struct file_lock lock = *fl;
196 187
197 if (host && lockhost != host)
198 continue;
199
200 lock.fl_type = F_UNLCK; 188 lock.fl_type = F_UNLCK;
201 lock.fl_start = 0; 189 lock.fl_start = 0;
202 lock.fl_end = OFFSET_MAX; 190 lock.fl_end = OFFSET_MAX;
@@ -213,53 +201,66 @@ again:
213} 201}
214 202
215/* 203/*
216 * Operate on a single file 204 * Inspect a single file
217 */ 205 */
218static inline int 206static inline int
219nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) 207nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match)
220{ 208{
221 if (action == NLM_ACT_CHECK) { 209 nlmsvc_traverse_blocks(host, file, match);
222 /* Fast path for mark and sweep garbage collection */ 210 nlmsvc_traverse_shares(host, file, match);
223 if (file->f_count || file->f_blocks || file->f_shares) 211 return nlm_traverse_locks(host, file, match);
212}
213
214/*
215 * Quick check whether there are still any locks, blocks or
216 * shares on a given file.
217 */
218static inline int
219nlm_file_inuse(struct nlm_file *file)
220{
221 struct inode *inode = nlmsvc_file_inode(file);
222 struct file_lock *fl;
223
224 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
225 return 1;
226
227 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
228 if (fl->fl_lmops == &nlmsvc_lock_operations)
224 return 1; 229 return 1;
225 } else {
226 nlmsvc_traverse_blocks(host, file, action);
227 nlmsvc_traverse_shares(host, file, action);
228 } 230 }
229 return nlm_traverse_locks(host, file, action); 231 file->f_locks = 0;
232 return 0;
230} 233}
231 234
232/* 235/*
233 * Loop over all files in the file table. 236 * Loop over all files in the file table.
234 */ 237 */
235static int 238static int
236nlm_traverse_files(struct nlm_host *host, int action) 239nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
237{ 240{
238 struct nlm_file *file, **fp; 241 struct hlist_node *pos, *next;
242 struct nlm_file *file;
239 int i, ret = 0; 243 int i, ret = 0;
240 244
241 mutex_lock(&nlm_file_mutex); 245 mutex_lock(&nlm_file_mutex);
242 for (i = 0; i < FILE_NRHASH; i++) { 246 for (i = 0; i < FILE_NRHASH; i++) {
243 fp = nlm_files + i; 247 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
244 while ((file = *fp) != NULL) {
245 file->f_count++; 248 file->f_count++;
246 mutex_unlock(&nlm_file_mutex); 249 mutex_unlock(&nlm_file_mutex);
247 250
248 /* Traverse locks, blocks and shares of this file 251 /* Traverse locks, blocks and shares of this file
249 * and update file->f_locks count */ 252 * and update file->f_locks count */
250 if (nlm_inspect_file(host, file, action)) 253 if (nlm_inspect_file(host, file, match))
251 ret = 1; 254 ret = 1;
252 255
253 mutex_lock(&nlm_file_mutex); 256 mutex_lock(&nlm_file_mutex);
254 file->f_count--; 257 file->f_count--;
255 /* No more references to this file. Let go of it. */ 258 /* No more references to this file. Let go of it. */
256 if (!file->f_blocks && !file->f_locks 259 if (list_empty(&file->f_blocks) && !file->f_locks
257 && !file->f_shares && !file->f_count) { 260 && !file->f_shares && !file->f_count) {
258 *fp = file->f_next; 261 hlist_del(&file->f_list);
259 nlmsvc_ops->fclose(file->f_file); 262 nlmsvc_ops->fclose(file->f_file);
260 kfree(file); 263 kfree(file);
261 } else {
262 fp = &file->f_next;
263 } 264 }
264 } 265 }
265 } 266 }
@@ -286,23 +287,54 @@ nlm_release_file(struct nlm_file *file)
286 mutex_lock(&nlm_file_mutex); 287 mutex_lock(&nlm_file_mutex);
287 288
288 /* If there are no more locks etc, delete the file */ 289 /* If there are no more locks etc, delete the file */
289 if(--file->f_count == 0) { 290 if (--file->f_count == 0 && !nlm_file_inuse(file))
290 if(!nlm_inspect_file(NULL, file, NLM_ACT_CHECK)) 291 nlm_delete_file(file);
291 nlm_delete_file(file);
292 }
293 292
294 mutex_unlock(&nlm_file_mutex); 293 mutex_unlock(&nlm_file_mutex);
295} 294}
296 295
297/* 296/*
297 * Helpers function for resource traversal
298 *
299 * nlmsvc_mark_host:
300 * used by the garbage collector; simply sets h_inuse.
301 * Always returns 0.
302 *
303 * nlmsvc_same_host:
304 * returns 1 iff the two hosts match. Used to release
305 * all resources bound to a specific host.
306 *
307 * nlmsvc_is_client:
308 * returns 1 iff the host is a client.
309 * Used by nlmsvc_invalidate_all
310 */
311static int
312nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy)
313{
314 host->h_inuse = 1;
315 return 0;
316}
317
318static int
319nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other)
320{
321 return host == other;
322}
323
324static int
325nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
326{
327 return host->h_server;
328}
329
330/*
298 * Mark all hosts that still hold resources 331 * Mark all hosts that still hold resources
299 */ 332 */
300void 333void
301nlmsvc_mark_resources(void) 334nlmsvc_mark_resources(void)
302{ 335{
303 dprintk("lockd: nlmsvc_mark_resources\n"); 336 dprintk("lockd: nlmsvc_mark_resources\n");
304 337 nlm_traverse_files(NULL, nlmsvc_mark_host);
305 nlm_traverse_files(NULL, NLM_ACT_MARK);
306} 338}
307 339
308/* 340/*
@@ -313,23 +345,25 @@ nlmsvc_free_host_resources(struct nlm_host *host)
313{ 345{
314 dprintk("lockd: nlmsvc_free_host_resources\n"); 346 dprintk("lockd: nlmsvc_free_host_resources\n");
315 347
316 if (nlm_traverse_files(host, NLM_ACT_UNLOCK)) 348 if (nlm_traverse_files(host, nlmsvc_same_host)) {
317 printk(KERN_WARNING 349 printk(KERN_WARNING
318 "lockd: couldn't remove all locks held by %s", 350 "lockd: couldn't remove all locks held by %s\n",
319 host->h_name); 351 host->h_name);
352 BUG();
353 }
320} 354}
321 355
322/* 356/*
323 * delete all hosts structs for clients 357 * Remove all locks held for clients
324 */ 358 */
325void 359void
326nlmsvc_invalidate_all(void) 360nlmsvc_invalidate_all(void)
327{ 361{
328 struct nlm_host *host; 362 /* Release all locks held by NFS clients.
329 while ((host = nlm_find_client()) != NULL) { 363 * Previously, the code would call
330 nlmsvc_free_host_resources(host); 364 * nlmsvc_free_host_resources for each client in
331 host->h_expires = 0; 365 * turn, which is about as inefficient as it gets.
332 host->h_killed = 1; 366 * Now we just do it once in nlm_traverse_files.
333 nlm_release_host(host); 367 */
334 } 368 nlm_traverse_files(NULL, nlmsvc_is_client);
335} 369}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cfe141e5d759..e13fa23bd108 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
319 319
320static struct cache_head *export_table[EXPORT_HASHMAX]; 320static struct cache_head *export_table[EXPORT_HASHMAX];
321 321
322static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
323{
324 int i;
325
326 for (i = 0; i < fsloc->locations_count; i++) {
327 kfree(fsloc->locations[i].path);
328 kfree(fsloc->locations[i].hosts);
329 }
330 kfree(fsloc->locations);
331}
332
322static void svc_export_put(struct kref *ref) 333static void svc_export_put(struct kref *ref)
323{ 334{
324 struct svc_export *exp = container_of(ref, struct svc_export, h.ref); 335 struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
325 dput(exp->ex_dentry); 336 dput(exp->ex_dentry);
326 mntput(exp->ex_mnt); 337 mntput(exp->ex_mnt);
327 auth_domain_put(exp->ex_client); 338 auth_domain_put(exp->ex_client);
339 kfree(exp->ex_path);
340 nfsd4_fslocs_free(&exp->ex_fslocs);
328 kfree(exp); 341 kfree(exp);
329} 342}
330 343
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags)
386 399
387} 400}
388 401
402#ifdef CONFIG_NFSD_V4
403
404static int
405fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
406{
407 int len;
408 int migrated, i, err;
409
410 len = qword_get(mesg, buf, PAGE_SIZE);
411 if (len != 5 || memcmp(buf, "fsloc", 5))
412 return 0;
413
414 /* listsize */
415 err = get_int(mesg, &fsloc->locations_count);
416 if (err)
417 return err;
418 if (fsloc->locations_count > MAX_FS_LOCATIONS)
419 return -EINVAL;
420 if (fsloc->locations_count == 0)
421 return 0;
422
423 fsloc->locations = kzalloc(fsloc->locations_count
424 * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
425 if (!fsloc->locations)
426 return -ENOMEM;
427 for (i=0; i < fsloc->locations_count; i++) {
428 /* colon separated host list */
429 err = -EINVAL;
430 len = qword_get(mesg, buf, PAGE_SIZE);
431 if (len <= 0)
432 goto out_free_all;
433 err = -ENOMEM;
434 fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
435 if (!fsloc->locations[i].hosts)
436 goto out_free_all;
437 err = -EINVAL;
438 /* slash separated path component list */
439 len = qword_get(mesg, buf, PAGE_SIZE);
440 if (len <= 0)
441 goto out_free_all;
442 err = -ENOMEM;
443 fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
444 if (!fsloc->locations[i].path)
445 goto out_free_all;
446 }
447 /* migrated */
448 err = get_int(mesg, &migrated);
449 if (err)
450 goto out_free_all;
451 err = -EINVAL;
452 if (migrated < 0 || migrated > 1)
453 goto out_free_all;
454 fsloc->migrated = migrated;
455 return 0;
456out_free_all:
457 nfsd4_fslocs_free(fsloc);
458 return err;
459}
460
461#else /* CONFIG_NFSD_V4 */
462static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
463#endif
464
389static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) 465static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
390{ 466{
391 /* client path expiry [flags anonuid anongid fsid] */ 467 /* client path expiry [flags anonuid anongid fsid] */
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
398 int an_int; 474 int an_int;
399 475
400 nd.dentry = NULL; 476 nd.dentry = NULL;
477 exp.ex_path = NULL;
401 478
402 if (mesg[mlen-1] != '\n') 479 if (mesg[mlen-1] != '\n')
403 return -EINVAL; 480 return -EINVAL;
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
428 exp.ex_client = dom; 505 exp.ex_client = dom;
429 exp.ex_mnt = nd.mnt; 506 exp.ex_mnt = nd.mnt;
430 exp.ex_dentry = nd.dentry; 507 exp.ex_dentry = nd.dentry;
508 exp.ex_path = kstrdup(buf, GFP_KERNEL);
509 err = -ENOMEM;
510 if (!exp.ex_path)
511 goto out;
431 512
432 /* expiry */ 513 /* expiry */
433 err = -EINVAL; 514 err = -EINVAL;
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
435 if (exp.h.expiry_time == 0) 516 if (exp.h.expiry_time == 0)
436 goto out; 517 goto out;
437 518
519 /* fs locations */
520 exp.ex_fslocs.locations = NULL;
521 exp.ex_fslocs.locations_count = 0;
522 exp.ex_fslocs.migrated = 0;
523
438 /* flags */ 524 /* flags */
439 err = get_int(&mesg, &an_int); 525 err = get_int(&mesg, &an_int);
440 if (err == -ENOENT) 526 if (err == -ENOENT)
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
460 546
461 err = check_export(nd.dentry->d_inode, exp.ex_flags); 547 err = check_export(nd.dentry->d_inode, exp.ex_flags);
462 if (err) goto out; 548 if (err) goto out;
549
550 err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
551 if (err)
552 goto out;
463 } 553 }
464 554
465 expp = svc_export_lookup(&exp); 555 expp = svc_export_lookup(&exp);
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
473 else 563 else
474 exp_put(expp); 564 exp_put(expp);
475 out: 565 out:
566 kfree(exp.ex_path);
476 if (nd.dentry) 567 if (nd.dentry)
477 path_release(&nd); 568 path_release(&nd);
478 out_no_path: 569 out_no_path:
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
482 return err; 573 return err;
483} 574}
484 575
485static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong); 576static void exp_flags(struct seq_file *m, int flag, int fsid,
577 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
486 578
487static int svc_export_show(struct seq_file *m, 579static int svc_export_show(struct seq_file *m,
488 struct cache_detail *cd, 580 struct cache_detail *cd,
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m,
501 seq_putc(m, '('); 593 seq_putc(m, '(');
502 if (test_bit(CACHE_VALID, &h->flags) && 594 if (test_bit(CACHE_VALID, &h->flags) &&
503 !test_bit(CACHE_NEGATIVE, &h->flags)) 595 !test_bit(CACHE_NEGATIVE, &h->flags))
504 exp_flags(m, exp->ex_flags, exp->ex_fsid, 596 exp_flags(m, exp->ex_flags, exp->ex_fsid,
505 exp->ex_anon_uid, exp->ex_anon_gid); 597 exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
506 seq_puts(m, ")\n"); 598 seq_puts(m, ")\n");
507 return 0; 599 return 0;
508} 600}
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
524 new->ex_client = item->ex_client; 616 new->ex_client = item->ex_client;
525 new->ex_dentry = dget(item->ex_dentry); 617 new->ex_dentry = dget(item->ex_dentry);
526 new->ex_mnt = mntget(item->ex_mnt); 618 new->ex_mnt = mntget(item->ex_mnt);
619 new->ex_path = NULL;
620 new->ex_fslocs.locations = NULL;
621 new->ex_fslocs.locations_count = 0;
622 new->ex_fslocs.migrated = 0;
527} 623}
528 624
529static void export_update(struct cache_head *cnew, struct cache_head *citem) 625static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
535 new->ex_anon_uid = item->ex_anon_uid; 631 new->ex_anon_uid = item->ex_anon_uid;
536 new->ex_anon_gid = item->ex_anon_gid; 632 new->ex_anon_gid = item->ex_anon_gid;
537 new->ex_fsid = item->ex_fsid; 633 new->ex_fsid = item->ex_fsid;
634 new->ex_path = item->ex_path;
635 item->ex_path = NULL;
636 new->ex_fslocs.locations = item->ex_fslocs.locations;
637 item->ex_fslocs.locations = NULL;
638 new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
639 item->ex_fslocs.locations_count = 0;
640 new->ex_fslocs.migrated = item->ex_fslocs.migrated;
641 item->ex_fslocs.migrated = 0;
538} 642}
539 643
540static struct cache_head *svc_export_alloc(void) 644static struct cache_head *svc_export_alloc(void)
@@ -1048,30 +1152,21 @@ int
1048exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, 1152exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
1049 struct cache_req *creq) 1153 struct cache_req *creq)
1050{ 1154{
1051 struct svc_expkey *fsid_key;
1052 struct svc_export *exp; 1155 struct svc_export *exp;
1053 int rv; 1156 int rv;
1054 u32 fsidv[2]; 1157 u32 fsidv[2];
1055 1158
1056 mk_fsid_v1(fsidv, 0); 1159 mk_fsid_v1(fsidv, 0);
1057 1160
1058 fsid_key = exp_find_key(clp, 1, fsidv, creq); 1161 exp = exp_find(clp, 1, fsidv, creq);
1059 if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN) 1162 if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
1060 return nfserr_dropit; 1163 return nfserr_dropit;
1061 if (!fsid_key || IS_ERR(fsid_key))
1062 return nfserr_perm;
1063
1064 exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
1065 if (exp == NULL) 1164 if (exp == NULL)
1066 rv = nfserr_perm; 1165 return nfserr_perm;
1067 else if (IS_ERR(exp)) 1166 else if (IS_ERR(exp))
1068 rv = nfserrno(PTR_ERR(exp)); 1167 return nfserrno(PTR_ERR(exp));
1069 else { 1168 rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
1070 rv = fh_compose(fhp, exp, 1169 exp_put(exp);
1071 fsid_key->ek_dentry, NULL);
1072 exp_put(exp);
1073 }
1074 cache_put(&fsid_key->h, &svc_expkey_cache);
1075 return rv; 1170 return rv;
1076} 1171}
1077 1172
@@ -1158,7 +1253,8 @@ static struct flags {
1158 { 0, {"", ""}} 1253 { 0, {"", ""}}
1159}; 1254};
1160 1255
1161static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong) 1256static void exp_flags(struct seq_file *m, int flag, int fsid,
1257 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
1162{ 1258{
1163 int first = 0; 1259 int first = 0;
1164 struct flags *flg; 1260 struct flags *flg;
@@ -1174,6 +1270,21 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t
1174 seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); 1270 seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
1175 if (anong != (gid_t)-2 && anong != (0x10000-2)) 1271 if (anong != (gid_t)-2 && anong != (0x10000-2))
1176 seq_printf(m, "%sanongid=%d", first++?",":"", anong); 1272 seq_printf(m, "%sanongid=%d", first++?",":"", anong);
1273 if (fsloc && fsloc->locations_count > 0) {
1274 char *loctype = (fsloc->migrated) ? "refer" : "replicas";
1275 int i;
1276
1277 seq_printf(m, "%s%s=", first++?",":"", loctype);
1278 seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
1279 seq_putc(m, '@');
1280 seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
1281 for (i = 1; i < fsloc->locations_count; i++) {
1282 seq_putc(m, ';');
1283 seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
1284 seq_putc(m, '@');
1285 seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
1286 }
1287 }
1177} 1288}
1178 1289
1179static int e_show(struct seq_file *m, void *p) 1290static int e_show(struct seq_file *m, void *p)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fe56b38364cc..9187755661df 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
241 241
242 rqstp->rq_res.page_len = w; 242 rqstp->rq_res.page_len = w;
243 while (w > 0) { 243 while (w > 0) {
244 if (!svc_take_res_page(rqstp)) 244 if (!rqstp->rq_respages[rqstp->rq_resused++])
245 return 0; 245 return 0;
246 w -= PAGE_SIZE; 246 w -= PAGE_SIZE;
247 } 247 }
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = {
333 .vs_proc = nfsd_acl_procedures2, 333 .vs_proc = nfsd_acl_procedures2,
334 .vs_dispatch = nfsd_dispatch, 334 .vs_dispatch = nfsd_dispatch,
335 .vs_xdrsize = NFS3_SVC_XDRSIZE, 335 .vs_xdrsize = NFS3_SVC_XDRSIZE,
336 .vs_hidden = 1,
336}; 337};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 16e10c170aed..d4bdc00c1169 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
185 185
186 rqstp->rq_res.page_len = w; 186 rqstp->rq_res.page_len = w;
187 while (w > 0) { 187 while (w > 0) {
188 if (!svc_take_res_page(rqstp)) 188 if (!rqstp->rq_respages[rqstp->rq_resused++])
189 return 0; 189 return 0;
190 w -= PAGE_SIZE; 190 w -= PAGE_SIZE;
191 } 191 }
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = {
263 .vs_proc = nfsd_acl_procedures3, 263 .vs_proc = nfsd_acl_procedures3,
264 .vs_dispatch = nfsd_dispatch, 264 .vs_dispatch = nfsd_dispatch,
265 .vs_xdrsize = NFS3_SVC_XDRSIZE, 265 .vs_xdrsize = NFS3_SVC_XDRSIZE,
266 .vs_hidden = 1,
266}; 267};
267 268
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f61142afea44..a5ebc7dbb384 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
160 struct nfsd3_readres *resp) 160 struct nfsd3_readres *resp)
161{ 161{
162 int nfserr; 162 int nfserr;
163 u32 max_blocksize = svc_max_payload(rqstp);
163 164
164 dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", 165 dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
165 SVCFH_fmt(&argp->fh), 166 SVCFH_fmt(&argp->fh),
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
172 */ 173 */
173 174
174 resp->count = argp->count; 175 resp->count = argp->count;
175 if (NFSSVC_MAXBLKSIZE < resp->count) 176 if (max_blocksize < resp->count)
176 resp->count = NFSSVC_MAXBLKSIZE; 177 resp->count = max_blocksize;
177 178
178 svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 179 svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
179 180
180 fh_copy(&resp->fh, &argp->fh); 181 fh_copy(&resp->fh, &argp->fh);
181 nfserr = nfsd_read(rqstp, &resp->fh, NULL, 182 nfserr = nfsd_read(rqstp, &resp->fh, NULL,
182 argp->offset, 183 argp->offset,
183 argp->vec, argp->vlen, 184 rqstp->rq_vec, argp->vlen,
184 &resp->count); 185 &resp->count);
185 if (nfserr == 0) { 186 if (nfserr == 0) {
186 struct inode *inode = resp->fh.fh_dentry->d_inode; 187 struct inode *inode = resp->fh.fh_dentry->d_inode;
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
210 resp->committed = argp->stable; 211 resp->committed = argp->stable;
211 nfserr = nfsd_write(rqstp, &resp->fh, NULL, 212 nfserr = nfsd_write(rqstp, &resp->fh, NULL,
212 argp->offset, 213 argp->offset,
213 argp->vec, argp->vlen, 214 rqstp->rq_vec, argp->vlen,
214 argp->len, 215 argp->len,
215 &resp->committed); 216 &resp->committed);
216 resp->count = argp->count; 217 resp->count = argp->count;
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
538 struct nfsd3_fsinfores *resp) 539 struct nfsd3_fsinfores *resp)
539{ 540{
540 int nfserr; 541 int nfserr;
542 u32 max_blocksize = svc_max_payload(rqstp);
541 543
542 dprintk("nfsd: FSINFO(3) %s\n", 544 dprintk("nfsd: FSINFO(3) %s\n",
543 SVCFH_fmt(&argp->fh)); 545 SVCFH_fmt(&argp->fh));
544 546
545 resp->f_rtmax = NFSSVC_MAXBLKSIZE; 547 resp->f_rtmax = max_blocksize;
546 resp->f_rtpref = NFSSVC_MAXBLKSIZE; 548 resp->f_rtpref = max_blocksize;
547 resp->f_rtmult = PAGE_SIZE; 549 resp->f_rtmult = PAGE_SIZE;
548 resp->f_wtmax = NFSSVC_MAXBLKSIZE; 550 resp->f_wtmax = max_blocksize;
549 resp->f_wtpref = NFSSVC_MAXBLKSIZE; 551 resp->f_wtpref = max_blocksize;
550 resp->f_wtmult = PAGE_SIZE; 552 resp->f_wtmult = PAGE_SIZE;
551 resp->f_dtpref = PAGE_SIZE; 553 resp->f_dtpref = PAGE_SIZE;
552 resp->f_maxfilesize = ~(u32) 0; 554 resp->f_maxfilesize = ~(u32) 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 243d94b9653a..247d518248bf 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
330{ 330{
331 unsigned int len; 331 unsigned int len;
332 int v,pn; 332 int v,pn;
333 u32 max_blocksize = svc_max_payload(rqstp);
333 334
334 if (!(p = decode_fh(p, &args->fh)) 335 if (!(p = decode_fh(p, &args->fh))
335 || !(p = xdr_decode_hyper(p, &args->offset))) 336 || !(p = xdr_decode_hyper(p, &args->offset)))
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
337 338
338 len = args->count = ntohl(*p++); 339 len = args->count = ntohl(*p++);
339 340
340 if (len > NFSSVC_MAXBLKSIZE) 341 if (len > max_blocksize)
341 len = NFSSVC_MAXBLKSIZE; 342 len = max_blocksize;
342 343
343 /* set up the kvec */ 344 /* set up the kvec */
344 v=0; 345 v=0;
345 while (len > 0) { 346 while (len > 0) {
346 pn = rqstp->rq_resused; 347 pn = rqstp->rq_resused++;
347 svc_take_page(rqstp); 348 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
348 args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); 349 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
349 args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; 350 len -= rqstp->rq_vec[v].iov_len;
350 len -= args->vec[v].iov_len;
351 v++; 351 v++;
352 } 352 }
353 args->vlen = v; 353 args->vlen = v;
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
359 struct nfsd3_writeargs *args) 359 struct nfsd3_writeargs *args)
360{ 360{
361 unsigned int len, v, hdr; 361 unsigned int len, v, hdr;
362 u32 max_blocksize = svc_max_payload(rqstp);
362 363
363 if (!(p = decode_fh(p, &args->fh)) 364 if (!(p = decode_fh(p, &args->fh))
364 || !(p = xdr_decode_hyper(p, &args->offset))) 365 || !(p = xdr_decode_hyper(p, &args->offset)))
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
373 rqstp->rq_arg.len - hdr < len) 374 rqstp->rq_arg.len - hdr < len)
374 return 0; 375 return 0;
375 376
376 args->vec[0].iov_base = (void*)p; 377 rqstp->rq_vec[0].iov_base = (void*)p;
377 args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; 378 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
378 379
379 if (len > NFSSVC_MAXBLKSIZE) 380 if (len > max_blocksize)
380 len = NFSSVC_MAXBLKSIZE; 381 len = max_blocksize;
381 v= 0; 382 v= 0;
382 while (len > args->vec[v].iov_len) { 383 while (len > rqstp->rq_vec[v].iov_len) {
383 len -= args->vec[v].iov_len; 384 len -= rqstp->rq_vec[v].iov_len;
384 v++; 385 v++;
385 args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); 386 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
386 args->vec[v].iov_len = PAGE_SIZE; 387 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
387 } 388 }
388 args->vec[v].iov_len = len; 389 rqstp->rq_vec[v].iov_len = len;
389 args->vlen = v+1; 390 args->vlen = v+1;
390 391
391 return args->count == args->len && args->vec[0].iov_len > 0; 392 return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
392} 393}
393 394
394int 395int
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
446 * This page appears in the rq_res.pages list, but as pages_len is always 447 * This page appears in the rq_res.pages list, but as pages_len is always
447 * 0, it won't get in the way 448 * 0, it won't get in the way
448 */ 449 */
449 svc_take_page(rqstp);
450 len = ntohl(*p++); 450 len = ntohl(*p++);
451 if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) 451 if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
452 return 0; 452 return 0;
453 args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 453 args->tname = new =
454 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
454 args->tlen = len; 455 args->tlen = len;
455 /* first copy and check from the first page */ 456 /* first copy and check from the first page */
456 old = (char*)p; 457 old = (char*)p;
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
522{ 523{
523 if (!(p = decode_fh(p, &args->fh))) 524 if (!(p = decode_fh(p, &args->fh)))
524 return 0; 525 return 0;
525 svc_take_page(rqstp); 526 args->buffer =
526 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 527 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
527 528
528 return xdr_argsize_check(rqstp, p); 529 return xdr_argsize_check(rqstp, p);
529} 530}
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
554 if (args->count > PAGE_SIZE) 555 if (args->count > PAGE_SIZE)
555 args->count = PAGE_SIZE; 556 args->count = PAGE_SIZE;
556 557
557 svc_take_page(rqstp); 558 args->buffer =
558 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); 559 page_address(rqstp->rq_respages[rqstp->rq_resused++]);
559 560
560 return xdr_argsize_check(rqstp, p); 561 return xdr_argsize_check(rqstp, p);
561} 562}
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
565 struct nfsd3_readdirargs *args) 566 struct nfsd3_readdirargs *args)
566{ 567{
567 int len, pn; 568 int len, pn;
569 u32 max_blocksize = svc_max_payload(rqstp);
568 570
569 if (!(p = decode_fh(p, &args->fh))) 571 if (!(p = decode_fh(p, &args->fh)))
570 return 0; 572 return 0;
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
573 args->dircount = ntohl(*p++); 575 args->dircount = ntohl(*p++);
574 args->count = ntohl(*p++); 576 args->count = ntohl(*p++);
575 577
576 len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : 578 len = (args->count > max_blocksize) ? max_blocksize :
577 args->count; 579 args->count;
578 args->count = len; 580 args->count = len;
579 581
580 while (len > 0) { 582 while (len > 0) {
581 pn = rqstp->rq_resused; 583 pn = rqstp->rq_resused++;
582 svc_take_page(rqstp);
583 if (!args->buffer) 584 if (!args->buffer)
584 args->buffer = page_address(rqstp->rq_respages[pn]); 585 args->buffer = page_address(rqstp->rq_respages[pn]);
585 len -= PAGE_SIZE; 586 len -= PAGE_SIZE;
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
668 rqstp->rq_res.page_len = resp->len; 669 rqstp->rq_res.page_len = resp->len;
669 if (resp->len & 3) { 670 if (resp->len & 3) {
670 /* need to pad the tail */ 671 /* need to pad the tail */
671 rqstp->rq_restailpage = 0;
672 rqstp->rq_res.tail[0].iov_base = p; 672 rqstp->rq_res.tail[0].iov_base = p;
673 *p = 0; 673 *p = 0;
674 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); 674 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
693 rqstp->rq_res.page_len = resp->count; 693 rqstp->rq_res.page_len = resp->count;
694 if (resp->count & 3) { 694 if (resp->count & 3) {
695 /* need to pad the tail */ 695 /* need to pad the tail */
696 rqstp->rq_restailpage = 0;
697 rqstp->rq_res.tail[0].iov_base = p; 696 rqstp->rq_res.tail[0].iov_base = p;
698 *p = 0; 697 *p = 0;
699 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); 698 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
768 rqstp->rq_res.page_len = (resp->count) << 2; 767 rqstp->rq_res.page_len = (resp->count) << 2;
769 768
770 /* add the 'tail' to the end of the 'head' page - page 0. */ 769 /* add the 'tail' to the end of the 'head' page - page 0. */
771 rqstp->rq_restailpage = 0;
772 rqstp->rq_res.tail[0].iov_base = p; 770 rqstp->rq_res.tail[0].iov_base = p;
773 *p++ = 0; /* no more entries */ 771 *p++ = 0; /* no more entries */
774 *p++ = htonl(resp->common.err == nfserr_eof); 772 *p++ = htonl(resp->common.err == nfserr_eof);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index edb107e61b91..5d94555cdc83 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -63,6 +63,8 @@
63#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ 63#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
64 | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) 64 | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
65 65
66#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
67
66#define MASK_EQUAL(mask1, mask2) \ 68#define MASK_EQUAL(mask1, mask2) \
67 ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) 69 ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
68 70
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags)
96/* XXX: modify functions to return NFS errors; they're only ever 98/* XXX: modify functions to return NFS errors; they're only ever
97 * used by nfs code, after all.... */ 99 * used by nfs code, after all.... */
98 100
99static int 101/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
100mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) 102 * side of being more restrictive, so the mode bit mapping below is
103 * pessimistic. An optimistic version would be needed to handle DENY's,
104 * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
105 * bits. */
106
107static void
108low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
101{ 109{
102 u32 ignore = 0; 110 u32 write_mode = NFS4_WRITE_MODE;
103 111
104 if (!(flags & NFS4_ACL_DIR)) 112 if (flags & NFS4_ACL_DIR)
105 ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ 113 write_mode |= NFS4_ACE_DELETE_CHILD;
106 perm |= ignore;
107 *mode = 0; 114 *mode = 0;
108 if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) 115 if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
109 *mode |= ACL_READ; 116 *mode |= ACL_READ;
110 if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) 117 if ((perm & write_mode) == write_mode)
111 *mode |= ACL_WRITE; 118 *mode |= ACL_WRITE;
112 if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) 119 if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
113 *mode |= ACL_EXECUTE; 120 *mode |= ACL_EXECUTE;
114 if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
115 return -EINVAL;
116 return 0;
117} 121}
118 122
119struct ace_container { 123struct ace_container {
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl)
338 return; 342 return;
339} 343}
340 344
341static int
342write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
343 struct posix_acl_entry **pace, short tag, unsigned int flags)
344{
345 struct posix_acl_entry *this = *pace;
346
347 if (*pace == pacl->a_entries + pacl->a_count)
348 return -EINVAL; /* fell off the end */
349 (*pace)++;
350 this->e_tag = tag;
351 if (tag == ACL_USER_OBJ)
352 flags |= NFS4_ACL_OWNER;
353 if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
354 return -EINVAL;
355 this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
356 ace->who : ACL_UNDEFINED_ID);
357 return 0;
358}
359
360static struct nfs4_ace *
361get_next_v4_ace(struct list_head **p, struct list_head *head)
362{
363 struct nfs4_ace *ace;
364
365 *p = (*p)->next;
366 if (*p == head)
367 return NULL;
368 ace = list_entry(*p, struct nfs4_ace, l_ace);
369
370 return ace;
371}
372
373int 345int
374nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, 346nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
375 struct posix_acl **dpacl, unsigned int flags) 347 struct posix_acl **dpacl, unsigned int flags)
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
385 goto out; 357 goto out;
386 358
387 error = nfs4_acl_split(acl, dacl); 359 error = nfs4_acl_split(acl, dacl);
388 if (error < 0) 360 if (error)
389 goto out_acl; 361 goto out_acl;
390 362
391 if (pacl != NULL) { 363 *pacl = _nfsv4_to_posix_one(acl, flags);
392 if (acl->naces == 0) { 364 if (IS_ERR(*pacl)) {
393 error = -ENODATA; 365 error = PTR_ERR(*pacl);
394 goto try_dpacl; 366 *pacl = NULL;
395 } 367 goto out_acl;
396
397 *pacl = _nfsv4_to_posix_one(acl, flags);
398 if (IS_ERR(*pacl)) {
399 error = PTR_ERR(*pacl);
400 *pacl = NULL;
401 goto out_acl;
402 }
403 } 368 }
404 369
405try_dpacl: 370 *dpacl = _nfsv4_to_posix_one(dacl, flags);
406 if (dpacl != NULL) { 371 if (IS_ERR(*dpacl)) {
407 if (dacl->naces == 0) { 372 error = PTR_ERR(*dpacl);
408 if (pacl == NULL || *pacl == NULL) 373 *dpacl = NULL;
409 error = -ENODATA;
410 goto out_acl;
411 }
412
413 error = 0;
414 *dpacl = _nfsv4_to_posix_one(dacl, flags);
415 if (IS_ERR(*dpacl)) {
416 error = PTR_ERR(*dpacl);
417 *dpacl = NULL;
418 goto out_acl;
419 }
420 } 374 }
421
422out_acl: 375out_acl:
423 if (error && pacl) { 376 if (error) {
424 posix_acl_release(*pacl); 377 posix_acl_release(*pacl);
425 *pacl = NULL; 378 *pacl = NULL;
426 } 379 }
@@ -429,349 +382,311 @@ out:
429 return error; 382 return error;
430} 383}
431 384
385/*
386 * While processing the NFSv4 ACE, this maintains bitmasks representing
387 * which permission bits have been allowed and which denied to a given
388 * entity: */
389struct posix_ace_state {
390 u32 allow;
391 u32 deny;
392};
393
394struct posix_user_ace_state {
395 uid_t uid;
396 struct posix_ace_state perms;
397};
398
399struct posix_ace_state_array {
400 int n;
401 struct posix_user_ace_state aces[];
402};
403
404/*
405 * While processing the NFSv4 ACE, this maintains the partial permissions
406 * calculated so far: */
407
408struct posix_acl_state {
409 struct posix_ace_state owner;
410 struct posix_ace_state group;
411 struct posix_ace_state other;
412 struct posix_ace_state everyone;
413 struct posix_ace_state mask; /* Deny unused in this case */
414 struct posix_ace_state_array *users;
415 struct posix_ace_state_array *groups;
416};
417
432static int 418static int
433same_who(struct nfs4_ace *a, struct nfs4_ace *b) 419init_state(struct posix_acl_state *state, int cnt)
434{ 420{
435 return a->whotype == b->whotype && 421 int alloc;
436 (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); 422
423 memset(state, 0, sizeof(struct posix_acl_state));
424 /*
425 * In the worst case, each individual acl could be for a distinct
426 * named user or group, but we don't no which, so we allocate
427 * enough space for either:
428 */
429 alloc = sizeof(struct posix_ace_state_array)
430 + cnt*sizeof(struct posix_ace_state);
431 state->users = kzalloc(alloc, GFP_KERNEL);
432 if (!state->users)
433 return -ENOMEM;
434 state->groups = kzalloc(alloc, GFP_KERNEL);
435 if (!state->groups) {
436 kfree(state->users);
437 return -ENOMEM;
438 }
439 return 0;
437} 440}
438 441
439static int 442static void
440complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, 443free_state(struct posix_acl_state *state) {
441 unsigned int flags) 444 kfree(state->users);
442{ 445 kfree(state->groups);
443 int ignore = 0;
444 if (!(flags & NFS4_ACL_DIR))
445 ignore |= NFS4_ACE_DELETE_CHILD;
446 return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
447 ignore|deny->access_mask) &&
448 allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
449 deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
450 allow->flag == deny->flag &&
451 same_who(allow, deny);
452} 446}
453 447
454static inline int 448static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
455user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
456 struct posix_acl *pacl, struct posix_acl_entry **pace,
457 unsigned int flags)
458{ 449{
459 int error = -EINVAL; 450 state->mask.allow |= astate->allow;
460 struct nfs4_ace *ace, *ace2;
461
462 ace = get_next_v4_ace(p, &n4acl->ace_head);
463 if (ace == NULL)
464 goto out;
465 if (ace2type(ace) != ACL_USER_OBJ)
466 goto out;
467 error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
468 if (error < 0)
469 goto out;
470 error = -EINVAL;
471 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
472 if (ace2 == NULL)
473 goto out;
474 if (!complementary_ace_pair(ace, ace2, flags))
475 goto out;
476 error = 0;
477out:
478 return error;
479} 451}
480 452
481static inline int 453/*
482users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, 454 * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
483 struct nfs4_ace **mask_ace, 455 * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
484 struct posix_acl *pacl, struct posix_acl_entry **pace, 456 * to traditional read/write/execute permissions.
485 unsigned int flags) 457 *
486{ 458 * It's problematic to reject acls that use certain mode bits, because it
487 int error = -EINVAL; 459 * places the burden on users to learn the rules about which bits one
488 struct nfs4_ace *ace, *ace2; 460 * particular server sets, without giving the user a lot of help--we return an
461 * error that could mean any number of different things. To make matters
462 * worse, the problematic bits might be introduced by some application that's
463 * automatically mapping from some other acl model.
464 *
465 * So wherever possible we accept anything, possibly erring on the side of
466 * denying more permissions than necessary.
467 *
468 * However we do reject *explicit* DENY's of a few bits representing
469 * permissions we could never deny:
470 */
489 471
490 ace = get_next_v4_ace(p, &n4acl->ace_head); 472static inline int check_deny(u32 mask, int isowner)
491 if (ace == NULL) 473{
492 goto out; 474 if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
493 while (ace2type(ace) == ACL_USER) { 475 return -EINVAL;
494 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) 476 if (!isowner)
495 goto out; 477 return 0;
496 if (*mask_ace && 478 if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
497 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) 479 return -EINVAL;
498 goto out; 480 return 0;
499 *mask_ace = ace;
500 ace = get_next_v4_ace(p, &n4acl->ace_head);
501 if (ace == NULL)
502 goto out;
503 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
504 goto out;
505 error = write_pace(ace, pacl, pace, ACL_USER, flags);
506 if (error < 0)
507 goto out;
508 error = -EINVAL;
509 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
510 if (ace2 == NULL)
511 goto out;
512 if (!complementary_ace_pair(ace, ace2, flags))
513 goto out;
514 if ((*mask_ace)->flag != ace2->flag ||
515 !same_who(*mask_ace, ace2))
516 goto out;
517 ace = get_next_v4_ace(p, &n4acl->ace_head);
518 if (ace == NULL)
519 goto out;
520 }
521 error = 0;
522out:
523 return error;
524} 481}
525 482
526static inline int 483static struct posix_acl *
527group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, 484posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
528 struct nfs4_ace **mask_ace,
529 struct posix_acl *pacl, struct posix_acl_entry **pace,
530 unsigned int flags)
531{ 485{
532 int error = -EINVAL; 486 struct posix_acl_entry *pace;
533 struct nfs4_ace *ace, *ace2; 487 struct posix_acl *pacl;
534 struct ace_container *ac; 488 int nace;
535 struct list_head group_l; 489 int i, error = 0;
536
537 INIT_LIST_HEAD(&group_l);
538 ace = list_entry(*p, struct nfs4_ace, l_ace);
539
540 /* group owner (mask and allow aces) */
541 490
542 if (pacl->a_count != 3) { 491 nace = 4 + state->users->n + state->groups->n;
543 /* then the group owner should be preceded by mask */ 492 pacl = posix_acl_alloc(nace, GFP_KERNEL);
544 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) 493 if (!pacl)
545 goto out; 494 return ERR_PTR(-ENOMEM);
546 if (*mask_ace &&
547 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
548 goto out;
549 *mask_ace = ace;
550 ace = get_next_v4_ace(p, &n4acl->ace_head);
551 if (ace == NULL)
552 goto out;
553 495
554 if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) 496 pace = pacl->a_entries;
555 goto out; 497 pace->e_tag = ACL_USER_OBJ;
498 error = check_deny(state->owner.deny, 1);
499 if (error)
500 goto out_err;
501 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
502 pace->e_id = ACL_UNDEFINED_ID;
503
504 for (i=0; i < state->users->n; i++) {
505 pace++;
506 pace->e_tag = ACL_USER;
507 error = check_deny(state->users->aces[i].perms.deny, 0);
508 if (error)
509 goto out_err;
510 low_mode_from_nfs4(state->users->aces[i].perms.allow,
511 &pace->e_perm, flags);
512 pace->e_id = state->users->aces[i].uid;
513 add_to_mask(state, &state->users->aces[i].perms);
556 } 514 }
557 515
558 if (ace2type(ace) != ACL_GROUP_OBJ) 516 pace++;
559 goto out; 517 pace->e_tag = ACL_GROUP_OBJ;
560 518 error = check_deny(state->group.deny, 0);
561 ac = kmalloc(sizeof(*ac), GFP_KERNEL); 519 if (error)
562 error = -ENOMEM; 520 goto out_err;
563 if (ac == NULL) 521 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
564 goto out; 522 pace->e_id = ACL_UNDEFINED_ID;
565 ac->ace = ace; 523 add_to_mask(state, &state->group);
566 list_add_tail(&ac->ace_l, &group_l); 524
567 525 for (i=0; i < state->groups->n; i++) {
568 error = -EINVAL; 526 pace++;
569 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) 527 pace->e_tag = ACL_GROUP;
570 goto out; 528 error = check_deny(state->groups->aces[i].perms.deny, 0);
571 529 if (error)
572 error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); 530 goto out_err;
573 if (error < 0) 531 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
574 goto out; 532 &pace->e_perm, flags);
575 533 pace->e_id = state->groups->aces[i].uid;
576 error = -EINVAL; 534 add_to_mask(state, &state->groups->aces[i].perms);
577 ace = get_next_v4_ace(p, &n4acl->ace_head); 535 }
578 if (ace == NULL)
579 goto out;
580
581 /* groups (mask and allow aces) */
582
583 while (ace2type(ace) == ACL_GROUP) {
584 if (*mask_ace == NULL)
585 goto out;
586
587 if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
588 !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
589 goto out;
590 *mask_ace = ace;
591 536
592 ace = get_next_v4_ace(p, &n4acl->ace_head); 537 pace++;
593 if (ace == NULL) 538 pace->e_tag = ACL_MASK;
594 goto out; 539 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
595 ac = kmalloc(sizeof(*ac), GFP_KERNEL); 540 pace->e_id = ACL_UNDEFINED_ID;
596 error = -ENOMEM;
597 if (ac == NULL)
598 goto out;
599 error = -EINVAL;
600 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
601 !same_who(ace, *mask_ace))
602 goto out;
603 541
604 ac->ace = ace; 542 pace++;
605 list_add_tail(&ac->ace_l, &group_l); 543 pace->e_tag = ACL_OTHER;
544 error = check_deny(state->other.deny, 0);
545 if (error)
546 goto out_err;
547 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
548 pace->e_id = ACL_UNDEFINED_ID;
606 549
607 error = write_pace(ace, pacl, pace, ACL_GROUP, flags); 550 return pacl;
608 if (error < 0) 551out_err:
609 goto out; 552 posix_acl_release(pacl);
610 error = -EINVAL; 553 return ERR_PTR(error);
611 ace = get_next_v4_ace(p, &n4acl->ace_head); 554}
612 if (ace == NULL)
613 goto out;
614 }
615 555
616 /* group owner (deny ace) */ 556static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
557{
558 /* Allow all bits in the mask not already denied: */
559 astate->allow |= mask & ~astate->deny;
560}
617 561
618 if (ace2type(ace) != ACL_GROUP_OBJ) 562static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
619 goto out; 563{
620 ac = list_entry(group_l.next, struct ace_container, ace_l); 564 /* Deny all bits in the mask not already allowed: */
621 ace2 = ac->ace; 565 astate->deny |= mask & ~astate->allow;
622 if (!complementary_ace_pair(ace2, ace, flags)) 566}
623 goto out;
624 list_del(group_l.next);
625 kfree(ac);
626 567
627 /* groups (deny aces) */ 568static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
569{
570 int i;
628 571
629 while (!list_empty(&group_l)) { 572 for (i = 0; i < a->n; i++)
630 ace = get_next_v4_ace(p, &n4acl->ace_head); 573 if (a->aces[i].uid == uid)
631 if (ace == NULL) 574 return i;
632 goto out; 575 /* Not found: */
633 if (ace2type(ace) != ACL_GROUP) 576 a->n++;
634 goto out; 577 a->aces[i].uid = uid;
635 ac = list_entry(group_l.next, struct ace_container, ace_l); 578 a->aces[i].perms.allow = state->everyone.allow;
636 ace2 = ac->ace; 579 a->aces[i].perms.deny = state->everyone.deny;
637 if (!complementary_ace_pair(ace2, ace, flags))
638 goto out;
639 list_del(group_l.next);
640 kfree(ac);
641 }
642 580
643 ace = get_next_v4_ace(p, &n4acl->ace_head); 581 return i;
644 if (ace == NULL)
645 goto out;
646 if (ace2type(ace) != ACL_OTHER)
647 goto out;
648 error = 0;
649out:
650 while (!list_empty(&group_l)) {
651 ac = list_entry(group_l.next, struct ace_container, ace_l);
652 list_del(group_l.next);
653 kfree(ac);
654 }
655 return error;
656} 582}
657 583
658static inline int 584static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
659mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
660 struct nfs4_ace **mask_ace,
661 struct posix_acl *pacl, struct posix_acl_entry **pace,
662 unsigned int flags)
663{ 585{
664 int error = -EINVAL; 586 int i;
665 struct nfs4_ace *ace;
666 587
667 ace = list_entry(*p, struct nfs4_ace, l_ace); 588 for (i=0; i < a->n; i++)
668 if (pacl->a_count != 3) { 589 deny_bits(&a->aces[i].perms, mask);
669 if (*mask_ace == NULL)
670 goto out;
671 (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
672 write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
673 }
674 error = 0;
675out:
676 return error;
677} 590}
678 591
679static inline int 592static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
680other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
681 struct posix_acl *pacl, struct posix_acl_entry **pace,
682 unsigned int flags)
683{ 593{
684 int error = -EINVAL; 594 int i;
685 struct nfs4_ace *ace, *ace2;
686 595
687 ace = list_entry(*p, struct nfs4_ace, l_ace); 596 for (i=0; i < a->n; i++)
688 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) 597 allow_bits(&a->aces[i].perms, mask);
689 goto out;
690 error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
691 if (error < 0)
692 goto out;
693 error = -EINVAL;
694 ace2 = get_next_v4_ace(p, &n4acl->ace_head);
695 if (ace2 == NULL)
696 goto out;
697 if (!complementary_ace_pair(ace, ace2, flags))
698 goto out;
699 error = 0;
700out:
701 return error;
702} 598}
703 599
704static int 600static void process_one_v4_ace(struct posix_acl_state *state,
705calculate_posix_ace_count(struct nfs4_acl *n4acl) 601 struct nfs4_ace *ace)
706{ 602{
707 if (n4acl->naces == 6) /* owner, owner group, and other only */ 603 u32 mask = ace->access_mask;
708 return 3; 604 int i;
709 else { /* Otherwise there must be a mask entry. */ 605
710 /* Also, the remaining entries are for named users and 606 switch (ace2type(ace)) {
711 * groups, and come in threes (mask, allow, deny): */ 607 case ACL_USER_OBJ:
712 if (n4acl->naces < 7) 608 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
713 return -EINVAL; 609 allow_bits(&state->owner, mask);
714 if ((n4acl->naces - 7) % 3) 610 } else {
715 return -EINVAL; 611 deny_bits(&state->owner, mask);
716 return 4 + (n4acl->naces - 7)/3; 612 }
613 break;
614 case ACL_USER:
615 i = find_uid(state, state->users, ace->who);
616 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
617 allow_bits(&state->users->aces[i].perms, mask);
618 } else {
619 deny_bits(&state->users->aces[i].perms, mask);
620 mask = state->users->aces[i].perms.deny;
621 deny_bits(&state->owner, mask);
622 }
623 break;
624 case ACL_GROUP_OBJ:
625 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
626 allow_bits(&state->group, mask);
627 } else {
628 deny_bits(&state->group, mask);
629 mask = state->group.deny;
630 deny_bits(&state->owner, mask);
631 deny_bits(&state->everyone, mask);
632 deny_bits_array(state->users, mask);
633 deny_bits_array(state->groups, mask);
634 }
635 break;
636 case ACL_GROUP:
637 i = find_uid(state, state->groups, ace->who);
638 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
639 allow_bits(&state->groups->aces[i].perms, mask);
640 } else {
641 deny_bits(&state->groups->aces[i].perms, mask);
642 mask = state->groups->aces[i].perms.deny;
643 deny_bits(&state->owner, mask);
644 deny_bits(&state->group, mask);
645 deny_bits(&state->everyone, mask);
646 deny_bits_array(state->users, mask);
647 deny_bits_array(state->groups, mask);
648 }
649 break;
650 case ACL_OTHER:
651 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
652 allow_bits(&state->owner, mask);
653 allow_bits(&state->group, mask);
654 allow_bits(&state->other, mask);
655 allow_bits(&state->everyone, mask);
656 allow_bits_array(state->users, mask);
657 allow_bits_array(state->groups, mask);
658 } else {
659 deny_bits(&state->owner, mask);
660 deny_bits(&state->group, mask);
661 deny_bits(&state->other, mask);
662 deny_bits(&state->everyone, mask);
663 deny_bits_array(state->users, mask);
664 deny_bits_array(state->groups, mask);
665 }
717 } 666 }
718} 667}
719 668
720
721static struct posix_acl * 669static struct posix_acl *
722_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) 670_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
723{ 671{
672 struct posix_acl_state state;
724 struct posix_acl *pacl; 673 struct posix_acl *pacl;
725 int error = -EINVAL, nace = 0; 674 struct nfs4_ace *ace;
726 struct list_head *p; 675 int ret;
727 struct nfs4_ace *mask_ace = NULL;
728 struct posix_acl_entry *pace;
729
730 nace = calculate_posix_ace_count(n4acl);
731 if (nace < 0)
732 goto out_err;
733
734 pacl = posix_acl_alloc(nace, GFP_KERNEL);
735 error = -ENOMEM;
736 if (pacl == NULL)
737 goto out_err;
738
739 pace = &pacl->a_entries[0];
740 p = &n4acl->ace_head;
741
742 error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
743 if (error)
744 goto out_acl;
745
746 error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
747 if (error)
748 goto out_acl;
749 676
750 error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, 677 ret = init_state(&state, n4acl->naces);
751 flags); 678 if (ret)
752 if (error) 679 return ERR_PTR(ret);
753 goto out_acl;
754 680
755 error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); 681 list_for_each_entry(ace, &n4acl->ace_head, l_ace)
756 if (error) 682 process_one_v4_ace(&state, ace);
757 goto out_acl;
758 error = other_from_v4(n4acl, &p, pacl, &pace, flags);
759 if (error)
760 goto out_acl;
761 683
762 error = -EINVAL; 684 pacl = posix_state_to_acl(&state, flags);
763 if (p->next != &n4acl->ace_head)
764 goto out_acl;
765 if (pace != pacl->a_entries + pacl->a_count)
766 goto out_acl;
767 685
768 sort_pacl(pacl); 686 free_state(&state);
769 687
770 return pacl; 688 if (!IS_ERR(pacl))
771out_acl: 689 sort_pacl(pacl);
772 posix_acl_release(pacl);
773out_err:
774 pacl = ERR_PTR(error);
775 return pacl; 690 return pacl;
776} 691}
777 692
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
785 list_for_each_safe(h, n, &acl->ace_head) { 700 list_for_each_safe(h, n, &acl->ace_head) {
786 ace = list_entry(h, struct nfs4_ace, l_ace); 701 ace = list_entry(h, struct nfs4_ace, l_ace);
787 702
788 if ((ace->flag & NFS4_INHERITANCE_FLAGS) 703 if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
789 != NFS4_INHERITANCE_FLAGS) 704 ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
790 continue; 705 return -EINVAL;
791 706
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 707 if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
793 ace->access_mask, ace->whotype, ace->who); 708 return -EINVAL;
794 if (error < 0)
795 goto out;
796 709
797 list_del(h); 710 switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
798 kfree(ace); 711 case 0:
799 acl->naces--; 712 /* Leave this ace in the effective acl: */
713 continue;
714 case NFS4_INHERITANCE_FLAGS:
715 /* Add this ace to the default acl and remove it
716 * from the effective acl: */
717 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
718 ace->access_mask, ace->whotype, ace->who);
719 if (error)
720 return error;
721 list_del(h);
722 kfree(ace);
723 acl->naces--;
724 break;
725 case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
726 /* Add this ace to the default, but leave it in
727 * the effective acl as well: */
728 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
729 ace->access_mask, ace->whotype, ace->who);
730 if (error)
731 return error;
732 break;
733 default:
734 return -EINVAL;
735 }
800 } 736 }
801 737 return 0;
802out:
803 return error;
804} 738}
805 739
806static short 740static short
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p)
930 return -1; 864 return -1;
931} 865}
932 866
933static inline int
934match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
935{
936 switch (ace->whotype) {
937 case NFS4_ACL_WHO_NAMED:
938 return who == ace->who;
939 case NFS4_ACL_WHO_OWNER:
940 return who == owner;
941 case NFS4_ACL_WHO_GROUP:
942 return who == group;
943 case NFS4_ACL_WHO_EVERYONE:
944 return 1;
945 default:
946 return 0;
947 }
948}
949
950EXPORT_SYMBOL(nfs4_acl_new); 867EXPORT_SYMBOL(nfs4_acl_new);
951EXPORT_SYMBOL(nfs4_acl_free); 868EXPORT_SYMBOL(nfs4_acl_free);
952EXPORT_SYMBOL(nfs4_acl_add_ace); 869EXPORT_SYMBOL(nfs4_acl_add_ace);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 15ded7a30a72..8333db12caca 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
646 *p++ = nfssvc_boot.tv_usec; 646 *p++ = nfssvc_boot.tv_usec;
647 647
648 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, 648 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
649 write->wr_vec, write->wr_vlen, write->wr_buflen, 649 rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
650 &write->wr_how_written); 650 &write->wr_how_written);
651 if (filp) 651 if (filp)
652 fput(filp); 652 fput(filp);
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
802 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH 802 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
803 * require a valid current filehandle 803 * require a valid current filehandle
804 */ 804 */
805 if ((!current_fh->fh_dentry) && 805 if (!current_fh->fh_dentry) {
806 !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || 806 if (!((op->opnum == OP_PUTFH) ||
807 (op->opnum == OP_SETCLIENTID) || 807 (op->opnum == OP_PUTROOTFH) ||
808 (op->opnum == OP_SETCLIENTID_CONFIRM) || 808 (op->opnum == OP_SETCLIENTID) ||
809 (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || 809 (op->opnum == OP_SETCLIENTID_CONFIRM) ||
810 (op->opnum == OP_RELEASE_LOCKOWNER))) { 810 (op->opnum == OP_RENEW) ||
811 op->status = nfserr_nofilehandle; 811 (op->opnum == OP_RESTOREFH) ||
812 (op->opnum == OP_RELEASE_LOCKOWNER))) {
813 op->status = nfserr_nofilehandle;
814 goto encode_op;
815 }
816 }
817 /* Check must be done at start of each operation, except
818 * for GETATTR and ops not listed as returning NFS4ERR_MOVED
819 */
820 else if (current_fh->fh_export->ex_fslocs.migrated &&
821 !((op->opnum == OP_GETATTR) ||
822 (op->opnum == OP_PUTROOTFH) ||
823 (op->opnum == OP_PUTPUBFH) ||
824 (op->opnum == OP_RENEW) ||
825 (op->opnum == OP_SETCLIENTID) ||
826 (op->opnum == OP_RELEASE_LOCKOWNER))) {
827 op->status = nfserr_moved;
812 goto encode_op; 828 goto encode_op;
813 } 829 }
814 switch (op->opnum) { 830 switch (op->opnum) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5be00436b5b8..41fc241b729a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -60,6 +60,14 @@
60 60
61#define NFSDDBG_FACILITY NFSDDBG_XDR 61#define NFSDDBG_FACILITY NFSDDBG_XDR
62 62
63/*
64 * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
65 * directory in order to indicate to the client that a filesystem boundary is present
66 * We use a fixed fsid for a referral
67 */
68#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
69#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
70
63static int 71static int
64check_filename(char *str, int len, int err) 72check_filename(char *str, int len, int err)
65{ 73{
@@ -926,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
926 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); 934 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
927 goto xdr_error; 935 goto xdr_error;
928 } 936 }
929 write->wr_vec[0].iov_base = p; 937 argp->rqstp->rq_vec[0].iov_base = p;
930 write->wr_vec[0].iov_len = avail; 938 argp->rqstp->rq_vec[0].iov_len = avail;
931 v = 0; 939 v = 0;
932 len = write->wr_buflen; 940 len = write->wr_buflen;
933 while (len > write->wr_vec[v].iov_len) { 941 while (len > argp->rqstp->rq_vec[v].iov_len) {
934 len -= write->wr_vec[v].iov_len; 942 len -= argp->rqstp->rq_vec[v].iov_len;
935 v++; 943 v++;
936 write->wr_vec[v].iov_base = page_address(argp->pagelist[0]); 944 argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
937 argp->pagelist++; 945 argp->pagelist++;
938 if (argp->pagelen >= PAGE_SIZE) { 946 if (argp->pagelen >= PAGE_SIZE) {
939 write->wr_vec[v].iov_len = PAGE_SIZE; 947 argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
940 argp->pagelen -= PAGE_SIZE; 948 argp->pagelen -= PAGE_SIZE;
941 } else { 949 } else {
942 write->wr_vec[v].iov_len = argp->pagelen; 950 argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
943 argp->pagelen -= len; 951 argp->pagelen -= len;
944 } 952 }
945 } 953 }
946 argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len); 954 argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
947 argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); 955 argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
948 write->wr_vec[v].iov_len = len; 956 argp->rqstp->rq_vec[v].iov_len = len;
949 write->wr_vlen = v+1; 957 write->wr_vlen = v+1;
950 958
951 DECODE_TAIL; 959 DECODE_TAIL;
@@ -1223,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1223 stateowner->so_replay.rp_buflen); \ 1231 stateowner->so_replay.rp_buflen); \
1224 } } while (0); 1232 } } while (0);
1225 1233
1234/* Encode as an array of strings the string given with components
1235 * seperated @sep.
1236 */
1237static int nfsd4_encode_components(char sep, char *components,
1238 u32 **pp, int *buflen)
1239{
1240 u32 *p = *pp;
1241 u32 *countp = p;
1242 int strlen, count=0;
1243 char *str, *end;
1244
1245 dprintk("nfsd4_encode_components(%s)\n", components);
1246 if ((*buflen -= 4) < 0)
1247 return nfserr_resource;
1248 WRITE32(0); /* We will fill this in with @count later */
1249 end = str = components;
1250 while (*end) {
1251 for (; *end && (*end != sep); end++)
1252 ; /* Point to end of component */
1253 strlen = end - str;
1254 if (strlen) {
1255 if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
1256 return nfserr_resource;
1257 WRITE32(strlen);
1258 WRITEMEM(str, strlen);
1259 count++;
1260 }
1261 else
1262 end++;
1263 str = end;
1264 }
1265 *pp = p;
1266 p = countp;
1267 WRITE32(count);
1268 return 0;
1269}
1270
1271/*
1272 * encode a location element of a fs_locations structure
1273 */
1274static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
1275 u32 **pp, int *buflen)
1276{
1277 int status;
1278 u32 *p = *pp;
1279
1280 status = nfsd4_encode_components(':', location->hosts, &p, buflen);
1281 if (status)
1282 return status;
1283 status = nfsd4_encode_components('/', location->path, &p, buflen);
1284 if (status)
1285 return status;
1286 *pp = p;
1287 return 0;
1288}
1289
1290/*
1291 * Return the path to an export point in the pseudo filesystem namespace
1292 * Returned string is safe to use as long as the caller holds a reference
1293 * to @exp.
1294 */
1295static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
1296{
1297 struct svc_fh tmp_fh;
1298 char *path, *rootpath;
1299 int stat;
1300
1301 fh_init(&tmp_fh, NFS4_FHSIZE);
1302 stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
1303 if (stat)
1304 return ERR_PTR(stat);
1305 rootpath = tmp_fh.fh_export->ex_path;
1306
1307 path = exp->ex_path;
1308
1309 if (strncmp(path, rootpath, strlen(rootpath))) {
1310 printk("nfsd: fs_locations failed;"
1311 "%s is not contained in %s\n", path, rootpath);
1312 return ERR_PTR(-EOPNOTSUPP);
1313 }
1314
1315 return path + strlen(rootpath);
1316}
1317
1318/*
1319 * encode a fs_locations structure
1320 */
1321static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
1322 struct svc_export *exp,
1323 u32 **pp, int *buflen)
1324{
1325 int status, i;
1326 u32 *p = *pp;
1327 struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
1328 char *root = nfsd4_path(rqstp, exp);
1329
1330 if (IS_ERR(root))
1331 return PTR_ERR(root);
1332 status = nfsd4_encode_components('/', root, &p, buflen);
1333 if (status)
1334 return status;
1335 if ((*buflen -= 4) < 0)
1336 return nfserr_resource;
1337 WRITE32(fslocs->locations_count);
1338 for (i=0; i<fslocs->locations_count; i++) {
1339 status = nfsd4_encode_fs_location4(&fslocs->locations[i],
1340 &p, buflen);
1341 if (status)
1342 return status;
1343 }
1344 *pp = p;
1345 return 0;
1346}
1226 1347
1227static u32 nfs4_ftypes[16] = { 1348static u32 nfs4_ftypes[16] = {
1228 NF4BAD, NF4FIFO, NF4CHR, NF4BAD, 1349 NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
@@ -1272,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1272 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); 1393 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
1273} 1394}
1274 1395
1396#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
1397 FATTR4_WORD0_RDATTR_ERROR)
1398#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
1399
1400static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
1401{
1402 /* As per referral draft: */
1403 if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
1404 *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
1405 if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
1406 *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
1407 *rdattr_err = NFSERR_MOVED;
1408 else
1409 return nfserr_moved;
1410 }
1411 *bmval0 &= WORD0_ABSENT_FS_ATTRS;
1412 *bmval1 &= WORD1_ABSENT_FS_ATTRS;
1413 return 0;
1414}
1275 1415
1276/* 1416/*
1277 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle 1417 * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
@@ -1294,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1294 u32 *attrlenp; 1434 u32 *attrlenp;
1295 u32 dummy; 1435 u32 dummy;
1296 u64 dummy64; 1436 u64 dummy64;
1437 u32 rdattr_err = 0;
1297 u32 *p = buffer; 1438 u32 *p = buffer;
1298 int status; 1439 int status;
1299 int aclsupport = 0; 1440 int aclsupport = 0;
@@ -1303,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1303 BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); 1444 BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
1304 BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); 1445 BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
1305 1446
1447 if (exp->ex_fslocs.migrated) {
1448 status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
1449 if (status)
1450 goto out;
1451 }
1452
1306 status = vfs_getattr(exp->ex_mnt, dentry, &stat); 1453 status = vfs_getattr(exp->ex_mnt, dentry, &stat);
1307 if (status) 1454 if (status)
1308 goto out_nfserr; 1455 goto out_nfserr;
@@ -1334,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1334 goto out_nfserr; 1481 goto out_nfserr;
1335 } 1482 }
1336 } 1483 }
1484 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1485 if (exp->ex_fslocs.locations == NULL) {
1486 bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1487 }
1488 }
1337 if ((buflen -= 16) < 0) 1489 if ((buflen -= 16) < 0)
1338 goto out_resource; 1490 goto out_resource;
1339 1491
@@ -1343,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1343 attrlenp = p++; /* to be backfilled later */ 1495 attrlenp = p++; /* to be backfilled later */
1344 1496
1345 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { 1497 if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
1498 u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
1346 if ((buflen -= 12) < 0) 1499 if ((buflen -= 12) < 0)
1347 goto out_resource; 1500 goto out_resource;
1501 if (!aclsupport)
1502 word0 &= ~FATTR4_WORD0_ACL;
1503 if (!exp->ex_fslocs.locations)
1504 word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1348 WRITE32(2); 1505 WRITE32(2);
1349 WRITE32(aclsupport ? 1506 WRITE32(word0);
1350 NFSD_SUPPORTED_ATTRS_WORD0 :
1351 NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
1352 WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); 1507 WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
1353 } 1508 }
1354 if (bmval0 & FATTR4_WORD0_TYPE) { 1509 if (bmval0 & FATTR4_WORD0_TYPE) {
@@ -1402,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1402 if (bmval0 & FATTR4_WORD0_FSID) { 1557 if (bmval0 & FATTR4_WORD0_FSID) {
1403 if ((buflen -= 16) < 0) 1558 if ((buflen -= 16) < 0)
1404 goto out_resource; 1559 goto out_resource;
1405 if (is_fsid(fhp, rqstp->rq_reffh)) { 1560 if (exp->ex_fslocs.migrated) {
1561 WRITE64(NFS4_REFERRAL_FSID_MAJOR);
1562 WRITE64(NFS4_REFERRAL_FSID_MINOR);
1563 } else if (is_fsid(fhp, rqstp->rq_reffh)) {
1406 WRITE64((u64)exp->ex_fsid); 1564 WRITE64((u64)exp->ex_fsid);
1407 WRITE64((u64)0); 1565 WRITE64((u64)0);
1408 } else { 1566 } else {
@@ -1425,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1425 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { 1583 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
1426 if ((buflen -= 4) < 0) 1584 if ((buflen -= 4) < 0)
1427 goto out_resource; 1585 goto out_resource;
1428 WRITE32(0); 1586 WRITE32(rdattr_err);
1429 } 1587 }
1430 if (bmval0 & FATTR4_WORD0_ACL) { 1588 if (bmval0 & FATTR4_WORD0_ACL) {
1431 struct nfs4_ace *ace; 1589 struct nfs4_ace *ace;
@@ -1513,6 +1671,13 @@ out_acl:
1513 goto out_resource; 1671 goto out_resource;
1514 WRITE64((u64) statfs.f_files); 1672 WRITE64((u64) statfs.f_files);
1515 } 1673 }
1674 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1675 status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
1676 if (status == nfserr_resource)
1677 goto out_resource;
1678 if (status)
1679 goto out;
1680 }
1516 if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { 1681 if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
1517 if ((buflen -= 4) < 0) 1682 if ((buflen -= 4) < 0)
1518 goto out_resource; 1683 goto out_resource;
@@ -1536,12 +1701,12 @@ out_acl:
1536 if (bmval0 & FATTR4_WORD0_MAXREAD) { 1701 if (bmval0 & FATTR4_WORD0_MAXREAD) {
1537 if ((buflen -= 8) < 0) 1702 if ((buflen -= 8) < 0)
1538 goto out_resource; 1703 goto out_resource;
1539 WRITE64((u64) NFSSVC_MAXBLKSIZE); 1704 WRITE64((u64) svc_max_payload(rqstp));
1540 } 1705 }
1541 if (bmval0 & FATTR4_WORD0_MAXWRITE) { 1706 if (bmval0 & FATTR4_WORD0_MAXWRITE) {
1542 if ((buflen -= 8) < 0) 1707 if ((buflen -= 8) < 0)
1543 goto out_resource; 1708 goto out_resource;
1544 WRITE64((u64) NFSSVC_MAXBLKSIZE); 1709 WRITE64((u64) svc_max_payload(rqstp));
1545 } 1710 }
1546 if (bmval1 & FATTR4_WORD1_MODE) { 1711 if (bmval1 & FATTR4_WORD1_MODE) {
1547 if ((buflen -= 4) < 0) 1712 if ((buflen -= 4) < 0)
@@ -1845,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
1845 nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, 2010 nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
1846 resp->p, &buflen, getattr->ga_bmval, 2011 resp->p, &buflen, getattr->ga_bmval,
1847 resp->rqstp); 2012 resp->rqstp);
1848
1849 if (!nfserr) 2013 if (!nfserr)
1850 resp->p += buflen; 2014 resp->p += buflen;
1851 return nfserr; 2015 return nfserr;
@@ -2039,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
2039} 2203}
2040 2204
2041static int 2205static int
2042nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read) 2206nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
2207 struct nfsd4_read *read)
2043{ 2208{
2044 u32 eof; 2209 u32 eof;
2045 int v, pn; 2210 int v, pn;
@@ -2054,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2054 2219
2055 RESERVE_SPACE(8); /* eof flag and byte count */ 2220 RESERVE_SPACE(8); /* eof flag and byte count */
2056 2221
2057 maxcount = NFSSVC_MAXBLKSIZE; 2222 maxcount = svc_max_payload(resp->rqstp);
2058 if (maxcount > read->rd_length) 2223 if (maxcount > read->rd_length)
2059 maxcount = read->rd_length; 2224 maxcount = read->rd_length;
2060 2225
2061 len = maxcount; 2226 len = maxcount;
2062 v = 0; 2227 v = 0;
2063 while (len > 0) { 2228 while (len > 0) {
2064 pn = resp->rqstp->rq_resused; 2229 pn = resp->rqstp->rq_resused++;
2065 svc_take_page(resp->rqstp); 2230 resp->rqstp->rq_vec[v].iov_base =
2066 read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); 2231 page_address(resp->rqstp->rq_respages[pn]);
2067 read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; 2232 resp->rqstp->rq_vec[v].iov_len =
2233 len < PAGE_SIZE ? len : PAGE_SIZE;
2068 v++; 2234 v++;
2069 len -= PAGE_SIZE; 2235 len -= PAGE_SIZE;
2070 } 2236 }
2071 read->rd_vlen = v; 2237 read->rd_vlen = v;
2072 2238
2073 nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, 2239 nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
2074 read->rd_offset, read->rd_iov, read->rd_vlen, 2240 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
2075 &maxcount); 2241 &maxcount);
2076 2242
2077 if (nfserr == nfserr_symlink) 2243 if (nfserr == nfserr_symlink)
2078 nfserr = nfserr_inval; 2244 nfserr = nfserr_inval;
2079 if (nfserr) 2245 if (nfserr)
2080 return nfserr; 2246 return nfserr;
2081 eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); 2247 eof = (read->rd_offset + maxcount >=
2248 read->rd_fhp->fh_dentry->d_inode->i_size);
2082 2249
2083 WRITE32(eof); 2250 WRITE32(eof);
2084 WRITE32(maxcount); 2251 WRITE32(maxcount);
@@ -2088,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2088 resp->xbuf->page_len = maxcount; 2255 resp->xbuf->page_len = maxcount;
2089 2256
2090 /* Use rest of head for padding and remaining ops: */ 2257 /* Use rest of head for padding and remaining ops: */
2091 resp->rqstp->rq_restailpage = 0;
2092 resp->xbuf->tail[0].iov_base = p; 2258 resp->xbuf->tail[0].iov_base = p;
2093 resp->xbuf->tail[0].iov_len = 0; 2259 resp->xbuf->tail[0].iov_len = 0;
2094 if (maxcount&3) { 2260 if (maxcount&3) {
@@ -2113,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2113 if (resp->xbuf->page_len) 2279 if (resp->xbuf->page_len)
2114 return nfserr_resource; 2280 return nfserr_resource;
2115 2281
2116 svc_take_page(resp->rqstp); 2282 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
2117 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2118 2283
2119 maxcount = PAGE_SIZE; 2284 maxcount = PAGE_SIZE;
2120 RESERVE_SPACE(4); 2285 RESERVE_SPACE(4);
@@ -2138,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2138 resp->xbuf->page_len = maxcount; 2303 resp->xbuf->page_len = maxcount;
2139 2304
2140 /* Use rest of head for padding and remaining ops: */ 2305 /* Use rest of head for padding and remaining ops: */
2141 resp->rqstp->rq_restailpage = 0;
2142 resp->xbuf->tail[0].iov_base = p; 2306 resp->xbuf->tail[0].iov_base = p;
2143 resp->xbuf->tail[0].iov_len = 0; 2307 resp->xbuf->tail[0].iov_len = 0;
2144 if (maxcount&3) { 2308 if (maxcount&3) {
@@ -2189,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2189 goto err_no_verf; 2353 goto err_no_verf;
2190 } 2354 }
2191 2355
2192 svc_take_page(resp->rqstp); 2356 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
2193 page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2194 readdir->common.err = 0; 2357 readdir->common.err = 0;
2195 readdir->buflen = maxcount; 2358 readdir->buflen = maxcount;
2196 readdir->buffer = page; 2359 readdir->buffer = page;
@@ -2215,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2215 p = readdir->buffer; 2378 p = readdir->buffer;
2216 *p++ = 0; /* no more entries */ 2379 *p++ = 0; /* no more entries */
2217 *p++ = htonl(readdir->common.err == nfserr_eof); 2380 *p++ = htonl(readdir->common.err == nfserr_eof);
2218 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2381 resp->xbuf->page_len = ((char*)p) - (char*)page_address(
2382 resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2219 2383
2220 /* Use rest of head for padding and remaining ops: */ 2384 /* Use rest of head for padding and remaining ops: */
2221 resp->rqstp->rq_restailpage = 0;
2222 resp->xbuf->tail[0].iov_base = tailbase; 2385 resp->xbuf->tail[0].iov_base = tailbase;
2223 resp->xbuf->tail[0].iov_len = 0; 2386 resp->xbuf->tail[0].iov_len = 0;
2224 resp->p = resp->xbuf->tail[0].iov_base; 2387 resp->p = resp->xbuf->tail[0].iov_base;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c6a477c20ec..39aed901514b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -57,6 +57,7 @@ enum {
57 NFSD_Pool_Threads, 57 NFSD_Pool_Threads,
58 NFSD_Versions, 58 NFSD_Versions,
59 NFSD_Ports, 59 NFSD_Ports,
60 NFSD_MaxBlkSize,
60 /* 61 /*
61 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 62 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
62 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops 63 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -82,6 +83,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size);
82static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); 83static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
83static ssize_t write_versions(struct file *file, char *buf, size_t size); 84static ssize_t write_versions(struct file *file, char *buf, size_t size);
84static ssize_t write_ports(struct file *file, char *buf, size_t size); 85static ssize_t write_ports(struct file *file, char *buf, size_t size);
86static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
85#ifdef CONFIG_NFSD_V4 87#ifdef CONFIG_NFSD_V4
86static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 88static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
87static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 89static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -100,6 +102,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
100 [NFSD_Pool_Threads] = write_pool_threads, 102 [NFSD_Pool_Threads] = write_pool_threads,
101 [NFSD_Versions] = write_versions, 103 [NFSD_Versions] = write_versions,
102 [NFSD_Ports] = write_ports, 104 [NFSD_Ports] = write_ports,
105 [NFSD_MaxBlkSize] = write_maxblksize,
103#ifdef CONFIG_NFSD_V4 106#ifdef CONFIG_NFSD_V4
104 [NFSD_Leasetime] = write_leasetime, 107 [NFSD_Leasetime] = write_leasetime,
105 [NFSD_RecoveryDir] = write_recoverydir, 108 [NFSD_RecoveryDir] = write_recoverydir,
@@ -523,18 +526,20 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
523 err = nfsd_create_serv(); 526 err = nfsd_create_serv();
524 if (!err) { 527 if (!err) {
525 int proto = 0; 528 int proto = 0;
526 err = lockd_up(proto); 529 err = svc_addsock(nfsd_serv, fd, buf, &proto);
527 if (!err) { 530 if (err >= 0) {
528 err = svc_addsock(nfsd_serv, fd, buf, &proto); 531 err = lockd_up(proto);
529 if (err) 532 if (err < 0)
530 lockd_down(); 533 svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
531 } 534 }
532 /* Decrease the count, but don't shutdown the 535 /* Decrease the count, but don't shutdown the
533 * the service 536 * the service
534 */ 537 */
538 lock_kernel();
535 nfsd_serv->sv_nrthreads--; 539 nfsd_serv->sv_nrthreads--;
540 unlock_kernel();
536 } 541 }
537 return err; 542 return err < 0 ? err : 0;
538 } 543 }
539 if (buf[0] == '-') { 544 if (buf[0] == '-') {
540 char *toclose = kstrdup(buf+1, GFP_KERNEL); 545 char *toclose = kstrdup(buf+1, GFP_KERNEL);
@@ -545,12 +550,43 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
545 if (nfsd_serv) 550 if (nfsd_serv)
546 len = svc_sock_names(buf, nfsd_serv, toclose); 551 len = svc_sock_names(buf, nfsd_serv, toclose);
547 unlock_kernel(); 552 unlock_kernel();
553 if (len >= 0)
554 lockd_down();
548 kfree(toclose); 555 kfree(toclose);
549 return len; 556 return len;
550 } 557 }
551 return -EINVAL; 558 return -EINVAL;
552} 559}
553 560
561int nfsd_max_blksize;
562
563static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
564{
565 char *mesg = buf;
566 if (size > 0) {
567 int bsize;
568 int rv = get_int(&mesg, &bsize);
569 if (rv)
570 return rv;
571 /* force bsize into allowed range and
572 * required alignment.
573 */
574 if (bsize < 1024)
575 bsize = 1024;
576 if (bsize > NFSSVC_MAXBLKSIZE)
577 bsize = NFSSVC_MAXBLKSIZE;
578 bsize &= ~(1024-1);
579 lock_kernel();
580 if (nfsd_serv && nfsd_serv->sv_nrthreads) {
581 unlock_kernel();
582 return -EBUSY;
583 }
584 nfsd_max_blksize = bsize;
585 unlock_kernel();
586 }
587 return sprintf(buf, "%d\n", nfsd_max_blksize);
588}
589
554#ifdef CONFIG_NFSD_V4 590#ifdef CONFIG_NFSD_V4
555extern time_t nfs4_leasetime(void); 591extern time_t nfs4_leasetime(void);
556 592
@@ -616,6 +652,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
616 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, 652 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
617 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 653 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
618 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 654 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
655 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
619#ifdef CONFIG_NFSD_V4 656#ifdef CONFIG_NFSD_V4
620 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 657 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
621 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 658 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 06cd0db0f32b..9ee1dab5d44a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
146 * status, 17 words for fattr, and 1 word for the byte count. 146 * status, 17 words for fattr, and 1 word for the byte count.
147 */ 147 */
148 148
149 if (NFSSVC_MAXBLKSIZE < argp->count) { 149 if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
150 printk(KERN_NOTICE 150 printk(KERN_NOTICE
151 "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", 151 "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
152 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), 152 NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
153 ntohs(rqstp->rq_addr.sin_port), 153 ntohs(rqstp->rq_addr.sin_port),
154 argp->count); 154 argp->count);
155 argp->count = NFSSVC_MAXBLKSIZE; 155 argp->count = NFSSVC_MAXBLKSIZE_V2;
156 } 156 }
157 svc_reserve(rqstp, (19<<2) + argp->count + 4); 157 svc_reserve(rqstp, (19<<2) + argp->count + 4);
158 158
159 resp->count = argp->count; 159 resp->count = argp->count;
160 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 160 nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
161 argp->offset, 161 argp->offset,
162 argp->vec, argp->vlen, 162 rqstp->rq_vec, argp->vlen,
163 &resp->count); 163 &resp->count);
164 164
165 if (nfserr) return nfserr; 165 if (nfserr) return nfserr;
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
185 185
186 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, 186 nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
187 argp->offset, 187 argp->offset,
188 argp->vec, argp->vlen, 188 rqstp->rq_vec, argp->vlen,
189 argp->len, 189 argp->len,
190 &stable); 190 &stable);
191 return nfsd_return_attrs(nfserr, resp); 191 return nfsd_return_attrs(nfserr, resp);
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
225 nfserr = nfserr_exist; 225 nfserr = nfserr_exist;
226 if (isdotent(argp->name, argp->len)) 226 if (isdotent(argp->name, argp->len))
227 goto done; 227 goto done;
228 fh_lock(dirfhp); 228 fh_lock_nested(dirfhp, I_MUTEX_PARENT);
229 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); 229 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
230 if (IS_ERR(dchild)) { 230 if (IS_ERR(dchild)) {
231 nfserr = nfserrno(PTR_ERR(dchild)); 231 nfserr = nfserrno(PTR_ERR(dchild));
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 19443056ec30..6fa6340a5fb8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -198,9 +198,26 @@ int nfsd_create_serv(void)
198 unlock_kernel(); 198 unlock_kernel();
199 return 0; 199 return 0;
200 } 200 }
201 if (nfsd_max_blksize == 0) {
202 /* choose a suitable default */
203 struct sysinfo i;
204 si_meminfo(&i);
205 /* Aim for 1/4096 of memory per thread
206 * This gives 1MB on 4Gig machines
207 * But only uses 32K on 128M machines.
208 * Bottom out at 8K on 32M and smaller.
209 * Of course, this is only a default.
210 */
211 nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
212 i.totalram <<= PAGE_SHIFT - 12;
213 while (nfsd_max_blksize > i.totalram &&
214 nfsd_max_blksize >= 8*1024*2)
215 nfsd_max_blksize /= 2;
216 }
201 217
202 atomic_set(&nfsd_busy, 0); 218 atomic_set(&nfsd_busy, 0);
203 nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE, 219 nfsd_serv = svc_create_pooled(&nfsd_program,
220 NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
204 nfsd_last_thread, 221 nfsd_last_thread,
205 nfsd, SIG_NOCLEAN, THIS_MODULE); 222 nfsd, SIG_NOCLEAN, THIS_MODULE);
206 if (nfsd_serv == NULL) 223 if (nfsd_serv == NULL)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 3f14a17eaa6e..1135c0d14557 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
254 len = args->count = ntohl(*p++); 254 len = args->count = ntohl(*p++);
255 p++; /* totalcount - unused */ 255 p++; /* totalcount - unused */
256 256
257 if (len > NFSSVC_MAXBLKSIZE) 257 if (len > NFSSVC_MAXBLKSIZE_V2)
258 len = NFSSVC_MAXBLKSIZE; 258 len = NFSSVC_MAXBLKSIZE_V2;
259 259
260 /* set up somewhere to store response. 260 /* set up somewhere to store response.
261 * We take pages, put them on reslist and include in iovec 261 * We take pages, put them on reslist and include in iovec
262 */ 262 */
263 v=0; 263 v=0;
264 while (len > 0) { 264 while (len > 0) {
265 pn=rqstp->rq_resused; 265 pn = rqstp->rq_resused++;
266 svc_take_page(rqstp); 266 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
267 args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); 267 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
268 args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; 268 len -= rqstp->rq_vec[v].iov_len;
269 len -= args->vec[v].iov_len;
270 v++; 269 v++;
271 } 270 }
272 args->vlen = v; 271 args->vlen = v;
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
286 args->offset = ntohl(*p++); /* offset */ 285 args->offset = ntohl(*p++); /* offset */
287 p++; /* totalcount */ 286 p++; /* totalcount */
288 len = args->len = ntohl(*p++); 287 len = args->len = ntohl(*p++);
289 args->vec[0].iov_base = (void*)p; 288 rqstp->rq_vec[0].iov_base = (void*)p;
290 args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - 289 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
291 (((void*)p) - rqstp->rq_arg.head[0].iov_base); 290 (((void*)p) - rqstp->rq_arg.head[0].iov_base);
292 if (len > NFSSVC_MAXBLKSIZE) 291 if (len > NFSSVC_MAXBLKSIZE_V2)
293 len = NFSSVC_MAXBLKSIZE; 292 len = NFSSVC_MAXBLKSIZE_V2;
294 v = 0; 293 v = 0;
295 while (len > args->vec[v].iov_len) { 294 while (len > rqstp->rq_vec[v].iov_len) {
296 len -= args->vec[v].iov_len; 295 len -= rqstp->rq_vec[v].iov_len;
297 v++; 296 v++;
298 args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); 297 rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
299 args->vec[v].iov_len = PAGE_SIZE; 298 rqstp->rq_vec[v].iov_len = PAGE_SIZE;
300 } 299 }
301 args->vec[v].iov_len = len; 300 rqstp->rq_vec[v].iov_len = len;
302 args->vlen = v+1; 301 args->vlen = v+1;
303 return args->vec[0].iov_len > 0; 302 return rqstp->rq_vec[0].iov_len > 0;
304} 303}
305 304
306int 305int
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
333{ 332{
334 if (!(p = decode_fh(p, &args->fh))) 333 if (!(p = decode_fh(p, &args->fh)))
335 return 0; 334 return 0;
336 svc_take_page(rqstp); 335 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
337 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
338 336
339 return xdr_argsize_check(rqstp, p); 337 return xdr_argsize_check(rqstp, p);
340} 338}
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
375 if (args->count > PAGE_SIZE) 373 if (args->count > PAGE_SIZE)
376 args->count = PAGE_SIZE; 374 args->count = PAGE_SIZE;
377 375
378 svc_take_page(rqstp); 376 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
379 args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
380 377
381 return xdr_argsize_check(rqstp, p); 378 return xdr_argsize_check(rqstp, p);
382} 379}
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
416 rqstp->rq_res.page_len = resp->len; 413 rqstp->rq_res.page_len = resp->len;
417 if (resp->len & 3) { 414 if (resp->len & 3) {
418 /* need to pad the tail */ 415 /* need to pad the tail */
419 rqstp->rq_restailpage = 0;
420 rqstp->rq_res.tail[0].iov_base = p; 416 rqstp->rq_res.tail[0].iov_base = p;
421 *p = 0; 417 *p = 0;
422 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); 418 rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
436 rqstp->rq_res.page_len = resp->count; 432 rqstp->rq_res.page_len = resp->count;
437 if (resp->count & 3) { 433 if (resp->count & 3) {
438 /* need to pad the tail */ 434 /* need to pad the tail */
439 rqstp->rq_restailpage = 0;
440 rqstp->rq_res.tail[0].iov_base = p; 435 rqstp->rq_res.tail[0].iov_base = p;
441 *p = 0; 436 *p = 0;
442 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); 437 rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
463{ 458{
464 struct kstatfs *stat = &resp->stats; 459 struct kstatfs *stat = &resp->stats;
465 460
466 *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ 461 *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
467 *p++ = htonl(stat->f_bsize); 462 *p++ = htonl(stat->f_bsize);
468 *p++ = htonl(stat->f_blocks); 463 *p++ = htonl(stat->f_blocks);
469 *p++ = htonl(stat->f_bfree); 464 *p++ = htonl(stat->f_bfree);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 443ebc52e382..1141bd29e4e3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
54#include <linux/nfsd_idmap.h> 54#include <linux/nfsd_idmap.h>
55#include <linux/security.h> 55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 56#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59 60
@@ -81,10 +82,19 @@ struct raparms {
81 dev_t p_dev; 82 dev_t p_dev;
82 int p_set; 83 int p_set;
83 struct file_ra_state p_ra; 84 struct file_ra_state p_ra;
85 unsigned int p_hindex;
84}; 86};
85 87
88struct raparm_hbucket {
89 struct raparms *pb_head;
90 spinlock_t pb_lock;
91} ____cacheline_aligned_in_smp;
92
86static struct raparms * raparml; 93static struct raparms * raparml;
87static struct raparms * raparm_cache; 94#define RAPARM_HASH_BITS 4
95#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
96#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
97static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
88 98
89/* 99/*
90 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 100 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
437 } else if (error < 0) 447 } else if (error < 0)
438 goto out_nfserr; 448 goto out_nfserr;
439 449
440 if (pacl) { 450 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
441 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); 451 if (error < 0)
442 if (error < 0) 452 goto out_nfserr;
443 goto out_nfserr;
444 }
445 453
446 if (dpacl) { 454 if (S_ISDIR(inode->i_mode)) {
447 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); 455 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
448 if (error < 0) 456 if (error < 0)
449 goto out_nfserr; 457 goto out_nfserr;
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp)
743 * Obtain the readahead parameters for the file 751 * Obtain the readahead parameters for the file
744 * specified by (dev, ino). 752 * specified by (dev, ino).
745 */ 753 */
746static DEFINE_SPINLOCK(ra_lock);
747 754
748static inline struct raparms * 755static inline struct raparms *
749nfsd_get_raparms(dev_t dev, ino_t ino) 756nfsd_get_raparms(dev_t dev, ino_t ino)
750{ 757{
751 struct raparms *ra, **rap, **frap = NULL; 758 struct raparms *ra, **rap, **frap = NULL;
752 int depth = 0; 759 int depth = 0;
760 unsigned int hash;
761 struct raparm_hbucket *rab;
753 762
754 spin_lock(&ra_lock); 763 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
755 for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { 764 rab = &raparm_hash[hash];
765
766 spin_lock(&rab->pb_lock);
767 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
756 if (ra->p_ino == ino && ra->p_dev == dev) 768 if (ra->p_ino == ino && ra->p_dev == dev)
757 goto found; 769 goto found;
758 depth++; 770 depth++;
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
761 } 773 }
762 depth = nfsdstats.ra_size*11/10; 774 depth = nfsdstats.ra_size*11/10;
763 if (!frap) { 775 if (!frap) {
764 spin_unlock(&ra_lock); 776 spin_unlock(&rab->pb_lock);
765 return NULL; 777 return NULL;
766 } 778 }
767 rap = frap; 779 rap = frap;
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
769 ra->p_dev = dev; 781 ra->p_dev = dev;
770 ra->p_ino = ino; 782 ra->p_ino = ino;
771 ra->p_set = 0; 783 ra->p_set = 0;
784 ra->p_hindex = hash;
772found: 785found:
773 if (rap != &raparm_cache) { 786 if (rap != &rab->pb_head) {
774 *rap = ra->p_next; 787 *rap = ra->p_next;
775 ra->p_next = raparm_cache; 788 ra->p_next = rab->pb_head;
776 raparm_cache = ra; 789 rab->pb_head = ra;
777 } 790 }
778 ra->p_count++; 791 ra->p_count++;
779 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 792 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
780 spin_unlock(&ra_lock); 793 spin_unlock(&rab->pb_lock);
781 return ra; 794 return ra;
782} 795}
783 796
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
791{ 804{
792 unsigned long count = desc->count; 805 unsigned long count = desc->count;
793 struct svc_rqst *rqstp = desc->arg.data; 806 struct svc_rqst *rqstp = desc->arg.data;
807 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
794 808
795 if (size > count) 809 if (size > count)
796 size = count; 810 size = count;
797 811
798 if (rqstp->rq_res.page_len == 0) { 812 if (rqstp->rq_res.page_len == 0) {
799 get_page(page); 813 get_page(page);
800 rqstp->rq_respages[rqstp->rq_resused++] = page; 814 put_page(*pp);
815 *pp = page;
816 rqstp->rq_resused++;
801 rqstp->rq_res.page_base = offset; 817 rqstp->rq_res.page_base = offset;
802 rqstp->rq_res.page_len = size; 818 rqstp->rq_res.page_len = size;
803 } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) { 819 } else if (page != pp[-1]) {
804 get_page(page); 820 get_page(page);
805 rqstp->rq_respages[rqstp->rq_resused++] = page; 821 put_page(*pp);
822 *pp = page;
823 rqstp->rq_resused++;
806 rqstp->rq_res.page_len += size; 824 rqstp->rq_res.page_len += size;
807 } else { 825 } else
808 rqstp->rq_res.page_len += size; 826 rqstp->rq_res.page_len += size;
809 }
810 827
811 desc->count = count - size; 828 desc->count = count - size;
812 desc->written += size; 829 desc->written += size;
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
837 file->f_ra = ra->p_ra; 854 file->f_ra = ra->p_ra;
838 855
839 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 856 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
840 svc_pushback_unused_pages(rqstp); 857 rqstp->rq_resused = 1;
841 err = file->f_op->sendfile(file, &offset, *count, 858 err = file->f_op->sendfile(file, &offset, *count,
842 nfsd_read_actor, rqstp); 859 nfsd_read_actor, rqstp);
843 } else { 860 } else {
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
849 866
850 /* Write back readahead params */ 867 /* Write back readahead params */
851 if (ra) { 868 if (ra) {
852 spin_lock(&ra_lock); 869 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
870 spin_lock(&rab->pb_lock);
853 ra->p_ra = file->f_ra; 871 ra->p_ra = file->f_ra;
854 ra->p_set = 1; 872 ra->p_set = 1;
855 ra->p_count--; 873 ra->p_count--;
856 spin_unlock(&ra_lock); 874 spin_unlock(&rab->pb_lock);
857 } 875 }
858 876
859 if (err >= 0) { 877 if (err >= 0) {
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
1829void 1847void
1830nfsd_racache_shutdown(void) 1848nfsd_racache_shutdown(void)
1831{ 1849{
1832 if (!raparm_cache) 1850 if (!raparml)
1833 return; 1851 return;
1834 dprintk("nfsd: freeing readahead buffers.\n"); 1852 dprintk("nfsd: freeing readahead buffers.\n");
1835 kfree(raparml); 1853 kfree(raparml);
1836 raparm_cache = raparml = NULL; 1854 raparml = NULL;
1837} 1855}
1838/* 1856/*
1839 * Initialize readahead param cache 1857 * Initialize readahead param cache
@@ -1842,19 +1860,31 @@ int
1842nfsd_racache_init(int cache_size) 1860nfsd_racache_init(int cache_size)
1843{ 1861{
1844 int i; 1862 int i;
1863 int j = 0;
1864 int nperbucket;
1865
1845 1866
1846 if (raparm_cache) 1867 if (raparml)
1847 return 0; 1868 return 0;
1869 if (cache_size < 2*RAPARM_HASH_SIZE)
1870 cache_size = 2*RAPARM_HASH_SIZE;
1848 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); 1871 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
1849 1872
1850 if (raparml != NULL) { 1873 if (raparml != NULL) {
1851 dprintk("nfsd: allocating %d readahead buffers.\n", 1874 dprintk("nfsd: allocating %d readahead buffers.\n",
1852 cache_size); 1875 cache_size);
1876 for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
1877 raparm_hash[i].pb_head = NULL;
1878 spin_lock_init(&raparm_hash[i].pb_lock);
1879 }
1880 nperbucket = cache_size >> RAPARM_HASH_BITS;
1853 memset(raparml, 0, sizeof(struct raparms) * cache_size); 1881 memset(raparml, 0, sizeof(struct raparms) * cache_size);
1854 for (i = 0; i < cache_size - 1; i++) { 1882 for (i = 0; i < cache_size - 1; i++) {
1855 raparml[i].p_next = raparml + i + 1; 1883 if (i % nperbucket == 0)
1884 raparm_hash[j++].pb_head = raparml + i;
1885 if (i % nperbucket < nperbucket-1)
1886 raparml[i].p_next = raparml + i + 1;
1856 } 1887 }
1857 raparm_cache = raparml;
1858 } else { 1888 } else {
1859 printk(KERN_WARNING 1889 printk(KERN_WARNING
1860 "nfsd: Could not allocate memory read-ahead cache.\n"); 1890 "nfsd: Could not allocate memory read-ahead cache.\n");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 7e5a2f5ebeb0..9c69bcacad22 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1780,7 +1780,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1780 err = -EDQUOT; 1780 err = -EDQUOT;
1781 goto out_end_trans; 1781 goto out_end_trans;
1782 } 1782 }
1783 if (!dir || !dir->i_nlink) { 1783 if (!dir->i_nlink) {
1784 err = -EPERM; 1784 err = -EPERM;
1785 goto out_bad_inode; 1785 goto out_bad_inode;
1786 } 1786 }