diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-29 17:53:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-29 17:53:25 -0400 |
commit | 71db34fc4330f7c784397acb9f1e6ee7f7b32eb2 (patch) | |
tree | 77dd08f6f778a799dcd0c48eb72d0742349df235 /fs/ext4 | |
parent | 50483c3268918ee51a56d1baa39b9149d2d0d521 (diff) | |
parent | 797a9d797f8483bb67f265c761b76dcd5a077a23 (diff) |
Merge branch 'for-3.4' of git://linux-nfs.org/~bfields/linux
Pull nfsd changes from Bruce Fields:
Highlights:
- Benny Halevy and Tigran Mkrtchyan implemented some more 4.1 features,
moving us closer to a complete 4.1 implementation.
- Bernd Schubert fixed a long-standing problem with readdir cookies on
ext2/3/4.
- Jeff Layton performed a long-overdue overhaul of the server reboot
recovery code which will allow us to deprecate the current code (a
rather unusual user of the vfs), and give us some needed flexibility
for further improvements.
- Like the client, we now support numeric uids and gids in the
auth_sys case, allowing easier upgrades from NFSv2/v3 to v4.x.
Plus miscellaneous bugfixes and cleanup.
Thanks to everyone!
There are also some delegation fixes waiting on vfs review that I
suppose will have to wait for 3.5. With that done I think we'll finally
turn off the "EXPERIMENTAL" dependency for v4 (though that's mostly
symbolic as it's been on by default in distros for a while).
And the list of 4.1 todos should be achievable for 3.5 as well:
http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues
though we may still want a bit more experience with it before turning it
on by default.
* 'for-3.4' of git://linux-nfs.org/~bfields/linux: (55 commits)
nfsd: only register cld pipe notifier when CONFIG_NFSD_V4 is enabled
nfsd4: use auth_unix unconditionally on backchannel
nfsd: fix NULL pointer dereference in cld_pipe_downcall
nfsd4: memory corruption in numeric_name_to_id()
sunrpc: skip portmap calls on sessions backchannel
nfsd4: allow numeric idmapping
nfsd: don't allow legacy client tracker init for anything but init_net
nfsd: add notifier to handle mount/unmount of rpc_pipefs sb
nfsd: add the infrastructure to handle the cld upcall
nfsd: add a header describing upcall to nfsdcld
nfsd: add a per-net-namespace struct for nfsd
sunrpc: create nfsd dir in rpc_pipefs
nfsd: add nfsd4_client_tracking_ops struct and a way to set it
nfsd: convert nfs4_client->cl_cb_flags to a generic flags field
NFSD: Fix nfs4_verifier memory alignment
NFSD: Fix warnings when NFSD_DEBUG is not defined
nfsd: vfs_llseek() with 32 or 64 bit offsets (hashes)
nfsd: rename 'int access' to 'int may_flags' in nfsd_open()
ext4: return 32/64-bit dir name hash according to usage type
fs: add new FMODE flags: FMODE_32bithash and FMODE_64bithash
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/dir.c | 214 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 6 | ||||
-rw-r--r-- | fs/ext4/hash.c | 4 |
3 files changed, 176 insertions, 48 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ad56866d729a..b86786202643 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = { | |||
32 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 32 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | ||
36 | static int ext4_dx_readdir(struct file *filp, | 35 | static int ext4_dx_readdir(struct file *filp, |
37 | void *dirent, filldir_t filldir); | 36 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir(struct inode *inode, | ||
39 | struct file *filp); | ||
40 | |||
41 | const struct file_operations ext4_dir_operations = { | ||
42 | .llseek = ext4_llseek, | ||
43 | .read = generic_read_dir, | ||
44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | ||
45 | .unlocked_ioctl = ext4_ioctl, | ||
46 | #ifdef CONFIG_COMPAT | ||
47 | .compat_ioctl = ext4_compat_ioctl, | ||
48 | #endif | ||
49 | .fsync = ext4_sync_file, | ||
50 | .release = ext4_release_dir, | ||
51 | }; | ||
52 | |||
53 | 37 | ||
54 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 38 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
55 | { | 39 | { |
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
60 | return (ext4_filetype_table[filetype]); | 44 | return (ext4_filetype_table[filetype]); |
61 | } | 45 | } |
62 | 46 | ||
47 | /** | ||
48 | * Check if the given dir-inode refers to an htree-indexed directory | ||
49 | * (or a directory which could potentially get converted to use htree | ||
50 | * indexing). | ||
51 | * | ||
52 | * Return 1 if it is a dx dir, 0 if not | ||
53 | */ | ||
54 | static int is_dx_dir(struct inode *inode) | ||
55 | { | ||
56 | struct super_block *sb = inode->i_sb; | ||
57 | |||
58 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | ||
59 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | ||
60 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || | ||
61 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) | ||
62 | return 1; | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
63 | /* | 67 | /* |
64 | * Return 0 if the directory entry is OK, and 1 if there is a problem | 68 | * Return 0 if the directory entry is OK, and 1 if there is a problem |
65 | * | 69 | * |
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp, | |||
115 | unsigned int offset; | 119 | unsigned int offset; |
116 | int i, stored; | 120 | int i, stored; |
117 | struct ext4_dir_entry_2 *de; | 121 | struct ext4_dir_entry_2 *de; |
118 | struct super_block *sb; | ||
119 | int err; | 122 | int err; |
120 | struct inode *inode = filp->f_path.dentry->d_inode; | 123 | struct inode *inode = filp->f_path.dentry->d_inode; |
124 | struct super_block *sb = inode->i_sb; | ||
121 | int ret = 0; | 125 | int ret = 0; |
122 | int dir_has_error = 0; | 126 | int dir_has_error = 0; |
123 | 127 | ||
124 | sb = inode->i_sb; | 128 | if (is_dx_dir(inode)) { |
125 | |||
126 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | ||
127 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | ||
128 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || | ||
129 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
130 | err = ext4_dx_readdir(filp, dirent, filldir); | 129 | err = ext4_dx_readdir(filp, dirent, filldir); |
131 | if (err != ERR_BAD_DX_DIR) { | 130 | if (err != ERR_BAD_DX_DIR) { |
132 | ret = err; | 131 | ret = err; |
@@ -254,22 +253,134 @@ out: | |||
254 | return ret; | 253 | return ret; |
255 | } | 254 | } |
256 | 255 | ||
256 | static inline int is_32bit_api(void) | ||
257 | { | ||
258 | #ifdef CONFIG_COMPAT | ||
259 | return is_compat_task(); | ||
260 | #else | ||
261 | return (BITS_PER_LONG == 32); | ||
262 | #endif | ||
263 | } | ||
264 | |||
257 | /* | 265 | /* |
258 | * These functions convert from the major/minor hash to an f_pos | 266 | * These functions convert from the major/minor hash to an f_pos |
259 | * value. | 267 | * value for dx directories |
268 | * | ||
269 | * Upper layer (for example NFS) should specify FMODE_32BITHASH or | ||
270 | * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted | ||
271 | * directly on both 32-bit and 64-bit nodes, under such case, neither | ||
272 | * FMODE_32BITHASH nor FMODE_64BITHASH is specified. | ||
273 | */ | ||
274 | static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) | ||
275 | { | ||
276 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
277 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
278 | return major >> 1; | ||
279 | else | ||
280 | return ((__u64)(major >> 1) << 32) | (__u64)minor; | ||
281 | } | ||
282 | |||
283 | static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) | ||
284 | { | ||
285 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
286 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
287 | return (pos << 1) & 0xffffffff; | ||
288 | else | ||
289 | return ((pos >> 32) << 1) & 0xffffffff; | ||
290 | } | ||
291 | |||
292 | static inline __u32 pos2min_hash(struct file *filp, loff_t pos) | ||
293 | { | ||
294 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
295 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
296 | return 0; | ||
297 | else | ||
298 | return pos & 0xffffffff; | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * Return 32- or 64-bit end-of-file for dx directories | ||
303 | */ | ||
304 | static inline loff_t ext4_get_htree_eof(struct file *filp) | ||
305 | { | ||
306 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
307 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
308 | return EXT4_HTREE_EOF_32BIT; | ||
309 | else | ||
310 | return EXT4_HTREE_EOF_64BIT; | ||
311 | } | ||
312 | |||
313 | |||
314 | /* | ||
315 | * ext4_dir_llseek() based on generic_file_llseek() to handle both | ||
316 | * non-htree and htree directories, where the "offset" is in terms | ||
317 | * of the filename hash value instead of the byte offset. | ||
260 | * | 318 | * |
261 | * Currently we only use major hash numer. This is unfortunate, but | 319 | * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) |
262 | * on 32-bit machines, the same VFS interface is used for lseek and | 320 | * will be invalid once the directory was converted into a dx directory |
263 | * llseek, so if we use the 64 bit offset, then the 32-bit versions of | ||
264 | * lseek/telldir/seekdir will blow out spectacularly, and from within | ||
265 | * the ext2 low-level routine, we don't know if we're being called by | ||
266 | * a 64-bit version of the system call or the 32-bit version of the | ||
267 | * system call. Worse yet, NFSv2 only allows for a 32-bit readdir | ||
268 | * cookie. Sigh. | ||
269 | */ | 321 | */ |
270 | #define hash2pos(major, minor) (major >> 1) | 322 | loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) |
271 | #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) | 323 | { |
272 | #define pos2min_hash(pos) (0) | 324 | struct inode *inode = file->f_mapping->host; |
325 | loff_t ret = -EINVAL; | ||
326 | int dx_dir = is_dx_dir(inode); | ||
327 | |||
328 | mutex_lock(&inode->i_mutex); | ||
329 | |||
330 | /* NOTE: relative offsets with dx directories might not work | ||
331 | * as expected, as it is difficult to figure out the | ||
332 | * correct offset between dx hashes */ | ||
333 | |||
334 | switch (origin) { | ||
335 | case SEEK_END: | ||
336 | if (unlikely(offset > 0)) | ||
337 | goto out_err; /* not supported for directories */ | ||
338 | |||
339 | /* so only negative offsets are left, does that have a | ||
340 | * meaning for directories at all? */ | ||
341 | if (dx_dir) | ||
342 | offset += ext4_get_htree_eof(file); | ||
343 | else | ||
344 | offset += inode->i_size; | ||
345 | break; | ||
346 | case SEEK_CUR: | ||
347 | /* | ||
348 | * Here we special-case the lseek(fd, 0, SEEK_CUR) | ||
349 | * position-querying operation. Avoid rewriting the "same" | ||
350 | * f_pos value back to the file because a concurrent read(), | ||
351 | * write() or lseek() might have altered it | ||
352 | */ | ||
353 | if (offset == 0) { | ||
354 | offset = file->f_pos; | ||
355 | goto out_ok; | ||
356 | } | ||
357 | |||
358 | offset += file->f_pos; | ||
359 | break; | ||
360 | } | ||
361 | |||
362 | if (unlikely(offset < 0)) | ||
363 | goto out_err; | ||
364 | |||
365 | if (!dx_dir) { | ||
366 | if (offset > inode->i_sb->s_maxbytes) | ||
367 | goto out_err; | ||
368 | } else if (offset > ext4_get_htree_eof(file)) | ||
369 | goto out_err; | ||
370 | |||
371 | /* Special lock needed here? */ | ||
372 | if (offset != file->f_pos) { | ||
373 | file->f_pos = offset; | ||
374 | file->f_version = 0; | ||
375 | } | ||
376 | |||
377 | out_ok: | ||
378 | ret = offset; | ||
379 | out_err: | ||
380 | mutex_unlock(&inode->i_mutex); | ||
381 | |||
382 | return ret; | ||
383 | } | ||
273 | 384 | ||
274 | /* | 385 | /* |
275 | * This structure holds the nodes of the red-black tree used to store | 386 | * This structure holds the nodes of the red-black tree used to store |
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
330 | } | 441 | } |
331 | 442 | ||
332 | 443 | ||
333 | static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) | 444 | static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, |
445 | loff_t pos) | ||
334 | { | 446 | { |
335 | struct dir_private_info *p; | 447 | struct dir_private_info *p; |
336 | 448 | ||
337 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 449 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
338 | if (!p) | 450 | if (!p) |
339 | return NULL; | 451 | return NULL; |
340 | p->curr_hash = pos2maj_hash(pos); | 452 | p->curr_hash = pos2maj_hash(filp, pos); |
341 | p->curr_minor_hash = pos2min_hash(pos); | 453 | p->curr_minor_hash = pos2min_hash(filp, pos); |
342 | return p; | 454 | return p; |
343 | } | 455 | } |
344 | 456 | ||
@@ -430,7 +542,7 @@ static int call_filldir(struct file *filp, void *dirent, | |||
430 | inode->i_ino, current->comm); | 542 | inode->i_ino, current->comm); |
431 | return 0; | 543 | return 0; |
432 | } | 544 | } |
433 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 545 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); |
434 | while (fname) { | 546 | while (fname) { |
435 | error = filldir(dirent, fname->name, | 547 | error = filldir(dirent, fname->name, |
436 | fname->name_len, curr_pos, | 548 | fname->name_len, curr_pos, |
@@ -455,13 +567,13 @@ static int ext4_dx_readdir(struct file *filp, | |||
455 | int ret; | 567 | int ret; |
456 | 568 | ||
457 | if (!info) { | 569 | if (!info) { |
458 | info = ext4_htree_create_dir_info(filp->f_pos); | 570 | info = ext4_htree_create_dir_info(filp, filp->f_pos); |
459 | if (!info) | 571 | if (!info) |
460 | return -ENOMEM; | 572 | return -ENOMEM; |
461 | filp->private_data = info; | 573 | filp->private_data = info; |
462 | } | 574 | } |
463 | 575 | ||
464 | if (filp->f_pos == EXT4_HTREE_EOF) | 576 | if (filp->f_pos == ext4_get_htree_eof(filp)) |
465 | return 0; /* EOF */ | 577 | return 0; /* EOF */ |
466 | 578 | ||
467 | /* Some one has messed with f_pos; reset the world */ | 579 | /* Some one has messed with f_pos; reset the world */ |
@@ -469,8 +581,8 @@ static int ext4_dx_readdir(struct file *filp, | |||
469 | free_rb_tree_fname(&info->root); | 581 | free_rb_tree_fname(&info->root); |
470 | info->curr_node = NULL; | 582 | info->curr_node = NULL; |
471 | info->extra_fname = NULL; | 583 | info->extra_fname = NULL; |
472 | info->curr_hash = pos2maj_hash(filp->f_pos); | 584 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); |
473 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | 585 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); |
474 | } | 586 | } |
475 | 587 | ||
476 | /* | 588 | /* |
@@ -502,7 +614,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
502 | if (ret < 0) | 614 | if (ret < 0) |
503 | return ret; | 615 | return ret; |
504 | if (ret == 0) { | 616 | if (ret == 0) { |
505 | filp->f_pos = EXT4_HTREE_EOF; | 617 | filp->f_pos = ext4_get_htree_eof(filp); |
506 | break; | 618 | break; |
507 | } | 619 | } |
508 | info->curr_node = rb_first(&info->root); | 620 | info->curr_node = rb_first(&info->root); |
@@ -522,7 +634,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
522 | info->curr_minor_hash = fname->minor_hash; | 634 | info->curr_minor_hash = fname->minor_hash; |
523 | } else { | 635 | } else { |
524 | if (info->next_hash == ~0) { | 636 | if (info->next_hash == ~0) { |
525 | filp->f_pos = EXT4_HTREE_EOF; | 637 | filp->f_pos = ext4_get_htree_eof(filp); |
526 | break; | 638 | break; |
527 | } | 639 | } |
528 | info->curr_hash = info->next_hash; | 640 | info->curr_hash = info->next_hash; |
@@ -541,3 +653,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp) | |||
541 | 653 | ||
542 | return 0; | 654 | return 0; |
543 | } | 655 | } |
656 | |||
657 | const struct file_operations ext4_dir_operations = { | ||
658 | .llseek = ext4_dir_llseek, | ||
659 | .read = generic_read_dir, | ||
660 | .readdir = ext4_readdir, | ||
661 | .unlocked_ioctl = ext4_ioctl, | ||
662 | #ifdef CONFIG_COMPAT | ||
663 | .compat_ioctl = ext4_compat_ioctl, | ||
664 | #endif | ||
665 | .fsync = ext4_sync_file, | ||
666 | .release = ext4_release_dir, | ||
667 | }; | ||
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ded731ac8a32..ab2594a30f86 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1623,7 +1623,11 @@ struct dx_hash_info | |||
1623 | u32 *seed; | 1623 | u32 *seed; |
1624 | }; | 1624 | }; |
1625 | 1625 | ||
1626 | #define EXT4_HTREE_EOF 0x7fffffff | 1626 | |
1627 | /* 32 and 64 bit signed EOF for dx directories */ | ||
1628 | #define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) | ||
1629 | #define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) | ||
1630 | |||
1627 | 1631 | ||
1628 | /* | 1632 | /* |
1629 | * Control parameters used by ext4_htree_next_block | 1633 | * Control parameters used by ext4_htree_next_block |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ac8f168c8ab4..fa8e4911d354 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
200 | return -1; | 200 | return -1; |
201 | } | 201 | } |
202 | hash = hash & ~1; | 202 | hash = hash & ~1; |
203 | if (hash == (EXT4_HTREE_EOF << 1)) | 203 | if (hash == (EXT4_HTREE_EOF_32BIT << 1)) |
204 | hash = (EXT4_HTREE_EOF-1) << 1; | 204 | hash = (EXT4_HTREE_EOF_32BIT - 1) << 1; |
205 | hinfo->hash = hash; | 205 | hinfo->hash = hash; |
206 | hinfo->minor_hash = minor_hash; | 206 | hinfo->minor_hash = minor_hash; |
207 | return 0; | 207 | return 0; |