diff options
author | Fan Yong <yong.fan@whamcloud.com> | 2012-03-18 22:44:40 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-03-18 22:44:40 -0400 |
commit | d1f5273e9adb40724a85272f248f210dc4ce919a (patch) | |
tree | 1ddb119dab8247ab7d7774394094c61161013f2a /fs/ext4/dir.c | |
parent | 6a8a13e03861c0ab83ab07d573ca793cff0e5d00 (diff) |
ext4: return 32/64-bit dir name hash according to usage type
Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
and telldir(). However, this causes problems if there are 32-bit hash
collisions, since the NFSv2 server can get stuck resending the same
entries from the directory repeatedly.
Allow ext4 to return a full 64-bit hash (both major and minor) for
telldir to decrease the chance of hash collisions. This still needs
integration on the NFS side.
Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
(blame me if something is not correct)
Signed-off-by: Fan Yong <yong.fan@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/dir.c')
-rw-r--r-- | fs/ext4/dir.c | 214 |
1 files changed, 169 insertions, 45 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 164c56092e58..689d1b1a3f45 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = { | |||
32 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 32 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | ||
36 | static int ext4_dx_readdir(struct file *filp, | 35 | static int ext4_dx_readdir(struct file *filp, |
37 | void *dirent, filldir_t filldir); | 36 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir(struct inode *inode, | ||
39 | struct file *filp); | ||
40 | |||
41 | const struct file_operations ext4_dir_operations = { | ||
42 | .llseek = ext4_llseek, | ||
43 | .read = generic_read_dir, | ||
44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | ||
45 | .unlocked_ioctl = ext4_ioctl, | ||
46 | #ifdef CONFIG_COMPAT | ||
47 | .compat_ioctl = ext4_compat_ioctl, | ||
48 | #endif | ||
49 | .fsync = ext4_sync_file, | ||
50 | .release = ext4_release_dir, | ||
51 | }; | ||
52 | |||
53 | 37 | ||
54 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 38 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
55 | { | 39 | { |
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
60 | return (ext4_filetype_table[filetype]); | 44 | return (ext4_filetype_table[filetype]); |
61 | } | 45 | } |
62 | 46 | ||
47 | /** | ||
48 | * Check if the given dir-inode refers to an htree-indexed directory | ||
49 | * (or a directory which could potentially get converted to use htree | ||
50 | * indexing). | ||
51 | * | ||
52 | * Return 1 if it is a dx dir, 0 if not | ||
53 | */ | ||
54 | static int is_dx_dir(struct inode *inode) | ||
55 | { | ||
56 | struct super_block *sb = inode->i_sb; | ||
57 | |||
58 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | ||
59 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | ||
60 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || | ||
61 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) | ||
62 | return 1; | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
63 | /* | 67 | /* |
64 | * Return 0 if the directory entry is OK, and 1 if there is a problem | 68 | * Return 0 if the directory entry is OK, and 1 if there is a problem |
65 | * | 69 | * |
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp, | |||
115 | unsigned int offset; | 119 | unsigned int offset; |
116 | int i, stored; | 120 | int i, stored; |
117 | struct ext4_dir_entry_2 *de; | 121 | struct ext4_dir_entry_2 *de; |
118 | struct super_block *sb; | ||
119 | int err; | 122 | int err; |
120 | struct inode *inode = filp->f_path.dentry->d_inode; | 123 | struct inode *inode = filp->f_path.dentry->d_inode; |
124 | struct super_block *sb = inode->i_sb; | ||
121 | int ret = 0; | 125 | int ret = 0; |
122 | int dir_has_error = 0; | 126 | int dir_has_error = 0; |
123 | 127 | ||
124 | sb = inode->i_sb; | 128 | if (is_dx_dir(inode)) { |
125 | |||
126 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | ||
127 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | ||
128 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || | ||
129 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
130 | err = ext4_dx_readdir(filp, dirent, filldir); | 129 | err = ext4_dx_readdir(filp, dirent, filldir); |
131 | if (err != ERR_BAD_DX_DIR) { | 130 | if (err != ERR_BAD_DX_DIR) { |
132 | ret = err; | 131 | ret = err; |
@@ -254,22 +253,134 @@ out: | |||
254 | return ret; | 253 | return ret; |
255 | } | 254 | } |
256 | 255 | ||
256 | static inline int is_32bit_api(void) | ||
257 | { | ||
258 | #ifdef CONFIG_COMPAT | ||
259 | return is_compat_task(); | ||
260 | #else | ||
261 | return (BITS_PER_LONG == 32); | ||
262 | #endif | ||
263 | } | ||
264 | |||
257 | /* | 265 | /* |
258 | * These functions convert from the major/minor hash to an f_pos | 266 | * These functions convert from the major/minor hash to an f_pos |
259 | * value. | 267 | * value for dx directories |
268 | * | ||
269 | * Upper layer (for example NFS) should specify FMODE_32BITHASH or | ||
270 | * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted | ||
271 | * directly on both 32-bit and 64-bit nodes; in that case, neither | ||
272 | * FMODE_32BITHASH nor FMODE_64BITHASH is specified. | ||
273 | */ | ||
274 | static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) | ||
275 | { | ||
276 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
277 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
278 | return major >> 1; | ||
279 | else | ||
280 | return ((__u64)(major >> 1) << 32) | (__u64)minor; | ||
281 | } | ||
282 | |||
283 | static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) | ||
284 | { | ||
285 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
286 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
287 | return (pos << 1) & 0xffffffff; | ||
288 | else | ||
289 | return ((pos >> 32) << 1) & 0xffffffff; | ||
290 | } | ||
291 | |||
292 | static inline __u32 pos2min_hash(struct file *filp, loff_t pos) | ||
293 | { | ||
294 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
295 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
296 | return 0; | ||
297 | else | ||
298 | return pos & 0xffffffff; | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * Return 32- or 64-bit end-of-file for dx directories | ||
303 | */ | ||
304 | static inline loff_t ext4_get_htree_eof(struct file *filp) | ||
305 | { | ||
306 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
307 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
308 | return EXT4_HTREE_EOF_32BIT; | ||
309 | else | ||
310 | return EXT4_HTREE_EOF_64BIT; | ||
311 | } | ||
312 | |||
313 | |||
314 | /* | ||
315 | * ext4_dir_llseek() is based on generic_file_llseek() to handle both | ||
316 | * non-htree and htree directories, where the "offset" is in terms | ||
317 | * of the filename hash value instead of the byte offset. | ||
260 | * | 318 | * |
261 | * Currently we only use major hash numer. This is unfortunate, but | 319 | * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) |
262 | on 32-bit machines, the same VFS interface is used for lseek and | 320 | * will be invalid once the directory has been converted into a dx directory |
263 | * llseek, so if we use the 64 bit offset, then the 32-bit versions of | ||
264 | * lseek/telldir/seekdir will blow out spectacularly, and from within | ||
265 | * the ext2 low-level routine, we don't know if we're being called by | ||
266 | * a 64-bit version of the system call or the 32-bit version of the | ||
267 | * system call. Worse yet, NFSv2 only allows for a 32-bit readdir | ||
268 | * cookie. Sigh. | ||
269 | */ | 321 | */ |
270 | #define hash2pos(major, minor) (major >> 1) | 322 | loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) |
271 | #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) | 323 | { |
272 | #define pos2min_hash(pos) (0) | 324 | struct inode *inode = file->f_mapping->host; |
325 | loff_t ret = -EINVAL; | ||
326 | int dx_dir = is_dx_dir(inode); | ||
327 | |||
328 | mutex_lock(&inode->i_mutex); | ||
329 | |||
330 | /* NOTE: relative offsets with dx directories might not work | ||
331 | * as expected, as it is difficult to figure out the | ||
332 | * correct offset between dx hashes */ | ||
333 | |||
334 | switch (origin) { | ||
335 | case SEEK_END: | ||
336 | if (unlikely(offset > 0)) | ||
337 | goto out_err; /* not supported for directories */ | ||
338 | |||
339 | /* so only negative offsets are left, does that have a | ||
340 | * meaning for directories at all? */ | ||
341 | if (dx_dir) | ||
342 | offset += ext4_get_htree_eof(file); | ||
343 | else | ||
344 | offset += inode->i_size; | ||
345 | break; | ||
346 | case SEEK_CUR: | ||
347 | /* | ||
348 | * Here we special-case the lseek(fd, 0, SEEK_CUR) | ||
349 | * position-querying operation. Avoid rewriting the "same" | ||
350 | * f_pos value back to the file because a concurrent read(), | ||
351 | * write() or lseek() might have altered it | ||
352 | */ | ||
353 | if (offset == 0) { | ||
354 | offset = file->f_pos; | ||
355 | goto out_ok; | ||
356 | } | ||
357 | |||
358 | offset += file->f_pos; | ||
359 | break; | ||
360 | } | ||
361 | |||
362 | if (unlikely(offset < 0)) | ||
363 | goto out_err; | ||
364 | |||
365 | if (!dx_dir) { | ||
366 | if (offset > inode->i_sb->s_maxbytes) | ||
367 | goto out_err; | ||
368 | } else if (offset > ext4_get_htree_eof(file)) | ||
369 | goto out_err; | ||
370 | |||
371 | /* Special lock needed here? */ | ||
372 | if (offset != file->f_pos) { | ||
373 | file->f_pos = offset; | ||
374 | file->f_version = 0; | ||
375 | } | ||
376 | |||
377 | out_ok: | ||
378 | ret = offset; | ||
379 | out_err: | ||
380 | mutex_unlock(&inode->i_mutex); | ||
381 | |||
382 | return ret; | ||
383 | } | ||
273 | 384 | ||
274 | /* | 385 | /* |
275 | * This structure holds the nodes of the red-black tree used to store | 386 | * This structure holds the nodes of the red-black tree used to store |
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
330 | } | 441 | } |
331 | 442 | ||
332 | 443 | ||
333 | static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) | 444 | static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, |
445 | loff_t pos) | ||
334 | { | 446 | { |
335 | struct dir_private_info *p; | 447 | struct dir_private_info *p; |
336 | 448 | ||
337 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 449 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
338 | if (!p) | 450 | if (!p) |
339 | return NULL; | 451 | return NULL; |
340 | p->curr_hash = pos2maj_hash(pos); | 452 | p->curr_hash = pos2maj_hash(filp, pos); |
341 | p->curr_minor_hash = pos2min_hash(pos); | 453 | p->curr_minor_hash = pos2min_hash(filp, pos); |
342 | return p; | 454 | return p; |
343 | } | 455 | } |
344 | 456 | ||
@@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent, | |||
429 | "null fname?!?\n"); | 541 | "null fname?!?\n"); |
430 | return 0; | 542 | return 0; |
431 | } | 543 | } |
432 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 544 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); |
433 | while (fname) { | 545 | while (fname) { |
434 | error = filldir(dirent, fname->name, | 546 | error = filldir(dirent, fname->name, |
435 | fname->name_len, curr_pos, | 547 | fname->name_len, curr_pos, |
@@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp, | |||
454 | int ret; | 566 | int ret; |
455 | 567 | ||
456 | if (!info) { | 568 | if (!info) { |
457 | info = ext4_htree_create_dir_info(filp->f_pos); | 569 | info = ext4_htree_create_dir_info(filp, filp->f_pos); |
458 | if (!info) | 570 | if (!info) |
459 | return -ENOMEM; | 571 | return -ENOMEM; |
460 | filp->private_data = info; | 572 | filp->private_data = info; |
461 | } | 573 | } |
462 | 574 | ||
463 | if (filp->f_pos == EXT4_HTREE_EOF) | 575 | if (filp->f_pos == ext4_get_htree_eof(filp)) |
464 | return 0; /* EOF */ | 576 | return 0; /* EOF */ |
465 | 577 | ||
466 | /* Some one has messed with f_pos; reset the world */ | 578 | /* Some one has messed with f_pos; reset the world */ |
@@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp, | |||
468 | free_rb_tree_fname(&info->root); | 580 | free_rb_tree_fname(&info->root); |
469 | info->curr_node = NULL; | 581 | info->curr_node = NULL; |
470 | info->extra_fname = NULL; | 582 | info->extra_fname = NULL; |
471 | info->curr_hash = pos2maj_hash(filp->f_pos); | 583 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); |
472 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | 584 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); |
473 | } | 585 | } |
474 | 586 | ||
475 | /* | 587 | /* |
@@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
501 | if (ret < 0) | 613 | if (ret < 0) |
502 | return ret; | 614 | return ret; |
503 | if (ret == 0) { | 615 | if (ret == 0) { |
504 | filp->f_pos = EXT4_HTREE_EOF; | 616 | filp->f_pos = ext4_get_htree_eof(filp); |
505 | break; | 617 | break; |
506 | } | 618 | } |
507 | info->curr_node = rb_first(&info->root); | 619 | info->curr_node = rb_first(&info->root); |
@@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
521 | info->curr_minor_hash = fname->minor_hash; | 633 | info->curr_minor_hash = fname->minor_hash; |
522 | } else { | 634 | } else { |
523 | if (info->next_hash == ~0) { | 635 | if (info->next_hash == ~0) { |
524 | filp->f_pos = EXT4_HTREE_EOF; | 636 | filp->f_pos = ext4_get_htree_eof(filp); |
525 | break; | 637 | break; |
526 | } | 638 | } |
527 | info->curr_hash = info->next_hash; | 639 | info->curr_hash = info->next_hash; |
@@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp) | |||
540 | 652 | ||
541 | return 0; | 653 | return 0; |
542 | } | 654 | } |
655 | |||
656 | const struct file_operations ext4_dir_operations = { | ||
657 | .llseek = ext4_dir_llseek, | ||
658 | .read = generic_read_dir, | ||
659 | .readdir = ext4_readdir, | ||
660 | .unlocked_ioctl = ext4_ioctl, | ||
661 | #ifdef CONFIG_COMPAT | ||
662 | .compat_ioctl = ext4_compat_ioctl, | ||
663 | #endif | ||
664 | .fsync = ext4_sync_file, | ||
665 | .release = ext4_release_dir, | ||
666 | }; | ||