diff options
| author | Eric Sandeen <sandeen@redhat.com> | 2012-04-26 14:10:39 -0400 |
|---|---|---|
| committer | Jan Kara <jack@suse.cz> | 2012-05-15 17:34:39 -0400 |
| commit | d7dab39b6e16d5eea78ed3c705d2a2d0772b4f06 (patch) | |
| tree | a2b201b2afd8cd1d278947f7e080ce7457e88814 | |
| parent | a80b12c3d08dbbf15e6a551e481c32a2df4911f3 (diff) | |
ext3: return 32/64-bit dir name hash according to usage type
This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a
ext4: return 32/64-bit dir name hash according to usage type
by Fan Yong <yong.fan@whamcloud.com>
Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
and telldir(). However, this causes problems if there are 32-bit hash
collisions, since the NFSv2 server can get stuck resending the same
entries from the directory repeatedly.
Allow ext3 to return a full 64-bit hash (both major and minor) for
telldir to decrease the chance of hash collisions.
This patch does implement a new ext3_dir_llseek op, because with 64-bit
hashes, nfs will attempt to seek to a hash "offset" which is much
larger than ext3's s_maxbytes. So for dx dirs, we call
generic_file_llseek_size() with the appropriate max hash value as the
maximum seekable size. Otherwise we just pass through to
generic_file_llseek().
Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Patch-updated-by: Eric Sandeen <sandeen@redhat.com>
(blame us if something is not correct)
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
| -rw-r--r-- | fs/ext3/dir.c | 167 | ||||
| -rw-r--r-- | fs/ext3/ext3.h | 6 | ||||
| -rw-r--r-- | fs/ext3/hash.c | 4 |
3 files changed, 129 insertions, 48 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index cc761ad8fa5..92490e9f85c 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
| @@ -21,30 +21,15 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/compat.h> | ||
| 24 | #include "ext3.h" | 25 | #include "ext3.h" |
| 25 | 26 | ||
| 26 | static unsigned char ext3_filetype_table[] = { | 27 | static unsigned char ext3_filetype_table[] = { |
| 27 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
| 28 | }; | 29 | }; |
| 29 | 30 | ||
| 30 | static int ext3_readdir(struct file *, void *, filldir_t); | ||
| 31 | static int ext3_dx_readdir(struct file * filp, | 31 | static int ext3_dx_readdir(struct file * filp, |
| 32 | void * dirent, filldir_t filldir); | 32 | void * dirent, filldir_t filldir); |
| 33 | static int ext3_release_dir (struct inode * inode, | ||
| 34 | struct file * filp); | ||
| 35 | |||
| 36 | const struct file_operations ext3_dir_operations = { | ||
| 37 | .llseek = generic_file_llseek, | ||
| 38 | .read = generic_read_dir, | ||
| 39 | .readdir = ext3_readdir, /* we take BKL. needed?*/ | ||
| 40 | .unlocked_ioctl = ext3_ioctl, | ||
| 41 | #ifdef CONFIG_COMPAT | ||
| 42 | .compat_ioctl = ext3_compat_ioctl, | ||
| 43 | #endif | ||
| 44 | .fsync = ext3_sync_file, /* BKL held */ | ||
| 45 | .release = ext3_release_dir, | ||
| 46 | }; | ||
| 47 | |||
| 48 | 33 | ||
| 49 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 34 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
| 50 | { | 35 | { |
| @@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
| 55 | return (ext3_filetype_table[filetype]); | 40 | return (ext3_filetype_table[filetype]); |
| 56 | } | 41 | } |
| 57 | 42 | ||
| 43 | /** | ||
| 44 | * Check if the given dir-inode refers to an htree-indexed directory | ||
| 44 | * (or a directory which could potentially get converted to use htree | ||
| 46 | * indexing). | ||
| 47 | * | ||
| 48 | * Return 1 if it is a dx dir, 0 if not | ||
| 49 | */ | ||
| 50 | static int is_dx_dir(struct inode *inode) | ||
| 51 | { | ||
| 52 | struct super_block *sb = inode->i_sb; | ||
| 53 | |||
| 54 | if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, | ||
| 55 | EXT3_FEATURE_COMPAT_DIR_INDEX) && | ||
| 56 | ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || | ||
| 57 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) | ||
| 58 | return 1; | ||
| 59 | |||
| 60 | return 0; | ||
| 61 | } | ||
| 58 | 62 | ||
| 59 | int ext3_check_dir_entry (const char * function, struct inode * dir, | 63 | int ext3_check_dir_entry (const char * function, struct inode * dir, |
| 60 | struct ext3_dir_entry_2 * de, | 64 | struct ext3_dir_entry_2 * de, |
| @@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, | |||
| 94 | unsigned long offset; | 98 | unsigned long offset; |
| 95 | int i, stored; | 99 | int i, stored; |
| 96 | struct ext3_dir_entry_2 *de; | 100 | struct ext3_dir_entry_2 *de; |
| 97 | struct super_block *sb; | ||
| 98 | int err; | 101 | int err; |
| 99 | struct inode *inode = filp->f_path.dentry->d_inode; | 102 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 103 | struct super_block *sb = inode->i_sb; | ||
| 100 | int ret = 0; | 104 | int ret = 0; |
| 101 | int dir_has_error = 0; | 105 | int dir_has_error = 0; |
| 102 | 106 | ||
| 103 | sb = inode->i_sb; | 107 | if (is_dx_dir(inode)) { |
| 104 | |||
| 105 | if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, | ||
| 106 | EXT3_FEATURE_COMPAT_DIR_INDEX) && | ||
| 107 | ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || | ||
| 108 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
| 109 | err = ext3_dx_readdir(filp, dirent, filldir); | 108 | err = ext3_dx_readdir(filp, dirent, filldir); |
| 110 | if (err != ERR_BAD_DX_DIR) { | 109 | if (err != ERR_BAD_DX_DIR) { |
| 111 | ret = err; | 110 | ret = err; |
| @@ -227,22 +226,87 @@ out: | |||
| 227 | return ret; | 226 | return ret; |
| 228 | } | 227 | } |
| 229 | 228 | ||
| 229 | static inline int is_32bit_api(void) | ||
| 230 | { | ||
| 231 | #ifdef CONFIG_COMPAT | ||
| 232 | return is_compat_task(); | ||
| 233 | #else | ||
| 234 | return (BITS_PER_LONG == 32); | ||
| 235 | #endif | ||
| 236 | } | ||
| 237 | |||
| 230 | /* | 238 | /* |
| 231 | * These functions convert from the major/minor hash to an f_pos | 239 | * These functions convert from the major/minor hash to an f_pos |
| 232 | * value. | 240 | * value for dx directories |
| 233 | * | 241 | * |
| 234 | * Currently we only use major hash numer. This is unfortunate, but | 242 | * Upper layer (for example NFS) should specify FMODE_32BITHASH or |
| 235 | * on 32-bit machines, the same VFS interface is used for lseek and | 243 | * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted |
| 236 | * llseek, so if we use the 64 bit offset, then the 32-bit versions of | 244 | * directly on both 32-bit and 64-bit nodes; in that case, neither |
| 237 | * lseek/telldir/seekdir will blow out spectacularly, and from within | 245 | * FMODE_32BITHASH nor FMODE_64BITHASH is specified. |
| 238 | * the ext2 low-level routine, we don't know if we're being called by | ||
| 239 | * a 64-bit version of the system call or the 32-bit version of the | ||
| 240 | * system call. Worse yet, NFSv2 only allows for a 32-bit readdir | ||
| 241 | * cookie. Sigh. | ||
| 242 | */ | 246 | */ |
| 243 | #define hash2pos(major, minor) (major >> 1) | 247 | static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) |
| 244 | #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) | 248 | { |
| 245 | #define pos2min_hash(pos) (0) | 249 | if ((filp->f_mode & FMODE_32BITHASH) || |
| 250 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
| 251 | return major >> 1; | ||
| 252 | else | ||
| 253 | return ((__u64)(major >> 1) << 32) | (__u64)minor; | ||
| 254 | } | ||
| 255 | |||
| 256 | static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) | ||
| 257 | { | ||
| 258 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
| 259 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
| 260 | return (pos << 1) & 0xffffffff; | ||
| 261 | else | ||
| 262 | return ((pos >> 32) << 1) & 0xffffffff; | ||
| 263 | } | ||
| 264 | |||
| 265 | static inline __u32 pos2min_hash(struct file *filp, loff_t pos) | ||
| 266 | { | ||
| 267 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
| 268 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
| 269 | return 0; | ||
| 270 | else | ||
| 271 | return pos & 0xffffffff; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * Return 32- or 64-bit end-of-file for dx directories | ||
| 276 | */ | ||
| 277 | static inline loff_t ext3_get_htree_eof(struct file *filp) | ||
| 278 | { | ||
| 279 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
| 280 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
| 281 | return EXT3_HTREE_EOF_32BIT; | ||
| 282 | else | ||
| 283 | return EXT3_HTREE_EOF_64BIT; | ||
| 284 | } | ||
| 285 | |||
| 286 | |||
| 287 | /* | ||
| 288 | * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both | ||
| 289 | * non-htree and htree directories, where the "offset" is in terms | ||
| 290 | * of the filename hash value instead of the byte offset. | ||
| 291 | * | ||
| 292 | * Because we may return a 64-bit hash that is well beyond s_maxbytes, | ||
| 293 | * we need to pass the max hash as the maximum allowable offset in | ||
| 294 | * the htree directory case. | ||
| 295 | * | ||
| 296 | * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) | ||
| 297 | * will be invalid once the directory has been converted into a dx directory | ||
| 298 | */ | ||
| 299 | loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) | ||
| 300 | { | ||
| 301 | struct inode *inode = file->f_mapping->host; | ||
| 302 | int dx_dir = is_dx_dir(inode); | ||
| 303 | |||
| 304 | if (likely(dx_dir)) | ||
| 305 | return generic_file_llseek_size(file, offset, origin, | ||
| 306 | ext3_get_htree_eof(file)); | ||
| 307 | else | ||
| 308 | return generic_file_llseek(file, offset, origin); | ||
| 309 | } | ||
| 246 | 310 | ||
| 247 | /* | 311 | /* |
| 248 | * This structure holds the nodes of the red-black tree used to store | 312 | * This structure holds the nodes of the red-black tree used to store |
| @@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 303 | } | 367 | } |
| 304 | 368 | ||
| 305 | 369 | ||
| 306 | static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) | 370 | static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, |
| 371 | loff_t pos) | ||
| 307 | { | 372 | { |
| 308 | struct dir_private_info *p; | 373 | struct dir_private_info *p; |
| 309 | 374 | ||
| 310 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 375 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
| 311 | if (!p) | 376 | if (!p) |
| 312 | return NULL; | 377 | return NULL; |
| 313 | p->curr_hash = pos2maj_hash(pos); | 378 | p->curr_hash = pos2maj_hash(filp, pos); |
| 314 | p->curr_minor_hash = pos2min_hash(pos); | 379 | p->curr_minor_hash = pos2min_hash(filp, pos); |
| 315 | return p; | 380 | return p; |
| 316 | } | 381 | } |
| 317 | 382 | ||
| @@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
| 401 | printk("call_filldir: called with null fname?!?\n"); | 466 | printk("call_filldir: called with null fname?!?\n"); |
| 402 | return 0; | 467 | return 0; |
| 403 | } | 468 | } |
| 404 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 469 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); |
| 405 | while (fname) { | 470 | while (fname) { |
| 406 | error = filldir(dirent, fname->name, | 471 | error = filldir(dirent, fname->name, |
| 407 | fname->name_len, curr_pos, | 472 | fname->name_len, curr_pos, |
| @@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 426 | int ret; | 491 | int ret; |
| 427 | 492 | ||
| 428 | if (!info) { | 493 | if (!info) { |
| 429 | info = ext3_htree_create_dir_info(filp->f_pos); | 494 | info = ext3_htree_create_dir_info(filp, filp->f_pos); |
| 430 | if (!info) | 495 | if (!info) |
| 431 | return -ENOMEM; | 496 | return -ENOMEM; |
| 432 | filp->private_data = info; | 497 | filp->private_data = info; |
| 433 | } | 498 | } |
| 434 | 499 | ||
| 435 | if (filp->f_pos == EXT3_HTREE_EOF) | 500 | if (filp->f_pos == ext3_get_htree_eof(filp)) |
| 436 | return 0; /* EOF */ | 501 | return 0; /* EOF */ |
| 437 | 502 | ||
| 438 | /* Some one has messed with f_pos; reset the world */ | 503 | /* Some one has messed with f_pos; reset the world */ |
| @@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 440 | free_rb_tree_fname(&info->root); | 505 | free_rb_tree_fname(&info->root); |
| 441 | info->curr_node = NULL; | 506 | info->curr_node = NULL; |
| 442 | info->extra_fname = NULL; | 507 | info->extra_fname = NULL; |
| 443 | info->curr_hash = pos2maj_hash(filp->f_pos); | 508 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); |
| 444 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | 509 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); |
| 445 | } | 510 | } |
| 446 | 511 | ||
| 447 | /* | 512 | /* |
| @@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 473 | if (ret < 0) | 538 | if (ret < 0) |
| 474 | return ret; | 539 | return ret; |
| 475 | if (ret == 0) { | 540 | if (ret == 0) { |
| 476 | filp->f_pos = EXT3_HTREE_EOF; | 541 | filp->f_pos = ext3_get_htree_eof(filp); |
| 477 | break; | 542 | break; |
| 478 | } | 543 | } |
| 479 | info->curr_node = rb_first(&info->root); | 544 | info->curr_node = rb_first(&info->root); |
| @@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 493 | info->curr_minor_hash = fname->minor_hash; | 558 | info->curr_minor_hash = fname->minor_hash; |
| 494 | } else { | 559 | } else { |
| 495 | if (info->next_hash == ~0) { | 560 | if (info->next_hash == ~0) { |
| 496 | filp->f_pos = EXT3_HTREE_EOF; | 561 | filp->f_pos = ext3_get_htree_eof(filp); |
| 497 | break; | 562 | break; |
| 498 | } | 563 | } |
| 499 | info->curr_hash = info->next_hash; | 564 | info->curr_hash = info->next_hash; |
| @@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) | |||
| 512 | 577 | ||
| 513 | return 0; | 578 | return 0; |
| 514 | } | 579 | } |
| 580 | |||
| 581 | const struct file_operations ext3_dir_operations = { | ||
| 582 | .llseek = ext3_dir_llseek, | ||
| 583 | .read = generic_read_dir, | ||
| 584 | .readdir = ext3_readdir, | ||
| 585 | .unlocked_ioctl = ext3_ioctl, | ||
| 586 | #ifdef CONFIG_COMPAT | ||
| 587 | .compat_ioctl = ext3_compat_ioctl, | ||
| 588 | #endif | ||
| 589 | .fsync = ext3_sync_file, | ||
| 590 | .release = ext3_release_dir, | ||
| 591 | }; | ||
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index b6515fd7e56..fe5bef7914e 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h | |||
| @@ -920,7 +920,11 @@ struct dx_hash_info | |||
| 920 | u32 *seed; | 920 | u32 *seed; |
| 921 | }; | 921 | }; |
| 922 | 922 | ||
| 923 | #define EXT3_HTREE_EOF 0x7fffffff | 923 | |
| 924 | /* 32 and 64 bit signed EOF for dx directories */ | ||
| 925 | #define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) | ||
| 926 | #define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) | ||
| 927 | |||
| 924 | 928 | ||
| 925 | /* | 929 | /* |
| 926 | * Control parameters used by ext3_htree_next_block | 930 | * Control parameters used by ext3_htree_next_block |
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index d10231ddcf8..ede315cdf12 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c | |||
| @@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
| 198 | return -1; | 198 | return -1; |
| 199 | } | 199 | } |
| 200 | hash = hash & ~1; | 200 | hash = hash & ~1; |
| 201 | if (hash == (EXT3_HTREE_EOF << 1)) | 201 | if (hash == (EXT3_HTREE_EOF_32BIT << 1)) |
| 202 | hash = (EXT3_HTREE_EOF-1) << 1; | 202 | hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; |
| 203 | hinfo->hash = hash; | 203 | hinfo->hash = hash; |
| 204 | hinfo->minor_hash = minor_hash; | 204 | hinfo->minor_hash = minor_hash; |
| 205 | return 0; | 205 | return 0; |
