aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2012-03-29 17:53:25 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2012-03-29 17:53:25 -0400
commit: 71db34fc4330f7c784397acb9f1e6ee7f7b32eb2 (patch)
tree: 77dd08f6f778a799dcd0c48eb72d0742349df235 /fs/ext4
parent: 50483c3268918ee51a56d1baa39b9149d2d0d521 (diff)
parent: 797a9d797f8483bb67f265c761b76dcd5a077a23 (diff)
Merge branch 'for-3.4' of git://linux-nfs.org/~bfields/linux
Pull nfsd changes from Bruce Fields:

Highlights:
 - Benny Halevy and Tigran Mkrtchyan implemented some more 4.1 features, moving us closer to a complete 4.1 implementation.
 - Bernd Schubert fixed a long-standing problem with readdir cookies on ext2/3/4.
 - Jeff Layton performed a long-overdue overhaul of the server reboot recovery code which will allow us to deprecate the current code (a rather unusual user of the vfs), and give us some needed flexibility for further improvements.
 - Like the client, we now support numeric uids and gids in the auth_sys case, allowing easier upgrades from NFSv2/v3 to v4.x.

Plus miscellaneous bugfixes and cleanup.

Thanks to everyone!

There are also some delegation fixes waiting on vfs review that I suppose will have to wait for 3.5. With that done I think we'll finally turn off the "EXPERIMENTAL" dependency for v4 (though that's mostly symbolic as it's been on by default in distros for a while).

And the list of 4.1 todos should be achievable for 3.5 as well:

  http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues

though we may still want a bit more experience with it before turning it on by default.
* 'for-3.4' of git://linux-nfs.org/~bfields/linux: (55 commits)
  nfsd: only register cld pipe notifier when CONFIG_NFSD_V4 is enabled
  nfsd4: use auth_unix unconditionally on backchannel
  nfsd: fix NULL pointer dereference in cld_pipe_downcall
  nfsd4: memory corruption in numeric_name_to_id()
  sunrpc: skip portmap calls on sessions backchannel
  nfsd4: allow numeric idmapping
  nfsd: don't allow legacy client tracker init for anything but init_net
  nfsd: add notifier to handle mount/unmount of rpc_pipefs sb
  nfsd: add the infrastructure to handle the cld upcall
  nfsd: add a header describing upcall to nfsdcld
  nfsd: add a per-net-namespace struct for nfsd
  sunrpc: create nfsd dir in rpc_pipefs
  nfsd: add nfsd4_client_tracking_ops struct and a way to set it
  nfsd: convert nfs4_client->cl_cb_flags to a generic flags field
  NFSD: Fix nfs4_verifier memory alignment
  NFSD: Fix warnings when NFSD_DEBUG is not defined
  nfsd: vfs_llseek() with 32 or 64 bit offsets (hashes)
  nfsd: rename 'int access' to 'int may_flags' in nfsd_open()
  ext4: return 32/64-bit dir name hash according to usage type
  fs: add new FMODE flags: FMODE_32bithash and FMODE_64bithash
  ...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/dir.c 214
-rw-r--r-- fs/ext4/ext4.h 6
-rw-r--r-- fs/ext4/hash.c 4
3 files changed, 176 insertions, 48 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ad56866d729..b8678620264 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
33}; 33};
34 34
35static int ext4_readdir(struct file *, void *, filldir_t);
36static int ext4_dx_readdir(struct file *filp, 35static int ext4_dx_readdir(struct file *filp,
37 void *dirent, filldir_t filldir); 36 void *dirent, filldir_t filldir);
38static int ext4_release_dir(struct inode *inode,
39 struct file *filp);
40
41const struct file_operations ext4_dir_operations = {
42 .llseek = ext4_llseek,
43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl,
48#endif
49 .fsync = ext4_sync_file,
50 .release = ext4_release_dir,
51};
52
53 37
54static unsigned char get_dtype(struct super_block *sb, int filetype) 38static unsigned char get_dtype(struct super_block *sb, int filetype)
55{ 39{
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
60 return (ext4_filetype_table[filetype]); 44 return (ext4_filetype_table[filetype]);
61} 45}
62 46
47/**
48 * Check if the given dir-inode refers to an htree-indexed directory
49 * (or a directory which could potentially get converted to use htree
50 * indexing).
51 *
52 * Return 1 if it is a dx dir, 0 if not
53 */
54static int is_dx_dir(struct inode *inode)
55{
56 struct super_block *sb = inode->i_sb;
57
58 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
59 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
60 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
61 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
62 return 1;
63
64 return 0;
65}
66
63/* 67/*
64 * Return 0 if the directory entry is OK, and 1 if there is a problem 68 * Return 0 if the directory entry is OK, and 1 if there is a problem
65 * 69 *
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
115 unsigned int offset; 119 unsigned int offset;
116 int i, stored; 120 int i, stored;
117 struct ext4_dir_entry_2 *de; 121 struct ext4_dir_entry_2 *de;
118 struct super_block *sb;
119 int err; 122 int err;
120 struct inode *inode = filp->f_path.dentry->d_inode; 123 struct inode *inode = filp->f_path.dentry->d_inode;
124 struct super_block *sb = inode->i_sb;
121 int ret = 0; 125 int ret = 0;
122 int dir_has_error = 0; 126 int dir_has_error = 0;
123 127
124 sb = inode->i_sb; 128 if (is_dx_dir(inode)) {
125
126 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
128 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
129 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
130 err = ext4_dx_readdir(filp, dirent, filldir); 129 err = ext4_dx_readdir(filp, dirent, filldir);
131 if (err != ERR_BAD_DX_DIR) { 130 if (err != ERR_BAD_DX_DIR) {
132 ret = err; 131 ret = err;
@@ -254,22 +253,134 @@ out:
254 return ret; 253 return ret;
255} 254}
256 255
256static inline int is_32bit_api(void)
257{
258#ifdef CONFIG_COMPAT
259 return is_compat_task();
260#else
261 return (BITS_PER_LONG == 32);
262#endif
263}
264
257/* 265/*
258 * These functions convert from the major/minor hash to an f_pos 266 * These functions convert from the major/minor hash to an f_pos
259 * value. 267 * value for dx directories
268 *
269 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
270 * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
271 * directly on both 32-bit and 64-bit nodes, under such case, neither
272 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
273 */
274static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
275{
276 if ((filp->f_mode & FMODE_32BITHASH) ||
277 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
278 return major >> 1;
279 else
280 return ((__u64)(major >> 1) << 32) | (__u64)minor;
281}
282
283static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
284{
285 if ((filp->f_mode & FMODE_32BITHASH) ||
286 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
287 return (pos << 1) & 0xffffffff;
288 else
289 return ((pos >> 32) << 1) & 0xffffffff;
290}
291
292static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
293{
294 if ((filp->f_mode & FMODE_32BITHASH) ||
295 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
296 return 0;
297 else
298 return pos & 0xffffffff;
299}
300
301/*
302 * Return 32- or 64-bit end-of-file for dx directories
303 */
304static inline loff_t ext4_get_htree_eof(struct file *filp)
305{
306 if ((filp->f_mode & FMODE_32BITHASH) ||
307 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
308 return EXT4_HTREE_EOF_32BIT;
309 else
310 return EXT4_HTREE_EOF_64BIT;
311}
312
313
314/*
315 * ext4_dir_llseek() based on generic_file_llseek() to handle both
316 * non-htree and htree directories, where the "offset" is in terms
317 * of the filename hash value instead of the byte offset.
260 * 318 *
261 * Currently we only use major hash numer. This is unfortunate, but 319 * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
262 * on 32-bit machines, the same VFS interface is used for lseek and 320 * will be invalid once the directory was converted into a dx directory
263 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
264 * lseek/telldir/seekdir will blow out spectacularly, and from within
265 * the ext2 low-level routine, we don't know if we're being called by
266 * a 64-bit version of the system call or the 32-bit version of the
267 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
268 * cookie. Sigh.
269 */ 321 */
270#define hash2pos(major, minor) (major >> 1) 322loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
271#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) 323{
272#define pos2min_hash(pos) (0) 324 struct inode *inode = file->f_mapping->host;
325 loff_t ret = -EINVAL;
326 int dx_dir = is_dx_dir(inode);
327
328 mutex_lock(&inode->i_mutex);
329
330 /* NOTE: relative offsets with dx directories might not work
331 * as expected, as it is difficult to figure out the
332 * correct offset between dx hashes */
333
334 switch (origin) {
335 case SEEK_END:
336 if (unlikely(offset > 0))
337 goto out_err; /* not supported for directories */
338
339 /* so only negative offsets are left, does that have a
340 * meaning for directories at all? */
341 if (dx_dir)
342 offset += ext4_get_htree_eof(file);
343 else
344 offset += inode->i_size;
345 break;
346 case SEEK_CUR:
347 /*
348 * Here we special-case the lseek(fd, 0, SEEK_CUR)
349 * position-querying operation. Avoid rewriting the "same"
350 * f_pos value back to the file because a concurrent read(),
351 * write() or lseek() might have altered it
352 */
353 if (offset == 0) {
354 offset = file->f_pos;
355 goto out_ok;
356 }
357
358 offset += file->f_pos;
359 break;
360 }
361
362 if (unlikely(offset < 0))
363 goto out_err;
364
365 if (!dx_dir) {
366 if (offset > inode->i_sb->s_maxbytes)
367 goto out_err;
368 } else if (offset > ext4_get_htree_eof(file))
369 goto out_err;
370
371 /* Special lock needed here? */
372 if (offset != file->f_pos) {
373 file->f_pos = offset;
374 file->f_version = 0;
375 }
376
377out_ok:
378 ret = offset;
379out_err:
380 mutex_unlock(&inode->i_mutex);
381
382 return ret;
383}
273 384
274/* 385/*
275 * This structure holds the nodes of the red-black tree used to store 386 * This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
330} 441}
331 442
332 443
333static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) 444static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
445 loff_t pos)
334{ 446{
335 struct dir_private_info *p; 447 struct dir_private_info *p;
336 448
337 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 449 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
338 if (!p) 450 if (!p)
339 return NULL; 451 return NULL;
340 p->curr_hash = pos2maj_hash(pos); 452 p->curr_hash = pos2maj_hash(filp, pos);
341 p->curr_minor_hash = pos2min_hash(pos); 453 p->curr_minor_hash = pos2min_hash(filp, pos);
342 return p; 454 return p;
343} 455}
344 456
@@ -430,7 +542,7 @@ static int call_filldir(struct file *filp, void *dirent,
430 inode->i_ino, current->comm); 542 inode->i_ino, current->comm);
431 return 0; 543 return 0;
432 } 544 }
433 curr_pos = hash2pos(fname->hash, fname->minor_hash); 545 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
434 while (fname) { 546 while (fname) {
435 error = filldir(dirent, fname->name, 547 error = filldir(dirent, fname->name,
436 fname->name_len, curr_pos, 548 fname->name_len, curr_pos,
@@ -455,13 +567,13 @@ static int ext4_dx_readdir(struct file *filp,
455 int ret; 567 int ret;
456 568
457 if (!info) { 569 if (!info) {
458 info = ext4_htree_create_dir_info(filp->f_pos); 570 info = ext4_htree_create_dir_info(filp, filp->f_pos);
459 if (!info) 571 if (!info)
460 return -ENOMEM; 572 return -ENOMEM;
461 filp->private_data = info; 573 filp->private_data = info;
462 } 574 }
463 575
464 if (filp->f_pos == EXT4_HTREE_EOF) 576 if (filp->f_pos == ext4_get_htree_eof(filp))
465 return 0; /* EOF */ 577 return 0; /* EOF */
466 578
467 /* Some one has messed with f_pos; reset the world */ 579 /* Some one has messed with f_pos; reset the world */
@@ -469,8 +581,8 @@ static int ext4_dx_readdir(struct file *filp,
469 free_rb_tree_fname(&info->root); 581 free_rb_tree_fname(&info->root);
470 info->curr_node = NULL; 582 info->curr_node = NULL;
471 info->extra_fname = NULL; 583 info->extra_fname = NULL;
472 info->curr_hash = pos2maj_hash(filp->f_pos); 584 info->curr_hash = pos2maj_hash(filp, filp->f_pos);
473 info->curr_minor_hash = pos2min_hash(filp->f_pos); 585 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
474 } 586 }
475 587
476 /* 588 /*
@@ -502,7 +614,7 @@ static int ext4_dx_readdir(struct file *filp,
502 if (ret < 0) 614 if (ret < 0)
503 return ret; 615 return ret;
504 if (ret == 0) { 616 if (ret == 0) {
505 filp->f_pos = EXT4_HTREE_EOF; 617 filp->f_pos = ext4_get_htree_eof(filp);
506 break; 618 break;
507 } 619 }
508 info->curr_node = rb_first(&info->root); 620 info->curr_node = rb_first(&info->root);
@@ -522,7 +634,7 @@ static int ext4_dx_readdir(struct file *filp,
522 info->curr_minor_hash = fname->minor_hash; 634 info->curr_minor_hash = fname->minor_hash;
523 } else { 635 } else {
524 if (info->next_hash == ~0) { 636 if (info->next_hash == ~0) {
525 filp->f_pos = EXT4_HTREE_EOF; 637 filp->f_pos = ext4_get_htree_eof(filp);
526 break; 638 break;
527 } 639 }
528 info->curr_hash = info->next_hash; 640 info->curr_hash = info->next_hash;
@@ -541,3 +653,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
541 653
542 return 0; 654 return 0;
543} 655}
656
657const struct file_operations ext4_dir_operations = {
658 .llseek = ext4_dir_llseek,
659 .read = generic_read_dir,
660 .readdir = ext4_readdir,
661 .unlocked_ioctl = ext4_ioctl,
662#ifdef CONFIG_COMPAT
663 .compat_ioctl = ext4_compat_ioctl,
664#endif
665 .fsync = ext4_sync_file,
666 .release = ext4_release_dir,
667};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ded731ac8a3..ab2594a30f8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1623,7 +1623,11 @@ struct dx_hash_info
1623 u32 *seed; 1623 u32 *seed;
1624}; 1624};
1625 1625
1626#define EXT4_HTREE_EOF 0x7fffffff 1626
1627/* 32 and 64 bit signed EOF for dx directories */
1628#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
1629#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
1630
1627 1631
1628/* 1632/*
1629 * Control parameters used by ext4_htree_next_block 1633 * Control parameters used by ext4_htree_next_block
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index ac8f168c8ab..fa8e4911d35 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
200 return -1; 200 return -1;
201 } 201 }
202 hash = hash & ~1; 202 hash = hash & ~1;
203 if (hash == (EXT4_HTREE_EOF << 1)) 203 if (hash == (EXT4_HTREE_EOF_32BIT << 1))
204 hash = (EXT4_HTREE_EOF-1) << 1; 204 hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
205 hinfo->hash = hash; 205 hinfo->hash = hash;
206 hinfo->minor_hash = minor_hash; 206 hinfo->minor_hash = minor_hash;
207 return 0; 207 return 0;