diff options
Diffstat (limited to 'fs/ext4/dir.c')
-rw-r--r-- | fs/ext4/dir.c | 518 |
1 files changed, 518 insertions, 0 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c new file mode 100644 index 000000000000..f8595787a70e --- /dev/null +++ b/fs/ext4/dir.c | |||
@@ -0,0 +1,518 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/dir.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/dir.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * ext4 directory handling functions | ||
16 | * | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * | ||
20 | * Hash Tree Directory indexing (c) 2001 Daniel Phillips | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/fs.h> | ||
25 | #include <linux/jbd2.h> | ||
26 | #include <linux/ext4_fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/rbtree.h> | ||
31 | |||
32 | static unsigned char ext4_filetype_table[] = { | ||
33 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | ||
34 | }; | ||
35 | |||
36 | static int ext4_readdir(struct file *, void *, filldir_t); | ||
37 | static int ext4_dx_readdir(struct file * filp, | ||
38 | void * dirent, filldir_t filldir); | ||
39 | static int ext4_release_dir (struct inode * inode, | ||
40 | struct file * filp); | ||
41 | |||
42 | const struct file_operations ext4_dir_operations = { | ||
43 | .llseek = generic_file_llseek, | ||
44 | .read = generic_read_dir, | ||
45 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | ||
46 | .ioctl = ext4_ioctl, /* BKL held */ | ||
47 | #ifdef CONFIG_COMPAT | ||
48 | .compat_ioctl = ext4_compat_ioctl, | ||
49 | #endif | ||
50 | .fsync = ext4_sync_file, /* BKL held */ | ||
51 | #ifdef CONFIG_EXT4_INDEX | ||
52 | .release = ext4_release_dir, | ||
53 | #endif | ||
54 | }; | ||
55 | |||
56 | |||
57 | static unsigned char get_dtype(struct super_block *sb, int filetype) | ||
58 | { | ||
59 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || | ||
60 | (filetype >= EXT4_FT_MAX)) | ||
61 | return DT_UNKNOWN; | ||
62 | |||
63 | return (ext4_filetype_table[filetype]); | ||
64 | } | ||
65 | |||
66 | |||
67 | int ext4_check_dir_entry (const char * function, struct inode * dir, | ||
68 | struct ext4_dir_entry_2 * de, | ||
69 | struct buffer_head * bh, | ||
70 | unsigned long offset) | ||
71 | { | ||
72 | const char * error_msg = NULL; | ||
73 | const int rlen = le16_to_cpu(de->rec_len); | ||
74 | |||
75 | if (rlen < EXT4_DIR_REC_LEN(1)) | ||
76 | error_msg = "rec_len is smaller than minimal"; | ||
77 | else if (rlen % 4 != 0) | ||
78 | error_msg = "rec_len % 4 != 0"; | ||
79 | else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) | ||
80 | error_msg = "rec_len is too small for name_len"; | ||
81 | else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) | ||
82 | error_msg = "directory entry across blocks"; | ||
83 | else if (le32_to_cpu(de->inode) > | ||
84 | le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) | ||
85 | error_msg = "inode out of bounds"; | ||
86 | |||
87 | if (error_msg != NULL) | ||
88 | ext4_error (dir->i_sb, function, | ||
89 | "bad entry in directory #%lu: %s - " | ||
90 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | ||
91 | dir->i_ino, error_msg, offset, | ||
92 | (unsigned long) le32_to_cpu(de->inode), | ||
93 | rlen, de->name_len); | ||
94 | return error_msg == NULL ? 1 : 0; | ||
95 | } | ||
96 | |||
97 | static int ext4_readdir(struct file * filp, | ||
98 | void * dirent, filldir_t filldir) | ||
99 | { | ||
100 | int error = 0; | ||
101 | unsigned long offset; | ||
102 | int i, stored; | ||
103 | struct ext4_dir_entry_2 *de; | ||
104 | struct super_block *sb; | ||
105 | int err; | ||
106 | struct inode *inode = filp->f_dentry->d_inode; | ||
107 | int ret = 0; | ||
108 | |||
109 | sb = inode->i_sb; | ||
110 | |||
111 | #ifdef CONFIG_EXT4_INDEX | ||
112 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | ||
113 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | ||
114 | ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || | ||
115 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
116 | err = ext4_dx_readdir(filp, dirent, filldir); | ||
117 | if (err != ERR_BAD_DX_DIR) { | ||
118 | ret = err; | ||
119 | goto out; | ||
120 | } | ||
121 | /* | ||
122 | * We don't set the inode dirty flag since it's not | ||
123 | * critical that it get flushed back to the disk. | ||
124 | */ | ||
125 | EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; | ||
126 | } | ||
127 | #endif | ||
128 | stored = 0; | ||
129 | offset = filp->f_pos & (sb->s_blocksize - 1); | ||
130 | |||
131 | while (!error && !stored && filp->f_pos < inode->i_size) { | ||
132 | unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | ||
133 | struct buffer_head map_bh; | ||
134 | struct buffer_head *bh = NULL; | ||
135 | |||
136 | map_bh.b_state = 0; | ||
137 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); | ||
138 | if (err > 0) { | ||
139 | page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, | ||
140 | &filp->f_ra, | ||
141 | filp, | ||
142 | map_bh.b_blocknr >> | ||
143 | (PAGE_CACHE_SHIFT - inode->i_blkbits), | ||
144 | 1); | ||
145 | bh = ext4_bread(NULL, inode, blk, 0, &err); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * We ignore I/O errors on directories so users have a chance | ||
150 | * of recovering data when there's a bad sector | ||
151 | */ | ||
152 | if (!bh) { | ||
153 | ext4_error (sb, "ext4_readdir", | ||
154 | "directory #%lu contains a hole at offset %lu", | ||
155 | inode->i_ino, (unsigned long)filp->f_pos); | ||
156 | filp->f_pos += sb->s_blocksize - offset; | ||
157 | continue; | ||
158 | } | ||
159 | |||
160 | revalidate: | ||
161 | /* If the dir block has changed since the last call to | ||
162 | * readdir(2), then we might be pointing to an invalid | ||
163 | * dirent right now. Scan from the start of the block | ||
164 | * to make sure. */ | ||
165 | if (filp->f_version != inode->i_version) { | ||
166 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | ||
167 | de = (struct ext4_dir_entry_2 *) | ||
168 | (bh->b_data + i); | ||
169 | /* It's too expensive to do a full | ||
170 | * dirent test each time round this | ||
171 | * loop, but we do have to test at | ||
172 | * least that it is non-zero. A | ||
173 | * failure will be detected in the | ||
174 | * dirent test below. */ | ||
175 | if (le16_to_cpu(de->rec_len) < | ||
176 | EXT4_DIR_REC_LEN(1)) | ||
177 | break; | ||
178 | i += le16_to_cpu(de->rec_len); | ||
179 | } | ||
180 | offset = i; | ||
181 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | ||
182 | | offset; | ||
183 | filp->f_version = inode->i_version; | ||
184 | } | ||
185 | |||
186 | while (!error && filp->f_pos < inode->i_size | ||
187 | && offset < sb->s_blocksize) { | ||
188 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | ||
189 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | ||
190 | bh, offset)) { | ||
191 | /* | ||
192 | * On error, skip the f_pos to the next block | ||
193 | */ | ||
194 | filp->f_pos = (filp->f_pos | | ||
195 | (sb->s_blocksize - 1)) + 1; | ||
196 | brelse (bh); | ||
197 | ret = stored; | ||
198 | goto out; | ||
199 | } | ||
200 | offset += le16_to_cpu(de->rec_len); | ||
201 | if (le32_to_cpu(de->inode)) { | ||
202 | /* We might block in the next section | ||
203 | * if the data destination is | ||
204 | * currently swapped out. So, use a | ||
205 | * version stamp to detect whether or | ||
206 | * not the directory has been modified | ||
207 | * during the copy operation. | ||
208 | */ | ||
209 | unsigned long version = filp->f_version; | ||
210 | |||
211 | error = filldir(dirent, de->name, | ||
212 | de->name_len, | ||
213 | filp->f_pos, | ||
214 | le32_to_cpu(de->inode), | ||
215 | get_dtype(sb, de->file_type)); | ||
216 | if (error) | ||
217 | break; | ||
218 | if (version != filp->f_version) | ||
219 | goto revalidate; | ||
220 | stored ++; | ||
221 | } | ||
222 | filp->f_pos += le16_to_cpu(de->rec_len); | ||
223 | } | ||
224 | offset = 0; | ||
225 | brelse (bh); | ||
226 | } | ||
227 | out: | ||
228 | return ret; | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_EXT4_INDEX | ||
/*
 * These macros convert between the major/minor hash and an f_pos
 * value.
 *
 * Currently we only use the major hash number.  This is unfortunate, but
 * on 32-bit machines, the same VFS interface is used for lseek and
 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
 * lseek/telldir/seekdir will blow out spectacularly, and from within
 * the ext2 low-level routine, we don't know if we're being called by
 * a 64-bit version of the system call or the 32-bit version of the
 * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
 * cookie.  Sigh.
 *
 * Arguments are parenthesized so that expression arguments such as
 * "a | b" are shifted as a whole.
 */
#define hash2pos(major, minor)	((major) >> 1)
#define pos2maj_hash(pos)	(((pos) << 1) & 0xffffffff)
#define pos2min_hash(pos)	(0)
248 | |||
249 | /* | ||
250 | * This structure holds the nodes of the red-black tree used to store | ||
251 | * the directory entry in hash order. | ||
252 | */ | ||
253 | struct fname { | ||
254 | __u32 hash; | ||
255 | __u32 minor_hash; | ||
256 | struct rb_node rb_hash; | ||
257 | struct fname *next; | ||
258 | __u32 inode; | ||
259 | __u8 name_len; | ||
260 | __u8 file_type; | ||
261 | char name[0]; | ||
262 | }; | ||
263 | |||
264 | /* | ||
265 | * This functoin implements a non-recursive way of freeing all of the | ||
266 | * nodes in the red-black tree. | ||
267 | */ | ||
268 | static void free_rb_tree_fname(struct rb_root *root) | ||
269 | { | ||
270 | struct rb_node *n = root->rb_node; | ||
271 | struct rb_node *parent; | ||
272 | struct fname *fname; | ||
273 | |||
274 | while (n) { | ||
275 | /* Do the node's children first */ | ||
276 | if ((n)->rb_left) { | ||
277 | n = n->rb_left; | ||
278 | continue; | ||
279 | } | ||
280 | if (n->rb_right) { | ||
281 | n = n->rb_right; | ||
282 | continue; | ||
283 | } | ||
284 | /* | ||
285 | * The node has no children; free it, and then zero | ||
286 | * out parent's link to it. Finally go to the | ||
287 | * beginning of the loop and try to free the parent | ||
288 | * node. | ||
289 | */ | ||
290 | parent = rb_parent(n); | ||
291 | fname = rb_entry(n, struct fname, rb_hash); | ||
292 | while (fname) { | ||
293 | struct fname * old = fname; | ||
294 | fname = fname->next; | ||
295 | kfree (old); | ||
296 | } | ||
297 | if (!parent) | ||
298 | root->rb_node = NULL; | ||
299 | else if (parent->rb_left == n) | ||
300 | parent->rb_left = NULL; | ||
301 | else if (parent->rb_right == n) | ||
302 | parent->rb_right = NULL; | ||
303 | n = parent; | ||
304 | } | ||
305 | root->rb_node = NULL; | ||
306 | } | ||
307 | |||
308 | |||
309 | static struct dir_private_info *create_dir_info(loff_t pos) | ||
310 | { | ||
311 | struct dir_private_info *p; | ||
312 | |||
313 | p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); | ||
314 | if (!p) | ||
315 | return NULL; | ||
316 | p->root.rb_node = NULL; | ||
317 | p->curr_node = NULL; | ||
318 | p->extra_fname = NULL; | ||
319 | p->last_pos = 0; | ||
320 | p->curr_hash = pos2maj_hash(pos); | ||
321 | p->curr_minor_hash = pos2min_hash(pos); | ||
322 | p->next_hash = 0; | ||
323 | return p; | ||
324 | } | ||
325 | |||
326 | void ext4_htree_free_dir_info(struct dir_private_info *p) | ||
327 | { | ||
328 | free_rb_tree_fname(&p->root); | ||
329 | kfree(p); | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Given a directory entry, enter it into the fname rb tree. | ||
334 | */ | ||
335 | int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | ||
336 | __u32 minor_hash, | ||
337 | struct ext4_dir_entry_2 *dirent) | ||
338 | { | ||
339 | struct rb_node **p, *parent = NULL; | ||
340 | struct fname * fname, *new_fn; | ||
341 | struct dir_private_info *info; | ||
342 | int len; | ||
343 | |||
344 | info = (struct dir_private_info *) dir_file->private_data; | ||
345 | p = &info->root.rb_node; | ||
346 | |||
347 | /* Create and allocate the fname structure */ | ||
348 | len = sizeof(struct fname) + dirent->name_len + 1; | ||
349 | new_fn = kzalloc(len, GFP_KERNEL); | ||
350 | if (!new_fn) | ||
351 | return -ENOMEM; | ||
352 | new_fn->hash = hash; | ||
353 | new_fn->minor_hash = minor_hash; | ||
354 | new_fn->inode = le32_to_cpu(dirent->inode); | ||
355 | new_fn->name_len = dirent->name_len; | ||
356 | new_fn->file_type = dirent->file_type; | ||
357 | memcpy(new_fn->name, dirent->name, dirent->name_len); | ||
358 | new_fn->name[dirent->name_len] = 0; | ||
359 | |||
360 | while (*p) { | ||
361 | parent = *p; | ||
362 | fname = rb_entry(parent, struct fname, rb_hash); | ||
363 | |||
364 | /* | ||
365 | * If the hash and minor hash match up, then we put | ||
366 | * them on a linked list. This rarely happens... | ||
367 | */ | ||
368 | if ((new_fn->hash == fname->hash) && | ||
369 | (new_fn->minor_hash == fname->minor_hash)) { | ||
370 | new_fn->next = fname->next; | ||
371 | fname->next = new_fn; | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | if (new_fn->hash < fname->hash) | ||
376 | p = &(*p)->rb_left; | ||
377 | else if (new_fn->hash > fname->hash) | ||
378 | p = &(*p)->rb_right; | ||
379 | else if (new_fn->minor_hash < fname->minor_hash) | ||
380 | p = &(*p)->rb_left; | ||
381 | else /* if (new_fn->minor_hash > fname->minor_hash) */ | ||
382 | p = &(*p)->rb_right; | ||
383 | } | ||
384 | |||
385 | rb_link_node(&new_fn->rb_hash, parent, p); | ||
386 | rb_insert_color(&new_fn->rb_hash, &info->root); | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | |||
391 | |||
392 | /* | ||
393 | * This is a helper function for ext4_dx_readdir. It calls filldir | ||
394 | * for all entres on the fname linked list. (Normally there is only | ||
395 | * one entry on the linked list, unless there are 62 bit hash collisions.) | ||
396 | */ | ||
397 | static int call_filldir(struct file * filp, void * dirent, | ||
398 | filldir_t filldir, struct fname *fname) | ||
399 | { | ||
400 | struct dir_private_info *info = filp->private_data; | ||
401 | loff_t curr_pos; | ||
402 | struct inode *inode = filp->f_dentry->d_inode; | ||
403 | struct super_block * sb; | ||
404 | int error; | ||
405 | |||
406 | sb = inode->i_sb; | ||
407 | |||
408 | if (!fname) { | ||
409 | printk("call_filldir: called with null fname?!?\n"); | ||
410 | return 0; | ||
411 | } | ||
412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | ||
413 | while (fname) { | ||
414 | error = filldir(dirent, fname->name, | ||
415 | fname->name_len, curr_pos, | ||
416 | fname->inode, | ||
417 | get_dtype(sb, fname->file_type)); | ||
418 | if (error) { | ||
419 | filp->f_pos = curr_pos; | ||
420 | info->extra_fname = fname->next; | ||
421 | return error; | ||
422 | } | ||
423 | fname = fname->next; | ||
424 | } | ||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static int ext4_dx_readdir(struct file * filp, | ||
429 | void * dirent, filldir_t filldir) | ||
430 | { | ||
431 | struct dir_private_info *info = filp->private_data; | ||
432 | struct inode *inode = filp->f_dentry->d_inode; | ||
433 | struct fname *fname; | ||
434 | int ret; | ||
435 | |||
436 | if (!info) { | ||
437 | info = create_dir_info(filp->f_pos); | ||
438 | if (!info) | ||
439 | return -ENOMEM; | ||
440 | filp->private_data = info; | ||
441 | } | ||
442 | |||
443 | if (filp->f_pos == EXT4_HTREE_EOF) | ||
444 | return 0; /* EOF */ | ||
445 | |||
446 | /* Some one has messed with f_pos; reset the world */ | ||
447 | if (info->last_pos != filp->f_pos) { | ||
448 | free_rb_tree_fname(&info->root); | ||
449 | info->curr_node = NULL; | ||
450 | info->extra_fname = NULL; | ||
451 | info->curr_hash = pos2maj_hash(filp->f_pos); | ||
452 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * If there are any leftover names on the hash collision | ||
457 | * chain, return them first. | ||
458 | */ | ||
459 | if (info->extra_fname && | ||
460 | call_filldir(filp, dirent, filldir, info->extra_fname)) | ||
461 | goto finished; | ||
462 | |||
463 | if (!info->curr_node) | ||
464 | info->curr_node = rb_first(&info->root); | ||
465 | |||
466 | while (1) { | ||
467 | /* | ||
468 | * Fill the rbtree if we have no more entries, | ||
469 | * or the inode has changed since we last read in the | ||
470 | * cached entries. | ||
471 | */ | ||
472 | if ((!info->curr_node) || | ||
473 | (filp->f_version != inode->i_version)) { | ||
474 | info->curr_node = NULL; | ||
475 | free_rb_tree_fname(&info->root); | ||
476 | filp->f_version = inode->i_version; | ||
477 | ret = ext4_htree_fill_tree(filp, info->curr_hash, | ||
478 | info->curr_minor_hash, | ||
479 | &info->next_hash); | ||
480 | if (ret < 0) | ||
481 | return ret; | ||
482 | if (ret == 0) { | ||
483 | filp->f_pos = EXT4_HTREE_EOF; | ||
484 | break; | ||
485 | } | ||
486 | info->curr_node = rb_first(&info->root); | ||
487 | } | ||
488 | |||
489 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | ||
490 | info->curr_hash = fname->hash; | ||
491 | info->curr_minor_hash = fname->minor_hash; | ||
492 | if (call_filldir(filp, dirent, filldir, fname)) | ||
493 | break; | ||
494 | |||
495 | info->curr_node = rb_next(info->curr_node); | ||
496 | if (!info->curr_node) { | ||
497 | if (info->next_hash == ~0) { | ||
498 | filp->f_pos = EXT4_HTREE_EOF; | ||
499 | break; | ||
500 | } | ||
501 | info->curr_hash = info->next_hash; | ||
502 | info->curr_minor_hash = 0; | ||
503 | } | ||
504 | } | ||
505 | finished: | ||
506 | info->last_pos = filp->f_pos; | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | static int ext4_release_dir (struct inode * inode, struct file * filp) | ||
511 | { | ||
512 | if (filp->private_data) | ||
513 | ext4_htree_free_dir_info(filp->private_data); | ||
514 | |||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | #endif | ||