about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext3
diff options
context:
space:
mode:
author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
commit: 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree: a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /fs/ext3
parent: 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'fs/ext3')
-rw-r--r-- fs/ext3/acl.c 40
-rw-r--r-- fs/ext3/balloc.c 119
-rw-r--r-- fs/ext3/bitmap.c 16
-rw-r--r-- fs/ext3/dir.c 175
-rw-r--r-- fs/ext3/ext3.h 1326
-rw-r--r-- fs/ext3/ext3_jbd.c 2
-rw-r--r-- fs/ext3/file.c 6
-rw-r--r-- fs/ext3/fsync.c 25
-rw-r--r-- fs/ext3/hash.c 8
-rw-r--r-- fs/ext3/ialloc.c 90
-rw-r--r-- fs/ext3/inode.c 112
-rw-r--r-- fs/ext3/ioctl.c 57
-rw-r--r-- fs/ext3/namei.c 87
-rw-r--r-- fs/ext3/namei.h 19
-rw-r--r-- fs/ext3/resize.c 5
-rw-r--r-- fs/ext3/super.c 135
-rw-r--r-- fs/ext3/symlink.c 4
-rw-r--r-- fs/ext3/xattr.c 7
-rw-r--r-- fs/ext3/xattr_security.c 43
-rw-r--r-- fs/ext3/xattr_trusted.c 7
-rw-r--r-- fs/ext3/xattr_user.c 6
21 files changed, 505 insertions, 1784 deletions
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index dbb5ad59a7f..3091f62e55b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -4,7 +4,13 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include "ext3.h" 7#include <linux/init.h>
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/capability.h>
11#include <linux/fs.h>
12#include <linux/ext3_jbd.h>
13#include <linux/ext3_fs.h>
8#include "xattr.h" 14#include "xattr.h"
9#include "acl.h" 15#include "acl.h"
10 16
@@ -48,23 +54,16 @@ ext3_acl_from_disk(const void *value, size_t size)
48 case ACL_OTHER: 54 case ACL_OTHER:
49 value = (char *)value + 55 value = (char *)value +
50 sizeof(ext3_acl_entry_short); 56 sizeof(ext3_acl_entry_short);
57 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
51 break; 58 break;
52 59
53 case ACL_USER: 60 case ACL_USER:
54 value = (char *)value + sizeof(ext3_acl_entry);
55 if ((char *)value > end)
56 goto fail;
57 acl->a_entries[n].e_uid =
58 make_kuid(&init_user_ns,
59 le32_to_cpu(entry->e_id));
60 break;
61 case ACL_GROUP: 61 case ACL_GROUP:
62 value = (char *)value + sizeof(ext3_acl_entry); 62 value = (char *)value + sizeof(ext3_acl_entry);
63 if ((char *)value > end) 63 if ((char *)value > end)
64 goto fail; 64 goto fail;
65 acl->a_entries[n].e_gid = 65 acl->a_entries[n].e_id =
66 make_kgid(&init_user_ns, 66 le32_to_cpu(entry->e_id);
67 le32_to_cpu(entry->e_id));
68 break; 67 break;
69 68
70 default: 69 default:
@@ -98,19 +97,14 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
98 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
99 e = (char *)ext_acl + sizeof(ext3_acl_header); 98 e = (char *)ext_acl + sizeof(ext3_acl_header);
100 for (n=0; n < acl->a_count; n++) { 99 for (n=0; n < acl->a_count; n++) {
101 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
102 ext3_acl_entry *entry = (ext3_acl_entry *)e; 100 ext3_acl_entry *entry = (ext3_acl_entry *)e;
103 entry->e_tag = cpu_to_le16(acl_e->e_tag); 101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
104 entry->e_perm = cpu_to_le16(acl_e->e_perm); 102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
105 switch(acl_e->e_tag) { 103 switch(acl->a_entries[n].e_tag) {
106 case ACL_USER: 104 case ACL_USER:
107 entry->e_id = cpu_to_le32(
108 from_kuid(&init_user_ns, acl_e->e_uid));
109 e += sizeof(ext3_acl_entry);
110 break;
111 case ACL_GROUP: 105 case ACL_GROUP:
112 entry->e_id = cpu_to_le32( 106 entry->e_id =
113 from_kgid(&init_user_ns, acl_e->e_gid)); 107 cpu_to_le32(acl->a_entries[n].e_id);
114 e += sizeof(ext3_acl_entry); 108 e += sizeof(ext3_acl_entry);
115 break; 109 break;
116 110
@@ -381,7 +375,7 @@ ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
381 return PTR_ERR(acl); 375 return PTR_ERR(acl);
382 if (acl == NULL) 376 if (acl == NULL)
383 return -ENODATA; 377 return -ENODATA;
384 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); 378 error = posix_acl_to_xattr(acl, buffer, size);
385 posix_acl_release(acl); 379 posix_acl_release(acl);
386 380
387 return error; 381 return error;
@@ -404,7 +398,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
404 return -EPERM; 398 return -EPERM;
405 399
406 if (value) { 400 if (value) {
407 acl = posix_acl_from_xattr(&init_user_ns, value, size); 401 acl = posix_acl_from_xattr(value, size);
408 if (IS_ERR(acl)) 402 if (IS_ERR(acl))
409 return PTR_ERR(acl); 403 return PTR_ERR(acl);
410 else if (acl) { 404 else if (acl) {
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 22548f56197..6386d76f44a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -11,9 +11,17 @@
11 * David S. Miller (davem@caip.rutgers.edu), 1995 11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */ 12 */
13 13
14#include <linux/time.h>
15#include <linux/capability.h>
16#include <linux/fs.h>
17#include <linux/slab.h>
18#include <linux/jbd.h>
19#include <linux/ext3_fs.h>
20#include <linux/ext3_jbd.h>
14#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/buffer_head.h>
15#include <linux/blkdev.h> 23#include <linux/blkdev.h>
16#include "ext3.h" 24#include <trace/events/ext3.h>
17 25
18/* 26/*
19 * balloc.c contains the blocks allocation and deallocation routines 27 * balloc.c contains the blocks allocation and deallocation routines
@@ -419,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
419void ext3_init_block_alloc_info(struct inode *inode) 427void ext3_init_block_alloc_info(struct inode *inode)
420{ 428{
421 struct ext3_inode_info *ei = EXT3_I(inode); 429 struct ext3_inode_info *ei = EXT3_I(inode);
422 struct ext3_block_alloc_info *block_i; 430 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
423 struct super_block *sb = inode->i_sb; 431 struct super_block *sb = inode->i_sb;
424 432
425 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -483,7 +491,7 @@ void ext3_discard_reservation(struct inode *inode)
483 * ext3_free_blocks_sb() -- Free given blocks and update quota 491 * ext3_free_blocks_sb() -- Free given blocks and update quota
484 * @handle: handle to this transaction 492 * @handle: handle to this transaction
485 * @sb: super block 493 * @sb: super block
486 * @block: start physical block to free 494 * @block: start physcial block to free
487 * @count: number of blocks to free 495 * @count: number of blocks to free
488 * @pdquot_freed_blocks: pointer to quota 496 * @pdquot_freed_blocks: pointer to quota
489 */ 497 */
@@ -1432,16 +1440,15 @@ out:
1432 * 1440 *
1433 * Check if filesystem has at least 1 free block available for allocation. 1441 * Check if filesystem has at least 1 free block available for allocation.
1434 */ 1442 */
1435static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation) 1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1436{ 1444{
1437 ext3_fsblk_t free_blocks, root_blocks; 1445 ext3_fsblk_t free_blocks, root_blocks;
1438 1446
1439 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1440 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1441 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1442 !use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) && 1450 sbi->s_resuid != current_fsuid() &&
1443 (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) || 1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1444 !in_group_p (sbi->s_resgid))) {
1445 return 0; 1452 return 0;
1446 } 1453 }
1447 return 1; 1454 return 1;
@@ -1461,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1461 */ 1468 */
1462int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1469int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1463{ 1470{
1464 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3) 1471 if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
1465 return 0; 1472 return 0;
1466 1473
1467 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1539,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1539 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1540 my_rsv = &block_i->rsv_window_node; 1547 my_rsv = &block_i->rsv_window_node;
1541 1548
1542 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) { 1549 if (!ext3_has_free_blocks(sbi)) {
1543 *errp = -ENOSPC; 1550 *errp = -ENOSPC;
1544 goto out; 1551 goto out;
1545 } 1552 }
@@ -1736,11 +1743,8 @@ allocated:
1736 1743
1737 *errp = 0; 1744 *errp = 0;
1738 brelse(bitmap_bh); 1745 brelse(bitmap_bh);
1739 1746 dquot_free_block(inode, *count-num);
1740 if (num < *count) { 1747 *count = num;
1741 dquot_free_block(inode, *count-num);
1742 *count = num;
1743 }
1744 1748
1745 trace_ext3_allocate_blocks(inode, goal, num, 1749 trace_ext3_allocate_blocks(inode, goal, num,
1746 (unsigned long long)ret_block); 1750 (unsigned long long)ret_block);
@@ -1813,7 +1817,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1813 brelse(bitmap_bh); 1817 brelse(bitmap_bh);
1814 printk("ext3_count_free_blocks: stored = "E3FSBLK 1818 printk("ext3_count_free_blocks: stored = "E3FSBLK
1815 ", computed = "E3FSBLK", "E3FSBLK"\n", 1819 ", computed = "E3FSBLK", "E3FSBLK"\n",
1816 (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count), 1820 le32_to_cpu(es->s_free_blocks_count),
1817 desc_count, bitmap_count); 1821 desc_count, bitmap_count);
1818 return bitmap_count; 1822 return bitmap_count;
1819#else 1823#else
@@ -1920,10 +1924,9 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1920 * reaches any used block. Then issue a TRIM command on this extent and free 1924 * reaches any used block. Then issue a TRIM command on this extent and free
1921 * the extent in the block bitmap. This is done until whole group is scanned. 1925 * the extent in the block bitmap. This is done until whole group is scanned.
1922 */ 1926 */
1923static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, 1927ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1924 unsigned int group, 1928 ext3_grpblk_t start, ext3_grpblk_t max,
1925 ext3_grpblk_t start, ext3_grpblk_t max, 1929 ext3_grpblk_t minblocks)
1926 ext3_grpblk_t minblocks)
1927{ 1930{
1928 handle_t *handle; 1931 handle_t *handle;
1929 ext3_grpblk_t next, free_blocks, bit, freed, count = 0; 1932 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
@@ -1966,7 +1969,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1966 sbi = EXT3_SB(sb); 1969 sbi = EXT3_SB(sb);
1967 1970
1968 /* Walk through the whole group */ 1971 /* Walk through the whole group */
1969 while (start <= max) { 1972 while (start < max) {
1970 start = bitmap_search_next_usable_block(start, bitmap_bh, max); 1973 start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1971 if (start < 0) 1974 if (start < 0)
1972 break; 1975 break;
@@ -1976,7 +1979,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1976 * Allocate contiguous free extents by setting bits in the 1979 * Allocate contiguous free extents by setting bits in the
1977 * block bitmap 1980 * block bitmap
1978 */ 1981 */
1979 while (next <= max 1982 while (next < max
1980 && claim_block(sb_bgl_lock(sbi, group), 1983 && claim_block(sb_bgl_lock(sbi, group),
1981 next, bitmap_bh)) { 1984 next, bitmap_bh)) {
1982 next++; 1985 next++;
@@ -2087,75 +2090,73 @@ err_out:
2087 */ 2090 */
2088int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) 2091int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2089{ 2092{
2090 ext3_grpblk_t last_block, first_block; 2093 ext3_grpblk_t last_block, first_block, free_blocks;
2091 unsigned long group, first_group, last_group; 2094 unsigned long first_group, last_group;
2095 unsigned long group, ngroups;
2092 struct ext3_group_desc *gdp; 2096 struct ext3_group_desc *gdp;
2093 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 2097 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2094 uint64_t start, minlen, end, trimmed = 0; 2098 uint64_t start, len, minlen, trimmed;
2095 ext3_fsblk_t first_data_blk =
2096 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
2097 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); 2099 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2098 int ret = 0; 2100 int ret = 0;
2099 2101
2100 start = range->start >> sb->s_blocksize_bits; 2102 start = (range->start >> sb->s_blocksize_bits) +
2101 end = start + (range->len >> sb->s_blocksize_bits) - 1; 2103 le32_to_cpu(es->s_first_data_block);
2104 len = range->len >> sb->s_blocksize_bits;
2102 minlen = range->minlen >> sb->s_blocksize_bits; 2105 minlen = range->minlen >> sb->s_blocksize_bits;
2106 trimmed = 0;
2103 2107
2104 if (minlen > EXT3_BLOCKS_PER_GROUP(sb) || 2108 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2105 start >= max_blks ||
2106 range->len < sb->s_blocksize)
2107 return -EINVAL; 2109 return -EINVAL;
2108 if (end >= max_blks) 2110 if (start >= max_blks)
2109 end = max_blks - 1; 2111 return -EINVAL;
2110 if (end <= first_data_blk) 2112 if (start + len > max_blks)
2111 goto out; 2113 len = max_blks - start;
2112 if (start < first_data_blk)
2113 start = first_data_blk;
2114 2114
2115 ngroups = EXT3_SB(sb)->s_groups_count;
2115 smp_rmb(); 2116 smp_rmb();
2116 2117
2117 /* Determine first and last group to examine based on start and len */ 2118 /* Determine first and last group to examine based on start and len */
2118 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, 2119 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2119 &first_group, &first_block); 2120 &first_group, &first_block);
2120 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end, 2121 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
2121 &last_group, &last_block); 2122 &last_group, &last_block);
2123 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
2124 last_block = EXT3_BLOCKS_PER_GROUP(sb);
2122 2125
2123 /* end now represents the last block to discard in this group */ 2126 if (first_group > last_group)
2124 end = EXT3_BLOCKS_PER_GROUP(sb) - 1; 2127 return -EINVAL;
2125 2128
2126 for (group = first_group; group <= last_group; group++) { 2129 for (group = first_group; group <= last_group; group++) {
2127 gdp = ext3_get_group_desc(sb, group, NULL); 2130 gdp = ext3_get_group_desc(sb, group, NULL);
2128 if (!gdp) 2131 if (!gdp)
2129 break; 2132 break;
2130 2133
2134 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
2135 if (free_blocks < minlen)
2136 continue;
2137
2131 /* 2138 /*
2132 * For all the groups except the last one, last block will 2139 * For all the groups except the last one, last block will
2133 * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to 2140 * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
2134 * change it for the last group, note that last_block is 2141 * change it for the last group in which case first_block +
2135 * already computed earlier by ext3_get_group_no_and_offset() 2142 * len < EXT3_BLOCKS_PER_GROUP(sb).
2136 */ 2143 */
2137 if (group == last_group) 2144 if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
2138 end = last_block; 2145 last_block = first_block + len;
2146 len -= last_block - first_block;
2139 2147
2140 if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) { 2148 ret = ext3_trim_all_free(sb, group, first_block,
2141 ret = ext3_trim_all_free(sb, group, first_block, 2149 last_block, minlen);
2142 end, minlen); 2150 if (ret < 0)
2143 if (ret < 0) 2151 break;
2144 break;
2145 trimmed += ret;
2146 }
2147 2152
2148 /* 2153 trimmed += ret;
2149 * For every group except the first one, we are sure
2150 * that the first block to discard will be block #0.
2151 */
2152 first_block = 0; 2154 first_block = 0;
2153 } 2155 }
2154 2156
2155 if (ret > 0) 2157 if (ret >= 0)
2156 ret = 0; 2158 ret = 0;
2157
2158out:
2159 range->len = trimmed * sb->s_blocksize; 2159 range->len = trimmed * sb->s_blocksize;
2160
2160 return ret; 2161 return ret;
2161} 2162}
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index ef9c643e8e9..6afc39d8025 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,13 +7,25 @@
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10#include "ext3.h" 10#include <linux/buffer_head.h>
11#include <linux/jbd.h>
12#include <linux/ext3_fs.h>
11 13
12#ifdef EXT3FS_DEBUG 14#ifdef EXT3FS_DEBUG
13 15
16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17
14unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) 18unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
15{ 19{
16 return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); 20 unsigned int i;
21 unsigned long sum = 0;
22
23 if (!map)
24 return (0);
25 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] +
27 nibblemap[(map->b_data[i] >> 4) & 0xf];
28 return (sum);
17} 29}
18 30
19#endif /* EXT3FS_DEBUG */ 31#endif /* EXT3FS_DEBUG */
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index dd91264ba94..34f0a072b93 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,15 +21,35 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/compat.h> 24#include <linux/fs.h>
25#include "ext3.h" 25#include <linux/jbd.h>
26#include <linux/ext3_fs.h>
27#include <linux/buffer_head.h>
28#include <linux/slab.h>
29#include <linux/rbtree.h>
26 30
27static unsigned char ext3_filetype_table[] = { 31static unsigned char ext3_filetype_table[] = {
28 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
29}; 33};
30 34
35static int ext3_readdir(struct file *, void *, filldir_t);
31static int ext3_dx_readdir(struct file * filp, 36static int ext3_dx_readdir(struct file * filp,
32 void * dirent, filldir_t filldir); 37 void * dirent, filldir_t filldir);
38static int ext3_release_dir (struct inode * inode,
39 struct file * filp);
40
41const struct file_operations ext3_dir_operations = {
42 .llseek = generic_file_llseek,
43 .read = generic_read_dir,
44 .readdir = ext3_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext3_ioctl,
46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext3_compat_ioctl,
48#endif
49 .fsync = ext3_sync_file, /* BKL held */
50 .release = ext3_release_dir,
51};
52
33 53
34static unsigned char get_dtype(struct super_block *sb, int filetype) 54static unsigned char get_dtype(struct super_block *sb, int filetype)
35{ 55{
@@ -40,25 +60,6 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
40 return (ext3_filetype_table[filetype]); 60 return (ext3_filetype_table[filetype]);
41} 61}
42 62
43/**
44 * Check if the given dir-inode refers to an htree-indexed directory
45 * (or a directory which chould potentially get coverted to use htree
46 * indexing).
47 *
48 * Return 1 if it is a dx dir, 0 if not
49 */
50static int is_dx_dir(struct inode *inode)
51{
52 struct super_block *sb = inode->i_sb;
53
54 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
55 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
56 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
57 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
58 return 1;
59
60 return 0;
61}
62 63
63int ext3_check_dir_entry (const char * function, struct inode * dir, 64int ext3_check_dir_entry (const char * function, struct inode * dir,
64 struct ext3_dir_entry_2 * de, 65 struct ext3_dir_entry_2 * de,
@@ -98,13 +99,18 @@ static int ext3_readdir(struct file * filp,
98 unsigned long offset; 99 unsigned long offset;
99 int i, stored; 100 int i, stored;
100 struct ext3_dir_entry_2 *de; 101 struct ext3_dir_entry_2 *de;
102 struct super_block *sb;
101 int err; 103 int err;
102 struct inode *inode = filp->f_path.dentry->d_inode; 104 struct inode *inode = filp->f_path.dentry->d_inode;
103 struct super_block *sb = inode->i_sb;
104 int ret = 0; 105 int ret = 0;
105 int dir_has_error = 0; 106 int dir_has_error = 0;
106 107
107 if (is_dx_dir(inode)) { 108 sb = inode->i_sb;
109
110 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
111 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
112 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
113 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
108 err = ext3_dx_readdir(filp, dirent, filldir); 114 err = ext3_dx_readdir(filp, dirent, filldir);
109 if (err != ERR_BAD_DX_DIR) { 115 if (err != ERR_BAD_DX_DIR) {
110 ret = err; 116 ret = err;
@@ -226,88 +232,22 @@ out:
226 return ret; 232 return ret;
227} 233}
228 234
229static inline int is_32bit_api(void)
230{
231#ifdef CONFIG_COMPAT
232 return is_compat_task();
233#else
234 return (BITS_PER_LONG == 32);
235#endif
236}
237
238/* 235/*
239 * These functions convert from the major/minor hash to an f_pos 236 * These functions convert from the major/minor hash to an f_pos
240 * value for dx directories 237 * value.
241 * 238 *
242 * Upper layer (for example NFS) should specify FMODE_32BITHASH or 239 * Currently we only use major hash numer. This is unfortunate, but
243 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted 240 * on 32-bit machines, the same VFS interface is used for lseek and
244 * directly on both 32-bit and 64-bit nodes, under such case, neither 241 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
245 * FMODE_32BITHASH nor FMODE_64BITHASH is specified. 242 * lseek/telldir/seekdir will blow out spectacularly, and from within
243 * the ext2 low-level routine, we don't know if we're being called by
244 * a 64-bit version of the system call or the 32-bit version of the
245 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
246 * cookie. Sigh.
246 */ 247 */
247static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) 248#define hash2pos(major, minor) (major >> 1)
248{ 249#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
249 if ((filp->f_mode & FMODE_32BITHASH) || 250#define pos2min_hash(pos) (0)
250 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
251 return major >> 1;
252 else
253 return ((__u64)(major >> 1) << 32) | (__u64)minor;
254}
255
256static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
257{
258 if ((filp->f_mode & FMODE_32BITHASH) ||
259 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
260 return (pos << 1) & 0xffffffff;
261 else
262 return ((pos >> 32) << 1) & 0xffffffff;
263}
264
265static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
266{
267 if ((filp->f_mode & FMODE_32BITHASH) ||
268 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
269 return 0;
270 else
271 return pos & 0xffffffff;
272}
273
274/*
275 * Return 32- or 64-bit end-of-file for dx directories
276 */
277static inline loff_t ext3_get_htree_eof(struct file *filp)
278{
279 if ((filp->f_mode & FMODE_32BITHASH) ||
280 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
281 return EXT3_HTREE_EOF_32BIT;
282 else
283 return EXT3_HTREE_EOF_64BIT;
284}
285
286
287/*
288 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
289 * non-htree and htree directories, where the "offset" is in terms
290 * of the filename hash value instead of the byte offset.
291 *
292 * Because we may return a 64-bit hash that is well beyond s_maxbytes,
293 * we need to pass the max hash as the maximum allowable offset in
294 * the htree directory case.
295 *
296 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
297 * will be invalid once the directory was converted into a dx directory
298 */
299loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
300{
301 struct inode *inode = file->f_mapping->host;
302 int dx_dir = is_dx_dir(inode);
303 loff_t htree_max = ext3_get_htree_eof(file);
304
305 if (likely(dx_dir))
306 return generic_file_llseek_size(file, offset, whence,
307 htree_max, htree_max);
308 else
309 return generic_file_llseek(file, offset, whence);
310}
311 251
312/* 252/*
313 * This structure holds the nodes of the red-black tree used to store 253 * This structure holds the nodes of the red-black tree used to store
@@ -368,16 +308,15 @@ static void free_rb_tree_fname(struct rb_root *root)
368} 308}
369 309
370 310
371static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, 311static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
372 loff_t pos)
373{ 312{
374 struct dir_private_info *p; 313 struct dir_private_info *p;
375 314
376 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 315 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
377 if (!p) 316 if (!p)
378 return NULL; 317 return NULL;
379 p->curr_hash = pos2maj_hash(filp, pos); 318 p->curr_hash = pos2maj_hash(pos);
380 p->curr_minor_hash = pos2min_hash(filp, pos); 319 p->curr_minor_hash = pos2min_hash(pos);
381 return p; 320 return p;
382} 321}
383 322
@@ -467,7 +406,7 @@ static int call_filldir(struct file * filp, void * dirent,
467 printk("call_filldir: called with null fname?!?\n"); 406 printk("call_filldir: called with null fname?!?\n");
468 return 0; 407 return 0;
469 } 408 }
470 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); 409 curr_pos = hash2pos(fname->hash, fname->minor_hash);
471 while (fname) { 410 while (fname) {
472 error = filldir(dirent, fname->name, 411 error = filldir(dirent, fname->name,
473 fname->name_len, curr_pos, 412 fname->name_len, curr_pos,
@@ -492,13 +431,13 @@ static int ext3_dx_readdir(struct file * filp,
492 int ret; 431 int ret;
493 432
494 if (!info) { 433 if (!info) {
495 info = ext3_htree_create_dir_info(filp, filp->f_pos); 434 info = ext3_htree_create_dir_info(filp->f_pos);
496 if (!info) 435 if (!info)
497 return -ENOMEM; 436 return -ENOMEM;
498 filp->private_data = info; 437 filp->private_data = info;
499 } 438 }
500 439
501 if (filp->f_pos == ext3_get_htree_eof(filp)) 440 if (filp->f_pos == EXT3_HTREE_EOF)
502 return 0; /* EOF */ 441 return 0; /* EOF */
503 442
504 /* Some one has messed with f_pos; reset the world */ 443 /* Some one has messed with f_pos; reset the world */
@@ -506,8 +445,8 @@ static int ext3_dx_readdir(struct file * filp,
506 free_rb_tree_fname(&info->root); 445 free_rb_tree_fname(&info->root);
507 info->curr_node = NULL; 446 info->curr_node = NULL;
508 info->extra_fname = NULL; 447 info->extra_fname = NULL;
509 info->curr_hash = pos2maj_hash(filp, filp->f_pos); 448 info->curr_hash = pos2maj_hash(filp->f_pos);
510 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); 449 info->curr_minor_hash = pos2min_hash(filp->f_pos);
511 } 450 }
512 451
513 /* 452 /*
@@ -539,7 +478,7 @@ static int ext3_dx_readdir(struct file * filp,
539 if (ret < 0) 478 if (ret < 0)
540 return ret; 479 return ret;
541 if (ret == 0) { 480 if (ret == 0) {
542 filp->f_pos = ext3_get_htree_eof(filp); 481 filp->f_pos = EXT3_HTREE_EOF;
543 break; 482 break;
544 } 483 }
545 info->curr_node = rb_first(&info->root); 484 info->curr_node = rb_first(&info->root);
@@ -559,7 +498,7 @@ static int ext3_dx_readdir(struct file * filp,
559 info->curr_minor_hash = fname->minor_hash; 498 info->curr_minor_hash = fname->minor_hash;
560 } else { 499 } else {
561 if (info->next_hash == ~0) { 500 if (info->next_hash == ~0) {
562 filp->f_pos = ext3_get_htree_eof(filp); 501 filp->f_pos = EXT3_HTREE_EOF;
563 break; 502 break;
564 } 503 }
565 info->curr_hash = info->next_hash; 504 info->curr_hash = info->next_hash;
@@ -578,15 +517,3 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
578 517
579 return 0; 518 return 0;
580} 519}
581
582const struct file_operations ext3_dir_operations = {
583 .llseek = ext3_dir_llseek,
584 .read = generic_read_dir,
585 .readdir = ext3_readdir,
586 .unlocked_ioctl = ext3_ioctl,
587#ifdef CONFIG_COMPAT
588 .compat_ioctl = ext3_compat_ioctl,
589#endif
590 .fsync = ext3_sync_file,
591 .release = ext3_release_dir,
592};
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
deleted file mode 100644
index e85ff15a060..00000000000
--- a/fs/ext3/ext3.h
+++ /dev/null
@@ -1,1326 +0,0 @@
1/*
2 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
3 *
4 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
5 *
6 * This file is part of the Linux kernel and is made available under
7 * the terms of the GNU General Public License, version 2, or at your
8 * option, any later version, incorporated herein by reference.
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * from
16 *
17 * linux/include/linux/minix_fs.h
18 *
19 * Copyright (C) 1991, 1992 Linus Torvalds
20 */
21
22#include <linux/fs.h>
23#include <linux/jbd.h>
24#include <linux/magic.h>
25#include <linux/bug.h>
26#include <linux/blockgroup_lock.h>
27
28/*
29 * The second extended filesystem constants/structures
30 */
31
32/*
33 * Define EXT3FS_DEBUG to produce debug messages
34 */
35#undef EXT3FS_DEBUG
36
37/*
38 * Define EXT3_RESERVATION to reserve data blocks for expanding files
39 */
40#define EXT3_DEFAULT_RESERVE_BLOCKS 8
41/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
42#define EXT3_MAX_RESERVE_BLOCKS 1027
43#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0
44
45/*
46 * Debug code
47 */
48#ifdef EXT3FS_DEBUG
49#define ext3_debug(f, a...) \
50 do { \
51 printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \
52 __FILE__, __LINE__, __func__); \
53 printk (KERN_DEBUG f, ## a); \
54 } while (0)
55#else
56#define ext3_debug(f, a...) do {} while (0)
57#endif
58
59/*
60 * Special inodes numbers
61 */
62#define EXT3_BAD_INO 1 /* Bad blocks inode */
63#define EXT3_ROOT_INO 2 /* Root inode */
64#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
65#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
66#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
67#define EXT3_JOURNAL_INO 8 /* Journal inode */
68
69/* First non-reserved inode for old ext3 filesystems */
70#define EXT3_GOOD_OLD_FIRST_INO 11
71
72/*
73 * Maximal count of links to a file
74 */
75#define EXT3_LINK_MAX 32000
76
77/*
78 * Macro-instructions used to manage several block sizes
79 */
80#define EXT3_MIN_BLOCK_SIZE 1024
81#define EXT3_MAX_BLOCK_SIZE 65536
82#define EXT3_MIN_BLOCK_LOG_SIZE 10
83#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
84#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
85#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
86#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
87#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
88#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
89
90/*
91 * Macro-instructions used to manage fragments
92 */
93#define EXT3_MIN_FRAG_SIZE 1024
94#define EXT3_MAX_FRAG_SIZE 4096
95#define EXT3_MIN_FRAG_LOG_SIZE 10
96#define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
97#define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
98
99/*
100 * Structure of a blocks group descriptor
101 */
102struct ext3_group_desc
103{
104 __le32 bg_block_bitmap; /* Blocks bitmap block */
105 __le32 bg_inode_bitmap; /* Inodes bitmap block */
106 __le32 bg_inode_table; /* Inodes table block */
107 __le16 bg_free_blocks_count; /* Free blocks count */
108 __le16 bg_free_inodes_count; /* Free inodes count */
109 __le16 bg_used_dirs_count; /* Directories count */
110 __u16 bg_pad;
111 __le32 bg_reserved[3];
112};
113
114/*
115 * Macro-instructions used to manage group descriptors
116 */
117#define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
118#define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
119#define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
120#define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
121
122/*
123 * Constants relative to the data blocks
124 */
125#define EXT3_NDIR_BLOCKS 12
126#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS
127#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1)
128#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1)
129#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1)
130
131/*
132 * Inode flags
133 */
134#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
135#define EXT3_UNRM_FL 0x00000002 /* Undelete */
136#define EXT3_COMPR_FL 0x00000004 /* Compress file */
137#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
138#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
139#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
140#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
141#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
142/* Reserved for compression usage... */
143#define EXT3_DIRTY_FL 0x00000100
144#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
145#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
146#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
147/* End compression flags --- maybe not all used */
148#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
149#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
150#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
151#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
152#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
153#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
154#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
155
156#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
157#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
158
159/* Flags that should be inherited by new inodes from their parent. */
160#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
161 EXT3_SYNC_FL | EXT3_NODUMP_FL |\
162 EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
163 EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
164 EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
165
166/* Flags that are appropriate for regular files (all but dir-specific ones). */
167#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
168
169/* Flags that are appropriate for non-directories/regular files. */
170#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
171
172/* Mask out flags that are inappropriate for the given type of inode. */
173static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
174{
175 if (S_ISDIR(mode))
176 return flags;
177 else if (S_ISREG(mode))
178 return flags & EXT3_REG_FLMASK;
179 else
180 return flags & EXT3_OTHER_FLMASK;
181}
182
183/* Used to pass group descriptor data when online resize is done */
184struct ext3_new_group_input {
185 __u32 group; /* Group number for this data */
186 __u32 block_bitmap; /* Absolute block number of block bitmap */
187 __u32 inode_bitmap; /* Absolute block number of inode bitmap */
188 __u32 inode_table; /* Absolute block number of inode table start */
189 __u32 blocks_count; /* Total number of blocks in this group */
190 __u16 reserved_blocks; /* Number of reserved blocks in this group */
191 __u16 unused;
192};
193
194/* The struct ext3_new_group_input in kernel space, with free_blocks_count */
195struct ext3_new_group_data {
196 __u32 group;
197 __u32 block_bitmap;
198 __u32 inode_bitmap;
199 __u32 inode_table;
200 __u32 blocks_count;
201 __u16 reserved_blocks;
202 __u16 unused;
203 __u32 free_blocks_count;
204};
205
206
207/*
208 * ioctl commands
209 */
210#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
211#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
212#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
213#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
214#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
215#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
216#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
217#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
218#ifdef CONFIG_JBD_DEBUG
219#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
220#endif
221#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
222#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
223
224/*
225 * ioctl commands in 32 bit emulation
226 */
227#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
228#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
229#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
230#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
231#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
232#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
233#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
234#ifdef CONFIG_JBD_DEBUG
235#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
236#endif
237#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
238#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
239
240
241/*
242 * Mount options
243 */
244struct ext3_mount_options {
245 unsigned long s_mount_opt;
246 kuid_t s_resuid;
247 kgid_t s_resgid;
248 unsigned long s_commit_interval;
249#ifdef CONFIG_QUOTA
250 int s_jquota_fmt;
251 char *s_qf_names[MAXQUOTAS];
252#endif
253};
254
255/*
256 * Structure of an inode on the disk
257 */
258struct ext3_inode {
259 __le16 i_mode; /* File mode */
260 __le16 i_uid; /* Low 16 bits of Owner Uid */
261 __le32 i_size; /* Size in bytes */
262 __le32 i_atime; /* Access time */
263 __le32 i_ctime; /* Creation time */
264 __le32 i_mtime; /* Modification time */
265 __le32 i_dtime; /* Deletion Time */
266 __le16 i_gid; /* Low 16 bits of Group Id */
267 __le16 i_links_count; /* Links count */
268 __le32 i_blocks; /* Blocks count */
269 __le32 i_flags; /* File flags */
270 union {
271 struct {
272 __u32 l_i_reserved1;
273 } linux1;
274 struct {
275 __u32 h_i_translator;
276 } hurd1;
277 struct {
278 __u32 m_i_reserved1;
279 } masix1;
280 } osd1; /* OS dependent 1 */
281 __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
282 __le32 i_generation; /* File version (for NFS) */
283 __le32 i_file_acl; /* File ACL */
284 __le32 i_dir_acl; /* Directory ACL */
285 __le32 i_faddr; /* Fragment address */
286 union {
287 struct {
288 __u8 l_i_frag; /* Fragment number */
289 __u8 l_i_fsize; /* Fragment size */
290 __u16 i_pad1;
291 __le16 l_i_uid_high; /* these 2 fields */
292 __le16 l_i_gid_high; /* were reserved2[0] */
293 __u32 l_i_reserved2;
294 } linux2;
295 struct {
296 __u8 h_i_frag; /* Fragment number */
297 __u8 h_i_fsize; /* Fragment size */
298 __u16 h_i_mode_high;
299 __u16 h_i_uid_high;
300 __u16 h_i_gid_high;
301 __u32 h_i_author;
302 } hurd2;
303 struct {
304 __u8 m_i_frag; /* Fragment number */
305 __u8 m_i_fsize; /* Fragment size */
306 __u16 m_pad1;
307 __u32 m_i_reserved2[2];
308 } masix2;
309 } osd2; /* OS dependent 2 */
310 __le16 i_extra_isize;
311 __le16 i_pad1;
312};
313
314#define i_size_high i_dir_acl
315
316#define i_reserved1 osd1.linux1.l_i_reserved1
317#define i_frag osd2.linux2.l_i_frag
318#define i_fsize osd2.linux2.l_i_fsize
319#define i_uid_low i_uid
320#define i_gid_low i_gid
321#define i_uid_high osd2.linux2.l_i_uid_high
322#define i_gid_high osd2.linux2.l_i_gid_high
323#define i_reserved2 osd2.linux2.l_i_reserved2
324
325/*
326 * File system states
327 */
328#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */
329#define EXT3_ERROR_FS 0x0002 /* Errors detected */
330#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */
331
332/*
333 * Misc. filesystem flags
334 */
335#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
336#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
337#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
338
339/*
340 * Mount flags
341 */
342#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
343/* EXT3_MOUNT_OLDALLOC was there */
344#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
345#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
346#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
347#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
348#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
349#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
350#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
351#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */
352#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
353#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
354#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
355#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
356#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
357#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
358#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
359#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
360#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
361#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
362#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
363#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
364#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
365#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
366 * error in ordered mode */
367
368/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
369#ifndef _LINUX_EXT2_FS_H
370#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
371#define set_opt(o, opt) o |= EXT3_MOUNT_##opt
372#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
373 EXT3_MOUNT_##opt)
374#else
375#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
376#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT
377#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
378#endif
379
380#define ext3_set_bit __set_bit_le
381#define ext3_set_bit_atomic ext2_set_bit_atomic
382#define ext3_clear_bit __clear_bit_le
383#define ext3_clear_bit_atomic ext2_clear_bit_atomic
384#define ext3_test_bit test_bit_le
385#define ext3_find_next_zero_bit find_next_zero_bit_le
386
387/*
388 * Maximal mount counts between two filesystem checks
389 */
390#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
391#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */
392
393/*
394 * Behaviour when detecting errors
395 */
396#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */
397#define EXT3_ERRORS_RO 2 /* Remount fs read-only */
398#define EXT3_ERRORS_PANIC 3 /* Panic */
399#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE
400
401/*
402 * Structure of the super block
403 */
404struct ext3_super_block {
405/*00*/ __le32 s_inodes_count; /* Inodes count */
406 __le32 s_blocks_count; /* Blocks count */
407 __le32 s_r_blocks_count; /* Reserved blocks count */
408 __le32 s_free_blocks_count; /* Free blocks count */
409/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
410 __le32 s_first_data_block; /* First Data Block */
411 __le32 s_log_block_size; /* Block size */
412 __le32 s_log_frag_size; /* Fragment size */
413/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
414 __le32 s_frags_per_group; /* # Fragments per group */
415 __le32 s_inodes_per_group; /* # Inodes per group */
416 __le32 s_mtime; /* Mount time */
417/*30*/ __le32 s_wtime; /* Write time */
418 __le16 s_mnt_count; /* Mount count */
419 __le16 s_max_mnt_count; /* Maximal mount count */
420 __le16 s_magic; /* Magic signature */
421 __le16 s_state; /* File system state */
422 __le16 s_errors; /* Behaviour when detecting errors */
423 __le16 s_minor_rev_level; /* minor revision level */
424/*40*/ __le32 s_lastcheck; /* time of last check */
425 __le32 s_checkinterval; /* max. time between checks */
426 __le32 s_creator_os; /* OS */
427 __le32 s_rev_level; /* Revision level */
428/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
429 __le16 s_def_resgid; /* Default gid for reserved blocks */
430 /*
431 * These fields are for EXT3_DYNAMIC_REV superblocks only.
432 *
433 * Note: the difference between the compatible feature set and
434 * the incompatible feature set is that if there is a bit set
435 * in the incompatible feature set that the kernel doesn't
436 * know about, it should refuse to mount the filesystem.
437 *
438 * e2fsck's requirements are more strict; if it doesn't know
439 * about a feature in either the compatible or incompatible
440 * feature set, it must abort and not try to meddle with
441 * things it doesn't understand...
442 */
443 __le32 s_first_ino; /* First non-reserved inode */
444 __le16 s_inode_size; /* size of inode structure */
445 __le16 s_block_group_nr; /* block group # of this superblock */
446 __le32 s_feature_compat; /* compatible feature set */
447/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
448 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
449/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
450/*78*/ char s_volume_name[16]; /* volume name */
451/*88*/ char s_last_mounted[64]; /* directory where last mounted */
452/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
453 /*
454 * Performance hints. Directory preallocation should only
455 * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
456 */
457 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
458 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
459 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
460 /*
461 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
462 */
463/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
464/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
465 __le32 s_journal_dev; /* device number of journal file */
466 __le32 s_last_orphan; /* start of list of inodes to delete */
467 __le32 s_hash_seed[4]; /* HTREE hash seed */
468 __u8 s_def_hash_version; /* Default hash version to use */
469 __u8 s_reserved_char_pad;
470 __u16 s_reserved_word_pad;
471 __le32 s_default_mount_opts;
472 __le32 s_first_meta_bg; /* First metablock block group */
473 __le32 s_mkfs_time; /* When the filesystem was created */
474 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
475 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
476/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
477 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
478 __le32 s_free_blocks_count_hi; /* Free blocks count */
479 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
480 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
481 __le32 s_flags; /* Miscellaneous flags */
482 __le16 s_raid_stride; /* RAID stride */
483 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
484 __le64 s_mmp_block; /* Block for multi-mount protection */
485 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
486 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
487 __u8 s_reserved_char_pad2;
488 __le16 s_reserved_pad;
489 __u32 s_reserved[162]; /* Padding to the end of the block */
490};
491
492/* data type for block offset of block group */
493typedef int ext3_grpblk_t;
494
495/* data type for filesystem-wide blocks number */
496typedef unsigned long ext3_fsblk_t;
497
498#define E3FSBLK "%lu"
499
500struct ext3_reserve_window {
501 ext3_fsblk_t _rsv_start; /* First byte reserved */
502 ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */
503};
504
505struct ext3_reserve_window_node {
506 struct rb_node rsv_node;
507 __u32 rsv_goal_size;
508 __u32 rsv_alloc_hit;
509 struct ext3_reserve_window rsv_window;
510};
511
512struct ext3_block_alloc_info {
513 /* information about reservation window */
514 struct ext3_reserve_window_node rsv_window_node;
515 /*
516 * was i_next_alloc_block in ext3_inode_info
517 * is the logical (file-relative) number of the
518 * most-recently-allocated block in this file.
519 * We use this for detecting linearly ascending allocation requests.
520 */
521 __u32 last_alloc_logical_block;
522 /*
523 * Was i_next_alloc_goal in ext3_inode_info
524 * is the *physical* companion to i_next_alloc_block.
525 * it the physical block number of the block which was most-recentl
526 * allocated to this file. This give us the goal (target) for the next
527 * allocation when we detect linearly ascending requests.
528 */
529 ext3_fsblk_t last_alloc_physical_block;
530};
531
532#define rsv_start rsv_window._rsv_start
533#define rsv_end rsv_window._rsv_end
534
535/*
536 * third extended file system inode data in memory
537 */
538struct ext3_inode_info {
539 __le32 i_data[15]; /* unconverted */
540 __u32 i_flags;
541#ifdef EXT3_FRAGMENTS
542 __u32 i_faddr;
543 __u8 i_frag_no;
544 __u8 i_frag_size;
545#endif
546 ext3_fsblk_t i_file_acl;
547 __u32 i_dir_acl;
548 __u32 i_dtime;
549
550 /*
551 * i_block_group is the number of the block group which contains
552 * this file's inode. Constant across the lifetime of the inode,
553 * it is ued for making block allocation decisions - we try to
554 * place a file's data blocks near its inode block, and new inodes
555 * near to their parent directory's inode.
556 */
557 __u32 i_block_group;
558 unsigned long i_state_flags; /* Dynamic state flags for ext3 */
559
560 /* block reservation info */
561 struct ext3_block_alloc_info *i_block_alloc_info;
562
563 __u32 i_dir_start_lookup;
564#ifdef CONFIG_EXT3_FS_XATTR
565 /*
566 * Extended attributes can be read independently of the main file
567 * data. Taking i_mutex even when reading would cause contention
568 * between readers of EAs and writers of regular file data, so
569 * instead we synchronize on xattr_sem when reading or changing
570 * EAs.
571 */
572 struct rw_semaphore xattr_sem;
573#endif
574
575 struct list_head i_orphan; /* unlinked but open inodes */
576
577 /*
578 * i_disksize keeps track of what the inode size is ON DISK, not
579 * in memory. During truncate, i_size is set to the new size by
580 * the VFS prior to calling ext3_truncate(), but the filesystem won't
581 * set i_disksize to 0 until the truncate is actually under way.
582 *
583 * The intent is that i_disksize always represents the blocks which
584 * are used by this file. This allows recovery to restart truncate
585 * on orphans if we crash during truncate. We actually write i_disksize
586 * into the on-disk inode when writing inodes out, instead of i_size.
587 *
588 * The only time when i_disksize and i_size may be different is when
589 * a truncate is in progress. The only things which change i_disksize
590 * are ext3_get_block (growth) and ext3_truncate (shrinkth).
591 */
592 loff_t i_disksize;
593
594 /* on-disk additional length */
595 __u16 i_extra_isize;
596
597 /*
598 * truncate_mutex is for serialising ext3_truncate() against
599 * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
600 * data tree are chopped off during truncate. We can't do that in
601 * ext3 because whenever we perform intermediate commits during
602 * truncate, the inode and all the metadata blocks *must* be in a
603 * consistent state which allows truncation of the orphans to restart
604 * during recovery. Hence we must fix the get_block-vs-truncate race
605 * by other means, so we have truncate_mutex.
606 */
607 struct mutex truncate_mutex;
608
609 /*
610 * Transactions that contain inode's metadata needed to complete
611 * fsync and fdatasync, respectively.
612 */
613 atomic_t i_sync_tid;
614 atomic_t i_datasync_tid;
615
616 struct inode vfs_inode;
617};
618
619/*
620 * third extended-fs super-block data in memory
621 */
622struct ext3_sb_info {
623 unsigned long s_frag_size; /* Size of a fragment in bytes */
624 unsigned long s_frags_per_block;/* Number of fragments per block */
625 unsigned long s_inodes_per_block;/* Number of inodes per block */
626 unsigned long s_frags_per_group;/* Number of fragments in a group */
627 unsigned long s_blocks_per_group;/* Number of blocks in a group */
628 unsigned long s_inodes_per_group;/* Number of inodes in a group */
629 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
630 unsigned long s_gdb_count; /* Number of group descriptor blocks */
631 unsigned long s_desc_per_block; /* Number of group descriptors per block */
632 unsigned long s_groups_count; /* Number of groups in the fs */
633 unsigned long s_overhead_last; /* Last calculated overhead */
634 unsigned long s_blocks_last; /* Last seen block count */
635 struct buffer_head * s_sbh; /* Buffer containing the super block */
636 struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
637 struct buffer_head ** s_group_desc;
638 unsigned long s_mount_opt;
639 ext3_fsblk_t s_sb_block;
640 kuid_t s_resuid;
641 kgid_t s_resgid;
642 unsigned short s_mount_state;
643 unsigned short s_pad;
644 int s_addr_per_block_bits;
645 int s_desc_per_block_bits;
646 int s_inode_size;
647 int s_first_ino;
648 spinlock_t s_next_gen_lock;
649 u32 s_next_generation;
650 u32 s_hash_seed[4];
651 int s_def_hash_version;
652 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
653 struct percpu_counter s_freeblocks_counter;
654 struct percpu_counter s_freeinodes_counter;
655 struct percpu_counter s_dirs_counter;
656 struct blockgroup_lock *s_blockgroup_lock;
657
658 /* root of the per fs reservation window tree */
659 spinlock_t s_rsv_window_lock;
660 struct rb_root s_rsv_window_root;
661 struct ext3_reserve_window_node s_rsv_window_head;
662
663 /* Journaling */
664 struct inode * s_journal_inode;
665 struct journal_s * s_journal;
666 struct list_head s_orphan;
667 struct mutex s_orphan_lock;
668 struct mutex s_resize_lock;
669 unsigned long s_commit_interval;
670 struct block_device *journal_bdev;
671#ifdef CONFIG_QUOTA
672 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
673 int s_jquota_fmt; /* Format of quota to use */
674#endif
675};
676
677static inline spinlock_t *
678sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
679{
680 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
681}
682
683static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
684{
685 return sb->s_fs_info;
686}
687static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
688{
689 return container_of(inode, struct ext3_inode_info, vfs_inode);
690}
691
692static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
693{
694 return ino == EXT3_ROOT_INO ||
695 ino == EXT3_JOURNAL_INO ||
696 ino == EXT3_RESIZE_INO ||
697 (ino >= EXT3_FIRST_INO(sb) &&
698 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
699}
700
701/*
702 * Inode dynamic state flags
703 */
704enum {
705 EXT3_STATE_JDATA, /* journaled data exists */
706 EXT3_STATE_NEW, /* inode is newly created */
707 EXT3_STATE_XATTR, /* has in-inode xattrs */
708 EXT3_STATE_FLUSH_ON_CLOSE, /* flush dirty pages on close */
709};
710
711static inline int ext3_test_inode_state(struct inode *inode, int bit)
712{
713 return test_bit(bit, &EXT3_I(inode)->i_state_flags);
714}
715
716static inline void ext3_set_inode_state(struct inode *inode, int bit)
717{
718 set_bit(bit, &EXT3_I(inode)->i_state_flags);
719}
720
721static inline void ext3_clear_inode_state(struct inode *inode, int bit)
722{
723 clear_bit(bit, &EXT3_I(inode)->i_state_flags);
724}
725
726#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
727
728/*
729 * Codes for operating systems
730 */
731#define EXT3_OS_LINUX 0
732#define EXT3_OS_HURD 1
733#define EXT3_OS_MASIX 2
734#define EXT3_OS_FREEBSD 3
735#define EXT3_OS_LITES 4
736
737/*
738 * Revision levels
739 */
740#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */
741#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
742
743#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV
744#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV
745
746#define EXT3_GOOD_OLD_INODE_SIZE 128
747
748/*
749 * Feature set definitions
750 */
751
752#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \
753 ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
754#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \
755 ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
756#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \
757 ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
758#define EXT3_SET_COMPAT_FEATURE(sb,mask) \
759 EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
760#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \
761 EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
762#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \
763 EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
764#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \
765 EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
766#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
767 EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
768#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \
769 EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
770
771#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001
772#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002
773#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
774#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
775#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
776#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
777
778#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
779#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
780#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
781
782#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
783#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
784#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
785#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
786#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
787
788#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
789#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
790 EXT3_FEATURE_INCOMPAT_RECOVER| \
791 EXT3_FEATURE_INCOMPAT_META_BG)
792#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
793 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
794 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
795
796/*
797 * Default values for user and/or group using reserved blocks
798 */
799#define EXT3_DEF_RESUID 0
800#define EXT3_DEF_RESGID 0
801
802/*
803 * Default mount options
804 */
805#define EXT3_DEFM_DEBUG 0x0001
806#define EXT3_DEFM_BSDGROUPS 0x0002
807#define EXT3_DEFM_XATTR_USER 0x0004
808#define EXT3_DEFM_ACL 0x0008
809#define EXT3_DEFM_UID16 0x0010
810#define EXT3_DEFM_JMODE 0x0060
811#define EXT3_DEFM_JMODE_DATA 0x0020
812#define EXT3_DEFM_JMODE_ORDERED 0x0040
813#define EXT3_DEFM_JMODE_WBACK 0x0060
814
815/*
816 * Structure of a directory entry
817 */
818#define EXT3_NAME_LEN 255
819
820struct ext3_dir_entry {
821 __le32 inode; /* Inode number */
822 __le16 rec_len; /* Directory entry length */
823 __le16 name_len; /* Name length */
824 char name[EXT3_NAME_LEN]; /* File name */
825};
826
827/*
828 * The new version of the directory entry. Since EXT3 structures are
829 * stored in intel byte order, and the name_len field could never be
830 * bigger than 255 chars, it's safe to reclaim the extra byte for the
831 * file_type field.
832 */
833struct ext3_dir_entry_2 {
834 __le32 inode; /* Inode number */
835 __le16 rec_len; /* Directory entry length */
836 __u8 name_len; /* Name length */
837 __u8 file_type;
838 char name[EXT3_NAME_LEN]; /* File name */
839};
840
841/*
842 * Ext3 directory file types. Only the low 3 bits are used. The
843 * other bits are reserved for now.
844 */
845#define EXT3_FT_UNKNOWN 0
846#define EXT3_FT_REG_FILE 1
847#define EXT3_FT_DIR 2
848#define EXT3_FT_CHRDEV 3
849#define EXT3_FT_BLKDEV 4
850#define EXT3_FT_FIFO 5
851#define EXT3_FT_SOCK 6
852#define EXT3_FT_SYMLINK 7
853
854#define EXT3_FT_MAX 8
855
856/*
857 * EXT3_DIR_PAD defines the directory entries boundaries
858 *
859 * NOTE: It must be a multiple of 4
860 */
861#define EXT3_DIR_PAD 4
862#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
863#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
864 ~EXT3_DIR_ROUND)
865#define EXT3_MAX_REC_LEN ((1<<16)-1)
866
867/*
868 * Tests against MAX_REC_LEN etc were put in place for 64k block
869 * sizes; if that is not possible on this arch, we can skip
870 * those tests and speed things up.
871 */
872static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
873{
874 unsigned len = le16_to_cpu(dlen);
875
876#if (PAGE_CACHE_SIZE >= 65536)
877 if (len == EXT3_MAX_REC_LEN)
878 return 1 << 16;
879#endif
880 return len;
881}
882
883static inline __le16 ext3_rec_len_to_disk(unsigned len)
884{
885#if (PAGE_CACHE_SIZE >= 65536)
886 if (len == (1 << 16))
887 return cpu_to_le16(EXT3_MAX_REC_LEN);
888 else if (len > (1 << 16))
889 BUG();
890#endif
891 return cpu_to_le16(len);
892}
893
/*
 * Hash Tree Directory indexing
 * (c) Daniel Phillips, 2001
 */

/*
 * is_dx(dir)               - true when the superblock of @dir has the
 *                            DIR_INDEX compat feature and the inode itself
 *                            carries EXT3_INDEX_FL.
 * EXT3_DIR_LINK_MAX(dir)   - true when @dir is not indexed and its link
 *                            count has reached EXT3_LINK_MAX.
 * EXT3_DIR_LINK_EMPTY(dir) - true when the link count of @dir is 1 or 2.
 *
 * @dir is parenthesized at every use so that any pointer expression
 * (for example "base + 1") is accepted. Each macro expands @dir more than
 * once, so the argument must be free of side effects.
 */
#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE((dir)->i_sb, \
				      EXT3_FEATURE_COMPAT_DIR_INDEX) && \
		    (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
904
905/* Legal values for the dx_root hash_version field: */
906
/* Each *_UNSIGNED value is its signed counterpart plus 3. */
907#define DX_HASH_LEGACY 0
908#define DX_HASH_HALF_MD4 1
909#define DX_HASH_TEA 2
910#define DX_HASH_LEGACY_UNSIGNED 3
911#define DX_HASH_HALF_MD4_UNSIGNED 4
912#define DX_HASH_TEA_UNSIGNED 5
913
914/* hash info structure used by the directory hash */
915struct dx_hash_info
916{
917 u32 hash;
918 u32 minor_hash;
919 int hash_version; /* NOTE(review): presumably one of the DX_HASH_* values - confirm */
920 u32 *seed;
921};
922
923
924/* 32 and 64 bit signed EOF for dx directories */
/* These evaluate to 0x7fffffff and 0x7fffffffffffffff respectively. */
925#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
926#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
927
928
929/*
930 * Control parameters used by ext3_htree_next_block
931 */
932#define HASH_NB_ALWAYS 1
933
934
935/*
936 * Describe an inode's exact location on disk and in memory
937 */
938struct ext3_iloc
939{
940 struct buffer_head *bh; /* buffer whose b_data contains the raw inode */
941 unsigned long offset; /* position of the raw inode within bh->b_data */
942 unsigned long block_group; /* NOTE(review): presumably the inode's block group number - confirm */
943};
944
945static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
946{
947 return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
948}
949
950/*
951 * This structure is stuffed into the struct file's private_data field
952 * for directories. It is where we put information so that we can do
953 * readdir operations in hash tree order.
954 */
/*
 * NOTE(review): the per-field notes below are inferred from the field names
 * and types only; confirm against the readdir code before relying on them.
 */
955struct dir_private_info {
956 struct rb_root root; /* presumably the tree of cached entries */
957 struct rb_node *curr_node; /* presumably the current position in that tree */
958 struct fname *extra_fname;
959 loff_t last_pos;
960 __u32 curr_hash;
961 __u32 curr_minor_hash;
962 __u32 next_hash;
963};
964
965/* calculate the first block number of the group */
966static inline ext3_fsblk_t
967ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
968{
969 return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) +
970 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
971}
972
973/*
974 * Special error return code only used by dx_probe() and its callers.
975 */
/* NOTE(review): the expansion is a bare negative literal, not parenthesized. */
976#define ERR_BAD_DX_DIR -75000
977
978/*
979 * Function prototypes
980 */
981
982/*
983 * Ok, these declarations are also in <linux/kernel.h> but none of the
984 * ext3 source programs needs to include it so they are duplicated here.
985 */
/*
 * NORET_TYPE expands to nothing (an empty comment), ATTRIB_NORET to the
 * compiler's noreturn attribute, and NORET_AND to the tokens "noreturn,".
 * None of them is used in the part of this header shown here.
 */
986# define NORET_TYPE /**/
987# define ATTRIB_NORET __attribute__((noreturn))
988# define NORET_AND noreturn,
989
/*
 * Prototypes for functions implemented in the individual ext3 source files;
 * each group below is headed by the name of the file that defines it.
 */
990/* balloc.c */
991extern int ext3_bg_has_super(struct super_block *sb, int group);
992extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
993extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
994 ext3_fsblk_t goal, int *errp);
995extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
996 ext3_fsblk_t goal, unsigned long *count, int *errp);
997extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
998 ext3_fsblk_t block, unsigned long count);
999extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
1000 ext3_fsblk_t block, unsigned long count,
1001 unsigned long *pdquot_freed_blocks);
1002extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *);
1003extern void ext3_check_blocks_bitmap (struct super_block *);
1004extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1005 unsigned int block_group,
1006 struct buffer_head ** bh);
1007extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
1008extern void ext3_init_block_alloc_info(struct inode *);
1009extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
1010extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
1011
1012/* dir.c */
1013extern int ext3_check_dir_entry(const char *, struct inode *,
1014 struct ext3_dir_entry_2 *,
1015 struct buffer_head *, unsigned long);
1016extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
1017 __u32 minor_hash,
1018 struct ext3_dir_entry_2 *dirent);
1019extern void ext3_htree_free_dir_info(struct dir_private_info *p);
1020
1021/* fsync.c */
1022extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
1023
1024/* hash.c */
1025extern int ext3fs_dirhash(const char *name, int len, struct
1026 dx_hash_info *hinfo);
1027
1028/* ialloc.c */
1029extern struct inode * ext3_new_inode (handle_t *, struct inode *,
1030 const struct qstr *, umode_t);
1031extern void ext3_free_inode (handle_t *, struct inode *);
1032extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
1033extern unsigned long ext3_count_free_inodes (struct super_block *);
1034extern unsigned long ext3_count_dirs (struct super_block *);
1035extern void ext3_check_inodes_bitmap (struct super_block *);
1036extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
1037
1038
1039/* inode.c */
1040int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
1041 struct buffer_head *bh, ext3_fsblk_t blocknr);
1042struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
1043struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
1044int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
1045 sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
1046 int create);
1047
1048extern struct inode *ext3_iget(struct super_block *, unsigned long);
1049extern int ext3_write_inode (struct inode *, struct writeback_control *);
1050extern int ext3_setattr (struct dentry *, struct iattr *);
1051extern void ext3_evict_inode (struct inode *);
1052extern int ext3_sync_inode (handle_t *, struct inode *);
1053extern void ext3_discard_reservation (struct inode *);
1054extern void ext3_dirty_inode(struct inode *, int);
1055extern int ext3_change_inode_journal_flag(struct inode *, int);
1056extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
1057extern int ext3_can_truncate(struct inode *inode);
1058extern void ext3_truncate(struct inode *inode);
1059extern void ext3_set_inode_flags(struct inode *);
1060extern void ext3_get_inode_flags(struct ext3_inode_info *);
1061extern void ext3_set_aops(struct inode *inode);
1062extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1063 u64 start, u64 len);
1064
1065/* ioctl.c */
1066extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
1067extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
1068
1069/* namei.c */
1070extern int ext3_orphan_add(handle_t *, struct inode *);
1071extern int ext3_orphan_del(handle_t *, struct inode *);
1072extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1073 __u32 start_minor_hash, __u32 *next_hash);
1074
1075/* resize.c */
1076extern int ext3_group_add(struct super_block *sb,
1077 struct ext3_new_group_data *input);
1078extern int ext3_group_extend(struct super_block *sb,
1079 struct ext3_super_block *es,
1080 ext3_fsblk_t n_blocks_count);
1081
1082/* super.c */
/*
 * The reporting helpers below take a printf-style format as their third
 * argument, with the variadic arguments starting at the fourth; that is
 * what the __printf(3, 4) annotation tells the compiler to check.
 */
1083extern __printf(3, 4)
1084void ext3_error(struct super_block *, const char *, const char *, ...);
1085extern void __ext3_std_error (struct super_block *, const char *, int);
1086extern __printf(3, 4)
1087void ext3_abort(struct super_block *, const char *, const char *, ...);
1088extern __printf(3, 4)
1089void ext3_warning(struct super_block *, const char *, const char *, ...);
1090extern __printf(3, 4)
1091void ext3_msg(struct super_block *, const char *, const char *, ...);
1092extern void ext3_update_dynamic_rev (struct super_block *sb);
1093
/*
 * Pass a non-zero errno to __ext3_std_error() together with the name of the
 * calling function; do nothing when errno is zero. The errno argument is
 * expanded twice, so it must not have side effects.
 */
1094#define ext3_std_error(sb, errno) \
1095do { \
1096 if ((errno)) \
1097 __ext3_std_error((sb), __func__, (errno)); \
1098} while (0)
1099
1100/*
1101 * Inodes and files operations
1102 */
1103
/*
 * Operation tables defined in the named source files; all of them are
 * declared const here.
 */
1104/* dir.c */
1105extern const struct file_operations ext3_dir_operations;
1106
1107/* file.c */
1108extern const struct inode_operations ext3_file_inode_operations;
1109extern const struct file_operations ext3_file_operations;
1110
1111/* namei.c */
1112extern const struct inode_operations ext3_dir_inode_operations;
1113extern const struct inode_operations ext3_special_inode_operations;
1114
1115/* symlink.c */
1116extern const struct inode_operations ext3_symlink_inode_operations;
1117extern const struct inode_operations ext3_fast_symlink_inode_operations;
1118
/* The journal (s_journal) of the superblock that @inode belongs to. */
1119#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal)
1120
1121/* Define the number of blocks we need to account to a transaction to
1122 * modify one block of data.
1123 *
1124 * We may have to touch one inode, one bitmap buffer, up to three
1125 * indirection blocks, the group and superblock summaries, and the data
1126 * block to complete the transaction. */
1127
1128#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
1129
1130/* Extended attribute operations touch at most two data buffers,
1131 * two bitmap buffers, and two group summaries, in addition to the inode
1132 * and the superblock, which are already accounted for. */
1133
1134#define EXT3_XATTR_TRANS_BLOCKS 6U
1135
1136/* Define the minimum size for a transaction which modifies data. This
1137 * needs to take into account the fact that we may end up modifying two
1138 * quota files too (one for the group, one for the user quota). The
1139 * superblock only gets updated once, of course, so don't bother
1140 * counting that again for the quota updates. */
1141
/* NOTE(review): the reason for the "- 2" is not stated here - confirm. */
1142#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
1143 EXT3_XATTR_TRANS_BLOCKS - 2 + \
1144 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
1145
1146/* Delete operations potentially hit one directory's namespace plus an
1147 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
1148 * generous. We can grow the delete transaction later if necessary. */
1149
1150#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
1151
1152/* Define an arbitrary limit for the amount of data we will anticipate
1153 * writing to any given transaction. For unbounded transactions such as
1154 * write(2) and truncate(2) we can write more than this, but we always
1155 * start off at the maximum transaction size and grow the transaction
1156 * optimistically as we go. */
1157
1158#define EXT3_MAX_TRANS_DATA 64U
1159
1160/* We break up a large truncate or write transaction once the handle's
1161 * buffer credits gets this low, we need either to extend the
1162 * transaction or to start a new one. Reserve enough space here for
1163 * inode, bitmap, superblock, group and indirection updates for at least
1164 * one block, plus two quota updates. Quota allocations are not
1165 * needed. */
1166
1167#define EXT3_RESERVE_TRANS_BLOCKS 12U
1168
/*
 * NOTE(review): undocumented in this header; presumably extra blocks for
 * directory index updates - confirm. Unlike its neighbours it is a plain
 * int constant (no U suffix).
 */
1169#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
1170
/*
 * Per-quota-type block counts. With CONFIG_QUOTA they are non-zero only
 * when the QUOTA mount option is set; without CONFIG_QUOTA they are the
 * constant 0.
 */
1171#ifdef CONFIG_QUOTA
1172/* Amount of blocks needed for quota update - we know that the structure was
1173 * allocated so we need to update only inode+data */
1174#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
1175/* Amount of blocks needed for quota insert/delete - we do some block writes
1176 * but inode, sb and group updates are done only once */
1177#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
1178 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
1179#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
1180 (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
1181#else
1182#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
1183#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
1184#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
1185#endif
/* The same three counts multiplied by MAXQUOTAS. */
1186#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
1187#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
1188#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
1189
/* Prototypes for marking an inode dirty within a journal handle. */
1190int
1191ext3_mark_iloc_dirty(handle_t *handle,
1192 struct inode *inode,
1193 struct ext3_iloc *iloc);
1194
1195/*
1196 * On success, we end up with an outstanding reference count against
1197 * iloc->bh. This _must_ be cleaned up later.
1198 */
1199
1200int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
1201 struct ext3_iloc *iloc);
1202
1203int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
1204
1205/*
1206 * Wrapper functions with which ext3 calls into JBD. The intent here is
1207 * to allow these to be turned into appropriate stubs so ext3 can control
1208 * ext2 filesystems, so ext2+ext3 systems only need one fs. This work hasn't
1209 * been done yet.
1210 */
1211
/* Forwards @handle and @bh unchanged to journal_release_buffer(). */
1212static inline void ext3_journal_release_buffer(handle_t *handle,
1213 struct buffer_head *bh)
1214{
1215 journal_release_buffer(handle, bh);
1216}
1217
1218void ext3_journal_abort_handle(const char *caller, const char *err_fn,
1219 struct buffer_head *bh, handle_t *handle, int err);
1220
/*
 * The __ext3_journal_*() functions take the caller's name as their first
 * argument ("where"); the ext3_journal_*() macros further down supply
 * __func__ for it.
 */
1221int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
1222 struct buffer_head *bh);
1223
1224int __ext3_journal_get_write_access(const char *where, handle_t *handle,
1225 struct buffer_head *bh);
1226
1227int __ext3_journal_forget(const char *where, handle_t *handle,
1228 struct buffer_head *bh);
1229
1230int __ext3_journal_revoke(const char *where, handle_t *handle,
1231 unsigned long blocknr, struct buffer_head *bh);
1232
1233int __ext3_journal_get_create_access(const char *where,
1234 handle_t *handle, struct buffer_head *bh);
1235
1236int __ext3_journal_dirty_metadata(const char *where,
1237 handle_t *handle, struct buffer_head *bh);
1238
/* Convenience forms that pass the calling function's name as "where". */
1239#define ext3_journal_get_undo_access(handle, bh) \
1240 __ext3_journal_get_undo_access(__func__, (handle), (bh))
1241#define ext3_journal_get_write_access(handle, bh) \
1242 __ext3_journal_get_write_access(__func__, (handle), (bh))
1243#define ext3_journal_revoke(handle, blocknr, bh) \
1244 __ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
1245#define ext3_journal_get_create_access(handle, bh) \
1246 __ext3_journal_get_create_access(__func__, (handle), (bh))
1247#define ext3_journal_dirty_metadata(handle, bh) \
1248 __ext3_journal_dirty_metadata(__func__, (handle), (bh))
1249#define ext3_journal_forget(handle, bh) \
1250 __ext3_journal_forget(__func__, (handle), (bh))
1251
1252int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
1253
1254handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
1255int __ext3_journal_stop(const char *where, handle_t *handle);
1256
1257static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
1258{
1259 return ext3_journal_start_sb(inode->i_sb, nblocks);
1260}
1261
/* Calls __ext3_journal_stop() with the calling function's name as "where". */
1262#define ext3_journal_stop(handle) \
1263 __ext3_journal_stop(__func__, (handle))
1264
/*
 * Each of the inline functions below forwards its arguments unchanged to
 * the JBD function of the same name without the "ext3_" prefix and returns
 * that function's result.
 */
1265static inline handle_t *ext3_journal_current_handle(void)
1266{
1267 return journal_current_handle();
1268}
1269
1270static inline int ext3_journal_extend(handle_t *handle, int nblocks)
1271{
1272 return journal_extend(handle, nblocks);
1273}
1274
1275static inline int ext3_journal_restart(handle_t *handle, int nblocks)
1276{
1277 return journal_restart(handle, nblocks);
1278}
1279
1280static inline int ext3_journal_blocks_per_page(struct inode *inode)
1281{
1282 return journal_blocks_per_page(inode);
1283}
1284
1285static inline int ext3_journal_force_commit(journal_t *journal)
1286{
1287 return journal_force_commit(journal);
1288}
1289
1290/* super.c */
1291int ext3_force_commit(struct super_block *sb);
1292
1293static inline int ext3_should_journal_data(struct inode *inode)
1294{
1295 if (!S_ISREG(inode->i_mode))
1296 return 1;
1297 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
1298 return 1;
1299 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1300 return 1;
1301 return 0;
1302}
1303
1304static inline int ext3_should_order_data(struct inode *inode)
1305{
1306 if (!S_ISREG(inode->i_mode))
1307 return 0;
1308 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1309 return 0;
1310 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
1311 return 1;
1312 return 0;
1313}
1314
1315static inline int ext3_should_writeback_data(struct inode *inode)
1316{
1317 if (!S_ISREG(inode->i_mode))
1318 return 0;
1319 if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
1320 return 0;
1321 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
1322 return 1;
1323 return 0;
1324}
1325
1326#include <trace/events/ext3.h>
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
index 785a3261a26..d401f148d74 100644
--- a/fs/ext3/ext3_jbd.c
+++ b/fs/ext3/ext3_jbd.c
@@ -2,7 +2,7 @@
2 * Interface between ext3 and JBD 2 * Interface between ext3 and JBD
3 */ 3 */
4 4
5#include "ext3.h" 5#include <linux/ext3_jbd.h>
6 6
7int __ext3_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 25cb413277e..724df69847d 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -18,8 +18,12 @@
18 * (jj@sunsite.ms.mff.cuni.cz) 18 * (jj@sunsite.ms.mff.cuni.cz)
19 */ 19 */
20 20
21#include <linux/time.h>
22#include <linux/fs.h>
23#include <linux/jbd.h>
21#include <linux/quotaops.h> 24#include <linux/quotaops.h>
22#include "ext3.h" 25#include <linux/ext3_fs.h>
26#include <linux/ext3_jbd.h>
23#include "xattr.h" 27#include "xattr.h"
24#include "acl.h" 28#include "acl.h"
25 29
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index b31dbd4c46a..d494c554c6e 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -22,9 +22,15 @@
22 * we can depend on generic_block_fdatasync() to sync the data blocks. 22 * we can depend on generic_block_fdatasync() to sync the data blocks.
23 */ 23 */
24 24
25#include <linux/time.h>
25#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/fs.h>
28#include <linux/sched.h>
26#include <linux/writeback.h> 29#include <linux/writeback.h>
27#include "ext3.h" 30#include <linux/jbd.h>
31#include <linux/ext3_fs.h>
32#include <linux/ext3_jbd.h>
33#include <trace/events/ext3.h>
28 34
29/* 35/*
30 * akpm: A new design for ext3_sync_file(). 36 * akpm: A new design for ext3_sync_file().
@@ -55,6 +61,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
55 if (ret) 61 if (ret)
56 goto out; 62 goto out;
57 63
64 /*
65 * Taking the mutex here just to keep consistent with how fsync was
66 * called previously, however it looks like we don't need to take
67 * i_mutex at all.
68 */
69 mutex_lock(&inode->i_mutex);
70
58 J_ASSERT(ext3_journal_current_handle() == NULL); 71 J_ASSERT(ext3_journal_current_handle() == NULL);
59 72
60 /* 73 /*
@@ -72,6 +85,7 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
72 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
73 */ 86 */
74 if (ext3_should_journal_data(inode)) { 87 if (ext3_should_journal_data(inode)) {
88 mutex_unlock(&inode->i_mutex);
75 ret = ext3_force_commit(inode->i_sb); 89 ret = ext3_force_commit(inode->i_sb);
76 goto out; 90 goto out;
77 } 91 }
@@ -92,13 +106,10 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
92 * disk caches manually so that data really is on persistent 106 * disk caches manually so that data really is on persistent
93 * storage 107 * storage
94 */ 108 */
95 if (needs_barrier) { 109 if (needs_barrier)
96 int err; 110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
97 111
98 err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 112 mutex_unlock(&inode->i_mutex);
99 if (!ret)
100 ret = err;
101 }
102out: 113out:
103 trace_ext3_sync_file_exit(inode, ret); 114 trace_ext3_sync_file_exit(inode, ret);
104 return ret; 115 return ret;
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index ede315cdf12..7d215b4d4f2 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -9,7 +9,9 @@
9 * License. 9 * License.
10 */ 10 */
11 11
12#include "ext3.h" 12#include <linux/fs.h>
13#include <linux/jbd.h>
14#include <linux/ext3_fs.h>
13#include <linux/cryptohash.h> 15#include <linux/cryptohash.h>
14 16
15#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
@@ -198,8 +200,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
198 return -1; 200 return -1;
199 } 201 }
200 hash = hash & ~1; 202 hash = hash & ~1;
201 if (hash == (EXT3_HTREE_EOF_32BIT << 1)) 203 if (hash == (EXT3_HTREE_EOF << 1))
202 hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; 204 hash = (EXT3_HTREE_EOF-1) << 1;
203 hinfo->hash = hash; 205 hinfo->hash = hash;
204 hinfo->minor_hash = minor_hash; 206 hinfo->minor_hash = minor_hash;
205 return 0; 207 return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 082afd78b10..bf09cbf938c 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -12,10 +12,21 @@
12 * David S. Miller (davem@caip.rutgers.edu), 1995 12 * David S. Miller (davem@caip.rutgers.edu), 1995
13 */ 13 */
14 14
15#include <linux/time.h>
16#include <linux/fs.h>
17#include <linux/jbd.h>
18#include <linux/ext3_fs.h>
19#include <linux/ext3_jbd.h>
20#include <linux/stat.h>
21#include <linux/string.h>
15#include <linux/quotaops.h> 22#include <linux/quotaops.h>
23#include <linux/buffer_head.h>
16#include <linux/random.h> 24#include <linux/random.h>
25#include <linux/bitops.h>
26#include <trace/events/ext3.h>
27
28#include <asm/byteorder.h>
17 29
18#include "ext3.h"
19#include "xattr.h" 30#include "xattr.h"
20#include "acl.h" 31#include "acl.h"
21 32
@@ -167,6 +178,42 @@ error_return:
167} 178}
168 179
169/* 180/*
181 * There are two policies for allocating an inode. If the new inode is
182 * a directory, then a forward search is made for a block group with both
183 * free space and a low directory-to-inode ratio; if that fails, then of
184 * the groups with above-average free space, that group with the fewest
185 * directories already is chosen.
186 *
187 * For other inodes, search forward from the parent directory's block
188 * group to find a free inode.
189 */
190static int find_group_dir(struct super_block *sb, struct inode *parent)
191{
192 int ngroups = EXT3_SB(sb)->s_groups_count;
193 unsigned int freei, avefreei;
194 struct ext3_group_desc *desc, *best_desc = NULL;
195 int group, best_group = -1;
196
197 freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
198 avefreei = freei / ngroups;
199
200 for (group = 0; group < ngroups; group++) {
201 desc = ext3_get_group_desc (sb, group, NULL);
202 if (!desc || !desc->bg_free_inodes_count)
203 continue;
204 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
205 continue;
206 if (!best_desc ||
207 (le16_to_cpu(desc->bg_free_blocks_count) >
208 le16_to_cpu(best_desc->bg_free_blocks_count))) {
209 best_group = group;
210 best_desc = desc;
211 }
212 }
213 return best_group;
214}
215
216/*
170 * Orlov's allocator for directories. 217 * Orlov's allocator for directories.
171 * 218 *
172 * We always try to spread first-level directories. 219 * We always try to spread first-level directories.
@@ -180,7 +227,8 @@ error_return:
180 * It's OK to put directory into a group unless 227 * It's OK to put directory into a group unless
181 * it has too many directories already (max_dirs) or 228 * it has too many directories already (max_dirs) or
182 * it has too few free inodes left (min_inodes) or 229 * it has too few free inodes left (min_inodes) or
183 * it has too few free blocks left (min_blocks). 230 * it has too few free blocks left (min_blocks) or
231 * it's already running too large debt (max_debt).
184 * Parent's group is preferred, if it doesn't satisfy these 232 * Parent's group is preferred, if it doesn't satisfy these
185 * conditions we search cyclically through the rest. If none 233 * conditions we search cyclically through the rest. If none
186 * of the groups look good we just look for a group with more 234 * of the groups look good we just look for a group with more
@@ -190,16 +238,21 @@ error_return:
190 * when we allocate an inode, within 0--255. 238 * when we allocate an inode, within 0--255.
191 */ 239 */
192 240
241#define INODE_COST 64
242#define BLOCK_COST 256
243
193static int find_group_orlov(struct super_block *sb, struct inode *parent) 244static int find_group_orlov(struct super_block *sb, struct inode *parent)
194{ 245{
195 int parent_group = EXT3_I(parent)->i_block_group; 246 int parent_group = EXT3_I(parent)->i_block_group;
196 struct ext3_sb_info *sbi = EXT3_SB(sb); 247 struct ext3_sb_info *sbi = EXT3_SB(sb);
248 struct ext3_super_block *es = sbi->s_es;
197 int ngroups = sbi->s_groups_count; 249 int ngroups = sbi->s_groups_count;
198 int inodes_per_group = EXT3_INODES_PER_GROUP(sb); 250 int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
199 unsigned int freei, avefreei; 251 unsigned int freei, avefreei;
200 ext3_fsblk_t freeb, avefreeb; 252 ext3_fsblk_t freeb, avefreeb;
253 ext3_fsblk_t blocks_per_dir;
201 unsigned int ndirs; 254 unsigned int ndirs;
202 int max_dirs, min_inodes; 255 int max_debt, max_dirs, min_inodes;
203 ext3_grpblk_t min_blocks; 256 ext3_grpblk_t min_blocks;
204 int group = -1, i; 257 int group = -1, i;
205 struct ext3_group_desc *desc; 258 struct ext3_group_desc *desc;
@@ -236,10 +289,20 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
236 goto fallback; 289 goto fallback;
237 } 290 }
238 291
292 blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
293
239 max_dirs = ndirs / ngroups + inodes_per_group / 16; 294 max_dirs = ndirs / ngroups + inodes_per_group / 16;
240 min_inodes = avefreei - inodes_per_group / 4; 295 min_inodes = avefreei - inodes_per_group / 4;
241 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; 296 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
242 297
298 max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
299 if (max_debt * INODE_COST > inodes_per_group)
300 max_debt = inodes_per_group / INODE_COST;
301 if (max_debt > 255)
302 max_debt = 255;
303 if (max_debt == 0)
304 max_debt = 1;
305
243 for (i = 0; i < ngroups; i++) { 306 for (i = 0; i < ngroups; i++) {
244 group = (parent_group + i) % ngroups; 307 group = (parent_group + i) % ngroups;
245 desc = ext3_get_group_desc (sb, group, NULL); 308 desc = ext3_get_group_desc (sb, group, NULL);
@@ -344,7 +407,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
344 * group to find a free inode. 407 * group to find a free inode.
345 */ 408 */
346struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, 409struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
347 const struct qstr *qstr, umode_t mode) 410 const struct qstr *qstr, int mode)
348{ 411{
349 struct super_block *sb; 412 struct super_block *sb;
350 struct buffer_head *bitmap_bh = NULL; 413 struct buffer_head *bitmap_bh = NULL;
@@ -373,9 +436,12 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
373 436
374 sbi = EXT3_SB(sb); 437 sbi = EXT3_SB(sb);
375 es = sbi->s_es; 438 es = sbi->s_es;
376 if (S_ISDIR(mode)) 439 if (S_ISDIR(mode)) {
377 group = find_group_orlov(sb, dir); 440 if (test_opt (sb, OLDALLOC))
378 else 441 group = find_group_dir(sb, dir);
442 else
443 group = find_group_orlov(sb, dir);
444 } else
379 group = find_group_other(sb, dir); 445 group = find_group_other(sb, dir);
380 446
381 err = -ENOSPC; 447 err = -ENOSPC;
@@ -498,12 +564,8 @@ got:
498 if (IS_DIRSYNC(inode)) 564 if (IS_DIRSYNC(inode))
499 handle->h_sync = 1; 565 handle->h_sync = 1;
500 if (insert_inode_locked(inode) < 0) { 566 if (insert_inode_locked(inode) < 0) {
501 /* 567 err = -EINVAL;
502 * Likely a bitmap corruption causing inode to be allocated 568 goto fail_drop;
503 * twice.
504 */
505 err = -EIO;
506 goto fail;
507 } 569 }
508 spin_lock(&sbi->s_next_gen_lock); 570 spin_lock(&sbi->s_next_gen_lock);
509 inode->i_generation = sbi->s_next_generation++; 571 inode->i_generation = sbi->s_next_generation++;
@@ -559,7 +621,7 @@ fail_free_drop:
559fail_drop: 621fail_drop:
560 dquot_drop(inode); 622 dquot_drop(inode);
561 inode->i_flags |= S_NOQUOTA; 623 inode->i_flags |= S_NOQUOTA;
562 clear_nlink(inode); 624 inode->i_nlink = 0;
563 unlock_new_inode(inode); 625 unlock_new_inode(inode);
564 iput(inode); 626 iput(inode);
565 brelse(bitmap_bh); 627 brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b176d425354..2ce3c52db32 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -22,12 +22,23 @@
22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
23 */ 23 */
24 24
25#include <linux/module.h>
26#include <linux/fs.h>
27#include <linux/time.h>
28#include <linux/ext3_jbd.h>
29#include <linux/jbd.h>
25#include <linux/highuid.h> 30#include <linux/highuid.h>
31#include <linux/pagemap.h>
26#include <linux/quotaops.h> 32#include <linux/quotaops.h>
33#include <linux/string.h>
34#include <linux/buffer_head.h>
27#include <linux/writeback.h> 35#include <linux/writeback.h>
28#include <linux/mpage.h> 36#include <linux/mpage.h>
37#include <linux/uio.h>
38#include <linux/bio.h>
39#include <linux/fiemap.h>
29#include <linux/namei.h> 40#include <linux/namei.h>
30#include "ext3.h" 41#include <trace/events/ext3.h>
31#include "xattr.h" 42#include "xattr.h"
32#include "acl.h" 43#include "acl.h"
33 44
@@ -212,12 +223,8 @@ void ext3_evict_inode (struct inode *inode)
212 * 223 *
213 * Note that directories do not have this problem because they don't 224 * Note that directories do not have this problem because they don't
214 * use page cache. 225 * use page cache.
215 *
216 * The s_journal check handles the case when ext3_get_journal() fails
217 * and puts the journal inode.
218 */ 226 */
219 if (inode->i_nlink && ext3_should_journal_data(inode) && 227 if (inode->i_nlink && ext3_should_journal_data(inode) &&
220 EXT3_SB(inode->i_sb)->s_journal &&
221 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
222 tid_t commit_tid = atomic_read(&ei->i_datasync_tid); 229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
223 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; 230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
@@ -272,18 +279,18 @@ void ext3_evict_inode (struct inode *inode)
272 if (ext3_mark_inode_dirty(handle, inode)) { 279 if (ext3_mark_inode_dirty(handle, inode)) {
273 /* If that failed, just dquot_drop() and be done with that */ 280 /* If that failed, just dquot_drop() and be done with that */
274 dquot_drop(inode); 281 dquot_drop(inode);
275 clear_inode(inode); 282 end_writeback(inode);
276 } else { 283 } else {
277 ext3_xattr_delete_inode(handle, inode); 284 ext3_xattr_delete_inode(handle, inode);
278 dquot_free_inode(inode); 285 dquot_free_inode(inode);
279 dquot_drop(inode); 286 dquot_drop(inode);
280 clear_inode(inode); 287 end_writeback(inode);
281 ext3_free_inode(handle, inode); 288 ext3_free_inode(handle, inode);
282 } 289 }
283 ext3_journal_stop(handle); 290 ext3_journal_stop(handle);
284 return; 291 return;
285no_delete: 292no_delete:
286 clear_inode(inode); 293 end_writeback(inode);
287 dquot_drop(inode); 294 dquot_drop(inode);
288} 295}
289 296
@@ -746,7 +753,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
746 struct ext3_block_alloc_info *block_i; 753 struct ext3_block_alloc_info *block_i;
747 ext3_fsblk_t current_block; 754 ext3_fsblk_t current_block;
748 struct ext3_inode_info *ei = EXT3_I(inode); 755 struct ext3_inode_info *ei = EXT3_I(inode);
749 struct timespec now;
750 756
751 block_i = ei->i_block_alloc_info; 757 block_i = ei->i_block_alloc_info;
752 /* 758 /*
@@ -786,11 +792,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
786 } 792 }
787 793
788 /* We are done with atomic stuff, now do the rest of housekeeping */ 794 /* We are done with atomic stuff, now do the rest of housekeeping */
789 now = CURRENT_TIME_SEC; 795
790 if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) { 796 inode->i_ctime = CURRENT_TIME_SEC;
791 inode->i_ctime = now; 797 ext3_mark_inode_dirty(handle, inode);
792 ext3_mark_inode_dirty(handle, inode);
793 }
794 /* ext3_mark_inode_dirty already updated i_sync_tid */ 798 /* ext3_mark_inode_dirty already updated i_sync_tid */
795 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); 799 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
796 800
@@ -1071,7 +1075,8 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
1071 * mapped. 0 in case of a HOLE. 1075 * mapped. 0 in case of a HOLE.
1072 */ 1076 */
1073 if (err > 0) { 1077 if (err > 0) {
1074 WARN_ON(err > 1); 1078 if (err > 1)
1079 WARN_ON(1);
1075 err = 0; 1080 err = 0;
1076 } 1081 }
1077 *errp = err; 1082 *errp = err;
@@ -1127,11 +1132,9 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1127 bh = ext3_getblk(handle, inode, block, create, err); 1132 bh = ext3_getblk(handle, inode, block, create, err);
1128 if (!bh) 1133 if (!bh)
1129 return bh; 1134 return bh;
1130 if (bh_uptodate_or_lock(bh)) 1135 if (buffer_uptodate(bh))
1131 return bh; 1136 return bh;
1132 get_bh(bh); 1137 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
1133 bh->b_end_io = end_buffer_read_sync;
1134 submit_bh(READ | REQ_META | REQ_PRIO, bh);
1135 wait_on_buffer(bh); 1138 wait_on_buffer(bh);
1136 if (buffer_uptodate(bh)) 1139 if (buffer_uptodate(bh))
1137 return bh; 1140 return bh;
@@ -2079,10 +2082,12 @@ static int ext3_block_truncate_page(struct inode *inode, loff_t from)
2079 if (PageUptodate(page)) 2082 if (PageUptodate(page))
2080 set_buffer_uptodate(bh); 2083 set_buffer_uptodate(bh);
2081 2084
2082 if (!bh_uptodate_or_lock(bh)) { 2085 if (!buffer_uptodate(bh)) {
2083 err = bh_submit_read(bh); 2086 err = -EIO;
2087 ll_rw_block(READ, 1, &bh);
2088 wait_on_buffer(bh);
2084 /* Uhhuh. Read error. Complain and punt. */ 2089 /* Uhhuh. Read error. Complain and punt. */
2085 if (err) 2090 if (!buffer_uptodate(bh))
2086 goto unlock; 2091 goto unlock;
2087 } 2092 }
2088 2093
@@ -2503,7 +2508,7 @@ int ext3_can_truncate(struct inode *inode)
2503 * transaction, and VFS/VM ensures that ext3_truncate() cannot run 2508 * transaction, and VFS/VM ensures that ext3_truncate() cannot run
2504 * simultaneously on behalf of the same inode. 2509 * simultaneously on behalf of the same inode.
2505 * 2510 *
2506 * As we work through the truncate and commit bits of it to the journal there 2511 * As we work through the truncate and commmit bits of it to the journal there
2507 * is one core, guiding principle: the file's tree must always be consistent on 2512 * is one core, guiding principle: the file's tree must always be consistent on
2508 * disk. We must be able to restart the truncate after a crash. 2513 * disk. We must be able to restart the truncate after a crash.
2509 * 2514 *
@@ -2890,8 +2895,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2890 transaction_t *transaction; 2895 transaction_t *transaction;
2891 long ret; 2896 long ret;
2892 int block; 2897 int block;
2893 uid_t i_uid;
2894 gid_t i_gid;
2895 2898
2896 inode = iget_locked(sb, ino); 2899 inode = iget_locked(sb, ino);
2897 if (!inode) 2900 if (!inode)
@@ -2908,15 +2911,13 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2908 bh = iloc.bh; 2911 bh = iloc.bh;
2909 raw_inode = ext3_raw_inode(&iloc); 2912 raw_inode = ext3_raw_inode(&iloc);
2910 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 2913 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2911 i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 2914 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2912 i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 2915 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2913 if(!(test_opt (inode->i_sb, NO_UID32))) { 2916 if(!(test_opt (inode->i_sb, NO_UID32))) {
2914 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 2917 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2915 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 2918 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2916 } 2919 }
2917 i_uid_write(inode, i_uid); 2920 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2918 i_gid_write(inode, i_gid);
2919 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2920 inode->i_size = le32_to_cpu(raw_inode->i_size); 2921 inode->i_size = le32_to_cpu(raw_inode->i_size);
2921 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 2922 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2922 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 2923 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
@@ -3071,10 +3072,6 @@ static int ext3_do_update_inode(handle_t *handle,
3071 struct ext3_inode_info *ei = EXT3_I(inode); 3072 struct ext3_inode_info *ei = EXT3_I(inode);
3072 struct buffer_head *bh = iloc->bh; 3073 struct buffer_head *bh = iloc->bh;
3073 int err = 0, rc, block; 3074 int err = 0, rc, block;
3074 int need_datasync = 0;
3075 __le32 disksize;
3076 uid_t i_uid;
3077 gid_t i_gid;
3078 3075
3079again: 3076again:
3080 /* we can't allow multiple procs in here at once, its a bit racey */ 3077 /* we can't allow multiple procs in here at once, its a bit racey */
@@ -3087,38 +3084,32 @@ again:
3087 3084
3088 ext3_get_inode_flags(ei); 3085 ext3_get_inode_flags(ei);
3089 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 3086 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
3090 i_uid = i_uid_read(inode);
3091 i_gid = i_gid_read(inode);
3092 if(!(test_opt(inode->i_sb, NO_UID32))) { 3087 if(!(test_opt(inode->i_sb, NO_UID32))) {
3093 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); 3088 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
3094 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); 3089 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
3095/* 3090/*
3096 * Fix up interoperability with old kernels. Otherwise, old inodes get 3091 * Fix up interoperability with old kernels. Otherwise, old inodes get
3097 * re-used with the upper 16 bits of the uid/gid intact 3092 * re-used with the upper 16 bits of the uid/gid intact
3098 */ 3093 */
3099 if(!ei->i_dtime) { 3094 if(!ei->i_dtime) {
3100 raw_inode->i_uid_high = 3095 raw_inode->i_uid_high =
3101 cpu_to_le16(high_16_bits(i_uid)); 3096 cpu_to_le16(high_16_bits(inode->i_uid));
3102 raw_inode->i_gid_high = 3097 raw_inode->i_gid_high =
3103 cpu_to_le16(high_16_bits(i_gid)); 3098 cpu_to_le16(high_16_bits(inode->i_gid));
3104 } else { 3099 } else {
3105 raw_inode->i_uid_high = 0; 3100 raw_inode->i_uid_high = 0;
3106 raw_inode->i_gid_high = 0; 3101 raw_inode->i_gid_high = 0;
3107 } 3102 }
3108 } else { 3103 } else {
3109 raw_inode->i_uid_low = 3104 raw_inode->i_uid_low =
3110 cpu_to_le16(fs_high2lowuid(i_uid)); 3105 cpu_to_le16(fs_high2lowuid(inode->i_uid));
3111 raw_inode->i_gid_low = 3106 raw_inode->i_gid_low =
3112 cpu_to_le16(fs_high2lowgid(i_gid)); 3107 cpu_to_le16(fs_high2lowgid(inode->i_gid));
3113 raw_inode->i_uid_high = 0; 3108 raw_inode->i_uid_high = 0;
3114 raw_inode->i_gid_high = 0; 3109 raw_inode->i_gid_high = 0;
3115 } 3110 }
3116 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 3111 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3117 disksize = cpu_to_le32(ei->i_disksize); 3112 raw_inode->i_size = cpu_to_le32(ei->i_disksize);
3118 if (disksize != raw_inode->i_size) {
3119 need_datasync = 1;
3120 raw_inode->i_size = disksize;
3121 }
3122 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 3113 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3123 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 3114 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
3124 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 3115 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3134,11 +3125,8 @@ again:
3134 if (!S_ISREG(inode->i_mode)) { 3125 if (!S_ISREG(inode->i_mode)) {
3135 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); 3126 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
3136 } else { 3127 } else {
3137 disksize = cpu_to_le32(ei->i_disksize >> 32); 3128 raw_inode->i_size_high =
3138 if (disksize != raw_inode->i_size_high) { 3129 cpu_to_le32(ei->i_disksize >> 32);
3139 raw_inode->i_size_high = disksize;
3140 need_datasync = 1;
3141 }
3142 if (ei->i_disksize > 0x7fffffffULL) { 3130 if (ei->i_disksize > 0x7fffffffULL) {
3143 struct super_block *sb = inode->i_sb; 3131 struct super_block *sb = inode->i_sb;
3144 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 3132 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3191,8 +3179,6 @@ again:
3191 ext3_clear_inode_state(inode, EXT3_STATE_NEW); 3179 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3192 3180
3193 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3181 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3194 if (need_datasync)
3195 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
3196out_brelse: 3182out_brelse:
3197 brelse (bh); 3183 brelse (bh);
3198 ext3_std_error(inode->i_sb, err); 3184 ext3_std_error(inode->i_sb, err);
@@ -3206,7 +3192,7 @@ out_brelse:
3206 * 3192 *
3207 * - Within generic_file_write() for O_SYNC files. 3193 * - Within generic_file_write() for O_SYNC files.
3208 * Here, there will be no transaction running. We wait for any running 3194 * Here, there will be no transaction running. We wait for any running
3209 * transaction to commit. 3195 * trasnaction to commit.
3210 * 3196 *
3211 * - Within sys_sync(), kupdate and such. 3197 * - Within sys_sync(), kupdate and such.
3212 * We wait on commit, if tol to. 3198 * We wait on commit, if tol to.
@@ -3280,8 +3266,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3280 3266
3281 if (is_quota_modification(inode, attr)) 3267 if (is_quota_modification(inode, attr))
3282 dquot_initialize(inode); 3268 dquot_initialize(inode);
3283 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || 3269 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3284 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { 3270 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3285 handle_t *handle; 3271 handle_t *handle;
3286 3272
3287 /* (user+group)*(old+new) structure, inode write (sb, 3273 /* (user+group)*(old+new) structure, inode write (sb,
@@ -3469,6 +3455,14 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
3469 * inode out, but prune_icache isn't a user-visible syncing function. 3455 * inode out, but prune_icache isn't a user-visible syncing function.
3470 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 3456 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
3471 * we start and wait on commits. 3457 * we start and wait on commits.
3458 *
3459 * Is this efficient/effective? Well, we're being nice to the system
3460 * by cleaning up our inodes proactively so they can be reaped
3461 * without I/O. But we are potentially leaving up to five seconds'
3462 * worth of inodes floating about which prune_icache wants us to
3463 * write out. One way to fix that would be to get prune_icache()
3464 * to do a write_super() to free up some memory. It has the desired
3465 * effect.
3472 */ 3466 */
3473int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) 3467int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3474{ 3468{
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 677a5c27dc6..c7f43944f16 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -7,10 +7,15 @@
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10#include <linux/fs.h>
11#include <linux/jbd.h>
12#include <linux/capability.h>
13#include <linux/ext3_fs.h>
14#include <linux/ext3_jbd.h>
10#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/time.h>
11#include <linux/compat.h> 17#include <linux/compat.h>
12#include <asm/uaccess.h> 18#include <asm/uaccess.h>
13#include "ext3.h"
14 19
15long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 20long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
16{ 21{
@@ -39,7 +44,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
39 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
40 return -EFAULT; 45 return -EFAULT;
41 46
42 err = mnt_want_write_file(filp); 47 err = mnt_want_write(filp->f_path.mnt);
43 if (err) 48 if (err)
44 return err; 49 return err;
45 50
@@ -105,7 +110,7 @@ flags_err:
105 err = ext3_change_inode_journal_flag(inode, jflag); 110 err = ext3_change_inode_journal_flag(inode, jflag);
106flags_out: 111flags_out:
107 mutex_unlock(&inode->i_mutex); 112 mutex_unlock(&inode->i_mutex);
108 mnt_drop_write_file(filp); 113 mnt_drop_write(filp->f_path.mnt);
109 return err; 114 return err;
110 } 115 }
111 case EXT3_IOC_GETVERSION: 116 case EXT3_IOC_GETVERSION:
@@ -121,7 +126,7 @@ flags_out:
121 if (!inode_owner_or_capable(inode)) 126 if (!inode_owner_or_capable(inode))
122 return -EPERM; 127 return -EPERM;
123 128
124 err = mnt_want_write_file(filp); 129 err = mnt_want_write(filp->f_path.mnt);
125 if (err) 130 if (err)
126 return err; 131 return err;
127 if (get_user(generation, (int __user *) arg)) { 132 if (get_user(generation, (int __user *) arg)) {
@@ -129,11 +134,10 @@ flags_out:
129 goto setversion_out; 134 goto setversion_out;
130 } 135 }
131 136
132 mutex_lock(&inode->i_mutex);
133 handle = ext3_journal_start(inode, 1); 137 handle = ext3_journal_start(inode, 1);
134 if (IS_ERR(handle)) { 138 if (IS_ERR(handle)) {
135 err = PTR_ERR(handle); 139 err = PTR_ERR(handle);
136 goto unlock_out; 140 goto setversion_out;
137 } 141 }
138 err = ext3_reserve_inode_write(handle, inode, &iloc); 142 err = ext3_reserve_inode_write(handle, inode, &iloc);
139 if (err == 0) { 143 if (err == 0) {
@@ -142,13 +146,34 @@ flags_out:
142 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 146 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
143 } 147 }
144 ext3_journal_stop(handle); 148 ext3_journal_stop(handle);
145
146unlock_out:
147 mutex_unlock(&inode->i_mutex);
148setversion_out: 149setversion_out:
149 mnt_drop_write_file(filp); 150 mnt_drop_write(filp->f_path.mnt);
150 return err; 151 return err;
151 } 152 }
153#ifdef CONFIG_JBD_DEBUG
154 case EXT3_IOC_WAIT_FOR_READONLY:
155 /*
156 * This is racy - by the time we're woken up and running,
157 * the superblock could be released. And the module could
158 * have been unloaded. So sue me.
159 *
160 * Returns 1 if it slept, else zero.
161 */
162 {
163 struct super_block *sb = inode->i_sb;
164 DECLARE_WAITQUEUE(wait, current);
165 int ret = 0;
166
167 set_current_state(TASK_INTERRUPTIBLE);
168 add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
169 if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
170 schedule();
171 ret = 1;
172 }
173 remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
174 return ret;
175 }
176#endif
152 case EXT3_IOC_GETRSVSZ: 177 case EXT3_IOC_GETRSVSZ:
153 if (test_opt(inode->i_sb, RESERVATION) 178 if (test_opt(inode->i_sb, RESERVATION)
154 && S_ISREG(inode->i_mode) 179 && S_ISREG(inode->i_mode)
@@ -163,7 +188,7 @@ setversion_out:
163 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 188 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
164 return -ENOTTY; 189 return -ENOTTY;
165 190
166 err = mnt_want_write_file(filp); 191 err = mnt_want_write(filp->f_path.mnt);
167 if (err) 192 if (err)
168 return err; 193 return err;
169 194
@@ -194,7 +219,7 @@ setversion_out:
194 } 219 }
195 mutex_unlock(&ei->truncate_mutex); 220 mutex_unlock(&ei->truncate_mutex);
196setrsvsz_out: 221setrsvsz_out:
197 mnt_drop_write_file(filp); 222 mnt_drop_write(filp->f_path.mnt);
198 return err; 223 return err;
199 } 224 }
200 case EXT3_IOC_GROUP_EXTEND: { 225 case EXT3_IOC_GROUP_EXTEND: {
@@ -205,7 +230,7 @@ setrsvsz_out:
205 if (!capable(CAP_SYS_RESOURCE)) 230 if (!capable(CAP_SYS_RESOURCE))
206 return -EPERM; 231 return -EPERM;
207 232
208 err = mnt_want_write_file(filp); 233 err = mnt_want_write(filp->f_path.mnt);
209 if (err) 234 if (err)
210 return err; 235 return err;
211 236
@@ -220,7 +245,7 @@ setrsvsz_out:
220 if (err == 0) 245 if (err == 0)
221 err = err2; 246 err = err2;
222group_extend_out: 247group_extend_out:
223 mnt_drop_write_file(filp); 248 mnt_drop_write(filp->f_path.mnt);
224 return err; 249 return err;
225 } 250 }
226 case EXT3_IOC_GROUP_ADD: { 251 case EXT3_IOC_GROUP_ADD: {
@@ -231,7 +256,7 @@ group_extend_out:
231 if (!capable(CAP_SYS_RESOURCE)) 256 if (!capable(CAP_SYS_RESOURCE))
232 return -EPERM; 257 return -EPERM;
233 258
234 err = mnt_want_write_file(filp); 259 err = mnt_want_write(filp->f_path.mnt);
235 if (err) 260 if (err)
236 return err; 261 return err;
237 262
@@ -248,7 +273,7 @@ group_extend_out:
248 if (err == 0) 273 if (err == 0)
249 err = err2; 274 err = err2;
250group_add_out: 275group_add_out:
251 mnt_drop_write_file(filp); 276 mnt_drop_write(filp->f_path.mnt);
252 return err; 277 return err;
253 } 278 }
254 case FITRIM: { 279 case FITRIM: {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 890b8947c54..0629e09f651 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -24,8 +24,20 @@
24 * Theodore Ts'o, 2002 24 * Theodore Ts'o, 2002
25 */ 25 */
26 26
27#include <linux/fs.h>
28#include <linux/pagemap.h>
29#include <linux/jbd.h>
30#include <linux/time.h>
31#include <linux/ext3_fs.h>
32#include <linux/ext3_jbd.h>
33#include <linux/fcntl.h>
34#include <linux/stat.h>
35#include <linux/string.h>
27#include <linux/quotaops.h> 36#include <linux/quotaops.h>
28#include "ext3.h" 37#include <linux/buffer_head.h>
38#include <linux/bio.h>
39#include <trace/events/ext3.h>
40
29#include "namei.h" 41#include "namei.h"
30#include "xattr.h" 42#include "xattr.h"
31#include "acl.h" 43#include "acl.h"
@@ -46,7 +58,8 @@ static struct buffer_head *ext3_append(handle_t *handle,
46 58
47 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 59 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
48 60
49 if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) { 61 bh = ext3_bread(handle, inode, *block, 1, err);
62 if (bh) {
50 inode->i_size += inode->i_sb->s_blocksize; 63 inode->i_size += inode->i_sb->s_blocksize;
51 EXT3_I(inode)->i_disksize = inode->i_size; 64 EXT3_I(inode)->i_disksize = inode->i_size;
52 *err = ext3_journal_get_write_access(handle, bh); 65 *err = ext3_journal_get_write_access(handle, bh);
@@ -338,10 +351,8 @@ dx_probe(struct qstr *entry, struct inode *dir,
338 u32 hash; 351 u32 hash;
339 352
340 frame->bh = NULL; 353 frame->bh = NULL;
341 if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) { 354 if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
342 *err = ERR_BAD_DX_DIR;
343 goto fail; 355 goto fail;
344 }
345 root = (struct dx_root *) bh->b_data; 356 root = (struct dx_root *) bh->b_data;
346 if (root->info.hash_version != DX_HASH_TEA && 357 if (root->info.hash_version != DX_HASH_TEA &&
347 root->info.hash_version != DX_HASH_HALF_MD4 && 358 root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -437,10 +448,8 @@ dx_probe(struct qstr *entry, struct inode *dir,
437 frame->entries = entries; 448 frame->entries = entries;
438 frame->at = at; 449 frame->at = at;
439 if (!indirect--) return frame; 450 if (!indirect--) return frame;
440 if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) { 451 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
441 *err = ERR_BAD_DX_DIR;
442 goto fail2; 452 goto fail2;
443 }
444 at = entries = ((struct dx_node *) bh->b_data)->entries; 453 at = entries = ((struct dx_node *) bh->b_data)->entries;
445 if (dx_get_limit(entries) != dx_node_limit (dir)) { 454 if (dx_get_limit(entries) != dx_node_limit (dir)) {
446 ext3_warning(dir->i_sb, __func__, 455 ext3_warning(dir->i_sb, __func__,
@@ -538,8 +547,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
538 * block so no check is necessary 547 * block so no check is necessary
539 */ 548 */
540 while (num_frames--) { 549 while (num_frames--) {
541 if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at), 550 if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
542 0, &err))) 551 0, &err)))
543 return err; /* Failure */ 552 return err; /* Failure */
544 p++; 553 p++;
545 brelse (p->bh); 554 brelse (p->bh);
@@ -562,11 +571,10 @@ static int htree_dirblock_to_tree(struct file *dir_file,
562{ 571{
563 struct buffer_head *bh; 572 struct buffer_head *bh;
564 struct ext3_dir_entry_2 *de, *top; 573 struct ext3_dir_entry_2 *de, *top;
565 int err = 0, count = 0; 574 int err, count = 0;
566 575
567 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); 576 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
568 577 if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
569 if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err)))
570 return err; 578 return err;
571 579
572 de = (struct ext3_dir_entry_2 *) bh->b_data; 580 de = (struct ext3_dir_entry_2 *) bh->b_data;
@@ -913,12 +921,9 @@ restart:
913 num++; 921 num++;
914 bh = ext3_getblk(NULL, dir, b++, 0, &err); 922 bh = ext3_getblk(NULL, dir, b++, 0, &err);
915 bh_use[ra_max] = bh; 923 bh_use[ra_max] = bh;
916 if (bh && !bh_uptodate_or_lock(bh)) { 924 if (bh)
917 get_bh(bh); 925 ll_rw_block(READ | REQ_META | REQ_PRIO,
918 bh->b_end_io = end_buffer_read_sync; 926 1, &bh);
919 submit_bh(READ | REQ_META | REQ_PRIO,
920 bh);
921 }
922 } 927 }
923 } 928 }
924 if ((bh = bh_use[ra_ptr++]) == NULL) 929 if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -980,7 +985,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
980 return NULL; 985 return NULL;
981 do { 986 do {
982 block = dx_get_block(frame->at); 987 block = dx_get_block(frame->at);
983 if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err))) 988 if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
984 goto errout; 989 goto errout;
985 990
986 retval = search_dirblock(bh, dir, entry, 991 retval = search_dirblock(bh, dir, entry,
@@ -1015,7 +1020,7 @@ errout:
1015 return NULL; 1020 return NULL;
1016} 1021}
1017 1022
1018static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) 1023static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
1019{ 1024{
1020 struct inode * inode; 1025 struct inode * inode;
1021 struct ext3_dir_entry_2 * de; 1026 struct ext3_dir_entry_2 * de;
@@ -1049,7 +1054,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, uns
1049struct dentry *ext3_get_parent(struct dentry *child) 1054struct dentry *ext3_get_parent(struct dentry *child)
1050{ 1055{
1051 unsigned long ino; 1056 unsigned long ino;
1052 struct qstr dotdot = QSTR_INIT("..", 2); 1057 struct qstr dotdot = {.name = "..", .len = 2};
1053 struct ext3_dir_entry_2 * de; 1058 struct ext3_dir_entry_2 * de;
1054 struct buffer_head *bh; 1059 struct buffer_head *bh;
1055 1060
@@ -1462,9 +1467,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1462 } 1467 }
1463 blocks = dir->i_size >> sb->s_blocksize_bits; 1468 blocks = dir->i_size >> sb->s_blocksize_bits;
1464 for (block = 0; block < blocks; block++) { 1469 for (block = 0; block < blocks; block++) {
1465 if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval))) 1470 bh = ext3_bread(handle, dir, block, 0, &retval);
1471 if(!bh)
1466 return retval; 1472 return retval;
1467
1468 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1473 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1469 if (retval != -ENOSPC) 1474 if (retval != -ENOSPC)
1470 return retval; 1475 return retval;
@@ -1504,7 +1509,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1504 entries = frame->entries; 1509 entries = frame->entries;
1505 at = frame->at; 1510 at = frame->at;
1506 1511
1507 if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err))) 1512 if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
1508 goto cleanup; 1513 goto cleanup;
1509 1514
1510 BUFFER_TRACE(bh, "get_write_access"); 1515 BUFFER_TRACE(bh, "get_write_access");
@@ -1675,8 +1680,8 @@ static int ext3_add_nondir(handle_t *handle,
1675 int err = ext3_add_entry(handle, dentry, inode); 1680 int err = ext3_add_entry(handle, dentry, inode);
1676 if (!err) { 1681 if (!err) {
1677 ext3_mark_inode_dirty(handle, inode); 1682 ext3_mark_inode_dirty(handle, inode);
1678 unlock_new_inode(inode);
1679 d_instantiate(dentry, inode); 1683 d_instantiate(dentry, inode);
1684 unlock_new_inode(inode);
1680 return 0; 1685 return 0;
1681 } 1686 }
1682 drop_nlink(inode); 1687 drop_nlink(inode);
@@ -1693,8 +1698,8 @@ static int ext3_add_nondir(handle_t *handle,
1693 * If the create succeeds, we fill in the inode information 1698 * If the create succeeds, we fill in the inode information
1694 * with d_instantiate(). 1699 * with d_instantiate().
1695 */ 1700 */
1696static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, 1701static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1697 bool excl) 1702 struct nameidata *nd)
1698{ 1703{
1699 handle_t *handle; 1704 handle_t *handle;
1700 struct inode * inode; 1705 struct inode * inode;
@@ -1727,7 +1732,7 @@ retry:
1727} 1732}
1728 1733
1729static int ext3_mknod (struct inode * dir, struct dentry *dentry, 1734static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1730 umode_t mode, dev_t rdev) 1735 int mode, dev_t rdev)
1731{ 1736{
1732 handle_t *handle; 1737 handle_t *handle;
1733 struct inode *inode; 1738 struct inode *inode;
@@ -1763,7 +1768,7 @@ retry:
1763 return err; 1768 return err;
1764} 1769}
1765 1770
1766static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 1771static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1767{ 1772{
1768 handle_t *handle; 1773 handle_t *handle;
1769 struct inode * inode; 1774 struct inode * inode;
@@ -1794,7 +1799,8 @@ retry:
1794 inode->i_op = &ext3_dir_inode_operations; 1799 inode->i_op = &ext3_dir_inode_operations;
1795 inode->i_fop = &ext3_dir_operations; 1800 inode->i_fop = &ext3_dir_operations;
1796 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1801 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1797 if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err))) 1802 dir_block = ext3_bread (handle, inode, 0, 1, &err);
1803 if (!dir_block)
1798 goto out_clear_inode; 1804 goto out_clear_inode;
1799 1805
1800 BUFFER_TRACE(dir_block, "get_write_access"); 1806 BUFFER_TRACE(dir_block, "get_write_access");
@@ -1815,7 +1821,7 @@ retry:
1815 de->name_len = 2; 1821 de->name_len = 2;
1816 strcpy (de->name, ".."); 1822 strcpy (de->name, "..");
1817 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1818 set_nlink(inode, 2); 1824 inode->i_nlink = 2;
1819 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1820 err = ext3_journal_dirty_metadata(handle, dir_block); 1826 err = ext3_journal_dirty_metadata(handle, dir_block);
1821 if (err) 1827 if (err)
@@ -1827,7 +1833,7 @@ retry:
1827 1833
1828 if (err) { 1834 if (err) {
1829out_clear_inode: 1835out_clear_inode:
1830 clear_nlink(inode); 1836 inode->i_nlink = 0;
1831 unlock_new_inode(inode); 1837 unlock_new_inode(inode);
1832 ext3_mark_inode_dirty(handle, inode); 1838 ext3_mark_inode_dirty(handle, inode);
1833 iput (inode); 1839 iput (inode);
@@ -1839,8 +1845,8 @@ out_clear_inode:
1839 if (err) 1845 if (err)
1840 goto out_clear_inode; 1846 goto out_clear_inode;
1841 1847
1842 unlock_new_inode(inode);
1843 d_instantiate(dentry, inode); 1848 d_instantiate(dentry, inode);
1849 unlock_new_inode(inode);
1844out_stop: 1850out_stop:
1845 brelse(dir_block); 1851 brelse(dir_block);
1846 ext3_journal_stop(handle); 1852 ext3_journal_stop(handle);
@@ -1862,7 +1868,7 @@ static int empty_dir (struct inode * inode)
1862 1868
1863 sb = inode->i_sb; 1869 sb = inode->i_sb;
1864 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || 1870 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
1865 !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) { 1871 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
1866 if (err) 1872 if (err)
1867 ext3_error(inode->i_sb, __func__, 1873 ext3_error(inode->i_sb, __func__,
1868 "error %d reading directory #%lu offset 0", 1874 "error %d reading directory #%lu offset 0",
@@ -1893,8 +1899,9 @@ static int empty_dir (struct inode * inode)
1893 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1899 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
1894 err = 0; 1900 err = 0;
1895 brelse (bh); 1901 brelse (bh);
1896 if (!(bh = ext3_dir_bread (NULL, inode, 1902 bh = ext3_bread (NULL, inode,
1897 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) { 1903 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
1904 if (!bh) {
1898 if (err) 1905 if (err)
1899 ext3_error(sb, __func__, 1906 ext3_error(sb, __func__,
1900 "error %d reading directory" 1907 "error %d reading directory"
@@ -2163,7 +2170,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2163 ext3_warning (inode->i_sb, "ext3_unlink", 2170 ext3_warning (inode->i_sb, "ext3_unlink",
2164 "Deleting nonexistent file (%lu), %d", 2171 "Deleting nonexistent file (%lu), %d",
2165 inode->i_ino, inode->i_nlink); 2172 inode->i_ino, inode->i_nlink);
2166 set_nlink(inode, 1); 2173 inode->i_nlink = 1;
2167 } 2174 }
2168 retval = ext3_delete_entry(handle, dir, de, bh); 2175 retval = ext3_delete_entry(handle, dir, de, bh);
2169 if (retval) 2176 if (retval)
@@ -2265,7 +2272,7 @@ retry:
2265 err = PTR_ERR(handle); 2272 err = PTR_ERR(handle);
2266 goto err_drop_inode; 2273 goto err_drop_inode;
2267 } 2274 }
2268 set_nlink(inode, 1); 2275 inc_nlink(inode);
2269 err = ext3_orphan_del(handle, inode); 2276 err = ext3_orphan_del(handle, inode);
2270 if (err) { 2277 if (err) {
2271 ext3_journal_stop(handle); 2278 ext3_journal_stop(handle);
@@ -2390,7 +2397,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2390 goto end_rename; 2397 goto end_rename;
2391 } 2398 }
2392 retval = -EIO; 2399 retval = -EIO;
2393 dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval); 2400 dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
2394 if (!dir_bh) 2401 if (!dir_bh)
2395 goto end_rename; 2402 goto end_rename;
2396 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2403 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
index 46304d8c9f0..f2ce2b0065c 100644
--- a/fs/ext3/namei.h
+++ b/fs/ext3/namei.h
@@ -6,22 +6,3 @@
6*/ 6*/
7 7
8extern struct dentry *ext3_get_parent(struct dentry *child); 8extern struct dentry *ext3_get_parent(struct dentry *child);
9
10static inline struct buffer_head *ext3_dir_bread(handle_t *handle,
11 struct inode *inode,
12 int block, int create,
13 int *err)
14{
15 struct buffer_head *bh;
16
17 bh = ext3_bread(handle, inode, block, create, err);
18
19 if (!bh && !(*err)) {
20 *err = -EIO;
21 ext3_error(inode->i_sb, __func__,
22 "Directory hole detected on inode %lu\n",
23 inode->i_ino);
24 return NULL;
25 }
26 return bh;
27}
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0f814f3450d..7916e4ce166 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -11,7 +11,10 @@
11 11
12#define EXT3FS_DEBUG 12#define EXT3FS_DEBUG
13 13
14#include "ext3.h" 14#include <linux/ext3_jbd.h>
15
16#include <linux/errno.h>
17#include <linux/slab.h>
15 18
16 19
17#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e50223b329..7beb69ae001 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -17,12 +17,22 @@
17 */ 17 */
18 18
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/string.h>
21#include <linux/fs.h>
22#include <linux/time.h>
23#include <linux/jbd.h>
24#include <linux/ext3_fs.h>
25#include <linux/ext3_jbd.h>
26#include <linux/slab.h>
27#include <linux/init.h>
20#include <linux/blkdev.h> 28#include <linux/blkdev.h>
21#include <linux/parser.h> 29#include <linux/parser.h>
30#include <linux/buffer_head.h>
22#include <linux/exportfs.h> 31#include <linux/exportfs.h>
23#include <linux/statfs.h> 32#include <linux/vfs.h>
24#include <linux/random.h> 33#include <linux/random.h>
25#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/namei.h>
26#include <linux/quotaops.h> 36#include <linux/quotaops.h>
27#include <linux/seq_file.h> 37#include <linux/seq_file.h>
28#include <linux/log2.h> 38#include <linux/log2.h>
@@ -30,13 +40,13 @@
30 40
31#include <asm/uaccess.h> 41#include <asm/uaccess.h>
32 42
33#define CREATE_TRACE_POINTS
34
35#include "ext3.h"
36#include "xattr.h" 43#include "xattr.h"
37#include "acl.h" 44#include "acl.h"
38#include "namei.h" 45#include "namei.h"
39 46
47#define CREATE_TRACE_POINTS
48#include <trace/events/ext3.h>
49
40#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 50#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
41 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
42#else 52#else
@@ -64,6 +74,11 @@ static int ext3_freeze(struct super_block *sb);
64 74
65/* 75/*
66 * Wrappers for journal_start/end. 76 * Wrappers for journal_start/end.
77 *
78 * The only special thing we need to do here is to make sure that all
79 * journal_end calls result in the superblock being marked dirty, so
80 * that sync() will call the filesystem's write_super callback if
81 * appropriate.
67 */ 82 */
68handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 83handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
69{ 84{
@@ -85,6 +100,12 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
85 return journal_start(journal, nblocks); 100 return journal_start(journal, nblocks);
86} 101}
87 102
103/*
104 * The only special thing we need to do here is to make sure that all
105 * journal_stop calls result in the superblock being marked dirty, so
106 * that sync() will call the filesystem's write_super callback if
107 * appropriate.
108 */
88int __ext3_journal_stop(const char *where, handle_t *handle) 109int __ext3_journal_stop(const char *where, handle_t *handle)
89{ 110{
90 struct super_block *sb; 111 struct super_block *sb;
@@ -490,6 +511,7 @@ static int ext3_drop_inode(struct inode *inode)
490static void ext3_i_callback(struct rcu_head *head) 511static void ext3_i_callback(struct rcu_head *head)
491{ 512{
492 struct inode *inode = container_of(head, struct inode, i_rcu); 513 struct inode *inode = container_of(head, struct inode, i_rcu);
514 INIT_LIST_HEAD(&inode->i_dentry);
493 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 515 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
494} 516}
495 517
@@ -532,11 +554,6 @@ static int init_inodecache(void)
532 554
533static void destroy_inodecache(void) 555static void destroy_inodecache(void)
534{ 556{
535 /*
536 * Make sure all delayed rcu free inodes are flushed before we
537 * destroy cache.
538 */
539 rcu_barrier();
540 kmem_cache_destroy(ext3_inode_cachep); 557 kmem_cache_destroy(ext3_inode_cachep);
541} 558}
542 559
@@ -594,9 +611,9 @@ static char *data_mode_string(unsigned long mode)
594 * - it's set to a non-default value OR 611 * - it's set to a non-default value OR
595 * - if the per-sb default is different from the global default 612 * - if the per-sb default is different from the global default
596 */ 613 */
597static int ext3_show_options(struct seq_file *seq, struct dentry *root) 614static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
598{ 615{
599 struct super_block *sb = root->d_sb; 616 struct super_block *sb = vfs->mnt_sb;
600 struct ext3_sb_info *sbi = EXT3_SB(sb); 617 struct ext3_sb_info *sbi = EXT3_SB(sb);
601 struct ext3_super_block *es = sbi->s_es; 618 struct ext3_super_block *es = sbi->s_es;
602 unsigned long def_mount_opts; 619 unsigned long def_mount_opts;
@@ -611,15 +628,13 @@ static int ext3_show_options(struct seq_file *seq, struct dentry *root)
611 seq_puts(seq, ",grpid"); 628 seq_puts(seq, ",grpid");
612 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) 629 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
613 seq_puts(seq, ",nogrpid"); 630 seq_puts(seq, ",nogrpid");
614 if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) || 631 if (sbi->s_resuid != EXT3_DEF_RESUID ||
615 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { 632 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
616 seq_printf(seq, ",resuid=%u", 633 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
617 from_kuid_munged(&init_user_ns, sbi->s_resuid));
618 } 634 }
619 if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) || 635 if (sbi->s_resgid != EXT3_DEF_RESGID ||
620 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { 636 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
621 seq_printf(seq, ",resgid=%u", 637 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
622 from_kgid_munged(&init_user_ns, sbi->s_resgid));
623 } 638 }
624 if (test_opt(sb, ERRORS_RO)) { 639 if (test_opt(sb, ERRORS_RO)) {
625 int def_errors = le16_to_cpu(es->s_errors); 640 int def_errors = le16_to_cpu(es->s_errors);
@@ -637,6 +652,8 @@ static int ext3_show_options(struct seq_file *seq, struct dentry *root)
637 seq_puts(seq, ",nouid32"); 652 seq_puts(seq, ",nouid32");
638 if (test_opt(sb, DEBUG)) 653 if (test_opt(sb, DEBUG))
639 seq_puts(seq, ",debug"); 654 seq_puts(seq, ",debug");
655 if (test_opt(sb, OLDALLOC))
656 seq_puts(seq, ",oldalloc");
640#ifdef CONFIG_EXT3_FS_XATTR 657#ifdef CONFIG_EXT3_FS_XATTR
641 if (test_opt(sb, XATTR_USER)) 658 if (test_opt(sb, XATTR_USER))
642 seq_puts(seq, ",user_xattr"); 659 seq_puts(seq, ",user_xattr");
@@ -963,8 +980,6 @@ static int parse_options (char *options, struct super_block *sb,
963 substring_t args[MAX_OPT_ARGS]; 980 substring_t args[MAX_OPT_ARGS];
964 int data_opt = 0; 981 int data_opt = 0;
965 int option; 982 int option;
966 kuid_t uid;
967 kgid_t gid;
968#ifdef CONFIG_QUOTA 983#ifdef CONFIG_QUOTA
969 int qfmt; 984 int qfmt;
970#endif 985#endif
@@ -980,7 +995,7 @@ static int parse_options (char *options, struct super_block *sb,
980 * Initialize args struct so we know whether arg was 995 * Initialize args struct so we know whether arg was
981 * found; some options take optional arguments. 996 * found; some options take optional arguments.
982 */ 997 */
983 args[0].to = args[0].from = NULL; 998 args[0].to = args[0].from = 0;
984 token = match_token(p, tokens, args); 999 token = match_token(p, tokens, args);
985 switch (token) { 1000 switch (token) {
986 case Opt_bsd_df: 1001 case Opt_bsd_df:
@@ -998,23 +1013,12 @@ static int parse_options (char *options, struct super_block *sb,
998 case Opt_resuid: 1013 case Opt_resuid:
999 if (match_int(&args[0], &option)) 1014 if (match_int(&args[0], &option))
1000 return 0; 1015 return 0;
1001 uid = make_kuid(current_user_ns(), option); 1016 sbi->s_resuid = option;
1002 if (!uid_valid(uid)) {
1003 ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
1004 return 0;
1005
1006 }
1007 sbi->s_resuid = uid;
1008 break; 1017 break;
1009 case Opt_resgid: 1018 case Opt_resgid:
1010 if (match_int(&args[0], &option)) 1019 if (match_int(&args[0], &option))
1011 return 0; 1020 return 0;
1012 gid = make_kgid(current_user_ns(), option); 1021 sbi->s_resgid = option;
1013 if (!gid_valid(gid)) {
1014 ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
1015 return 0;
1016 }
1017 sbi->s_resgid = gid;
1018 break; 1022 break;
1019 case Opt_sb: 1023 case Opt_sb:
1020 /* handled by get_sb_block() instead of here */ 1024 /* handled by get_sb_block() instead of here */
@@ -1045,12 +1049,10 @@ static int parse_options (char *options, struct super_block *sb,
1045 set_opt (sbi->s_mount_opt, DEBUG); 1049 set_opt (sbi->s_mount_opt, DEBUG);
1046 break; 1050 break;
1047 case Opt_oldalloc: 1051 case Opt_oldalloc:
1048 ext3_msg(sb, KERN_WARNING, 1052 set_opt (sbi->s_mount_opt, OLDALLOC);
1049 "Ignoring deprecated oldalloc option");
1050 break; 1053 break;
1051 case Opt_orlov: 1054 case Opt_orlov:
1052 ext3_msg(sb, KERN_WARNING, 1055 clear_opt (sbi->s_mount_opt, OLDALLOC);
1053 "Ignoring deprecated orlov option");
1054 break; 1056 break;
1055#ifdef CONFIG_EXT3_FS_XATTR 1057#ifdef CONFIG_EXT3_FS_XATTR
1056 case Opt_user_xattr: 1058 case Opt_user_xattr:
@@ -1484,12 +1486,10 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1484 } 1486 }
1485 1487
1486 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1488 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1487 /* don't clear list on RO mount w/ errors */ 1489 if (es->s_last_orphan)
1488 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
1489 jbd_debug(1, "Errors on filesystem, " 1490 jbd_debug(1, "Errors on filesystem, "
1490 "clearing orphan list.\n"); 1491 "clearing orphan list.\n");
1491 es->s_last_orphan = 0; 1492 es->s_last_orphan = 0;
1492 }
1493 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1493 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1494 return; 1494 return;
1495 } 1495 }
@@ -1661,6 +1661,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1661 return -ENOMEM; 1661 return -ENOMEM;
1662 } 1662 }
1663 sb->s_fs_info = sbi; 1663 sb->s_fs_info = sbi;
1664 sbi->s_mount_opt = 0;
1665 sbi->s_resuid = EXT3_DEF_RESUID;
1666 sbi->s_resgid = EXT3_DEF_RESGID;
1664 sbi->s_sb_block = sb_block; 1667 sbi->s_sb_block = sb_block;
1665 1668
1666 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1669 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
@@ -1724,8 +1727,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1724 else 1727 else
1725 set_opt(sbi->s_mount_opt, ERRORS_RO); 1728 set_opt(sbi->s_mount_opt, ERRORS_RO);
1726 1729
1727 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 1730 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1728 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 1731 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1729 1732
1730 /* enable barriers by default */ 1733 /* enable barriers by default */
1731 set_opt(sbi->s_mount_opt, BARRIER); 1734 set_opt(sbi->s_mount_opt, BARRIER);
@@ -2044,23 +2047,22 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2044 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2047 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2045 goto failed_mount3; 2048 goto failed_mount3;
2046 } 2049 }
2047 sb->s_root = d_make_root(root); 2050 sb->s_root = d_alloc_root(root);
2048 if (!sb->s_root) { 2051 if (!sb->s_root) {
2049 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2052 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2053 iput(root);
2050 ret = -ENOMEM; 2054 ret = -ENOMEM;
2051 goto failed_mount3; 2055 goto failed_mount3;
2052 } 2056 }
2053 2057
2054 if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY)) 2058 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
2055 sb->s_flags |= MS_RDONLY;
2056 2059
2057 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2060 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2058 ext3_orphan_cleanup(sb, es); 2061 ext3_orphan_cleanup(sb, es);
2059 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2062 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2060 if (needs_recovery) { 2063 if (needs_recovery)
2061 ext3_mark_recovery_complete(sb, es);
2062 ext3_msg(sb, KERN_INFO, "recovery complete"); 2064 ext3_msg(sb, KERN_INFO, "recovery complete");
2063 } 2065 ext3_mark_recovery_complete(sb, es);
2064 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", 2066 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
@@ -2228,11 +2230,11 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2228 goto out_bdev; 2230 goto out_bdev;
2229 } 2231 }
2230 journal->j_private = sb; 2232 journal->j_private = sb;
2231 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { 2233 ll_rw_block(READ, 1, &journal->j_sb_buffer);
2232 if (bh_submit_read(journal->j_sb_buffer)) { 2234 wait_on_buffer(journal->j_sb_buffer);
2233 ext3_msg(sb, KERN_ERR, "I/O error on journal device"); 2235 if (!buffer_uptodate(journal->j_sb_buffer)) {
2234 goto out_journal; 2236 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2235 } 2237 goto out_journal;
2236 } 2238 }
2237 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2239 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2238 ext3_msg(sb, KERN_ERR, 2240 ext3_msg(sb, KERN_ERR,
@@ -2520,11 +2522,6 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
2520 tid_t target; 2522 tid_t target;
2521 2523
2522 trace_ext3_sync_fs(sb, wait); 2524 trace_ext3_sync_fs(sb, wait);
2523 /*
2524 * Writeback quota in non-journalled quota case - journalled quota has
2525 * no dirty dquots
2526 */
2527 dquot_writeback_dquots(sb, -1);
2528 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2525 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2529 if (wait) 2526 if (wait)
2530 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2527 log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2575,9 +2572,11 @@ out:
2575static int ext3_unfreeze(struct super_block *sb) 2572static int ext3_unfreeze(struct super_block *sb)
2576{ 2573{
2577 if (!(sb->s_flags & MS_RDONLY)) { 2574 if (!(sb->s_flags & MS_RDONLY)) {
2575 lock_super(sb);
2578 /* Reser the needs_recovery flag before the fs is unlocked. */ 2576 /* Reser the needs_recovery flag before the fs is unlocked. */
2579 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2577 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2580 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); 2578 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2579 unlock_super(sb);
2581 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2580 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2582 } 2581 }
2583 return 0; 2582 return 0;
@@ -2597,6 +2596,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2597#endif 2596#endif
2598 2597
2599 /* Store the original options */ 2598 /* Store the original options */
2599 lock_super(sb);
2600 old_sb_flags = sb->s_flags; 2600 old_sb_flags = sb->s_flags;
2601 old_opts.s_mount_opt = sbi->s_mount_opt; 2601 old_opts.s_mount_opt = sbi->s_mount_opt;
2602 old_opts.s_resuid = sbi->s_resuid; 2602 old_opts.s_resuid = sbi->s_resuid;
@@ -2669,13 +2669,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2669 /* 2669 /*
2670 * If we have an unprocessed orphan list hanging 2670 * If we have an unprocessed orphan list hanging
2671 * around from a previously readonly bdev mount, 2671 * around from a previously readonly bdev mount,
2672 * require a full umount & mount for now. 2672 * require a full umount/remount for now.
2673 */ 2673 */
2674 if (es->s_last_orphan) { 2674 if (es->s_last_orphan) {
2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2676 "remount RDWR because of unprocessed " 2676 "remount RDWR because of unprocessed "
2677 "orphan inode list. Please " 2677 "orphan inode list. Please "
2678 "umount & mount instead."); 2678 "umount/remount instead.");
2679 err = -EINVAL; 2679 err = -EINVAL;
2680 goto restore_opts; 2680 goto restore_opts;
2681 } 2681 }
@@ -2702,6 +2702,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2702 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2702 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2703 kfree(old_opts.s_qf_names[i]); 2703 kfree(old_opts.s_qf_names[i]);
2704#endif 2704#endif
2705 unlock_super(sb);
2706
2705 if (enable_quota) 2707 if (enable_quota)
2706 dquot_resume(sb, -1); 2708 dquot_resume(sb, -1);
2707 return 0; 2709 return 0;
@@ -2720,6 +2722,7 @@ restore_opts:
2720 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2722 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2721 } 2723 }
2722#endif 2724#endif
2725 unlock_super(sb);
2723 return err; 2726 return err;
2724} 2727}
2725 2728
@@ -2801,7 +2804,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2801 2804
2802static inline struct inode *dquot_to_inode(struct dquot *dquot) 2805static inline struct inode *dquot_to_inode(struct dquot *dquot)
2803{ 2806{
2804 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; 2807 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
2805} 2808}
2806 2809
2807static int ext3_write_dquot(struct dquot *dquot) 2810static int ext3_write_dquot(struct dquot *dquot)
@@ -2907,7 +2910,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2907 return -EINVAL; 2910 return -EINVAL;
2908 2911
2909 /* Quotafile not on the same filesystem? */ 2912 /* Quotafile not on the same filesystem? */
2910 if (path->dentry->d_sb != sb) 2913 if (path->mnt->mnt_sb != sb)
2911 return -EXDEV; 2914 return -EXDEV;
2912 /* Journaling quota? */ 2915 /* Journaling quota? */
2913 if (EXT3_SB(sb)->s_qf_names[type]) { 2916 if (EXT3_SB(sb)->s_qf_names[type]) {
@@ -3008,6 +3011,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
3008 (unsigned long long)off, (unsigned long long)len); 3011 (unsigned long long)off, (unsigned long long)len);
3009 return -EIO; 3012 return -EIO;
3010 } 3013 }
3014 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3011 bh = ext3_bread(handle, inode, blk, 1, &err); 3015 bh = ext3_bread(handle, inode, blk, 1, &err);
3012 if (!bh) 3016 if (!bh)
3013 goto out; 3017 goto out;
@@ -3031,8 +3035,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
3031 } 3035 }
3032 brelse(bh); 3036 brelse(bh);
3033out: 3037out:
3034 if (err) 3038 if (err) {
3039 mutex_unlock(&inode->i_mutex);
3035 return err; 3040 return err;
3041 }
3036 if (inode->i_size < off + len) { 3042 if (inode->i_size < off + len) {
3037 i_size_write(inode, off + len); 3043 i_size_write(inode, off + len);
3038 EXT3_I(inode)->i_disksize = inode->i_size; 3044 EXT3_I(inode)->i_disksize = inode->i_size;
@@ -3040,6 +3046,7 @@ out:
3040 inode->i_version++; 3046 inode->i_version++;
3041 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3047 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3042 ext3_mark_inode_dirty(handle, inode); 3048 ext3_mark_inode_dirty(handle, inode);
3049 mutex_unlock(&inode->i_mutex);
3043 return len; 3050 return len;
3044} 3051}
3045 3052
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 6b01c3eab1f..7c489820777 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -17,8 +17,10 @@
17 * ext3 symlink handling code 17 * ext3 symlink handling code
18 */ 18 */
19 19
20#include <linux/fs.h>
21#include <linux/jbd.h>
22#include <linux/ext3_fs.h>
20#include <linux/namei.h> 23#include <linux/namei.h>
21#include "ext3.h"
22#include "xattr.h" 24#include "xattr.h"
23 25
24static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d22ebb7a4f5..d565759d82e 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -50,9 +50,14 @@
50 * by the buffer lock. 50 * by the buffer lock.
51 */ 51 */
52 52
53#include "ext3.h" 53#include <linux/init.h>
54#include <linux/fs.h>
55#include <linux/slab.h>
56#include <linux/ext3_jbd.h>
57#include <linux/ext3_fs.h>
54#include <linux/mbcache.h> 58#include <linux/mbcache.h>
55#include <linux/quotaops.h> 59#include <linux/quotaops.h>
60#include <linux/rwsem.h>
56#include "xattr.h" 61#include "xattr.h"
57#include "acl.h" 62#include "acl.h"
58 63
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 3387664ad70..b8d9f83aa5c 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -3,8 +3,13 @@
3 * Handler for storing security labels as extended attributes. 3 * Handler for storing security labels as extended attributes.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/slab.h>
8#include <linux/string.h>
9#include <linux/fs.h>
10#include <linux/ext3_jbd.h>
11#include <linux/ext3_fs.h>
6#include <linux/security.h> 12#include <linux/security.h>
7#include "ext3.h"
8#include "xattr.h" 13#include "xattr.h"
9 14
10static size_t 15static size_t
@@ -43,30 +48,26 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
43 name, value, size, flags); 48 name, value, size, flags);
44} 49}
45 50
46int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
47 void *fs_info)
48{
49 const struct xattr *xattr;
50 handle_t *handle = fs_info;
51 int err = 0;
52
53 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
54 err = ext3_xattr_set_handle(handle, inode,
55 EXT3_XATTR_INDEX_SECURITY,
56 xattr->name, xattr->value,
57 xattr->value_len, 0);
58 if (err < 0)
59 break;
60 }
61 return err;
62}
63
64int 51int
65ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, 52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
66 const struct qstr *qstr) 53 const struct qstr *qstr)
67{ 54{
68 return security_inode_init_security(inode, dir, qstr, 55 int err;
69 &ext3_initxattrs, handle); 56 size_t len;
57 void *value;
58 char *name;
59
60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
61 if (err) {
62 if (err == -EOPNOTSUPP)
63 return 0;
64 return err;
65 }
66 err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
67 name, value, len, 0);
68 kfree(name);
69 kfree(value);
70 return err;
70} 71}
71 72
72const struct xattr_handler ext3_xattr_security_handler = { 73const struct xattr_handler ext3_xattr_security_handler = {
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index d75727cc67f..dc8edda9ffe 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -5,7 +5,12 @@
5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include "ext3.h" 8#include <linux/module.h>
9#include <linux/string.h>
10#include <linux/capability.h>
11#include <linux/fs.h>
12#include <linux/ext3_jbd.h>
13#include <linux/ext3_fs.h>
9#include "xattr.h" 14#include "xattr.h"
10 15
11static size_t 16static size_t
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 5612af3567e..7a321974d58 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -5,7 +5,11 @@
5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> 5 * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
6 */ 6 */
7 7
8#include "ext3.h" 8#include <linux/module.h>
9#include <linux/string.h>
10#include <linux/fs.h>
11#include <linux/ext3_jbd.h>
12#include <linux/ext3_fs.h>
9#include "xattr.h" 13#include "xattr.h"
10 14
11static size_t 15static size_t