aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext4.txt12
-rw-r--r--fs/ext4/balloc.c71
-rw-r--r--fs/ext4/mballoc.c8
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c36
-rw-r--r--fs/jbd2/commit.c1
-rw-r--r--fs/jbd2/recovery.c12
-rw-r--r--include/linux/jbd2.h3
8 files changed, 121 insertions, 25 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 560f88dc7090..0c5086db8352 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -139,8 +139,16 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata
139 Setting it to very large values will improve 139 Setting it to very large values will improve
140 performance. 140 performance.
141 141
142barrier=1 This enables/disables barriers. barrier=0 disables 142barrier=<0|1(*)> This enables/disables the use of write barriers in
143 it, barrier=1 enables it. 143 the jbd code. barrier=0 disables, barrier=1 enables.
144 This also requires an IO stack which can support
145 barriers, and if jbd gets an error on a barrier
146 write, it will disable again with a warning.
147 Write barriers enforce proper on-disk ordering
148 of journal commits, making volatile disk write caches
149 safe to use, at some performance penalty. If
150 your disks are battery-backed in one way or another,
151 disabling barriers may safely improve performance.
144 152
145orlov (*) This enables the new Orlov block allocator. It is 153orlov (*) This enables the new Orlov block allocator. It is
146 enabled by default. 154 enabled by default.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 30494c5da843..9cc80b9cc8d8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
43 43
44} 44}
45 45
46static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
47 ext4_group_t block_group)
48{
49 ext4_group_t actual_group;
50 ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
51 if (actual_group == block_group)
52 return 1;
53 return 0;
54}
55
56static int ext4_group_used_meta_blocks(struct super_block *sb,
57 ext4_group_t block_group)
58{
59 ext4_fsblk_t tmp;
60 struct ext4_sb_info *sbi = EXT4_SB(sb);
61 /* block bitmap, inode bitmap, and inode table blocks */
62 int used_blocks = sbi->s_itb_per_group + 2;
63
64 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
65 struct ext4_group_desc *gdp;
66 struct buffer_head *bh;
67
68 gdp = ext4_get_group_desc(sb, block_group, &bh);
69 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
70 block_group))
71 used_blocks--;
72
73 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
74 block_group))
75 used_blocks--;
76
77 tmp = ext4_inode_table(sb, gdp);
78 for (; tmp < ext4_inode_table(sb, gdp) +
79 sbi->s_itb_per_group; tmp++) {
80 if (!ext4_block_in_group(sb, tmp, block_group))
81 used_blocks -= 1;
82 }
83 }
84 return used_blocks;
85}
46/* Initializes an uninitialized block bitmap if given, and returns the 86/* Initializes an uninitialized block bitmap if given, and returns the
47 * number of blocks free in the group. */ 87 * number of blocks free in the group. */
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
105 free_blocks = group_blocks - bit_max; 145 free_blocks = group_blocks - bit_max;
106 146
107 if (bh) { 147 if (bh) {
108 ext4_fsblk_t start; 148 ext4_fsblk_t start, tmp;
149 int flex_bg = 0;
109 150
110 for (bit = 0; bit < bit_max; bit++) 151 for (bit = 0; bit < bit_max; bit++)
111 ext4_set_bit(bit, bh->b_data); 152 ext4_set_bit(bit, bh->b_data);
112 153
113 start = ext4_group_first_block_no(sb, block_group); 154 start = ext4_group_first_block_no(sb, block_group);
114 155
115 /* Set bits for block and inode bitmaps, and inode table */ 156 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 157 EXT4_FEATURE_INCOMPAT_FLEX_BG))
117 ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); 158 flex_bg = 1;
118 for (bit = (ext4_inode_table(sb, gdp) - start),
119 bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
120 ext4_set_bit(bit, bh->b_data);
121 159
160 /* Set bits for block and inode bitmaps, and inode table */
161 tmp = ext4_block_bitmap(sb, gdp);
162 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
163 ext4_set_bit(tmp - start, bh->b_data);
164
165 tmp = ext4_inode_bitmap(sb, gdp);
166 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
167 ext4_set_bit(tmp - start, bh->b_data);
168
169 tmp = ext4_inode_table(sb, gdp);
170 for (; tmp < ext4_inode_table(sb, gdp) +
171 sbi->s_itb_per_group; tmp++) {
172 if (!flex_bg ||
173 ext4_block_in_group(sb, tmp, block_group))
174 ext4_set_bit(tmp - start, bh->b_data);
175 }
122 /* 176 /*
123 * Also if the number of blocks within the group is 177 * Also if the number of blocks within the group is
124 * less than the blocksize * 8 ( which is the size 178 * less than the blocksize * 8 ( which is the size
@@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
126 */ 180 */
127 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 181 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
128 } 182 }
129 183 return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
130 return free_blocks - sbi->s_itb_per_group - 2;
131} 184}
132 185
133 186
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 873ad9b3418c..c9900aade150 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2745 sbi = EXT4_SB(sb); 2745 sbi = EXT4_SB(sb);
2746 es = sbi->s_es; 2746 es = sbi->s_es;
2747 2747
2748 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
2749 gdp->bg_free_blocks_count);
2750 2748
2751 err = -EIO; 2749 err = -EIO;
2752 bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); 2750 bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2762 if (!gdp) 2760 if (!gdp)
2763 goto out_err; 2761 goto out_err;
2764 2762
2763 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
2764 gdp->bg_free_blocks_count);
2765
2765 err = ext4_journal_get_write_access(handle, gdp_bh); 2766 err = ext4_journal_get_write_access(handle, gdp_bh);
2766 if (err) 2767 if (err)
2767 goto out_err; 2768 goto out_err;
@@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3094static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, 3095static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3095 struct ext4_prealloc_space *pa) 3096 struct ext4_prealloc_space *pa)
3096{ 3097{
3097 unsigned len = ac->ac_o_ex.fe_len; 3098 unsigned int len = ac->ac_o_ex.fe_len;
3098
3099 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3099 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3100 &ac->ac_b_ex.fe_group, 3100 &ac->ac_b_ex.fe_group,
3101 &ac->ac_b_ex.fe_start); 3101 &ac->ac_b_ex.fe_start);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9f086a6a472b..9ecb92f68543 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
563 } 563 }
564 564
565 blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; 565 blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
566 data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; 566 data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
567 EXT4_ADDR_PER_BLOCK(sb));
567 end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); 568 end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
568 569
569 /* Get each reserved primary GDT block and verify it holds backups */ 570 /* Get each reserved primary GDT block and verify it holds backups */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 09d9359c8055..cb96f127c366 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
671 unsigned long def_mount_opts; 671 unsigned long def_mount_opts;
672 struct super_block *sb = vfs->mnt_sb; 672 struct super_block *sb = vfs->mnt_sb;
673 struct ext4_sb_info *sbi = EXT4_SB(sb); 673 struct ext4_sb_info *sbi = EXT4_SB(sb);
674 journal_t *journal = sbi->s_journal;
674 struct ext4_super_block *es = sbi->s_es; 675 struct ext4_super_block *es = sbi->s_es;
675 676
676 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 677 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -729,8 +730,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
729 seq_printf(seq, ",commit=%u", 730 seq_printf(seq, ",commit=%u",
730 (unsigned) (sbi->s_commit_interval / HZ)); 731 (unsigned) (sbi->s_commit_interval / HZ));
731 } 732 }
732 if (test_opt(sb, BARRIER)) 733 /*
733 seq_puts(seq, ",barrier=1"); 734 * We're changing the default of barrier mount option, so
735 * let's always display its mount state so it's clear what its
736 * status is.
737 */
738 seq_puts(seq, ",barrier=");
739 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
740 if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
741 seq_puts(seq, ",journal_async_commit");
734 if (test_opt(sb, NOBH)) 742 if (test_opt(sb, NOBH))
735 seq_puts(seq, ",nobh"); 743 seq_puts(seq, ",nobh");
736 if (!test_opt(sb, EXTENTS)) 744 if (!test_opt(sb, EXTENTS))
@@ -1907,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1907 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1915 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1908 1916
1909 set_opt(sbi->s_mount_opt, RESERVATION); 1917 set_opt(sbi->s_mount_opt, RESERVATION);
1918 set_opt(sbi->s_mount_opt, BARRIER);
1910 1919
1911 /* 1920 /*
1912 * turn on extents feature by default in ext4 filesystem 1921 * turn on extents feature by default in ext4 filesystem
@@ -2189,6 +2198,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2189 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2198 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2190 if (ext4_load_journal(sb, es, journal_devnum)) 2199 if (ext4_load_journal(sb, es, journal_devnum))
2191 goto failed_mount3; 2200 goto failed_mount3;
2201 if (!(sb->s_flags & MS_RDONLY) &&
2202 EXT4_SB(sb)->s_journal->j_failed_commit) {
2203 printk(KERN_CRIT "EXT4-fs error (device %s): "
2204 "ext4_fill_super: Journal transaction "
2205 "%u is corrupt\n", sb->s_id,
2206 EXT4_SB(sb)->s_journal->j_failed_commit);
2207 if (test_opt (sb, ERRORS_RO)) {
2208 printk (KERN_CRIT
2209 "Mounting filesystem read-only\n");
2210 sb->s_flags |= MS_RDONLY;
2211 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2212 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2213 }
2214 if (test_opt(sb, ERRORS_PANIC)) {
2215 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2216 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2217 ext4_commit_super(sb, es, 1);
2218 printk(KERN_CRIT
2219 "EXT4-fs (device %s): mount failed\n",
2220 sb->s_id);
2221 goto failed_mount4;
2222 }
2223 }
2192 } else if (journal_inum) { 2224 } else if (journal_inum) {
2193 if (ext4_create_journal(sb, es, journal_inum)) 2225 if (ext4_create_journal(sb, es, journal_inum))
2194 goto failed_mount3; 2226 goto failed_mount3;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4d99685fdce4..a2ed72f7ceee 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal,
168 spin_unlock(&journal->j_state_lock); 168 spin_unlock(&journal->j_state_lock);
169 169
170 /* And try again, without the barrier */ 170 /* And try again, without the barrier */
171 lock_buffer(bh);
171 set_buffer_uptodate(bh); 172 set_buffer_uptodate(bh);
172 set_buffer_dirty(bh); 173 set_buffer_dirty(bh);
173 ret = submit_bh(WRITE, bh); 174 ret = submit_bh(WRITE, bh);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 5d0405a9e7ca..058f50f65b76 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size); 345 obh->b_size);
346 } 346 }
347 put_bh(obh);
347 } 348 }
348 return 0; 349 return 0;
349} 350}
@@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal,
610 chksum_err = chksum_seen = 0; 611 chksum_err = chksum_seen = 0;
611 612
612 if (info->end_transaction) { 613 if (info->end_transaction) {
613 printk(KERN_ERR "JBD: Transaction %u " 614 journal->j_failed_commit =
614 "found to be corrupt.\n", 615 info->end_transaction;
615 next_commit_ID - 1);
616 brelse(bh); 616 brelse(bh);
617 break; 617 break;
618 } 618 }
@@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal,
643 643
644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 printk(KERN_ERR 646 journal->j_failed_commit =
647 "JBD: Transaction %u " 647 next_commit_ID;
648 "found to be corrupt.\n",
649 next_commit_ID);
650 brelse(bh); 648 brelse(bh);
651 break; 649 break;
652 } 650 }
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 05e2b307161a..d147f0f90360 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -919,6 +919,9 @@ struct journal_s
919 struct proc_dir_entry *j_proc_entry; 919 struct proc_dir_entry *j_proc_entry;
920 struct transaction_stats_s j_stats; 920 struct transaction_stats_s j_stats;
921 921
922 /* Failed journal commit ID */
923 unsigned int j_failed_commit;
924
922 /* 925 /*
923 * An opaque pointer to fs-private information. ext3 puts its 926 * An opaque pointer to fs-private information. ext3 puts its
924 * superblock pointer here 927 * superblock pointer here