diff options
-rw-r--r-- | Documentation/filesystems/ext4.txt | 8 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 63 | ||||
-rw-r--r-- | fs/ext4/dir.c | 13 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 34 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 4 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 128 | ||||
-rw-r--r-- | fs/ext4/extents.c | 330 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 2 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 260 | ||||
-rw-r--r-- | fs/ext4/inode.c | 95 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 342 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 20 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 4 | ||||
-rw-r--r-- | fs/ext4/namei.c | 2 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 18 | ||||
-rw-r--r-- | fs/ext4/resize.c | 37 | ||||
-rw-r--r-- | fs/ext4/super.c | 1075 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 25 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 140 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 47 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 361 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 5 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 12 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 48 | ||||
-rw-r--r-- | include/linux/fs.h | 13 | ||||
-rw-r--r-- | include/linux/jbd2.h | 12 | ||||
-rw-r--r-- | include/linux/journal-head.h | 2 | ||||
-rw-r--r-- | include/trace/events/jbd2.h | 29 | ||||
-rw-r--r-- | mm/page-writeback.c | 2 |
30 files changed, 1522 insertions, 1611 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 8c10bf375c73..1b7f9acbcbbe 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -144,9 +144,6 @@ journal_async_commit Commit block can be written to disk without waiting | |||
144 | mount the device. This will enable 'journal_checksum' | 144 | mount the device. This will enable 'journal_checksum' |
145 | internally. | 145 | internally. |
146 | 146 | ||
147 | journal=update Update the ext4 file system's journal to the current | ||
148 | format. | ||
149 | |||
150 | journal_dev=devnum When the external journal device's major/minor numbers | 147 | journal_dev=devnum When the external journal device's major/minor numbers |
151 | have changed, this option allows the user to specify | 148 | have changed, this option allows the user to specify |
152 | the new journal location. The journal device is | 149 | the new journal location. The journal device is |
@@ -356,11 +353,6 @@ nouid32 Disables 32-bit UIDs and GIDs. This is for | |||
356 | interoperability with older kernels which only | 353 | interoperability with older kernels which only |
357 | store and expect 16-bit values. | 354 | store and expect 16-bit values. |
358 | 355 | ||
359 | resize Allows to resize filesystem to the end of the last | ||
360 | existing block group, further resize has to be done | ||
361 | with resize2fs either online, or offline. It can be | ||
362 | used only with conjunction with remount. | ||
363 | |||
364 | block_validity This options allows to enables/disables the in-kernel | 356 | block_validity This options allows to enables/disables the in-kernel |
365 | noblock_validity facility for tracking filesystem metadata blocks | 357 | noblock_validity facility for tracking filesystem metadata blocks |
366 | within internal data structures. This allows multi- | 358 | within internal data structures. This allows multi- |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f9e2cd8cf711..4bbd07a6fa18 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -336,10 +336,10 @@ err_out: | |||
336 | * Return buffer_head on success or NULL in case of failure. | 336 | * Return buffer_head on success or NULL in case of failure. |
337 | */ | 337 | */ |
338 | struct buffer_head * | 338 | struct buffer_head * |
339 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 339 | ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) |
340 | { | 340 | { |
341 | struct ext4_group_desc *desc; | 341 | struct ext4_group_desc *desc; |
342 | struct buffer_head *bh = NULL; | 342 | struct buffer_head *bh; |
343 | ext4_fsblk_t bitmap_blk; | 343 | ext4_fsblk_t bitmap_blk; |
344 | 344 | ||
345 | desc = ext4_get_group_desc(sb, block_group, NULL); | 345 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
348 | bitmap_blk = ext4_block_bitmap(sb, desc); | 348 | bitmap_blk = ext4_block_bitmap(sb, desc); |
349 | bh = sb_getblk(sb, bitmap_blk); | 349 | bh = sb_getblk(sb, bitmap_blk); |
350 | if (unlikely(!bh)) { | 350 | if (unlikely(!bh)) { |
351 | ext4_error(sb, "Cannot read block bitmap - " | 351 | ext4_error(sb, "Cannot get buffer for block bitmap - " |
352 | "block_group = %u, block_bitmap = %llu", | 352 | "block_group = %u, block_bitmap = %llu", |
353 | block_group, bitmap_blk); | 353 | block_group, bitmap_blk); |
354 | return NULL; | 354 | return NULL; |
355 | } | 355 | } |
356 | 356 | ||
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
382 | return bh; | 382 | return bh; |
383 | } | 383 | } |
384 | /* | 384 | /* |
385 | * submit the buffer_head for read. We can | 385 | * submit the buffer_head for reading |
386 | * safely mark the bitmap as uptodate now. | ||
387 | * We do it here so the bitmap uptodate bit | ||
388 | * get set with buffer lock held. | ||
389 | */ | 386 | */ |
387 | set_buffer_new(bh); | ||
390 | trace_ext4_read_block_bitmap_load(sb, block_group); | 388 | trace_ext4_read_block_bitmap_load(sb, block_group); |
391 | set_bitmap_uptodate(bh); | 389 | bh->b_end_io = ext4_end_bitmap_read; |
392 | if (bh_submit_read(bh) < 0) { | 390 | get_bh(bh); |
393 | put_bh(bh); | 391 | submit_bh(READ, bh); |
392 | return bh; | ||
393 | } | ||
394 | |||
395 | /* Returns 0 on success, 1 on error */ | ||
396 | int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, | ||
397 | struct buffer_head *bh) | ||
398 | { | ||
399 | struct ext4_group_desc *desc; | ||
400 | |||
401 | if (!buffer_new(bh)) | ||
402 | return 0; | ||
403 | desc = ext4_get_group_desc(sb, block_group, NULL); | ||
404 | if (!desc) | ||
405 | return 1; | ||
406 | wait_on_buffer(bh); | ||
407 | if (!buffer_uptodate(bh)) { | ||
394 | ext4_error(sb, "Cannot read block bitmap - " | 408 | ext4_error(sb, "Cannot read block bitmap - " |
395 | "block_group = %u, block_bitmap = %llu", | 409 | "block_group = %u, block_bitmap = %llu", |
396 | block_group, bitmap_blk); | 410 | block_group, (unsigned long long) bh->b_blocknr); |
397 | return NULL; | 411 | return 1; |
398 | } | 412 | } |
413 | clear_buffer_new(bh); | ||
414 | /* Panic or remount fs read-only if block bitmap is invalid */ | ||
399 | ext4_valid_block_bitmap(sb, desc, block_group, bh); | 415 | ext4_valid_block_bitmap(sb, desc, block_group, bh); |
400 | /* | 416 | return 0; |
401 | * file system mounted not to panic on error, | 417 | } |
402 | * continue with corrupt bitmap | 418 | |
403 | */ | 419 | struct buffer_head * |
420 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | ||
421 | { | ||
422 | struct buffer_head *bh; | ||
423 | |||
424 | bh = ext4_read_block_bitmap_nowait(sb, block_group); | ||
425 | if (ext4_wait_block_bitmap(sb, block_group, bh)) { | ||
426 | put_bh(bh); | ||
427 | return NULL; | ||
428 | } | ||
404 | return bh; | 429 | return bh; |
405 | } | 430 | } |
406 | 431 | ||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 164c56092e58..ad56866d729a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -91,17 +91,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, | |||
91 | return 0; | 91 | return 0; |
92 | 92 | ||
93 | if (filp) | 93 | if (filp) |
94 | ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0, | 94 | ext4_error_file(filp, function, line, bh->b_blocknr, |
95 | "bad entry in directory: %s - offset=%u(%u), " | 95 | "bad entry in directory: %s - offset=%u(%u), " |
96 | "inode=%u, rec_len=%d, name_len=%d", | 96 | "inode=%u, rec_len=%d, name_len=%d", |
97 | error_msg, (unsigned) (offset%bh->b_size), | 97 | error_msg, (unsigned) (offset % bh->b_size), |
98 | offset, le32_to_cpu(de->inode), | 98 | offset, le32_to_cpu(de->inode), |
99 | rlen, de->name_len); | 99 | rlen, de->name_len); |
100 | else | 100 | else |
101 | ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0, | 101 | ext4_error_inode(dir, function, line, bh->b_blocknr, |
102 | "bad entry in directory: %s - offset=%u(%u), " | 102 | "bad entry in directory: %s - offset=%u(%u), " |
103 | "inode=%u, rec_len=%d, name_len=%d", | 103 | "inode=%u, rec_len=%d, name_len=%d", |
104 | error_msg, (unsigned) (offset%bh->b_size), | 104 | error_msg, (unsigned) (offset % bh->b_size), |
105 | offset, le32_to_cpu(de->inode), | 105 | offset, le32_to_cpu(de->inode), |
106 | rlen, de->name_len); | 106 | rlen, de->name_len); |
107 | 107 | ||
@@ -425,8 +425,9 @@ static int call_filldir(struct file *filp, void *dirent, | |||
425 | sb = inode->i_sb; | 425 | sb = inode->i_sb; |
426 | 426 | ||
427 | if (!fname) { | 427 | if (!fname) { |
428 | printk(KERN_ERR "EXT4-fs: call_filldir: called with " | 428 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " |
429 | "null fname?!?\n"); | 429 | "called with null fname?!?", __func__, __LINE__, |
430 | inode->i_ino, current->comm); | ||
430 | return 0; | 431 | return 0; |
431 | } | 432 | } |
432 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 433 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 513004fc3d84..ded731ac8a32 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -53,7 +53,7 @@ | |||
53 | printk(KERN_DEBUG f, ## a); \ | 53 | printk(KERN_DEBUG f, ## a); \ |
54 | } while (0) | 54 | } while (0) |
55 | #else | 55 | #else |
56 | #define ext4_debug(f, a...) do {} while (0) | 56 | #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
@@ -184,6 +184,8 @@ struct mpage_da_data { | |||
184 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 184 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
185 | #define EXT4_IO_END_ERROR 0x0002 | 185 | #define EXT4_IO_END_ERROR 0x0002 |
186 | #define EXT4_IO_END_QUEUED 0x0004 | 186 | #define EXT4_IO_END_QUEUED 0x0004 |
187 | #define EXT4_IO_END_DIRECT 0x0008 | ||
188 | #define EXT4_IO_END_IN_FSYNC 0x0010 | ||
187 | 189 | ||
188 | struct ext4_io_page { | 190 | struct ext4_io_page { |
189 | struct page *p_page; | 191 | struct page *p_page; |
@@ -192,18 +194,25 @@ struct ext4_io_page { | |||
192 | 194 | ||
193 | #define MAX_IO_PAGES 128 | 195 | #define MAX_IO_PAGES 128 |
194 | 196 | ||
197 | /* | ||
198 | * For converting uninitialized extents on a work queue. | ||
199 | * | ||
200 | * 'page' is only used from the writepage() path; 'pages' is only used for | ||
201 | * buffered writes; they are used to keep page references until conversion | ||
202 | * takes place. For AIO/DIO, neither field is filled in. | ||
203 | */ | ||
195 | typedef struct ext4_io_end { | 204 | typedef struct ext4_io_end { |
196 | struct list_head list; /* per-file finished IO list */ | 205 | struct list_head list; /* per-file finished IO list */ |
197 | struct inode *inode; /* file being written to */ | 206 | struct inode *inode; /* file being written to */ |
198 | unsigned int flag; /* unwritten or not */ | 207 | unsigned int flag; /* unwritten or not */ |
199 | struct page *page; /* page struct for buffer write */ | 208 | struct page *page; /* for writepage() path */ |
200 | loff_t offset; /* offset in the file */ | 209 | loff_t offset; /* offset in the file */ |
201 | ssize_t size; /* size of the extent */ | 210 | ssize_t size; /* size of the extent */ |
202 | struct work_struct work; /* data work queue */ | 211 | struct work_struct work; /* data work queue */ |
203 | struct kiocb *iocb; /* iocb struct for AIO */ | 212 | struct kiocb *iocb; /* iocb struct for AIO */ |
204 | int result; /* error value for AIO */ | 213 | int result; /* error value for AIO */ |
205 | int num_io_pages; | 214 | int num_io_pages; /* for writepages() */ |
206 | struct ext4_io_page *pages[MAX_IO_PAGES]; | 215 | struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */ |
207 | } ext4_io_end_t; | 216 | } ext4_io_end_t; |
208 | 217 | ||
209 | struct ext4_io_submit { | 218 | struct ext4_io_submit { |
@@ -923,6 +932,7 @@ struct ext4_inode_info { | |||
923 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ | 932 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ |
924 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ | 933 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ |
925 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ | 934 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ |
935 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 | ||
926 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ | 936 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ |
927 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ | 937 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ |
928 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ | 938 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ |
@@ -941,7 +951,6 @@ struct ext4_inode_info { | |||
941 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ | 951 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ |
942 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 952 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
943 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 953 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
944 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | ||
945 | #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ | 954 | #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ |
946 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 955 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
947 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 956 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
@@ -1142,6 +1151,7 @@ struct ext4_sb_info { | |||
1142 | unsigned int s_mount_opt; | 1151 | unsigned int s_mount_opt; |
1143 | unsigned int s_mount_opt2; | 1152 | unsigned int s_mount_opt2; |
1144 | unsigned int s_mount_flags; | 1153 | unsigned int s_mount_flags; |
1154 | unsigned int s_def_mount_opt; | ||
1145 | ext4_fsblk_t s_sb_block; | 1155 | ext4_fsblk_t s_sb_block; |
1146 | uid_t s_resuid; | 1156 | uid_t s_resuid; |
1147 | gid_t s_resgid; | 1157 | gid_t s_resgid; |
@@ -1420,8 +1430,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1420 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1430 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1421 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1431 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1422 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1432 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1423 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ | 1433 | #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ |
1424 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ | 1434 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ |
1435 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ | ||
1425 | 1436 | ||
1426 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1437 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1427 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1438 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
@@ -1794,8 +1805,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
1794 | ext4_group_t block_group, | 1805 | ext4_group_t block_group, |
1795 | struct buffer_head ** bh); | 1806 | struct buffer_head ** bh); |
1796 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1807 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
1797 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | 1808 | |
1798 | ext4_group_t block_group); | 1809 | extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, |
1810 | ext4_group_t block_group); | ||
1811 | extern int ext4_wait_block_bitmap(struct super_block *sb, | ||
1812 | ext4_group_t block_group, | ||
1813 | struct buffer_head *bh); | ||
1814 | extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
1815 | ext4_group_t block_group); | ||
1799 | extern void ext4_init_block_bitmap(struct super_block *sb, | 1816 | extern void ext4_init_block_bitmap(struct super_block *sb, |
1800 | struct buffer_head *bh, | 1817 | struct buffer_head *bh, |
1801 | ext4_group_t group, | 1818 | ext4_group_t group, |
@@ -1841,6 +1858,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *); | |||
1841 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | 1858 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1842 | extern int ext4_init_inode_table(struct super_block *sb, | 1859 | extern int ext4_init_inode_table(struct super_block *sb, |
1843 | ext4_group_t group, int barrier); | 1860 | ext4_group_t group, int barrier); |
1861 | extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); | ||
1844 | 1862 | ||
1845 | /* mballoc.c */ | 1863 | /* mballoc.c */ |
1846 | extern long ext4_mb_stats; | 1864 | extern long ext4_mb_stats; |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a52db3a69a30..0f58b86e3a02 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -47,9 +47,9 @@ | |||
47 | */ | 47 | */ |
48 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
49 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
50 | #define ext_debug(a...) printk(a) | 50 | #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) |
51 | #else | 51 | #else |
52 | #define ext_debug(a...) | 52 | #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | 55 | /* |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5802fa1dab18..83b20fcf9400 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -104,6 +104,78 @@ | |||
104 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) | 104 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) |
105 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) | 105 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) |
106 | 106 | ||
107 | /** | ||
108 | * struct ext4_journal_cb_entry - Base structure for callback information. | ||
109 | * | ||
110 | * This struct is a 'seed' structure for a using with your own callback | ||
111 | * structs. If you are using callbacks you must allocate one of these | ||
112 | * or another struct of your own definition which has this struct | ||
113 | * as it's first element and pass it to ext4_journal_callback_add(). | ||
114 | */ | ||
115 | struct ext4_journal_cb_entry { | ||
116 | /* list information for other callbacks attached to the same handle */ | ||
117 | struct list_head jce_list; | ||
118 | |||
119 | /* Function to call with this callback structure */ | ||
120 | void (*jce_func)(struct super_block *sb, | ||
121 | struct ext4_journal_cb_entry *jce, int error); | ||
122 | |||
123 | /* user data goes here */ | ||
124 | }; | ||
125 | |||
126 | /** | ||
127 | * ext4_journal_callback_add: add a function to call after transaction commit | ||
128 | * @handle: active journal transaction handle to register callback on | ||
129 | * @func: callback function to call after the transaction has committed: | ||
130 | * @sb: superblock of current filesystem for transaction | ||
131 | * @jce: returned journal callback data | ||
132 | * @rc: journal state at commit (0 = transaction committed properly) | ||
133 | * @jce: journal callback data (internal and function private data struct) | ||
134 | * | ||
135 | * The registered function will be called in the context of the journal thread | ||
136 | * after the transaction for which the handle was created has completed. | ||
137 | * | ||
138 | * No locks are held when the callback function is called, so it is safe to | ||
139 | * call blocking functions from within the callback, but the callback should | ||
140 | * not block or run for too long, or the filesystem will be blocked waiting for | ||
141 | * the next transaction to commit. No journaling functions can be used, or | ||
142 | * there is a risk of deadlock. | ||
143 | * | ||
144 | * There is no guaranteed calling order of multiple registered callbacks on | ||
145 | * the same transaction. | ||
146 | */ | ||
147 | static inline void ext4_journal_callback_add(handle_t *handle, | ||
148 | void (*func)(struct super_block *sb, | ||
149 | struct ext4_journal_cb_entry *jce, | ||
150 | int rc), | ||
151 | struct ext4_journal_cb_entry *jce) | ||
152 | { | ||
153 | struct ext4_sb_info *sbi = | ||
154 | EXT4_SB(handle->h_transaction->t_journal->j_private); | ||
155 | |||
156 | /* Add the jce to transaction's private list */ | ||
157 | jce->jce_func = func; | ||
158 | spin_lock(&sbi->s_md_lock); | ||
159 | list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); | ||
160 | spin_unlock(&sbi->s_md_lock); | ||
161 | } | ||
162 | |||
163 | /** | ||
164 | * ext4_journal_callback_del: delete a registered callback | ||
165 | * @handle: active journal transaction handle on which callback was registered | ||
166 | * @jce: registered journal callback entry to unregister | ||
167 | */ | ||
168 | static inline void ext4_journal_callback_del(handle_t *handle, | ||
169 | struct ext4_journal_cb_entry *jce) | ||
170 | { | ||
171 | struct ext4_sb_info *sbi = | ||
172 | EXT4_SB(handle->h_transaction->t_journal->j_private); | ||
173 | |||
174 | spin_lock(&sbi->s_md_lock); | ||
175 | list_del_init(&jce->jce_list); | ||
176 | spin_unlock(&sbi->s_md_lock); | ||
177 | } | ||
178 | |||
107 | int | 179 | int |
108 | ext4_mark_iloc_dirty(handle_t *handle, | 180 | ext4_mark_iloc_dirty(handle_t *handle, |
109 | struct inode *inode, | 181 | struct inode *inode, |
@@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, | |||
261 | /* super.c */ | 333 | /* super.c */ |
262 | int ext4_force_commit(struct super_block *sb); | 334 | int ext4_force_commit(struct super_block *sb); |
263 | 335 | ||
264 | static inline int ext4_should_journal_data(struct inode *inode) | 336 | /* |
337 | * Ext4 inode journal modes | ||
338 | */ | ||
339 | #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ | ||
340 | #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ | ||
341 | #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ | ||
342 | |||
343 | static inline int ext4_inode_journal_mode(struct inode *inode) | ||
265 | { | 344 | { |
266 | if (EXT4_JOURNAL(inode) == NULL) | 345 | if (EXT4_JOURNAL(inode) == NULL) |
267 | return 0; | 346 | return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ |
268 | if (!S_ISREG(inode->i_mode)) | 347 | /* We do not support data journalling with delayed allocation */ |
269 | return 1; | 348 | if (!S_ISREG(inode->i_mode) || |
270 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 349 | test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
271 | return 1; | 350 | return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ |
272 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | 351 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && |
273 | return 1; | 352 | !test_opt(inode->i_sb, DELALLOC)) |
274 | return 0; | 353 | return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ |
354 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
355 | return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ | ||
356 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
357 | return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ | ||
358 | else | ||
359 | BUG(); | ||
360 | } | ||
361 | |||
362 | static inline int ext4_should_journal_data(struct inode *inode) | ||
363 | { | ||
364 | return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; | ||
275 | } | 365 | } |
276 | 366 | ||
277 | static inline int ext4_should_order_data(struct inode *inode) | 367 | static inline int ext4_should_order_data(struct inode *inode) |
278 | { | 368 | { |
279 | if (EXT4_JOURNAL(inode) == NULL) | 369 | return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; |
280 | return 0; | ||
281 | if (!S_ISREG(inode->i_mode)) | ||
282 | return 0; | ||
283 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | ||
284 | return 0; | ||
285 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
286 | return 1; | ||
287 | return 0; | ||
288 | } | 370 | } |
289 | 371 | ||
290 | static inline int ext4_should_writeback_data(struct inode *inode) | 372 | static inline int ext4_should_writeback_data(struct inode *inode) |
291 | { | 373 | { |
292 | if (EXT4_JOURNAL(inode) == NULL) | 374 | return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; |
293 | return 1; | ||
294 | if (!S_ISREG(inode->i_mode)) | ||
295 | return 0; | ||
296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | ||
297 | return 0; | ||
298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
299 | return 1; | ||
300 | return 0; | ||
301 | } | 375 | } |
302 | 376 | ||
303 | /* | 377 | /* |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74f23c292e1b..1421938e6792 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,6 +44,14 @@ | |||
44 | 44 | ||
45 | #include <trace/events/ext4.h> | 45 | #include <trace/events/ext4.h> |
46 | 46 | ||
47 | /* | ||
48 | * used by extent splitting. | ||
49 | */ | ||
50 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
51 | due to ENOSPC */ | ||
52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
54 | |||
47 | static int ext4_split_extent(handle_t *handle, | 55 | static int ext4_split_extent(handle_t *handle, |
48 | struct inode *inode, | 56 | struct inode *inode, |
49 | struct ext4_ext_path *path, | 57 | struct ext4_ext_path *path, |
@@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle, | |||
51 | int split_flag, | 59 | int split_flag, |
52 | int flags); | 60 | int flags); |
53 | 61 | ||
62 | static int ext4_split_extent_at(handle_t *handle, | ||
63 | struct inode *inode, | ||
64 | struct ext4_ext_path *path, | ||
65 | ext4_lblk_t split, | ||
66 | int split_flag, | ||
67 | int flags); | ||
68 | |||
54 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 69 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
55 | struct inode *inode, | 70 | struct inode *inode, |
56 | int needed) | 71 | int needed) |
@@ -300,6 +315,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
300 | ext4_fsblk_t block = ext4_ext_pblock(ext); | 315 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
301 | int len = ext4_ext_get_actual_len(ext); | 316 | int len = ext4_ext_get_actual_len(ext); |
302 | 317 | ||
318 | if (len == 0) | ||
319 | return 0; | ||
303 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 320 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
304 | } | 321 | } |
305 | 322 | ||
@@ -2308,7 +2325,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2308 | struct ext4_extent *ex; | 2325 | struct ext4_extent *ex; |
2309 | 2326 | ||
2310 | /* the header must be checked already in ext4_ext_remove_space() */ | 2327 | /* the header must be checked already in ext4_ext_remove_space() */ |
2311 | ext_debug("truncate since %u in leaf\n", start); | 2328 | ext_debug("truncate since %u in leaf to %u\n", start, end); |
2312 | if (!path[depth].p_hdr) | 2329 | if (!path[depth].p_hdr) |
2313 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); | 2330 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); |
2314 | eh = path[depth].p_hdr; | 2331 | eh = path[depth].p_hdr; |
@@ -2343,14 +2360,17 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2343 | ext_debug(" border %u:%u\n", a, b); | 2360 | ext_debug(" border %u:%u\n", a, b); |
2344 | 2361 | ||
2345 | /* If this extent is beyond the end of the hole, skip it */ | 2362 | /* If this extent is beyond the end of the hole, skip it */ |
2346 | if (end <= ex_ee_block) { | 2363 | if (end < ex_ee_block) { |
2347 | ex--; | 2364 | ex--; |
2348 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2365 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2349 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2366 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2350 | continue; | 2367 | continue; |
2351 | } else if (b != ex_ee_block + ex_ee_len - 1) { | 2368 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
2352 | EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", | 2369 | EXT4_ERROR_INODE(inode, |
2353 | start, end); | 2370 | "can not handle truncate %u:%u " |
2371 | "on extent %u:%u", | ||
2372 | start, end, ex_ee_block, | ||
2373 | ex_ee_block + ex_ee_len - 1); | ||
2354 | err = -EIO; | 2374 | err = -EIO; |
2355 | goto out; | 2375 | goto out; |
2356 | } else if (a != ex_ee_block) { | 2376 | } else if (a != ex_ee_block) { |
@@ -2482,7 +2502,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2482 | return 1; | 2502 | return 1; |
2483 | } | 2503 | } |
2484 | 2504 | ||
2485 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | 2505 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2506 | ext4_lblk_t end) | ||
2486 | { | 2507 | { |
2487 | struct super_block *sb = inode->i_sb; | 2508 | struct super_block *sb = inode->i_sb; |
2488 | int depth = ext_depth(inode); | 2509 | int depth = ext_depth(inode); |
@@ -2491,7 +2512,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2491 | handle_t *handle; | 2512 | handle_t *handle; |
2492 | int i, err; | 2513 | int i, err; |
2493 | 2514 | ||
2494 | ext_debug("truncate since %u\n", start); | 2515 | ext_debug("truncate since %u to %u\n", start, end); |
2495 | 2516 | ||
2496 | /* probably first extent we're gonna free will be last in block */ | 2517 | /* probably first extent we're gonna free will be last in block */ |
2497 | handle = ext4_journal_start(inode, depth + 1); | 2518 | handle = ext4_journal_start(inode, depth + 1); |
@@ -2504,6 +2525,61 @@ again: | |||
2504 | trace_ext4_ext_remove_space(inode, start, depth); | 2525 | trace_ext4_ext_remove_space(inode, start, depth); |
2505 | 2526 | ||
2506 | /* | 2527 | /* |
2528 | * Check if we are removing extents inside the extent tree. If that | ||
2529 | * is the case, we are going to punch a hole inside the extent tree | ||
2530 | * so we have to check whether we need to split the extent covering | ||
2531 | * the last block to remove so we can easily remove the part of it | ||
2532 | * in ext4_ext_rm_leaf(). | ||
2533 | */ | ||
2534 | if (end < EXT_MAX_BLOCKS - 1) { | ||
2535 | struct ext4_extent *ex; | ||
2536 | ext4_lblk_t ee_block; | ||
2537 | |||
2538 | /* find extent for this block */ | ||
2539 | path = ext4_ext_find_extent(inode, end, NULL); | ||
2540 | if (IS_ERR(path)) { | ||
2541 | ext4_journal_stop(handle); | ||
2542 | return PTR_ERR(path); | ||
2543 | } | ||
2544 | depth = ext_depth(inode); | ||
2545 | ex = path[depth].p_ext; | ||
2546 | if (!ex) | ||
2547 | goto cont; | ||
2548 | |||
2549 | ee_block = le32_to_cpu(ex->ee_block); | ||
2550 | |||
2551 | /* | ||
2552 | * See if the last block is inside the extent, if so split | ||
2553 | * the extent at 'end' block so we can easily remove the | ||
2554 | * tail of the first part of the split extent in | ||
2555 | * ext4_ext_rm_leaf(). | ||
2556 | */ | ||
2557 | if (end >= ee_block && | ||
2558 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | ||
2559 | int split_flag = 0; | ||
2560 | |||
2561 | if (ext4_ext_is_uninitialized(ex)) | ||
2562 | split_flag = EXT4_EXT_MARK_UNINIT1 | | ||
2563 | EXT4_EXT_MARK_UNINIT2; | ||
2564 | |||
2565 | /* | ||
2566 | * Split the extent in two so that 'end' is the last | ||
2567 | * block in the first new extent | ||
2568 | */ | ||
2569 | err = ext4_split_extent_at(handle, inode, path, | ||
2570 | end + 1, split_flag, | ||
2571 | EXT4_GET_BLOCKS_PRE_IO | | ||
2572 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
2573 | |||
2574 | if (err < 0) | ||
2575 | goto out; | ||
2576 | } | ||
2577 | ext4_ext_drop_refs(path); | ||
2578 | kfree(path); | ||
2579 | } | ||
2580 | cont: | ||
2581 | |||
2582 | /* | ||
2507 | * We start scanning from right side, freeing all the blocks | 2583 | * We start scanning from right side, freeing all the blocks |
2508 | * after i_size and walking into the tree depth-wise. | 2584 | * after i_size and walking into the tree depth-wise. |
2509 | */ | 2585 | */ |
@@ -2515,6 +2591,7 @@ again: | |||
2515 | } | 2591 | } |
2516 | path[0].p_depth = depth; | 2592 | path[0].p_depth = depth; |
2517 | path[0].p_hdr = ext_inode_hdr(inode); | 2593 | path[0].p_hdr = ext_inode_hdr(inode); |
2594 | |||
2518 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2595 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2519 | err = -EIO; | 2596 | err = -EIO; |
2520 | goto out; | 2597 | goto out; |
@@ -2526,7 +2603,7 @@ again: | |||
2526 | /* this is leaf block */ | 2603 | /* this is leaf block */ |
2527 | err = ext4_ext_rm_leaf(handle, inode, path, | 2604 | err = ext4_ext_rm_leaf(handle, inode, path, |
2528 | &partial_cluster, start, | 2605 | &partial_cluster, start, |
2529 | EXT_MAX_BLOCKS - 1); | 2606 | end); |
2530 | /* root level has p_bh == NULL, brelse() eats this */ | 2607 | /* root level has p_bh == NULL, brelse() eats this */ |
2531 | brelse(path[i].p_bh); | 2608 | brelse(path[i].p_bh); |
2532 | path[i].p_bh = NULL; | 2609 | path[i].p_bh = NULL; |
@@ -2651,17 +2728,17 @@ void ext4_ext_init(struct super_block *sb) | |||
2651 | 2728 | ||
2652 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 2729 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
2653 | #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) | 2730 | #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) |
2654 | printk(KERN_INFO "EXT4-fs: file extents enabled"); | 2731 | printk(KERN_INFO "EXT4-fs: file extents enabled" |
2655 | #ifdef AGGRESSIVE_TEST | 2732 | #ifdef AGGRESSIVE_TEST |
2656 | printk(", aggressive tests"); | 2733 | ", aggressive tests" |
2657 | #endif | 2734 | #endif |
2658 | #ifdef CHECK_BINSEARCH | 2735 | #ifdef CHECK_BINSEARCH |
2659 | printk(", check binsearch"); | 2736 | ", check binsearch" |
2660 | #endif | 2737 | #endif |
2661 | #ifdef EXTENTS_STATS | 2738 | #ifdef EXTENTS_STATS |
2662 | printk(", stats"); | 2739 | ", stats" |
2663 | #endif | 2740 | #endif |
2664 | printk("\n"); | 2741 | "\n"); |
2665 | #endif | 2742 | #endif |
2666 | #ifdef EXTENTS_STATS | 2743 | #ifdef EXTENTS_STATS |
2667 | spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); | 2744 | spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); |
@@ -2709,14 +2786,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2709 | } | 2786 | } |
2710 | 2787 | ||
2711 | /* | 2788 | /* |
2712 | * used by extent splitting. | ||
2713 | */ | ||
2714 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
2715 | due to ENOSPC */ | ||
2716 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
2717 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
2718 | |||
2719 | /* | ||
2720 | * ext4_split_extent_at() splits an extent at given block. | 2789 | * ext4_split_extent_at() splits an extent at given block. |
2721 | * | 2790 | * |
2722 | * @handle: the journal handle | 2791 | * @handle: the journal handle |
@@ -3224,11 +3293,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3224 | depth = ext_depth(inode); | 3293 | depth = ext_depth(inode); |
3225 | eh = path[depth].p_hdr; | 3294 | eh = path[depth].p_hdr; |
3226 | 3295 | ||
3227 | if (unlikely(!eh->eh_entries)) { | 3296 | /* |
3228 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | 3297 | * We're going to remove EOFBLOCKS_FL entirely in future so we |
3229 | "EOFBLOCKS_FL set"); | 3298 | * do not care for this case anymore. Simply remove the flag |
3230 | return -EIO; | 3299 | * if there are no extents. |
3231 | } | 3300 | */ |
3301 | if (unlikely(!eh->eh_entries)) | ||
3302 | goto out; | ||
3232 | last_ex = EXT_LAST_EXTENT(eh); | 3303 | last_ex = EXT_LAST_EXTENT(eh); |
3233 | /* | 3304 | /* |
3234 | * We should clear the EOFBLOCKS_FL flag if we are writing the | 3305 | * We should clear the EOFBLOCKS_FL flag if we are writing the |
@@ -3252,6 +3323,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3252 | for (i = depth-1; i >= 0; i--) | 3323 | for (i = depth-1; i >= 0; i--) |
3253 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | 3324 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) |
3254 | return 0; | 3325 | return 0; |
3326 | out: | ||
3255 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 3327 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
3256 | return ext4_mark_inode_dirty(handle, inode); | 3328 | return ext4_mark_inode_dirty(handle, inode); |
3257 | } | 3329 | } |
@@ -3710,8 +3782,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3710 | int free_on_err = 0, err = 0, depth, ret; | 3782 | int free_on_err = 0, err = 0, depth, ret; |
3711 | unsigned int allocated = 0, offset = 0; | 3783 | unsigned int allocated = 0, offset = 0; |
3712 | unsigned int allocated_clusters = 0; | 3784 | unsigned int allocated_clusters = 0; |
3713 | unsigned int punched_out = 0; | ||
3714 | unsigned int result = 0; | ||
3715 | struct ext4_allocation_request ar; | 3785 | struct ext4_allocation_request ar; |
3716 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3786 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3717 | ext4_lblk_t cluster_offset; | 3787 | ext4_lblk_t cluster_offset; |
@@ -3721,8 +3791,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3721 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 3791 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
3722 | 3792 | ||
3723 | /* check in cache */ | 3793 | /* check in cache */ |
3724 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && | 3794 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
3725 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | ||
3726 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3795 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3727 | if ((sbi->s_cluster_ratio > 1) && | 3796 | if ((sbi->s_cluster_ratio > 1) && |
3728 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) | 3797 | ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) |
@@ -3790,113 +3859,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3790 | 3859 | ||
3791 | /* if found extent covers block, simply return it */ | 3860 | /* if found extent covers block, simply return it */ |
3792 | if (in_range(map->m_lblk, ee_block, ee_len)) { | 3861 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
3793 | struct ext4_map_blocks punch_map; | ||
3794 | ext4_fsblk_t partial_cluster = 0; | ||
3795 | |||
3796 | newblock = map->m_lblk - ee_block + ee_start; | 3862 | newblock = map->m_lblk - ee_block + ee_start; |
3797 | /* number of remaining blocks in the extent */ | 3863 | /* number of remaining blocks in the extent */ |
3798 | allocated = ee_len - (map->m_lblk - ee_block); | 3864 | allocated = ee_len - (map->m_lblk - ee_block); |
3799 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 3865 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3800 | ee_block, ee_len, newblock); | 3866 | ee_block, ee_len, newblock); |
3801 | 3867 | ||
3802 | if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { | ||
3803 | /* | ||
3804 | * Do not put uninitialized extent | ||
3805 | * in the cache | ||
3806 | */ | ||
3807 | if (!ext4_ext_is_uninitialized(ex)) { | ||
3808 | ext4_ext_put_in_cache(inode, ee_block, | ||
3809 | ee_len, ee_start); | ||
3810 | goto out; | ||
3811 | } | ||
3812 | ret = ext4_ext_handle_uninitialized_extents( | ||
3813 | handle, inode, map, path, flags, | ||
3814 | allocated, newblock); | ||
3815 | return ret; | ||
3816 | } | ||
3817 | |||
3818 | /* | ||
3819 | * Punch out the map length, but only to the | ||
3820 | * end of the extent | ||
3821 | */ | ||
3822 | punched_out = allocated < map->m_len ? | ||
3823 | allocated : map->m_len; | ||
3824 | |||
3825 | /* | 3868 | /* |
3826 | * Sense extents need to be converted to | 3869 | * Do not put uninitialized extent |
3827 | * uninitialized, they must fit in an | 3870 | * in the cache |
3828 | * uninitialized extent | ||
3829 | */ | 3871 | */ |
3830 | if (punched_out > EXT_UNINIT_MAX_LEN) | 3872 | if (!ext4_ext_is_uninitialized(ex)) { |
3831 | punched_out = EXT_UNINIT_MAX_LEN; | 3873 | ext4_ext_put_in_cache(inode, ee_block, |
3832 | 3874 | ee_len, ee_start); | |
3833 | punch_map.m_lblk = map->m_lblk; | 3875 | goto out; |
3834 | punch_map.m_pblk = newblock; | ||
3835 | punch_map.m_len = punched_out; | ||
3836 | punch_map.m_flags = 0; | ||
3837 | |||
3838 | /* Check to see if the extent needs to be split */ | ||
3839 | if (punch_map.m_len != ee_len || | ||
3840 | punch_map.m_lblk != ee_block) { | ||
3841 | |||
3842 | ret = ext4_split_extent(handle, inode, | ||
3843 | path, &punch_map, 0, | ||
3844 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
3845 | EXT4_GET_BLOCKS_PRE_IO); | ||
3846 | |||
3847 | if (ret < 0) { | ||
3848 | err = ret; | ||
3849 | goto out2; | ||
3850 | } | ||
3851 | /* | ||
3852 | * find extent for the block at | ||
3853 | * the start of the hole | ||
3854 | */ | ||
3855 | ext4_ext_drop_refs(path); | ||
3856 | kfree(path); | ||
3857 | |||
3858 | path = ext4_ext_find_extent(inode, | ||
3859 | map->m_lblk, NULL); | ||
3860 | if (IS_ERR(path)) { | ||
3861 | err = PTR_ERR(path); | ||
3862 | path = NULL; | ||
3863 | goto out2; | ||
3864 | } | ||
3865 | |||
3866 | depth = ext_depth(inode); | ||
3867 | ex = path[depth].p_ext; | ||
3868 | ee_len = ext4_ext_get_actual_len(ex); | ||
3869 | ee_block = le32_to_cpu(ex->ee_block); | ||
3870 | ee_start = ext4_ext_pblock(ex); | ||
3871 | |||
3872 | } | ||
3873 | |||
3874 | ext4_ext_mark_uninitialized(ex); | ||
3875 | |||
3876 | ext4_ext_invalidate_cache(inode); | ||
3877 | |||
3878 | err = ext4_ext_rm_leaf(handle, inode, path, | ||
3879 | &partial_cluster, map->m_lblk, | ||
3880 | map->m_lblk + punched_out); | ||
3881 | |||
3882 | if (!err && path->p_hdr->eh_entries == 0) { | ||
3883 | /* | ||
3884 | * Punch hole freed all of this sub tree, | ||
3885 | * so we need to correct eh_depth | ||
3886 | */ | ||
3887 | err = ext4_ext_get_access(handle, inode, path); | ||
3888 | if (err == 0) { | ||
3889 | ext_inode_hdr(inode)->eh_depth = 0; | ||
3890 | ext_inode_hdr(inode)->eh_max = | ||
3891 | cpu_to_le16(ext4_ext_space_root( | ||
3892 | inode, 0)); | ||
3893 | |||
3894 | err = ext4_ext_dirty( | ||
3895 | handle, inode, path); | ||
3896 | } | ||
3897 | } | 3876 | } |
3898 | 3877 | ret = ext4_ext_handle_uninitialized_extents( | |
3899 | goto out2; | 3878 | handle, inode, map, path, flags, |
3879 | allocated, newblock); | ||
3880 | return ret; | ||
3900 | } | 3881 | } |
3901 | } | 3882 | } |
3902 | 3883 | ||
@@ -4165,13 +4146,11 @@ out2: | |||
4165 | ext4_ext_drop_refs(path); | 4146 | ext4_ext_drop_refs(path); |
4166 | kfree(path); | 4147 | kfree(path); |
4167 | } | 4148 | } |
4168 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | ||
4169 | punched_out : allocated; | ||
4170 | 4149 | ||
4171 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | 4150 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, |
4172 | newblock, map->m_len, err ? err : result); | 4151 | newblock, map->m_len, err ? err : allocated); |
4173 | 4152 | ||
4174 | return err ? err : result; | 4153 | return err ? err : allocated; |
4175 | } | 4154 | } |
4176 | 4155 | ||
4177 | void ext4_ext_truncate(struct inode *inode) | 4156 | void ext4_ext_truncate(struct inode *inode) |
@@ -4228,7 +4207,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
4228 | 4207 | ||
4229 | last_block = (inode->i_size + sb->s_blocksize - 1) | 4208 | last_block = (inode->i_size + sb->s_blocksize - 1) |
4230 | >> EXT4_BLOCK_SIZE_BITS(sb); | 4209 | >> EXT4_BLOCK_SIZE_BITS(sb); |
4231 | err = ext4_ext_remove_space(inode, last_block); | 4210 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
4232 | 4211 | ||
4233 | /* In a multi-transaction truncate, we only make the final | 4212 | /* In a multi-transaction truncate, we only make the final |
4234 | * transaction synchronous. | 4213 | * transaction synchronous. |
@@ -4436,10 +4415,11 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4436 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4415 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
4437 | if (ret <= 0) { | 4416 | if (ret <= 0) { |
4438 | WARN_ON(ret <= 0); | 4417 | WARN_ON(ret <= 0); |
4439 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 4418 | ext4_msg(inode->i_sb, KERN_ERR, |
4440 | "returned error inode#%lu, block=%u, " | 4419 | "%s:%d: inode #%lu: block %u: len %u: " |
4441 | "max_blocks=%u", __func__, | 4420 | "ext4_ext_map_blocks returned %d", |
4442 | inode->i_ino, map.m_lblk, map.m_len); | 4421 | __func__, __LINE__, inode->i_ino, map.m_lblk, |
4422 | map.m_len, ret); | ||
4443 | } | 4423 | } |
4444 | ext4_mark_inode_dirty(handle, inode); | 4424 | ext4_mark_inode_dirty(handle, inode); |
4445 | ret2 = ext4_journal_stop(handle); | 4425 | ret2 = ext4_journal_stop(handle); |
@@ -4705,14 +4685,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4705 | { | 4685 | { |
4706 | struct inode *inode = file->f_path.dentry->d_inode; | 4686 | struct inode *inode = file->f_path.dentry->d_inode; |
4707 | struct super_block *sb = inode->i_sb; | 4687 | struct super_block *sb = inode->i_sb; |
4708 | struct ext4_ext_cache cache_ex; | 4688 | ext4_lblk_t first_block, stop_block; |
4709 | ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; | ||
4710 | struct address_space *mapping = inode->i_mapping; | 4689 | struct address_space *mapping = inode->i_mapping; |
4711 | struct ext4_map_blocks map; | ||
4712 | handle_t *handle; | 4690 | handle_t *handle; |
4713 | loff_t first_page, last_page, page_len; | 4691 | loff_t first_page, last_page, page_len; |
4714 | loff_t first_page_offset, last_page_offset; | 4692 | loff_t first_page_offset, last_page_offset; |
4715 | int ret, credits, blocks_released, err = 0; | 4693 | int credits, err = 0; |
4716 | 4694 | ||
4717 | /* No need to punch hole beyond i_size */ | 4695 | /* No need to punch hole beyond i_size */ |
4718 | if (offset >= inode->i_size) | 4696 | if (offset >= inode->i_size) |
@@ -4728,10 +4706,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4728 | offset; | 4706 | offset; |
4729 | } | 4707 | } |
4730 | 4708 | ||
4731 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4732 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4733 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4734 | |||
4735 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 4709 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
4736 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 4710 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; |
4737 | 4711 | ||
@@ -4810,7 +4784,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4810 | } | 4784 | } |
4811 | } | 4785 | } |
4812 | 4786 | ||
4813 | |||
4814 | /* | 4787 | /* |
4815 | * If i_size is contained in the last page, we need to | 4788 | * If i_size is contained in the last page, we need to |
4816 | * unmap and zero the partial page after i_size | 4789 | * unmap and zero the partial page after i_size |
@@ -4830,73 +4803,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4830 | } | 4803 | } |
4831 | } | 4804 | } |
4832 | 4805 | ||
4806 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4807 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4808 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4809 | |||
4833 | /* If there are no blocks to remove, return now */ | 4810 | /* If there are no blocks to remove, return now */ |
4834 | if (first_block >= last_block) | 4811 | if (first_block >= stop_block) |
4835 | goto out; | 4812 | goto out; |
4836 | 4813 | ||
4837 | down_write(&EXT4_I(inode)->i_data_sem); | 4814 | down_write(&EXT4_I(inode)->i_data_sem); |
4838 | ext4_ext_invalidate_cache(inode); | 4815 | ext4_ext_invalidate_cache(inode); |
4839 | ext4_discard_preallocations(inode); | 4816 | ext4_discard_preallocations(inode); |
4840 | 4817 | ||
4841 | /* | 4818 | err = ext4_ext_remove_space(inode, first_block, stop_block - 1); |
4842 | * Loop over all the blocks and identify blocks | ||
4843 | * that need to be punched out | ||
4844 | */ | ||
4845 | iblock = first_block; | ||
4846 | blocks_released = 0; | ||
4847 | while (iblock < last_block) { | ||
4848 | max_blocks = last_block - iblock; | ||
4849 | num_blocks = 1; | ||
4850 | memset(&map, 0, sizeof(map)); | ||
4851 | map.m_lblk = iblock; | ||
4852 | map.m_len = max_blocks; | ||
4853 | ret = ext4_ext_map_blocks(handle, inode, &map, | ||
4854 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
4855 | |||
4856 | if (ret > 0) { | ||
4857 | blocks_released += ret; | ||
4858 | num_blocks = ret; | ||
4859 | } else if (ret == 0) { | ||
4860 | /* | ||
4861 | * If map blocks could not find the block, | ||
4862 | * then it is in a hole. If the hole was | ||
4863 | * not already cached, then map blocks should | ||
4864 | * put it in the cache. So we can get the hole | ||
4865 | * out of the cache | ||
4866 | */ | ||
4867 | memset(&cache_ex, 0, sizeof(cache_ex)); | ||
4868 | if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && | ||
4869 | !cache_ex.ec_start) { | ||
4870 | |||
4871 | /* The hole is cached */ | ||
4872 | num_blocks = cache_ex.ec_block + | ||
4873 | cache_ex.ec_len - iblock; | ||
4874 | |||
4875 | } else { | ||
4876 | /* The block could not be identified */ | ||
4877 | err = -EIO; | ||
4878 | break; | ||
4879 | } | ||
4880 | } else { | ||
4881 | /* Map blocks error */ | ||
4882 | err = ret; | ||
4883 | break; | ||
4884 | } | ||
4885 | |||
4886 | if (num_blocks == 0) { | ||
4887 | /* This condition should never happen */ | ||
4888 | ext_debug("Block lookup failed"); | ||
4889 | err = -EIO; | ||
4890 | break; | ||
4891 | } | ||
4892 | |||
4893 | iblock += num_blocks; | ||
4894 | } | ||
4895 | 4819 | ||
4896 | if (blocks_released > 0) { | 4820 | ext4_ext_invalidate_cache(inode); |
4897 | ext4_ext_invalidate_cache(inode); | 4821 | ext4_discard_preallocations(inode); |
4898 | ext4_discard_preallocations(inode); | ||
4899 | } | ||
4900 | 4822 | ||
4901 | if (IS_SYNC(inode)) | 4823 | if (IS_SYNC(inode)) |
4902 | ext4_handle_sync(handle); | 4824 | ext4_handle_sync(handle); |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 00a2cb753efd..bb6c7d811313 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode) | |||
89 | io = list_entry(ei->i_completed_io_list.next, | 89 | io = list_entry(ei->i_completed_io_list.next, |
90 | ext4_io_end_t, list); | 90 | ext4_io_end_t, list); |
91 | list_del_init(&io->list); | 91 | list_del_init(&io->list); |
92 | io->flag |= EXT4_IO_END_IN_FSYNC; | ||
92 | /* | 93 | /* |
93 | * Calling ext4_end_io_nolock() to convert completed | 94 | * Calling ext4_end_io_nolock() to convert completed |
94 | * IO to written. | 95 | * IO to written. |
@@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode) | |||
108 | if (ret < 0) | 109 | if (ret < 0) |
109 | ret2 = ret; | 110 | ret2 = ret; |
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 111 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
112 | io->flag &= ~EXT4_IO_END_IN_FSYNC; | ||
111 | } | 113 | } |
112 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 114 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
113 | return (ret2 < 0) ? ret2 : 0; | 115 | return (ret2 < 0) ? ret2 : 0; |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25d8c9781ad9..409c2ee7750a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
92 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
93 | } | 93 | } |
94 | 94 | ||
95 | void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) | ||
96 | { | ||
97 | if (uptodate) { | ||
98 | set_buffer_uptodate(bh); | ||
99 | set_bitmap_uptodate(bh); | ||
100 | } | ||
101 | unlock_buffer(bh); | ||
102 | put_bh(bh); | ||
103 | } | ||
104 | |||
95 | /* | 105 | /* |
96 | * Read the inode allocation bitmap for a given block_group, reading | 106 | * Read the inode allocation bitmap for a given block_group, reading |
97 | * into the specified slot in the superblock's bitmap cache. | 107 | * into the specified slot in the superblock's bitmap cache. |
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
147 | return bh; | 157 | return bh; |
148 | } | 158 | } |
149 | /* | 159 | /* |
150 | * submit the buffer_head for read. We can | 160 | * submit the buffer_head for reading |
151 | * safely mark the bitmap as uptodate now. | ||
152 | * We do it here so the bitmap uptodate bit | ||
153 | * get set with buffer lock held. | ||
154 | */ | 161 | */ |
155 | trace_ext4_load_inode_bitmap(sb, block_group); | 162 | trace_ext4_load_inode_bitmap(sb, block_group); |
156 | set_bitmap_uptodate(bh); | 163 | bh->b_end_io = ext4_end_bitmap_read; |
157 | if (bh_submit_read(bh) < 0) { | 164 | get_bh(bh); |
165 | submit_bh(READ, bh); | ||
166 | wait_on_buffer(bh); | ||
167 | if (!buffer_uptodate(bh)) { | ||
158 | put_bh(bh); | 168 | put_bh(bh); |
159 | ext4_error(sb, "Cannot read inode bitmap - " | 169 | ext4_error(sb, "Cannot read inode bitmap - " |
160 | "block_group = %u, inode_bitmap = %llu", | 170 | "block_group = %u, inode_bitmap = %llu", |
161 | block_group, bitmap_blk); | 171 | block_group, bitmap_blk); |
162 | return NULL; | 172 | return NULL; |
163 | } | 173 | } |
164 | return bh; | 174 | return bh; |
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
194 | struct ext4_sb_info *sbi; | 204 | struct ext4_sb_info *sbi; |
195 | int fatal = 0, err, count, cleared; | 205 | int fatal = 0, err, count, cleared; |
196 | 206 | ||
197 | if (atomic_read(&inode->i_count) > 1) { | 207 | if (!sb) { |
198 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", | 208 | printk(KERN_ERR "EXT4-fs: %s:%d: inode on " |
199 | atomic_read(&inode->i_count)); | 209 | "nonexistent device\n", __func__, __LINE__); |
200 | return; | 210 | return; |
201 | } | 211 | } |
202 | if (inode->i_nlink) { | 212 | if (atomic_read(&inode->i_count) > 1) { |
203 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", | 213 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", |
204 | inode->i_nlink); | 214 | __func__, __LINE__, inode->i_ino, |
215 | atomic_read(&inode->i_count)); | ||
205 | return; | 216 | return; |
206 | } | 217 | } |
207 | if (!sb) { | 218 | if (inode->i_nlink) { |
208 | printk(KERN_ERR "ext4_free_inode: inode on " | 219 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", |
209 | "nonexistent device\n"); | 220 | __func__, __LINE__, inode->i_ino, inode->i_nlink); |
210 | return; | 221 | return; |
211 | } | 222 | } |
212 | sbi = EXT4_SB(sb); | 223 | sbi = EXT4_SB(sb); |
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
593 | } | 604 | } |
594 | 605 | ||
595 | /* | 606 | /* |
596 | * claim the inode from the inode bitmap. If the group | ||
597 | * is uninit we need to take the groups's ext4_group_lock | ||
598 | * and clear the uninit flag. The inode bitmap update | ||
599 | * and group desc uninit flag clear should be done | ||
600 | * after holding ext4_group_lock so that ext4_read_inode_bitmap | ||
601 | * doesn't race with the ext4_claim_inode | ||
602 | */ | ||
603 | static int ext4_claim_inode(struct super_block *sb, | ||
604 | struct buffer_head *inode_bitmap_bh, | ||
605 | unsigned long ino, ext4_group_t group, umode_t mode) | ||
606 | { | ||
607 | int free = 0, retval = 0, count; | ||
608 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
609 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
610 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | ||
611 | |||
612 | /* | ||
613 | * We have to be sure that new inode allocation does not race with | ||
614 | * inode table initialization, because otherwise we may end up | ||
615 | * allocating and writing new inode right before sb_issue_zeroout | ||
616 | * takes place and overwriting our new inode with zeroes. So we | ||
617 | * take alloc_sem to prevent it. | ||
618 | */ | ||
619 | down_read(&grp->alloc_sem); | ||
620 | ext4_lock_group(sb, group); | ||
621 | if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { | ||
622 | /* not a free inode */ | ||
623 | retval = 1; | ||
624 | goto err_ret; | ||
625 | } | ||
626 | ino++; | ||
627 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | ||
628 | ino > EXT4_INODES_PER_GROUP(sb)) { | ||
629 | ext4_unlock_group(sb, group); | ||
630 | up_read(&grp->alloc_sem); | ||
631 | ext4_error(sb, "reserved inode or inode > inodes count - " | ||
632 | "block_group = %u, inode=%lu", group, | ||
633 | ino + group * EXT4_INODES_PER_GROUP(sb)); | ||
634 | return 1; | ||
635 | } | ||
636 | /* If we didn't allocate from within the initialized part of the inode | ||
637 | * table then we need to initialize up to this inode. */ | ||
638 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
639 | |||
640 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | ||
641 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); | ||
642 | /* When marking the block group with | ||
643 | * ~EXT4_BG_INODE_UNINIT we don't want to depend | ||
644 | * on the value of bg_itable_unused even though | ||
645 | * mke2fs could have initialized the same for us. | ||
646 | * Instead we calculated the value below | ||
647 | */ | ||
648 | |||
649 | free = 0; | ||
650 | } else { | ||
651 | free = EXT4_INODES_PER_GROUP(sb) - | ||
652 | ext4_itable_unused_count(sb, gdp); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * Check the relative inode number against the last used | ||
657 | * relative inode number in this group. if it is greater | ||
658 | * we need to update the bg_itable_unused count | ||
659 | * | ||
660 | */ | ||
661 | if (ino > free) | ||
662 | ext4_itable_unused_set(sb, gdp, | ||
663 | (EXT4_INODES_PER_GROUP(sb) - ino)); | ||
664 | } | ||
665 | count = ext4_free_inodes_count(sb, gdp) - 1; | ||
666 | ext4_free_inodes_set(sb, gdp, count); | ||
667 | if (S_ISDIR(mode)) { | ||
668 | count = ext4_used_dirs_count(sb, gdp) + 1; | ||
669 | ext4_used_dirs_set(sb, gdp, count); | ||
670 | if (sbi->s_log_groups_per_flex) { | ||
671 | ext4_group_t f = ext4_flex_group(sbi, group); | ||
672 | |||
673 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); | ||
674 | } | ||
675 | } | ||
676 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
677 | err_ret: | ||
678 | ext4_unlock_group(sb, group); | ||
679 | up_read(&grp->alloc_sem); | ||
680 | return retval; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * There are two policies for allocating an inode. If the new inode is | 607 | * There are two policies for allocating an inode. If the new inode is |
685 | * a directory, then a forward search is made for a block group with both | 608 | * a directory, then a forward search is made for a block group with both |
686 | * free space and a low directory-to-inode ratio; if that fails, then of | 609 | * free space and a low directory-to-inode ratio; if that fails, then of |
@@ -741,6 +664,11 @@ got_group: | |||
741 | if (ret2 == -1) | 664 | if (ret2 == -1) |
742 | goto out; | 665 | goto out; |
743 | 666 | ||
667 | /* | ||
668 | * Normally we will only go through one pass of this loop, | ||
669 | * unless we get unlucky and it turns out the group we selected | ||
670 | * had its last inode grabbed by someone else. | ||
671 | */ | ||
744 | for (i = 0; i < ngroups; i++, ino = 0) { | 672 | for (i = 0; i < ngroups; i++, ino = 0) { |
745 | err = -EIO; | 673 | err = -EIO; |
746 | 674 | ||
@@ -757,51 +685,24 @@ repeat_in_this_group: | |||
757 | ino = ext4_find_next_zero_bit((unsigned long *) | 685 | ino = ext4_find_next_zero_bit((unsigned long *) |
758 | inode_bitmap_bh->b_data, | 686 | inode_bitmap_bh->b_data, |
759 | EXT4_INODES_PER_GROUP(sb), ino); | 687 | EXT4_INODES_PER_GROUP(sb), ino); |
760 | 688 | if (ino >= EXT4_INODES_PER_GROUP(sb)) { | |
761 | if (ino < EXT4_INODES_PER_GROUP(sb)) { | 689 | if (++group == ngroups) |
762 | 690 | group = 0; | |
763 | BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); | 691 | continue; |
764 | err = ext4_journal_get_write_access(handle, | ||
765 | inode_bitmap_bh); | ||
766 | if (err) | ||
767 | goto fail; | ||
768 | |||
769 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
770 | err = ext4_journal_get_write_access(handle, | ||
771 | group_desc_bh); | ||
772 | if (err) | ||
773 | goto fail; | ||
774 | if (!ext4_claim_inode(sb, inode_bitmap_bh, | ||
775 | ino, group, mode)) { | ||
776 | /* we won it */ | ||
777 | BUFFER_TRACE(inode_bitmap_bh, | ||
778 | "call ext4_handle_dirty_metadata"); | ||
779 | err = ext4_handle_dirty_metadata(handle, | ||
780 | NULL, | ||
781 | inode_bitmap_bh); | ||
782 | if (err) | ||
783 | goto fail; | ||
784 | /* zero bit is inode number 1*/ | ||
785 | ino++; | ||
786 | goto got; | ||
787 | } | ||
788 | /* we lost it */ | ||
789 | ext4_handle_release_buffer(handle, inode_bitmap_bh); | ||
790 | ext4_handle_release_buffer(handle, group_desc_bh); | ||
791 | |||
792 | if (++ino < EXT4_INODES_PER_GROUP(sb)) | ||
793 | goto repeat_in_this_group; | ||
794 | } | 692 | } |
795 | 693 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | |
796 | /* | 694 | ext4_error(sb, "reserved inode found cleared - " |
797 | * This case is possible in concurrent environment. It is very | 695 | "inode=%lu", ino + 1); |
798 | * rare. We cannot repeat the find_group_xxx() call because | 696 | continue; |
799 | * that will simply return the same blockgroup, because the | 697 | } |
800 | * group descriptor metadata has not yet been updated. | 698 | ext4_lock_group(sb, group); |
801 | * So we just go onto the next blockgroup. | 699 | ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); |
802 | */ | 700 | ext4_unlock_group(sb, group); |
803 | if (++group == ngroups) | 701 | ino++; /* the inode bitmap is zero-based */ |
804 | group = 0; | 702 | if (!ret2) |
703 | goto got; /* we grabbed the inode! */ | ||
704 | if (ino < EXT4_INODES_PER_GROUP(sb)) | ||
705 | goto repeat_in_this_group; | ||
805 | } | 706 | } |
806 | err = -ENOSPC; | 707 | err = -ENOSPC; |
807 | goto out; | 708 | goto out; |
@@ -838,6 +739,59 @@ got: | |||
838 | if (err) | 739 | if (err) |
839 | goto fail; | 740 | goto fail; |
840 | } | 741 | } |
742 | |||
743 | BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); | ||
744 | err = ext4_journal_get_write_access(handle, inode_bitmap_bh); | ||
745 | if (err) | ||
746 | goto fail; | ||
747 | |||
748 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
749 | err = ext4_journal_get_write_access(handle, group_desc_bh); | ||
750 | if (err) | ||
751 | goto fail; | ||
752 | |||
753 | /* Update the relevant bg descriptor fields */ | ||
754 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
755 | int free; | ||
756 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
757 | |||
758 | down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ | ||
759 | ext4_lock_group(sb, group); /* while we modify the bg desc */ | ||
760 | free = EXT4_INODES_PER_GROUP(sb) - | ||
761 | ext4_itable_unused_count(sb, gdp); | ||
762 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | ||
763 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); | ||
764 | free = 0; | ||
765 | } | ||
766 | /* | ||
767 | * Check the relative inode number against the last used | ||
768 | * relative inode number in this group. if it is greater | ||
769 | * we need to update the bg_itable_unused count | ||
770 | */ | ||
771 | if (ino > free) | ||
772 | ext4_itable_unused_set(sb, gdp, | ||
773 | (EXT4_INODES_PER_GROUP(sb) - ino)); | ||
774 | up_read(&grp->alloc_sem); | ||
775 | } | ||
776 | ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); | ||
777 | if (S_ISDIR(mode)) { | ||
778 | ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); | ||
779 | if (sbi->s_log_groups_per_flex) { | ||
780 | ext4_group_t f = ext4_flex_group(sbi, group); | ||
781 | |||
782 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); | ||
783 | } | ||
784 | } | ||
785 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
787 | ext4_unlock_group(sb, group); | ||
788 | } | ||
789 | |||
790 | BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); | ||
791 | err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); | ||
792 | if (err) | ||
793 | goto fail; | ||
794 | |||
841 | BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); | 795 | BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); |
842 | err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); | 796 | err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); |
843 | if (err) | 797 | if (err) |
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1101 | * where it is called from on active part of filesystem is ext4lazyinit | 1055 | * where it is called from on active part of filesystem is ext4lazyinit |
1102 | * thread, so we do not need any special locks, however we have to prevent | 1056 | * thread, so we do not need any special locks, however we have to prevent |
1103 | * inode allocation from the current group, so we take alloc_sem lock, to | 1057 | * inode allocation from the current group, so we take alloc_sem lock, to |
1104 | * block ext4_claim_inode until we are finished. | 1058 | * block ext4_new_inode() until we are finished. |
1105 | */ | 1059 | */ |
1106 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | 1060 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, |
1107 | int barrier) | 1061 | int barrier) |
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | |||
1149 | sbi->s_inodes_per_block); | 1103 | sbi->s_inodes_per_block); |
1150 | 1104 | ||
1151 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | 1105 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { |
1152 | ext4_error(sb, "Something is wrong with group %u\n" | 1106 | ext4_error(sb, "Something is wrong with group %u: " |
1153 | "Used itable blocks: %d" | 1107 | "used itable blocks: %d; " |
1154 | "itable unused count: %u\n", | 1108 | "itable unused count: %u", |
1155 | group, used_blks, | 1109 | group, used_blks, |
1156 | ext4_itable_unused_count(sb, gdp)); | 1110 | ext4_itable_unused_count(sb, gdp)); |
1157 | ret = 1; | 1111 | ret = 1; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index feaa82fe629d..c77b0bd2c711 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
272 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); | 272 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
273 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 273 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
274 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 274 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
275 | "with only %d reserved data blocks\n", | 275 | "with only %d reserved data blocks", |
276 | __func__, inode->i_ino, used, | 276 | __func__, inode->i_ino, used, |
277 | ei->i_reserved_data_blocks); | 277 | ei->i_reserved_data_blocks); |
278 | WARN_ON(1); | 278 | WARN_ON(1); |
@@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1165 | */ | 1165 | */ |
1166 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " | 1166 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " |
1167 | "ino %lu, to_free %d with only %d reserved " | 1167 | "ino %lu, to_free %d with only %d reserved " |
1168 | "data blocks\n", inode->i_ino, to_free, | 1168 | "data blocks", inode->i_ino, to_free, |
1169 | ei->i_reserved_data_blocks); | 1169 | ei->i_reserved_data_blocks); |
1170 | WARN_ON(1); | 1170 | WARN_ON(1); |
1171 | to_free = ei->i_reserved_data_blocks; | 1171 | to_free = ei->i_reserved_data_blocks; |
@@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1428 | static void ext4_print_free_blocks(struct inode *inode) | 1428 | static void ext4_print_free_blocks(struct inode *inode) |
1429 | { | 1429 | { |
1430 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1430 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1431 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1431 | struct super_block *sb = inode->i_sb; |
1432 | |||
1433 | ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", | ||
1432 | EXT4_C2B(EXT4_SB(inode->i_sb), | 1434 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1433 | ext4_count_free_clusters(inode->i_sb))); | 1435 | ext4_count_free_clusters(inode->i_sb))); |
1434 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1436 | ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); |
1435 | printk(KERN_CRIT "free_blocks=%lld\n", | 1437 | ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", |
1436 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), | 1438 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1437 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | 1439 | percpu_counter_sum(&sbi->s_freeclusters_counter))); |
1438 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1440 | ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", |
1439 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), | 1441 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1440 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | 1442 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); |
1441 | printk(KERN_CRIT "Block reservation details\n"); | 1443 | ext4_msg(sb, KERN_CRIT, "Block reservation details"); |
1442 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1444 | ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", |
1443 | EXT4_I(inode)->i_reserved_data_blocks); | 1445 | EXT4_I(inode)->i_reserved_data_blocks); |
1444 | printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", | 1446 | ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", |
1445 | EXT4_I(inode)->i_reserved_meta_blocks); | 1447 | EXT4_I(inode)->i_reserved_meta_blocks); |
1446 | return; | 1448 | return; |
1447 | } | 1449 | } |
@@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file, | |||
2482 | int write_mode = (int)(unsigned long)fsdata; | 2484 | int write_mode = (int)(unsigned long)fsdata; |
2483 | 2485 | ||
2484 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2486 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
2485 | if (ext4_should_order_data(inode)) { | 2487 | switch (ext4_inode_journal_mode(inode)) { |
2488 | case EXT4_INODE_ORDERED_DATA_MODE: | ||
2486 | return ext4_ordered_write_end(file, mapping, pos, | 2489 | return ext4_ordered_write_end(file, mapping, pos, |
2487 | len, copied, page, fsdata); | 2490 | len, copied, page, fsdata); |
2488 | } else if (ext4_should_writeback_data(inode)) { | 2491 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
2489 | return ext4_writeback_write_end(file, mapping, pos, | 2492 | return ext4_writeback_write_end(file, mapping, pos, |
2490 | len, copied, page, fsdata); | 2493 | len, copied, page, fsdata); |
2491 | } else { | 2494 | default: |
2492 | BUG(); | 2495 | BUG(); |
2493 | } | 2496 | } |
2494 | } | 2497 | } |
@@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2763 | goto out; | 2766 | goto out; |
2764 | 2767 | ||
2765 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 2768 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
2766 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 2769 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
2767 | iocb->private, io_end->inode->i_ino, iocb, offset, | 2770 | iocb->private, io_end->inode->i_ino, iocb, offset, |
2768 | size); | 2771 | size); |
2769 | 2772 | ||
@@ -2795,9 +2798,6 @@ out: | |||
2795 | 2798 | ||
2796 | /* queue the work to convert unwritten extents to written */ | 2799 | /* queue the work to convert unwritten extents to written */ |
2797 | queue_work(wq, &io_end->work); | 2800 | queue_work(wq, &io_end->work); |
2798 | |||
2799 | /* XXX: probably should move into the real I/O completion handler */ | ||
2800 | inode_dio_done(inode); | ||
2801 | } | 2801 | } |
2802 | 2802 | ||
2803 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 2803 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
@@ -2811,8 +2811,9 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2811 | goto out; | 2811 | goto out; |
2812 | 2812 | ||
2813 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | 2813 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { |
2814 | printk("sb umounted, discard end_io request for inode %lu\n", | 2814 | ext4_msg(io_end->inode->i_sb, KERN_INFO, |
2815 | io_end->inode->i_ino); | 2815 | "sb umounted, discard end_io request for inode %lu", |
2816 | io_end->inode->i_ino); | ||
2816 | ext4_free_io_end(io_end); | 2817 | ext4_free_io_end(io_end); |
2817 | goto out; | 2818 | goto out; |
2818 | } | 2819 | } |
@@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2921 | iocb->private = NULL; | 2922 | iocb->private = NULL; |
2922 | EXT4_I(inode)->cur_aio_dio = NULL; | 2923 | EXT4_I(inode)->cur_aio_dio = NULL; |
2923 | if (!is_sync_kiocb(iocb)) { | 2924 | if (!is_sync_kiocb(iocb)) { |
2924 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); | 2925 | ext4_io_end_t *io_end = |
2925 | if (!iocb->private) | 2926 | ext4_init_io_end(inode, GFP_NOFS); |
2927 | if (!io_end) | ||
2926 | return -ENOMEM; | 2928 | return -ENOMEM; |
2929 | io_end->flag |= EXT4_IO_END_DIRECT; | ||
2930 | iocb->private = io_end; | ||
2927 | /* | 2931 | /* |
2928 | * we save the io structure for current async | 2932 | * we save the io structure for current async |
2929 | * direct IO, so that later ext4_map_blocks() | 2933 | * direct IO, so that later ext4_map_blocks() |
@@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2940 | ext4_get_block_write, | 2944 | ext4_get_block_write, |
2941 | ext4_end_io_dio, | 2945 | ext4_end_io_dio, |
2942 | NULL, | 2946 | NULL, |
2943 | DIO_LOCKING | DIO_SKIP_HOLES); | 2947 | DIO_LOCKING); |
2944 | if (iocb->private) | 2948 | if (iocb->private) |
2945 | EXT4_I(inode)->cur_aio_dio = NULL; | 2949 | EXT4_I(inode)->cur_aio_dio = NULL; |
2946 | /* | 2950 | /* |
@@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = { | |||
3086 | 3090 | ||
3087 | void ext4_set_aops(struct inode *inode) | 3091 | void ext4_set_aops(struct inode *inode) |
3088 | { | 3092 | { |
3089 | if (ext4_should_order_data(inode) && | 3093 | switch (ext4_inode_journal_mode(inode)) { |
3090 | test_opt(inode->i_sb, DELALLOC)) | 3094 | case EXT4_INODE_ORDERED_DATA_MODE: |
3091 | inode->i_mapping->a_ops = &ext4_da_aops; | 3095 | if (test_opt(inode->i_sb, DELALLOC)) |
3092 | else if (ext4_should_order_data(inode)) | 3096 | inode->i_mapping->a_ops = &ext4_da_aops; |
3093 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 3097 | else |
3094 | else if (ext4_should_writeback_data(inode) && | 3098 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
3095 | test_opt(inode->i_sb, DELALLOC)) | 3099 | break; |
3096 | inode->i_mapping->a_ops = &ext4_da_aops; | 3100 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
3097 | else if (ext4_should_writeback_data(inode)) | 3101 | if (test_opt(inode->i_sb, DELALLOC)) |
3098 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 3102 | inode->i_mapping->a_ops = &ext4_da_aops; |
3099 | else | 3103 | else |
3104 | inode->i_mapping->a_ops = &ext4_writeback_aops; | ||
3105 | break; | ||
3106 | case EXT4_INODE_JOURNAL_DATA_MODE: | ||
3100 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3107 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
3108 | break; | ||
3109 | default: | ||
3110 | BUG(); | ||
3111 | } | ||
3101 | } | 3112 | } |
3102 | 3113 | ||
3103 | 3114 | ||
@@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3329 | { | 3340 | { |
3330 | struct inode *inode = file->f_path.dentry->d_inode; | 3341 | struct inode *inode = file->f_path.dentry->d_inode; |
3331 | if (!S_ISREG(inode->i_mode)) | 3342 | if (!S_ISREG(inode->i_mode)) |
3332 | return -ENOTSUPP; | 3343 | return -EOPNOTSUPP; |
3333 | 3344 | ||
3334 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 3345 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
3335 | /* TODO: Add support for non extent hole punching */ | 3346 | /* TODO: Add support for non extent hole punching */ |
3336 | return -ENOTSUPP; | 3347 | return -EOPNOTSUPP; |
3337 | } | 3348 | } |
3338 | 3349 | ||
3339 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | 3350 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { |
3340 | /* TODO: Add support for bigalloc file systems */ | 3351 | /* TODO: Add support for bigalloc file systems */ |
3341 | return -ENOTSUPP; | 3352 | return -EOPNOTSUPP; |
3342 | } | 3353 | } |
3343 | 3354 | ||
3344 | return ext4_ext_punch_hole(file, offset, length); | 3355 | return ext4_ext_punch_hole(file, offset, length); |
@@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
3924 | ext4_update_dynamic_rev(sb); | 3935 | ext4_update_dynamic_rev(sb); |
3925 | EXT4_SET_RO_COMPAT_FEATURE(sb, | 3936 | EXT4_SET_RO_COMPAT_FEATURE(sb, |
3926 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 3937 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
3927 | sb->s_dirt = 1; | ||
3928 | ext4_handle_sync(handle); | 3938 | ext4_handle_sync(handle); |
3929 | err = ext4_handle_dirty_metadata(handle, NULL, | 3939 | err = ext4_handle_dirty_super(handle, sb); |
3930 | EXT4_SB(sb)->s_sbh); | ||
3931 | } | 3940 | } |
3932 | } | 3941 | } |
3933 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 3942 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
@@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4152 | } | 4161 | } |
4153 | 4162 | ||
4154 | if (attr->ia_valid & ATTR_SIZE) { | 4163 | if (attr->ia_valid & ATTR_SIZE) { |
4155 | if (attr->ia_size != i_size_read(inode)) { | 4164 | if (attr->ia_size != i_size_read(inode)) |
4156 | truncate_setsize(inode, attr->ia_size); | 4165 | truncate_setsize(inode, attr->ia_size); |
4157 | ext4_truncate(inode); | 4166 | ext4_truncate(inode); |
4158 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
4159 | ext4_truncate(inode); | ||
4160 | } | 4167 | } |
4161 | 4168 | ||
4162 | if (!rc) { | 4169 | if (!rc) { |
@@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4314 | { | 4321 | { |
4315 | int err = 0; | 4322 | int err = 0; |
4316 | 4323 | ||
4317 | if (test_opt(inode->i_sb, I_VERSION)) | 4324 | if (IS_I_VERSION(inode)) |
4318 | inode_inc_iversion(inode); | 4325 | inode_inc_iversion(inode); |
4319 | 4326 | ||
4320 | /* the do_update_inode consumes one bh->b_count */ | 4327 | /* the do_update_inode consumes one bh->b_count */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cb990b21c698..99ab428bcfa0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -21,6 +21,7 @@ | |||
21 | * mballoc.c contains the multiblocks allocation routines | 21 | * mballoc.c contains the multiblocks allocation routines |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "ext4_jbd2.h" | ||
24 | #include "mballoc.h" | 25 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
26 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -339,7 +340,7 @@ | |||
339 | */ | 340 | */ |
340 | static struct kmem_cache *ext4_pspace_cachep; | 341 | static struct kmem_cache *ext4_pspace_cachep; |
341 | static struct kmem_cache *ext4_ac_cachep; | 342 | static struct kmem_cache *ext4_ac_cachep; |
342 | static struct kmem_cache *ext4_free_ext_cachep; | 343 | static struct kmem_cache *ext4_free_data_cachep; |
343 | 344 | ||
344 | /* We create slab caches for groupinfo data structures based on the | 345 | /* We create slab caches for groupinfo data structures based on the |
345 | * superblock block size. There will be one per mounted filesystem for | 346 | * superblock block size. There will be one per mounted filesystem for |
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
357 | ext4_group_t group); | 358 | ext4_group_t group); |
358 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 359 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
359 | ext4_group_t group); | 360 | ext4_group_t group); |
360 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | 361 | static void ext4_free_data_callback(struct super_block *sb, |
362 | struct ext4_journal_cb_entry *jce, int rc); | ||
361 | 363 | ||
362 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 364 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
363 | { | 365 | { |
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
425 | { | 427 | { |
426 | char *bb; | 428 | char *bb; |
427 | 429 | ||
428 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | 430 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); |
429 | BUG_ON(max == NULL); | 431 | BUG_ON(max == NULL); |
430 | 432 | ||
431 | if (order > e4b->bd_blkbits + 1) { | 433 | if (order > e4b->bd_blkbits + 1) { |
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
436 | /* at order 0 we see each particular block */ | 438 | /* at order 0 we see each particular block */ |
437 | if (order == 0) { | 439 | if (order == 0) { |
438 | *max = 1 << (e4b->bd_blkbits + 3); | 440 | *max = 1 << (e4b->bd_blkbits + 3); |
439 | return EXT4_MB_BITMAP(e4b); | 441 | return e4b->bd_bitmap; |
440 | } | 442 | } |
441 | 443 | ||
442 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 444 | bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
443 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 445 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
444 | 446 | ||
445 | return bb; | 447 | return bb; |
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
588 | for (j = 0; j < (1 << order); j++) { | 590 | for (j = 0; j < (1 << order); j++) { |
589 | k = (i * (1 << order)) + j; | 591 | k = (i * (1 << order)) + j; |
590 | MB_CHECK_ASSERT( | 592 | MB_CHECK_ASSERT( |
591 | !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); | 593 | !mb_test_bit(k, e4b->bd_bitmap)); |
592 | } | 594 | } |
593 | count++; | 595 | count++; |
594 | } | 596 | } |
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
782 | int groups_per_page; | 784 | int groups_per_page; |
783 | int err = 0; | 785 | int err = 0; |
784 | int i; | 786 | int i; |
785 | ext4_group_t first_group; | 787 | ext4_group_t first_group, group; |
786 | int first_block; | 788 | int first_block; |
787 | struct super_block *sb; | 789 | struct super_block *sb; |
788 | struct buffer_head *bhs; | 790 | struct buffer_head *bhs; |
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
806 | 808 | ||
807 | /* allocate buffer_heads to read bitmaps */ | 809 | /* allocate buffer_heads to read bitmaps */ |
808 | if (groups_per_page > 1) { | 810 | if (groups_per_page > 1) { |
809 | err = -ENOMEM; | ||
810 | i = sizeof(struct buffer_head *) * groups_per_page; | 811 | i = sizeof(struct buffer_head *) * groups_per_page; |
811 | bh = kzalloc(i, GFP_NOFS); | 812 | bh = kzalloc(i, GFP_NOFS); |
812 | if (bh == NULL) | 813 | if (bh == NULL) { |
814 | err = -ENOMEM; | ||
813 | goto out; | 815 | goto out; |
816 | } | ||
814 | } else | 817 | } else |
815 | bh = &bhs; | 818 | bh = &bhs; |
816 | 819 | ||
817 | first_group = page->index * blocks_per_page / 2; | 820 | first_group = page->index * blocks_per_page / 2; |
818 | 821 | ||
819 | /* read all groups the page covers into the cache */ | 822 | /* read all groups the page covers into the cache */ |
820 | for (i = 0; i < groups_per_page; i++) { | 823 | for (i = 0, group = first_group; i < groups_per_page; i++, group++) { |
821 | struct ext4_group_desc *desc; | 824 | if (group >= ngroups) |
822 | |||
823 | if (first_group + i >= ngroups) | ||
824 | break; | 825 | break; |
825 | 826 | ||
826 | grinfo = ext4_get_group_info(sb, first_group + i); | 827 | grinfo = ext4_get_group_info(sb, group); |
827 | /* | 828 | /* |
828 | * If page is uptodate then we came here after online resize | 829 | * If page is uptodate then we came here after online resize |
829 | * which added some new uninitialized group info structs, so | 830 | * which added some new uninitialized group info structs, so |
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
834 | bh[i] = NULL; | 835 | bh[i] = NULL; |
835 | continue; | 836 | continue; |
836 | } | 837 | } |
837 | 838 | if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { | |
838 | err = -EIO; | 839 | err = -ENOMEM; |
839 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | ||
840 | if (desc == NULL) | ||
841 | goto out; | ||
842 | |||
843 | err = -ENOMEM; | ||
844 | bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); | ||
845 | if (bh[i] == NULL) | ||
846 | goto out; | 840 | goto out; |
847 | |||
848 | if (bitmap_uptodate(bh[i])) | ||
849 | continue; | ||
850 | |||
851 | lock_buffer(bh[i]); | ||
852 | if (bitmap_uptodate(bh[i])) { | ||
853 | unlock_buffer(bh[i]); | ||
854 | continue; | ||
855 | } | ||
856 | ext4_lock_group(sb, first_group + i); | ||
857 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
858 | ext4_init_block_bitmap(sb, bh[i], | ||
859 | first_group + i, desc); | ||
860 | set_bitmap_uptodate(bh[i]); | ||
861 | set_buffer_uptodate(bh[i]); | ||
862 | ext4_unlock_group(sb, first_group + i); | ||
863 | unlock_buffer(bh[i]); | ||
864 | continue; | ||
865 | } | 841 | } |
866 | ext4_unlock_group(sb, first_group + i); | 842 | mb_debug(1, "read bitmap for group %u\n", group); |
867 | if (buffer_uptodate(bh[i])) { | ||
868 | /* | ||
869 | * if not uninit if bh is uptodate, | ||
870 | * bitmap is also uptodate | ||
871 | */ | ||
872 | set_bitmap_uptodate(bh[i]); | ||
873 | unlock_buffer(bh[i]); | ||
874 | continue; | ||
875 | } | ||
876 | get_bh(bh[i]); | ||
877 | /* | ||
878 | * submit the buffer_head for read. We can | ||
879 | * safely mark the bitmap as uptodate now. | ||
880 | * We do it here so the bitmap uptodate bit | ||
881 | * get set with buffer lock held. | ||
882 | */ | ||
883 | set_bitmap_uptodate(bh[i]); | ||
884 | bh[i]->b_end_io = end_buffer_read_sync; | ||
885 | submit_bh(READ, bh[i]); | ||
886 | mb_debug(1, "read bitmap for group %u\n", first_group + i); | ||
887 | } | 843 | } |
888 | 844 | ||
889 | /* wait for I/O completion */ | 845 | /* wait for I/O completion */ |
890 | for (i = 0; i < groups_per_page; i++) | 846 | for (i = 0, group = first_group; i < groups_per_page; i++, group++) { |
891 | if (bh[i]) | 847 | if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { |
892 | wait_on_buffer(bh[i]); | 848 | err = -EIO; |
893 | |||
894 | err = -EIO; | ||
895 | for (i = 0; i < groups_per_page; i++) | ||
896 | if (bh[i] && !buffer_uptodate(bh[i])) | ||
897 | goto out; | 849 | goto out; |
850 | } | ||
851 | } | ||
898 | 852 | ||
899 | err = 0; | ||
900 | first_block = page->index * blocks_per_page; | 853 | first_block = page->index * blocks_per_page; |
901 | for (i = 0; i < blocks_per_page; i++) { | 854 | for (i = 0; i < blocks_per_page; i++) { |
902 | int group; | 855 | int group; |
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | |||
1250 | int order = 1; | 1203 | int order = 1; |
1251 | void *bb; | 1204 | void *bb; |
1252 | 1205 | ||
1253 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | 1206 | BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); |
1254 | BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); | 1207 | BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); |
1255 | 1208 | ||
1256 | bb = EXT4_MB_BUDDY(e4b); | 1209 | bb = e4b->bd_buddy; |
1257 | while (order <= e4b->bd_blkbits + 1) { | 1210 | while (order <= e4b->bd_blkbits + 1) { |
1258 | block = block >> 1; | 1211 | block = block >> 1; |
1259 | if (!mb_test_bit(block, bb)) { | 1212 | if (!mb_test_bit(block, bb)) { |
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1323 | 1276 | ||
1324 | /* let's maintain fragments counter */ | 1277 | /* let's maintain fragments counter */ |
1325 | if (first != 0) | 1278 | if (first != 0) |
1326 | block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); | 1279 | block = !mb_test_bit(first - 1, e4b->bd_bitmap); |
1327 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) | 1280 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) |
1328 | max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); | 1281 | max = !mb_test_bit(first + count, e4b->bd_bitmap); |
1329 | if (block && max) | 1282 | if (block && max) |
1330 | e4b->bd_info->bb_fragments--; | 1283 | e4b->bd_info->bb_fragments--; |
1331 | else if (!block && !max) | 1284 | else if (!block && !max) |
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1336 | block = first++; | 1289 | block = first++; |
1337 | order = 0; | 1290 | order = 0; |
1338 | 1291 | ||
1339 | if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { | 1292 | if (!mb_test_bit(block, e4b->bd_bitmap)) { |
1340 | ext4_fsblk_t blocknr; | 1293 | ext4_fsblk_t blocknr; |
1341 | 1294 | ||
1342 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1295 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1347 | "freeing already freed block " | 1300 | "freeing already freed block " |
1348 | "(bit %u)", block); | 1301 | "(bit %u)", block); |
1349 | } | 1302 | } |
1350 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1303 | mb_clear_bit(block, e4b->bd_bitmap); |
1351 | e4b->bd_info->bb_counters[order]++; | 1304 | e4b->bd_info->bb_counters[order]++; |
1352 | 1305 | ||
1353 | /* start of the buddy */ | 1306 | /* start of the buddy */ |
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1429 | break; | 1382 | break; |
1430 | 1383 | ||
1431 | next = (block + 1) * (1 << order); | 1384 | next = (block + 1) * (1 << order); |
1432 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) | 1385 | if (mb_test_bit(next, e4b->bd_bitmap)) |
1433 | break; | 1386 | break; |
1434 | 1387 | ||
1435 | order = mb_find_order_for_block(e4b, next); | 1388 | order = mb_find_order_for_block(e4b, next); |
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1466 | 1419 | ||
1467 | /* let's maintain fragments counter */ | 1420 | /* let's maintain fragments counter */ |
1468 | if (start != 0) | 1421 | if (start != 0) |
1469 | mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); | 1422 | mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); |
1470 | if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) | 1423 | if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) |
1471 | max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); | 1424 | max = !mb_test_bit(start + len, e4b->bd_bitmap); |
1472 | if (mlen && max) | 1425 | if (mlen && max) |
1473 | e4b->bd_info->bb_fragments++; | 1426 | e4b->bd_info->bb_fragments++; |
1474 | else if (!mlen && !max) | 1427 | else if (!mlen && !max) |
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1511 | } | 1464 | } |
1512 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | 1465 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); |
1513 | 1466 | ||
1514 | ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1467 | ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); |
1515 | mb_check_buddy(e4b); | 1468 | mb_check_buddy(e4b); |
1516 | 1469 | ||
1517 | return ret; | 1470 | return ret; |
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1810 | struct ext4_buddy *e4b) | 1763 | struct ext4_buddy *e4b) |
1811 | { | 1764 | { |
1812 | struct super_block *sb = ac->ac_sb; | 1765 | struct super_block *sb = ac->ac_sb; |
1813 | void *bitmap = EXT4_MB_BITMAP(e4b); | 1766 | void *bitmap = e4b->bd_bitmap; |
1814 | struct ext4_free_extent ex; | 1767 | struct ext4_free_extent ex; |
1815 | int i; | 1768 | int i; |
1816 | int free; | 1769 | int free; |
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1870 | { | 1823 | { |
1871 | struct super_block *sb = ac->ac_sb; | 1824 | struct super_block *sb = ac->ac_sb; |
1872 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1825 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1873 | void *bitmap = EXT4_MB_BITMAP(e4b); | 1826 | void *bitmap = e4b->bd_bitmap; |
1874 | struct ext4_free_extent ex; | 1827 | struct ext4_free_extent ex; |
1875 | ext4_fsblk_t first_group_block; | 1828 | ext4_fsblk_t first_group_block; |
1876 | ext4_fsblk_t a; | 1829 | ext4_fsblk_t a; |
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2224 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2177 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2225 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2178 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2226 | if (meta_group_info == NULL) { | 2179 | if (meta_group_info == NULL) { |
2227 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " | 2180 | ext4_msg(sb, KERN_ERR, "can't allocate mem " |
2228 | "for a buddy group"); | 2181 | "for a buddy group"); |
2229 | goto exit_meta_group_info; | 2182 | goto exit_meta_group_info; |
2230 | } | 2183 | } |
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2238 | 2191 | ||
2239 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); | 2192 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2240 | if (meta_group_info[i] == NULL) { | 2193 | if (meta_group_info[i] == NULL) { |
2241 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); | 2194 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2242 | goto exit_group_info; | 2195 | goto exit_group_info; |
2243 | } | 2196 | } |
2244 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | 2197 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); |
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2522 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2475 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2523 | &ext4_mb_seq_groups_fops, sb); | 2476 | &ext4_mb_seq_groups_fops, sb); |
2524 | 2477 | ||
2525 | if (sbi->s_journal) | ||
2526 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2527 | |||
2528 | return 0; | 2478 | return 0; |
2529 | 2479 | ||
2530 | out_free_locality_groups: | 2480 | out_free_locality_groups: |
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb, | |||
2637 | * This function is called by the jbd2 layer once the commit has finished, | 2587 | * This function is called by the jbd2 layer once the commit has finished, |
2638 | * so we know we can free the blocks that were released with that commit. | 2588 | * so we know we can free the blocks that were released with that commit. |
2639 | */ | 2589 | */ |
2640 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | 2590 | static void ext4_free_data_callback(struct super_block *sb, |
2591 | struct ext4_journal_cb_entry *jce, | ||
2592 | int rc) | ||
2641 | { | 2593 | { |
2642 | struct super_block *sb = journal->j_private; | 2594 | struct ext4_free_data *entry = (struct ext4_free_data *)jce; |
2643 | struct ext4_buddy e4b; | 2595 | struct ext4_buddy e4b; |
2644 | struct ext4_group_info *db; | 2596 | struct ext4_group_info *db; |
2645 | int err, count = 0, count2 = 0; | 2597 | int err, count = 0, count2 = 0; |
2646 | struct ext4_free_data *entry; | ||
2647 | struct list_head *l, *ltmp; | ||
2648 | 2598 | ||
2649 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2599 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2650 | entry = list_entry(l, struct ext4_free_data, list); | 2600 | entry->efd_count, entry->efd_group, entry); |
2651 | 2601 | ||
2652 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2602 | if (test_opt(sb, DISCARD)) |
2653 | entry->count, entry->group, entry); | 2603 | ext4_issue_discard(sb, entry->efd_group, |
2604 | entry->efd_start_cluster, entry->efd_count); | ||
2654 | 2605 | ||
2655 | if (test_opt(sb, DISCARD)) | 2606 | err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); |
2656 | ext4_issue_discard(sb, entry->group, | 2607 | /* we expect to find existing buddy because it's pinned */ |
2657 | entry->start_cluster, entry->count); | 2608 | BUG_ON(err != 0); |
2658 | 2609 | ||
2659 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | ||
2660 | /* we expect to find existing buddy because it's pinned */ | ||
2661 | BUG_ON(err != 0); | ||
2662 | 2610 | ||
2663 | db = e4b.bd_info; | 2611 | db = e4b.bd_info; |
2664 | /* there are blocks to put in buddy to make them really free */ | 2612 | /* there are blocks to put in buddy to make them really free */ |
2665 | count += entry->count; | 2613 | count += entry->efd_count; |
2666 | count2++; | 2614 | count2++; |
2667 | ext4_lock_group(sb, entry->group); | 2615 | ext4_lock_group(sb, entry->efd_group); |
2668 | /* Take it out of per group rb tree */ | 2616 | /* Take it out of per group rb tree */ |
2669 | rb_erase(&entry->node, &(db->bb_free_root)); | 2617 | rb_erase(&entry->efd_node, &(db->bb_free_root)); |
2670 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); | 2618 | mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); |
2671 | 2619 | ||
2672 | /* | 2620 | /* |
2673 | * Clear the trimmed flag for the group so that the next | 2621 | * Clear the trimmed flag for the group so that the next |
2674 | * ext4_trim_fs can trim it. | 2622 | * ext4_trim_fs can trim it. |
2675 | * If the volume is mounted with -o discard, online discard | 2623 | * If the volume is mounted with -o discard, online discard |
2676 | * is supported and the free blocks will be trimmed online. | 2624 | * is supported and the free blocks will be trimmed online. |
2677 | */ | 2625 | */ |
2678 | if (!test_opt(sb, DISCARD)) | 2626 | if (!test_opt(sb, DISCARD)) |
2679 | EXT4_MB_GRP_CLEAR_TRIMMED(db); | 2627 | EXT4_MB_GRP_CLEAR_TRIMMED(db); |
2680 | 2628 | ||
2681 | if (!db->bb_free_root.rb_node) { | 2629 | if (!db->bb_free_root.rb_node) { |
2682 | /* No more items in the per group rb tree | 2630 | /* No more items in the per group rb tree |
2683 | * balance refcounts from ext4_mb_free_metadata() | 2631 | * balance refcounts from ext4_mb_free_metadata() |
2684 | */ | 2632 | */ |
2685 | page_cache_release(e4b.bd_buddy_page); | 2633 | page_cache_release(e4b.bd_buddy_page); |
2686 | page_cache_release(e4b.bd_bitmap_page); | 2634 | page_cache_release(e4b.bd_bitmap_page); |
2687 | } | ||
2688 | ext4_unlock_group(sb, entry->group); | ||
2689 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2690 | ext4_mb_unload_buddy(&e4b); | ||
2691 | } | 2635 | } |
2636 | ext4_unlock_group(sb, entry->efd_group); | ||
2637 | kmem_cache_free(ext4_free_data_cachep, entry); | ||
2638 | ext4_mb_unload_buddy(&e4b); | ||
2692 | 2639 | ||
2693 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); | 2640 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2694 | } | 2641 | } |
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void) | |||
2741 | return -ENOMEM; | 2688 | return -ENOMEM; |
2742 | } | 2689 | } |
2743 | 2690 | ||
2744 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, | 2691 | ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, |
2745 | SLAB_RECLAIM_ACCOUNT); | 2692 | SLAB_RECLAIM_ACCOUNT); |
2746 | if (ext4_free_ext_cachep == NULL) { | 2693 | if (ext4_free_data_cachep == NULL) { |
2747 | kmem_cache_destroy(ext4_pspace_cachep); | 2694 | kmem_cache_destroy(ext4_pspace_cachep); |
2748 | kmem_cache_destroy(ext4_ac_cachep); | 2695 | kmem_cache_destroy(ext4_ac_cachep); |
2749 | return -ENOMEM; | 2696 | return -ENOMEM; |
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void) | |||
2761 | rcu_barrier(); | 2708 | rcu_barrier(); |
2762 | kmem_cache_destroy(ext4_pspace_cachep); | 2709 | kmem_cache_destroy(ext4_pspace_cachep); |
2763 | kmem_cache_destroy(ext4_ac_cachep); | 2710 | kmem_cache_destroy(ext4_ac_cachep); |
2764 | kmem_cache_destroy(ext4_free_ext_cachep); | 2711 | kmem_cache_destroy(ext4_free_data_cachep); |
2765 | ext4_groupinfo_destroy_slabs(); | 2712 | ext4_groupinfo_destroy_slabs(); |
2766 | ext4_remove_debugfs_entry(); | 2713 | ext4_remove_debugfs_entry(); |
2767 | } | 2714 | } |
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2815 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); | 2762 | len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); |
2816 | if (!ext4_data_block_valid(sbi, block, len)) { | 2763 | if (!ext4_data_block_valid(sbi, block, len)) { |
2817 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " | 2764 | ext4_error(sb, "Allocating blocks %llu-%llu which overlap " |
2818 | "fs metadata\n", block, block+len); | 2765 | "fs metadata", block, block+len); |
2819 | /* File system mounted not to panic on error | 2766 | /* File system mounted not to panic on error |
2820 | * Fix the bitmap and repeat the block allocation | 2767 | * Fix the bitmap and repeat the block allocation |
2821 | * We leak some of the blocks here. | 2768 | * We leak some of the blocks here. |
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2911 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 2858 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
2912 | int bsbits, max; | 2859 | int bsbits, max; |
2913 | ext4_lblk_t end; | 2860 | ext4_lblk_t end; |
2914 | loff_t size, orig_size, start_off; | 2861 | loff_t size, start_off; |
2862 | loff_t orig_size __maybe_unused; | ||
2915 | ext4_lblk_t start; | 2863 | ext4_lblk_t start; |
2916 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 2864 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
2917 | struct ext4_prealloc_space *pa; | 2865 | struct ext4_prealloc_space *pa; |
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3321 | n = rb_first(&(grp->bb_free_root)); | 3269 | n = rb_first(&(grp->bb_free_root)); |
3322 | 3270 | ||
3323 | while (n) { | 3271 | while (n) { |
3324 | entry = rb_entry(n, struct ext4_free_data, node); | 3272 | entry = rb_entry(n, struct ext4_free_data, efd_node); |
3325 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); | 3273 | ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); |
3326 | n = rb_next(n); | 3274 | n = rb_next(n); |
3327 | } | 3275 | } |
3328 | return; | 3276 | return; |
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3916 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | 3864 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) |
3917 | return; | 3865 | return; |
3918 | 3866 | ||
3919 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" | 3867 | ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" |
3920 | " Allocation context details:"); | 3868 | " Allocation context details:"); |
3921 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", | 3869 | ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", |
3922 | ac->ac_status, ac->ac_flags); | 3870 | ac->ac_status, ac->ac_flags); |
3923 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " | 3871 | ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " |
3924 | "goal %lu/%lu/%lu@%lu, " | 3872 | "goal %lu/%lu/%lu@%lu, " |
3925 | "best %lu/%lu/%lu@%lu cr %d", | 3873 | "best %lu/%lu/%lu@%lu cr %d", |
3926 | (unsigned long)ac->ac_o_ex.fe_group, | 3874 | (unsigned long)ac->ac_o_ex.fe_group, |
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3936 | (unsigned long)ac->ac_b_ex.fe_len, | 3884 | (unsigned long)ac->ac_b_ex.fe_len, |
3937 | (unsigned long)ac->ac_b_ex.fe_logical, | 3885 | (unsigned long)ac->ac_b_ex.fe_logical, |
3938 | (int)ac->ac_criteria); | 3886 | (int)ac->ac_criteria); |
3939 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", | 3887 | ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", |
3940 | ac->ac_ex_scanned, ac->ac_found); | 3888 | ac->ac_ex_scanned, ac->ac_found); |
3941 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); | 3889 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); |
3942 | ngroups = ext4_get_groups_count(sb); | 3890 | ngroups = ext4_get_groups_count(sb); |
3943 | for (i = 0; i < ngroups; i++) { | 3891 | for (i = 0; i < ngroups; i++) { |
3944 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 3892 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
@@ -4428,9 +4376,9 @@ out: | |||
4428 | static int can_merge(struct ext4_free_data *entry1, | 4376 | static int can_merge(struct ext4_free_data *entry1, |
4429 | struct ext4_free_data *entry2) | 4377 | struct ext4_free_data *entry2) |
4430 | { | 4378 | { |
4431 | if ((entry1->t_tid == entry2->t_tid) && | 4379 | if ((entry1->efd_tid == entry2->efd_tid) && |
4432 | (entry1->group == entry2->group) && | 4380 | (entry1->efd_group == entry2->efd_group) && |
4433 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) | 4381 | ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) |
4434 | return 1; | 4382 | return 1; |
4435 | return 0; | 4383 | return 0; |
4436 | } | 4384 | } |
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4452 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4400 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4453 | BUG_ON(e4b->bd_buddy_page == NULL); | 4401 | BUG_ON(e4b->bd_buddy_page == NULL); |
4454 | 4402 | ||
4455 | new_node = &new_entry->node; | 4403 | new_node = &new_entry->efd_node; |
4456 | cluster = new_entry->start_cluster; | 4404 | cluster = new_entry->efd_start_cluster; |
4457 | 4405 | ||
4458 | if (!*n) { | 4406 | if (!*n) { |
4459 | /* first free block exent. We need to | 4407 | /* first free block exent. We need to |
@@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4466 | } | 4414 | } |
4467 | while (*n) { | 4415 | while (*n) { |
4468 | parent = *n; | 4416 | parent = *n; |
4469 | entry = rb_entry(parent, struct ext4_free_data, node); | 4417 | entry = rb_entry(parent, struct ext4_free_data, efd_node); |
4470 | if (cluster < entry->start_cluster) | 4418 | if (cluster < entry->efd_start_cluster) |
4471 | n = &(*n)->rb_left; | 4419 | n = &(*n)->rb_left; |
4472 | else if (cluster >= (entry->start_cluster + entry->count)) | 4420 | else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) |
4473 | n = &(*n)->rb_right; | 4421 | n = &(*n)->rb_right; |
4474 | else { | 4422 | else { |
4475 | ext4_grp_locked_error(sb, group, 0, | 4423 | ext4_grp_locked_error(sb, group, 0, |
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4486 | /* Now try to see the extent can be merged to left and right */ | 4434 | /* Now try to see the extent can be merged to left and right */ |
4487 | node = rb_prev(new_node); | 4435 | node = rb_prev(new_node); |
4488 | if (node) { | 4436 | if (node) { |
4489 | entry = rb_entry(node, struct ext4_free_data, node); | 4437 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4490 | if (can_merge(entry, new_entry)) { | 4438 | if (can_merge(entry, new_entry)) { |
4491 | new_entry->start_cluster = entry->start_cluster; | 4439 | new_entry->efd_start_cluster = entry->efd_start_cluster; |
4492 | new_entry->count += entry->count; | 4440 | new_entry->efd_count += entry->efd_count; |
4493 | rb_erase(node, &(db->bb_free_root)); | 4441 | rb_erase(node, &(db->bb_free_root)); |
4494 | spin_lock(&sbi->s_md_lock); | 4442 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4495 | list_del(&entry->list); | 4443 | kmem_cache_free(ext4_free_data_cachep, entry); |
4496 | spin_unlock(&sbi->s_md_lock); | ||
4497 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4498 | } | 4444 | } |
4499 | } | 4445 | } |
4500 | 4446 | ||
4501 | node = rb_next(new_node); | 4447 | node = rb_next(new_node); |
4502 | if (node) { | 4448 | if (node) { |
4503 | entry = rb_entry(node, struct ext4_free_data, node); | 4449 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4504 | if (can_merge(new_entry, entry)) { | 4450 | if (can_merge(new_entry, entry)) { |
4505 | new_entry->count += entry->count; | 4451 | new_entry->efd_count += entry->efd_count; |
4506 | rb_erase(node, &(db->bb_free_root)); | 4452 | rb_erase(node, &(db->bb_free_root)); |
4507 | spin_lock(&sbi->s_md_lock); | 4453 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4508 | list_del(&entry->list); | 4454 | kmem_cache_free(ext4_free_data_cachep, entry); |
4509 | spin_unlock(&sbi->s_md_lock); | ||
4510 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4511 | } | 4455 | } |
4512 | } | 4456 | } |
4513 | /* Add the extent to transaction's private list */ | 4457 | /* Add the extent to transaction's private list */ |
4514 | spin_lock(&sbi->s_md_lock); | 4458 | ext4_journal_callback_add(handle, ext4_free_data_callback, |
4515 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | 4459 | &new_entry->efd_jce); |
4516 | spin_unlock(&sbi->s_md_lock); | ||
4517 | return 0; | 4460 | return 0; |
4518 | } | 4461 | } |
4519 | 4462 | ||
@@ -4691,15 +4634,15 @@ do_more: | |||
4691 | * blocks being freed are metadata. these blocks shouldn't | 4634 | * blocks being freed are metadata. these blocks shouldn't |
4692 | * be used until this transaction is committed | 4635 | * be used until this transaction is committed |
4693 | */ | 4636 | */ |
4694 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | 4637 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4695 | if (!new_entry) { | 4638 | if (!new_entry) { |
4696 | err = -ENOMEM; | 4639 | err = -ENOMEM; |
4697 | goto error_return; | 4640 | goto error_return; |
4698 | } | 4641 | } |
4699 | new_entry->start_cluster = bit; | 4642 | new_entry->efd_start_cluster = bit; |
4700 | new_entry->group = block_group; | 4643 | new_entry->efd_group = block_group; |
4701 | new_entry->count = count_clusters; | 4644 | new_entry->efd_count = count_clusters; |
4702 | new_entry->t_tid = handle->h_transaction->t_tid; | 4645 | new_entry->efd_tid = handle->h_transaction->t_tid; |
4703 | 4646 | ||
4704 | ext4_lock_group(sb, block_group); | 4647 | ext4_lock_group(sb, block_group); |
4705 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); | 4648 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4971 | start = (e4b.bd_info->bb_first_free > start) ? | 4914 | start = (e4b.bd_info->bb_first_free > start) ? |
4972 | e4b.bd_info->bb_first_free : start; | 4915 | e4b.bd_info->bb_first_free : start; |
4973 | 4916 | ||
4974 | while (start < max) { | 4917 | while (start <= max) { |
4975 | start = mb_find_next_zero_bit(bitmap, max, start); | 4918 | start = mb_find_next_zero_bit(bitmap, max + 1, start); |
4976 | if (start >= max) | 4919 | if (start > max) |
4977 | break; | 4920 | break; |
4978 | next = mb_find_next_bit(bitmap, max, start); | 4921 | next = mb_find_next_bit(bitmap, max + 1, start); |
4979 | 4922 | ||
4980 | if ((next - start) >= minblocks) { | 4923 | if ((next - start) >= minblocks) { |
4981 | ext4_trim_extent(sb, start, | 4924 | ext4_trim_extent(sb, start, |
@@ -5027,37 +4970,36 @@ out: | |||
5027 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | 4970 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) |
5028 | { | 4971 | { |
5029 | struct ext4_group_info *grp; | 4972 | struct ext4_group_info *grp; |
5030 | ext4_group_t first_group, last_group; | 4973 | ext4_group_t group, first_group, last_group; |
5031 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
5032 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; | 4974 | ext4_grpblk_t cnt = 0, first_cluster, last_cluster; |
5033 | uint64_t start, len, minlen, trimmed = 0; | 4975 | uint64_t start, end, minlen, trimmed = 0; |
5034 | ext4_fsblk_t first_data_blk = | 4976 | ext4_fsblk_t first_data_blk = |
5035 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 4977 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
4978 | ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); | ||
5036 | int ret = 0; | 4979 | int ret = 0; |
5037 | 4980 | ||
5038 | start = range->start >> sb->s_blocksize_bits; | 4981 | start = range->start >> sb->s_blocksize_bits; |
5039 | len = range->len >> sb->s_blocksize_bits; | 4982 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
5040 | minlen = range->minlen >> sb->s_blocksize_bits; | 4983 | minlen = range->minlen >> sb->s_blocksize_bits; |
5041 | 4984 | ||
5042 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) | 4985 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || |
4986 | unlikely(start >= max_blks)) | ||
5043 | return -EINVAL; | 4987 | return -EINVAL; |
5044 | if (start + len <= first_data_blk) | 4988 | if (end >= max_blks) |
4989 | end = max_blks - 1; | ||
4990 | if (end <= first_data_blk) | ||
5045 | goto out; | 4991 | goto out; |
5046 | if (start < first_data_blk) { | 4992 | if (start < first_data_blk) |
5047 | len -= first_data_blk - start; | ||
5048 | start = first_data_blk; | 4993 | start = first_data_blk; |
5049 | } | ||
5050 | 4994 | ||
5051 | /* Determine first and last group to examine based on start and len */ | 4995 | /* Determine first and last group to examine based on start and end */ |
5052 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | 4996 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, |
5053 | &first_group, &first_cluster); | 4997 | &first_group, &first_cluster); |
5054 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | 4998 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, |
5055 | &last_group, &last_cluster); | 4999 | &last_group, &last_cluster); |
5056 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
5057 | last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); | ||
5058 | 5000 | ||
5059 | if (first_group > last_group) | 5001 | /* end now represents the last cluster to discard in this group */ |
5060 | return -EINVAL; | 5002 | end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |
5061 | 5003 | ||
5062 | for (group = first_group; group <= last_group; group++) { | 5004 | for (group = first_group; group <= last_group; group++) { |
5063 | grp = ext4_get_group_info(sb, group); | 5005 | grp = ext4_get_group_info(sb, group); |
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
5069 | } | 5011 | } |
5070 | 5012 | ||
5071 | /* | 5013 | /* |
5072 | * For all the groups except the last one, last block will | 5014 | * For all the groups except the last one, last cluster will |
5073 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to | 5015 | * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to |
5074 | * change it for the last group in which case start + | 5016 | * change it for the last group, note that last_cluster is |
5075 | * len < EXT4_BLOCKS_PER_GROUP(sb). | 5017 | * already computed earlier by ext4_get_group_no_and_offset() |
5076 | */ | 5018 | */ |
5077 | if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) | 5019 | if (group == last_group) |
5078 | last_cluster = first_cluster + len; | 5020 | end = last_cluster; |
5079 | len -= last_cluster - first_cluster; | ||
5080 | 5021 | ||
5081 | if (grp->bb_free >= minlen) { | 5022 | if (grp->bb_free >= minlen) { |
5082 | cnt = ext4_trim_all_free(sb, group, first_cluster, | 5023 | cnt = ext4_trim_all_free(sb, group, first_cluster, |
5083 | last_cluster, minlen); | 5024 | end, minlen); |
5084 | if (cnt < 0) { | 5025 | if (cnt < 0) { |
5085 | ret = cnt; | 5026 | ret = cnt; |
5086 | break; | 5027 | break; |
5087 | } | 5028 | } |
5029 | trimmed += cnt; | ||
5088 | } | 5030 | } |
5089 | trimmed += cnt; | 5031 | |
5032 | /* | ||
5033 | * For every group except the first one, we are sure | ||
5034 | * that the first cluster to discard will be cluster #0. | ||
5035 | */ | ||
5090 | first_cluster = 0; | 5036 | first_cluster = 0; |
5091 | } | 5037 | } |
5092 | range->len = trimmed * sb->s_blocksize; | ||
5093 | 5038 | ||
5094 | if (!ret) | 5039 | if (!ret) |
5095 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | 5040 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
5096 | 5041 | ||
5097 | out: | 5042 | out: |
5043 | range->len = trimmed * sb->s_blocksize; | ||
5098 | return ret; | 5044 | return ret; |
5099 | } | 5045 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 47705f3285e3..c070618c21ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug; | |||
96 | 96 | ||
97 | 97 | ||
98 | struct ext4_free_data { | 98 | struct ext4_free_data { |
99 | /* this links the free block information from group_info */ | 99 | /* MUST be the first member */ |
100 | struct rb_node node; | 100 | struct ext4_journal_cb_entry efd_jce; |
101 | |||
102 | /* ext4_free_data private data starts from here */ | ||
101 | 103 | ||
102 | /* this links the free block information from ext4_sb_info */ | 104 | /* this links the free block information from group_info */ |
103 | struct list_head list; | 105 | struct rb_node efd_node; |
104 | 106 | ||
105 | /* group which free block extent belongs */ | 107 | /* group which free block extent belongs */ |
106 | ext4_group_t group; | 108 | ext4_group_t efd_group; |
107 | 109 | ||
108 | /* free block extent */ | 110 | /* free block extent */ |
109 | ext4_grpblk_t start_cluster; | 111 | ext4_grpblk_t efd_start_cluster; |
110 | ext4_grpblk_t count; | 112 | ext4_grpblk_t efd_count; |
111 | 113 | ||
112 | /* transaction which freed this extent */ | 114 | /* transaction which freed this extent */ |
113 | tid_t t_tid; | 115 | tid_t efd_tid; |
114 | }; | 116 | }; |
115 | 117 | ||
116 | struct ext4_prealloc_space { | 118 | struct ext4_prealloc_space { |
@@ -210,8 +212,6 @@ struct ext4_buddy { | |||
210 | __u16 bd_blkbits; | 212 | __u16 bd_blkbits; |
211 | ext4_group_t bd_group; | 213 | ext4_group_t bd_group; |
212 | }; | 214 | }; |
213 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | ||
214 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | ||
215 | 215 | ||
216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 216 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
217 | struct ext4_free_extent *fex) | 217 | struct ext4_free_extent *fex) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index e7d6bb0acfa6..f39f80f8f2c5 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
471 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, | 471 | tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, |
472 | S_IFREG, NULL, goal, owner); | 472 | S_IFREG, NULL, goal, owner); |
473 | if (IS_ERR(tmp_inode)) { | 473 | if (IS_ERR(tmp_inode)) { |
474 | retval = PTR_ERR(inode); | 474 | retval = PTR_ERR(tmp_inode); |
475 | ext4_journal_stop(handle); | 475 | ext4_journal_stop(handle); |
476 | return retval; | 476 | return retval; |
477 | } | 477 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 7ea4ba4eff2a..ed6548d89165 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb, | |||
257 | * If check_interval in MMP block is larger, use that instead of | 257 | * If check_interval in MMP block is larger, use that instead of |
258 | * update_interval from the superblock. | 258 | * update_interval from the superblock. |
259 | */ | 259 | */ |
260 | if (mmp->mmp_check_interval > mmp_check_interval) | 260 | if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) |
261 | mmp_check_interval = mmp->mmp_check_interval; | 261 | mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); |
262 | 262 | ||
263 | seq = le32_to_cpu(mmp->mmp_seq); | 263 | seq = le32_to_cpu(mmp->mmp_seq); |
264 | if (seq == EXT4_MMP_SEQ_CLEAN) | 264 | if (seq == EXT4_MMP_SEQ_CLEAN) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2043f482375d..349d7b3671c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -468,7 +468,7 @@ fail2: | |||
468 | fail: | 468 | fail: |
469 | if (*err == ERR_BAD_DX_DIR) | 469 | if (*err == ERR_BAD_DX_DIR) |
470 | ext4_warning(dir->i_sb, | 470 | ext4_warning(dir->i_sb, |
471 | "Corrupt dir inode %ld, running e2fsck is " | 471 | "Corrupt dir inode %lu, running e2fsck is " |
472 | "recommended.", dir->i_ino); | 472 | "recommended.", dir->i_ino); |
473 | return NULL; | 473 | return NULL; |
474 | } | 474 | } |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 475851896518..74cd1f7f1f88 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -60,7 +60,6 @@ void ext4_ioend_wait(struct inode *inode) | |||
60 | static void put_io_page(struct ext4_io_page *io_page) | 60 | static void put_io_page(struct ext4_io_page *io_page) |
61 | { | 61 | { |
62 | if (atomic_dec_and_test(&io_page->p_count)) { | 62 | if (atomic_dec_and_test(&io_page->p_count)) { |
63 | end_page_writeback(io_page->p_page); | ||
64 | put_page(io_page->p_page); | 63 | put_page(io_page->p_page); |
65 | kmem_cache_free(io_page_cachep, io_page); | 64 | kmem_cache_free(io_page_cachep, io_page); |
66 | } | 65 | } |
@@ -110,6 +109,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
110 | if (io->iocb) | 109 | if (io->iocb) |
111 | aio_complete(io->iocb, io->result, 0); | 110 | aio_complete(io->iocb, io->result, 0); |
112 | 111 | ||
112 | if (io->flag & EXT4_IO_END_DIRECT) | ||
113 | inode_dio_done(inode); | ||
113 | /* Wake up anyone waiting on unwritten extent conversion */ | 114 | /* Wake up anyone waiting on unwritten extent conversion */ |
114 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | 115 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) |
115 | wake_up_all(ext4_ioend_wq(io->inode)); | 116 | wake_up_all(ext4_ioend_wq(io->inode)); |
@@ -127,12 +128,18 @@ static void ext4_end_io_work(struct work_struct *work) | |||
127 | unsigned long flags; | 128 | unsigned long flags; |
128 | 129 | ||
129 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
131 | if (io->flag & EXT4_IO_END_IN_FSYNC) | ||
132 | goto requeue; | ||
130 | if (list_empty(&io->list)) { | 133 | if (list_empty(&io->list)) { |
131 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 134 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
132 | goto free; | 135 | goto free; |
133 | } | 136 | } |
134 | 137 | ||
135 | if (!mutex_trylock(&inode->i_mutex)) { | 138 | if (!mutex_trylock(&inode->i_mutex)) { |
139 | bool was_queued; | ||
140 | requeue: | ||
141 | was_queued = !!(io->flag & EXT4_IO_END_QUEUED); | ||
142 | io->flag |= EXT4_IO_END_QUEUED; | ||
136 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 143 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
137 | /* | 144 | /* |
138 | * Requeue the work instead of waiting so that the work | 145 | * Requeue the work instead of waiting so that the work |
@@ -145,9 +152,8 @@ static void ext4_end_io_work(struct work_struct *work) | |||
145 | * yield the cpu if it sees an end_io request that has already | 152 | * yield the cpu if it sees an end_io request that has already |
146 | * been requeued. | 153 | * been requeued. |
147 | */ | 154 | */ |
148 | if (io->flag & EXT4_IO_END_QUEUED) | 155 | if (was_queued) |
149 | yield(); | 156 | yield(); |
150 | io->flag |= EXT4_IO_END_QUEUED; | ||
151 | return; | 157 | return; |
152 | } | 158 | } |
153 | list_del_init(&io->list); | 159 | list_del_init(&io->list); |
@@ -227,9 +233,9 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
227 | } while (bh != head); | 233 | } while (bh != head); |
228 | } | 234 | } |
229 | 235 | ||
230 | put_io_page(io_end->pages[i]); | 236 | if (atomic_read(&io_end->pages[i]->p_count) == 1) |
237 | end_page_writeback(io_end->pages[i]->p_page); | ||
231 | } | 238 | } |
232 | io_end->num_io_pages = 0; | ||
233 | inode = io_end->inode; | 239 | inode = io_end->inode; |
234 | 240 | ||
235 | if (error) { | 241 | if (error) { |
@@ -421,6 +427,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
421 | * PageWriteback bit from the page to prevent the system from | 427 | * PageWriteback bit from the page to prevent the system from |
422 | * wedging later on. | 428 | * wedging later on. |
423 | */ | 429 | */ |
430 | if (atomic_read(&io_page->p_count) == 1) | ||
431 | end_page_writeback(page); | ||
424 | put_io_page(io_page); | 432 | put_io_page(io_page); |
425 | return ret; | 433 | return ret; |
426 | } | 434 | } |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f9d948f0eb86..59fa0be27251 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb, | |||
1163 | do_div(reserved_blocks, 100); | 1163 | do_div(reserved_blocks, 100); |
1164 | 1164 | ||
1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); | 1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); |
1166 | ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); | ||
1166 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * | 1167 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * |
1167 | flex_gd->count); | 1168 | flex_gd->count); |
1169 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * | ||
1170 | flex_gd->count); | ||
1168 | 1171 | ||
1169 | /* | 1172 | /* |
1170 | * We need to protect s_groups_count against other CPUs seeing | 1173 | * We need to protect s_groups_count against other CPUs seeing |
@@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, | |||
1465 | } | 1468 | } |
1466 | 1469 | ||
1467 | ext4_blocks_count_set(es, o_blocks_count + add); | 1470 | ext4_blocks_count_set(es, o_blocks_count + add); |
1471 | ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); | ||
1468 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1472 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1469 | o_blocks_count + add); | 1473 | o_blocks_count + add); |
1470 | /* We add the blocks to the bitmap and set the group need init bit */ | 1474 | /* We add the blocks to the bitmap and set the group need init bit */ |
@@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1512 | o_blocks_count = ext4_blocks_count(es); | 1516 | o_blocks_count = ext4_blocks_count(es); |
1513 | 1517 | ||
1514 | if (test_opt(sb, DEBUG)) | 1518 | if (test_opt(sb, DEBUG)) |
1515 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", | 1519 | ext4_msg(sb, KERN_DEBUG, |
1516 | o_blocks_count, n_blocks_count); | 1520 | "extending last group from %llu to %llu blocks", |
1521 | o_blocks_count, n_blocks_count); | ||
1517 | 1522 | ||
1518 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) | 1523 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) |
1519 | return 0; | 1524 | return 0; |
1520 | 1525 | ||
1521 | if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 1526 | if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { |
1522 | printk(KERN_ERR "EXT4-fs: filesystem on %s:" | 1527 | ext4_msg(sb, KERN_ERR, |
1523 | " too large to resize to %llu blocks safely\n", | 1528 | "filesystem too large to resize to %llu blocks safely", |
1524 | sb->s_id, n_blocks_count); | 1529 | n_blocks_count); |
1525 | if (sizeof(sector_t) < 8) | 1530 | if (sizeof(sector_t) < 8) |
1526 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); | 1531 | ext4_warning(sb, "CONFIG_LBDAF not enabled"); |
1527 | return -EINVAL; | 1532 | return -EINVAL; |
@@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1582 | ext4_fsblk_t o_blocks_count; | 1587 | ext4_fsblk_t o_blocks_count; |
1583 | ext4_group_t o_group; | 1588 | ext4_group_t o_group; |
1584 | ext4_group_t n_group; | 1589 | ext4_group_t n_group; |
1585 | ext4_grpblk_t offset; | 1590 | ext4_grpblk_t offset, add; |
1586 | unsigned long n_desc_blocks; | 1591 | unsigned long n_desc_blocks; |
1587 | unsigned long o_desc_blocks; | 1592 | unsigned long o_desc_blocks; |
1588 | unsigned long desc_blocks; | 1593 | unsigned long desc_blocks; |
@@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1591 | o_blocks_count = ext4_blocks_count(es); | 1596 | o_blocks_count = ext4_blocks_count(es); |
1592 | 1597 | ||
1593 | if (test_opt(sb, DEBUG)) | 1598 | if (test_opt(sb, DEBUG)) |
1594 | printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " | 1599 | ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " |
1595 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | 1600 | "to %llu blocks", o_blocks_count, n_blocks_count); |
1596 | 1601 | ||
1597 | if (n_blocks_count < o_blocks_count) { | 1602 | if (n_blocks_count < o_blocks_count) { |
1598 | /* On-line shrinking not supported */ | 1603 | /* On-line shrinking not supported */ |
@@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1605 | return 0; | 1610 | return 0; |
1606 | 1611 | ||
1607 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); | 1612 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1608 | ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); | 1613 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); |
1609 | 1614 | ||
1610 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | 1615 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / |
1611 | EXT4_DESC_PER_BLOCK(sb); | 1616 | EXT4_DESC_PER_BLOCK(sb); |
@@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1634 | } | 1639 | } |
1635 | brelse(bh); | 1640 | brelse(bh); |
1636 | 1641 | ||
1637 | if (offset != 0) { | 1642 | /* extend the last group */ |
1638 | /* extend the last group */ | 1643 | if (n_group == o_group) |
1639 | ext4_grpblk_t add; | 1644 | add = n_blocks_count - o_blocks_count; |
1640 | add = EXT4_BLOCKS_PER_GROUP(sb) - offset; | 1645 | else |
1646 | add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); | ||
1647 | if (add > 0) { | ||
1641 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); | 1648 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); |
1642 | if (err) | 1649 | if (err) |
1643 | goto out; | 1650 | goto out; |
@@ -1674,7 +1681,7 @@ out: | |||
1674 | 1681 | ||
1675 | iput(resize_inode); | 1682 | iput(resize_inode); |
1676 | if (test_opt(sb, DEBUG)) | 1683 | if (test_opt(sb, DEBUG)) |
1677 | printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " | 1684 | ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " |
1678 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | 1685 | "upto %llu blocks", o_blocks_count, n_blocks_count); |
1679 | return err; | 1686 | return err; |
1680 | } | 1687 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 933900909ed0..ceebaf853beb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -62,6 +62,7 @@ static struct ext4_features *ext4_feat; | |||
62 | 62 | ||
63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
64 | unsigned long journal_devnum); | 64 | unsigned long journal_devnum); |
65 | static int ext4_show_options(struct seq_file *seq, struct dentry *root); | ||
65 | static int ext4_commit_super(struct super_block *sb, int sync); | 66 | static int ext4_commit_super(struct super_block *sb, int sync); |
66 | static void ext4_mark_recovery_complete(struct super_block *sb, | 67 | static void ext4_mark_recovery_complete(struct super_block *sb, |
67 | struct ext4_super_block *es); | 68 | struct ext4_super_block *es); |
@@ -375,7 +376,7 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, | |||
375 | if (is_handle_aborted(handle)) | 376 | if (is_handle_aborted(handle)) |
376 | return; | 377 | return; |
377 | 378 | ||
378 | printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", | 379 | printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", |
379 | caller, line, errstr, err_fn); | 380 | caller, line, errstr, err_fn); |
380 | 381 | ||
381 | jbd2_journal_abort_handle(handle); | 382 | jbd2_journal_abort_handle(handle); |
@@ -431,6 +432,22 @@ static int block_device_ejected(struct super_block *sb) | |||
431 | return bdi->dev == NULL; | 432 | return bdi->dev == NULL; |
432 | } | 433 | } |
433 | 434 | ||
435 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) | ||
436 | { | ||
437 | struct super_block *sb = journal->j_private; | ||
438 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
439 | int error = is_journal_aborted(journal); | ||
440 | struct ext4_journal_cb_entry *jce, *tmp; | ||
441 | |||
442 | spin_lock(&sbi->s_md_lock); | ||
443 | list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { | ||
444 | list_del_init(&jce->jce_list); | ||
445 | spin_unlock(&sbi->s_md_lock); | ||
446 | jce->jce_func(sb, jce, error); | ||
447 | spin_lock(&sbi->s_md_lock); | ||
448 | } | ||
449 | spin_unlock(&sbi->s_md_lock); | ||
450 | } | ||
434 | 451 | ||
435 | /* Deal with the reporting of failure conditions on a filesystem such as | 452 | /* Deal with the reporting of failure conditions on a filesystem such as |
436 | * inconsistencies detected or read IO failures. | 453 | * inconsistencies detected or read IO failures. |
@@ -498,11 +515,16 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
498 | va_start(args, fmt); | 515 | va_start(args, fmt); |
499 | vaf.fmt = fmt; | 516 | vaf.fmt = fmt; |
500 | vaf.va = &args; | 517 | vaf.va = &args; |
501 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", | ||
502 | inode->i_sb->s_id, function, line, inode->i_ino); | ||
503 | if (block) | 518 | if (block) |
504 | printk(KERN_CONT "block %llu: ", block); | 519 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " |
505 | printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); | 520 | "inode #%lu: block %llu: comm %s: %pV\n", |
521 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
522 | block, current->comm, &vaf); | ||
523 | else | ||
524 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " | ||
525 | "inode #%lu: comm %s: %pV\n", | ||
526 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
527 | current->comm, &vaf); | ||
506 | va_end(args); | 528 | va_end(args); |
507 | 529 | ||
508 | ext4_handle_error(inode->i_sb); | 530 | ext4_handle_error(inode->i_sb); |
@@ -524,15 +546,21 @@ void ext4_error_file(struct file *file, const char *function, | |||
524 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 546 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
525 | if (IS_ERR(path)) | 547 | if (IS_ERR(path)) |
526 | path = "(unknown)"; | 548 | path = "(unknown)"; |
527 | printk(KERN_CRIT | ||
528 | "EXT4-fs error (device %s): %s:%d: inode #%lu: ", | ||
529 | inode->i_sb->s_id, function, line, inode->i_ino); | ||
530 | if (block) | ||
531 | printk(KERN_CONT "block %llu: ", block); | ||
532 | va_start(args, fmt); | 549 | va_start(args, fmt); |
533 | vaf.fmt = fmt; | 550 | vaf.fmt = fmt; |
534 | vaf.va = &args; | 551 | vaf.va = &args; |
535 | printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); | 552 | if (block) |
553 | printk(KERN_CRIT | ||
554 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | ||
555 | "block %llu: comm %s: path %s: %pV\n", | ||
556 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
557 | block, current->comm, path, &vaf); | ||
558 | else | ||
559 | printk(KERN_CRIT | ||
560 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | ||
561 | "comm %s: path %s: %pV\n", | ||
562 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
563 | current->comm, path, &vaf); | ||
536 | va_end(args); | 564 | va_end(args); |
537 | 565 | ||
538 | ext4_handle_error(inode->i_sb); | 566 | ext4_handle_error(inode->i_sb); |
@@ -808,9 +836,6 @@ static void ext4_put_super(struct super_block *sb) | |||
808 | destroy_workqueue(sbi->dio_unwritten_wq); | 836 | destroy_workqueue(sbi->dio_unwritten_wq); |
809 | 837 | ||
810 | lock_super(sb); | 838 | lock_super(sb); |
811 | if (sb->s_dirt) | ||
812 | ext4_commit_super(sb, 1); | ||
813 | |||
814 | if (sbi->s_journal) { | 839 | if (sbi->s_journal) { |
815 | err = jbd2_journal_destroy(sbi->s_journal); | 840 | err = jbd2_journal_destroy(sbi->s_journal); |
816 | sbi->s_journal = NULL; | 841 | sbi->s_journal = NULL; |
@@ -827,9 +852,12 @@ static void ext4_put_super(struct super_block *sb) | |||
827 | if (!(sb->s_flags & MS_RDONLY)) { | 852 | if (!(sb->s_flags & MS_RDONLY)) { |
828 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 853 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
829 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 854 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
830 | ext4_commit_super(sb, 1); | ||
831 | } | 855 | } |
856 | if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) | ||
857 | ext4_commit_super(sb, 1); | ||
858 | |||
832 | if (sbi->s_proc) { | 859 | if (sbi->s_proc) { |
860 | remove_proc_entry("options", sbi->s_proc); | ||
833 | remove_proc_entry(sb->s_id, ext4_proc_root); | 861 | remove_proc_entry(sb->s_id, ext4_proc_root); |
834 | } | 862 | } |
835 | kobject_del(&sbi->s_kobj); | 863 | kobject_del(&sbi->s_kobj); |
@@ -990,180 +1018,6 @@ void ext4_clear_inode(struct inode *inode) | |||
990 | } | 1018 | } |
991 | } | 1019 | } |
992 | 1020 | ||
993 | static inline void ext4_show_quota_options(struct seq_file *seq, | ||
994 | struct super_block *sb) | ||
995 | { | ||
996 | #if defined(CONFIG_QUOTA) | ||
997 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
998 | |||
999 | if (sbi->s_jquota_fmt) { | ||
1000 | char *fmtname = ""; | ||
1001 | |||
1002 | switch (sbi->s_jquota_fmt) { | ||
1003 | case QFMT_VFS_OLD: | ||
1004 | fmtname = "vfsold"; | ||
1005 | break; | ||
1006 | case QFMT_VFS_V0: | ||
1007 | fmtname = "vfsv0"; | ||
1008 | break; | ||
1009 | case QFMT_VFS_V1: | ||
1010 | fmtname = "vfsv1"; | ||
1011 | break; | ||
1012 | } | ||
1013 | seq_printf(seq, ",jqfmt=%s", fmtname); | ||
1014 | } | ||
1015 | |||
1016 | if (sbi->s_qf_names[USRQUOTA]) | ||
1017 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | ||
1018 | |||
1019 | if (sbi->s_qf_names[GRPQUOTA]) | ||
1020 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | ||
1021 | |||
1022 | if (test_opt(sb, USRQUOTA)) | ||
1023 | seq_puts(seq, ",usrquota"); | ||
1024 | |||
1025 | if (test_opt(sb, GRPQUOTA)) | ||
1026 | seq_puts(seq, ",grpquota"); | ||
1027 | #endif | ||
1028 | } | ||
1029 | |||
1030 | /* | ||
1031 | * Show an option if | ||
1032 | * - it's set to a non-default value OR | ||
1033 | * - if the per-sb default is different from the global default | ||
1034 | */ | ||
1035 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) | ||
1036 | { | ||
1037 | int def_errors; | ||
1038 | unsigned long def_mount_opts; | ||
1039 | struct super_block *sb = root->d_sb; | ||
1040 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1041 | struct ext4_super_block *es = sbi->s_es; | ||
1042 | |||
1043 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | ||
1044 | def_errors = le16_to_cpu(es->s_errors); | ||
1045 | |||
1046 | if (sbi->s_sb_block != 1) | ||
1047 | seq_printf(seq, ",sb=%llu", sbi->s_sb_block); | ||
1048 | if (test_opt(sb, MINIX_DF)) | ||
1049 | seq_puts(seq, ",minixdf"); | ||
1050 | if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) | ||
1051 | seq_puts(seq, ",grpid"); | ||
1052 | if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) | ||
1053 | seq_puts(seq, ",nogrpid"); | ||
1054 | if (sbi->s_resuid != EXT4_DEF_RESUID || | ||
1055 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { | ||
1056 | seq_printf(seq, ",resuid=%u", sbi->s_resuid); | ||
1057 | } | ||
1058 | if (sbi->s_resgid != EXT4_DEF_RESGID || | ||
1059 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { | ||
1060 | seq_printf(seq, ",resgid=%u", sbi->s_resgid); | ||
1061 | } | ||
1062 | if (test_opt(sb, ERRORS_RO)) { | ||
1063 | if (def_errors == EXT4_ERRORS_PANIC || | ||
1064 | def_errors == EXT4_ERRORS_CONTINUE) { | ||
1065 | seq_puts(seq, ",errors=remount-ro"); | ||
1066 | } | ||
1067 | } | ||
1068 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) | ||
1069 | seq_puts(seq, ",errors=continue"); | ||
1070 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) | ||
1071 | seq_puts(seq, ",errors=panic"); | ||
1072 | if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) | ||
1073 | seq_puts(seq, ",nouid32"); | ||
1074 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) | ||
1075 | seq_puts(seq, ",debug"); | ||
1076 | #ifdef CONFIG_EXT4_FS_XATTR | ||
1077 | if (test_opt(sb, XATTR_USER)) | ||
1078 | seq_puts(seq, ",user_xattr"); | ||
1079 | if (!test_opt(sb, XATTR_USER)) | ||
1080 | seq_puts(seq, ",nouser_xattr"); | ||
1081 | #endif | ||
1082 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
1083 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | ||
1084 | seq_puts(seq, ",acl"); | ||
1085 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | ||
1086 | seq_puts(seq, ",noacl"); | ||
1087 | #endif | ||
1088 | if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { | ||
1089 | seq_printf(seq, ",commit=%u", | ||
1090 | (unsigned) (sbi->s_commit_interval / HZ)); | ||
1091 | } | ||
1092 | if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { | ||
1093 | seq_printf(seq, ",min_batch_time=%u", | ||
1094 | (unsigned) sbi->s_min_batch_time); | ||
1095 | } | ||
1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { | ||
1097 | seq_printf(seq, ",max_batch_time=%u", | ||
1098 | (unsigned) sbi->s_max_batch_time); | ||
1099 | } | ||
1100 | |||
1101 | /* | ||
1102 | * We're changing the default of barrier mount option, so | ||
1103 | * let's always display its mount state so it's clear what its | ||
1104 | * status is. | ||
1105 | */ | ||
1106 | seq_puts(seq, ",barrier="); | ||
1107 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | ||
1108 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) | ||
1109 | seq_puts(seq, ",journal_async_commit"); | ||
1110 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | ||
1111 | seq_puts(seq, ",journal_checksum"); | ||
1112 | if (test_opt(sb, I_VERSION)) | ||
1113 | seq_puts(seq, ",i_version"); | ||
1114 | if (!test_opt(sb, DELALLOC) && | ||
1115 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | ||
1116 | seq_puts(seq, ",nodelalloc"); | ||
1117 | |||
1118 | if (!test_opt(sb, MBLK_IO_SUBMIT)) | ||
1119 | seq_puts(seq, ",nomblk_io_submit"); | ||
1120 | if (sbi->s_stripe) | ||
1121 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | ||
1122 | /* | ||
1123 | * journal mode get enabled in different ways | ||
1124 | * So just print the value even if we didn't specify it | ||
1125 | */ | ||
1126 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | ||
1127 | seq_puts(seq, ",data=journal"); | ||
1128 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
1129 | seq_puts(seq, ",data=ordered"); | ||
1130 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
1131 | seq_puts(seq, ",data=writeback"); | ||
1132 | |||
1133 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
1134 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
1135 | sbi->s_inode_readahead_blks); | ||
1136 | |||
1137 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
1138 | seq_puts(seq, ",data_err=abort"); | ||
1139 | |||
1140 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) | ||
1141 | seq_puts(seq, ",noauto_da_alloc"); | ||
1142 | |||
1143 | if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) | ||
1144 | seq_puts(seq, ",discard"); | ||
1145 | |||
1146 | if (test_opt(sb, NOLOAD)) | ||
1147 | seq_puts(seq, ",norecovery"); | ||
1148 | |||
1149 | if (test_opt(sb, DIOREAD_NOLOCK)) | ||
1150 | seq_puts(seq, ",dioread_nolock"); | ||
1151 | |||
1152 | if (test_opt(sb, BLOCK_VALIDITY) && | ||
1153 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | ||
1154 | seq_puts(seq, ",block_validity"); | ||
1155 | |||
1156 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1157 | seq_puts(seq, ",noinit_itable"); | ||
1158 | else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) | ||
1159 | seq_printf(seq, ",init_itable=%u", | ||
1160 | (unsigned) sbi->s_li_wait_mult); | ||
1161 | |||
1162 | ext4_show_quota_options(seq, sb); | ||
1163 | |||
1164 | return 0; | ||
1165 | } | ||
1166 | |||
1167 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, | 1021 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
1168 | u64 ino, u32 generation) | 1022 | u64 ino, u32 generation) |
1169 | { | 1023 | { |
@@ -1316,18 +1170,17 @@ static const struct export_operations ext4_export_ops = { | |||
1316 | enum { | 1170 | enum { |
1317 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 1171 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
1318 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 1172 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
1319 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, | 1173 | Opt_nouid32, Opt_debug, Opt_removed, |
1320 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 1174 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
1321 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, | 1175 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, |
1322 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, | 1176 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, |
1323 | Opt_journal_update, Opt_journal_dev, | 1177 | Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, |
1324 | Opt_journal_checksum, Opt_journal_async_commit, | ||
1325 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1178 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1326 | Opt_data_err_abort, Opt_data_err_ignore, | 1179 | Opt_data_err_abort, Opt_data_err_ignore, |
1327 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1180 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1328 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1181 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1329 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, | 1182 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, |
1330 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, | 1183 | Opt_usrquota, Opt_grpquota, Opt_i_version, |
1331 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, | 1184 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1332 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, | 1185 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1333 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1186 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
@@ -1350,20 +1203,19 @@ static const match_table_t tokens = { | |||
1350 | {Opt_err_ro, "errors=remount-ro"}, | 1203 | {Opt_err_ro, "errors=remount-ro"}, |
1351 | {Opt_nouid32, "nouid32"}, | 1204 | {Opt_nouid32, "nouid32"}, |
1352 | {Opt_debug, "debug"}, | 1205 | {Opt_debug, "debug"}, |
1353 | {Opt_oldalloc, "oldalloc"}, | 1206 | {Opt_removed, "oldalloc"}, |
1354 | {Opt_orlov, "orlov"}, | 1207 | {Opt_removed, "orlov"}, |
1355 | {Opt_user_xattr, "user_xattr"}, | 1208 | {Opt_user_xattr, "user_xattr"}, |
1356 | {Opt_nouser_xattr, "nouser_xattr"}, | 1209 | {Opt_nouser_xattr, "nouser_xattr"}, |
1357 | {Opt_acl, "acl"}, | 1210 | {Opt_acl, "acl"}, |
1358 | {Opt_noacl, "noacl"}, | 1211 | {Opt_noacl, "noacl"}, |
1359 | {Opt_noload, "noload"}, | ||
1360 | {Opt_noload, "norecovery"}, | 1212 | {Opt_noload, "norecovery"}, |
1361 | {Opt_nobh, "nobh"}, | 1213 | {Opt_noload, "noload"}, |
1362 | {Opt_bh, "bh"}, | 1214 | {Opt_removed, "nobh"}, |
1215 | {Opt_removed, "bh"}, | ||
1363 | {Opt_commit, "commit=%u"}, | 1216 | {Opt_commit, "commit=%u"}, |
1364 | {Opt_min_batch_time, "min_batch_time=%u"}, | 1217 | {Opt_min_batch_time, "min_batch_time=%u"}, |
1365 | {Opt_max_batch_time, "max_batch_time=%u"}, | 1218 | {Opt_max_batch_time, "max_batch_time=%u"}, |
1366 | {Opt_journal_update, "journal=update"}, | ||
1367 | {Opt_journal_dev, "journal_dev=%u"}, | 1219 | {Opt_journal_dev, "journal_dev=%u"}, |
1368 | {Opt_journal_checksum, "journal_checksum"}, | 1220 | {Opt_journal_checksum, "journal_checksum"}, |
1369 | {Opt_journal_async_commit, "journal_async_commit"}, | 1221 | {Opt_journal_async_commit, "journal_async_commit"}, |
@@ -1389,7 +1241,6 @@ static const match_table_t tokens = { | |||
1389 | {Opt_nobarrier, "nobarrier"}, | 1241 | {Opt_nobarrier, "nobarrier"}, |
1390 | {Opt_i_version, "i_version"}, | 1242 | {Opt_i_version, "i_version"}, |
1391 | {Opt_stripe, "stripe=%u"}, | 1243 | {Opt_stripe, "stripe=%u"}, |
1392 | {Opt_resize, "resize"}, | ||
1393 | {Opt_delalloc, "delalloc"}, | 1244 | {Opt_delalloc, "delalloc"}, |
1394 | {Opt_nodelalloc, "nodelalloc"}, | 1245 | {Opt_nodelalloc, "nodelalloc"}, |
1395 | {Opt_mblk_io_submit, "mblk_io_submit"}, | 1246 | {Opt_mblk_io_submit, "mblk_io_submit"}, |
@@ -1408,6 +1259,11 @@ static const match_table_t tokens = { | |||
1408 | {Opt_init_itable, "init_itable=%u"}, | 1259 | {Opt_init_itable, "init_itable=%u"}, |
1409 | {Opt_init_itable, "init_itable"}, | 1260 | {Opt_init_itable, "init_itable"}, |
1410 | {Opt_noinit_itable, "noinit_itable"}, | 1261 | {Opt_noinit_itable, "noinit_itable"}, |
1262 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | ||
1263 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | ||
1264 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | ||
1265 | {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ | ||
1266 | {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ | ||
1411 | {Opt_err, NULL}, | 1267 | {Opt_err, NULL}, |
1412 | }; | 1268 | }; |
1413 | 1269 | ||
@@ -1496,420 +1352,273 @@ static int clear_qf_name(struct super_block *sb, int qtype) | |||
1496 | } | 1352 | } |
1497 | #endif | 1353 | #endif |
1498 | 1354 | ||
1499 | static int parse_options(char *options, struct super_block *sb, | 1355 | #define MOPT_SET 0x0001 |
1500 | unsigned long *journal_devnum, | 1356 | #define MOPT_CLEAR 0x0002 |
1501 | unsigned int *journal_ioprio, | 1357 | #define MOPT_NOSUPPORT 0x0004 |
1502 | ext4_fsblk_t *n_blocks_count, int is_remount) | 1358 | #define MOPT_EXPLICIT 0x0008 |
1503 | { | 1359 | #define MOPT_CLEAR_ERR 0x0010 |
1504 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1360 | #define MOPT_GTE0 0x0020 |
1505 | char *p; | ||
1506 | substring_t args[MAX_OPT_ARGS]; | ||
1507 | int data_opt = 0; | ||
1508 | int option; | ||
1509 | #ifdef CONFIG_QUOTA | 1361 | #ifdef CONFIG_QUOTA |
1510 | int qfmt; | 1362 | #define MOPT_Q 0 |
1363 | #define MOPT_QFMT 0x0040 | ||
1364 | #else | ||
1365 | #define MOPT_Q MOPT_NOSUPPORT | ||
1366 | #define MOPT_QFMT MOPT_NOSUPPORT | ||
1511 | #endif | 1367 | #endif |
1512 | 1368 | #define MOPT_DATAJ 0x0080 | |
1513 | if (!options) | 1369 | |
1514 | return 1; | 1370 | static const struct mount_opts { |
1515 | 1371 | int token; | |
1516 | while ((p = strsep(&options, ",")) != NULL) { | 1372 | int mount_opt; |
1517 | int token; | 1373 | int flags; |
1518 | if (!*p) | 1374 | } ext4_mount_opts[] = { |
1519 | continue; | 1375 | {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, |
1520 | 1376 | {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, | |
1521 | /* | 1377 | {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, |
1522 | * Initialize args struct so we know whether arg was | 1378 | {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, |
1523 | * found; some options take optional arguments. | 1379 | {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET}, |
1524 | */ | 1380 | {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR}, |
1525 | args[0].to = args[0].from = NULL; | 1381 | {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, |
1526 | token = match_token(p, tokens, args); | 1382 | {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, |
1527 | switch (token) { | 1383 | {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, |
1528 | case Opt_bsd_df: | 1384 | {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, |
1529 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1385 | {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, |
1530 | clear_opt(sb, MINIX_DF); | 1386 | {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, |
1531 | break; | 1387 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, |
1532 | case Opt_minix_df: | 1388 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, |
1533 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1389 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, |
1534 | set_opt(sb, MINIX_DF); | 1390 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
1535 | 1391 | EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, | |
1536 | break; | 1392 | {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, |
1537 | case Opt_grpid: | 1393 | {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, |
1538 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1394 | {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, |
1539 | set_opt(sb, GRPID); | 1395 | {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, |
1540 | 1396 | {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, | |
1541 | break; | 1397 | {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, |
1542 | case Opt_nogrpid: | 1398 | {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, |
1543 | ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); | 1399 | {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, |
1544 | clear_opt(sb, GRPID); | 1400 | {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, |
1545 | 1401 | {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, | |
1546 | break; | 1402 | {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, |
1547 | case Opt_resuid: | 1403 | {Opt_commit, 0, MOPT_GTE0}, |
1548 | if (match_int(&args[0], &option)) | 1404 | {Opt_max_batch_time, 0, MOPT_GTE0}, |
1549 | return 0; | 1405 | {Opt_min_batch_time, 0, MOPT_GTE0}, |
1550 | sbi->s_resuid = option; | 1406 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, |
1551 | break; | 1407 | {Opt_init_itable, 0, MOPT_GTE0}, |
1552 | case Opt_resgid: | 1408 | {Opt_stripe, 0, MOPT_GTE0}, |
1553 | if (match_int(&args[0], &option)) | 1409 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, |
1554 | return 0; | 1410 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, |
1555 | sbi->s_resgid = option; | 1411 | {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, |
1556 | break; | ||
1557 | case Opt_sb: | ||
1558 | /* handled by get_sb_block() instead of here */ | ||
1559 | /* *sb_block = match_int(&args[0]); */ | ||
1560 | break; | ||
1561 | case Opt_err_panic: | ||
1562 | clear_opt(sb, ERRORS_CONT); | ||
1563 | clear_opt(sb, ERRORS_RO); | ||
1564 | set_opt(sb, ERRORS_PANIC); | ||
1565 | break; | ||
1566 | case Opt_err_ro: | ||
1567 | clear_opt(sb, ERRORS_CONT); | ||
1568 | clear_opt(sb, ERRORS_PANIC); | ||
1569 | set_opt(sb, ERRORS_RO); | ||
1570 | break; | ||
1571 | case Opt_err_cont: | ||
1572 | clear_opt(sb, ERRORS_RO); | ||
1573 | clear_opt(sb, ERRORS_PANIC); | ||
1574 | set_opt(sb, ERRORS_CONT); | ||
1575 | break; | ||
1576 | case Opt_nouid32: | ||
1577 | set_opt(sb, NO_UID32); | ||
1578 | break; | ||
1579 | case Opt_debug: | ||
1580 | set_opt(sb, DEBUG); | ||
1581 | break; | ||
1582 | case Opt_oldalloc: | ||
1583 | ext4_msg(sb, KERN_WARNING, | ||
1584 | "Ignoring deprecated oldalloc option"); | ||
1585 | break; | ||
1586 | case Opt_orlov: | ||
1587 | ext4_msg(sb, KERN_WARNING, | ||
1588 | "Ignoring deprecated orlov option"); | ||
1589 | break; | ||
1590 | #ifdef CONFIG_EXT4_FS_XATTR | 1412 | #ifdef CONFIG_EXT4_FS_XATTR |
1591 | case Opt_user_xattr: | 1413 | {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, |
1592 | set_opt(sb, XATTR_USER); | 1414 | {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, |
1593 | break; | ||
1594 | case Opt_nouser_xattr: | ||
1595 | clear_opt(sb, XATTR_USER); | ||
1596 | break; | ||
1597 | #else | 1415 | #else |
1598 | case Opt_user_xattr: | 1416 | {Opt_user_xattr, 0, MOPT_NOSUPPORT}, |
1599 | case Opt_nouser_xattr: | 1417 | {Opt_nouser_xattr, 0, MOPT_NOSUPPORT}, |
1600 | ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); | ||
1601 | break; | ||
1602 | #endif | 1418 | #endif |
1603 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1419 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1604 | case Opt_acl: | 1420 | {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, |
1605 | set_opt(sb, POSIX_ACL); | 1421 | {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, |
1606 | break; | ||
1607 | case Opt_noacl: | ||
1608 | clear_opt(sb, POSIX_ACL); | ||
1609 | break; | ||
1610 | #else | 1422 | #else |
1611 | case Opt_acl: | 1423 | {Opt_acl, 0, MOPT_NOSUPPORT}, |
1612 | case Opt_noacl: | 1424 | {Opt_noacl, 0, MOPT_NOSUPPORT}, |
1613 | ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); | ||
1614 | break; | ||
1615 | #endif | 1425 | #endif |
1616 | case Opt_journal_update: | 1426 | {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, |
1617 | /* @@@ FIXME */ | 1427 | {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, |
1618 | /* Eventually we will want to be able to create | 1428 | {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, |
1619 | a journal file here. For now, only allow the | 1429 | {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, |
1620 | user to specify an existing inode to be the | 1430 | MOPT_SET | MOPT_Q}, |
1621 | journal file. */ | 1431 | {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, |
1622 | if (is_remount) { | 1432 | MOPT_SET | MOPT_Q}, |
1623 | ext4_msg(sb, KERN_ERR, | 1433 | {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | |
1624 | "Cannot specify journal on remount"); | 1434 | EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, |
1625 | return 0; | 1435 | {Opt_usrjquota, 0, MOPT_Q}, |
1626 | } | 1436 | {Opt_grpjquota, 0, MOPT_Q}, |
1627 | set_opt(sb, UPDATE_JOURNAL); | 1437 | {Opt_offusrjquota, 0, MOPT_Q}, |
1628 | break; | 1438 | {Opt_offgrpjquota, 0, MOPT_Q}, |
1629 | case Opt_journal_dev: | 1439 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, |
1630 | if (is_remount) { | 1440 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1441 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | ||
1442 | {Opt_err, 0, 0} | ||
1443 | }; | ||
1444 | |||
1445 | static int handle_mount_opt(struct super_block *sb, char *opt, int token, | ||
1446 | substring_t *args, unsigned long *journal_devnum, | ||
1447 | unsigned int *journal_ioprio, int is_remount) | ||
1448 | { | ||
1449 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1450 | const struct mount_opts *m; | ||
1451 | int arg = 0; | ||
1452 | |||
1453 | if (args->from && match_int(args, &arg)) | ||
1454 | return -1; | ||
1455 | switch (token) { | ||
1456 | case Opt_noacl: | ||
1457 | case Opt_nouser_xattr: | ||
1458 | ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); | ||
1459 | break; | ||
1460 | case Opt_sb: | ||
1461 | return 1; /* handled by get_sb_block() */ | ||
1462 | case Opt_removed: | ||
1463 | ext4_msg(sb, KERN_WARNING, | ||
1464 | "Ignoring removed %s option", opt); | ||
1465 | return 1; | ||
1466 | case Opt_resuid: | ||
1467 | sbi->s_resuid = arg; | ||
1468 | return 1; | ||
1469 | case Opt_resgid: | ||
1470 | sbi->s_resgid = arg; | ||
1471 | return 1; | ||
1472 | case Opt_abort: | ||
1473 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
1474 | return 1; | ||
1475 | case Opt_i_version: | ||
1476 | sb->s_flags |= MS_I_VERSION; | ||
1477 | return 1; | ||
1478 | case Opt_journal_dev: | ||
1479 | if (is_remount) { | ||
1480 | ext4_msg(sb, KERN_ERR, | ||
1481 | "Cannot specify journal on remount"); | ||
1482 | return -1; | ||
1483 | } | ||
1484 | *journal_devnum = arg; | ||
1485 | return 1; | ||
1486 | case Opt_journal_ioprio: | ||
1487 | if (arg < 0 || arg > 7) | ||
1488 | return -1; | ||
1489 | *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); | ||
1490 | return 1; | ||
1491 | } | ||
1492 | |||
1493 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { | ||
1494 | if (token != m->token) | ||
1495 | continue; | ||
1496 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) | ||
1497 | return -1; | ||
1498 | if (m->flags & MOPT_EXPLICIT) | ||
1499 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
1500 | if (m->flags & MOPT_CLEAR_ERR) | ||
1501 | clear_opt(sb, ERRORS_MASK); | ||
1502 | if (token == Opt_noquota && sb_any_quota_loaded(sb)) { | ||
1503 | ext4_msg(sb, KERN_ERR, "Cannot change quota " | ||
1504 | "options when quota turned on"); | ||
1505 | return -1; | ||
1506 | } | ||
1507 | |||
1508 | if (m->flags & MOPT_NOSUPPORT) { | ||
1509 | ext4_msg(sb, KERN_ERR, "%s option not supported", opt); | ||
1510 | } else if (token == Opt_commit) { | ||
1511 | if (arg == 0) | ||
1512 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; | ||
1513 | sbi->s_commit_interval = HZ * arg; | ||
1514 | } else if (token == Opt_max_batch_time) { | ||
1515 | if (arg == 0) | ||
1516 | arg = EXT4_DEF_MAX_BATCH_TIME; | ||
1517 | sbi->s_max_batch_time = arg; | ||
1518 | } else if (token == Opt_min_batch_time) { | ||
1519 | sbi->s_min_batch_time = arg; | ||
1520 | } else if (token == Opt_inode_readahead_blks) { | ||
1521 | if (arg > (1 << 30)) | ||
1522 | return -1; | ||
1523 | if (arg && !is_power_of_2(arg)) { | ||
1631 | ext4_msg(sb, KERN_ERR, | 1524 | ext4_msg(sb, KERN_ERR, |
1632 | "Cannot specify journal on remount"); | 1525 | "EXT4-fs: inode_readahead_blks" |
1633 | return 0; | 1526 | " must be a power of 2"); |
1527 | return -1; | ||
1634 | } | 1528 | } |
1635 | if (match_int(&args[0], &option)) | 1529 | sbi->s_inode_readahead_blks = arg; |
1636 | return 0; | 1530 | } else if (token == Opt_init_itable) { |
1637 | *journal_devnum = option; | 1531 | set_opt(sb, INIT_INODE_TABLE); |
1638 | break; | 1532 | if (!args->from) |
1639 | case Opt_journal_checksum: | 1533 | arg = EXT4_DEF_LI_WAIT_MULT; |
1640 | set_opt(sb, JOURNAL_CHECKSUM); | 1534 | sbi->s_li_wait_mult = arg; |
1641 | break; | 1535 | } else if (token == Opt_stripe) { |
1642 | case Opt_journal_async_commit: | 1536 | sbi->s_stripe = arg; |
1643 | set_opt(sb, JOURNAL_ASYNC_COMMIT); | 1537 | } else if (m->flags & MOPT_DATAJ) { |
1644 | set_opt(sb, JOURNAL_CHECKSUM); | ||
1645 | break; | ||
1646 | case Opt_noload: | ||
1647 | set_opt(sb, NOLOAD); | ||
1648 | break; | ||
1649 | case Opt_commit: | ||
1650 | if (match_int(&args[0], &option)) | ||
1651 | return 0; | ||
1652 | if (option < 0) | ||
1653 | return 0; | ||
1654 | if (option == 0) | ||
1655 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; | ||
1656 | sbi->s_commit_interval = HZ * option; | ||
1657 | break; | ||
1658 | case Opt_max_batch_time: | ||
1659 | if (match_int(&args[0], &option)) | ||
1660 | return 0; | ||
1661 | if (option < 0) | ||
1662 | return 0; | ||
1663 | if (option == 0) | ||
1664 | option = EXT4_DEF_MAX_BATCH_TIME; | ||
1665 | sbi->s_max_batch_time = option; | ||
1666 | break; | ||
1667 | case Opt_min_batch_time: | ||
1668 | if (match_int(&args[0], &option)) | ||
1669 | return 0; | ||
1670 | if (option < 0) | ||
1671 | return 0; | ||
1672 | sbi->s_min_batch_time = option; | ||
1673 | break; | ||
1674 | case Opt_data_journal: | ||
1675 | data_opt = EXT4_MOUNT_JOURNAL_DATA; | ||
1676 | goto datacheck; | ||
1677 | case Opt_data_ordered: | ||
1678 | data_opt = EXT4_MOUNT_ORDERED_DATA; | ||
1679 | goto datacheck; | ||
1680 | case Opt_data_writeback: | ||
1681 | data_opt = EXT4_MOUNT_WRITEBACK_DATA; | ||
1682 | datacheck: | ||
1683 | if (is_remount) { | 1538 | if (is_remount) { |
1684 | if (!sbi->s_journal) | 1539 | if (!sbi->s_journal) |
1685 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); | 1540 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); |
1686 | else if (test_opt(sb, DATA_FLAGS) != data_opt) { | 1541 | else if (test_opt(sb, DATA_FLAGS) != |
1542 | m->mount_opt) { | ||
1687 | ext4_msg(sb, KERN_ERR, | 1543 | ext4_msg(sb, KERN_ERR, |
1688 | "Cannot change data mode on remount"); | 1544 | "Cannot change data mode on remount"); |
1689 | return 0; | 1545 | return -1; |
1690 | } | 1546 | } |
1691 | } else { | 1547 | } else { |
1692 | clear_opt(sb, DATA_FLAGS); | 1548 | clear_opt(sb, DATA_FLAGS); |
1693 | sbi->s_mount_opt |= data_opt; | 1549 | sbi->s_mount_opt |= m->mount_opt; |
1694 | } | 1550 | } |
1695 | break; | ||
1696 | case Opt_data_err_abort: | ||
1697 | set_opt(sb, DATA_ERR_ABORT); | ||
1698 | break; | ||
1699 | case Opt_data_err_ignore: | ||
1700 | clear_opt(sb, DATA_ERR_ABORT); | ||
1701 | break; | ||
1702 | #ifdef CONFIG_QUOTA | 1551 | #ifdef CONFIG_QUOTA |
1703 | case Opt_usrjquota: | 1552 | } else if (token == Opt_usrjquota) { |
1704 | if (!set_qf_name(sb, USRQUOTA, &args[0])) | 1553 | if (!set_qf_name(sb, USRQUOTA, &args[0])) |
1705 | return 0; | 1554 | return -1; |
1706 | break; | 1555 | } else if (token == Opt_grpjquota) { |
1707 | case Opt_grpjquota: | ||
1708 | if (!set_qf_name(sb, GRPQUOTA, &args[0])) | 1556 | if (!set_qf_name(sb, GRPQUOTA, &args[0])) |
1709 | return 0; | 1557 | return -1; |
1710 | break; | 1558 | } else if (token == Opt_offusrjquota) { |
1711 | case Opt_offusrjquota: | ||
1712 | if (!clear_qf_name(sb, USRQUOTA)) | 1559 | if (!clear_qf_name(sb, USRQUOTA)) |
1713 | return 0; | 1560 | return -1; |
1714 | break; | 1561 | } else if (token == Opt_offgrpjquota) { |
1715 | case Opt_offgrpjquota: | ||
1716 | if (!clear_qf_name(sb, GRPQUOTA)) | 1562 | if (!clear_qf_name(sb, GRPQUOTA)) |
1717 | return 0; | 1563 | return -1; |
1718 | break; | 1564 | } else if (m->flags & MOPT_QFMT) { |
1719 | |||
1720 | case Opt_jqfmt_vfsold: | ||
1721 | qfmt = QFMT_VFS_OLD; | ||
1722 | goto set_qf_format; | ||
1723 | case Opt_jqfmt_vfsv0: | ||
1724 | qfmt = QFMT_VFS_V0; | ||
1725 | goto set_qf_format; | ||
1726 | case Opt_jqfmt_vfsv1: | ||
1727 | qfmt = QFMT_VFS_V1; | ||
1728 | set_qf_format: | ||
1729 | if (sb_any_quota_loaded(sb) && | 1565 | if (sb_any_quota_loaded(sb) && |
1730 | sbi->s_jquota_fmt != qfmt) { | 1566 | sbi->s_jquota_fmt != m->mount_opt) { |
1731 | ext4_msg(sb, KERN_ERR, "Cannot change " | 1567 | ext4_msg(sb, KERN_ERR, "Cannot " |
1732 | "journaled quota options when " | 1568 | "change journaled quota options " |
1733 | "quota turned on"); | 1569 | "when quota turned on"); |
1734 | return 0; | 1570 | return -1; |
1735 | } | ||
1736 | sbi->s_jquota_fmt = qfmt; | ||
1737 | break; | ||
1738 | case Opt_quota: | ||
1739 | case Opt_usrquota: | ||
1740 | set_opt(sb, QUOTA); | ||
1741 | set_opt(sb, USRQUOTA); | ||
1742 | break; | ||
1743 | case Opt_grpquota: | ||
1744 | set_opt(sb, QUOTA); | ||
1745 | set_opt(sb, GRPQUOTA); | ||
1746 | break; | ||
1747 | case Opt_noquota: | ||
1748 | if (sb_any_quota_loaded(sb)) { | ||
1749 | ext4_msg(sb, KERN_ERR, "Cannot change quota " | ||
1750 | "options when quota turned on"); | ||
1751 | return 0; | ||
1752 | } | 1571 | } |
1753 | clear_opt(sb, QUOTA); | 1572 | sbi->s_jquota_fmt = m->mount_opt; |
1754 | clear_opt(sb, USRQUOTA); | ||
1755 | clear_opt(sb, GRPQUOTA); | ||
1756 | break; | ||
1757 | #else | ||
1758 | case Opt_quota: | ||
1759 | case Opt_usrquota: | ||
1760 | case Opt_grpquota: | ||
1761 | ext4_msg(sb, KERN_ERR, | ||
1762 | "quota options not supported"); | ||
1763 | break; | ||
1764 | case Opt_usrjquota: | ||
1765 | case Opt_grpjquota: | ||
1766 | case Opt_offusrjquota: | ||
1767 | case Opt_offgrpjquota: | ||
1768 | case Opt_jqfmt_vfsold: | ||
1769 | case Opt_jqfmt_vfsv0: | ||
1770 | case Opt_jqfmt_vfsv1: | ||
1771 | ext4_msg(sb, KERN_ERR, | ||
1772 | "journaled quota options not supported"); | ||
1773 | break; | ||
1774 | case Opt_noquota: | ||
1775 | break; | ||
1776 | #endif | 1573 | #endif |
1777 | case Opt_abort: | 1574 | } else { |
1778 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | 1575 | if (!args->from) |
1779 | break; | 1576 | arg = 1; |
1780 | case Opt_nobarrier: | 1577 | if (m->flags & MOPT_CLEAR) |
1781 | clear_opt(sb, BARRIER); | 1578 | arg = !arg; |
1782 | break; | 1579 | else if (unlikely(!(m->flags & MOPT_SET))) { |
1783 | case Opt_barrier: | 1580 | ext4_msg(sb, KERN_WARNING, |
1784 | if (args[0].from) { | 1581 | "buggy handling of option %s", opt); |
1785 | if (match_int(&args[0], &option)) | 1582 | WARN_ON(1); |
1786 | return 0; | 1583 | return -1; |
1787 | } else | ||
1788 | option = 1; /* No argument, default to 1 */ | ||
1789 | if (option) | ||
1790 | set_opt(sb, BARRIER); | ||
1791 | else | ||
1792 | clear_opt(sb, BARRIER); | ||
1793 | break; | ||
1794 | case Opt_ignore: | ||
1795 | break; | ||
1796 | case Opt_resize: | ||
1797 | if (!is_remount) { | ||
1798 | ext4_msg(sb, KERN_ERR, | ||
1799 | "resize option only available " | ||
1800 | "for remount"); | ||
1801 | return 0; | ||
1802 | } | ||
1803 | if (match_int(&args[0], &option) != 0) | ||
1804 | return 0; | ||
1805 | *n_blocks_count = option; | ||
1806 | break; | ||
1807 | case Opt_nobh: | ||
1808 | ext4_msg(sb, KERN_WARNING, | ||
1809 | "Ignoring deprecated nobh option"); | ||
1810 | break; | ||
1811 | case Opt_bh: | ||
1812 | ext4_msg(sb, KERN_WARNING, | ||
1813 | "Ignoring deprecated bh option"); | ||
1814 | break; | ||
1815 | case Opt_i_version: | ||
1816 | set_opt(sb, I_VERSION); | ||
1817 | sb->s_flags |= MS_I_VERSION; | ||
1818 | break; | ||
1819 | case Opt_nodelalloc: | ||
1820 | clear_opt(sb, DELALLOC); | ||
1821 | clear_opt2(sb, EXPLICIT_DELALLOC); | ||
1822 | break; | ||
1823 | case Opt_mblk_io_submit: | ||
1824 | set_opt(sb, MBLK_IO_SUBMIT); | ||
1825 | break; | ||
1826 | case Opt_nomblk_io_submit: | ||
1827 | clear_opt(sb, MBLK_IO_SUBMIT); | ||
1828 | break; | ||
1829 | case Opt_stripe: | ||
1830 | if (match_int(&args[0], &option)) | ||
1831 | return 0; | ||
1832 | if (option < 0) | ||
1833 | return 0; | ||
1834 | sbi->s_stripe = option; | ||
1835 | break; | ||
1836 | case Opt_delalloc: | ||
1837 | set_opt(sb, DELALLOC); | ||
1838 | set_opt2(sb, EXPLICIT_DELALLOC); | ||
1839 | break; | ||
1840 | case Opt_block_validity: | ||
1841 | set_opt(sb, BLOCK_VALIDITY); | ||
1842 | break; | ||
1843 | case Opt_noblock_validity: | ||
1844 | clear_opt(sb, BLOCK_VALIDITY); | ||
1845 | break; | ||
1846 | case Opt_inode_readahead_blks: | ||
1847 | if (match_int(&args[0], &option)) | ||
1848 | return 0; | ||
1849 | if (option < 0 || option > (1 << 30)) | ||
1850 | return 0; | ||
1851 | if (option && !is_power_of_2(option)) { | ||
1852 | ext4_msg(sb, KERN_ERR, | ||
1853 | "EXT4-fs: inode_readahead_blks" | ||
1854 | " must be a power of 2"); | ||
1855 | return 0; | ||
1856 | } | 1584 | } |
1857 | sbi->s_inode_readahead_blks = option; | 1585 | if (arg != 0) |
1858 | break; | 1586 | sbi->s_mount_opt |= m->mount_opt; |
1859 | case Opt_journal_ioprio: | ||
1860 | if (match_int(&args[0], &option)) | ||
1861 | return 0; | ||
1862 | if (option < 0 || option > 7) | ||
1863 | break; | ||
1864 | *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, | ||
1865 | option); | ||
1866 | break; | ||
1867 | case Opt_noauto_da_alloc: | ||
1868 | set_opt(sb, NO_AUTO_DA_ALLOC); | ||
1869 | break; | ||
1870 | case Opt_auto_da_alloc: | ||
1871 | if (args[0].from) { | ||
1872 | if (match_int(&args[0], &option)) | ||
1873 | return 0; | ||
1874 | } else | ||
1875 | option = 1; /* No argument, default to 1 */ | ||
1876 | if (option) | ||
1877 | clear_opt(sb, NO_AUTO_DA_ALLOC); | ||
1878 | else | 1587 | else |
1879 | set_opt(sb,NO_AUTO_DA_ALLOC); | 1588 | sbi->s_mount_opt &= ~m->mount_opt; |
1880 | break; | ||
1881 | case Opt_discard: | ||
1882 | set_opt(sb, DISCARD); | ||
1883 | break; | ||
1884 | case Opt_nodiscard: | ||
1885 | clear_opt(sb, DISCARD); | ||
1886 | break; | ||
1887 | case Opt_dioread_nolock: | ||
1888 | set_opt(sb, DIOREAD_NOLOCK); | ||
1889 | break; | ||
1890 | case Opt_dioread_lock: | ||
1891 | clear_opt(sb, DIOREAD_NOLOCK); | ||
1892 | break; | ||
1893 | case Opt_init_itable: | ||
1894 | set_opt(sb, INIT_INODE_TABLE); | ||
1895 | if (args[0].from) { | ||
1896 | if (match_int(&args[0], &option)) | ||
1897 | return 0; | ||
1898 | } else | ||
1899 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1900 | if (option < 0) | ||
1901 | return 0; | ||
1902 | sbi->s_li_wait_mult = option; | ||
1903 | break; | ||
1904 | case Opt_noinit_itable: | ||
1905 | clear_opt(sb, INIT_INODE_TABLE); | ||
1906 | break; | ||
1907 | default: | ||
1908 | ext4_msg(sb, KERN_ERR, | ||
1909 | "Unrecognized mount option \"%s\" " | ||
1910 | "or missing value", p); | ||
1911 | return 0; | ||
1912 | } | 1589 | } |
1590 | return 1; | ||
1591 | } | ||
1592 | ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " | ||
1593 | "or missing value", opt); | ||
1594 | return -1; | ||
1595 | } | ||
1596 | |||
1597 | static int parse_options(char *options, struct super_block *sb, | ||
1598 | unsigned long *journal_devnum, | ||
1599 | unsigned int *journal_ioprio, | ||
1600 | int is_remount) | ||
1601 | { | ||
1602 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1603 | char *p; | ||
1604 | substring_t args[MAX_OPT_ARGS]; | ||
1605 | int token; | ||
1606 | |||
1607 | if (!options) | ||
1608 | return 1; | ||
1609 | |||
1610 | while ((p = strsep(&options, ",")) != NULL) { | ||
1611 | if (!*p) | ||
1612 | continue; | ||
1613 | /* | ||
1614 | * Initialize args struct so we know whether arg was | ||
1615 | * found; some options take optional arguments. | ||
1616 | */ | ||
1617 | args[0].to = args[0].from = 0; | ||
1618 | token = match_token(p, tokens, args); | ||
1619 | if (handle_mount_opt(sb, p, token, args, journal_devnum, | ||
1620 | journal_ioprio, is_remount) < 0) | ||
1621 | return 0; | ||
1913 | } | 1622 | } |
1914 | #ifdef CONFIG_QUOTA | 1623 | #ifdef CONFIG_QUOTA |
1915 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 1624 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
@@ -1942,6 +1651,160 @@ set_qf_format: | |||
1942 | return 1; | 1651 | return 1; |
1943 | } | 1652 | } |
1944 | 1653 | ||
1654 | static inline void ext4_show_quota_options(struct seq_file *seq, | ||
1655 | struct super_block *sb) | ||
1656 | { | ||
1657 | #if defined(CONFIG_QUOTA) | ||
1658 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1659 | |||
1660 | if (sbi->s_jquota_fmt) { | ||
1661 | char *fmtname = ""; | ||
1662 | |||
1663 | switch (sbi->s_jquota_fmt) { | ||
1664 | case QFMT_VFS_OLD: | ||
1665 | fmtname = "vfsold"; | ||
1666 | break; | ||
1667 | case QFMT_VFS_V0: | ||
1668 | fmtname = "vfsv0"; | ||
1669 | break; | ||
1670 | case QFMT_VFS_V1: | ||
1671 | fmtname = "vfsv1"; | ||
1672 | break; | ||
1673 | } | ||
1674 | seq_printf(seq, ",jqfmt=%s", fmtname); | ||
1675 | } | ||
1676 | |||
1677 | if (sbi->s_qf_names[USRQUOTA]) | ||
1678 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | ||
1679 | |||
1680 | if (sbi->s_qf_names[GRPQUOTA]) | ||
1681 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | ||
1682 | |||
1683 | if (test_opt(sb, USRQUOTA)) | ||
1684 | seq_puts(seq, ",usrquota"); | ||
1685 | |||
1686 | if (test_opt(sb, GRPQUOTA)) | ||
1687 | seq_puts(seq, ",grpquota"); | ||
1688 | #endif | ||
1689 | } | ||
1690 | |||
1691 | static const char *token2str(int token) | ||
1692 | { | ||
1693 | static const struct match_token *t; | ||
1694 | |||
1695 | for (t = tokens; t->token != Opt_err; t++) | ||
1696 | if (t->token == token && !strchr(t->pattern, '=')) | ||
1697 | break; | ||
1698 | return t->pattern; | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | * Show an option if | ||
1703 | * - it's set to a non-default value OR | ||
1704 | * - if the per-sb default is different from the global default | ||
1705 | */ | ||
1706 | static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, | ||
1707 | int nodefs) | ||
1708 | { | ||
1709 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1710 | struct ext4_super_block *es = sbi->s_es; | ||
1711 | int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; | ||
1712 | const struct mount_opts *m; | ||
1713 | char sep = nodefs ? '\n' : ','; | ||
1714 | |||
1715 | #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) | ||
1716 | #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) | ||
1717 | |||
1718 | if (sbi->s_sb_block != 1) | ||
1719 | SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); | ||
1720 | |||
1721 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { | ||
1722 | int want_set = m->flags & MOPT_SET; | ||
1723 | if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || | ||
1724 | (m->flags & MOPT_CLEAR_ERR)) | ||
1725 | continue; | ||
1726 | if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) | ||
1727 | continue; /* skip if same as the default */ | ||
1728 | if ((want_set && | ||
1729 | (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || | ||
1730 | (!want_set && (sbi->s_mount_opt & m->mount_opt))) | ||
1731 | continue; /* select Opt_noFoo vs Opt_Foo */ | ||
1732 | SEQ_OPTS_PRINT("%s", token2str(m->token)); | ||
1733 | } | ||
1734 | |||
1735 | if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || | ||
1736 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) | ||
1737 | SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); | ||
1738 | if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || | ||
1739 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) | ||
1740 | SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); | ||
1741 | def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); | ||
1742 | if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) | ||
1743 | SEQ_OPTS_PUTS("errors=remount-ro"); | ||
1744 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) | ||
1745 | SEQ_OPTS_PUTS("errors=continue"); | ||
1746 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) | ||
1747 | SEQ_OPTS_PUTS("errors=panic"); | ||
1748 | if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) | ||
1749 | SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); | ||
1750 | if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) | ||
1751 | SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); | ||
1752 | if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) | ||
1753 | SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); | ||
1754 | if (sb->s_flags & MS_I_VERSION) | ||
1755 | SEQ_OPTS_PUTS("i_version"); | ||
1756 | if (nodefs || sbi->s_stripe) | ||
1757 | SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); | ||
1758 | if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { | ||
1759 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | ||
1760 | SEQ_OPTS_PUTS("data=journal"); | ||
1761 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | ||
1762 | SEQ_OPTS_PUTS("data=ordered"); | ||
1763 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | ||
1764 | SEQ_OPTS_PUTS("data=writeback"); | ||
1765 | } | ||
1766 | if (nodefs || | ||
1767 | sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
1768 | SEQ_OPTS_PRINT("inode_readahead_blks=%u", | ||
1769 | sbi->s_inode_readahead_blks); | ||
1770 | |||
1771 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && | ||
1772 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) | ||
1773 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); | ||
1774 | |||
1775 | ext4_show_quota_options(seq, sb); | ||
1776 | return 0; | ||
1777 | } | ||
1778 | |||
1779 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) | ||
1780 | { | ||
1781 | return _ext4_show_options(seq, root->d_sb, 0); | ||
1782 | } | ||
1783 | |||
1784 | static int options_seq_show(struct seq_file *seq, void *offset) | ||
1785 | { | ||
1786 | struct super_block *sb = seq->private; | ||
1787 | int rc; | ||
1788 | |||
1789 | seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); | ||
1790 | rc = _ext4_show_options(seq, sb, 1); | ||
1791 | seq_puts(seq, "\n"); | ||
1792 | return rc; | ||
1793 | } | ||
1794 | |||
1795 | static int options_open_fs(struct inode *inode, struct file *file) | ||
1796 | { | ||
1797 | return single_open(file, options_seq_show, PDE(inode)->data); | ||
1798 | } | ||
1799 | |||
1800 | static const struct file_operations ext4_seq_options_fops = { | ||
1801 | .owner = THIS_MODULE, | ||
1802 | .open = options_open_fs, | ||
1803 | .read = seq_read, | ||
1804 | .llseek = seq_lseek, | ||
1805 | .release = single_release, | ||
1806 | }; | ||
1807 | |||
1945 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | 1808 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, |
1946 | int read_only) | 1809 | int read_only) |
1947 | { | 1810 | { |
@@ -2945,7 +2808,7 @@ static int ext4_run_lazyinit_thread(void) | |||
2945 | ext4_clear_request_list(); | 2808 | ext4_clear_request_list(); |
2946 | kfree(ext4_li_info); | 2809 | kfree(ext4_li_info); |
2947 | ext4_li_info = NULL; | 2810 | ext4_li_info = NULL; |
2948 | printk(KERN_CRIT "EXT4: error %d creating inode table " | 2811 | printk(KERN_CRIT "EXT4-fs: error %d creating inode table " |
2949 | "initialization thread\n", | 2812 | "initialization thread\n", |
2950 | err); | 2813 | err); |
2951 | return err; | 2814 | return err; |
@@ -3183,11 +3046,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3183 | set_opt(sb, INIT_INODE_TABLE); | 3046 | set_opt(sb, INIT_INODE_TABLE); |
3184 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3047 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
3185 | set_opt(sb, DEBUG); | 3048 | set_opt(sb, DEBUG); |
3186 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3049 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) |
3187 | ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", | ||
3188 | "2.6.38"); | ||
3189 | set_opt(sb, GRPID); | 3050 | set_opt(sb, GRPID); |
3190 | } | ||
3191 | if (def_mount_opts & EXT4_DEFM_UID16) | 3051 | if (def_mount_opts & EXT4_DEFM_UID16) |
3192 | set_opt(sb, NO_UID32); | 3052 | set_opt(sb, NO_UID32); |
3193 | /* xattr user namespace & acls are now defaulted on */ | 3053 | /* xattr user namespace & acls are now defaulted on */ |
@@ -3240,13 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3240 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; | 3100 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; |
3241 | 3101 | ||
3242 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | 3102 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, |
3243 | &journal_devnum, &journal_ioprio, NULL, 0)) { | 3103 | &journal_devnum, &journal_ioprio, 0)) { |
3244 | ext4_msg(sb, KERN_WARNING, | 3104 | ext4_msg(sb, KERN_WARNING, |
3245 | "failed to parse options in superblock: %s", | 3105 | "failed to parse options in superblock: %s", |
3246 | sbi->s_es->s_mount_opts); | 3106 | sbi->s_es->s_mount_opts); |
3247 | } | 3107 | } |
3108 | sbi->s_def_mount_opt = sbi->s_mount_opt; | ||
3248 | if (!parse_options((char *) data, sb, &journal_devnum, | 3109 | if (!parse_options((char *) data, sb, &journal_devnum, |
3249 | &journal_ioprio, NULL, 0)) | 3110 | &journal_ioprio, 0)) |
3250 | goto failed_mount; | 3111 | goto failed_mount; |
3251 | 3112 | ||
3252 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 3113 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
@@ -3416,7 +3277,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3416 | #else | 3277 | #else |
3417 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); | 3278 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); |
3418 | #endif | 3279 | #endif |
3419 | sb->s_dirt = 1; | ||
3420 | } | 3280 | } |
3421 | 3281 | ||
3422 | /* Handle clustersize */ | 3282 | /* Handle clustersize */ |
@@ -3540,6 +3400,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3540 | if (ext4_proc_root) | 3400 | if (ext4_proc_root) |
3541 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | 3401 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); |
3542 | 3402 | ||
3403 | if (sbi->s_proc) | ||
3404 | proc_create_data("options", S_IRUGO, sbi->s_proc, | ||
3405 | &ext4_seq_options_fops, sb); | ||
3406 | |||
3543 | bgl_lock_init(sbi->s_blockgroup_lock); | 3407 | bgl_lock_init(sbi->s_blockgroup_lock); |
3544 | 3408 | ||
3545 | for (i = 0; i < db_count; i++) { | 3409 | for (i = 0; i < db_count; i++) { |
@@ -3694,6 +3558,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3694 | } | 3558 | } |
3695 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 3559 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
3696 | 3560 | ||
3561 | sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; | ||
3562 | |||
3697 | /* | 3563 | /* |
3698 | * The journal may have updated the bg summary counts, so we | 3564 | * The journal may have updated the bg summary counts, so we |
3699 | * need to update the global counters. | 3565 | * need to update the global counters. |
@@ -3861,6 +3727,7 @@ failed_mount2: | |||
3861 | ext4_kvfree(sbi->s_group_desc); | 3727 | ext4_kvfree(sbi->s_group_desc); |
3862 | failed_mount: | 3728 | failed_mount: |
3863 | if (sbi->s_proc) { | 3729 | if (sbi->s_proc) { |
3730 | remove_proc_entry("options", sbi->s_proc); | ||
3864 | remove_proc_entry(sb->s_id, ext4_proc_root); | 3731 | remove_proc_entry(sb->s_id, ext4_proc_root); |
3865 | } | 3732 | } |
3866 | #ifdef CONFIG_QUOTA | 3733 | #ifdef CONFIG_QUOTA |
@@ -4090,15 +3957,6 @@ static int ext4_load_journal(struct super_block *sb, | |||
4090 | if (!(journal->j_flags & JBD2_BARRIER)) | 3957 | if (!(journal->j_flags & JBD2_BARRIER)) |
4091 | ext4_msg(sb, KERN_INFO, "barriers disabled"); | 3958 | ext4_msg(sb, KERN_INFO, "barriers disabled"); |
4092 | 3959 | ||
4093 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | ||
4094 | err = jbd2_journal_update_format(journal); | ||
4095 | if (err) { | ||
4096 | ext4_msg(sb, KERN_ERR, "error updating journal"); | ||
4097 | jbd2_journal_destroy(journal); | ||
4098 | return err; | ||
4099 | } | ||
4100 | } | ||
4101 | |||
4102 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) | 3960 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) |
4103 | err = jbd2_journal_wipe(journal, !really_read_only); | 3961 | err = jbd2_journal_wipe(journal, !really_read_only); |
4104 | if (!err) { | 3962 | if (!err) { |
@@ -4385,7 +4243,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4385 | { | 4243 | { |
4386 | struct ext4_super_block *es; | 4244 | struct ext4_super_block *es; |
4387 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4245 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4388 | ext4_fsblk_t n_blocks_count = 0; | ||
4389 | unsigned long old_sb_flags; | 4246 | unsigned long old_sb_flags; |
4390 | struct ext4_mount_options old_opts; | 4247 | struct ext4_mount_options old_opts; |
4391 | int enable_quota = 0; | 4248 | int enable_quota = 0; |
@@ -4418,8 +4275,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4418 | /* | 4275 | /* |
4419 | * Allow the "check" option to be passed as a remount option. | 4276 | * Allow the "check" option to be passed as a remount option. |
4420 | */ | 4277 | */ |
4421 | if (!parse_options(data, sb, NULL, &journal_ioprio, | 4278 | if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { |
4422 | &n_blocks_count, 1)) { | ||
4423 | err = -EINVAL; | 4279 | err = -EINVAL; |
4424 | goto restore_opts; | 4280 | goto restore_opts; |
4425 | } | 4281 | } |
@@ -4437,8 +4293,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4437 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 4293 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
4438 | } | 4294 | } |
4439 | 4295 | ||
4440 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || | 4296 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
4441 | n_blocks_count > ext4_blocks_count(es)) { | ||
4442 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { | 4297 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { |
4443 | err = -EROFS; | 4298 | err = -EROFS; |
4444 | goto restore_opts; | 4299 | goto restore_opts; |
@@ -4513,8 +4368,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4513 | if (sbi->s_journal) | 4368 | if (sbi->s_journal) |
4514 | ext4_clear_journal_err(sb, es); | 4369 | ext4_clear_journal_err(sb, es); |
4515 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 4370 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
4516 | if ((err = ext4_group_extend(sb, es, n_blocks_count))) | ||
4517 | goto restore_opts; | ||
4518 | if (!ext4_setup_super(sb, es, 0)) | 4371 | if (!ext4_setup_super(sb, es, 0)) |
4519 | sb->s_flags &= ~MS_RDONLY; | 4372 | sb->s_flags &= ~MS_RDONLY; |
4520 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 4373 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 93a00d89a220..e88748e55c0f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -82,8 +82,8 @@ | |||
82 | printk("\n"); \ | 82 | printk("\n"); \ |
83 | } while (0) | 83 | } while (0) |
84 | #else | 84 | #else |
85 | # define ea_idebug(f...) | 85 | # define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
86 | # define ea_bdebug(f...) | 86 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
87 | #endif | 87 | #endif |
88 | 88 | ||
89 | static void ext4_xattr_cache_insert(struct buffer_head *); | 89 | static void ext4_xattr_cache_insert(struct buffer_head *); |
@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) | |||
158 | static inline int | 158 | static inline int |
159 | ext4_xattr_check_block(struct buffer_head *bh) | 159 | ext4_xattr_check_block(struct buffer_head *bh) |
160 | { | 160 | { |
161 | int error; | ||
162 | |||
163 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 161 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
164 | BHDR(bh)->h_blocks != cpu_to_le32(1)) | 162 | BHDR(bh)->h_blocks != cpu_to_le32(1)) |
165 | return -EIO; | 163 | return -EIO; |
166 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | 164 | return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); |
167 | return error; | ||
168 | } | 165 | } |
169 | 166 | ||
170 | static inline int | 167 | static inline int |
@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
220 | error = -ENODATA; | 217 | error = -ENODATA; |
221 | if (!EXT4_I(inode)->i_file_acl) | 218 | if (!EXT4_I(inode)->i_file_acl) |
222 | goto cleanup; | 219 | goto cleanup; |
223 | ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); | 220 | ea_idebug(inode, "reading block %llu", |
221 | (unsigned long long)EXT4_I(inode)->i_file_acl); | ||
224 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 222 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
225 | if (!bh) | 223 | if (!bh) |
226 | goto cleanup; | 224 | goto cleanup; |
@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
363 | error = 0; | 361 | error = 0; |
364 | if (!EXT4_I(inode)->i_file_acl) | 362 | if (!EXT4_I(inode)->i_file_acl) |
365 | goto cleanup; | 363 | goto cleanup; |
366 | ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); | 364 | ea_idebug(inode, "reading block %llu", |
365 | (unsigned long long)EXT4_I(inode)->i_file_acl); | ||
367 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 366 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
368 | error = -EIO; | 367 | error = -EIO; |
369 | if (!bh) | 368 | if (!bh) |
@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
487 | ext4_free_blocks(handle, inode, bh, 0, 1, | 486 | ext4_free_blocks(handle, inode, bh, 0, 1, |
488 | EXT4_FREE_BLOCKS_METADATA | | 487 | EXT4_FREE_BLOCKS_METADATA | |
489 | EXT4_FREE_BLOCKS_FORGET); | 488 | EXT4_FREE_BLOCKS_FORGET); |
489 | unlock_buffer(bh); | ||
490 | } else { | 490 | } else { |
491 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); | 491 | le32_add_cpu(&BHDR(bh)->h_refcount, -1); |
492 | if (ce) | ||
493 | mb_cache_entry_release(ce); | ||
494 | unlock_buffer(bh); | ||
492 | error = ext4_handle_dirty_metadata(handle, inode, bh); | 495 | error = ext4_handle_dirty_metadata(handle, inode, bh); |
493 | if (IS_SYNC(inode)) | 496 | if (IS_SYNC(inode)) |
494 | ext4_handle_sync(handle); | 497 | ext4_handle_sync(handle); |
495 | dquot_free_block(inode, 1); | 498 | dquot_free_block(inode, 1); |
496 | ea_bdebug(bh, "refcount now=%d; releasing", | 499 | ea_bdebug(bh, "refcount now=%d; releasing", |
497 | le32_to_cpu(BHDR(bh)->h_refcount)); | 500 | le32_to_cpu(BHDR(bh)->h_refcount)); |
498 | if (ce) | ||
499 | mb_cache_entry_release(ce); | ||
500 | } | 501 | } |
501 | unlock_buffer(bh); | ||
502 | out: | 502 | out: |
503 | ext4_std_error(inode->i_sb, error); | 503 | ext4_std_error(inode->i_sb, error); |
504 | return; | 504 | return; |
@@ -834,7 +834,8 @@ inserted: | |||
834 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 834 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
835 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | 835 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); |
836 | 836 | ||
837 | ea_idebug(inode, "creating block %d", block); | 837 | ea_idebug(inode, "creating block %llu", |
838 | (unsigned long long)block); | ||
838 | 839 | ||
839 | new_bh = sb_getblk(sb, block); | 840 | new_bh = sb_getblk(sb, block); |
840 | if (!new_bh) { | 841 | if (!new_bh) { |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index d49d202903fb..c78841ee81cf 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh) | |||
88 | * whole transaction. | 88 | * whole transaction. |
89 | * | 89 | * |
90 | * Requires j_list_lock | 90 | * Requires j_list_lock |
91 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
92 | */ | 91 | */ |
93 | static int __try_to_free_cp_buf(struct journal_head *jh) | 92 | static int __try_to_free_cp_buf(struct journal_head *jh) |
94 | { | 93 | { |
95 | int ret = 0; | 94 | int ret = 0; |
96 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
97 | 96 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | 99 | /* |
101 | * Get our reference so that bh cannot be freed before | 100 | * Get our reference so that bh cannot be freed before |
@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
104 | get_bh(bh); | 103 | get_bh(bh); |
105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
107 | jbd_unlock_bh_state(bh); | ||
108 | BUFFER_TRACE(bh, "release"); | 106 | BUFFER_TRACE(bh, "release"); |
109 | __brelse(bh); | 107 | __brelse(bh); |
110 | } else { | ||
111 | jbd_unlock_bh_state(bh); | ||
112 | } | 108 | } |
113 | return ret; | 109 | return ret; |
114 | } | 110 | } |
@@ -180,21 +176,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
180 | } | 176 | } |
181 | 177 | ||
182 | /* | 178 | /* |
183 | * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. | ||
184 | * The caller must restart a list walk. Wait for someone else to run | ||
185 | * jbd_unlock_bh_state(). | ||
186 | */ | ||
187 | static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | ||
188 | __releases(journal->j_list_lock) | ||
189 | { | ||
190 | get_bh(bh); | ||
191 | spin_unlock(&journal->j_list_lock); | ||
192 | jbd_lock_bh_state(bh); | ||
193 | jbd_unlock_bh_state(bh); | ||
194 | put_bh(bh); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Clean up transaction's list of buffers submitted for io. | 179 | * Clean up transaction's list of buffers submitted for io. |
199 | * We wait for any pending IO to complete and remove any clean | 180 | * We wait for any pending IO to complete and remove any clean |
200 | * buffers. Note that we take the buffers in the opposite ordering | 181 | * buffers. Note that we take the buffers in the opposite ordering |
@@ -222,15 +203,9 @@ restart: | |||
222 | while (!released && transaction->t_checkpoint_io_list) { | 203 | while (!released && transaction->t_checkpoint_io_list) { |
223 | jh = transaction->t_checkpoint_io_list; | 204 | jh = transaction->t_checkpoint_io_list; |
224 | bh = jh2bh(jh); | 205 | bh = jh2bh(jh); |
225 | if (!jbd_trylock_bh_state(bh)) { | ||
226 | jbd_sync_bh(journal, bh); | ||
227 | spin_lock(&journal->j_list_lock); | ||
228 | goto restart; | ||
229 | } | ||
230 | get_bh(bh); | 206 | get_bh(bh); |
231 | if (buffer_locked(bh)) { | 207 | if (buffer_locked(bh)) { |
232 | spin_unlock(&journal->j_list_lock); | 208 | spin_unlock(&journal->j_list_lock); |
233 | jbd_unlock_bh_state(bh); | ||
234 | wait_on_buffer(bh); | 209 | wait_on_buffer(bh); |
235 | /* the journal_head may have gone by now */ | 210 | /* the journal_head may have gone by now */ |
236 | BUFFER_TRACE(bh, "brelse"); | 211 | BUFFER_TRACE(bh, "brelse"); |
@@ -246,7 +221,6 @@ restart: | |||
246 | * it has been written out and so we can drop it from the list | 221 | * it has been written out and so we can drop it from the list |
247 | */ | 222 | */ |
248 | released = __jbd2_journal_remove_checkpoint(jh); | 223 | released = __jbd2_journal_remove_checkpoint(jh); |
249 | jbd_unlock_bh_state(bh); | ||
250 | __brelse(bh); | 224 | __brelse(bh); |
251 | } | 225 | } |
252 | 226 | ||
@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
266 | 240 | ||
267 | for (i = 0; i < *batch_count; i++) { | 241 | for (i = 0; i < *batch_count; i++) { |
268 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 242 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
269 | clear_buffer_jwrite(bh); | ||
270 | BUFFER_TRACE(bh, "brelse"); | 243 | BUFFER_TRACE(bh, "brelse"); |
271 | __brelse(bh); | 244 | __brelse(bh); |
272 | } | 245 | } |
@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
281 | * be written out. | 254 | * be written out. |
282 | * | 255 | * |
283 | * Called with j_list_lock held and drops it if 1 is returned | 256 | * Called with j_list_lock held and drops it if 1 is returned |
284 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | ||
285 | */ | 257 | */ |
286 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 258 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
287 | int *batch_count, transaction_t *transaction) | 259 | int *batch_count, transaction_t *transaction) |
@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
292 | if (buffer_locked(bh)) { | 264 | if (buffer_locked(bh)) { |
293 | get_bh(bh); | 265 | get_bh(bh); |
294 | spin_unlock(&journal->j_list_lock); | 266 | spin_unlock(&journal->j_list_lock); |
295 | jbd_unlock_bh_state(bh); | ||
296 | wait_on_buffer(bh); | 267 | wait_on_buffer(bh); |
297 | /* the journal_head may have gone by now */ | 268 | /* the journal_head may have gone by now */ |
298 | BUFFER_TRACE(bh, "brelse"); | 269 | BUFFER_TRACE(bh, "brelse"); |
@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | 275 | ||
305 | transaction->t_chp_stats.cs_forced_to_close++; | 276 | transaction->t_chp_stats.cs_forced_to_close++; |
306 | spin_unlock(&journal->j_list_lock); | 277 | spin_unlock(&journal->j_list_lock); |
307 | jbd_unlock_bh_state(bh); | ||
308 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | 278 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
309 | /* | 279 | /* |
310 | * The journal thread is dead; so starting and | 280 | * The journal thread is dead; so starting and |
@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
323 | if (unlikely(buffer_write_io_error(bh))) | 293 | if (unlikely(buffer_write_io_error(bh))) |
324 | ret = -EIO; | 294 | ret = -EIO; |
325 | get_bh(bh); | 295 | get_bh(bh); |
326 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | ||
327 | BUFFER_TRACE(bh, "remove from checkpoint"); | 296 | BUFFER_TRACE(bh, "remove from checkpoint"); |
328 | __jbd2_journal_remove_checkpoint(jh); | 297 | __jbd2_journal_remove_checkpoint(jh); |
329 | spin_unlock(&journal->j_list_lock); | 298 | spin_unlock(&journal->j_list_lock); |
330 | jbd_unlock_bh_state(bh); | ||
331 | __brelse(bh); | 299 | __brelse(bh); |
332 | } else { | 300 | } else { |
333 | /* | 301 | /* |
@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
340 | BUFFER_TRACE(bh, "queue"); | 308 | BUFFER_TRACE(bh, "queue"); |
341 | get_bh(bh); | 309 | get_bh(bh); |
342 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | 310 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); |
343 | set_buffer_jwrite(bh); | ||
344 | journal->j_chkpt_bhs[*batch_count] = bh; | 311 | journal->j_chkpt_bhs[*batch_count] = bh; |
345 | __buffer_relink_io(jh); | 312 | __buffer_relink_io(jh); |
346 | jbd_unlock_bh_state(bh); | ||
347 | transaction->t_chp_stats.cs_written++; | 313 | transaction->t_chp_stats.cs_written++; |
348 | (*batch_count)++; | 314 | (*batch_count)++; |
349 | if (*batch_count == JBD2_NR_BATCH) { | 315 | if (*batch_count == JBD2_NR_BATCH) { |
@@ -407,15 +373,7 @@ restart: | |||
407 | int retry = 0, err; | 373 | int retry = 0, err; |
408 | 374 | ||
409 | while (!retry && transaction->t_checkpoint_list) { | 375 | while (!retry && transaction->t_checkpoint_list) { |
410 | struct buffer_head *bh; | ||
411 | |||
412 | jh = transaction->t_checkpoint_list; | 376 | jh = transaction->t_checkpoint_list; |
413 | bh = jh2bh(jh); | ||
414 | if (!jbd_trylock_bh_state(bh)) { | ||
415 | jbd_sync_bh(journal, bh); | ||
416 | retry = 1; | ||
417 | break; | ||
418 | } | ||
419 | retry = __process_buffer(journal, jh, &batch_count, | 377 | retry = __process_buffer(journal, jh, &batch_count, |
420 | transaction); | 378 | transaction); |
421 | if (retry < 0 && !result) | 379 | if (retry < 0 && !result) |
@@ -478,79 +436,28 @@ out: | |||
478 | 436 | ||
479 | int jbd2_cleanup_journal_tail(journal_t *journal) | 437 | int jbd2_cleanup_journal_tail(journal_t *journal) |
480 | { | 438 | { |
481 | transaction_t * transaction; | ||
482 | tid_t first_tid; | 439 | tid_t first_tid; |
483 | unsigned long blocknr, freed; | 440 | unsigned long blocknr; |
484 | 441 | ||
485 | if (is_journal_aborted(journal)) | 442 | if (is_journal_aborted(journal)) |
486 | return 1; | 443 | return 1; |
487 | 444 | ||
488 | /* OK, work out the oldest transaction remaining in the log, and | 445 | if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) |
489 | * the log block it starts at. | ||
490 | * | ||
491 | * If the log is now empty, we need to work out which is the | ||
492 | * next transaction ID we will write, and where it will | ||
493 | * start. */ | ||
494 | |||
495 | write_lock(&journal->j_state_lock); | ||
496 | spin_lock(&journal->j_list_lock); | ||
497 | transaction = journal->j_checkpoint_transactions; | ||
498 | if (transaction) { | ||
499 | first_tid = transaction->t_tid; | ||
500 | blocknr = transaction->t_log_start; | ||
501 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
502 | first_tid = transaction->t_tid; | ||
503 | blocknr = transaction->t_log_start; | ||
504 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
505 | first_tid = transaction->t_tid; | ||
506 | blocknr = journal->j_head; | ||
507 | } else { | ||
508 | first_tid = journal->j_transaction_sequence; | ||
509 | blocknr = journal->j_head; | ||
510 | } | ||
511 | spin_unlock(&journal->j_list_lock); | ||
512 | J_ASSERT(blocknr != 0); | ||
513 | |||
514 | /* If the oldest pinned transaction is at the tail of the log | ||
515 | already then there's not much we can do right now. */ | ||
516 | if (journal->j_tail_sequence == first_tid) { | ||
517 | write_unlock(&journal->j_state_lock); | ||
518 | return 1; | 446 | return 1; |
519 | } | 447 | J_ASSERT(blocknr != 0); |
520 | |||
521 | /* OK, update the superblock to recover the freed space. | ||
522 | * Physical blocks come first: have we wrapped beyond the end of | ||
523 | * the log? */ | ||
524 | freed = blocknr - journal->j_tail; | ||
525 | if (blocknr < journal->j_tail) | ||
526 | freed = freed + journal->j_last - journal->j_first; | ||
527 | |||
528 | trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
529 | jbd_debug(1, | ||
530 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
531 | "freeing %lu\n", | ||
532 | journal->j_tail_sequence, first_tid, blocknr, freed); | ||
533 | |||
534 | journal->j_free += freed; | ||
535 | journal->j_tail_sequence = first_tid; | ||
536 | journal->j_tail = blocknr; | ||
537 | write_unlock(&journal->j_state_lock); | ||
538 | 448 | ||
539 | /* | 449 | /* |
540 | * If there is an external journal, we need to make sure that | 450 | * We need to make sure that any blocks that were recently written out |
541 | * any data blocks that were recently written out --- perhaps | 451 | * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before |
542 | * by jbd2_log_do_checkpoint() --- are flushed out before we | 452 | * we drop the transactions from the journal. It's unlikely this will |
543 | * drop the transactions from the external journal. It's | 453 | * be necessary, especially with an appropriately sized journal, but we |
544 | * unlikely this will be necessary, especially with a | 454 | * need this to guarantee correctness. Fortunately |
545 | * appropriately sized journal, but we need this to guarantee | 455 | * jbd2_cleanup_journal_tail() doesn't get called all that often. |
546 | * correctness. Fortunately jbd2_cleanup_journal_tail() | ||
547 | * doesn't get called all that often. | ||
548 | */ | 456 | */ |
549 | if ((journal->j_fs_dev != journal->j_dev) && | 457 | if (journal->j_flags & JBD2_BARRIER) |
550 | (journal->j_flags & JBD2_BARRIER)) | ||
551 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 458 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
552 | if (!(journal->j_flags & JBD2_ABORT)) | 459 | |
553 | jbd2_journal_update_superblock(journal, 1); | 460 | __jbd2_update_log_tail(journal, first_tid, blocknr); |
554 | return 0; | 461 | return 0; |
555 | } | 462 | } |
556 | 463 | ||
@@ -582,15 +489,12 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
582 | do { | 489 | do { |
583 | jh = next_jh; | 490 | jh = next_jh; |
584 | next_jh = jh->b_cpnext; | 491 | next_jh = jh->b_cpnext; |
585 | /* Use trylock because of the ranking */ | 492 | ret = __try_to_free_cp_buf(jh); |
586 | if (jbd_trylock_bh_state(jh2bh(jh))) { | 493 | if (ret) { |
587 | ret = __try_to_free_cp_buf(jh); | 494 | freed++; |
588 | if (ret) { | 495 | if (ret == 2) { |
589 | freed++; | 496 | *released = 1; |
590 | if (ret == 2) { | 497 | return freed; |
591 | *released = 1; | ||
592 | return freed; | ||
593 | } | ||
594 | } | 498 | } |
595 | } | 499 | } |
596 | /* | 500 | /* |
@@ -673,9 +577,7 @@ out: | |||
673 | * The function can free jh and bh. | 577 | * The function can free jh and bh. |
674 | * | 578 | * |
675 | * This function is called with j_list_lock held. | 579 | * This function is called with j_list_lock held. |
676 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | ||
677 | */ | 580 | */ |
678 | |||
679 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | 581 | int __jbd2_journal_remove_checkpoint(struct journal_head *jh) |
680 | { | 582 | { |
681 | struct transaction_chp_stats_s *stats; | 583 | struct transaction_chp_stats_s *stats; |
@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
722 | transaction->t_tid, stats); | 624 | transaction->t_tid, stats); |
723 | 625 | ||
724 | __jbd2_journal_drop_transaction(journal, transaction); | 626 | __jbd2_journal_drop_transaction(journal, transaction); |
725 | kfree(transaction); | 627 | jbd2_journal_free_transaction(transaction); |
726 | 628 | ||
727 | /* Just in case anybody was waiting for more transactions to be | 629 | /* Just in case anybody was waiting for more transactions to be |
728 | checkpointed... */ | 630 | checkpointed... */ |
@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
797 | J_ASSERT(journal->j_committing_transaction != transaction); | 699 | J_ASSERT(journal->j_committing_transaction != transaction); |
798 | J_ASSERT(journal->j_running_transaction != transaction); | 700 | J_ASSERT(journal->j_running_transaction != transaction); |
799 | 701 | ||
702 | trace_jbd2_drop_transaction(journal, transaction); | ||
703 | |||
800 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 704 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
801 | } | 705 | } |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c067a8cae63b..17f557f01cf0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
331 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 331 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
332 | __u32 crc32_sum = ~0; | 332 | __u32 crc32_sum = ~0; |
333 | struct blk_plug plug; | 333 | struct blk_plug plug; |
334 | /* Tail of the journal */ | ||
335 | unsigned long first_block; | ||
336 | tid_t first_tid; | ||
337 | int update_tail; | ||
334 | 338 | ||
335 | /* | 339 | /* |
336 | * First job: lock down the current transaction and wait for | 340 | * First job: lock down the current transaction and wait for |
@@ -340,7 +344,18 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
340 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ | 344 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ |
341 | if (journal->j_flags & JBD2_FLUSHED) { | 345 | if (journal->j_flags & JBD2_FLUSHED) { |
342 | jbd_debug(3, "super block updated\n"); | 346 | jbd_debug(3, "super block updated\n"); |
343 | jbd2_journal_update_superblock(journal, 1); | 347 | mutex_lock(&journal->j_checkpoint_mutex); |
348 | /* | ||
349 | * We hold j_checkpoint_mutex so tail cannot change under us. | ||
350 | * We don't need any special data guarantees for writing sb | ||
351 | * since journal is empty and it is ok for write to be | ||
352 | * flushed only with transaction commit. | ||
353 | */ | ||
354 | jbd2_journal_update_sb_log_tail(journal, | ||
355 | journal->j_tail_sequence, | ||
356 | journal->j_tail, | ||
357 | WRITE_SYNC); | ||
358 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
344 | } else { | 359 | } else { |
345 | jbd_debug(3, "superblock not updated\n"); | 360 | jbd_debug(3, "superblock not updated\n"); |
346 | } | 361 | } |
@@ -677,10 +692,30 @@ start_journal_io: | |||
677 | err = 0; | 692 | err = 0; |
678 | } | 693 | } |
679 | 694 | ||
695 | /* | ||
696 | * Get current oldest transaction in the log before we issue flush | ||
697 | * to the filesystem device. After the flush we can be sure that | ||
698 | * blocks of all older transactions are checkpointed to persistent | ||
699 | * storage and we will be safe to update journal start in the | ||
700 | * superblock with the numbers we get here. | ||
701 | */ | ||
702 | update_tail = | ||
703 | jbd2_journal_get_log_tail(journal, &first_tid, &first_block); | ||
704 | |||
680 | write_lock(&journal->j_state_lock); | 705 | write_lock(&journal->j_state_lock); |
706 | if (update_tail) { | ||
707 | long freed = first_block - journal->j_tail; | ||
708 | |||
709 | if (first_block < journal->j_tail) | ||
710 | freed += journal->j_last - journal->j_first; | ||
711 | /* Update tail only if we free significant amount of space */ | ||
712 | if (freed < journal->j_maxlen / 4) | ||
713 | update_tail = 0; | ||
714 | } | ||
681 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 715 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
682 | commit_transaction->t_state = T_COMMIT_DFLUSH; | 716 | commit_transaction->t_state = T_COMMIT_DFLUSH; |
683 | write_unlock(&journal->j_state_lock); | 717 | write_unlock(&journal->j_state_lock); |
718 | |||
684 | /* | 719 | /* |
685 | * If the journal is not located on the file system device, | 720 | * If the journal is not located on the file system device, |
686 | * then we must flush the file system device before we issue | 721 | * then we must flush the file system device before we issue |
@@ -831,6 +866,14 @@ wait_for_iobuf: | |||
831 | if (err) | 866 | if (err) |
832 | jbd2_journal_abort(journal, err); | 867 | jbd2_journal_abort(journal, err); |
833 | 868 | ||
869 | /* | ||
870 | * Now disk caches for filesystem device are flushed so we are safe to | ||
871 | * erase checkpointed transactions from the log by updating journal | ||
872 | * superblock. | ||
873 | */ | ||
874 | if (update_tail) | ||
875 | jbd2_update_log_tail(journal, first_tid, first_block); | ||
876 | |||
834 | /* End of a transaction! Finally, we can do checkpoint | 877 | /* End of a transaction! Finally, we can do checkpoint |
835 | processing: any buffers committed as a result of this | 878 | processing: any buffers committed as a result of this |
836 | transaction can be removed from any checkpoint list it was on | 879 | transaction can be removed from any checkpoint list it was on |
@@ -1048,7 +1091,7 @@ restart_loop: | |||
1048 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", | 1091 | jbd_debug(1, "JBD2: commit %d complete, head %d\n", |
1049 | journal->j_commit_sequence, journal->j_tail_sequence); | 1092 | journal->j_commit_sequence, journal->j_tail_sequence); |
1050 | if (to_free) | 1093 | if (to_free) |
1051 | kfree(commit_transaction); | 1094 | jbd2_journal_free_transaction(commit_transaction); |
1052 | 1095 | ||
1053 | wake_up(&journal->j_wait_done_commit); | 1096 | wake_up(&journal->j_wait_done_commit); |
1054 | } | 1097 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 839377e3d624..98ed6dbfe381 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -71,7 +71,6 @@ EXPORT_SYMBOL(jbd2_journal_revoke); | |||
71 | 71 | ||
72 | EXPORT_SYMBOL(jbd2_journal_init_dev); | 72 | EXPORT_SYMBOL(jbd2_journal_init_dev); |
73 | EXPORT_SYMBOL(jbd2_journal_init_inode); | 73 | EXPORT_SYMBOL(jbd2_journal_init_inode); |
74 | EXPORT_SYMBOL(jbd2_journal_update_format); | ||
75 | EXPORT_SYMBOL(jbd2_journal_check_used_features); | 74 | EXPORT_SYMBOL(jbd2_journal_check_used_features); |
76 | EXPORT_SYMBOL(jbd2_journal_check_available_features); | 75 | EXPORT_SYMBOL(jbd2_journal_check_available_features); |
77 | EXPORT_SYMBOL(jbd2_journal_set_features); | 76 | EXPORT_SYMBOL(jbd2_journal_set_features); |
@@ -96,7 +95,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | |||
96 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | 95 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); |
97 | EXPORT_SYMBOL(jbd2_inode_cache); | 96 | EXPORT_SYMBOL(jbd2_inode_cache); |
98 | 97 | ||
99 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | ||
100 | static void __journal_abort_soft (journal_t *journal, int errno); | 98 | static void __journal_abort_soft (journal_t *journal, int errno); |
101 | static int jbd2_journal_create_slab(size_t slab_size); | 99 | static int jbd2_journal_create_slab(size_t slab_size); |
102 | 100 | ||
@@ -746,6 +744,98 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
746 | return jbd2_journal_add_journal_head(bh); | 744 | return jbd2_journal_add_journal_head(bh); |
747 | } | 745 | } |
748 | 746 | ||
747 | /* | ||
748 | * Return tid of the oldest transaction in the journal and block in the journal | ||
749 | * where the transaction starts. | ||
750 | * | ||
751 | * If the journal is now empty, return which will be the next transaction ID | ||
752 | * we will write and where will that transaction start. | ||
753 | * | ||
754 | * The return value is 0 if journal tail cannot be pushed any further, 1 if | ||
755 | * it can. | ||
756 | */ | ||
757 | int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, | ||
758 | unsigned long *block) | ||
759 | { | ||
760 | transaction_t *transaction; | ||
761 | int ret; | ||
762 | |||
763 | read_lock(&journal->j_state_lock); | ||
764 | spin_lock(&journal->j_list_lock); | ||
765 | transaction = journal->j_checkpoint_transactions; | ||
766 | if (transaction) { | ||
767 | *tid = transaction->t_tid; | ||
768 | *block = transaction->t_log_start; | ||
769 | } else if ((transaction = journal->j_committing_transaction) != NULL) { | ||
770 | *tid = transaction->t_tid; | ||
771 | *block = transaction->t_log_start; | ||
772 | } else if ((transaction = journal->j_running_transaction) != NULL) { | ||
773 | *tid = transaction->t_tid; | ||
774 | *block = journal->j_head; | ||
775 | } else { | ||
776 | *tid = journal->j_transaction_sequence; | ||
777 | *block = journal->j_head; | ||
778 | } | ||
779 | ret = tid_gt(*tid, journal->j_tail_sequence); | ||
780 | spin_unlock(&journal->j_list_lock); | ||
781 | read_unlock(&journal->j_state_lock); | ||
782 | |||
783 | return ret; | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | * Update information in journal structure and in on disk journal superblock | ||
788 | * about log tail. This function does not check whether information passed in | ||
789 | * really pushes log tail further. It's responsibility of the caller to make | ||
790 | * sure provided log tail information is valid (e.g. by holding | ||
791 | * j_checkpoint_mutex all the time between computing log tail and calling this | ||
792 | * function as is the case with jbd2_cleanup_journal_tail()). | ||
793 | * | ||
794 | * Requires j_checkpoint_mutex | ||
795 | */ | ||
796 | void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
797 | { | ||
798 | unsigned long freed; | ||
799 | |||
800 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
801 | |||
802 | /* | ||
803 | * We cannot afford for write to remain in drive's caches since as | ||
804 | * soon as we update j_tail, next transaction can start reusing journal | ||
805 | * space and if we lose sb update during power failure we'd replay | ||
806 | * old transaction with possibly newly overwritten data. | ||
807 | */ | ||
808 | jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); | ||
809 | write_lock(&journal->j_state_lock); | ||
810 | freed = block - journal->j_tail; | ||
811 | if (block < journal->j_tail) | ||
812 | freed += journal->j_last - journal->j_first; | ||
813 | |||
814 | trace_jbd2_update_log_tail(journal, tid, block, freed); | ||
815 | jbd_debug(1, | ||
816 | "Cleaning journal tail from %d to %d (offset %lu), " | ||
817 | "freeing %lu\n", | ||
818 | journal->j_tail_sequence, tid, block, freed); | ||
819 | |||
820 | journal->j_free += freed; | ||
821 | journal->j_tail_sequence = tid; | ||
822 | journal->j_tail = block; | ||
823 | write_unlock(&journal->j_state_lock); | ||
824 | } | ||
825 | |||
826 | /* | ||
 826 | * This is a variation of __jbd2_update_log_tail which checks for validity of | ||
828 | * provided log tail and locks j_checkpoint_mutex. So it is safe against races | ||
829 | * with other threads updating log tail. | ||
830 | */ | ||
831 | void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) | ||
832 | { | ||
833 | mutex_lock(&journal->j_checkpoint_mutex); | ||
834 | if (tid_gt(tid, journal->j_tail_sequence)) | ||
835 | __jbd2_update_log_tail(journal, tid, block); | ||
836 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
837 | } | ||
838 | |||
749 | struct jbd2_stats_proc_session { | 839 | struct jbd2_stats_proc_session { |
750 | journal_t *journal; | 840 | journal_t *journal; |
751 | struct transaction_stats_s *stats; | 841 | struct transaction_stats_s *stats; |
@@ -1114,40 +1204,45 @@ static int journal_reset(journal_t *journal) | |||
1114 | 1204 | ||
1115 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; | 1205 | journal->j_max_transaction_buffers = journal->j_maxlen / 4; |
1116 | 1206 | ||
1117 | /* Add the dynamic fields and write it to disk. */ | ||
1118 | jbd2_journal_update_superblock(journal, 1); | ||
1119 | return jbd2_journal_start_thread(journal); | ||
1120 | } | ||
1121 | |||
1122 | /** | ||
1123 | * void jbd2_journal_update_superblock() - Update journal sb on disk. | ||
1124 | * @journal: The journal to update. | ||
1125 | * @wait: Set to '0' if you don't want to wait for IO completion. | ||
1126 | * | ||
1127 | * Update a journal's dynamic superblock fields and write it to disk, | ||
1128 | * optionally waiting for the IO to complete. | ||
1129 | */ | ||
1130 | void jbd2_journal_update_superblock(journal_t *journal, int wait) | ||
1131 | { | ||
1132 | journal_superblock_t *sb = journal->j_superblock; | ||
1133 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1134 | |||
1135 | /* | 1207 | /* |
1136 | * As a special case, if the on-disk copy is already marked as needing | 1208 | * As a special case, if the on-disk copy is already marked as needing |
1137 | * no recovery (s_start == 0) and there are no outstanding transactions | 1209 | * no recovery (s_start == 0), then we can safely defer the superblock |
1138 | * in the filesystem, then we can safely defer the superblock update | 1210 | * update until the next commit by setting JBD2_FLUSHED. This avoids |
1139 | * until the next commit by setting JBD2_FLUSHED. This avoids | ||
1140 | * attempting a write to a potential-readonly device. | 1211 | * attempting a write to a potential-readonly device. |
1141 | */ | 1212 | */ |
1142 | if (sb->s_start == 0 && journal->j_tail_sequence == | 1213 | if (sb->s_start == 0) { |
1143 | journal->j_transaction_sequence) { | ||
1144 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " | 1214 | jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " |
1145 | "(start %ld, seq %d, errno %d)\n", | 1215 | "(start %ld, seq %d, errno %d)\n", |
1146 | journal->j_tail, journal->j_tail_sequence, | 1216 | journal->j_tail, journal->j_tail_sequence, |
1147 | journal->j_errno); | 1217 | journal->j_errno); |
1148 | goto out; | 1218 | journal->j_flags |= JBD2_FLUSHED; |
1219 | } else { | ||
1220 | /* Lock here to make assertions happy... */ | ||
1221 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1222 | /* | ||
1223 | * Update log tail information. We use WRITE_FUA since new | ||
1224 | * transaction will start reusing journal space and so we | ||
1225 | * must make sure information about current log tail is on | ||
1226 | * disk before that. | ||
1227 | */ | ||
1228 | jbd2_journal_update_sb_log_tail(journal, | ||
1229 | journal->j_tail_sequence, | ||
1230 | journal->j_tail, | ||
1231 | WRITE_FUA); | ||
1232 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1149 | } | 1233 | } |
1234 | return jbd2_journal_start_thread(journal); | ||
1235 | } | ||
1150 | 1236 | ||
1237 | static void jbd2_write_superblock(journal_t *journal, int write_op) | ||
1238 | { | ||
1239 | struct buffer_head *bh = journal->j_sb_buffer; | ||
1240 | int ret; | ||
1241 | |||
1242 | trace_jbd2_write_superblock(journal, write_op); | ||
1243 | if (!(journal->j_flags & JBD2_BARRIER)) | ||
1244 | write_op &= ~(REQ_FUA | REQ_FLUSH); | ||
1245 | lock_buffer(bh); | ||
1151 | if (buffer_write_io_error(bh)) { | 1246 | if (buffer_write_io_error(bh)) { |
1152 | /* | 1247 | /* |
1153 | * Oh, dear. A previous attempt to write the journal | 1248 | * Oh, dear. A previous attempt to write the journal |
@@ -1163,48 +1258,106 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1163 | clear_buffer_write_io_error(bh); | 1258 | clear_buffer_write_io_error(bh); |
1164 | set_buffer_uptodate(bh); | 1259 | set_buffer_uptodate(bh); |
1165 | } | 1260 | } |
1261 | get_bh(bh); | ||
1262 | bh->b_end_io = end_buffer_write_sync; | ||
1263 | ret = submit_bh(write_op, bh); | ||
1264 | wait_on_buffer(bh); | ||
1265 | if (buffer_write_io_error(bh)) { | ||
1266 | clear_buffer_write_io_error(bh); | ||
1267 | set_buffer_uptodate(bh); | ||
1268 | ret = -EIO; | ||
1269 | } | ||
1270 | if (ret) { | ||
1271 | printk(KERN_ERR "JBD2: Error %d detected when updating " | ||
1272 | "journal superblock for %s.\n", ret, | ||
1273 | journal->j_devname); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | /** | ||
1278 | * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. | ||
1279 | * @journal: The journal to update. | ||
1280 | * @tail_tid: TID of the new transaction at the tail of the log | ||
1281 | * @tail_block: The first block of the transaction at the tail of the log | ||
1282 | * @write_op: With which operation should we write the journal sb | ||
1283 | * | ||
1284 | * Update a journal's superblock information about log tail and write it to | ||
1285 | * disk, waiting for the IO to complete. | ||
1286 | */ | ||
1287 | void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, | ||
1288 | unsigned long tail_block, int write_op) | ||
1289 | { | ||
1290 | journal_superblock_t *sb = journal->j_superblock; | ||
1291 | |||
1292 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1293 | jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", | ||
1294 | tail_block, tail_tid); | ||
1295 | |||
1296 | sb->s_sequence = cpu_to_be32(tail_tid); | ||
1297 | sb->s_start = cpu_to_be32(tail_block); | ||
1298 | |||
1299 | jbd2_write_superblock(journal, write_op); | ||
1300 | |||
1301 | /* Log is no longer empty */ | ||
1302 | write_lock(&journal->j_state_lock); | ||
1303 | WARN_ON(!sb->s_sequence); | ||
1304 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1305 | write_unlock(&journal->j_state_lock); | ||
1306 | } | ||
1307 | |||
1308 | /** | ||
1309 | * jbd2_mark_journal_empty() - Mark on disk journal as empty. | ||
1310 | * @journal: The journal to update. | ||
1311 | * | ||
1312 | * Update a journal's dynamic superblock fields to show that journal is empty. | ||
1313 | * Write updated superblock to disk waiting for IO to complete. | ||
1314 | */ | ||
1315 | static void jbd2_mark_journal_empty(journal_t *journal) | ||
1316 | { | ||
1317 | journal_superblock_t *sb = journal->j_superblock; | ||
1166 | 1318 | ||
1319 | BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); | ||
1167 | read_lock(&journal->j_state_lock); | 1320 | read_lock(&journal->j_state_lock); |
1168 | jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", | 1321 | jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", |
1169 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1322 | journal->j_tail_sequence); |
1170 | 1323 | ||
1171 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1324 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
1172 | sb->s_start = cpu_to_be32(journal->j_tail); | 1325 | sb->s_start = cpu_to_be32(0); |
1173 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1174 | read_unlock(&journal->j_state_lock); | 1326 | read_unlock(&journal->j_state_lock); |
1175 | 1327 | ||
1176 | BUFFER_TRACE(bh, "marking dirty"); | 1328 | jbd2_write_superblock(journal, WRITE_FUA); |
1177 | mark_buffer_dirty(bh); | ||
1178 | if (wait) { | ||
1179 | sync_dirty_buffer(bh); | ||
1180 | if (buffer_write_io_error(bh)) { | ||
1181 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1182 | "when updating journal superblock for %s.\n", | ||
1183 | journal->j_devname); | ||
1184 | clear_buffer_write_io_error(bh); | ||
1185 | set_buffer_uptodate(bh); | ||
1186 | } | ||
1187 | } else | ||
1188 | write_dirty_buffer(bh, WRITE); | ||
1189 | |||
1190 | out: | ||
1191 | /* If we have just flushed the log (by marking s_start==0), then | ||
1192 | * any future commit will have to be careful to update the | ||
1193 | * superblock again to re-record the true start of the log. */ | ||
1194 | 1329 | ||
 1330 | /* Log is now empty */ | ||
1195 | write_lock(&journal->j_state_lock); | 1331 | write_lock(&journal->j_state_lock); |
1196 | if (sb->s_start) | 1332 | journal->j_flags |= JBD2_FLUSHED; |
1197 | journal->j_flags &= ~JBD2_FLUSHED; | ||
1198 | else | ||
1199 | journal->j_flags |= JBD2_FLUSHED; | ||
1200 | write_unlock(&journal->j_state_lock); | 1333 | write_unlock(&journal->j_state_lock); |
1201 | } | 1334 | } |
1202 | 1335 | ||
1336 | |||
1337 | /** | ||
1338 | * jbd2_journal_update_sb_errno() - Update error in the journal. | ||
1339 | * @journal: The journal to update. | ||
1340 | * | ||
1341 | * Update a journal's errno. Write updated superblock to disk waiting for IO | ||
1342 | * to complete. | ||
1343 | */ | ||
1344 | static void jbd2_journal_update_sb_errno(journal_t *journal) | ||
1345 | { | ||
1346 | journal_superblock_t *sb = journal->j_superblock; | ||
1347 | |||
1348 | read_lock(&journal->j_state_lock); | ||
1349 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | ||
1350 | journal->j_errno); | ||
1351 | sb->s_errno = cpu_to_be32(journal->j_errno); | ||
1352 | read_unlock(&journal->j_state_lock); | ||
1353 | |||
1354 | jbd2_write_superblock(journal, WRITE_SYNC); | ||
1355 | } | ||
1356 | |||
1203 | /* | 1357 | /* |
1204 | * Read the superblock for a given journal, performing initial | 1358 | * Read the superblock for a given journal, performing initial |
1205 | * validation of the format. | 1359 | * validation of the format. |
1206 | */ | 1360 | */ |
1207 | |||
1208 | static int journal_get_superblock(journal_t *journal) | 1361 | static int journal_get_superblock(journal_t *journal) |
1209 | { | 1362 | { |
1210 | struct buffer_head *bh; | 1363 | struct buffer_head *bh; |
@@ -1398,14 +1551,11 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1398 | 1551 | ||
1399 | if (journal->j_sb_buffer) { | 1552 | if (journal->j_sb_buffer) { |
1400 | if (!is_journal_aborted(journal)) { | 1553 | if (!is_journal_aborted(journal)) { |
1401 | /* We can now mark the journal as empty. */ | 1554 | mutex_lock(&journal->j_checkpoint_mutex); |
1402 | journal->j_tail = 0; | 1555 | jbd2_mark_journal_empty(journal); |
1403 | journal->j_tail_sequence = | 1556 | mutex_unlock(&journal->j_checkpoint_mutex); |
1404 | ++journal->j_transaction_sequence; | 1557 | } else |
1405 | jbd2_journal_update_superblock(journal, 1); | ||
1406 | } else { | ||
1407 | err = -EIO; | 1558 | err = -EIO; |
1408 | } | ||
1409 | brelse(journal->j_sb_buffer); | 1559 | brelse(journal->j_sb_buffer); |
1410 | } | 1560 | } |
1411 | 1561 | ||
@@ -1552,61 +1702,6 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, | |||
1552 | EXPORT_SYMBOL(jbd2_journal_clear_features); | 1702 | EXPORT_SYMBOL(jbd2_journal_clear_features); |
1553 | 1703 | ||
1554 | /** | 1704 | /** |
1555 | * int jbd2_journal_update_format () - Update on-disk journal structure. | ||
1556 | * @journal: Journal to act on. | ||
1557 | * | ||
1558 | * Given an initialised but unloaded journal struct, poke about in the | ||
1559 | * on-disk structure to update it to the most recent supported version. | ||
1560 | */ | ||
1561 | int jbd2_journal_update_format (journal_t *journal) | ||
1562 | { | ||
1563 | journal_superblock_t *sb; | ||
1564 | int err; | ||
1565 | |||
1566 | err = journal_get_superblock(journal); | ||
1567 | if (err) | ||
1568 | return err; | ||
1569 | |||
1570 | sb = journal->j_superblock; | ||
1571 | |||
1572 | switch (be32_to_cpu(sb->s_header.h_blocktype)) { | ||
1573 | case JBD2_SUPERBLOCK_V2: | ||
1574 | return 0; | ||
1575 | case JBD2_SUPERBLOCK_V1: | ||
1576 | return journal_convert_superblock_v1(journal, sb); | ||
1577 | default: | ||
1578 | break; | ||
1579 | } | ||
1580 | return -EINVAL; | ||
1581 | } | ||
1582 | |||
1583 | static int journal_convert_superblock_v1(journal_t *journal, | ||
1584 | journal_superblock_t *sb) | ||
1585 | { | ||
1586 | int offset, blocksize; | ||
1587 | struct buffer_head *bh; | ||
1588 | |||
1589 | printk(KERN_WARNING | ||
1590 | "JBD2: Converting superblock from version 1 to 2.\n"); | ||
1591 | |||
1592 | /* Pre-initialise new fields to zero */ | ||
1593 | offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); | ||
1594 | blocksize = be32_to_cpu(sb->s_blocksize); | ||
1595 | memset(&sb->s_feature_compat, 0, blocksize-offset); | ||
1596 | |||
1597 | sb->s_nr_users = cpu_to_be32(1); | ||
1598 | sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); | ||
1599 | journal->j_format_version = 2; | ||
1600 | |||
1601 | bh = journal->j_sb_buffer; | ||
1602 | BUFFER_TRACE(bh, "marking dirty"); | ||
1603 | mark_buffer_dirty(bh); | ||
1604 | sync_dirty_buffer(bh); | ||
1605 | return 0; | ||
1606 | } | ||
1607 | |||
1608 | |||
1609 | /** | ||
1610 | * int jbd2_journal_flush () - Flush journal | 1705 | * int jbd2_journal_flush () - Flush journal |
1611 | * @journal: Journal to act on. | 1706 | * @journal: Journal to act on. |
1612 | * | 1707 | * |
@@ -1619,7 +1714,6 @@ int jbd2_journal_flush(journal_t *journal) | |||
1619 | { | 1714 | { |
1620 | int err = 0; | 1715 | int err = 0; |
1621 | transaction_t *transaction = NULL; | 1716 | transaction_t *transaction = NULL; |
1622 | unsigned long old_tail; | ||
1623 | 1717 | ||
1624 | write_lock(&journal->j_state_lock); | 1718 | write_lock(&journal->j_state_lock); |
1625 | 1719 | ||
@@ -1654,6 +1748,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1654 | if (is_journal_aborted(journal)) | 1748 | if (is_journal_aborted(journal)) |
1655 | return -EIO; | 1749 | return -EIO; |
1656 | 1750 | ||
1751 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1657 | jbd2_cleanup_journal_tail(journal); | 1752 | jbd2_cleanup_journal_tail(journal); |
1658 | 1753 | ||
1659 | /* Finally, mark the journal as really needing no recovery. | 1754 | /* Finally, mark the journal as really needing no recovery. |
@@ -1661,14 +1756,9 @@ int jbd2_journal_flush(journal_t *journal) | |||
1661 | * the magic code for a fully-recovered superblock. Any future | 1756 | * the magic code for a fully-recovered superblock. Any future |
1662 | * commits of data to the journal will restore the current | 1757 | * commits of data to the journal will restore the current |
1663 | * s_start value. */ | 1758 | * s_start value. */ |
1759 | jbd2_mark_journal_empty(journal); | ||
1760 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1664 | write_lock(&journal->j_state_lock); | 1761 | write_lock(&journal->j_state_lock); |
1665 | old_tail = journal->j_tail; | ||
1666 | journal->j_tail = 0; | ||
1667 | write_unlock(&journal->j_state_lock); | ||
1668 | jbd2_journal_update_superblock(journal, 1); | ||
1669 | write_lock(&journal->j_state_lock); | ||
1670 | journal->j_tail = old_tail; | ||
1671 | |||
1672 | J_ASSERT(!journal->j_running_transaction); | 1762 | J_ASSERT(!journal->j_running_transaction); |
1673 | J_ASSERT(!journal->j_committing_transaction); | 1763 | J_ASSERT(!journal->j_committing_transaction); |
1674 | J_ASSERT(!journal->j_checkpoint_transactions); | 1764 | J_ASSERT(!journal->j_checkpoint_transactions); |
@@ -1708,8 +1798,12 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1708 | write ? "Clearing" : "Ignoring"); | 1798 | write ? "Clearing" : "Ignoring"); |
1709 | 1799 | ||
1710 | err = jbd2_journal_skip_recovery(journal); | 1800 | err = jbd2_journal_skip_recovery(journal); |
1711 | if (write) | 1801 | if (write) { |
1712 | jbd2_journal_update_superblock(journal, 1); | 1802 | /* Lock to make assertions happy... */ |
1803 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1804 | jbd2_mark_journal_empty(journal); | ||
1805 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1806 | } | ||
1713 | 1807 | ||
1714 | no_recovery: | 1808 | no_recovery: |
1715 | return err; | 1809 | return err; |
@@ -1759,7 +1853,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) | |||
1759 | __jbd2_journal_abort_hard(journal); | 1853 | __jbd2_journal_abort_hard(journal); |
1760 | 1854 | ||
1761 | if (errno) | 1855 | if (errno) |
1762 | jbd2_journal_update_superblock(journal, 1); | 1856 | jbd2_journal_update_sb_errno(journal); |
1763 | } | 1857 | } |
1764 | 1858 | ||
1765 | /** | 1859 | /** |
@@ -2017,7 +2111,7 @@ static struct kmem_cache *jbd2_journal_head_cache; | |||
2017 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); | 2111 | static atomic_t nr_journal_heads = ATOMIC_INIT(0); |
2018 | #endif | 2112 | #endif |
2019 | 2113 | ||
2020 | static int journal_init_jbd2_journal_head_cache(void) | 2114 | static int jbd2_journal_init_journal_head_cache(void) |
2021 | { | 2115 | { |
2022 | int retval; | 2116 | int retval; |
2023 | 2117 | ||
@@ -2035,7 +2129,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
2035 | return retval; | 2129 | return retval; |
2036 | } | 2130 | } |
2037 | 2131 | ||
2038 | static void jbd2_journal_destroy_jbd2_journal_head_cache(void) | 2132 | static void jbd2_journal_destroy_journal_head_cache(void) |
2039 | { | 2133 | { |
2040 | if (jbd2_journal_head_cache) { | 2134 | if (jbd2_journal_head_cache) { |
2041 | kmem_cache_destroy(jbd2_journal_head_cache); | 2135 | kmem_cache_destroy(jbd2_journal_head_cache); |
@@ -2323,7 +2417,7 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) | |||
2323 | 2417 | ||
2324 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; | 2418 | struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; |
2325 | 2419 | ||
2326 | static int __init journal_init_handle_cache(void) | 2420 | static int __init jbd2_journal_init_handle_cache(void) |
2327 | { | 2421 | { |
2328 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); | 2422 | jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); |
2329 | if (jbd2_handle_cache == NULL) { | 2423 | if (jbd2_handle_cache == NULL) { |
@@ -2358,17 +2452,20 @@ static int __init journal_init_caches(void) | |||
2358 | 2452 | ||
2359 | ret = jbd2_journal_init_revoke_caches(); | 2453 | ret = jbd2_journal_init_revoke_caches(); |
2360 | if (ret == 0) | 2454 | if (ret == 0) |
2361 | ret = journal_init_jbd2_journal_head_cache(); | 2455 | ret = jbd2_journal_init_journal_head_cache(); |
2456 | if (ret == 0) | ||
2457 | ret = jbd2_journal_init_handle_cache(); | ||
2362 | if (ret == 0) | 2458 | if (ret == 0) |
2363 | ret = journal_init_handle_cache(); | 2459 | ret = jbd2_journal_init_transaction_cache(); |
2364 | return ret; | 2460 | return ret; |
2365 | } | 2461 | } |
2366 | 2462 | ||
2367 | static void jbd2_journal_destroy_caches(void) | 2463 | static void jbd2_journal_destroy_caches(void) |
2368 | { | 2464 | { |
2369 | jbd2_journal_destroy_revoke_caches(); | 2465 | jbd2_journal_destroy_revoke_caches(); |
2370 | jbd2_journal_destroy_jbd2_journal_head_cache(); | 2466 | jbd2_journal_destroy_journal_head_cache(); |
2371 | jbd2_journal_destroy_handle_cache(); | 2467 | jbd2_journal_destroy_handle_cache(); |
2468 | jbd2_journal_destroy_transaction_cache(); | ||
2372 | jbd2_journal_destroy_slabs(); | 2469 | jbd2_journal_destroy_slabs(); |
2373 | } | 2470 | } |
2374 | 2471 | ||
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index da6d7baf1390..c1a03354a22f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/crc32.h> | 23 | #include <linux/crc32.h> |
24 | #include <linux/blkdev.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal) | |||
265 | err2 = sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
266 | if (!err) | 267 | if (!err) |
267 | err = err2; | 268 | err = err2; |
268 | 269 | /* Make sure all replayed data is on permanent storage */ | |
270 | if (journal->j_flags & JBD2_BARRIER) | ||
271 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | ||
269 | return err; | 272 | return err; |
270 | } | 273 | } |
271 | 274 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 30b2867d6cc9..6973705d6a3d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void) | |||
208 | J_ASSERT(!jbd2_revoke_record_cache); | 208 | J_ASSERT(!jbd2_revoke_record_cache); |
209 | J_ASSERT(!jbd2_revoke_table_cache); | 209 | J_ASSERT(!jbd2_revoke_table_cache); |
210 | 210 | ||
211 | jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", | 211 | jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, |
212 | sizeof(struct jbd2_revoke_record_s), | 212 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); |
213 | 0, | ||
214 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
215 | NULL); | ||
216 | if (!jbd2_revoke_record_cache) | 213 | if (!jbd2_revoke_record_cache) |
217 | goto record_cache_failure; | 214 | goto record_cache_failure; |
218 | 215 | ||
219 | jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", | 216 | jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, |
220 | sizeof(struct jbd2_revoke_table_s), | 217 | SLAB_TEMPORARY); |
221 | 0, SLAB_TEMPORARY, NULL); | ||
222 | if (!jbd2_revoke_table_cache) | 218 | if (!jbd2_revoke_table_cache) |
223 | goto table_cache_failure; | 219 | goto table_cache_failure; |
224 | return 0; | 220 | return 0; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5aba56e1fd5..ddcd3549c6c2 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -33,6 +33,35 @@ | |||
33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 33 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | 34 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); |
35 | 35 | ||
36 | static struct kmem_cache *transaction_cache; | ||
37 | int __init jbd2_journal_init_transaction_cache(void) | ||
38 | { | ||
39 | J_ASSERT(!transaction_cache); | ||
40 | transaction_cache = kmem_cache_create("jbd2_transaction_s", | ||
41 | sizeof(transaction_t), | ||
42 | 0, | ||
43 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
44 | NULL); | ||
45 | if (transaction_cache) | ||
46 | return 0; | ||
47 | return -ENOMEM; | ||
48 | } | ||
49 | |||
50 | void jbd2_journal_destroy_transaction_cache(void) | ||
51 | { | ||
52 | if (transaction_cache) { | ||
53 | kmem_cache_destroy(transaction_cache); | ||
54 | transaction_cache = NULL; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | void jbd2_journal_free_transaction(transaction_t *transaction) | ||
59 | { | ||
60 | if (unlikely(ZERO_OR_NULL_PTR(transaction))) | ||
61 | return; | ||
62 | kmem_cache_free(transaction_cache, transaction); | ||
63 | } | ||
64 | |||
36 | /* | 65 | /* |
37 | * jbd2_get_transaction: obtain a new transaction_t object. | 66 | * jbd2_get_transaction: obtain a new transaction_t object. |
38 | * | 67 | * |
@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
133 | 162 | ||
134 | alloc_transaction: | 163 | alloc_transaction: |
135 | if (!journal->j_running_transaction) { | 164 | if (!journal->j_running_transaction) { |
136 | new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); | 165 | new_transaction = kmem_cache_alloc(transaction_cache, |
166 | gfp_mask | __GFP_ZERO); | ||
137 | if (!new_transaction) { | 167 | if (!new_transaction) { |
138 | /* | 168 | /* |
139 | * If __GFP_FS is not present, then we may be | 169 | * If __GFP_FS is not present, then we may be |
@@ -162,7 +192,7 @@ repeat: | |||
162 | if (is_journal_aborted(journal) || | 192 | if (is_journal_aborted(journal) || |
163 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 193 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
164 | read_unlock(&journal->j_state_lock); | 194 | read_unlock(&journal->j_state_lock); |
165 | kfree(new_transaction); | 195 | jbd2_journal_free_transaction(new_transaction); |
166 | return -EROFS; | 196 | return -EROFS; |
167 | } | 197 | } |
168 | 198 | ||
@@ -284,7 +314,7 @@ repeat: | |||
284 | read_unlock(&journal->j_state_lock); | 314 | read_unlock(&journal->j_state_lock); |
285 | 315 | ||
286 | lock_map_acquire(&handle->h_lockdep_map); | 316 | lock_map_acquire(&handle->h_lockdep_map); |
287 | kfree(new_transaction); | 317 | jbd2_journal_free_transaction(new_transaction); |
288 | return 0; | 318 | return 0; |
289 | } | 319 | } |
290 | 320 | ||
@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1549 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1579 | * of these pointers, it could go bad. Generally the caller needs to re-read |
1550 | * the pointer from the transaction_t. | 1580 | * the pointer from the transaction_t. |
1551 | * | 1581 | * |
1552 | * Called under j_list_lock. The journal may not be locked. | 1582 | * Called under j_list_lock. |
1553 | */ | 1583 | */ |
1554 | void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | 1584 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) |
1555 | { | 1585 | { |
1556 | struct journal_head **list = NULL; | 1586 | struct journal_head **list = NULL; |
1557 | transaction_t *transaction; | 1587 | transaction_t *transaction; |
@@ -1646,10 +1676,8 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1646 | spin_lock(&journal->j_list_lock); | 1676 | spin_lock(&journal->j_list_lock); |
1647 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1677 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1648 | /* written-back checkpointed metadata buffer */ | 1678 | /* written-back checkpointed metadata buffer */ |
1649 | if (jh->b_jlist == BJ_None) { | 1679 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1650 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1680 | __jbd2_journal_remove_checkpoint(jh); |
1651 | __jbd2_journal_remove_checkpoint(jh); | ||
1652 | } | ||
1653 | } | 1681 | } |
1654 | spin_unlock(&journal->j_list_lock); | 1682 | spin_unlock(&journal->j_list_lock); |
1655 | out: | 1683 | out: |
@@ -1949,6 +1977,8 @@ zap_buffer_unlocked: | |||
1949 | clear_buffer_mapped(bh); | 1977 | clear_buffer_mapped(bh); |
1950 | clear_buffer_req(bh); | 1978 | clear_buffer_req(bh); |
1951 | clear_buffer_new(bh); | 1979 | clear_buffer_new(bh); |
1980 | clear_buffer_delay(bh); | ||
1981 | clear_buffer_unwritten(bh); | ||
1952 | bh->b_bdev = NULL; | 1982 | bh->b_bdev = NULL; |
1953 | return may_free; | 1983 | return may_free; |
1954 | } | 1984 | } |
diff --git a/include/linux/fs.h b/include/linux/fs.h index fa63f1b46103..c437f914d537 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1872,19 +1872,6 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *, | |||
1872 | const struct dentry_operations *dops, | 1872 | const struct dentry_operations *dops, |
1873 | unsigned long); | 1873 | unsigned long); |
1874 | 1874 | ||
1875 | static inline void sb_mark_dirty(struct super_block *sb) | ||
1876 | { | ||
1877 | sb->s_dirt = 1; | ||
1878 | } | ||
1879 | static inline void sb_mark_clean(struct super_block *sb) | ||
1880 | { | ||
1881 | sb->s_dirt = 0; | ||
1882 | } | ||
1883 | static inline int sb_is_dirty(struct super_block *sb) | ||
1884 | { | ||
1885 | return sb->s_dirt; | ||
1886 | } | ||
1887 | |||
1888 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ | 1875 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ |
1889 | #define fops_get(fops) \ | 1876 | #define fops_get(fops) \ |
1890 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) | 1877 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5557baefed60..912c30a8ddb1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -971,6 +971,10 @@ extern void __journal_clean_data_list(transaction_t *transaction); | |||
971 | /* Log buffer allocation */ | 971 | /* Log buffer allocation */ |
972 | extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); | 972 | extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); |
973 | int jbd2_journal_next_log_block(journal_t *, unsigned long long *); | 973 | int jbd2_journal_next_log_block(journal_t *, unsigned long long *); |
974 | int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, | ||
975 | unsigned long *block); | ||
976 | void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); | ||
977 | void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); | ||
974 | 978 | ||
975 | /* Commit management */ | 979 | /* Commit management */ |
976 | extern void jbd2_journal_commit_transaction(journal_t *); | 980 | extern void jbd2_journal_commit_transaction(journal_t *); |
@@ -1020,6 +1024,11 @@ jbd2_journal_write_metadata_buffer(transaction_t *transaction, | |||
1020 | /* Transaction locking */ | 1024 | /* Transaction locking */ |
1021 | extern void __wait_on_journal (journal_t *); | 1025 | extern void __wait_on_journal (journal_t *); |
1022 | 1026 | ||
1027 | /* Transaction cache support */ | ||
1028 | extern void jbd2_journal_destroy_transaction_cache(void); | ||
1029 | extern int jbd2_journal_init_transaction_cache(void); | ||
1030 | extern void jbd2_journal_free_transaction(transaction_t *); | ||
1031 | |||
1023 | /* | 1032 | /* |
1024 | * Journal locking. | 1033 | * Journal locking. |
1025 | * | 1034 | * |
@@ -1082,7 +1091,8 @@ extern int jbd2_journal_destroy (journal_t *); | |||
1082 | extern int jbd2_journal_recover (journal_t *journal); | 1091 | extern int jbd2_journal_recover (journal_t *journal); |
1083 | extern int jbd2_journal_wipe (journal_t *, int); | 1092 | extern int jbd2_journal_wipe (journal_t *, int); |
1084 | extern int jbd2_journal_skip_recovery (journal_t *); | 1093 | extern int jbd2_journal_skip_recovery (journal_t *); |
1085 | extern void jbd2_journal_update_superblock (journal_t *, int); | 1094 | extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, |
1095 | unsigned long, int); | ||
1086 | extern void __jbd2_journal_abort_hard (journal_t *); | 1096 | extern void __jbd2_journal_abort_hard (journal_t *); |
1087 | extern void jbd2_journal_abort (journal_t *, int); | 1097 | extern void jbd2_journal_abort (journal_t *, int); |
1088 | extern int jbd2_journal_errno (journal_t *); | 1098 | extern int jbd2_journal_errno (journal_t *); |
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index 423cb6d78ee0..c18b46f8aeeb 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h | |||
@@ -66,6 +66,8 @@ struct journal_head { | |||
66 | * transaction (if there is one). Only applies to buffers on a | 66 | * transaction (if there is one). Only applies to buffers on a |
67 | * transaction's data or metadata journaling list. | 67 | * transaction's data or metadata journaling list. |
68 | * [j_list_lock] [jbd_lock_bh_state()] | 68 | * [j_list_lock] [jbd_lock_bh_state()] |
69 | * Either of these locks is enough for reading, both are needed for | ||
70 | * changes. | ||
69 | */ | 71 | */ |
70 | transaction_t *b_transaction; | 72 | transaction_t *b_transaction; |
71 | 73 | ||
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 75964412ddbb..127993dbf322 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h | |||
@@ -81,6 +81,13 @@ DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, | |||
81 | TP_ARGS(journal, commit_transaction) | 81 | TP_ARGS(journal, commit_transaction) |
82 | ); | 82 | ); |
83 | 83 | ||
84 | DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction, | ||
85 | |||
86 | TP_PROTO(journal_t *journal, transaction_t *commit_transaction), | ||
87 | |||
88 | TP_ARGS(journal, commit_transaction) | ||
89 | ); | ||
90 | |||
84 | TRACE_EVENT(jbd2_end_commit, | 91 | TRACE_EVENT(jbd2_end_commit, |
85 | TP_PROTO(journal_t *journal, transaction_t *commit_transaction), | 92 | TP_PROTO(journal_t *journal, transaction_t *commit_transaction), |
86 | 93 | ||
@@ -200,7 +207,7 @@ TRACE_EVENT(jbd2_checkpoint_stats, | |||
200 | __entry->forced_to_close, __entry->written, __entry->dropped) | 207 | __entry->forced_to_close, __entry->written, __entry->dropped) |
201 | ); | 208 | ); |
202 | 209 | ||
203 | TRACE_EVENT(jbd2_cleanup_journal_tail, | 210 | TRACE_EVENT(jbd2_update_log_tail, |
204 | 211 | ||
205 | TP_PROTO(journal_t *journal, tid_t first_tid, | 212 | TP_PROTO(journal_t *journal, tid_t first_tid, |
206 | unsigned long block_nr, unsigned long freed), | 213 | unsigned long block_nr, unsigned long freed), |
@@ -229,6 +236,26 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, | |||
229 | __entry->block_nr, __entry->freed) | 236 | __entry->block_nr, __entry->freed) |
230 | ); | 237 | ); |
231 | 238 | ||
239 | TRACE_EVENT(jbd2_write_superblock, | ||
240 | |||
241 | TP_PROTO(journal_t *journal, int write_op), | ||
242 | |||
243 | TP_ARGS(journal, write_op), | ||
244 | |||
245 | TP_STRUCT__entry( | ||
246 | __field( dev_t, dev ) | ||
247 | __field( int, write_op ) | ||
248 | ), | ||
249 | |||
250 | TP_fast_assign( | ||
251 | __entry->dev = journal->j_fs_dev->bd_dev; | ||
252 | __entry->write_op = write_op; | ||
253 | ), | ||
254 | |||
255 | TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), | ||
256 | MINOR(__entry->dev), __entry->write_op) | ||
257 | ); | ||
258 | |||
232 | #endif /* _TRACE_JBD2_H */ | 259 | #endif /* _TRACE_JBD2_H */ |
233 | 260 | ||
234 | /* This part must be outside protection */ | 261 | /* This part must be outside protection */ |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3fc261705b1e..26adea8ca2e7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -95,6 +95,8 @@ unsigned long vm_dirty_bytes; | |||
95 | */ | 95 | */ |
96 | unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ | 96 | unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ |
97 | 97 | ||
98 | EXPORT_SYMBOL_GPL(dirty_writeback_interval); | ||
99 | |||
98 | /* | 100 | /* |
99 | * The longest time for which data is allowed to remain dirty | 101 | * The longest time for which data is allowed to remain dirty |
100 | */ | 102 | */ |