aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-28 00:54:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-28 00:54:31 -0400
commit81280572ca6f54009edfa4deee563e8678784218 (patch)
tree4fac10bcb71261823d261e5f8551fdb16ab653ba
parentb83db1deb29eb4eea9bf5992431d26978e039ce6 (diff)
parenta107e5a3a473a2ea62bd5af24e11b84adf1486ff (diff)
Merge branch 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits) ext4,jbd2: convert tracepoints to use major/minor numbers ext4: optimize orphan_list handling for ext4_setattr ext4: fix unbalanced mutex unlock in error path of ext4_li_request_new ext4: fix compile error in ext4_fallocate() ext4: move ext4_mb_{get,put}_buddy_cache_lock and make them static ext4: rename mark_bitmap_end() to ext4_mark_bitmap_end() ext4: move flush_completed_IO to fs/ext4/fsync.c and make it static ext4: rename {ext,idx}_pblock and inline small extent functions ext4: make various ext4 functions be static ext4: rename {exit,init}_ext4_*() to ext4_{exit,init}_*() ext4: fix kernel oops if the journal superblock has a non-zero j_errno ext4: update writeback_index based on last page scanned ext4: implement writeback livelock avoidance using page tagging ext4: tidy up a void argument in inode.c ext4: add batched_discard into ext4 feature list ext4: Add batched discard support for ext4 fs: Add FITRIM ioctl ext4: Use return value from sb_issue_discard() ext4: Check return value of sb_getblk() and friends ext4: use bio layer instead of buffer layer in mpage_da_submit_io ...
-rw-r--r--Documentation/filesystems/ext4.txt14
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/balloc.c5
-rw-r--r--fs/ext4/block_validity.c7
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h110
-rw-r--r--fs/ext4/ext4_extents.h65
-rw-r--r--fs/ext4/extents.c368
-rw-r--r--fs/ext4/file.c44
-rw-r--r--fs/ext4/fsync.c83
-rw-r--r--fs/ext4/ialloc.c135
-rw-r--r--fs/ext4/inode.c587
-rw-r--r--fs/ext4/mballoc.c554
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c22
-rw-r--r--fs/ext4/namei.c63
-rw-r--r--fs/ext4/page-io.c430
-rw-r--r--fs/ext4/resize.c52
-rw-r--r--fs/ext4/super.c531
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h8
-rw-r--r--fs/ioctl.c39
-rw-r--r--fs/jbd2/checkpoint.c10
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--include/linux/blkdev.h8
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/jbd2.h2
-rw-r--r--include/linux/percpu_counter.h10
-rw-r--r--include/linux/writeback.h2
-rw-r--r--include/trace/events/ext4.h379
-rw-r--r--include/trace/events/jbd2.h78
33 files changed, 2510 insertions, 1131 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index e1def1786e50..6ab9442d7eeb 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as
353 system crashes before the delayed allocation 353 system crashes before the delayed allocation
354 blocks are forced to disk. 354 blocks are forced to disk.
355 355
356noinit_itable Do not initialize any uninitialized inode table
357 blocks in the background. This feature may be
358 used by installation CD's so that the install
359 process can complete as quickly as possible; the
360 inode table initialization process would then be
361 deferred until the next time the file system
362 is unmounted.
363
364init_itable=n The lazy itable init code will wait n times the
365 number of milliseconds it took to zero out the
366 previous block group's inode table. This
367 minimizes the impact on the system performance
368 while the file system's inode table is being initialized.
369
356discard Controls whether ext4 should issue discard/TRIM 370discard Controls whether ext4 should issue discard/TRIM
357nodiscard(*) commands to the underlying block device when 371nodiscard(*) commands to the underlying block device when
358 blocks are freed. This is useful for SSD devices 372 blocks are freed. This is useful for SSD devices
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..c947e36eda6c 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
10 10
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43ed..14c3af26c671 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
171 * less than the blocksize * 8 ( which is the size 171 * less than the blocksize * 8 ( which is the size
172 * of bitmap ), set rest of the block bitmap to 1 172 * of bitmap ), set rest of the block bitmap to 1
173 */ 173 */
174 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 174 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
175 bh->b_data);
175 } 176 }
176 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 177 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
177} 178}
@@ -489,7 +490,7 @@ error_return:
489 * Check if filesystem has nblocks free & available for allocation. 490 * Check if filesystem has nblocks free & available for allocation.
490 * On success return 1, return 0 on failure. 491 * On success return 1, return 0 on failure.
491 */ 492 */
492int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) 493static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
493{ 494{
494 s64 free_blocks, dirty_blocks, root_blocks; 495 s64 free_blocks, dirty_blocks, root_blocks;
495 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 496 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
29 29
30static struct kmem_cache *ext4_system_zone_cachep; 30static struct kmem_cache *ext4_system_zone_cachep;
31 31
32int __init init_ext4_system_zone(void) 32int __init ext4_init_system_zone(void)
33{ 33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL) 35 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM; 36 return -ENOMEM;
38 return 0; 37 return 0;
39} 38}
40 39
41void exit_ext4_system_zone(void) 40void ext4_exit_system_zone(void)
42{ 41{
43 kmem_cache_destroy(ext4_system_zone_cachep); 42 kmem_cache_destroy(ext4_system_zone_cachep);
44} 43}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72baa..ece76fb6a40c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
39 struct file *filp); 39 struct file *filp);
40 40
41const struct file_operations ext4_dir_operations = { 41const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = ext4_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl, 45 .unlocked_ioctl = ext4_ioctl,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6ad..8b5dd6369f82 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -168,7 +168,20 @@ struct mpage_da_data {
168 int pages_written; 168 int pages_written;
169 int retval; 169 int retval;
170}; 170};
171#define EXT4_IO_UNWRITTEN 0x1 171
172/*
173 * Flags for ext4_io_end->flags
174 */
175#define EXT4_IO_END_UNWRITTEN 0x0001
176#define EXT4_IO_END_ERROR 0x0002
177
178struct ext4_io_page {
179 struct page *p_page;
180 int p_count;
181};
182
183#define MAX_IO_PAGES 128
184
172typedef struct ext4_io_end { 185typedef struct ext4_io_end {
173 struct list_head list; /* per-file finished IO list */ 186 struct list_head list; /* per-file finished IO list */
174 struct inode *inode; /* file being written to */ 187 struct inode *inode; /* file being written to */
@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
179 struct work_struct work; /* data work queue */ 192 struct work_struct work; /* data work queue */
180 struct kiocb *iocb; /* iocb struct for AIO */ 193 struct kiocb *iocb; /* iocb struct for AIO */
181 int result; /* error value for AIO */ 194 int result; /* error value for AIO */
195 int num_io_pages;
196 struct ext4_io_page *pages[MAX_IO_PAGES];
182} ext4_io_end_t; 197} ext4_io_end_t;
183 198
199struct ext4_io_submit {
200 int io_op;
201 struct bio *io_bio;
202 ext4_io_end_t *io_end;
203 struct ext4_io_page *io_page;
204 sector_t io_next_block;
205};
206
184/* 207/*
185 * Special inodes numbers 208 * Special inodes numbers
186 */ 209 */
@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
205#define EXT4_MIN_BLOCK_SIZE 1024 228#define EXT4_MIN_BLOCK_SIZE 1024
206#define EXT4_MAX_BLOCK_SIZE 65536 229#define EXT4_MAX_BLOCK_SIZE 65536
207#define EXT4_MIN_BLOCK_LOG_SIZE 10 230#define EXT4_MIN_BLOCK_LOG_SIZE 10
231#define EXT4_MAX_BLOCK_LOG_SIZE 16
208#ifdef __KERNEL__ 232#ifdef __KERNEL__
209# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) 233# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
210#else 234#else
@@ -889,6 +913,7 @@ struct ext4_inode_info {
889#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 913#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
890#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 914#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
891#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 915#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
916#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
892 917
893#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 918#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
894#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 919#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1087,7 +1112,6 @@ struct ext4_sb_info {
1087 struct completion s_kobj_unregister; 1112 struct completion s_kobj_unregister;
1088 1113
1089 /* Journaling */ 1114 /* Journaling */
1090 struct inode *s_journal_inode;
1091 struct journal_s *s_journal; 1115 struct journal_s *s_journal;
1092 struct list_head s_orphan; 1116 struct list_head s_orphan;
1093 struct mutex s_orphan_lock; 1117 struct mutex s_orphan_lock;
@@ -1120,10 +1144,7 @@ struct ext4_sb_info {
1120 /* for buddy allocator */ 1144 /* for buddy allocator */
1121 struct ext4_group_info ***s_group_info; 1145 struct ext4_group_info ***s_group_info;
1122 struct inode *s_buddy_cache; 1146 struct inode *s_buddy_cache;
1123 long s_blocks_reserved;
1124 spinlock_t s_reserve_lock;
1125 spinlock_t s_md_lock; 1147 spinlock_t s_md_lock;
1126 tid_t s_last_transaction;
1127 unsigned short *s_mb_offsets; 1148 unsigned short *s_mb_offsets;
1128 unsigned int *s_mb_maxs; 1149 unsigned int *s_mb_maxs;
1129 1150
@@ -1141,7 +1162,6 @@ struct ext4_sb_info {
1141 unsigned long s_mb_last_start; 1162 unsigned long s_mb_last_start;
1142 1163
1143 /* stats for buddy allocator */ 1164 /* stats for buddy allocator */
1144 spinlock_t s_mb_pa_lock;
1145 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ 1165 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
1146 atomic_t s_bal_success; /* we found long enough chunks */ 1166 atomic_t s_bal_success; /* we found long enough chunks */
1147 atomic_t s_bal_allocated; /* in blocks */ 1167 atomic_t s_bal_allocated; /* in blocks */
@@ -1172,6 +1192,11 @@ struct ext4_sb_info {
1172 1192
1173 /* timer for periodic error stats printing */ 1193 /* timer for periodic error stats printing */
1174 struct timer_list s_err_report; 1194 struct timer_list s_err_report;
1195
1196 /* Lazy inode table initialization info */
1197 struct ext4_li_request *s_li_request;
1198 /* Wait multiplier for lazy initialization thread */
1199 unsigned int s_li_wait_mult;
1175}; 1200};
1176 1201
1177static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1202static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1533void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 1558void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
1534 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); 1559 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
1535 1560
1536extern struct proc_dir_entry *ext4_proc_root; 1561/*
1562 * Timeout and state flag for lazy initialization inode thread.
1563 */
1564#define EXT4_DEF_LI_WAIT_MULT 10
1565#define EXT4_DEF_LI_MAX_START_DELAY 5
1566#define EXT4_LAZYINIT_QUIT 0x0001
1567#define EXT4_LAZYINIT_RUNNING 0x0002
1568
1569/*
1570 * Lazy inode table initialization info
1571 */
1572struct ext4_lazy_init {
1573 unsigned long li_state;
1574
1575 wait_queue_head_t li_wait_daemon;
1576 wait_queue_head_t li_wait_task;
1577 struct timer_list li_timer;
1578 struct task_struct *li_task;
1579
1580 struct list_head li_request_list;
1581 struct mutex li_list_mtx;
1582};
1583
1584struct ext4_li_request {
1585 struct super_block *lr_super;
1586 struct ext4_sb_info *lr_sbi;
1587 ext4_group_t lr_next_group;
1588 struct list_head lr_request;
1589 unsigned long lr_next_sched;
1590 unsigned long lr_timeout;
1591};
1592
1593struct ext4_features {
1594 struct kobject f_kobj;
1595 struct completion f_kobj_unregister;
1596};
1537 1597
1538/* 1598/*
1539 * Function prototypes 1599 * Function prototypes
@@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
1561extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1621extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1562 ext4_fsblk_t goal, unsigned long *count, int *errp); 1622 ext4_fsblk_t goal, unsigned long *count, int *errp);
1563extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1623extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1564extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1565extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1624extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1566 ext4_fsblk_t block, unsigned long count); 1625 ext4_fsblk_t block, unsigned long count);
1567extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1626extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1605extern unsigned long ext4_count_free_inodes(struct super_block *); 1664extern unsigned long ext4_count_free_inodes(struct super_block *);
1606extern unsigned long ext4_count_dirs(struct super_block *); 1665extern unsigned long ext4_count_dirs(struct super_block *);
1607extern void ext4_check_inodes_bitmap(struct super_block *); 1666extern void ext4_check_inodes_bitmap(struct super_block *);
1608extern unsigned ext4_init_inode_bitmap(struct super_block *sb, 1667extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1609 struct buffer_head *bh, 1668extern int ext4_init_inode_table(struct super_block *sb,
1610 ext4_group_t group, 1669 ext4_group_t group, int barrier);
1611 struct ext4_group_desc *desc);
1612extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1613 1670
1614/* mballoc.c */ 1671/* mballoc.c */
1615extern long ext4_mb_stats; 1672extern long ext4_mb_stats;
@@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1620 struct ext4_allocation_request *, int *); 1677 struct ext4_allocation_request *, int *);
1621extern int ext4_mb_reserve_blocks(struct super_block *, int); 1678extern int ext4_mb_reserve_blocks(struct super_block *, int);
1622extern void ext4_discard_preallocations(struct inode *); 1679extern void ext4_discard_preallocations(struct inode *);
1623extern int __init init_ext4_mballoc(void); 1680extern int __init ext4_init_mballoc(void);
1624extern void exit_ext4_mballoc(void); 1681extern void ext4_exit_mballoc(void);
1625extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1682extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1626 struct buffer_head *bh, ext4_fsblk_t block, 1683 struct buffer_head *bh, ext4_fsblk_t block,
1627 unsigned long count, int flags); 1684 unsigned long count, int flags);
1628extern int ext4_mb_add_groupinfo(struct super_block *sb, 1685extern int ext4_mb_add_groupinfo(struct super_block *sb,
1629 ext4_group_t i, struct ext4_group_desc *desc); 1686 ext4_group_t i, struct ext4_group_desc *desc);
1630extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1687extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
1631extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1688
1632 ext4_group_t, int);
1633/* inode.c */ 1689/* inode.c */
1634struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1690struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1635 ext4_lblk_t, int, int *); 1691 ext4_lblk_t, int, int *);
@@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
1657extern int ext4_alloc_da_blocks(struct inode *inode); 1713extern int ext4_alloc_da_blocks(struct inode *inode);
1658extern void ext4_set_aops(struct inode *inode); 1714extern void ext4_set_aops(struct inode *inode);
1659extern int ext4_writepage_trans_blocks(struct inode *); 1715extern int ext4_writepage_trans_blocks(struct inode *);
1660extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1661extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1716extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1662extern int ext4_block_truncate_page(handle_t *handle, 1717extern int ext4_block_truncate_page(handle_t *handle,
1663 struct address_space *mapping, loff_t from); 1718 struct address_space *mapping, loff_t from);
1664extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1719extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1665extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1720extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1666extern int flush_completed_IO(struct inode *inode);
1667extern void ext4_da_update_reserve_space(struct inode *inode, 1721extern void ext4_da_update_reserve_space(struct inode *inode,
1668 int used, int quota_claim); 1722 int used, int quota_claim);
1669/* ioctl.c */ 1723/* ioctl.c */
@@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
1960/* file.c */ 2014/* file.c */
1961extern const struct inode_operations ext4_file_inode_operations; 2015extern const struct inode_operations ext4_file_inode_operations;
1962extern const struct file_operations ext4_file_operations; 2016extern const struct file_operations ext4_file_operations;
2017extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
1963 2018
1964/* namei.c */ 2019/* namei.c */
1965extern const struct inode_operations ext4_dir_inode_operations; 2020extern const struct inode_operations ext4_dir_inode_operations;
@@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1973/* block_validity */ 2028/* block_validity */
1974extern void ext4_release_system_zone(struct super_block *sb); 2029extern void ext4_release_system_zone(struct super_block *sb);
1975extern int ext4_setup_system_zone(struct super_block *sb); 2030extern int ext4_setup_system_zone(struct super_block *sb);
1976extern int __init init_ext4_system_zone(void); 2031extern int __init ext4_init_system_zone(void);
1977extern void exit_ext4_system_zone(void); 2032extern void ext4_exit_system_zone(void);
1978extern int ext4_data_block_valid(struct ext4_sb_info *sbi, 2033extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1979 ext4_fsblk_t start_blk, 2034 ext4_fsblk_t start_blk,
1980 unsigned int count); 2035 unsigned int count);
@@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2002 __u64 start_orig, __u64 start_donor, 2057 __u64 start_orig, __u64 start_donor,
2003 __u64 len, __u64 *moved_len); 2058 __u64 len, __u64 *moved_len);
2004 2059
2060/* page-io.c */
2061extern int __init ext4_init_pageio(void);
2062extern void ext4_exit_pageio(void);
2063extern void ext4_free_io_end(ext4_io_end_t *io);
2064extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2065extern int ext4_end_io_nolock(ext4_io_end_t *io);
2066extern void ext4_io_submit(struct ext4_io_submit *io);
2067extern int ext4_bio_write_page(struct ext4_io_submit *io,
2068 struct page *page,
2069 int len,
2070 struct writeback_control *wbc);
2005 2071
2006/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ 2072/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
2007enum ext4_state_bits { 2073enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb4..28ce70fd9cd0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228/*
229 * ext4_ext_pblock:
230 * combine low and high parts of physical block number into ext4_fsblk_t
231 */
232static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
233{
234 ext4_fsblk_t block;
235
236 block = le32_to_cpu(ex->ee_start_lo);
237 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
238 return block;
239}
240
241/*
242 * ext4_idx_pblock:
243 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
244 */
245static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
246{
247 ext4_fsblk_t block;
248
249 block = le32_to_cpu(ix->ei_leaf_lo);
250 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
251 return block;
252}
253
254/*
255 * ext4_ext_store_pblock:
256 * stores a large physical block number into an extent struct,
257 * breaking it into parts
258 */
259static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
260 ext4_fsblk_t pb)
261{
262 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
263 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
264 0xffff);
265}
266
267/*
268 * ext4_idx_store_pblock:
269 * stores a large physical block number into an index struct,
270 * breaking it into parts
271 */
272static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
273 ext4_fsblk_t pb)
274{
275 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
276 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
277 0xffff);
278}
279
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, 280extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks); 281 sector_t lblocks);
230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
233extern int ext4_extent_tree_init(handle_t *, struct inode *); 282extern int ext4_extent_tree_init(handle_t *, struct inode *);
234extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 283extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
235 int num, 284 int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
237extern int ext4_can_extents_be_merged(struct inode *inode, 286extern int ext4_can_extents_be_merged(struct inode *inode,
238 struct ext4_extent *ex1, 287 struct ext4_extent *ex1,
239 struct ext4_extent *ex2); 288 struct ext4_extent *ex2);
240extern int ext4_ext_try_to_merge(struct inode *inode,
241 struct ext4_ext_path *path,
242 struct ext4_extent *);
243extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
244extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); 289extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
245extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
246 ext_prepare_callback, void *);
247extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 290extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
248 struct ext4_ext_path *); 291 struct ext4_ext_path *);
249extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
250 ext4_lblk_t *, ext4_fsblk_t *);
251extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
252 ext4_lblk_t *, ext4_fsblk_t *);
253extern void ext4_ext_drop_refs(struct ext4_ext_path *); 292extern void ext4_ext_drop_refs(struct ext4_ext_path *);
254extern int ext4_ext_check_inode(struct inode *inode); 293extern int ext4_ext_check_inode(struct inode *inode);
255#endif /* _EXT4_EXTENTS */ 294#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06328d3e5717..0554c48cb1fd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h" 45#include "ext4_extents.h"
46 46
47
48/*
49 * ext_pblock:
50 * combine low and high parts of physical block number into ext4_fsblk_t
51 */
52ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
53{
54 ext4_fsblk_t block;
55
56 block = le32_to_cpu(ex->ee_start_lo);
57 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
58 return block;
59}
60
61/*
62 * idx_pblock:
63 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
64 */
65ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
66{
67 ext4_fsblk_t block;
68
69 block = le32_to_cpu(ix->ei_leaf_lo);
70 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
71 return block;
72}
73
74/*
75 * ext4_ext_store_pblock:
76 * stores a large physical block number into an extent struct,
77 * breaking it into parts
78 */
79void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
80{
81 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
82 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
83}
84
85/*
86 * ext4_idx_store_pblock:
87 * stores a large physical block number into an index struct,
88 * breaking it into parts
89 */
90static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
91{
92 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
93 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
94}
95
96static int ext4_ext_truncate_extend_restart(handle_t *handle, 47static int ext4_ext_truncate_extend_restart(handle_t *handle,
97 struct inode *inode, 48 struct inode *inode,
98 int needed) 49 int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
169 /* try to predict block placement */ 120 /* try to predict block placement */
170 ex = path[depth].p_ext; 121 ex = path[depth].p_ext;
171 if (ex) 122 if (ex)
172 return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); 123 return (ext4_ext_pblock(ex) +
124 (block - le32_to_cpu(ex->ee_block)));
173 125
174 /* it looks like index is empty; 126 /* it looks like index is empty;
175 * try to find starting block from index itself */ 127 * try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
354 306
355static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 307static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
356{ 308{
357 ext4_fsblk_t block = ext_pblock(ext); 309 ext4_fsblk_t block = ext4_ext_pblock(ext);
358 int len = ext4_ext_get_actual_len(ext); 310 int len = ext4_ext_get_actual_len(ext);
359 311
360 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 312 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
363static int ext4_valid_extent_idx(struct inode *inode, 315static int ext4_valid_extent_idx(struct inode *inode,
364 struct ext4_extent_idx *ext_idx) 316 struct ext4_extent_idx *ext_idx)
365{ 317{
366 ext4_fsblk_t block = idx_pblock(ext_idx); 318 ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
367 319
368 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); 320 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
369} 321}
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
463 for (k = 0; k <= l; k++, path++) { 415 for (k = 0; k <= l; k++, path++) {
464 if (path->p_idx) { 416 if (path->p_idx) {
465 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 417 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
466 idx_pblock(path->p_idx)); 418 ext4_idx_pblock(path->p_idx));
467 } else if (path->p_ext) { 419 } else if (path->p_ext) {
468 ext_debug(" %d:[%d]%d:%llu ", 420 ext_debug(" %d:[%d]%d:%llu ",
469 le32_to_cpu(path->p_ext->ee_block), 421 le32_to_cpu(path->p_ext->ee_block),
470 ext4_ext_is_uninitialized(path->p_ext), 422 ext4_ext_is_uninitialized(path->p_ext),
471 ext4_ext_get_actual_len(path->p_ext), 423 ext4_ext_get_actual_len(path->p_ext),
472 ext_pblock(path->p_ext)); 424 ext4_ext_pblock(path->p_ext));
473 } else 425 } else
474 ext_debug(" []"); 426 ext_debug(" []");
475 } 427 }
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
494 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 446 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
495 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 447 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
496 ext4_ext_is_uninitialized(ex), 448 ext4_ext_is_uninitialized(ex),
497 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 449 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
498 } 450 }
499 ext_debug("\n"); 451 ext_debug("\n");
500} 452}
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
545 497
546 path->p_idx = l - 1; 498 path->p_idx = l - 1;
547 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 499 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
548 idx_pblock(path->p_idx)); 500 ext4_idx_pblock(path->p_idx));
549 501
550#ifdef CHECK_BINSEARCH 502#ifdef CHECK_BINSEARCH
551 { 503 {
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
614 path->p_ext = l - 1; 566 path->p_ext = l - 1;
615 ext_debug(" -> %d:%llu:[%d]%d ", 567 ext_debug(" -> %d:%llu:[%d]%d ",
616 le32_to_cpu(path->p_ext->ee_block), 568 le32_to_cpu(path->p_ext->ee_block),
617 ext_pblock(path->p_ext), 569 ext4_ext_pblock(path->p_ext),
618 ext4_ext_is_uninitialized(path->p_ext), 570 ext4_ext_is_uninitialized(path->p_ext),
619 ext4_ext_get_actual_len(path->p_ext)); 571 ext4_ext_get_actual_len(path->p_ext));
620 572
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
682 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 634 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
683 635
684 ext4_ext_binsearch_idx(inode, path + ppos, block); 636 ext4_ext_binsearch_idx(inode, path + ppos, block);
685 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 637 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
686 path[ppos].p_depth = i; 638 path[ppos].p_depth = i;
687 path[ppos].p_ext = NULL; 639 path[ppos].p_ext = NULL;
688 640
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
721 ext4_ext_binsearch(inode, path + ppos, block); 673 ext4_ext_binsearch(inode, path + ppos, block);
722 /* if not an empty leaf */ 674 /* if not an empty leaf */
723 if (path[ppos].p_ext) 675 if (path[ppos].p_ext)
724 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 676 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
725 677
726 ext4_ext_show_path(inode, path); 678 ext4_ext_show_path(inode, path);
727 679
@@ -739,9 +691,9 @@ err:
739 * insert new index [@logical;@ptr] into the block at @curp; 691 * insert new index [@logical;@ptr] into the block at @curp;
740 * check where to insert: before @curp or after @curp 692 * check where to insert: before @curp or after @curp
741 */ 693 */
742int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 694static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
743 struct ext4_ext_path *curp, 695 struct ext4_ext_path *curp,
744 int logical, ext4_fsblk_t ptr) 696 int logical, ext4_fsblk_t ptr)
745{ 697{
746 struct ext4_extent_idx *ix; 698 struct ext4_extent_idx *ix;
747 int len, err; 699 int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
917 EXT_MAX_EXTENT(path[depth].p_hdr)) { 869 EXT_MAX_EXTENT(path[depth].p_hdr)) {
918 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 870 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
919 le32_to_cpu(path[depth].p_ext->ee_block), 871 le32_to_cpu(path[depth].p_ext->ee_block),
920 ext_pblock(path[depth].p_ext), 872 ext4_ext_pblock(path[depth].p_ext),
921 ext4_ext_is_uninitialized(path[depth].p_ext), 873 ext4_ext_is_uninitialized(path[depth].p_ext),
922 ext4_ext_get_actual_len(path[depth].p_ext), 874 ext4_ext_get_actual_len(path[depth].p_ext),
923 newblock); 875 newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1007 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 959 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
1008 ext_debug("%d: move %d:%llu in new index %llu\n", i, 960 ext_debug("%d: move %d:%llu in new index %llu\n", i,
1009 le32_to_cpu(path[i].p_idx->ei_block), 961 le32_to_cpu(path[i].p_idx->ei_block),
1010 idx_pblock(path[i].p_idx), 962 ext4_idx_pblock(path[i].p_idx),
1011 newblock); 963 newblock);
1012 /*memmove(++fidx, path[i].p_idx++, 964 /*memmove(++fidx, path[i].p_idx++,
1013 sizeof(struct ext4_extent_idx)); 965 sizeof(struct ext4_extent_idx));
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1146 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1098 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1147 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1099 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1148 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1100 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1149 idx_pblock(EXT_FIRST_INDEX(neh))); 1101 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1150 1102
1151 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1103 neh->eh_depth = cpu_to_le16(path->p_depth + 1);
1152 err = ext4_ext_dirty(handle, inode, curp); 1104 err = ext4_ext_dirty(handle, inode, curp);
@@ -1232,9 +1184,9 @@ out:
1232 * returns 0 at @phys 1184 * returns 0 at @phys
1233 * return value contains 0 (success) or error code 1185 * return value contains 0 (success) or error code
1234 */ 1186 */
1235int 1187static int ext4_ext_search_left(struct inode *inode,
1236ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, 1188 struct ext4_ext_path *path,
1237 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1189 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1238{ 1190{
1239 struct ext4_extent_idx *ix; 1191 struct ext4_extent_idx *ix;
1240 struct ext4_extent *ex; 1192 struct ext4_extent *ex;
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1286 } 1238 }
1287 1239
1288 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1240 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1289 *phys = ext_pblock(ex) + ee_len - 1; 1241 *phys = ext4_ext_pblock(ex) + ee_len - 1;
1290 return 0; 1242 return 0;
1291} 1243}
1292 1244
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1297 * returns 0 at @phys 1249 * returns 0 at @phys
1298 * return value contains 0 (success) or error code 1250 * return value contains 0 (success) or error code
1299 */ 1251 */
1300int 1252static int ext4_ext_search_right(struct inode *inode,
1301ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, 1253 struct ext4_ext_path *path,
1302 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1254 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1303{ 1255{
1304 struct buffer_head *bh = NULL; 1256 struct buffer_head *bh = NULL;
1305 struct ext4_extent_header *eh; 1257 struct ext4_extent_header *eh;
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1342 } 1294 }
1343 } 1295 }
1344 *logical = le32_to_cpu(ex->ee_block); 1296 *logical = le32_to_cpu(ex->ee_block);
1345 *phys = ext_pblock(ex); 1297 *phys = ext4_ext_pblock(ex);
1346 return 0; 1298 return 0;
1347 } 1299 }
1348 1300
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1357 /* next allocated block in this leaf */ 1309 /* next allocated block in this leaf */
1358 ex++; 1310 ex++;
1359 *logical = le32_to_cpu(ex->ee_block); 1311 *logical = le32_to_cpu(ex->ee_block);
1360 *phys = ext_pblock(ex); 1312 *phys = ext4_ext_pblock(ex);
1361 return 0; 1313 return 0;
1362 } 1314 }
1363 1315
@@ -1376,7 +1328,7 @@ got_index:
1376 * follow it and find the closest allocated 1328 * follow it and find the closest allocated
1377 * block to the right */ 1329 * block to the right */
1378 ix++; 1330 ix++;
1379 block = idx_pblock(ix); 1331 block = ext4_idx_pblock(ix);
1380 while (++depth < path->p_depth) { 1332 while (++depth < path->p_depth) {
1381 bh = sb_bread(inode->i_sb, block); 1333 bh = sb_bread(inode->i_sb, block);
1382 if (bh == NULL) 1334 if (bh == NULL)
@@ -1388,7 +1340,7 @@ got_index:
1388 return -EIO; 1340 return -EIO;
1389 } 1341 }
1390 ix = EXT_FIRST_INDEX(eh); 1342 ix = EXT_FIRST_INDEX(eh);
1391 block = idx_pblock(ix); 1343 block = ext4_idx_pblock(ix);
1392 put_bh(bh); 1344 put_bh(bh);
1393 } 1345 }
1394 1346
@@ -1402,7 +1354,7 @@ got_index:
1402 } 1354 }
1403 ex = EXT_FIRST_EXTENT(eh); 1355 ex = EXT_FIRST_EXTENT(eh);
1404 *logical = le32_to_cpu(ex->ee_block); 1356 *logical = le32_to_cpu(ex->ee_block);
1405 *phys = ext_pblock(ex); 1357 *phys = ext4_ext_pblock(ex);
1406 put_bh(bh); 1358 put_bh(bh);
1407 return 0; 1359 return 0;
1408} 1360}
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1573 return 0; 1525 return 0;
1574#endif 1526#endif
1575 1527
1576 if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) 1528 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1577 return 1; 1529 return 1;
1578 return 0; 1530 return 0;
1579} 1531}
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1585 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1537 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1586 * 1 if they got merged. 1538 * 1 if they got merged.
1587 */ 1539 */
1588int ext4_ext_try_to_merge(struct inode *inode, 1540static int ext4_ext_try_to_merge(struct inode *inode,
1589 struct ext4_ext_path *path, 1541 struct ext4_ext_path *path,
1590 struct ext4_extent *ex) 1542 struct ext4_extent *ex)
1591{ 1543{
1592 struct ext4_extent_header *eh; 1544 struct ext4_extent_header *eh;
1593 unsigned int depth, len; 1545 unsigned int depth, len;
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
1632 * such that there will be no overlap, and then returns 1. 1584 * such that there will be no overlap, and then returns 1.
1633 * If there is no overlap found, it returns 0. 1585 * If there is no overlap found, it returns 0.
1634 */ 1586 */
1635unsigned int ext4_ext_check_overlap(struct inode *inode, 1587static unsigned int ext4_ext_check_overlap(struct inode *inode,
1636 struct ext4_extent *newext, 1588 struct ext4_extent *newext,
1637 struct ext4_ext_path *path) 1589 struct ext4_ext_path *path)
1638{ 1590{
1639 ext4_lblk_t b1, b2; 1591 ext4_lblk_t b1, b2;
1640 unsigned int depth, len1; 1592 unsigned int depth, len1;
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1706 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1658 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1707 && ext4_can_extents_be_merged(inode, ex, newext)) { 1659 && ext4_can_extents_be_merged(inode, ex, newext)) {
1708 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1660 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1709 ext4_ext_is_uninitialized(newext), 1661 ext4_ext_is_uninitialized(newext),
1710 ext4_ext_get_actual_len(newext), 1662 ext4_ext_get_actual_len(newext),
1711 le32_to_cpu(ex->ee_block), 1663 le32_to_cpu(ex->ee_block),
1712 ext4_ext_is_uninitialized(ex), 1664 ext4_ext_is_uninitialized(ex),
1713 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 1665 ext4_ext_get_actual_len(ex),
1666 ext4_ext_pblock(ex));
1714 err = ext4_ext_get_access(handle, inode, path + depth); 1667 err = ext4_ext_get_access(handle, inode, path + depth);
1715 if (err) 1668 if (err)
1716 return err; 1669 return err;
@@ -1780,7 +1733,7 @@ has_space:
1780 /* there is no extent in this leaf, create first one */ 1733 /* there is no extent in this leaf, create first one */
1781 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1734 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
1782 le32_to_cpu(newext->ee_block), 1735 le32_to_cpu(newext->ee_block),
1783 ext_pblock(newext), 1736 ext4_ext_pblock(newext),
1784 ext4_ext_is_uninitialized(newext), 1737 ext4_ext_is_uninitialized(newext),
1785 ext4_ext_get_actual_len(newext)); 1738 ext4_ext_get_actual_len(newext));
1786 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1739 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1794,7 +1747,7 @@ has_space:
1794 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1747 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1795 "move %d from 0x%p to 0x%p\n", 1748 "move %d from 0x%p to 0x%p\n",
1796 le32_to_cpu(newext->ee_block), 1749 le32_to_cpu(newext->ee_block),
1797 ext_pblock(newext), 1750 ext4_ext_pblock(newext),
1798 ext4_ext_is_uninitialized(newext), 1751 ext4_ext_is_uninitialized(newext),
1799 ext4_ext_get_actual_len(newext), 1752 ext4_ext_get_actual_len(newext),
1800 nearex, len, nearex + 1, nearex + 2); 1753 nearex, len, nearex + 1, nearex + 2);
@@ -1808,7 +1761,7 @@ has_space:
1808 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " 1761 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1809 "move %d from 0x%p to 0x%p\n", 1762 "move %d from 0x%p to 0x%p\n",
1810 le32_to_cpu(newext->ee_block), 1763 le32_to_cpu(newext->ee_block),
1811 ext_pblock(newext), 1764 ext4_ext_pblock(newext),
1812 ext4_ext_is_uninitialized(newext), 1765 ext4_ext_is_uninitialized(newext),
1813 ext4_ext_get_actual_len(newext), 1766 ext4_ext_get_actual_len(newext),
1814 nearex, len, nearex + 1, nearex + 2); 1767 nearex, len, nearex + 1, nearex + 2);
@@ -1819,7 +1772,7 @@ has_space:
1819 le16_add_cpu(&eh->eh_entries, 1); 1772 le16_add_cpu(&eh->eh_entries, 1);
1820 nearex = path[depth].p_ext; 1773 nearex = path[depth].p_ext;
1821 nearex->ee_block = newext->ee_block; 1774 nearex->ee_block = newext->ee_block;
1822 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1775 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1823 nearex->ee_len = newext->ee_len; 1776 nearex->ee_len = newext->ee_len;
1824 1777
1825merge: 1778merge:
@@ -1845,9 +1798,9 @@ cleanup:
1845 return err; 1798 return err;
1846} 1799}
1847 1800
1848int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1801static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1849 ext4_lblk_t num, ext_prepare_callback func, 1802 ext4_lblk_t num, ext_prepare_callback func,
1850 void *cbdata) 1803 void *cbdata)
1851{ 1804{
1852 struct ext4_ext_path *path = NULL; 1805 struct ext4_ext_path *path = NULL;
1853 struct ext4_ext_cache cbex; 1806 struct ext4_ext_cache cbex;
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1923 } else { 1876 } else {
1924 cbex.ec_block = le32_to_cpu(ex->ee_block); 1877 cbex.ec_block = le32_to_cpu(ex->ee_block);
1925 cbex.ec_len = ext4_ext_get_actual_len(ex); 1878 cbex.ec_len = ext4_ext_get_actual_len(ex);
1926 cbex.ec_start = ext_pblock(ex); 1879 cbex.ec_start = ext4_ext_pblock(ex);
1927 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1880 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1928 } 1881 }
1929 1882
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2073 2026
2074 /* free index block */ 2027 /* free index block */
2075 path--; 2028 path--;
2076 leaf = idx_pblock(path->p_idx); 2029 leaf = ext4_idx_pblock(path->p_idx);
2077 if (unlikely(path->p_hdr->eh_entries == 0)) { 2030 if (unlikely(path->p_hdr->eh_entries == 0)) {
2078 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2031 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2079 return -EIO; 2032 return -EIO;
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2181 ext4_fsblk_t start; 2134 ext4_fsblk_t start;
2182 2135
2183 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2136 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2184 start = ext_pblock(ex) + ee_len - num; 2137 start = ext4_ext_pblock(ex) + ee_len - num;
2185 ext_debug("free last %u blocks starting %llu\n", num, start); 2138 ext_debug("free last %u blocks starting %llu\n", num, start);
2186 ext4_free_blocks(handle, inode, 0, start, num, flags); 2139 ext4_free_blocks(handle, inode, 0, start, num, flags);
2187 } else if (from == le32_to_cpu(ex->ee_block) 2140 } else if (from == le32_to_cpu(ex->ee_block)
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2310 goto out; 2263 goto out;
2311 2264
2312 ext_debug("new extent: %u:%u:%llu\n", block, num, 2265 ext_debug("new extent: %u:%u:%llu\n", block, num,
2313 ext_pblock(ex)); 2266 ext4_ext_pblock(ex));
2314 ex--; 2267 ex--;
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2268 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2269 ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2421,9 +2374,9 @@ again:
2421 struct buffer_head *bh; 2374 struct buffer_head *bh;
2422 /* go to the next level */ 2375 /* go to the next level */
2423 ext_debug("move to level %d (block %llu)\n", 2376 ext_debug("move to level %d (block %llu)\n",
2424 i + 1, idx_pblock(path[i].p_idx)); 2377 i + 1, ext4_idx_pblock(path[i].p_idx));
2425 memset(path + i + 1, 0, sizeof(*path)); 2378 memset(path + i + 1, 0, sizeof(*path));
2426 bh = sb_bread(sb, idx_pblock(path[i].p_idx)); 2379 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
2427 if (!bh) { 2380 if (!bh) {
2428 /* should we reset i_size? */ 2381 /* should we reset i_size? */
2429 err = -EIO; 2382 err = -EIO;
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
2535#endif 2488#endif
2536} 2489}
2537 2490
2538static void bi_complete(struct bio *bio, int error)
2539{
2540 complete((struct completion *)bio->bi_private);
2541}
2542
2543/* FIXME!! we need to try to merge to left or right after zero-out */ 2491/* FIXME!! we need to try to merge to left or right after zero-out */
2544static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2492static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2545{ 2493{
2494 ext4_fsblk_t ee_pblock;
2495 unsigned int ee_len;
2546 int ret; 2496 int ret;
2547 struct bio *bio;
2548 int blkbits, blocksize;
2549 sector_t ee_pblock;
2550 struct completion event;
2551 unsigned int ee_len, len, done, offset;
2552 2497
2553
2554 blkbits = inode->i_blkbits;
2555 blocksize = inode->i_sb->s_blocksize;
2556 ee_len = ext4_ext_get_actual_len(ex); 2498 ee_len = ext4_ext_get_actual_len(ex);
2557 ee_pblock = ext_pblock(ex); 2499 ee_pblock = ext4_ext_pblock(ex);
2558
2559 /* convert ee_pblock to 512 byte sectors */
2560 ee_pblock = ee_pblock << (blkbits - 9);
2561
2562 while (ee_len > 0) {
2563
2564 if (ee_len > BIO_MAX_PAGES)
2565 len = BIO_MAX_PAGES;
2566 else
2567 len = ee_len;
2568
2569 bio = bio_alloc(GFP_NOIO, len);
2570 if (!bio)
2571 return -ENOMEM;
2572
2573 bio->bi_sector = ee_pblock;
2574 bio->bi_bdev = inode->i_sb->s_bdev;
2575
2576 done = 0;
2577 offset = 0;
2578 while (done < len) {
2579 ret = bio_add_page(bio, ZERO_PAGE(0),
2580 blocksize, offset);
2581 if (ret != blocksize) {
2582 /*
2583 * We can't add any more pages because of
2584 * hardware limitations. Start a new bio.
2585 */
2586 break;
2587 }
2588 done++;
2589 offset += blocksize;
2590 if (offset >= PAGE_CACHE_SIZE)
2591 offset = 0;
2592 }
2593 2500
2594 init_completion(&event); 2501 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
2595 bio->bi_private = &event; 2502 if (ret > 0)
2596 bio->bi_end_io = bi_complete; 2503 ret = 0;
2597 submit_bio(WRITE, bio);
2598 wait_for_completion(&event);
2599 2504
2600 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2505 return ret;
2601 bio_put(bio);
2602 return -EIO;
2603 }
2604 bio_put(bio);
2605 ee_len -= done;
2606 ee_pblock += done << (blkbits - 9);
2607 }
2608 return 0;
2609} 2506}
2610 2507
2611#define EXT4_EXT_ZERO_LEN 7 2508#define EXT4_EXT_ZERO_LEN 7
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2651 ee_block = le32_to_cpu(ex->ee_block); 2548 ee_block = le32_to_cpu(ex->ee_block);
2652 ee_len = ext4_ext_get_actual_len(ex); 2549 ee_len = ext4_ext_get_actual_len(ex);
2653 allocated = ee_len - (map->m_lblk - ee_block); 2550 allocated = ee_len - (map->m_lblk - ee_block);
2654 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2551 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2655 2552
2656 ex2 = ex; 2553 ex2 = ex;
2657 orig_ex.ee_block = ex->ee_block; 2554 orig_ex.ee_block = ex->ee_block;
2658 orig_ex.ee_len = cpu_to_le16(ee_len); 2555 orig_ex.ee_len = cpu_to_le16(ee_len);
2659 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2556 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2660 2557
2661 /* 2558 /*
2662 * It is safe to convert extent to initialized via explicit 2559 * It is safe to convert extent to initialized via explicit
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2675 /* update the extent length and mark as initialized */ 2572 /* update the extent length and mark as initialized */
2676 ex->ee_block = orig_ex.ee_block; 2573 ex->ee_block = orig_ex.ee_block;
2677 ex->ee_len = orig_ex.ee_len; 2574 ex->ee_len = orig_ex.ee_len;
2678 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2575 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2679 ext4_ext_dirty(handle, inode, path + depth); 2576 ext4_ext_dirty(handle, inode, path + depth);
2680 /* zeroed the full extent */ 2577 /* zeroed the full extent */
2681 return allocated; 2578 return allocated;
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2710 ex->ee_block = orig_ex.ee_block; 2607 ex->ee_block = orig_ex.ee_block;
2711 ex->ee_len = cpu_to_le16(ee_len - allocated); 2608 ex->ee_len = cpu_to_le16(ee_len - allocated);
2712 ext4_ext_mark_uninitialized(ex); 2609 ext4_ext_mark_uninitialized(ex);
2713 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2610 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2714 ext4_ext_dirty(handle, inode, path + depth); 2611 ext4_ext_dirty(handle, inode, path + depth);
2715 2612
2716 ex3 = &newex; 2613 ex3 = &newex;
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2725 goto fix_extent_len; 2622 goto fix_extent_len;
2726 ex->ee_block = orig_ex.ee_block; 2623 ex->ee_block = orig_ex.ee_block;
2727 ex->ee_len = orig_ex.ee_len; 2624 ex->ee_len = orig_ex.ee_len;
2728 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2625 ext4_ext_store_pblock(ex,
2626 ext4_ext_pblock(&orig_ex));
2729 ext4_ext_dirty(handle, inode, path + depth); 2627 ext4_ext_dirty(handle, inode, path + depth);
2730 /* blocks available from map->m_lblk */ 2628 /* blocks available from map->m_lblk */
2731 return allocated; 2629 return allocated;
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2782 /* update the extent length and mark as initialized */ 2680 /* update the extent length and mark as initialized */
2783 ex->ee_block = orig_ex.ee_block; 2681 ex->ee_block = orig_ex.ee_block;
2784 ex->ee_len = orig_ex.ee_len; 2682 ex->ee_len = orig_ex.ee_len;
2785 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2683 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2786 ext4_ext_dirty(handle, inode, path + depth); 2684 ext4_ext_dirty(handle, inode, path + depth);
2787 /* zeroed the full extent */ 2685 /* zeroed the full extent */
2788 /* blocks available from map->m_lblk */ 2686 /* blocks available from map->m_lblk */
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2833 /* update the extent length and mark as initialized */ 2731 /* update the extent length and mark as initialized */
2834 ex->ee_block = orig_ex.ee_block; 2732 ex->ee_block = orig_ex.ee_block;
2835 ex->ee_len = orig_ex.ee_len; 2733 ex->ee_len = orig_ex.ee_len;
2836 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2734 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2837 ext4_ext_dirty(handle, inode, path + depth); 2735 ext4_ext_dirty(handle, inode, path + depth);
2838 /* zero out the first half */ 2736 /* zero out the first half */
2839 /* blocks available from map->m_lblk */ 2737 /* blocks available from map->m_lblk */
@@ -2902,7 +2800,7 @@ insert:
2902 /* update the extent length and mark as initialized */ 2800 /* update the extent length and mark as initialized */
2903 ex->ee_block = orig_ex.ee_block; 2801 ex->ee_block = orig_ex.ee_block;
2904 ex->ee_len = orig_ex.ee_len; 2802 ex->ee_len = orig_ex.ee_len;
2905 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2803 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2906 ext4_ext_dirty(handle, inode, path + depth); 2804 ext4_ext_dirty(handle, inode, path + depth);
2907 /* zero out the first half */ 2805 /* zero out the first half */
2908 return allocated; 2806 return allocated;
@@ -2915,7 +2813,7 @@ out:
2915fix_extent_len: 2813fix_extent_len:
2916 ex->ee_block = orig_ex.ee_block; 2814 ex->ee_block = orig_ex.ee_block;
2917 ex->ee_len = orig_ex.ee_len; 2815 ex->ee_len = orig_ex.ee_len;
2918 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2816 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2919 ext4_ext_mark_uninitialized(ex); 2817 ext4_ext_mark_uninitialized(ex);
2920 ext4_ext_dirty(handle, inode, path + depth); 2818 ext4_ext_dirty(handle, inode, path + depth);
2921 return err; 2819 return err;
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2973 ee_block = le32_to_cpu(ex->ee_block); 2871 ee_block = le32_to_cpu(ex->ee_block);
2974 ee_len = ext4_ext_get_actual_len(ex); 2872 ee_len = ext4_ext_get_actual_len(ex);
2975 allocated = ee_len - (map->m_lblk - ee_block); 2873 allocated = ee_len - (map->m_lblk - ee_block);
2976 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2874 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2977 2875
2978 ex2 = ex; 2876 ex2 = ex;
2979 orig_ex.ee_block = ex->ee_block; 2877 orig_ex.ee_block = ex->ee_block;
2980 orig_ex.ee_len = cpu_to_le16(ee_len); 2878 orig_ex.ee_len = cpu_to_le16(ee_len);
2981 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2879 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2982 2880
2983 /* 2881 /*
2984 * It is safe to convert extent to initialized via explicit 2882 * It is safe to convert extent to initialized via explicit
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3027 /* update the extent length and mark as initialized */ 2925 /* update the extent length and mark as initialized */
3028 ex->ee_block = orig_ex.ee_block; 2926 ex->ee_block = orig_ex.ee_block;
3029 ex->ee_len = orig_ex.ee_len; 2927 ex->ee_len = orig_ex.ee_len;
3030 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2928 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3031 ext4_ext_dirty(handle, inode, path + depth); 2929 ext4_ext_dirty(handle, inode, path + depth);
3032 /* zeroed the full extent */ 2930 /* zeroed the full extent */
3033 /* blocks available from map->m_lblk */ 2931 /* blocks available from map->m_lblk */
@@ -3099,7 +2997,7 @@ insert:
3099 /* update the extent length and mark as initialized */ 2997 /* update the extent length and mark as initialized */
3100 ex->ee_block = orig_ex.ee_block; 2998 ex->ee_block = orig_ex.ee_block;
3101 ex->ee_len = orig_ex.ee_len; 2999 ex->ee_len = orig_ex.ee_len;
3102 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3000 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3103 ext4_ext_dirty(handle, inode, path + depth); 3001 ext4_ext_dirty(handle, inode, path + depth);
3104 /* zero out the first half */ 3002 /* zero out the first half */
3105 return allocated; 3003 return allocated;
@@ -3112,7 +3010,7 @@ out:
3112fix_extent_len: 3010fix_extent_len:
3113 ex->ee_block = orig_ex.ee_block; 3011 ex->ee_block = orig_ex.ee_block;
3114 ex->ee_len = orig_ex.ee_len; 3012 ex->ee_len = orig_ex.ee_len;
3115 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3013 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3116 ext4_ext_mark_uninitialized(ex); 3014 ext4_ext_mark_uninitialized(ex);
3117 ext4_ext_dirty(handle, inode, path + depth); 3015 ext4_ext_dirty(handle, inode, path + depth);
3118 return err; 3016 return err;
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3180 unmap_underlying_metadata(bdev, block + i); 3078 unmap_underlying_metadata(bdev, block + i);
3181} 3079}
3182 3080
3081/*
3082 * Handle EOFBLOCKS_FL flag, clearing it if necessary
3083 */
3084static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3085 struct ext4_map_blocks *map,
3086 struct ext4_ext_path *path,
3087 unsigned int len)
3088{
3089 int i, depth;
3090 struct ext4_extent_header *eh;
3091 struct ext4_extent *ex, *last_ex;
3092
3093 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3094 return 0;
3095
3096 depth = ext_depth(inode);
3097 eh = path[depth].p_hdr;
3098 ex = path[depth].p_ext;
3099
3100 if (unlikely(!eh->eh_entries)) {
3101 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
3102 "EOFBLOCKS_FL set");
3103 return -EIO;
3104 }
3105 last_ex = EXT_LAST_EXTENT(eh);
3106 /*
3107 * We should clear the EOFBLOCKS_FL flag if we are writing the
3108 * last block in the last extent in the file. We test this by
3109 * first checking to see if the caller to
3110 * ext4_ext_get_blocks() was interested in the last block (or
3111 * a block beyond the last block) in the current extent. If
3112 * this turns out to be false, we can bail out from this
3113 * function immediately.
3114 */
3115 if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
3116 ext4_ext_get_actual_len(last_ex))
3117 return 0;
3118 /*
3119 * If the caller does appear to be planning to write at or
3120 * beyond the end of the current extent, we then test to see
3121 * if the current extent is the last extent in the file, by
3122 * checking to make sure it was reached via the rightmost node
3123 * at each level of the tree.
3124 */
3125 for (i = depth-1; i >= 0; i--)
3126 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3127 return 0;
3128 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3129 return ext4_mark_inode_dirty(handle, inode);
3130}
3131
3183static int 3132static int
3184ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3133ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3185 struct ext4_map_blocks *map, 3134 struct ext4_map_blocks *map,
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3206 * completed 3155 * completed
3207 */ 3156 */
3208 if (io) 3157 if (io)
3209 io->flag = EXT4_IO_UNWRITTEN; 3158 io->flag = EXT4_IO_END_UNWRITTEN;
3210 else 3159 else
3211 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3160 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3212 if (ext4_should_dioread_nolock(inode)) 3161 if (ext4_should_dioread_nolock(inode))
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3217 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3166 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3218 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3167 ret = ext4_convert_unwritten_extents_endio(handle, inode,
3219 path); 3168 path);
3220 if (ret >= 0) 3169 if (ret >= 0) {
3221 ext4_update_inode_fsync_trans(handle, inode, 1); 3170 ext4_update_inode_fsync_trans(handle, inode, 1);
3171 err = check_eofblocks_fl(handle, inode, map, path,
3172 map->m_len);
3173 } else
3174 err = ret;
3222 goto out2; 3175 goto out2;
3223 } 3176 }
3224 /* buffered IO case */ 3177 /* buffered IO case */
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3244 3197
3245 /* buffered write, writepage time, convert*/ 3198 /* buffered write, writepage time, convert*/
3246 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3199 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3247 if (ret >= 0) 3200 if (ret >= 0) {
3248 ext4_update_inode_fsync_trans(handle, inode, 1); 3201 ext4_update_inode_fsync_trans(handle, inode, 1);
3202 err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
3203 if (err < 0)
3204 goto out2;
3205 }
3206
3249out: 3207out:
3250 if (ret <= 0) { 3208 if (ret <= 0) {
3251 err = ret; 3209 err = ret;
@@ -3292,6 +3250,7 @@ out2:
3292 } 3250 }
3293 return err ? err : allocated; 3251 return err ? err : allocated;
3294} 3252}
3253
3295/* 3254/*
3296 * Block allocation/map/preallocation routine for extents based files 3255 * Block allocation/map/preallocation routine for extents based files
3297 * 3256 *
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3315{ 3274{
3316 struct ext4_ext_path *path = NULL; 3275 struct ext4_ext_path *path = NULL;
3317 struct ext4_extent_header *eh; 3276 struct ext4_extent_header *eh;
3318 struct ext4_extent newex, *ex, *last_ex; 3277 struct ext4_extent newex, *ex;
3319 ext4_fsblk_t newblock; 3278 ext4_fsblk_t newblock;
3320 int i, err = 0, depth, ret, cache_type; 3279 int err = 0, depth, ret, cache_type;
3321 unsigned int allocated = 0; 3280 unsigned int allocated = 0;
3322 struct ext4_allocation_request ar; 3281 struct ext4_allocation_request ar;
3323 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3282 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3341 /* block is already allocated */ 3300 /* block is already allocated */
3342 newblock = map->m_lblk 3301 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3302 - le32_to_cpu(newex.ee_block)
3344 + ext_pblock(&newex); 3303 + ext4_ext_pblock(&newex);
3345 /* number of remaining blocks in the extent */ 3304 /* number of remaining blocks in the extent */
3346 allocated = ext4_ext_get_actual_len(&newex) - 3305 allocated = ext4_ext_get_actual_len(&newex) -
3347 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3306 (map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3379 ex = path[depth].p_ext; 3338 ex = path[depth].p_ext;
3380 if (ex) { 3339 if (ex) {
3381 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3340 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3382 ext4_fsblk_t ee_start = ext_pblock(ex); 3341 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3383 unsigned short ee_len; 3342 unsigned short ee_len;
3384 3343
3385 /* 3344 /*
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3488 */ 3447 */
3489 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3448 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3490 if (io) 3449 if (io)
3491 io->flag = EXT4_IO_UNWRITTEN; 3450 io->flag = EXT4_IO_END_UNWRITTEN;
3492 else 3451 else
3493 ext4_set_inode_state(inode, 3452 ext4_set_inode_state(inode,
3494 EXT4_STATE_DIO_UNWRITTEN); 3453 EXT4_STATE_DIO_UNWRITTEN);
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3497 map->m_flags |= EXT4_MAP_UNINIT; 3456 map->m_flags |= EXT4_MAP_UNINIT;
3498 } 3457 }
3499 3458
3500 if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { 3459 err = check_eofblocks_fl(handle, inode, map, path, ar.len);
3501 if (unlikely(!eh->eh_entries)) { 3460 if (err)
3502 EXT4_ERROR_INODE(inode, 3461 goto out2;
3503 "eh->eh_entries == 0 and " 3462
3504 "EOFBLOCKS_FL set");
3505 err = -EIO;
3506 goto out2;
3507 }
3508 last_ex = EXT_LAST_EXTENT(eh);
3509 /*
3510 * If the current leaf block was reached by looking at
3511 * the last index block all the way down the tree, and
3512 * we are extending the inode beyond the last extent
3513 * in the current leaf block, then clear the
3514 * EOFBLOCKS_FL flag.
3515 */
3516 for (i = depth-1; i >= 0; i--) {
3517 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3518 break;
3519 }
3520 if ((i < 0) &&
3521 (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
3522 ext4_ext_get_actual_len(last_ex)))
3523 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3524 }
3525 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3463 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3526 if (err) { 3464 if (err) {
3527 /* free data blocks we just allocated */ 3465 /* free data blocks we just allocated */
3528 /* not a good idea to call discard here directly, 3466 /* not a good idea to call discard here directly,
3529 * but otherwise we'd need to call it every free() */ 3467 * but otherwise we'd need to call it every free() */
3530 ext4_discard_preallocations(inode); 3468 ext4_discard_preallocations(inode);
3531 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), 3469 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
3532 ext4_ext_get_actual_len(&newex), 0); 3470 ext4_ext_get_actual_len(&newex), 0);
3533 goto out2; 3471 goto out2;
3534 } 3472 }
3535 3473
3536 /* previous routine could use block we allocated */ 3474 /* previous routine could use block we allocated */
3537 newblock = ext_pblock(&newex); 3475 newblock = ext4_ext_pblock(&newex);
3538 allocated = ext4_ext_get_actual_len(&newex); 3476 allocated = ext4_ext_get_actual_len(&newex);
3539 if (allocated > map->m_len) 3477 if (allocated > map->m_len)
3540 allocated = map->m_len; 3478 allocated = map->m_len;
@@ -3729,7 +3667,7 @@ retry:
3729 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3667 printk(KERN_ERR "%s: ext4_ext_map_blocks "
3730 "returned error inode#%lu, block=%u, " 3668 "returned error inode#%lu, block=%u, "
3731 "max_blocks=%u", __func__, 3669 "max_blocks=%u", __func__,
3732 inode->i_ino, block, max_blocks); 3670 inode->i_ino, map.m_lblk, max_blocks);
3733#endif 3671#endif
3734 ext4_mark_inode_dirty(handle, inode); 3672 ext4_mark_inode_dirty(handle, inode);
3735 ret2 = ext4_journal_stop(handle); 3673 ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ee92b66d4558..5a5c55ddceef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
130 return dquot_file_open(inode, filp); 130 return dquot_file_open(inode, filp);
131} 131}
132 132
133/*
134 * ext4_llseek() copied from generic_file_llseek() to handle both
135 * block-mapped and extent-mapped maxbytes values. This should
136 * otherwise be identical with generic_file_llseek().
137 */
138loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
139{
140 struct inode *inode = file->f_mapping->host;
141 loff_t maxbytes;
142
143 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
144 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
145 else
146 maxbytes = inode->i_sb->s_maxbytes;
147 mutex_lock(&inode->i_mutex);
148 switch (origin) {
149 case SEEK_END:
150 offset += inode->i_size;
151 break;
152 case SEEK_CUR:
153 if (offset == 0) {
154 mutex_unlock(&inode->i_mutex);
155 return file->f_pos;
156 }
157 offset += file->f_pos;
158 break;
159 }
160
161 if (offset < 0 || offset > maxbytes) {
162 mutex_unlock(&inode->i_mutex);
163 return -EINVAL;
164 }
165
166 if (offset != file->f_pos) {
167 file->f_pos = offset;
168 file->f_version = 0;
169 }
170 mutex_unlock(&inode->i_mutex);
171
172 return offset;
173}
174
133const struct file_operations ext4_file_operations = { 175const struct file_operations ext4_file_operations = {
134 .llseek = generic_file_llseek, 176 .llseek = ext4_llseek,
135 .read = do_sync_read, 177 .read = do_sync_read,
136 .write = do_sync_write, 178 .write = do_sync_write,
137 .aio_read = generic_file_aio_read, 179 .aio_read = generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3f3ff5ee8f9d..c1a7bc923cf6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
34 34
35#include <trace/events/ext4.h> 35#include <trace/events/ext4.h>
36 36
/*
 * Debug helper: print every entry on the inode's completed-IO list
 * together with its list neighbours.  Compiles to an empty function
 * unless EXT4_DEBUG is defined.
 *
 * The neighbour pointers are obtained with container_of() on the raw
 * prev/next links, so for the first and last entries they are derived
 * from the list head rather than from a real ext4_io_end_t; they are
 * only printed, never dereferenced.
 */
static void dump_completed_IO(struct inode * inode)
{
#ifdef EXT4_DEBUG
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_io_end_t *io, *prev_io, *next_io;
	unsigned long flags;

	if (list_empty(&ei->i_completed_io_list)) {
		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
		return;
	}

	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);

	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	list_for_each_entry(io, &ei->i_completed_io_list, list) {
		prev_io = container_of(io->list.prev, ext4_io_end_t, list);
		next_io = container_of(io->list.next, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   io, inode->i_ino, prev_io, next_io);
	}
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
#endif
}
64
65/*
66 * This function is called from ext4_sync_file().
67 *
68 * When IO is completed, the work to convert unwritten extents to
69 * written is queued on workqueue but may not get immediately
70 * scheduled. When fsync is called, we need to ensure the
71 * conversion is complete before fsync returns.
72 * The inode keeps track of a list of pending/completed IO that
73 * might needs to do the conversion. This function walks through
74 * the list and convert the related unwritten extents for completed IO
75 * to written.
76 * The function return the number of pending IOs on success.
77 */
78static int flush_completed_IO(struct inode *inode)
79{
80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode);
82 unsigned long flags;
83 int ret = 0;
84 int ret2 = 0;
85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list);
94 /*
95 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written.
97 *
98 * When ext4_sync_file() is called, run_queue() may already
99 * about to flush the work corresponding to this io structure.
100 * It will be upset if it founds the io structure related
101 * to the work-to-be schedule is freed.
102 *
103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work
106 * queue work.
107 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0)
112 ret2 = ret;
113 else
114 list_del_init(&io->list);
115 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0;
118}
119
37/* 120/*
38 * If we're not journaling and this is a just-created file, we have to 121 * If we're not journaling and this is a just-created file, we have to
39 * sync our parent directory (if it was freshly created) since 122 * sync our parent directory (if it was freshly created) since
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
50 * need to use it within a single byte (to ensure we get endianness right). 50 * need to use it within a single byte (to ensure we get endianness right).
51 * We can use memset for the rest of the bitmap as there are no other users. 51 * We can use memset for the rest of the bitmap as there are no other users.
52 */ 52 */
53void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 53void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
54{ 54{
55 int i; 55 int i;
56 56
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
65} 65}
66 66
67/* Initializes an uninitialized inode bitmap */ 67/* Initializes an uninitialized inode bitmap */
68unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, 68static unsigned ext4_init_inode_bitmap(struct super_block *sb,
69 ext4_group_t block_group, 69 struct buffer_head *bh,
70 struct ext4_group_desc *gdp) 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp)
71{ 72{
72 struct ext4_sb_info *sbi = EXT4_SB(sb); 73 struct ext4_sb_info *sbi = EXT4_SB(sb);
73 74
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
85 } 86 }
86 87
87 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 88 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
88 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 89 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
89 bh->b_data); 90 bh->b_data);
90 91
91 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
107 desc = ext4_get_group_desc(sb, block_group, NULL); 108 desc = ext4_get_group_desc(sb, block_group, NULL);
108 if (!desc) 109 if (!desc)
109 return NULL; 110 return NULL;
111
110 bitmap_blk = ext4_inode_bitmap(sb, desc); 112 bitmap_blk = ext4_inode_bitmap(sb, desc);
111 bh = sb_getblk(sb, bitmap_blk); 113 bh = sb_getblk(sb, bitmap_blk);
112 if (unlikely(!bh)) { 114 if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 125 unlock_buffer(bh);
124 return bh; 126 return bh;
125 } 127 }
128
126 ext4_lock_group(sb, block_group); 129 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 130 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 131 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
133 return bh; 136 return bh;
134 } 137 }
135 ext4_unlock_group(sb, block_group); 138 ext4_unlock_group(sb, block_group);
139
136 if (buffer_uptodate(bh)) { 140 if (buffer_uptodate(bh)) {
137 /* 141 /*
138 * if not uninit if bh is uptodate, 142 * if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
411 * for a particular block group or flex_bg. If flex_size is 1, then g 415 * for a particular block group or flex_bg. If flex_size is 1, then g
412 * is a block group number; otherwise it is flex_bg number. 416 * is a block group number; otherwise it is flex_bg number.
413 */ 417 */
414void get_orlov_stats(struct super_block *sb, ext4_group_t g, 418static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
415 int flex_size, struct orlov_stats *stats) 419 int flex_size, struct orlov_stats *stats)
416{ 420{
417 struct ext4_group_desc *desc; 421 struct ext4_group_desc *desc;
418 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 422 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
712{ 716{
713 int free = 0, retval = 0, count; 717 int free = 0, retval = 0, count;
714 struct ext4_sb_info *sbi = EXT4_SB(sb); 718 struct ext4_sb_info *sbi = EXT4_SB(sb);
719 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
716 721
722 /*
723 * We have to be sure that new inode allocation does not race with
724 * inode table initialization, because otherwise we may end up
725 * allocating and writing new inode right before sb_issue_zeroout
726 * takes place and overwriting our new inode with zeroes. So we
727 * take alloc_sem to prevent it.
728 */
729 down_read(&grp->alloc_sem);
717 ext4_lock_group(sb, group); 730 ext4_lock_group(sb, group);
718 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 731 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
719 /* not a free inode */ 732 /* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
724 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 737 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
725 ino > EXT4_INODES_PER_GROUP(sb)) { 738 ino > EXT4_INODES_PER_GROUP(sb)) {
726 ext4_unlock_group(sb, group); 739 ext4_unlock_group(sb, group);
740 up_read(&grp->alloc_sem);
727 ext4_error(sb, "reserved inode or inode > inodes count - " 741 ext4_error(sb, "reserved inode or inode > inodes count - "
728 "block_group = %u, inode=%lu", group, 742 "block_group = %u, inode=%lu", group,
729 ino + group * EXT4_INODES_PER_GROUP(sb)); 743 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
772 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 786 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
773err_ret: 787err_ret:
774 ext4_unlock_group(sb, group); 788 ext4_unlock_group(sb, group);
789 up_read(&grp->alloc_sem);
775 return retval; 790 return retval;
776} 791}
777 792
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1205 } 1220 }
1206 return count; 1221 return count;
1207} 1222}
1223
1224/*
1225 * Zeroes not yet zeroed inode table - just write zeroes through the whole
1226 * inode table. Must be called without any spinlock held. The only place
1227 * where it is called from on active part of filesystem is ext4lazyinit
1228 * thread, so we do not need any special locks, however we have to prevent
1229 * inode allocation from the current group, so we take alloc_sem lock, to
1230 * block ext4_claim_inode until we are finished.
1231 */
1232extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1233 int barrier)
1234{
1235 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1236 struct ext4_sb_info *sbi = EXT4_SB(sb);
1237 struct ext4_group_desc *gdp = NULL;
1238 struct buffer_head *group_desc_bh;
1239 handle_t *handle;
1240 ext4_fsblk_t blk;
1241 int num, ret = 0, used_blks = 0;
1242
1243 /* This should not happen, but just to be sure check this */
1244 if (sb->s_flags & MS_RDONLY) {
1245 ret = 1;
1246 goto out;
1247 }
1248
1249 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
1250 if (!gdp)
1251 goto out;
1252
1253 /*
1254 * We do not need to lock this, because we are the only one
1255 * handling this flag.
1256 */
1257 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1258 goto out;
1259
1260 handle = ext4_journal_start_sb(sb, 1);
1261 if (IS_ERR(handle)) {
1262 ret = PTR_ERR(handle);
1263 goto out;
1264 }
1265
1266 down_write(&grp->alloc_sem);
1267 /*
1268 * If inode bitmap was already initialized there may be some
1269 * used inodes so we need to skip blocks with used inodes in
1270 * inode table.
1271 */
1272 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
1273 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
1274 ext4_itable_unused_count(sb, gdp)),
1275 sbi->s_inodes_per_block);
1276
1277 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1278 ext4_error(sb, "Something is wrong with group %u\n"
1279 "Used itable blocks: %d"
1280 "itable unused count: %u\n",
1281 group, used_blks,
1282 ext4_itable_unused_count(sb, gdp));
1283 ret = 1;
1284 goto out;
1285 }
1286
1287 blk = ext4_inode_table(sb, gdp) + used_blks;
1288 num = sbi->s_itb_per_group - used_blks;
1289
1290 BUFFER_TRACE(group_desc_bh, "get_write_access");
1291 ret = ext4_journal_get_write_access(handle,
1292 group_desc_bh);
1293 if (ret)
1294 goto err_out;
1295
1296 /*
1297 * Skip zeroout if the inode table is full. But we set the ZEROED
1298 * flag anyway, because obviously, when it is full it does not need
1299 * further zeroing.
1300 */
1301 if (unlikely(num == 0))
1302 goto skip_zeroout;
1303
1304 ext4_debug("going to zero out inode table in group %d\n",
1305 group);
1306 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1307 if (ret < 0)
1308 goto err_out;
1309 if (barrier)
1310 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1311
1312skip_zeroout:
1313 ext4_lock_group(sb, group);
1314 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
1315 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1316 ext4_unlock_group(sb, group);
1317
1318 BUFFER_TRACE(group_desc_bh,
1319 "call ext4_handle_dirty_metadata");
1320 ret = ext4_handle_dirty_metadata(handle, NULL,
1321 group_desc_bh);
1322
1323err_out:
1324 up_write(&grp->alloc_sem);
1325 ext4_journal_stop(handle);
1326out:
1327 return ret;
1328}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 49635ef236f8..2d6c6c8c036d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
60} 60}
61 61
62static void ext4_invalidatepage(struct page *page, unsigned long offset); 62static void ext4_invalidatepage(struct page *page, unsigned long offset);
63static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
64 struct buffer_head *bh_result, int create);
65static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
66static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
67static int __ext4_journalled_writepage(struct page *page, unsigned int len);
68static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
63 69
64/* 70/*
65 * Test whether an inode is a fast symlink. 71 * Test whether an inode is a fast symlink.
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
755 * parent to disk. 761 * parent to disk.
756 */ 762 */
757 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 763 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
764 if (unlikely(!bh)) {
765 err = -EIO;
766 goto failed;
767 }
768
758 branch[n].bh = bh; 769 branch[n].bh = bh;
759 lock_buffer(bh); 770 lock_buffer(bh);
760 BUFFER_TRACE(bh, "call get_create_access"); 771 BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
1207 break; 1218 break;
1208 idx++; 1219 idx++;
1209 num++; 1220 num++;
1210 if (num >= max_pages) 1221 if (num >= max_pages) {
1222 done = 1;
1211 break; 1223 break;
1224 }
1212 } 1225 }
1213 pagevec_release(&pvec); 1226 pagevec_release(&pvec);
1214 } 1227 }
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
1995 * 2008 *
1996 * As pages are already locked by write_cache_pages(), we can't use it 2009 * As pages are already locked by write_cache_pages(), we can't use it
1997 */ 2010 */
1998static int mpage_da_submit_io(struct mpage_da_data *mpd) 2011static int mpage_da_submit_io(struct mpage_da_data *mpd,
2012 struct ext4_map_blocks *map)
1999{ 2013{
2000 long pages_skipped;
2001 struct pagevec pvec; 2014 struct pagevec pvec;
2002 unsigned long index, end; 2015 unsigned long index, end;
2003 int ret = 0, err, nr_pages, i; 2016 int ret = 0, err, nr_pages, i;
2004 struct inode *inode = mpd->inode; 2017 struct inode *inode = mpd->inode;
2005 struct address_space *mapping = inode->i_mapping; 2018 struct address_space *mapping = inode->i_mapping;
2019 loff_t size = i_size_read(inode);
2020 unsigned int len, block_start;
2021 struct buffer_head *bh, *page_bufs = NULL;
2022 int journal_data = ext4_should_journal_data(inode);
2023 sector_t pblock = 0, cur_logical = 0;
2024 struct ext4_io_submit io_submit;
2006 2025
2007 BUG_ON(mpd->next_page <= mpd->first_page); 2026 BUG_ON(mpd->next_page <= mpd->first_page);
2027 memset(&io_submit, 0, sizeof(io_submit));
2008 /* 2028 /*
2009 * We need to start from the first_page to the next_page - 1 2029 * We need to start from the first_page to the next_page - 1
2010 * to make sure we also write the mapped dirty buffer_heads. 2030 * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
2020 if (nr_pages == 0) 2040 if (nr_pages == 0)
2021 break; 2041 break;
2022 for (i = 0; i < nr_pages; i++) { 2042 for (i = 0; i < nr_pages; i++) {
2043 int commit_write = 0, redirty_page = 0;
2023 struct page *page = pvec.pages[i]; 2044 struct page *page = pvec.pages[i];
2024 2045
2025 index = page->index; 2046 index = page->index;
2026 if (index > end) 2047 if (index > end)
2027 break; 2048 break;
2049
2050 if (index == size >> PAGE_CACHE_SHIFT)
2051 len = size & ~PAGE_CACHE_MASK;
2052 else
2053 len = PAGE_CACHE_SIZE;
2054 if (map) {
2055 cur_logical = index << (PAGE_CACHE_SHIFT -
2056 inode->i_blkbits);
2057 pblock = map->m_pblk + (cur_logical -
2058 map->m_lblk);
2059 }
2028 index++; 2060 index++;
2029 2061
2030 BUG_ON(!PageLocked(page)); 2062 BUG_ON(!PageLocked(page));
2031 BUG_ON(PageWriteback(page)); 2063 BUG_ON(PageWriteback(page));
2032 2064
2033 pages_skipped = mpd->wbc->pages_skipped;
2034 err = mapping->a_ops->writepage(page, mpd->wbc);
2035 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
2036 /*
2037 * have successfully written the page
2038 * without skipping the same
2039 */
2040 mpd->pages_written++;
2041 /* 2065 /*
2042 * In error case, we have to continue because 2066 * If the page does not have buffers (for
2043 * remaining pages are still locked 2067 * whatever reason), try to create them using
2044 * XXX: unlock and re-dirty them? 2068 * __block_write_begin. If this fails,
2069 * redirty the page and move on.
2045 */ 2070 */
2046 if (ret == 0) 2071 if (!page_has_buffers(page)) {
2047 ret = err; 2072 if (__block_write_begin(page, 0, len,
2048 } 2073 noalloc_get_block_write)) {
2049 pagevec_release(&pvec); 2074 redirty_page:
2050 } 2075 redirty_page_for_writepage(mpd->wbc,
2051 return ret; 2076 page);
2052} 2077 unlock_page(page);
2053 2078 continue;
2054/* 2079 }
2055 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers 2080 commit_write = 1;
2056 * 2081 }
2057 * the function goes through all passed space and put actual disk
2058 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
2059 */
2060static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
2061 struct ext4_map_blocks *map)
2062{
2063 struct inode *inode = mpd->inode;
2064 struct address_space *mapping = inode->i_mapping;
2065 int blocks = map->m_len;
2066 sector_t pblock = map->m_pblk, cur_logical;
2067 struct buffer_head *head, *bh;
2068 pgoff_t index, end;
2069 struct pagevec pvec;
2070 int nr_pages, i;
2071
2072 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2073 end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2074 cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2075
2076 pagevec_init(&pvec, 0);
2077
2078 while (index <= end) {
2079 /* XXX: optimize tail */
2080 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
2081 if (nr_pages == 0)
2082 break;
2083 for (i = 0; i < nr_pages; i++) {
2084 struct page *page = pvec.pages[i];
2085
2086 index = page->index;
2087 if (index > end)
2088 break;
2089 index++;
2090
2091 BUG_ON(!PageLocked(page));
2092 BUG_ON(PageWriteback(page));
2093 BUG_ON(!page_has_buffers(page));
2094
2095 bh = page_buffers(page);
2096 head = bh;
2097
2098 /* skip blocks out of the range */
2099 do {
2100 if (cur_logical >= map->m_lblk)
2101 break;
2102 cur_logical++;
2103 } while ((bh = bh->b_this_page) != head);
2104 2082
2083 bh = page_bufs = page_buffers(page);
2084 block_start = 0;
2105 do { 2085 do {
2106 if (cur_logical >= map->m_lblk + blocks) 2086 if (!bh)
2107 break; 2087 goto redirty_page;
2108 2088 if (map && (cur_logical >= map->m_lblk) &&
2109 if (buffer_delay(bh) || buffer_unwritten(bh)) { 2089 (cur_logical <= (map->m_lblk +
2110 2090 (map->m_len - 1)))) {
2111 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
2112
2113 if (buffer_delay(bh)) { 2091 if (buffer_delay(bh)) {
2114 clear_buffer_delay(bh); 2092 clear_buffer_delay(bh);
2115 bh->b_blocknr = pblock; 2093 bh->b_blocknr = pblock;
2116 } else {
2117 /*
2118 * unwritten already should have
2119 * blocknr assigned. Verify that
2120 */
2121 clear_buffer_unwritten(bh);
2122 BUG_ON(bh->b_blocknr != pblock);
2123 } 2094 }
2095 if (buffer_unwritten(bh) ||
2096 buffer_mapped(bh))
2097 BUG_ON(bh->b_blocknr != pblock);
2098 if (map->m_flags & EXT4_MAP_UNINIT)
2099 set_buffer_uninit(bh);
2100 clear_buffer_unwritten(bh);
2101 }
2124 2102
2125 } else if (buffer_mapped(bh)) 2103 /* redirty page if block allocation undone */
2126 BUG_ON(bh->b_blocknr != pblock); 2104 if (buffer_delay(bh) || buffer_unwritten(bh))
2127 2105 redirty_page = 1;
2128 if (map->m_flags & EXT4_MAP_UNINIT) 2106 bh = bh->b_this_page;
2129 set_buffer_uninit(bh); 2107 block_start += bh->b_size;
2130 cur_logical++; 2108 cur_logical++;
2131 pblock++; 2109 pblock++;
2132 } while ((bh = bh->b_this_page) != head); 2110 } while (bh != page_bufs);
2111
2112 if (redirty_page)
2113 goto redirty_page;
2114
2115 if (commit_write)
2116 /* mark the buffer_heads as dirty & uptodate */
2117 block_commit_write(page, 0, len);
2118
2119 /*
2120 * Delalloc doesn't support data journalling,
2121 * but eventually maybe we'll lift this
2122 * restriction.
2123 */
2124 if (unlikely(journal_data && PageChecked(page)))
2125 err = __ext4_journalled_writepage(page, len);
2126 else
2127 err = ext4_bio_write_page(&io_submit, page,
2128 len, mpd->wbc);
2129
2130 if (!err)
2131 mpd->pages_written++;
2132 /*
2133 * In error case, we have to continue because
2134 * remaining pages are still locked
2135 */
2136 if (ret == 0)
2137 ret = err;
2133 } 2138 }
2134 pagevec_release(&pvec); 2139 pagevec_release(&pvec);
2135 } 2140 }
2141 ext4_io_submit(&io_submit);
2142 return ret;
2136} 2143}
2137 2144
2138
2139static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2145static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2140 sector_t logical, long blk_cnt) 2146 sector_t logical, long blk_cnt)
2141{ 2147{
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
2187} 2193}
2188 2194
2189/* 2195/*
2190 * mpage_da_map_blocks - go through given space 2196 * mpage_da_map_and_submit - go through given space, map them
2197 * if necessary, and then submit them for I/O
2191 * 2198 *
2192 * @mpd - bh describing space 2199 * @mpd - bh describing space
2193 * 2200 *
2194 * The function skips space we know is already mapped to disk blocks. 2201 * The function skips space we know is already mapped to disk blocks.
2195 * 2202 *
2196 */ 2203 */
2197static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2204static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2198{ 2205{
2199 int err, blks, get_blocks_flags; 2206 int err, blks, get_blocks_flags;
2200 struct ext4_map_blocks map; 2207 struct ext4_map_blocks map, *mapp = NULL;
2201 sector_t next = mpd->b_blocknr; 2208 sector_t next = mpd->b_blocknr;
2202 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; 2209 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2203 loff_t disksize = EXT4_I(mpd->inode)->i_disksize; 2210 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2204 handle_t *handle = NULL; 2211 handle_t *handle = NULL;
2205 2212
2206 /* 2213 /*
2207 * We consider only non-mapped and non-allocated blocks 2214 * If the blocks are mapped already, or we couldn't accumulate
2208 */ 2215 * any blocks, then proceed immediately to the submission stage.
2209 if ((mpd->b_state & (1 << BH_Mapped)) &&
2210 !(mpd->b_state & (1 << BH_Delay)) &&
2211 !(mpd->b_state & (1 << BH_Unwritten)))
2212 return 0;
2213
2214 /*
2215 * If we didn't accumulate anything to write simply return
2216 */ 2216 */
2217 if (!mpd->b_size) 2217 if ((mpd->b_size == 0) ||
2218 return 0; 2218 ((mpd->b_state & (1 << BH_Mapped)) &&
2219 !(mpd->b_state & (1 << BH_Delay)) &&
2220 !(mpd->b_state & (1 << BH_Unwritten))))
2221 goto submit_io;
2219 2222
2220 handle = ext4_journal_current_handle(); 2223 handle = ext4_journal_current_handle();
2221 BUG_ON(!handle); 2224 BUG_ON(!handle);
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2252 2255
2253 err = blks; 2256 err = blks;
2254 /* 2257 /*
2255 * If get block returns with error we simply 2258 * If get block returns EAGAIN or ENOSPC and there
2256 * return. Later writepage will redirty the page and 2259 * appears to be free blocks we will call
2257 * writepages will find the dirty page again 2260 * ext4_writepage() for all of the pages which will
2261 * just redirty the pages.
2258 */ 2262 */
2259 if (err == -EAGAIN) 2263 if (err == -EAGAIN)
2260 return 0; 2264 goto submit_io;
2261 2265
2262 if (err == -ENOSPC && 2266 if (err == -ENOSPC &&
2263 ext4_count_free_blocks(sb)) { 2267 ext4_count_free_blocks(sb)) {
2264 mpd->retval = err; 2268 mpd->retval = err;
2265 return 0; 2269 goto submit_io;
2266 } 2270 }
2267 2271
2268 /* 2272 /*
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2287 /* invalidate all the pages */ 2291 /* invalidate all the pages */
2288 ext4_da_block_invalidatepages(mpd, next, 2292 ext4_da_block_invalidatepages(mpd, next,
2289 mpd->b_size >> mpd->inode->i_blkbits); 2293 mpd->b_size >> mpd->inode->i_blkbits);
2290 return err; 2294 return;
2291 } 2295 }
2292 BUG_ON(blks == 0); 2296 BUG_ON(blks == 0);
2293 2297
2298 mapp = &map;
2294 if (map.m_flags & EXT4_MAP_NEW) { 2299 if (map.m_flags & EXT4_MAP_NEW) {
2295 struct block_device *bdev = mpd->inode->i_sb->s_bdev; 2300 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
2296 int i; 2301 int i;
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2299 unmap_underlying_metadata(bdev, map.m_pblk + i); 2304 unmap_underlying_metadata(bdev, map.m_pblk + i);
2300 } 2305 }
2301 2306
2302 /*
2303 * If blocks are delayed marked, we need to
2304 * put actual blocknr and drop delayed bit
2305 */
2306 if ((mpd->b_state & (1 << BH_Delay)) ||
2307 (mpd->b_state & (1 << BH_Unwritten)))
2308 mpage_put_bnr_to_bhs(mpd, &map);
2309
2310 if (ext4_should_order_data(mpd->inode)) { 2307 if (ext4_should_order_data(mpd->inode)) {
2311 err = ext4_jbd2_file_inode(handle, mpd->inode); 2308 err = ext4_jbd2_file_inode(handle, mpd->inode);
2312 if (err) 2309 if (err)
2313 return err; 2310 /* This only happens if the journal is aborted */
2311 return;
2314 } 2312 }
2315 2313
2316 /* 2314 /*
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2321 disksize = i_size_read(mpd->inode); 2319 disksize = i_size_read(mpd->inode);
2322 if (disksize > EXT4_I(mpd->inode)->i_disksize) { 2320 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2323 ext4_update_i_disksize(mpd->inode, disksize); 2321 ext4_update_i_disksize(mpd->inode, disksize);
2324 return ext4_mark_inode_dirty(handle, mpd->inode); 2322 err = ext4_mark_inode_dirty(handle, mpd->inode);
2323 if (err)
2324 ext4_error(mpd->inode->i_sb,
2325 "Failed to mark inode %lu dirty",
2326 mpd->inode->i_ino);
2325 } 2327 }
2326 2328
2327 return 0; 2329submit_io:
2330 mpage_da_submit_io(mpd, mapp);
2331 mpd->io_done = 1;
2328} 2332}
2329 2333
2330#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 2334#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2405,7 @@ flush_it:
2401 * We couldn't merge the block to our extent, so we 2405 * We couldn't merge the block to our extent, so we
2402 * need to flush current extent and start new one 2406 * need to flush current extent and start new one
2403 */ 2407 */
2404 if (mpage_da_map_blocks(mpd) == 0) 2408 mpage_da_map_and_submit(mpd);
2405 mpage_da_submit_io(mpd);
2406 mpd->io_done = 1;
2407 return; 2409 return;
2408} 2410}
2409 2411
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2422 * The function finds extents of pages and scan them for all blocks. 2424 * The function finds extents of pages and scan them for all blocks.
2423 */ 2425 */
2424static int __mpage_da_writepage(struct page *page, 2426static int __mpage_da_writepage(struct page *page,
2425 struct writeback_control *wbc, void *data) 2427 struct writeback_control *wbc,
2428 struct mpage_da_data *mpd)
2426{ 2429{
2427 struct mpage_da_data *mpd = data;
2428 struct inode *inode = mpd->inode; 2430 struct inode *inode = mpd->inode;
2429 struct buffer_head *bh, *head; 2431 struct buffer_head *bh, *head;
2430 sector_t logical; 2432 sector_t logical;
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
2435 if (mpd->next_page != page->index) { 2437 if (mpd->next_page != page->index) {
2436 /* 2438 /*
2437 * Nope, we can't. So, we map non-allocated blocks 2439 * Nope, we can't. So, we map non-allocated blocks
2438 * and start IO on them using writepage() 2440 * and start IO on them
2439 */ 2441 */
2440 if (mpd->next_page != mpd->first_page) { 2442 if (mpd->next_page != mpd->first_page) {
2441 if (mpage_da_map_blocks(mpd) == 0) 2443 mpage_da_map_and_submit(mpd);
2442 mpage_da_submit_io(mpd);
2443 /* 2444 /*
2444 * skip rest of the page in the page_vec 2445 * skip rest of the page in the page_vec
2445 */ 2446 */
2446 mpd->io_done = 1;
2447 redirty_page_for_writepage(wbc, page); 2447 redirty_page_for_writepage(wbc, page);
2448 unlock_page(page); 2448 unlock_page(page);
2449 return MPAGE_DA_EXTENT_TAIL; 2449 return MPAGE_DA_EXTENT_TAIL;
@@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
2622 int ret = 0; 2622 int ret = 0;
2623 int err; 2623 int err;
2624 2624
2625 ClearPageChecked(page);
2625 page_bufs = page_buffers(page); 2626 page_bufs = page_buffers(page);
2626 BUG_ON(!page_bufs); 2627 BUG_ON(!page_bufs);
2627 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); 2628 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2699static int ext4_writepage(struct page *page, 2700static int ext4_writepage(struct page *page,
2700 struct writeback_control *wbc) 2701 struct writeback_control *wbc)
2701{ 2702{
2702 int ret = 0; 2703 int ret = 0, commit_write = 0;
2703 loff_t size; 2704 loff_t size;
2704 unsigned int len; 2705 unsigned int len;
2705 struct buffer_head *page_bufs = NULL; 2706 struct buffer_head *page_bufs = NULL;
@@ -2712,71 +2713,46 @@ static int ext4_writepage(struct page *page,
2712 else 2713 else
2713 len = PAGE_CACHE_SIZE; 2714 len = PAGE_CACHE_SIZE;
2714 2715
2715 if (page_has_buffers(page)) { 2716 /*
2716 page_bufs = page_buffers(page); 2717 * If the page does not have buffers (for whatever reason),
2717 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2718 * try to create them using __block_write_begin. If this
2718 ext4_bh_delay_or_unwritten)) { 2719 * fails, redirty the page and move on.
2719 /* 2720 */
2720 * We don't want to do block allocation 2721 if (!page_buffers(page)) {
2721 * So redirty the page and return 2722 if (__block_write_begin(page, 0, len,
2722 * We may reach here when we do a journal commit 2723 noalloc_get_block_write)) {
2723 * via journal_submit_inode_data_buffers. 2724 redirty_page:
2724 * If we don't have mapping block we just ignore
2725 * them. We can also reach here via shrink_page_list
2726 */
2727 redirty_page_for_writepage(wbc, page); 2725 redirty_page_for_writepage(wbc, page);
2728 unlock_page(page); 2726 unlock_page(page);
2729 return 0; 2727 return 0;
2730 } 2728 }
2731 } else { 2729 commit_write = 1;
2730 }
2731 page_bufs = page_buffers(page);
2732 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2733 ext4_bh_delay_or_unwritten)) {
2732 /* 2734 /*
2733 * The test for page_has_buffers() is subtle: 2735 * We don't want to do block allocation So redirty the
2734 * We know the page is dirty but it lost buffers. That means 2736 * page and return We may reach here when we do a
2735 * that at some moment in time after write_begin()/write_end() 2737 * journal commit via
2736 * has been called all buffers have been clean and thus they 2738 * journal_submit_inode_data_buffers. If we don't
2737 * must have been written at least once. So they are all 2739 * have mapping block we just ignore them. We can also
2738 * mapped and we can happily proceed with mapping them 2740 * reach here via shrink_page_list
2739 * and writing the page.
2740 *
2741 * Try to initialize the buffer_heads and check whether
2742 * all are mapped and non delay. We don't want to
2743 * do block allocation here.
2744 */ 2741 */
2745 ret = __block_write_begin(page, 0, len, 2742 goto redirty_page;
2746 noalloc_get_block_write); 2743 }
2747 if (!ret) { 2744 if (commit_write)
2748 page_bufs = page_buffers(page);
2749 /* check whether all are mapped and non delay */
2750 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2751 ext4_bh_delay_or_unwritten)) {
2752 redirty_page_for_writepage(wbc, page);
2753 unlock_page(page);
2754 return 0;
2755 }
2756 } else {
2757 /*
2758 * We can't do block allocation here
2759 * so just redity the page and unlock
2760 * and return
2761 */
2762 redirty_page_for_writepage(wbc, page);
2763 unlock_page(page);
2764 return 0;
2765 }
2766 /* now mark the buffer_heads as dirty and uptodate */ 2745 /* now mark the buffer_heads as dirty and uptodate */
2767 block_commit_write(page, 0, len); 2746 block_commit_write(page, 0, len);
2768 }
2769 2747
2770 if (PageChecked(page) && ext4_should_journal_data(inode)) { 2748 if (PageChecked(page) && ext4_should_journal_data(inode))
2771 /* 2749 /*
2772 * It's mmapped pagecache. Add buffers and journal it. There 2750 * It's mmapped pagecache. Add buffers and journal it. There
2773 * doesn't seem much point in redirtying the page here. 2751 * doesn't seem much point in redirtying the page here.
2774 */ 2752 */
2775 ClearPageChecked(page);
2776 return __ext4_journalled_writepage(page, len); 2753 return __ext4_journalled_writepage(page, len);
2777 }
2778 2754
2779 if (page_bufs && buffer_uninit(page_bufs)) { 2755 if (buffer_uninit(page_bufs)) {
2780 ext4_set_bh_endio(page_bufs, inode); 2756 ext4_set_bh_endio(page_bufs, inode);
2781 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2757 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2782 wbc, ext4_end_io_buffer_write); 2758 wbc, ext4_end_io_buffer_write);
@@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2823 */ 2799 */
2824static int write_cache_pages_da(struct address_space *mapping, 2800static int write_cache_pages_da(struct address_space *mapping,
2825 struct writeback_control *wbc, 2801 struct writeback_control *wbc,
2826 struct mpage_da_data *mpd) 2802 struct mpage_da_data *mpd,
2803 pgoff_t *done_index)
2827{ 2804{
2828 int ret = 0; 2805 int ret = 0;
2829 int done = 0; 2806 int done = 0;
2830 struct pagevec pvec; 2807 struct pagevec pvec;
2831 int nr_pages; 2808 unsigned nr_pages;
2832 pgoff_t index; 2809 pgoff_t index;
2833 pgoff_t end; /* Inclusive */ 2810 pgoff_t end; /* Inclusive */
2834 long nr_to_write = wbc->nr_to_write; 2811 long nr_to_write = wbc->nr_to_write;
2812 int tag;
2835 2813
2836 pagevec_init(&pvec, 0); 2814 pagevec_init(&pvec, 0);
2837 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2815 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2838 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2816 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2839 2817
2818 if (wbc->sync_mode == WB_SYNC_ALL)
2819 tag = PAGECACHE_TAG_TOWRITE;
2820 else
2821 tag = PAGECACHE_TAG_DIRTY;
2822
2823 *done_index = index;
2840 while (!done && (index <= end)) { 2824 while (!done && (index <= end)) {
2841 int i; 2825 int i;
2842 2826
2843 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2827 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2844 PAGECACHE_TAG_DIRTY,
2845 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2828 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2846 if (nr_pages == 0) 2829 if (nr_pages == 0)
2847 break; 2830 break;
@@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping,
2861 break; 2844 break;
2862 } 2845 }
2863 2846
2847 *done_index = page->index + 1;
2848
2864 lock_page(page); 2849 lock_page(page);
2865 2850
2866 /* 2851 /*
@@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping,
2946 long desired_nr_to_write, nr_to_writebump = 0; 2931 long desired_nr_to_write, nr_to_writebump = 0;
2947 loff_t range_start = wbc->range_start; 2932 loff_t range_start = wbc->range_start;
2948 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2933 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2934 pgoff_t done_index = 0;
2935 pgoff_t end;
2949 2936
2950 trace_ext4_da_writepages(inode, wbc); 2937 trace_ext4_da_writepages(inode, wbc);
2951 2938
@@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping,
2981 wbc->range_start = index << PAGE_CACHE_SHIFT; 2968 wbc->range_start = index << PAGE_CACHE_SHIFT;
2982 wbc->range_end = LLONG_MAX; 2969 wbc->range_end = LLONG_MAX;
2983 wbc->range_cyclic = 0; 2970 wbc->range_cyclic = 0;
2984 } else 2971 end = -1;
2972 } else {
2985 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2973 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2974 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2975 }
2986 2976
2987 /* 2977 /*
2988 * This works around two forms of stupidity. The first is in 2978 * This works around two forms of stupidity. The first is in
@@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping,
3001 * sbi->max_writeback_mb_bump whichever is smaller. 2991 * sbi->max_writeback_mb_bump whichever is smaller.
3002 */ 2992 */
3003 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); 2993 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
3004 if (!range_cyclic && range_whole) 2994 if (!range_cyclic && range_whole) {
3005 desired_nr_to_write = wbc->nr_to_write * 8; 2995 if (wbc->nr_to_write == LONG_MAX)
3006 else 2996 desired_nr_to_write = wbc->nr_to_write;
2997 else
2998 desired_nr_to_write = wbc->nr_to_write * 8;
2999 } else
3007 desired_nr_to_write = ext4_num_dirty_pages(inode, index, 3000 desired_nr_to_write = ext4_num_dirty_pages(inode, index,
3008 max_pages); 3001 max_pages);
3009 if (desired_nr_to_write > max_pages) 3002 if (desired_nr_to_write > max_pages)
@@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping,
3020 pages_skipped = wbc->pages_skipped; 3013 pages_skipped = wbc->pages_skipped;
3021 3014
3022retry: 3015retry:
3016 if (wbc->sync_mode == WB_SYNC_ALL)
3017 tag_pages_for_writeback(mapping, index, end);
3018
3023 while (!ret && wbc->nr_to_write > 0) { 3019 while (!ret && wbc->nr_to_write > 0) {
3024 3020
3025 /* 3021 /*
@@ -3058,16 +3054,14 @@ retry:
3058 mpd.io_done = 0; 3054 mpd.io_done = 0;
3059 mpd.pages_written = 0; 3055 mpd.pages_written = 0;
3060 mpd.retval = 0; 3056 mpd.retval = 0;
3061 ret = write_cache_pages_da(mapping, wbc, &mpd); 3057 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
3062 /* 3058 /*
3063 * If we have a contiguous extent of pages and we 3059 * If we have a contiguous extent of pages and we
3064 * haven't done the I/O yet, map the blocks and submit 3060 * haven't done the I/O yet, map the blocks and submit
3065 * them for I/O. 3061 * them for I/O.
3066 */ 3062 */
3067 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 3063 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
3068 if (mpage_da_map_blocks(&mpd) == 0) 3064 mpage_da_map_and_submit(&mpd);
3069 mpage_da_submit_io(&mpd);
3070 mpd.io_done = 1;
3071 ret = MPAGE_DA_EXTENT_TAIL; 3065 ret = MPAGE_DA_EXTENT_TAIL;
3072 } 3066 }
3073 trace_ext4_da_write_pages(inode, &mpd); 3067 trace_ext4_da_write_pages(inode, &mpd);
@@ -3114,14 +3108,13 @@ retry:
3114 __func__, wbc->nr_to_write, ret); 3108 __func__, wbc->nr_to_write, ret);
3115 3109
3116 /* Update index */ 3110 /* Update index */
3117 index += pages_written;
3118 wbc->range_cyclic = range_cyclic; 3111 wbc->range_cyclic = range_cyclic;
3119 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 3112 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3120 /* 3113 /*
3121 * set the writeback_index so that range_cyclic 3114 * set the writeback_index so that range_cyclic
3122 * mode will write it back later 3115 * mode will write it back later
3123 */ 3116 */
3124 mapping->writeback_index = index; 3117 mapping->writeback_index = done_index;
3125 3118
3126out_writepages: 3119out_writepages:
3127 wbc->nr_to_write -= nr_to_writebump; 3120 wbc->nr_to_write -= nr_to_writebump;
@@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3456 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3449 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3457} 3450}
3458 3451
3459static void ext4_free_io_end(ext4_io_end_t *io)
3460{
3461 BUG_ON(!io);
3462 if (io->page)
3463 put_page(io->page);
3464 iput(io->inode);
3465 kfree(io);
3466}
3467
3468static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) 3452static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3469{ 3453{
3470 struct buffer_head *head, *bh; 3454 struct buffer_head *head, *bh;
@@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3641 EXT4_GET_BLOCKS_IO_CREATE_EXT); 3625 EXT4_GET_BLOCKS_IO_CREATE_EXT);
3642} 3626}
3643 3627
3644static void dump_completed_IO(struct inode * inode)
3645{
3646#ifdef EXT4_DEBUG
3647 struct list_head *cur, *before, *after;
3648 ext4_io_end_t *io, *io0, *io1;
3649 unsigned long flags;
3650
3651 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3652 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3653 return;
3654 }
3655
3656 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3657 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3658 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3659 cur = &io->list;
3660 before = cur->prev;
3661 io0 = container_of(before, ext4_io_end_t, list);
3662 after = cur->next;
3663 io1 = container_of(after, ext4_io_end_t, list);
3664
3665 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3666 io, inode->i_ino, io0, io1);
3667 }
3668 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3669#endif
3670}
3671
3672/*
3673 * check a range of space and convert unwritten extents to written.
3674 */
3675static int ext4_end_io_nolock(ext4_io_end_t *io)
3676{
3677 struct inode *inode = io->inode;
3678 loff_t offset = io->offset;
3679 ssize_t size = io->size;
3680 int ret = 0;
3681
3682 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3683 "list->prev 0x%p\n",
3684 io, inode->i_ino, io->list.next, io->list.prev);
3685
3686 if (list_empty(&io->list))
3687 return ret;
3688
3689 if (io->flag != EXT4_IO_UNWRITTEN)
3690 return ret;
3691
3692 ret = ext4_convert_unwritten_extents(inode, offset, size);
3693 if (ret < 0) {
3694 printk(KERN_EMERG "%s: failed to convert unwritten"
3695 "extents to written extents, error is %d"
3696 " io is still on inode %lu aio dio list\n",
3697 __func__, ret, inode->i_ino);
3698 return ret;
3699 }
3700
3701 if (io->iocb)
3702 aio_complete(io->iocb, io->result, 0);
3703 /* clear the DIO AIO unwritten flag */
3704 io->flag = 0;
3705 return ret;
3706}
3707
3708/*
3709 * work on completed aio dio IO, to convert unwritten extents to extents
3710 */
3711static void ext4_end_io_work(struct work_struct *work)
3712{
3713 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3714 struct inode *inode = io->inode;
3715 struct ext4_inode_info *ei = EXT4_I(inode);
3716 unsigned long flags;
3717 int ret;
3718
3719 mutex_lock(&inode->i_mutex);
3720 ret = ext4_end_io_nolock(io);
3721 if (ret < 0) {
3722 mutex_unlock(&inode->i_mutex);
3723 return;
3724 }
3725
3726 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3727 if (!list_empty(&io->list))
3728 list_del_init(&io->list);
3729 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3730 mutex_unlock(&inode->i_mutex);
3731 ext4_free_io_end(io);
3732}
3733
3734/*
3735 * This function is called from ext4_sync_file().
3736 *
3737 * When IO is completed, the work to convert unwritten extents to
3738 * written is queued on workqueue but may not get immediately
3739 * scheduled. When fsync is called, we need to ensure the
3740 * conversion is complete before fsync returns.
3741 * The inode keeps track of a list of pending/completed IO that
3742 * might needs to do the conversion. This function walks through
3743 * the list and convert the related unwritten extents for completed IO
3744 * to written.
3745 * The function return the number of pending IOs on success.
3746 */
3747int flush_completed_IO(struct inode *inode)
3748{
3749 ext4_io_end_t *io;
3750 struct ext4_inode_info *ei = EXT4_I(inode);
3751 unsigned long flags;
3752 int ret = 0;
3753 int ret2 = 0;
3754
3755 if (list_empty(&ei->i_completed_io_list))
3756 return ret;
3757
3758 dump_completed_IO(inode);
3759 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3760 while (!list_empty(&ei->i_completed_io_list)){
3761 io = list_entry(ei->i_completed_io_list.next,
3762 ext4_io_end_t, list);
3763 /*
3764 * Calling ext4_end_io_nolock() to convert completed
3765 * IO to written.
3766 *
3767 * When ext4_sync_file() is called, run_queue() may already
3768 * about to flush the work corresponding to this io structure.
3769 * It will be upset if it founds the io structure related
3770 * to the work-to-be schedule is freed.
3771 *
3772 * Thus we need to keep the io structure still valid here after
3773 * convertion finished. The io structure has a flag to
3774 * avoid double converting from both fsync and background work
3775 * queue work.
3776 */
3777 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3778 ret = ext4_end_io_nolock(io);
3779 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3780 if (ret < 0)
3781 ret2 = ret;
3782 else
3783 list_del_init(&io->list);
3784 }
3785 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3786 return (ret2 < 0) ? ret2 : 0;
3787}
3788
3789static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3790{
3791 ext4_io_end_t *io = NULL;
3792
3793 io = kmalloc(sizeof(*io), flags);
3794
3795 if (io) {
3796 igrab(inode);
3797 io->inode = inode;
3798 io->flag = 0;
3799 io->offset = 0;
3800 io->size = 0;
3801 io->page = NULL;
3802 io->iocb = NULL;
3803 io->result = 0;
3804 INIT_WORK(&io->work, ext4_end_io_work);
3805 INIT_LIST_HEAD(&io->list);
3806 }
3807
3808 return io;
3809}
3810
3811static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3628static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3812 ssize_t size, void *private, int ret, 3629 ssize_t size, void *private, int ret,
3813 bool is_async) 3630 bool is_async)
@@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3827 size); 3644 size);
3828 3645
3829 /* if not aio dio with unwritten extents, just free io and return */ 3646 /* if not aio dio with unwritten extents, just free io and return */
3830 if (io_end->flag != EXT4_IO_UNWRITTEN){ 3647 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3831 ext4_free_io_end(io_end); 3648 ext4_free_io_end(io_end);
3832 iocb->private = NULL; 3649 iocb->private = NULL;
3833out: 3650out:
@@ -3844,14 +3661,14 @@ out:
3844 } 3661 }
3845 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3662 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3846 3663
3847 /* queue the work to convert unwritten extents to written */
3848 queue_work(wq, &io_end->work);
3849
3850 /* Add the io_end to per-inode completed aio dio list*/ 3664 /* Add the io_end to per-inode completed aio dio list*/
3851 ei = EXT4_I(io_end->inode); 3665 ei = EXT4_I(io_end->inode);
3852 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 3666 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3853 list_add_tail(&io_end->list, &ei->i_completed_io_list); 3667 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3854 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 3668 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3669
3670 /* queue the work to convert unwritten extents to written */
3671 queue_work(wq, &io_end->work);
3855 iocb->private = NULL; 3672 iocb->private = NULL;
3856} 3673}
3857 3674
@@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3872 goto out; 3689 goto out;
3873 } 3690 }
3874 3691
3875 io_end->flag = EXT4_IO_UNWRITTEN; 3692 io_end->flag = EXT4_IO_END_UNWRITTEN;
3876 inode = io_end->inode; 3693 inode = io_end->inode;
3877 3694
3878 /* Add the io_end to per-inode completed io list*/ 3695 /* Add the io_end to per-inode completed io list*/
@@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5463{ 5280{
5464 struct inode *inode = dentry->d_inode; 5281 struct inode *inode = dentry->d_inode;
5465 int error, rc = 0; 5282 int error, rc = 0;
5283 int orphan = 0;
5466 const unsigned int ia_valid = attr->ia_valid; 5284 const unsigned int ia_valid = attr->ia_valid;
5467 5285
5468 error = inode_change_ok(inode, attr); 5286 error = inode_change_ok(inode, attr);
@@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5518 error = PTR_ERR(handle); 5336 error = PTR_ERR(handle);
5519 goto err_out; 5337 goto err_out;
5520 } 5338 }
5521 5339 if (ext4_handle_valid(handle)) {
5522 error = ext4_orphan_add(handle, inode); 5340 error = ext4_orphan_add(handle, inode);
5341 orphan = 1;
5342 }
5523 EXT4_I(inode)->i_disksize = attr->ia_size; 5343 EXT4_I(inode)->i_disksize = attr->ia_size;
5524 rc = ext4_mark_inode_dirty(handle, inode); 5344 rc = ext4_mark_inode_dirty(handle, inode);
5525 if (!error) 5345 if (!error)
@@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5537 goto err_out; 5357 goto err_out;
5538 } 5358 }
5539 ext4_orphan_del(handle, inode); 5359 ext4_orphan_del(handle, inode);
5360 orphan = 0;
5540 ext4_journal_stop(handle); 5361 ext4_journal_stop(handle);
5541 goto err_out; 5362 goto err_out;
5542 } 5363 }
@@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5559 * If the call to ext4_truncate failed to get a transaction handle at 5380 * If the call to ext4_truncate failed to get a transaction handle at
5560 * all, we need to clean up the in-core orphan list manually. 5381 * all, we need to clean up the in-core orphan list manually.
5561 */ 5382 */
5562 if (inode->i_nlink) 5383 if (orphan && inode->i_nlink)
5563 ext4_orphan_del(NULL, inode); 5384 ext4_orphan_del(NULL, inode);
5564 5385
5565 if (!rc && (ia_valid & ATTR_MODE)) 5386 if (!rc && (ia_valid & ATTR_MODE))
@@ -5642,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5642 * 5463 *
5643 * Also account for superblock, inode, quota and xattr blocks 5464 * Also account for superblock, inode, quota and xattr blocks
5644 */ 5465 */
5645int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5466static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5646{ 5467{
5647 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 5468 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5648 int gdpblocks; 5469 int gdpblocks;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 42f77b1dc72d..c58eba34724a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
338static struct kmem_cache *ext4_pspace_cachep; 338static struct kmem_cache *ext4_pspace_cachep;
339static struct kmem_cache *ext4_ac_cachep; 339static struct kmem_cache *ext4_ac_cachep;
340static struct kmem_cache *ext4_free_ext_cachep; 340static struct kmem_cache *ext4_free_ext_cachep;
341
342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
341static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
342 ext4_group_t group); 350 ext4_group_t group);
343static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
939} 947}
940 948
941/* 949/*
950 * lock the group_info alloc_sem of all the groups
951 * belonging to the same buddy cache page. This
952 * make sure other parallel operation on the buddy
953 * cache doesn't happen whild holding the buddy cache
954 * lock
955 */
956static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
957 ext4_group_t group)
958{
959 int i;
960 int block, pnum;
961 int blocks_per_page;
962 int groups_per_page;
963 ext4_group_t ngroups = ext4_get_groups_count(sb);
964 ext4_group_t first_group;
965 struct ext4_group_info *grp;
966
967 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
968 /*
969 * the buddy cache inode stores the block bitmap
970 * and buddy information in consecutive blocks.
971 * So for each group we need two blocks.
972 */
973 block = group * 2;
974 pnum = block / blocks_per_page;
975 first_group = pnum * blocks_per_page / 2;
976
977 groups_per_page = blocks_per_page >> 1;
978 if (groups_per_page == 0)
979 groups_per_page = 1;
980 /* read all groups the page covers into the cache */
981 for (i = 0; i < groups_per_page; i++) {
982
983 if ((first_group + i) >= ngroups)
984 break;
985 grp = ext4_get_group_info(sb, first_group + i);
986 /* take all groups write allocation
987 * semaphore. This make sure there is
988 * no block allocation going on in any
989 * of that groups
990 */
991 down_write_nested(&grp->alloc_sem, i);
992 }
993 return i;
994}
995
996static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
997 ext4_group_t group, int locked_group)
998{
999 int i;
1000 int block, pnum;
1001 int blocks_per_page;
1002 ext4_group_t first_group;
1003 struct ext4_group_info *grp;
1004
1005 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1006 /*
1007 * the buddy cache inode stores the block bitmap
1008 * and buddy information in consecutive blocks.
1009 * So for each group we need two blocks.
1010 */
1011 block = group * 2;
1012 pnum = block / blocks_per_page;
1013 first_group = pnum * blocks_per_page / 2;
1014 /* release locks on all the groups */
1015 for (i = 0; i < locked_group; i++) {
1016
1017 grp = ext4_get_group_info(sb, first_group + i);
1018 /* take all groups write allocation
1019 * semaphore. This make sure there is
1020 * no block allocation going on in any
1021 * of that groups
1022 */
1023 up_write(&grp->alloc_sem);
1024 }
1025
1026}
1027
1028/*
942 * Locking note: This routine calls ext4_mb_init_cache(), which takes the 1029 * Locking note: This routine calls ext4_mb_init_cache(), which takes the
943 * block group lock of all groups for this page; do not hold the BG lock when 1030 * block group lock of all groups for this page; do not hold the BG lock when
944 * calling this routine! 1031 * calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1915 return 0; 2002 return 0;
1916} 2003}
1917 2004
1918/*
1919 * lock the group_info alloc_sem of all the groups
1920 * belonging to the same buddy cache page. This
1921 * make sure other parallel operation on the buddy
1922 * cache doesn't happen whild holding the buddy cache
1923 * lock
1924 */
1925int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1926{
1927 int i;
1928 int block, pnum;
1929 int blocks_per_page;
1930 int groups_per_page;
1931 ext4_group_t ngroups = ext4_get_groups_count(sb);
1932 ext4_group_t first_group;
1933 struct ext4_group_info *grp;
1934
1935 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1936 /*
1937 * the buddy cache inode stores the block bitmap
1938 * and buddy information in consecutive blocks.
1939 * So for each group we need two blocks.
1940 */
1941 block = group * 2;
1942 pnum = block / blocks_per_page;
1943 first_group = pnum * blocks_per_page / 2;
1944
1945 groups_per_page = blocks_per_page >> 1;
1946 if (groups_per_page == 0)
1947 groups_per_page = 1;
1948 /* read all groups the page covers into the cache */
1949 for (i = 0; i < groups_per_page; i++) {
1950
1951 if ((first_group + i) >= ngroups)
1952 break;
1953 grp = ext4_get_group_info(sb, first_group + i);
1954 /* take all groups write allocation
1955 * semaphore. This make sure there is
1956 * no block allocation going on in any
1957 * of that groups
1958 */
1959 down_write_nested(&grp->alloc_sem, i);
1960 }
1961 return i;
1962}
1963
1964void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1965 ext4_group_t group, int locked_group)
1966{
1967 int i;
1968 int block, pnum;
1969 int blocks_per_page;
1970 ext4_group_t first_group;
1971 struct ext4_group_info *grp;
1972
1973 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1974 /*
1975 * the buddy cache inode stores the block bitmap
1976 * and buddy information in consecutive blocks.
1977 * So for each group we need two blocks.
1978 */
1979 block = group * 2;
1980 pnum = block / blocks_per_page;
1981 first_group = pnum * blocks_per_page / 2;
1982 /* release locks on all the groups */
1983 for (i = 0; i < locked_group; i++) {
1984
1985 grp = ext4_get_group_info(sb, first_group + i);
1986 /* take all groups write allocation
1987 * semaphore. This make sure there is
1988 * no block allocation going on in any
1989 * of that groups
1990 */
1991 up_write(&grp->alloc_sem);
1992 }
1993
1994}
1995
1996static noinline_for_stack int 2005static noinline_for_stack int
1997ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 2006ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1998{ 2007{
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
2233 .release = seq_release, 2242 .release = seq_release,
2234}; 2243};
2235 2244
2245static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2246{
2247 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2248 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2249
2250 BUG_ON(!cachep);
2251 return cachep;
2252}
2236 2253
2237/* Create and initialize ext4_group_info data for the given group. */ 2254/* Create and initialize ext4_group_info data for the given group. */
2238int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2255int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2239 struct ext4_group_desc *desc) 2256 struct ext4_group_desc *desc)
2240{ 2257{
2241 int i, len; 2258 int i;
2242 int metalen = 0; 2259 int metalen = 0;
2243 struct ext4_sb_info *sbi = EXT4_SB(sb); 2260 struct ext4_sb_info *sbi = EXT4_SB(sb);
2244 struct ext4_group_info **meta_group_info; 2261 struct ext4_group_info **meta_group_info;
2262 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2245 2263
2246 /* 2264 /*
2247 * First check if this group is the first of a reserved block. 2265 * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2261 meta_group_info; 2279 meta_group_info;
2262 } 2280 }
2263 2281
2264 /*
2265 * calculate needed size. if change bb_counters size,
2266 * don't forget about ext4_mb_generate_buddy()
2267 */
2268 len = offsetof(typeof(**meta_group_info),
2269 bb_counters[sb->s_blocksize_bits + 2]);
2270
2271 meta_group_info = 2282 meta_group_info =
2272 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2283 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2273 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2284 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2274 2285
2275 meta_group_info[i] = kzalloc(len, GFP_KERNEL); 2286 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2276 if (meta_group_info[i] == NULL) { 2287 if (meta_group_info[i] == NULL) {
2277 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2288 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2278 goto exit_group_info; 2289 goto exit_group_info;
2279 } 2290 }
2291 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2280 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2292 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2281 &(meta_group_info[i]->bb_state)); 2293 &(meta_group_info[i]->bb_state));
2282 2294
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2331 int num_meta_group_infos_max; 2343 int num_meta_group_infos_max;
2332 int array_size; 2344 int array_size;
2333 struct ext4_group_desc *desc; 2345 struct ext4_group_desc *desc;
2346 struct kmem_cache *cachep;
2334 2347
2335 /* This is the number of blocks used by GDT */ 2348 /* This is the number of blocks used by GDT */
2336 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2349 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
2389 return 0; 2402 return 0;
2390 2403
2391err_freebuddy: 2404err_freebuddy:
2405 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2392 while (i-- > 0) 2406 while (i-- > 0)
2393 kfree(ext4_get_group_info(sb, i)); 2407 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2394 i = num_meta_group_infos; 2408 i = num_meta_group_infos;
2395 while (i-- > 0) 2409 while (i-- > 0)
2396 kfree(sbi->s_group_info[i]); 2410 kfree(sbi->s_group_info[i]);
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2407 unsigned offset; 2421 unsigned offset;
2408 unsigned max; 2422 unsigned max;
2409 int ret; 2423 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2410 2427
2411 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2412 2429
2413 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2430 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2414 if (sbi->s_mb_offsets == NULL) { 2431 if (sbi->s_mb_offsets == NULL) {
2415 return -ENOMEM; 2432 ret = -ENOMEM;
2433 goto out;
2416 } 2434 }
2417 2435
2418 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); 2436 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2419 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2437 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2420 if (sbi->s_mb_maxs == NULL) { 2438 if (sbi->s_mb_maxs == NULL) {
2421 kfree(sbi->s_mb_offsets); 2439 ret = -ENOMEM;
2422 return -ENOMEM; 2440 goto out;
2441 }
2442
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2444 cachep = ext4_groupinfo_caches[cache_index];
2445 if (!cachep) {
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2423 } 2466 }
2424 2467
2425 /* order 0 is regular bitmap */ 2468 /* order 0 is regular bitmap */
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 /* init file for buddy data */ 2483 /* init file for buddy data */
2441 ret = ext4_mb_init_backend(sb); 2484 ret = ext4_mb_init_backend(sb);
2442 if (ret != 0) { 2485 if (ret != 0) {
2443 kfree(sbi->s_mb_offsets); 2486 goto out;
2444 kfree(sbi->s_mb_maxs);
2445 return ret;
2446 } 2487 }
2447 2488
2448 spin_lock_init(&sbi->s_md_lock); 2489 spin_lock_init(&sbi->s_md_lock);
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2457 2498
2458 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2499 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2459 if (sbi->s_locality_groups == NULL) { 2500 if (sbi->s_locality_groups == NULL) {
2460 kfree(sbi->s_mb_offsets); 2501 ret = -ENOMEM;
2461 kfree(sbi->s_mb_maxs); 2502 goto out;
2462 return -ENOMEM;
2463 } 2503 }
2464 for_each_possible_cpu(i) { 2504 for_each_possible_cpu(i) {
2465 struct ext4_locality_group *lg; 2505 struct ext4_locality_group *lg;
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2476 2516
2477 if (sbi->s_journal) 2517 if (sbi->s_journal)
2478 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2518 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2479 return 0; 2519out:
2520 if (ret) {
2521 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 }
2525 return ret;
2480} 2526}
2481 2527
2482/* need to called with the ext4 group lock held */ 2528/* need to called with the ext4 group lock held */
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
2504 int num_meta_group_infos; 2550 int num_meta_group_infos;
2505 struct ext4_group_info *grinfo; 2551 struct ext4_group_info *grinfo;
2506 struct ext4_sb_info *sbi = EXT4_SB(sb); 2552 struct ext4_sb_info *sbi = EXT4_SB(sb);
2553 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2507 2554
2508 if (sbi->s_group_info) { 2555 if (sbi->s_group_info) {
2509 for (i = 0; i < ngroups; i++) { 2556 for (i = 0; i < ngroups; i++) {
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
2514 ext4_lock_group(sb, i); 2561 ext4_lock_group(sb, i);
2515 ext4_mb_cleanup_pa(grinfo); 2562 ext4_mb_cleanup_pa(grinfo);
2516 ext4_unlock_group(sb, i); 2563 ext4_unlock_group(sb, i);
2517 kfree(grinfo); 2564 kmem_cache_free(cachep, grinfo);
2518 } 2565 }
2519 num_meta_group_infos = (ngroups + 2566 num_meta_group_infos = (ngroups +
2520 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2567 EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
2558 return 0; 2605 return 0;
2559} 2606}
2560 2607
2561static inline void ext4_issue_discard(struct super_block *sb, 2608static inline int ext4_issue_discard(struct super_block *sb,
2562 ext4_group_t block_group, ext4_grpblk_t block, int count) 2609 ext4_group_t block_group, ext4_grpblk_t block, int count)
2563{ 2610{
2564 int ret; 2611 int ret;
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
2568 trace_ext4_discard_blocks(sb, 2615 trace_ext4_discard_blocks(sb,
2569 (unsigned long long) discard_block, count); 2616 (unsigned long long) discard_block, count);
2570 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2617 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2571 if (ret == EOPNOTSUPP) { 2618 if (ret == -EOPNOTSUPP) {
2572 ext4_warning(sb, "discard not supported, disabling"); 2619 ext4_warning(sb, "discard not supported, disabling");
2573 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); 2620 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2574 } 2621 }
2622 return ret;
2575} 2623}
2576 2624
2577/* 2625/*
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
2659 2707
2660#endif 2708#endif
2661 2709
2662int __init init_ext4_mballoc(void) 2710int __init ext4_init_mballoc(void)
2663{ 2711{
2664 ext4_pspace_cachep = 2712 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2665 kmem_cache_create("ext4_prealloc_space", 2713 SLAB_RECLAIM_ACCOUNT);
2666 sizeof(struct ext4_prealloc_space),
2667 0, SLAB_RECLAIM_ACCOUNT, NULL);
2668 if (ext4_pspace_cachep == NULL) 2714 if (ext4_pspace_cachep == NULL)
2669 return -ENOMEM; 2715 return -ENOMEM;
2670 2716
2671 ext4_ac_cachep = 2717 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2672 kmem_cache_create("ext4_alloc_context", 2718 SLAB_RECLAIM_ACCOUNT);
2673 sizeof(struct ext4_allocation_context),
2674 0, SLAB_RECLAIM_ACCOUNT, NULL);
2675 if (ext4_ac_cachep == NULL) { 2719 if (ext4_ac_cachep == NULL) {
2676 kmem_cache_destroy(ext4_pspace_cachep); 2720 kmem_cache_destroy(ext4_pspace_cachep);
2677 return -ENOMEM; 2721 return -ENOMEM;
2678 } 2722 }
2679 2723
2680 ext4_free_ext_cachep = 2724 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2681 kmem_cache_create("ext4_free_block_extents", 2725 SLAB_RECLAIM_ACCOUNT);
2682 sizeof(struct ext4_free_data),
2683 0, SLAB_RECLAIM_ACCOUNT, NULL);
2684 if (ext4_free_ext_cachep == NULL) { 2726 if (ext4_free_ext_cachep == NULL) {
2685 kmem_cache_destroy(ext4_pspace_cachep); 2727 kmem_cache_destroy(ext4_pspace_cachep);
2686 kmem_cache_destroy(ext4_ac_cachep); 2728 kmem_cache_destroy(ext4_ac_cachep);
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void)
2690 return 0; 2732 return 0;
2691} 2733}
2692 2734
2693void exit_ext4_mballoc(void) 2735void ext4_exit_mballoc(void)
2694{ 2736{
2737 int i;
2695 /* 2738 /*
2696 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2697 * before destroying the slab cache. 2740 * before destroying the slab cache.
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void)
2700 kmem_cache_destroy(ext4_pspace_cachep); 2743 kmem_cache_destroy(ext4_pspace_cachep);
2701 kmem_cache_destroy(ext4_ac_cachep); 2744 kmem_cache_destroy(ext4_ac_cachep);
2702 kmem_cache_destroy(ext4_free_ext_cachep); 2745 kmem_cache_destroy(ext4_free_ext_cachep);
2746
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2703 ext4_remove_debugfs_entry(); 2755 ext4_remove_debugfs_entry();
2704} 2756}
2705 2757
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3536 */ 3588 */
3537static noinline_for_stack int 3589static noinline_for_stack int
3538ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, 3590ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3539 struct ext4_prealloc_space *pa, 3591 struct ext4_prealloc_space *pa)
3540 struct ext4_allocation_context *ac)
3541{ 3592{
3542 struct super_block *sb = e4b->bd_sb; 3593 struct super_block *sb = e4b->bd_sb;
3543 struct ext4_sb_info *sbi = EXT4_SB(sb); 3594 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3555 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3606 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3556 end = bit + pa->pa_len; 3607 end = bit + pa->pa_len;
3557 3608
3558 if (ac) {
3559 ac->ac_sb = sb;
3560 ac->ac_inode = pa->pa_inode;
3561 }
3562
3563 while (bit < end) { 3609 while (bit < end) {
3564 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3610 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3565 if (bit >= end) 3611 if (bit >= end)
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3570 (unsigned) next - bit, (unsigned) group); 3616 (unsigned) next - bit, (unsigned) group);
3571 free += next - bit; 3617 free += next - bit;
3572 3618
3573 if (ac) { 3619 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3574 ac->ac_b_ex.fe_group = group; 3620 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
3575 ac->ac_b_ex.fe_start = bit; 3621 grp_blk_start + bit, next - bit);
3576 ac->ac_b_ex.fe_len = next - bit;
3577 ac->ac_b_ex.fe_logical = 0;
3578 trace_ext4_mballoc_discard(ac);
3579 }
3580
3581 trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
3582 next - bit);
3583 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3622 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3584 bit = next + 1; 3623 bit = next + 1;
3585 } 3624 }
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3602 3641
3603static noinline_for_stack int 3642static noinline_for_stack int
3604ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3643ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3605 struct ext4_prealloc_space *pa, 3644 struct ext4_prealloc_space *pa)
3606 struct ext4_allocation_context *ac)
3607{ 3645{
3608 struct super_block *sb = e4b->bd_sb; 3646 struct super_block *sb = e4b->bd_sb;
3609 ext4_group_t group; 3647 ext4_group_t group;
3610 ext4_grpblk_t bit; 3648 ext4_grpblk_t bit;
3611 3649
3612 trace_ext4_mb_release_group_pa(sb, ac, pa); 3650 trace_ext4_mb_release_group_pa(sb, pa);
3613 BUG_ON(pa->pa_deleted == 0); 3651 BUG_ON(pa->pa_deleted == 0);
3614 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3652 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3615 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3653 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3616 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3654 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3617 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3655 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3618 3656 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3619 if (ac) {
3620 ac->ac_sb = sb;
3621 ac->ac_inode = NULL;
3622 ac->ac_b_ex.fe_group = group;
3623 ac->ac_b_ex.fe_start = bit;
3624 ac->ac_b_ex.fe_len = pa->pa_len;
3625 ac->ac_b_ex.fe_logical = 0;
3626 trace_ext4_mballoc_discard(ac);
3627 }
3628 3657
3629 return 0; 3658 return 0;
3630} 3659}
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3645 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3674 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3646 struct buffer_head *bitmap_bh = NULL; 3675 struct buffer_head *bitmap_bh = NULL;
3647 struct ext4_prealloc_space *pa, *tmp; 3676 struct ext4_prealloc_space *pa, *tmp;
3648 struct ext4_allocation_context *ac;
3649 struct list_head list; 3677 struct list_head list;
3650 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
3651 int err; 3679 int err;
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3674 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3702 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3675 3703
3676 INIT_LIST_HEAD(&list); 3704 INIT_LIST_HEAD(&list);
3677 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3678 if (ac)
3679 ac->ac_sb = sb;
3680repeat: 3705repeat:
3681 ext4_lock_group(sb, group); 3706 ext4_lock_group(sb, group);
3682 list_for_each_entry_safe(pa, tmp, 3707 list_for_each_entry_safe(pa, tmp,
@@ -3731,9 +3756,9 @@ repeat:
3731 spin_unlock(pa->pa_obj_lock); 3756 spin_unlock(pa->pa_obj_lock);
3732 3757
3733 if (pa->pa_type == MB_GROUP_PA) 3758 if (pa->pa_type == MB_GROUP_PA)
3734 ext4_mb_release_group_pa(&e4b, pa, ac); 3759 ext4_mb_release_group_pa(&e4b, pa);
3735 else 3760 else
3736 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3761 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3737 3762
3738 list_del(&pa->u.pa_tmp_list); 3763 list_del(&pa->u.pa_tmp_list);
3739 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3764 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3741,8 +3766,6 @@ repeat:
3741 3766
3742out: 3767out:
3743 ext4_unlock_group(sb, group); 3768 ext4_unlock_group(sb, group);
3744 if (ac)
3745 kmem_cache_free(ext4_ac_cachep, ac);
3746 ext4_mb_unload_buddy(&e4b); 3769 ext4_mb_unload_buddy(&e4b);
3747 put_bh(bitmap_bh); 3770 put_bh(bitmap_bh);
3748 return free; 3771 return free;
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
3763 struct super_block *sb = inode->i_sb; 3786 struct super_block *sb = inode->i_sb;
3764 struct buffer_head *bitmap_bh = NULL; 3787 struct buffer_head *bitmap_bh = NULL;
3765 struct ext4_prealloc_space *pa, *tmp; 3788 struct ext4_prealloc_space *pa, *tmp;
3766 struct ext4_allocation_context *ac;
3767 ext4_group_t group = 0; 3789 ext4_group_t group = 0;
3768 struct list_head list; 3790 struct list_head list;
3769 struct ext4_buddy e4b; 3791 struct ext4_buddy e4b;
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
3779 3801
3780 INIT_LIST_HEAD(&list); 3802 INIT_LIST_HEAD(&list);
3781 3803
3782 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3783 if (ac) {
3784 ac->ac_sb = sb;
3785 ac->ac_inode = inode;
3786 }
3787repeat: 3804repeat:
3788 /* first, collect all pa's in the inode */ 3805 /* first, collect all pa's in the inode */
3789 spin_lock(&ei->i_prealloc_lock); 3806 spin_lock(&ei->i_prealloc_lock);
@@ -3853,7 +3870,7 @@ repeat:
3853 3870
3854 ext4_lock_group(sb, group); 3871 ext4_lock_group(sb, group);
3855 list_del(&pa->pa_group_list); 3872 list_del(&pa->pa_group_list);
3856 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3873 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3857 ext4_unlock_group(sb, group); 3874 ext4_unlock_group(sb, group);
3858 3875
3859 ext4_mb_unload_buddy(&e4b); 3876 ext4_mb_unload_buddy(&e4b);
@@ -3862,8 +3879,6 @@ repeat:
3862 list_del(&pa->u.pa_tmp_list); 3879 list_del(&pa->u.pa_tmp_list);
3863 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3880 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3864 } 3881 }
3865 if (ac)
3866 kmem_cache_free(ext4_ac_cachep, ac);
3867} 3882}
3868 3883
3869/* 3884/*
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4061 struct ext4_buddy e4b; 4076 struct ext4_buddy e4b;
4062 struct list_head discard_list; 4077 struct list_head discard_list;
4063 struct ext4_prealloc_space *pa, *tmp; 4078 struct ext4_prealloc_space *pa, *tmp;
4064 struct ext4_allocation_context *ac;
4065 4079
4066 mb_debug(1, "discard locality group preallocation\n"); 4080 mb_debug(1, "discard locality group preallocation\n");
4067 4081
4068 INIT_LIST_HEAD(&discard_list); 4082 INIT_LIST_HEAD(&discard_list);
4069 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4070 if (ac)
4071 ac->ac_sb = sb;
4072 4083
4073 spin_lock(&lg->lg_prealloc_lock); 4084 spin_lock(&lg->lg_prealloc_lock);
4074 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4085 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4120 } 4131 }
4121 ext4_lock_group(sb, group); 4132 ext4_lock_group(sb, group);
4122 list_del(&pa->pa_group_list); 4133 list_del(&pa->pa_group_list);
4123 ext4_mb_release_group_pa(&e4b, pa, ac); 4134 ext4_mb_release_group_pa(&e4b, pa);
4124 ext4_unlock_group(sb, group); 4135 ext4_unlock_group(sb, group);
4125 4136
4126 ext4_mb_unload_buddy(&e4b); 4137 ext4_mb_unload_buddy(&e4b);
4127 list_del(&pa->u.pa_tmp_list); 4138 list_del(&pa->u.pa_tmp_list);
4128 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 4139 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4129 } 4140 }
4130 if (ac)
4131 kmem_cache_free(ext4_ac_cachep, ac);
4132} 4141}
4133 4142
4134/* 4143/*
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4492{ 4501{
4493 struct buffer_head *bitmap_bh = NULL; 4502 struct buffer_head *bitmap_bh = NULL;
4494 struct super_block *sb = inode->i_sb; 4503 struct super_block *sb = inode->i_sb;
4495 struct ext4_allocation_context *ac = NULL;
4496 struct ext4_group_desc *gdp; 4504 struct ext4_group_desc *gdp;
4497 unsigned long freed = 0; 4505 unsigned long freed = 0;
4498 unsigned int overflow; 4506 unsigned int overflow;
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4532 if (!bh) 4540 if (!bh)
4533 tbh = sb_find_get_block(inode->i_sb, 4541 tbh = sb_find_get_block(inode->i_sb,
4534 block + i); 4542 block + i);
4543 if (unlikely(!tbh))
4544 continue;
4535 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4545 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4536 inode, tbh, block + i); 4546 inode, tbh, block + i);
4537 } 4547 }
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4547 if (!ext4_should_writeback_data(inode)) 4557 if (!ext4_should_writeback_data(inode))
4548 flags |= EXT4_FREE_BLOCKS_METADATA; 4558 flags |= EXT4_FREE_BLOCKS_METADATA;
4549 4559
4550 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4551 if (ac) {
4552 ac->ac_inode = inode;
4553 ac->ac_sb = sb;
4554 }
4555
4556do_more: 4560do_more:
4557 overflow = 0; 4561 overflow = 0;
4558 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4562 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4610,12 +4614,7 @@ do_more:
4610 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4614 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4611 } 4615 }
4612#endif 4616#endif
4613 if (ac) { 4617 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4614 ac->ac_b_ex.fe_group = block_group;
4615 ac->ac_b_ex.fe_start = bit;
4616 ac->ac_b_ex.fe_len = count;
4617 trace_ext4_mballoc_free(ac);
4618 }
4619 4618
4620 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4619 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4621 if (err) 4620 if (err)
@@ -4641,12 +4640,12 @@ do_more:
4641 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4642 * them with group lock_held 4641 * them with group lock_held
4643 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4644 ext4_lock_group(sb, block_group); 4645 ext4_lock_group(sb, block_group);
4645 mb_clear_bits(bitmap_bh->b_data, bit, count); 4646 mb_clear_bits(bitmap_bh->b_data, bit, count);
4646 mb_free_blocks(inode, &e4b, bit, count); 4647 mb_free_blocks(inode, &e4b, bit, count);
4647 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4648 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4648 if (test_opt(sb, DISCARD))
4649 ext4_issue_discard(sb, block_group, bit, count);
4650 } 4649 }
4651 4650
4652 ret = ext4_free_blks_count(sb, gdp) + count; 4651 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4686,7 +4685,190 @@ error_return:
4686 dquot_free_block(inode, freed); 4685 dquot_free_block(inode, freed);
4687 brelse(bitmap_bh); 4686 brelse(bitmap_bh);
4688 ext4_std_error(sb, err); 4687 ext4_std_error(sb, err);
4689 if (ac)
4690 kmem_cache_free(ext4_ac_cachep, ac);
4691 return; 4688 return;
4692} 4689}
4690
4691/**
4692 * ext4_trim_extent -- function to TRIM one single free extent in the group
4693 * @sb: super block for the file system
4694 * @start: starting block of the free extent in the alloc. group
4695 * @count: number of blocks to TRIM
4696 * @group: alloc. group we are working with
4697 * @e4b: ext4 buddy for the group
4698 *
4699 * Trim "count" blocks starting at "start" in the "group". To assure that no
4700 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4701 * be called with under the group lock.
4702 */
4703static int ext4_trim_extent(struct super_block *sb, int start, int count,
4704 ext4_group_t group, struct ext4_buddy *e4b)
4705{
4706 struct ext4_free_extent ex;
4707 int ret = 0;
4708
4709 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4710
4711 ex.fe_start = start;
4712 ex.fe_group = group;
4713 ex.fe_len = count;
4714
4715 /*
4716 * Mark blocks used, so no one can reuse them while
4717 * being trimmed.
4718 */
4719 mb_mark_used(e4b, &ex);
4720 ext4_unlock_group(sb, group);
4721
4722 ret = ext4_issue_discard(sb, group, start, count);
4723 if (ret)
4724 ext4_std_error(sb, ret);
4725
4726 ext4_lock_group(sb, group);
4727 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4728 return ret;
4729}
4730
4731/**
4732 * ext4_trim_all_free -- function to trim all free space in alloc. group
4733 * @sb: super block for file system
4734 * @e4b: ext4 buddy
4735 * @start: first group block to examine
4736 * @max: last group block to examine
4737 * @minblocks: minimum extent block count
4738 *
4739 * ext4_trim_all_free walks through group's buddy bitmap searching for free
4740 * extents. When the free block is found, ext4_trim_extent is called to TRIM
4741 * the extent.
4742 *
4743 *
4744 * ext4_trim_all_free walks through group's block bitmap searching for free
4745 * extents. When the free extent is found, mark it as used in group buddy
4746 * bitmap. Then issue a TRIM command on this extent and free the extent in
4747 * the group buddy bitmap. This is done until whole group is scanned.
4748 */
4749ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4750 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4751{
4752 void *bitmap;
4753 ext4_grpblk_t next, count = 0;
4754 ext4_group_t group;
4755 int ret = 0;
4756
4757 BUG_ON(e4b == NULL);
4758
4759 bitmap = e4b->bd_bitmap;
4760 group = e4b->bd_group;
4761 start = (e4b->bd_info->bb_first_free > start) ?
4762 e4b->bd_info->bb_first_free : start;
4763 ext4_lock_group(sb, group);
4764
4765 while (start < max) {
4766 start = mb_find_next_zero_bit(bitmap, max, start);
4767 if (start >= max)
4768 break;
4769 next = mb_find_next_bit(bitmap, max, start);
4770
4771 if ((next - start) >= minblocks) {
4772 ret = ext4_trim_extent(sb, start,
4773 next - start, group, e4b);
4774 if (ret < 0)
4775 break;
4776 count += next - start;
4777 }
4778 start = next + 1;
4779
4780 if (fatal_signal_pending(current)) {
4781 count = -ERESTARTSYS;
4782 break;
4783 }
4784
4785 if (need_resched()) {
4786 ext4_unlock_group(sb, group);
4787 cond_resched();
4788 ext4_lock_group(sb, group);
4789 }
4790
4791 if ((e4b->bd_info->bb_free - count) < minblocks)
4792 break;
4793 }
4794 ext4_unlock_group(sb, group);
4795
4796 ext4_debug("trimmed %d blocks in the group %d\n",
4797 count, group);
4798
4799 if (ret < 0)
4800 count = ret;
4801
4802 return count;
4803}
4804
4805/**
4806 * ext4_trim_fs() -- trim ioctl handle function
4807 * @sb: superblock for filesystem
4808 * @range: fstrim_range structure
4809 *
4810 * start: First Byte to trim
4811 * len: number of Bytes to trim from start
4812 * minlen: minimum extent length in Bytes
4813 * ext4_trim_fs goes through all allocation groups containing Bytes from
4814 * start to start+len. For each such a group ext4_trim_all_free function
4815 * is invoked to trim all free space.
4816 */
4817int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4818{
4819 struct ext4_buddy e4b;
4820 ext4_group_t first_group, last_group;
4821 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4822 ext4_grpblk_t cnt = 0, first_block, last_block;
4823 uint64_t start, len, minlen, trimmed;
4824 int ret = 0;
4825
4826 start = range->start >> sb->s_blocksize_bits;
4827 len = range->len >> sb->s_blocksize_bits;
4828 minlen = range->minlen >> sb->s_blocksize_bits;
4829 trimmed = 0;
4830
4831 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4832 return -EINVAL;
4833
4834 /* Determine first and last group to examine based on start and len */
4835 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4836 &first_group, &first_block);
4837 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4838 &last_group, &last_block);
4839 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4840 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4841
4842 if (first_group > last_group)
4843 return -EINVAL;
4844
4845 for (group = first_group; group <= last_group; group++) {
4846 ret = ext4_mb_load_buddy(sb, group, &e4b);
4847 if (ret) {
4848 ext4_error(sb, "Error in loading buddy "
4849 "information for %u", group);
4850 break;
4851 }
4852
4853 if (len >= EXT4_BLOCKS_PER_GROUP(sb))
4854 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
4855 else
4856 last_block = len;
4857
4858 if (e4b.bd_info->bb_free >= minlen) {
4859 cnt = ext4_trim_all_free(sb, &e4b, first_block,
4860 last_block, minlen);
4861 if (cnt < 0) {
4862 ret = cnt;
4863 ext4_mb_unload_buddy(&e4b);
4864 break;
4865 }
4866 }
4867 ext4_mb_unload_buddy(&e4b);
4868 trimmed += cnt;
4869 first_block = 0;
4870 }
4871 range->len = trimmed * sb->s_blocksize;
4872
4873 return ret;
4874}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 1765c2c50a9b..25f3a974b725 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
412 struct buffer_head *bh; 412 struct buffer_head *bh;
413 struct ext4_extent_header *eh; 413 struct ext4_extent_header *eh;
414 414
415 block = idx_pblock(ix); 415 block = ext4_idx_pblock(ix);
416 bh = sb_bread(inode->i_sb, block); 416 bh = sb_bread(inode->i_sb, block);
417 if (!bh) 417 if (!bh)
418 return -EIO; 418 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 5f1ed9fc913c..b9f3e7862f13 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
86 /* leaf block */ 86 /* leaf block */
87 *extent = ++path[ppos].p_ext; 87 *extent = ++path[ppos].p_ext;
88 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 88 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
89 return 0; 89 return 0;
90 } 90 }
91 91
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
96 96
97 /* index block */ 97 /* index block */
98 path[ppos].p_idx++; 98 path[ppos].p_idx++;
99 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 99 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
100 if (path[ppos+1].p_bh) 100 if (path[ppos+1].p_bh)
101 brelse(path[ppos+1].p_bh); 101 brelse(path[ppos+1].p_bh);
102 path[ppos+1].p_bh = 102 path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
111 path[cur_ppos].p_idx = 111 path[cur_ppos].p_idx =
112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr); 112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
113 path[cur_ppos].p_block = 113 path[cur_ppos].p_block =
114 idx_pblock(path[cur_ppos].p_idx); 114 ext4_idx_pblock(path[cur_ppos].p_idx);
115 if (path[cur_ppos+1].p_bh) 115 if (path[cur_ppos+1].p_bh)
116 brelse(path[cur_ppos+1].p_bh); 116 brelse(path[cur_ppos+1].p_bh);
117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, 117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
133 path[leaf_ppos].p_ext = *extent = 133 path[leaf_ppos].p_ext = *extent =
134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
135 path[leaf_ppos].p_block = 135 path[leaf_ppos].p_block =
136 ext_pblock(path[leaf_ppos].p_ext); 136 ext4_ext_pblock(path[leaf_ppos].p_ext);
137 return 0; 137 return 0;
138 } 138 }
139 } 139 }
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
249 */ 249 */
250 o_end->ee_block = end_ext->ee_block; 250 o_end->ee_block = end_ext->ee_block;
251 o_end->ee_len = end_ext->ee_len; 251 o_end->ee_len = end_ext->ee_len;
252 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 252 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
253 } 253 }
254 254
255 o_start->ee_len = start_ext->ee_len; 255 o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
276 */ 276 */
277 o_end->ee_block = end_ext->ee_block; 277 o_end->ee_block = end_ext->ee_block;
278 o_end->ee_len = end_ext->ee_len; 278 o_end->ee_len = end_ext->ee_len;
279 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 279 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
280 280
281 /* 281 /*
282 * Set 0 to the extent block if new_ext was 282 * Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
361 /* Insert new entry */ 361 /* Insert new entry */
362 if (new_ext->ee_len) { 362 if (new_ext->ee_len) {
363 o_start[i] = *new_ext; 363 o_start[i] = *new_ext;
364 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); 364 ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
365 } 365 }
366 366
367 /* Insert end entry */ 367 /* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
488 start_ext.ee_len = end_ext.ee_len = 0; 488 start_ext.ee_len = end_ext.ee_len = 0;
489 489
490 new_ext.ee_block = cpu_to_le32(*from); 490 new_ext.ee_block = cpu_to_le32(*from);
491 ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); 491 ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
492 new_ext.ee_len = dext->ee_len; 492 new_ext.ee_len = dext->ee_len;
493 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 493 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
553 copy_extent_status(oext, &end_ext); 553 copy_extent_status(oext, &end_ext);
554 end_ext_alen = ext4_ext_get_actual_len(&end_ext); 554 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
555 ext4_ext_store_pblock(&end_ext, 555 ext4_ext_store_pblock(&end_ext,
556 (ext_pblock(o_end) + oext_alen - end_ext_alen)); 556 (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
557 end_ext.ee_block = 557 end_ext.ee_block =
558 cpu_to_le32(le32_to_cpu(o_end->ee_block) + 558 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
559 oext_alen - end_ext_alen); 559 oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 /* When tmp_dext is too large, pick up the target range. */ 604 /* When tmp_dext is too large, pick up the target range. */
605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
606 606
607 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); 607 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
608 tmp_dext->ee_block = 608 tmp_dext->ee_block =
609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); 610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
613 tmp_dext->ee_len = cpu_to_le16(max_count); 613 tmp_dext->ee_len = cpu_to_le16(max_count);
614 614
615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); 615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
616 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); 616 ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
617 617
618 /* Adjust extent length if donor extent is larger than orig */ 618 /* Adjust extent length if donor extent is larger than orig */
619 if (ext4_ext_get_actual_len(tmp_dext) > 619 if (ext4_ext_get_actual_len(tmp_dext) >
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index bd39885b5998..92203b8a099f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
856 struct buffer_head *bh_use[NAMEI_RA_SIZE]; 856 struct buffer_head *bh_use[NAMEI_RA_SIZE];
857 struct buffer_head *bh, *ret = NULL; 857 struct buffer_head *bh, *ret = NULL;
858 ext4_lblk_t start, block, b; 858 ext4_lblk_t start, block, b;
859 const u8 *name = d_name->name;
859 int ra_max = 0; /* Number of bh's in the readahead 860 int ra_max = 0; /* Number of bh's in the readahead
860 buffer, bh_use[] */ 861 buffer, bh_use[] */
861 int ra_ptr = 0; /* Current index into readahead 862 int ra_ptr = 0; /* Current index into readahead
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
870 namelen = d_name->len; 871 namelen = d_name->len;
871 if (namelen > EXT4_NAME_LEN) 872 if (namelen > EXT4_NAME_LEN)
872 return NULL; 873 return NULL;
874 if ((namelen <= 2) && (name[0] == '.') &&
875 (name[1] == '.' || name[1] == '0')) {
876 /*
877 * "." or ".." will only be in the first block
878 * NFS may look up ".."; "." should be handled by the VFS
879 */
880 block = start = 0;
881 nblocks = 1;
882 goto restart;
883 }
873 if (is_dx(dir)) { 884 if (is_dx(dir)) {
874 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); 885 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
875 /* 886 /*
@@ -960,55 +971,35 @@ cleanup_and_exit:
960static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, 971static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
961 struct ext4_dir_entry_2 **res_dir, int *err) 972 struct ext4_dir_entry_2 **res_dir, int *err)
962{ 973{
963 struct super_block * sb; 974 struct super_block * sb = dir->i_sb;
964 struct dx_hash_info hinfo; 975 struct dx_hash_info hinfo;
965 u32 hash;
966 struct dx_frame frames[2], *frame; 976 struct dx_frame frames[2], *frame;
967 struct ext4_dir_entry_2 *de, *top;
968 struct buffer_head *bh; 977 struct buffer_head *bh;
969 ext4_lblk_t block; 978 ext4_lblk_t block;
970 int retval; 979 int retval;
971 int namelen = d_name->len;
972 const u8 *name = d_name->name;
973 980
974 sb = dir->i_sb; 981 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
975 /* NFS may look up ".." - look at dx_root directory block */ 982 return NULL;
976 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
977 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
978 return NULL;
979 } else {
980 frame = frames;
981 frame->bh = NULL; /* for dx_release() */
982 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
983 dx_set_block(frame->at, 0); /* dx_root block is 0 */
984 }
985 hash = hinfo.hash;
986 do { 983 do {
987 block = dx_get_block(frame->at); 984 block = dx_get_block(frame->at);
988 if (!(bh = ext4_bread (NULL,dir, block, 0, err))) 985 if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
989 goto errout; 986 goto errout;
990 de = (struct ext4_dir_entry_2 *) bh->b_data;
991 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
992 EXT4_DIR_REC_LEN(0));
993 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
994 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
995 + ((char *) de - bh->b_data);
996
997 if (!ext4_check_dir_entry(dir, de, bh, off)) {
998 brelse(bh);
999 *err = ERR_BAD_DX_DIR;
1000 goto errout;
1001 }
1002 987
1003 if (ext4_match(namelen, name, de)) { 988 retval = search_dirblock(bh, dir, d_name,
1004 *res_dir = de; 989 block << EXT4_BLOCK_SIZE_BITS(sb),
1005 dx_release(frames); 990 res_dir);
1006 return bh; 991 if (retval == 1) { /* Success! */
1007 } 992 dx_release(frames);
993 return bh;
1008 } 994 }
1009 brelse(bh); 995 brelse(bh);
996 if (retval == -1) {
997 *err = ERR_BAD_DX_DIR;
998 goto errout;
999 }
1000
1010 /* Check to see if we should continue to search */ 1001 /* Check to see if we should continue to search */
1011 retval = ext4_htree_next_block(dir, hash, frame, 1002 retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1012 frames, NULL); 1003 frames, NULL);
1013 if (retval < 0) { 1004 if (retval < 0) {
1014 ext4_warning(sb, 1005 ext4_warning(sb,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 000000000000..46a7d6a9d976
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,430 @@
1/*
2 * linux/fs/ext4/page-io.c
3 *
4 * This contains the new page_io functions for ext4
5 *
6 * Written by Theodore Ts'o, 2010.
7 */
8
9#include <linux/module.h>
10#include <linux/fs.h>
11#include <linux/time.h>
12#include <linux/jbd2.h>
13#include <linux/highuid.h>
14#include <linux/pagemap.h>
15#include <linux/quotaops.h>
16#include <linux/string.h>
17#include <linux/buffer_head.h>
18#include <linux/writeback.h>
19#include <linux/pagevec.h>
20#include <linux/mpage.h>
21#include <linux/namei.h>
22#include <linux/uio.h>
23#include <linux/bio.h>
24#include <linux/workqueue.h>
25#include <linux/kernel.h>
26#include <linux/slab.h>
27
28#include "ext4_jbd2.h"
29#include "xattr.h"
30#include "acl.h"
31#include "ext4_extents.h"
32
/* Slab cache used for struct ext4_io_page allocations. */
static struct kmem_cache *io_page_cachep;
/* Slab cache used for ext4_io_end_t allocations. */
static struct kmem_cache *io_end_cachep;
34
35int __init ext4_init_pageio(void)
36{
37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
38 if (io_page_cachep == NULL)
39 return -ENOMEM;
40 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
41 if (io_page_cachep == NULL) {
42 kmem_cache_destroy(io_page_cachep);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48
49void ext4_exit_pageio(void)
50{
51 kmem_cache_destroy(io_end_cachep);
52 kmem_cache_destroy(io_page_cachep);
53}
54
55void ext4_free_io_end(ext4_io_end_t *io)
56{
57 int i;
58
59 BUG_ON(!io);
60 if (io->page)
61 put_page(io->page);
62 for (i = 0; i < io->num_io_pages; i++) {
63 if (--io->pages[i]->p_count == 0) {
64 struct page *page = io->pages[i]->p_page;
65
66 end_page_writeback(page);
67 put_page(page);
68 kmem_cache_free(io_page_cachep, io->pages[i]);
69 }
70 }
71 io->num_io_pages = 0;
72 iput(io->inode);
73 kmem_cache_free(io_end_cachep, io);
74}
75
76/*
77 * check a range of space and convert unwritten extents to written.
78 */
79int ext4_end_io_nolock(ext4_io_end_t *io)
80{
81 struct inode *inode = io->inode;
82 loff_t offset = io->offset;
83 ssize_t size = io->size;
84 int ret = 0;
85
86 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
87 "list->prev 0x%p\n",
88 io, inode->i_ino, io->list.next, io->list.prev);
89
90 if (list_empty(&io->list))
91 return ret;
92
93 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
94 return ret;
95
96 ret = ext4_convert_unwritten_extents(inode, offset, size);
97 if (ret < 0) {
98 printk(KERN_EMERG "%s: failed to convert unwritten "
99 "extents to written extents, error is %d "
100 "io is still on inode %lu aio dio list\n",
101 __func__, ret, inode->i_ino);
102 return ret;
103 }
104
105 if (io->iocb)
106 aio_complete(io->iocb, io->result, 0);
107 /* clear the DIO AIO unwritten flag */
108 io->flag &= ~EXT4_IO_END_UNWRITTEN;
109 return ret;
110}
111
112/*
113 * work on completed aio dio IO, to convert unwritten extents to extents
114 */
115static void ext4_end_io_work(struct work_struct *work)
116{
117 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
118 struct inode *inode = io->inode;
119 struct ext4_inode_info *ei = EXT4_I(inode);
120 unsigned long flags;
121 int ret;
122
123 mutex_lock(&inode->i_mutex);
124 ret = ext4_end_io_nolock(io);
125 if (ret < 0) {
126 mutex_unlock(&inode->i_mutex);
127 return;
128 }
129
130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
131 if (!list_empty(&io->list))
132 list_del_init(&io->list);
133 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
134 mutex_unlock(&inode->i_mutex);
135 ext4_free_io_end(io);
136}
137
138ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
139{
140 ext4_io_end_t *io = NULL;
141
142 io = kmem_cache_alloc(io_end_cachep, flags);
143 if (io) {
144 memset(io, 0, sizeof(*io));
145 io->inode = igrab(inode);
146 BUG_ON(!io->inode);
147 INIT_WORK(&io->work, ext4_end_io_work);
148 INIT_LIST_HEAD(&io->list);
149 }
150 return io;
151}
152
153/*
154 * Print an buffer I/O error compatible with the fs/buffer.c. This
155 * provides compatibility with dmesg scrapers that look for a specific
156 * buffer I/O error message. We really need a unified error reporting
157 * structure to userspace ala Digital Unix's uerf system, but it's
158 * probably not going to happen in my lifetime, due to LKML politics...
159 */
160static void buffer_io_error(struct buffer_head *bh)
161{
162 char b[BDEVNAME_SIZE];
163 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
164 bdevname(bh->b_bdev, b),
165 (unsigned long long)bh->b_blocknr);
166}
167
168static void ext4_end_bio(struct bio *bio, int error)
169{
170 ext4_io_end_t *io_end = bio->bi_private;
171 struct workqueue_struct *wq;
172 struct inode *inode;
173 unsigned long flags;
174 ext4_fsblk_t err_block;
175 int i;
176
177 BUG_ON(!io_end);
178 inode = io_end->inode;
179 bio->bi_private = NULL;
180 bio->bi_end_io = NULL;
181 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
182 error = 0;
183 err_block = bio->bi_sector >> (inode->i_blkbits - 9);
184 bio_put(bio);
185
186 if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
187 pr_err("sb umounted, discard end_io request for inode %lu\n",
188 io_end->inode->i_ino);
189 ext4_free_io_end(io_end);
190 return;
191 }
192
193 if (error) {
194 io_end->flag |= EXT4_IO_END_ERROR;
195 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
196 "(offset %llu size %ld starting block %llu)",
197 inode->i_ino,
198 (unsigned long long) io_end->offset,
199 (long) io_end->size,
200 (unsigned long long) err_block);
201 }
202
203 for (i = 0; i < io_end->num_io_pages; i++) {
204 struct page *page = io_end->pages[i]->p_page;
205 struct buffer_head *bh, *head;
206 int partial_write = 0;
207
208 head = page_buffers(page);
209 if (error)
210 SetPageError(page);
211 BUG_ON(!head);
212 if (head->b_size == PAGE_CACHE_SIZE)
213 clear_buffer_dirty(head);
214 else {
215 loff_t offset;
216 loff_t io_end_offset = io_end->offset + io_end->size;
217
218 offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
219 bh = head;
220 do {
221 if ((offset >= io_end->offset) &&
222 (offset+bh->b_size <= io_end_offset)) {
223 if (error)
224 buffer_io_error(bh);
225
226 clear_buffer_dirty(bh);
227 }
228 if (buffer_delay(bh))
229 partial_write = 1;
230 else if (!buffer_mapped(bh))
231 clear_buffer_dirty(bh);
232 else if (buffer_dirty(bh))
233 partial_write = 1;
234 offset += bh->b_size;
235 bh = bh->b_this_page;
236 } while (bh != head);
237 }
238
239 if (--io_end->pages[i]->p_count == 0) {
240 struct page *page = io_end->pages[i]->p_page;
241
242 end_page_writeback(page);
243 put_page(page);
244 kmem_cache_free(io_page_cachep, io_end->pages[i]);
245 }
246
247 /*
248 * If this is a partial write which happened to make
249 * all buffers uptodate then we can optimize away a
250 * bogus readpage() for the next read(). Here we
251 * 'discover' whether the page went uptodate as a
252 * result of this (potentially partial) write.
253 */
254 if (!partial_write)
255 SetPageUptodate(page);
256 }
257
258 io_end->num_io_pages = 0;
259
260 /* Add the io_end to per-inode completed io list*/
261 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
262 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
263 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
264
265 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
266 /* queue the work to convert unwritten extents to written */
267 queue_work(wq, &io_end->work);
268}
269
270void ext4_io_submit(struct ext4_io_submit *io)
271{
272 struct bio *bio = io->io_bio;
273
274 if (bio) {
275 bio_get(io->io_bio);
276 submit_bio(io->io_op, io->io_bio);
277 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
278 bio_put(io->io_bio);
279 }
280 io->io_bio = 0;
281 io->io_op = 0;
282 io->io_end = 0;
283}
284
285static int io_submit_init(struct ext4_io_submit *io,
286 struct inode *inode,
287 struct writeback_control *wbc,
288 struct buffer_head *bh)
289{
290 ext4_io_end_t *io_end;
291 struct page *page = bh->b_page;
292 int nvecs = bio_get_nr_vecs(bh->b_bdev);
293 struct bio *bio;
294
295 io_end = ext4_init_io_end(inode, GFP_NOFS);
296 if (!io_end)
297 return -ENOMEM;
298 do {
299 bio = bio_alloc(GFP_NOIO, nvecs);
300 nvecs >>= 1;
301 } while (bio == NULL);
302
303 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
304 bio->bi_bdev = bh->b_bdev;
305 bio->bi_private = io->io_end = io_end;
306 bio->bi_end_io = ext4_end_bio;
307
308 io_end->inode = inode;
309 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
310
311 io->io_bio = bio;
312 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
313 WRITE_SYNC_PLUG : WRITE);
314 io->io_next_block = bh->b_blocknr;
315 return 0;
316}
317
318static int io_submit_add_bh(struct ext4_io_submit *io,
319 struct ext4_io_page *io_page,
320 struct inode *inode,
321 struct writeback_control *wbc,
322 struct buffer_head *bh)
323{
324 ext4_io_end_t *io_end;
325 int ret;
326
327 if (buffer_new(bh)) {
328 clear_buffer_new(bh);
329 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
330 }
331
332 if (!buffer_mapped(bh) || buffer_delay(bh)) {
333 if (!buffer_mapped(bh))
334 clear_buffer_dirty(bh);
335 if (io->io_bio)
336 ext4_io_submit(io);
337 return 0;
338 }
339
340 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
341submit_and_retry:
342 ext4_io_submit(io);
343 }
344 if (io->io_bio == NULL) {
345 ret = io_submit_init(io, inode, wbc, bh);
346 if (ret)
347 return ret;
348 }
349 io_end = io->io_end;
350 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
351 (io_end->pages[io_end->num_io_pages-1] != io_page))
352 goto submit_and_retry;
353 if (buffer_uninit(bh))
354 io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
355 io->io_end->size += bh->b_size;
356 io->io_next_block++;
357 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
358 if (ret != bh->b_size)
359 goto submit_and_retry;
360 if ((io_end->num_io_pages == 0) ||
361 (io_end->pages[io_end->num_io_pages-1] != io_page)) {
362 io_end->pages[io_end->num_io_pages++] = io_page;
363 io_page->p_count++;
364 }
365 return 0;
366}
367
368int ext4_bio_write_page(struct ext4_io_submit *io,
369 struct page *page,
370 int len,
371 struct writeback_control *wbc)
372{
373 struct inode *inode = page->mapping->host;
374 unsigned block_start, block_end, blocksize;
375 struct ext4_io_page *io_page;
376 struct buffer_head *bh, *head;
377 int ret = 0;
378
379 blocksize = 1 << inode->i_blkbits;
380
381 BUG_ON(PageWriteback(page));
382 set_page_writeback(page);
383 ClearPageError(page);
384
385 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
386 if (!io_page) {
387 set_page_dirty(page);
388 unlock_page(page);
389 return -ENOMEM;
390 }
391 io_page->p_page = page;
392 io_page->p_count = 0;
393 get_page(page);
394
395 for (bh = head = page_buffers(page), block_start = 0;
396 bh != head || !block_start;
397 block_start = block_end, bh = bh->b_this_page) {
398 block_end = block_start + blocksize;
399 if (block_start >= len) {
400 clear_buffer_dirty(bh);
401 set_buffer_uptodate(bh);
402 continue;
403 }
404 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
405 if (ret) {
406 /*
407 * We only get here on ENOMEM. Not much else
408 * we can do but mark the page as dirty, and
409 * better luck next time.
410 */
411 set_page_dirty(page);
412 break;
413 }
414 }
415 unlock_page(page);
416 /*
417 * If the page was truncated before we could do the writeback,
418 * or we had a memory allocation error while trying to write
419 * the first buffer head, we won't have submitted any pages for
420 * I/O. In that case we need to make sure we've cleared the
421 * PageWriteback bit from the page to prevent the system from
422 * wedging later on.
423 */
424 if (io_page->p_count == 0) {
425 put_page(page);
426 end_page_writeback(page);
427 kmem_cache_free(io_page_cachep, io_page);
428 }
429 return ret;
430}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca5c8aa00a2f..dc963929de65 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
226 } 226 }
227 227
228 /* Zero out all of the reserved backup group descriptor table blocks */ 228 /* Zero out all of the reserved backup group descriptor table blocks */
229 for (i = 0, bit = gdblocks + 1, block = start + bit; 229 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
230 i < reserved_gdb; i++, block++, bit++) { 230 block, sbi->s_itb_per_group);
231 struct buffer_head *gdb; 231 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
232 232 GFP_NOFS);
233 ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); 233 if (err)
234 234 goto exit_bh;
235 if ((err = extend_or_restart_transaction(handle, 1, bh)))
236 goto exit_bh;
237 235
238 if (IS_ERR(gdb = bclean(handle, sb, block))) {
239 err = PTR_ERR(gdb);
240 goto exit_bh;
241 }
242 ext4_handle_dirty_metadata(handle, NULL, gdb);
243 ext4_set_bit(bit, bh->b_data);
244 brelse(gdb);
245 }
246 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
247 input->block_bitmap - start); 237 input->block_bitmap - start);
248 ext4_set_bit(input->block_bitmap - start, bh->b_data); 238 ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
251 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 241 ext4_set_bit(input->inode_bitmap - start, bh->b_data);
252 242
253 /* Zero out all of the inode table blocks */ 243 /* Zero out all of the inode table blocks */
254 for (i = 0, block = input->inode_table, bit = block - start; 244 block = input->inode_table;
255 i < sbi->s_itb_per_group; i++, bit++, block++) { 245 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
256 struct buffer_head *it; 246 block, sbi->s_itb_per_group);
257 247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
258 ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); 248 if (err)
259 249 goto exit_bh;
260 if ((err = extend_or_restart_transaction(handle, 1, bh)))
261 goto exit_bh;
262
263 if (IS_ERR(it = bclean(handle, sb, block))) {
264 err = PTR_ERR(it);
265 goto exit_bh;
266 }
267 ext4_handle_dirty_metadata(handle, NULL, it);
268 brelse(it);
269 ext4_set_bit(bit, bh->b_data);
270 }
271 250
272 if ((err = extend_or_restart_transaction(handle, 2, bh))) 251 if ((err = extend_or_restart_transaction(handle, 2, bh)))
273 goto exit_bh; 252 goto exit_bh;
274 253
275 mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); 254 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
255 bh->b_data);
276 ext4_handle_dirty_metadata(handle, NULL, bh); 256 ext4_handle_dirty_metadata(handle, NULL, bh);
277 brelse(bh); 257 brelse(bh);
278 /* Mark unused entries in inode bitmap used */ 258 /* Mark unused entries in inode bitmap used */
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
283 goto exit_journal; 263 goto exit_journal;
284 } 264 }
285 265
286 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 266 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
287 bh->b_data); 267 bh->b_data);
288 ext4_handle_dirty_metadata(handle, NULL, bh); 268 ext4_handle_dirty_metadata(handle, NULL, bh);
289exit_bh: 269exit_bh:
290 brelse(bh); 270 brelse(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ecc1e590303..0348ce066592 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -40,6 +40,9 @@
40#include <linux/crc16.h> 40#include <linux/crc16.h>
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42 42
43#include <linux/kthread.h>
44#include <linux/freezer.h>
45
43#include "ext4.h" 46#include "ext4.h"
44#include "ext4_jbd2.h" 47#include "ext4_jbd2.h"
45#include "xattr.h" 48#include "xattr.h"
@@ -49,8 +52,11 @@
49#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h> 53#include <trace/events/ext4.h>
51 54
52struct proc_dir_entry *ext4_proc_root; 55static struct proc_dir_entry *ext4_proc_root;
53static struct kset *ext4_kset; 56static struct kset *ext4_kset;
57struct ext4_lazy_init *ext4_li_info;
58struct mutex ext4_li_mtx;
59struct ext4_features *ext4_feat;
54 60
55static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 61static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
56 unsigned long journal_devnum); 62 unsigned long journal_devnum);
@@ -69,6 +75,8 @@ static void ext4_write_super(struct super_block *sb);
69static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
70static int ext4_get_sb(struct file_system_type *fs_type, int flags, 76static int ext4_get_sb(struct file_system_type *fs_type, int flags,
71 const char *dev_name, void *data, struct vfsmount *mnt); 77 const char *dev_name, void *data, struct vfsmount *mnt);
78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb);
72 80
73#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
74static struct file_system_type ext3_fs_type = { 82static struct file_system_type ext3_fs_type = {
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
701 struct ext4_super_block *es = sbi->s_es; 709 struct ext4_super_block *es = sbi->s_es;
702 int i, err; 710 int i, err;
703 711
712 ext4_unregister_li_request(sb);
704 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 713 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
705 714
706 flush_workqueue(sbi->dio_unwritten_wq); 715 flush_workqueue(sbi->dio_unwritten_wq);
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
717 ext4_abort(sb, "Couldn't clean up the journal"); 726 ext4_abort(sb, "Couldn't clean up the journal");
718 } 727 }
719 728
729 del_timer(&sbi->s_err_report);
720 ext4_release_system_zone(sb); 730 ext4_release_system_zone(sb);
721 ext4_mb_release(sb); 731 ext4_mb_release(sb);
722 ext4_ext_release(sb); 732 ext4_ext_release(sb);
@@ -1042,6 +1052,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1042 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1052 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
1043 seq_puts(seq, ",block_validity"); 1053 seq_puts(seq, ",block_validity");
1044 1054
1055 if (!test_opt(sb, INIT_INODE_TABLE))
1056 seq_puts(seq, ",noinit_inode_table");
1057 else if (sbi->s_li_wait_mult)
1058 seq_printf(seq, ",init_inode_table=%u",
1059 (unsigned) sbi->s_li_wait_mult);
1060
1045 ext4_show_quota_options(seq, sb); 1061 ext4_show_quota_options(seq, sb);
1046 1062
1047 return 0; 1063 return 0;
@@ -1170,6 +1186,7 @@ static const struct super_operations ext4_sops = {
1170 .quota_write = ext4_quota_write, 1186 .quota_write = ext4_quota_write,
1171#endif 1187#endif
1172 .bdev_try_to_free_page = bdev_try_to_free_page, 1188 .bdev_try_to_free_page = bdev_try_to_free_page,
1189 .trim_fs = ext4_trim_fs
1173}; 1190};
1174 1191
1175static const struct super_operations ext4_nojournal_sops = { 1192static const struct super_operations ext4_nojournal_sops = {
@@ -1216,6 +1233,7 @@ enum {
1216 Opt_inode_readahead_blks, Opt_journal_ioprio, 1233 Opt_inode_readahead_blks, Opt_journal_ioprio,
1217 Opt_dioread_nolock, Opt_dioread_lock, 1234 Opt_dioread_nolock, Opt_dioread_lock,
1218 Opt_discard, Opt_nodiscard, 1235 Opt_discard, Opt_nodiscard,
1236 Opt_init_inode_table, Opt_noinit_inode_table,
1219}; 1237};
1220 1238
1221static const match_table_t tokens = { 1239static const match_table_t tokens = {
@@ -1286,6 +1304,9 @@ static const match_table_t tokens = {
1286 {Opt_dioread_lock, "dioread_lock"}, 1304 {Opt_dioread_lock, "dioread_lock"},
1287 {Opt_discard, "discard"}, 1305 {Opt_discard, "discard"},
1288 {Opt_nodiscard, "nodiscard"}, 1306 {Opt_nodiscard, "nodiscard"},
1307 {Opt_init_inode_table, "init_itable=%u"},
1308 {Opt_init_inode_table, "init_itable"},
1309 {Opt_noinit_inode_table, "noinit_itable"},
1289 {Opt_err, NULL}, 1310 {Opt_err, NULL},
1290}; 1311};
1291 1312
@@ -1756,6 +1777,20 @@ set_qf_format:
1756 case Opt_dioread_lock: 1777 case Opt_dioread_lock:
1757 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1778 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1758 break; 1779 break;
1780 case Opt_init_inode_table:
1781 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1782 if (args[0].from) {
1783 if (match_int(&args[0], &option))
1784 return 0;
1785 } else
1786 option = EXT4_DEF_LI_WAIT_MULT;
1787 if (option < 0)
1788 return 0;
1789 sbi->s_li_wait_mult = option;
1790 break;
1791 case Opt_noinit_inode_table:
1792 clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1793 break;
1759 default: 1794 default:
1760 ext4_msg(sb, KERN_ERR, 1795 ext4_msg(sb, KERN_ERR,
1761 "Unrecognized mount option \"%s\" " 1796 "Unrecognized mount option \"%s\" "
@@ -1939,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1939} 1974}
1940 1975
1941/* Called at mount-time, super-block is locked */ 1976/* Called at mount-time, super-block is locked */
1942static int ext4_check_descriptors(struct super_block *sb) 1977static int ext4_check_descriptors(struct super_block *sb,
1978 ext4_group_t *first_not_zeroed)
1943{ 1979{
1944 struct ext4_sb_info *sbi = EXT4_SB(sb); 1980 struct ext4_sb_info *sbi = EXT4_SB(sb);
1945 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1981 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1948,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1948 ext4_fsblk_t inode_bitmap; 1984 ext4_fsblk_t inode_bitmap;
1949 ext4_fsblk_t inode_table; 1985 ext4_fsblk_t inode_table;
1950 int flexbg_flag = 0; 1986 int flexbg_flag = 0;
1951 ext4_group_t i; 1987 ext4_group_t i, grp = sbi->s_groups_count;
1952 1988
1953 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1989 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1954 flexbg_flag = 1; 1990 flexbg_flag = 1;
@@ -1964,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb)
1964 last_block = first_block + 2000 last_block = first_block +
1965 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2001 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1966 2002
2003 if ((grp == sbi->s_groups_count) &&
2004 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2005 grp = i;
2006
1967 block_bitmap = ext4_block_bitmap(sb, gdp); 2007 block_bitmap = ext4_block_bitmap(sb, gdp);
1968 if (block_bitmap < first_block || block_bitmap > last_block) { 2008 if (block_bitmap < first_block || block_bitmap > last_block) {
1969 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2009 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2001,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb)
2001 if (!flexbg_flag) 2041 if (!flexbg_flag)
2002 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2042 first_block += EXT4_BLOCKS_PER_GROUP(sb);
2003 } 2043 }
2044 if (NULL != first_not_zeroed)
2045 *first_not_zeroed = grp;
2004 2046
2005 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2047 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
2006 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2048 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2373,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \
2373#define EXT4_ATTR(name, mode, show, store) \ 2415#define EXT4_ATTR(name, mode, show, store) \
2374static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2416static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2375 2417
2418#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2376#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2419#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2377#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2420#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2378#define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2421#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@@ -2409,6 +2452,16 @@ static struct attribute *ext4_attrs[] = {
2409 NULL, 2452 NULL,
2410}; 2453};
2411 2454
2455/* Features this copy of ext4 supports */
2456EXT4_INFO_ATTR(lazy_itable_init);
2457EXT4_INFO_ATTR(batched_discard);
2458
2459static struct attribute *ext4_feat_attrs[] = {
2460 ATTR_LIST(lazy_itable_init),
2461 ATTR_LIST(batched_discard),
2462 NULL,
2463};
2464
2412static ssize_t ext4_attr_show(struct kobject *kobj, 2465static ssize_t ext4_attr_show(struct kobject *kobj,
2413 struct attribute *attr, char *buf) 2466 struct attribute *attr, char *buf)
2414{ 2467{
@@ -2437,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj)
2437 complete(&sbi->s_kobj_unregister); 2490 complete(&sbi->s_kobj_unregister);
2438} 2491}
2439 2492
2440
2441static const struct sysfs_ops ext4_attr_ops = { 2493static const struct sysfs_ops ext4_attr_ops = {
2442 .show = ext4_attr_show, 2494 .show = ext4_attr_show,
2443 .store = ext4_attr_store, 2495 .store = ext4_attr_store,
@@ -2449,6 +2501,17 @@ static struct kobj_type ext4_ktype = {
2449 .release = ext4_sb_release, 2501 .release = ext4_sb_release,
2450}; 2502};
2451 2503
2504static void ext4_feat_release(struct kobject *kobj)
2505{
2506 complete(&ext4_feat->f_kobj_unregister);
2507}
2508
2509static struct kobj_type ext4_feat_ktype = {
2510 .default_attrs = ext4_feat_attrs,
2511 .sysfs_ops = &ext4_attr_ops,
2512 .release = ext4_feat_release,
2513};
2514
2452/* 2515/*
2453 * Check whether this filesystem can be mounted based on 2516 * Check whether this filesystem can be mounted based on
2454 * the features present and the RDONLY/RDWR mount requested. 2517 * the features present and the RDONLY/RDWR mount requested.
@@ -2539,6 +2602,372 @@ static void print_daily_error_info(unsigned long arg)
2539 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2602 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2540} 2603}
2541 2604
/*
 * Callback installed on the lazy-init timer.  @data holds the task_struct
 * pointer that was stored in the timer's data field; wake that task up.
 */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2610
2611/* Find next suitable group and run ext4_init_inode_table */
2612static int ext4_run_li_request(struct ext4_li_request *elr)
2613{
2614 struct ext4_group_desc *gdp = NULL;
2615 ext4_group_t group, ngroups;
2616 struct super_block *sb;
2617 unsigned long timeout = 0;
2618 int ret = 0;
2619
2620 sb = elr->lr_super;
2621 ngroups = EXT4_SB(sb)->s_groups_count;
2622
2623 for (group = elr->lr_next_group; group < ngroups; group++) {
2624 gdp = ext4_get_group_desc(sb, group, NULL);
2625 if (!gdp) {
2626 ret = 1;
2627 break;
2628 }
2629
2630 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2631 break;
2632 }
2633
2634 if (group == ngroups)
2635 ret = 1;
2636
2637 if (!ret) {
2638 timeout = jiffies;
2639 ret = ext4_init_inode_table(sb, group,
2640 elr->lr_timeout ? 0 : 1);
2641 if (elr->lr_timeout == 0) {
2642 timeout = jiffies - timeout;
2643 if (elr->lr_sbi->s_li_wait_mult)
2644 timeout *= elr->lr_sbi->s_li_wait_mult;
2645 else
2646 timeout *= 20;
2647 elr->lr_timeout = timeout;
2648 }
2649 elr->lr_next_sched = jiffies + elr->lr_timeout;
2650 elr->lr_next_group = group + 1;
2651 }
2652
2653 return ret;
2654}
2655
2656/*
2657 * Remove lr_request from the list_request and free the
2658 * request tructure. Should be called with li_list_mtx held
2659 */
2660static void ext4_remove_li_request(struct ext4_li_request *elr)
2661{
2662 struct ext4_sb_info *sbi;
2663
2664 if (!elr)
2665 return;
2666
2667 sbi = elr->lr_sbi;
2668
2669 list_del(&elr->lr_request);
2670 sbi->s_li_request = NULL;
2671 kfree(elr);
2672}
2673
2674static void ext4_unregister_li_request(struct super_block *sb)
2675{
2676 struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
2677
2678 if (!ext4_li_info)
2679 return;
2680
2681 mutex_lock(&ext4_li_info->li_list_mtx);
2682 ext4_remove_li_request(elr);
2683 mutex_unlock(&ext4_li_info->li_list_mtx);
2684}
2685
2686/*
2687 * This is the function where ext4lazyinit thread lives. It walks
2688 * through the request list searching for next scheduled filesystem.
2689 * When such a fs is found, run the lazy initialization request
2690 * (ext4_rn_li_request) and keep track of the time spend in this
2691 * function. Based on that time we compute next schedule time of
2692 * the request. When walking through the list is complete, compute
2693 * next waking time and put itself into sleep.
2694 */
2695static int ext4_lazyinit_thread(void *arg)
2696{
2697 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2698 struct list_head *pos, *n;
2699 struct ext4_li_request *elr;
2700 unsigned long next_wakeup;
2701 DEFINE_WAIT(wait);
2702 int ret;
2703
2704 BUG_ON(NULL == eli);
2705
2706 eli->li_timer.data = (unsigned long)current;
2707 eli->li_timer.function = ext4_lazyinode_timeout;
2708
2709 eli->li_task = current;
2710 wake_up(&eli->li_wait_task);
2711
2712cont_thread:
2713 while (true) {
2714 next_wakeup = MAX_JIFFY_OFFSET;
2715
2716 mutex_lock(&eli->li_list_mtx);
2717 if (list_empty(&eli->li_request_list)) {
2718 mutex_unlock(&eli->li_list_mtx);
2719 goto exit_thread;
2720 }
2721
2722 list_for_each_safe(pos, n, &eli->li_request_list) {
2723 elr = list_entry(pos, struct ext4_li_request,
2724 lr_request);
2725
2726 if (time_after_eq(jiffies, elr->lr_next_sched))
2727 ret = ext4_run_li_request(elr);
2728
2729 if (ret) {
2730 ret = 0;
2731 ext4_remove_li_request(elr);
2732 continue;
2733 }
2734
2735 if (time_before(elr->lr_next_sched, next_wakeup))
2736 next_wakeup = elr->lr_next_sched;
2737 }
2738 mutex_unlock(&eli->li_list_mtx);
2739
2740 if (freezing(current))
2741 refrigerator();
2742
2743 if (time_after_eq(jiffies, next_wakeup)) {
2744 cond_resched();
2745 continue;
2746 }
2747
2748 eli->li_timer.expires = next_wakeup;
2749 add_timer(&eli->li_timer);
2750 prepare_to_wait(&eli->li_wait_daemon, &wait,
2751 TASK_INTERRUPTIBLE);
2752 if (time_before(jiffies, next_wakeup))
2753 schedule();
2754 finish_wait(&eli->li_wait_daemon, &wait);
2755 }
2756
2757exit_thread:
2758 /*
2759 * It looks like the request list is empty, but we need
2760 * to check it under the li_list_mtx lock, to prevent any
2761 * additions into it, and of course we should lock ext4_li_mtx
2762 * to atomically free the list and ext4_li_info, because at
2763 * this point another ext4 filesystem could be registering
2764 * new one.
2765 */
2766 mutex_lock(&ext4_li_mtx);
2767 mutex_lock(&eli->li_list_mtx);
2768 if (!list_empty(&eli->li_request_list)) {
2769 mutex_unlock(&eli->li_list_mtx);
2770 mutex_unlock(&ext4_li_mtx);
2771 goto cont_thread;
2772 }
2773 mutex_unlock(&eli->li_list_mtx);
2774 del_timer_sync(&ext4_li_info->li_timer);
2775 eli->li_task = NULL;
2776 wake_up(&eli->li_wait_task);
2777
2778 kfree(ext4_li_info);
2779 ext4_li_info = NULL;
2780 mutex_unlock(&ext4_li_mtx);
2781
2782 return 0;
2783}
2784
2785static void ext4_clear_request_list(void)
2786{
2787 struct list_head *pos, *n;
2788 struct ext4_li_request *elr;
2789
2790 mutex_lock(&ext4_li_info->li_list_mtx);
2791 if (list_empty(&ext4_li_info->li_request_list))
2792 return;
2793
2794 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2795 elr = list_entry(pos, struct ext4_li_request,
2796 lr_request);
2797 ext4_remove_li_request(elr);
2798 }
2799 mutex_unlock(&ext4_li_info->li_list_mtx);
2800}
2801
2802static int ext4_run_lazyinit_thread(void)
2803{
2804 struct task_struct *t;
2805
2806 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
2807 if (IS_ERR(t)) {
2808 int err = PTR_ERR(t);
2809 ext4_clear_request_list();
2810 del_timer_sync(&ext4_li_info->li_timer);
2811 kfree(ext4_li_info);
2812 ext4_li_info = NULL;
2813 printk(KERN_CRIT "EXT4: error %d creating inode table "
2814 "initialization thread\n",
2815 err);
2816 return err;
2817 }
2818 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2819
2820 wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
2821 return 0;
2822}
2823
2824/*
2825 * Check whether it make sense to run itable init. thread or not.
2826 * If there is at least one uninitialized inode table, return
2827 * corresponding group number, else the loop goes through all
2828 * groups and return total number of groups.
2829 */
2830static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2831{
2832 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2833 struct ext4_group_desc *gdp = NULL;
2834
2835 for (group = 0; group < ngroups; group++) {
2836 gdp = ext4_get_group_desc(sb, group, NULL);
2837 if (!gdp)
2838 continue;
2839
2840 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2841 break;
2842 }
2843
2844 return group;
2845}
2846
2847static int ext4_li_info_new(void)
2848{
2849 struct ext4_lazy_init *eli = NULL;
2850
2851 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2852 if (!eli)
2853 return -ENOMEM;
2854
2855 eli->li_task = NULL;
2856 INIT_LIST_HEAD(&eli->li_request_list);
2857 mutex_init(&eli->li_list_mtx);
2858
2859 init_waitqueue_head(&eli->li_wait_daemon);
2860 init_waitqueue_head(&eli->li_wait_task);
2861 init_timer(&eli->li_timer);
2862 eli->li_state |= EXT4_LAZYINIT_QUIT;
2863
2864 ext4_li_info = eli;
2865
2866 return 0;
2867}
2868
2869static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
2870 ext4_group_t start)
2871{
2872 struct ext4_sb_info *sbi = EXT4_SB(sb);
2873 struct ext4_li_request *elr;
2874 unsigned long rnd;
2875
2876 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
2877 if (!elr)
2878 return NULL;
2879
2880 elr->lr_super = sb;
2881 elr->lr_sbi = sbi;
2882 elr->lr_next_group = start;
2883
2884 /*
2885 * Randomize first schedule time of the request to
2886 * spread the inode table initialization requests
2887 * better.
2888 */
2889 get_random_bytes(&rnd, sizeof(rnd));
2890 elr->lr_next_sched = jiffies + (unsigned long)rnd %
2891 (EXT4_DEF_LI_MAX_START_DELAY * HZ);
2892
2893 return elr;
2894}
2895
2896static int ext4_register_li_request(struct super_block *sb,
2897 ext4_group_t first_not_zeroed)
2898{
2899 struct ext4_sb_info *sbi = EXT4_SB(sb);
2900 struct ext4_li_request *elr;
2901 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2902 int ret;
2903
2904 if (sbi->s_li_request != NULL)
2905 return 0;
2906
2907 if (first_not_zeroed == ngroups ||
2908 (sb->s_flags & MS_RDONLY) ||
2909 !test_opt(sb, INIT_INODE_TABLE)) {
2910 sbi->s_li_request = NULL;
2911 return 0;
2912 }
2913
2914 if (first_not_zeroed == ngroups) {
2915 sbi->s_li_request = NULL;
2916 return 0;
2917 }
2918
2919 elr = ext4_li_request_new(sb, first_not_zeroed);
2920 if (!elr)
2921 return -ENOMEM;
2922
2923 mutex_lock(&ext4_li_mtx);
2924
2925 if (NULL == ext4_li_info) {
2926 ret = ext4_li_info_new();
2927 if (ret)
2928 goto out;
2929 }
2930
2931 mutex_lock(&ext4_li_info->li_list_mtx);
2932 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
2933 mutex_unlock(&ext4_li_info->li_list_mtx);
2934
2935 sbi->s_li_request = elr;
2936
2937 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2938 ret = ext4_run_lazyinit_thread();
2939 if (ret)
2940 goto out;
2941 }
2942out:
2943 mutex_unlock(&ext4_li_mtx);
2944 if (ret)
2945 kfree(elr);
2946 return ret;
2947}
2948
2949/*
2950 * We do not need to lock anything since this is called on
2951 * module unload.
2952 */
2953static void ext4_destroy_lazyinit_thread(void)
2954{
2955 /*
2956 * If thread exited earlier
2957 * there's nothing to be done.
2958 */
2959 if (!ext4_li_info)
2960 return;
2961
2962 ext4_clear_request_list();
2963
2964 while (ext4_li_info->li_task) {
2965 wake_up(&ext4_li_info->li_wait_daemon);
2966 wait_event(ext4_li_info->li_wait_task,
2967 ext4_li_info->li_task == NULL);
2968 }
2969}
2970
2542static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2971static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2543 __releases(kernel_lock) 2972 __releases(kernel_lock)
2544 __acquires(kernel_lock) 2973 __acquires(kernel_lock)
@@ -2564,6 +2993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2564 __u64 blocks_count; 2993 __u64 blocks_count;
2565 int err; 2994 int err;
2566 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2995 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2996 ext4_group_t first_not_zeroed;
2567 2997
2568 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2998 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2569 if (!sbi) 2999 if (!sbi)
@@ -2624,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2624 3054
2625 /* Set defaults before we parse the mount options */ 3055 /* Set defaults before we parse the mount options */
2626 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3056 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3057 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
2627 if (def_mount_opts & EXT4_DEFM_DEBUG) 3058 if (def_mount_opts & EXT4_DEFM_DEBUG)
2628 set_opt(sbi->s_mount_opt, DEBUG); 3059 set_opt(sbi->s_mount_opt, DEBUG);
2629 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3060 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2901,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2901 goto failed_mount2; 3332 goto failed_mount2;
2902 } 3333 }
2903 } 3334 }
2904 if (!ext4_check_descriptors(sb)) { 3335 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
2905 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3336 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2906 goto failed_mount2; 3337 goto failed_mount2;
2907 } 3338 }
@@ -3122,6 +3553,10 @@ no_journal:
3122 goto failed_mount4; 3553 goto failed_mount4;
3123 } 3554 }
3124 3555
3556 err = ext4_register_li_request(sb, first_not_zeroed);
3557 if (err)
3558 goto failed_mount4;
3559
3125 sbi->s_kobj.kset = ext4_kset; 3560 sbi->s_kobj.kset = ext4_kset;
3126 init_completion(&sbi->s_kobj_unregister); 3561 init_completion(&sbi->s_kobj_unregister);
3127 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3562 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3461,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb,
3461 EXT4_SB(sb)->s_journal = journal; 3896 EXT4_SB(sb)->s_journal = journal;
3462 ext4_clear_journal_err(sb, es); 3897 ext4_clear_journal_err(sb, es);
3463 3898
3464 if (journal_devnum && 3899 if (!really_read_only && journal_devnum &&
3465 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3900 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3466 es->s_journal_dev = cpu_to_le32(journal_devnum); 3901 es->s_journal_dev = cpu_to_le32(journal_devnum);
3467 3902
@@ -3514,9 +3949,12 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3514 else 3949 else
3515 es->s_kbytes_written = 3950 es->s_kbytes_written =
3516 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3951 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3517 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3952 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
3953 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3518 &EXT4_SB(sb)->s_freeblocks_counter)); 3954 &EXT4_SB(sb)->s_freeblocks_counter));
3519 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3955 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
3956 es->s_free_inodes_count =
3957 cpu_to_le32(percpu_counter_sum_positive(
3520 &EXT4_SB(sb)->s_freeinodes_counter)); 3958 &EXT4_SB(sb)->s_freeinodes_counter));
3521 sb->s_dirt = 0; 3959 sb->s_dirt = 0;
3522 BUFFER_TRACE(sbh, "marking dirty"); 3960 BUFFER_TRACE(sbh, "marking dirty");
@@ -3835,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3835 enable_quota = 1; 4273 enable_quota = 1;
3836 } 4274 }
3837 } 4275 }
4276
4277 /*
4278 * Reinitialize lazy itable initialization thread based on
4279 * current settings
4280 */
4281 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4282 ext4_unregister_li_request(sb);
4283 else {
4284 ext4_group_t first_not_zeroed;
4285 first_not_zeroed = ext4_has_uninit_itable(sb);
4286 ext4_register_li_request(sb, first_not_zeroed);
4287 }
4288
3838 ext4_setup_system_zone(sb); 4289 ext4_setup_system_zone(sb);
3839 if (sbi->s_journal == NULL) 4290 if (sbi->s_journal == NULL)
3840 ext4_commit_super(sb, 1); 4291 ext4_commit_super(sb, 1);
@@ -4276,23 +4727,53 @@ static struct file_system_type ext4_fs_type = {
4276 .fs_flags = FS_REQUIRES_DEV, 4727 .fs_flags = FS_REQUIRES_DEV,
4277}; 4728};
4278 4729
4279static int __init init_ext4_fs(void) 4730int __init ext4_init_feat_adverts(void)
4731{
4732 struct ext4_features *ef;
4733 int ret = -ENOMEM;
4734
4735 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
4736 if (!ef)
4737 goto out;
4738
4739 ef->f_kobj.kset = ext4_kset;
4740 init_completion(&ef->f_kobj_unregister);
4741 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
4742 "features");
4743 if (ret) {
4744 kfree(ef);
4745 goto out;
4746 }
4747
4748 ext4_feat = ef;
4749 ret = 0;
4750out:
4751 return ret;
4752}
4753
4754static int __init ext4_init_fs(void)
4280{ 4755{
4281 int err; 4756 int err;
4282 4757
4283 ext4_check_flag_values(); 4758 ext4_check_flag_values();
4284 err = init_ext4_system_zone(); 4759 err = ext4_init_pageio();
4285 if (err) 4760 if (err)
4286 return err; 4761 return err;
4762 err = ext4_init_system_zone();
4763 if (err)
4764 goto out5;
4287 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4765 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4288 if (!ext4_kset) 4766 if (!ext4_kset)
4289 goto out4; 4767 goto out4;
4290 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4768 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4291 err = init_ext4_mballoc(); 4769
4770 err = ext4_init_feat_adverts();
4771
4772 err = ext4_init_mballoc();
4292 if (err) 4773 if (err)
4293 goto out3; 4774 goto out3;
4294 4775
4295 err = init_ext4_xattr(); 4776 err = ext4_init_xattr();
4296 if (err) 4777 if (err)
4297 goto out2; 4778 goto out2;
4298 err = init_inodecache(); 4779 err = init_inodecache();
@@ -4303,38 +4784,46 @@ static int __init init_ext4_fs(void)
4303 err = register_filesystem(&ext4_fs_type); 4784 err = register_filesystem(&ext4_fs_type);
4304 if (err) 4785 if (err)
4305 goto out; 4786 goto out;
4787
4788 ext4_li_info = NULL;
4789 mutex_init(&ext4_li_mtx);
4306 return 0; 4790 return 0;
4307out: 4791out:
4308 unregister_as_ext2(); 4792 unregister_as_ext2();
4309 unregister_as_ext3(); 4793 unregister_as_ext3();
4310 destroy_inodecache(); 4794 destroy_inodecache();
4311out1: 4795out1:
4312 exit_ext4_xattr(); 4796 ext4_exit_xattr();
4313out2: 4797out2:
4314 exit_ext4_mballoc(); 4798 ext4_exit_mballoc();
4315out3: 4799out3:
4800 kfree(ext4_feat);
4316 remove_proc_entry("fs/ext4", NULL); 4801 remove_proc_entry("fs/ext4", NULL);
4317 kset_unregister(ext4_kset); 4802 kset_unregister(ext4_kset);
4318out4: 4803out4:
4319 exit_ext4_system_zone(); 4804 ext4_exit_system_zone();
4805out5:
4806 ext4_exit_pageio();
4320 return err; 4807 return err;
4321} 4808}
4322 4809
4323static void __exit exit_ext4_fs(void) 4810static void __exit ext4_exit_fs(void)
4324{ 4811{
4812 ext4_destroy_lazyinit_thread();
4325 unregister_as_ext2(); 4813 unregister_as_ext2();
4326 unregister_as_ext3(); 4814 unregister_as_ext3();
4327 unregister_filesystem(&ext4_fs_type); 4815 unregister_filesystem(&ext4_fs_type);
4328 destroy_inodecache(); 4816 destroy_inodecache();
4329 exit_ext4_xattr(); 4817 ext4_exit_xattr();
4330 exit_ext4_mballoc(); 4818 ext4_exit_mballoc();
4331 remove_proc_entry("fs/ext4", NULL); 4819 remove_proc_entry("fs/ext4", NULL);
4332 kset_unregister(ext4_kset); 4820 kset_unregister(ext4_kset);
4333 exit_ext4_system_zone(); 4821 ext4_exit_system_zone();
4822 ext4_exit_pageio();
4334} 4823}
4335 4824
4336MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4825MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
4337MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4826MODULE_DESCRIPTION("Fourth Extended Filesystem");
4338MODULE_LICENSE("GPL"); 4827MODULE_LICENSE("GPL");
4339module_init(init_ext4_fs) 4828module_init(ext4_init_fs)
4340module_exit(exit_ext4_fs) 4829module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a8cd8dff1ad..fa4b899da4b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1588#undef BLOCK_HASH_SHIFT 1588#undef BLOCK_HASH_SHIFT
1589 1589
1590int __init 1590int __init
1591init_ext4_xattr(void) 1591ext4_init_xattr(void)
1592{ 1592{
1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1594 if (!ext4_xattr_cache) 1594 if (!ext4_xattr_cache)
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
1597} 1597}
1598 1598
1599void 1599void
1600exit_ext4_xattr(void) 1600ext4_exit_xattr(void)
1601{ 1601{
1602 if (ext4_xattr_cache) 1602 if (ext4_xattr_cache)
1603 mb_cache_destroy(ext4_xattr_cache); 1603 mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e43905..281dd8353652 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
84 struct ext4_inode *raw_inode, handle_t *handle); 84 struct ext4_inode *raw_inode, handle_t *handle);
85 85
86extern int init_ext4_xattr(void); 86extern int __init ext4_init_xattr(void);
87extern void exit_ext4_xattr(void); 87extern void ext4_exit_xattr(void);
88 88
89extern const struct xattr_handler *ext4_xattr_handlers[]; 89extern const struct xattr_handler *ext4_xattr_handlers[];
90 90
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
121{ 121{
122} 122}
123 123
124static inline int 124static __init inline int
125init_ext4_xattr(void) 125init_ext4_xattr(void)
126{ 126{
127 return 0; 127 return 0;
128} 128}
129 129
130static inline void 130static inline void
131exit_ext4_xattr(void) 131ext4_exit_xattr(void)
132{ 132{
133} 133}
134 134
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f855ea4fc888..e92fdbb3bc3a 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
530 return thaw_super(sb); 530 return thaw_super(sb);
531} 531}
532 532
533static int ioctl_fstrim(struct file *filp, void __user *argp)
534{
535 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
536 struct fstrim_range range;
537 int ret = 0;
538
539 if (!capable(CAP_SYS_ADMIN))
540 return -EPERM;
541
542 /* If filesystem doesn't support trim feature, return. */
543 if (sb->s_op->trim_fs == NULL)
544 return -EOPNOTSUPP;
545
546 /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
547 if (sb->s_bdev == NULL)
548 return -EINVAL;
549
550 if (argp == NULL) {
551 range.start = 0;
552 range.len = ULLONG_MAX;
553 range.minlen = 0;
554 } else if (copy_from_user(&range, argp, sizeof(range)))
555 return -EFAULT;
556
557 ret = sb->s_op->trim_fs(sb, &range);
558 if (ret < 0)
559 return ret;
560
561 if ((argp != NULL) &&
562 (copy_to_user(argp, &range, sizeof(range))))
563 return -EFAULT;
564
565 return 0;
566}
567
533/* 568/*
534 * When you add any new common ioctls to the switches above and below 569 * When you add any new common ioctls to the switches above and below
535 * please update compat_sys_ioctl() too. 570 * please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
580 error = ioctl_fsthaw(filp); 615 error = ioctl_fsthaw(filp);
581 break; 616 break;
582 617
618 case FITRIM:
619 error = ioctl_fstrim(filp, argp);
620 break;
621
583 case FS_IOC_FIEMAP: 622 case FS_IOC_FIEMAP:
584 return ioctl_fiemap(filp, arg); 623 return ioctl_fiemap(filp, arg);
585 624
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6571a056e55d..6a79fd0a1a32 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
299 transaction->t_chp_stats.cs_forced_to_close++; 299 transaction->t_chp_stats.cs_forced_to_close++;
300 spin_unlock(&journal->j_list_lock); 300 spin_unlock(&journal->j_list_lock);
301 jbd_unlock_bh_state(bh); 301 jbd_unlock_bh_state(bh);
302 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
303 /*
304 * The journal thread is dead; so starting and
305 * waiting for a commit to finish will cause
306 * us to wait for a _very_ long time.
307 */
308 printk(KERN_ERR "JBD2: %s: "
309 "Waiting for Godot: block %llu\n",
310 journal->j_devname,
311 (unsigned long long) bh->b_blocknr);
302 jbd2_log_start_commit(journal, tid); 312 jbd2_log_start_commit(journal, tid);
303 jbd2_log_wait_commit(journal, tid); 313 jbd2_log_wait_commit(journal, tid);
304 ret = 1; 314 ret = 1;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index bc6be8bda1cc..f3ad1598b201 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
26#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
27#include <linux/bio.h> 27#include <linux/bio.h>
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/bitops.h>
29#include <trace/events/jbd2.h> 30#include <trace/events/jbd2.h>
31#include <asm/system.h>
30 32
31/* 33/*
32 * Default IO end handler for temporary BJ_IO buffer_heads. 34 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
201 spin_lock(&journal->j_list_lock); 203 spin_lock(&journal->j_list_lock);
202 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 204 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
203 mapping = jinode->i_vfs_inode->i_mapping; 205 mapping = jinode->i_vfs_inode->i_mapping;
204 jinode->i_flags |= JI_COMMIT_RUNNING; 206 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
205 spin_unlock(&journal->j_list_lock); 207 spin_unlock(&journal->j_list_lock);
206 /* 208 /*
207 * submit the inode data buffers. We use writepage 209 * submit the inode data buffers. We use writepage
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
216 spin_lock(&journal->j_list_lock); 218 spin_lock(&journal->j_list_lock);
217 J_ASSERT(jinode->i_transaction == commit_transaction); 219 J_ASSERT(jinode->i_transaction == commit_transaction);
218 commit_transaction->t_flushed_data_blocks = 1; 220 commit_transaction->t_flushed_data_blocks = 1;
219 jinode->i_flags &= ~JI_COMMIT_RUNNING; 221 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
222 smp_mb__after_clear_bit();
220 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 223 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
221 } 224 }
222 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
237 /* For locking, see the comment in journal_submit_data_buffers() */ 240 /* For locking, see the comment in journal_submit_data_buffers() */
238 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
239 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 242 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
240 jinode->i_flags |= JI_COMMIT_RUNNING; 243 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
241 spin_unlock(&journal->j_list_lock); 244 spin_unlock(&journal->j_list_lock);
242 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 245 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
243 if (err) { 246 if (err) {
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
253 ret = err; 256 ret = err;
254 } 257 }
255 spin_lock(&journal->j_list_lock); 258 spin_lock(&journal->j_list_lock);
256 jinode->i_flags &= ~JI_COMMIT_RUNNING; 259 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
260 smp_mb__after_clear_bit();
257 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 261 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
258 } 262 }
259 263
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 168d1894317a..538417c1fdbb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
42#include <linux/log2.h> 42#include <linux/log2.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h>
45 46
46#define CREATE_TRACE_POINTS 47#define CREATE_TRACE_POINTS
47#include <trace/events/jbd2.h> 48#include <trace/events/jbd2.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/system.h>
51 53
52EXPORT_SYMBOL(jbd2_journal_extend); 54EXPORT_SYMBOL(jbd2_journal_extend);
53EXPORT_SYMBOL(jbd2_journal_stop); 55EXPORT_SYMBOL(jbd2_journal_stop);
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
2210restart: 2212restart:
2211 spin_lock(&journal->j_list_lock); 2213 spin_lock(&journal->j_list_lock);
2212 /* Is commit writing out inode - we have to wait */ 2214 /* Is commit writing out inode - we have to wait */
2213 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2215 if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
2214 wait_queue_head_t *wq; 2216 wait_queue_head_t *wq;
2215 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2217 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2216 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2218 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3479d6e0a83..6bf0a242613e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -156,6 +156,7 @@ alloc_transaction:
156 */ 156 */
157repeat: 157repeat:
158 read_lock(&journal->j_state_lock); 158 read_lock(&journal->j_state_lock);
159 BUG_ON(journal->j_flags & JBD2_UNMOUNT);
159 if (is_journal_aborted(journal) || 160 if (is_journal_aborted(journal) ||
160 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 161 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
161 read_unlock(&journal->j_state_lock); 162 read_unlock(&journal->j_state_lock);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 646b462d04df..5027a599077d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -891,6 +891,14 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
891 nr_blocks << (sb->s_blocksize_bits - 9), 891 nr_blocks << (sb->s_blocksize_bits - 9),
892 gfp_mask, flags); 892 gfp_mask, flags);
893} 893}
894static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
895 sector_t nr_blocks, gfp_t gfp_mask)
896{
897 return blkdev_issue_zeroout(sb->s_bdev,
898 block << (sb->s_blocksize_bits - 9),
899 nr_blocks << (sb->s_blocksize_bits - 9),
900 gfp_mask);
901}
894 902
895extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 903extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
896 904
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b2a6009cba10..6ed7ace74b7c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -32,6 +32,12 @@
32#define SEEK_END 2 /* seek relative to end of file */ 32#define SEEK_END 2 /* seek relative to end of file */
33#define SEEK_MAX SEEK_END 33#define SEEK_MAX SEEK_END
34 34
35struct fstrim_range {
36 uint64_t start;
37 uint64_t len;
38 uint64_t minlen;
39};
40
35/* And dynamically-tunable limits and defaults: */ 41/* And dynamically-tunable limits and defaults: */
36struct files_stat_struct { 42struct files_stat_struct {
37 unsigned long nr_files; /* read only */ 43 unsigned long nr_files; /* read only */
@@ -317,6 +323,7 @@ struct inodes_stat_t {
317#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ 323#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
318#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 324#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
319#define FITHAW _IOWR('X', 120, int) /* Thaw */ 325#define FITHAW _IOWR('X', 120, int) /* Thaw */
326#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
320 327
321#define FS_IOC_GETFLAGS _IOR('f', 1, long) 328#define FS_IOC_GETFLAGS _IOR('f', 1, long)
322#define FS_IOC_SETFLAGS _IOW('f', 2, long) 329#define FS_IOC_SETFLAGS _IOW('f', 2, long)
@@ -1604,6 +1611,7 @@ struct super_operations {
1604 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1611 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1605#endif 1612#endif
1606 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1613 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1614 int (*trim_fs) (struct super_block *, struct fstrim_range *);
1607}; 1615};
1608 1616
1609/* 1617/*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0b52924a0cb6..2ae86aa21fce 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -395,7 +395,7 @@ struct jbd2_inode {
395 struct inode *i_vfs_inode; 395 struct inode *i_vfs_inode;
396 396
397 /* Flags of inode [j_list_lock] */ 397 /* Flags of inode [j_list_lock] */
398 unsigned int i_flags; 398 unsigned long i_flags;
399}; 399};
400 400
401struct jbd2_revoke_table_s; 401struct jbd2_revoke_table_s;
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 8a7d510ffa9c..46f6ba56fa91 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -78,6 +78,11 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
78 return 1; 78 return 1;
79} 79}
80 80
81static inline int percpu_counter_initialized(struct percpu_counter *fbc)
82{
83 return (fbc->counters != NULL);
84}
85
81#else 86#else
82 87
83struct percpu_counter { 88struct percpu_counter {
@@ -143,6 +148,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
143 return percpu_counter_read(fbc); 148 return percpu_counter_read(fbc);
144} 149}
145 150
151static inline int percpu_counter_initialized(struct percpu_counter *fbc)
152{
153 return 1;
154}
155
146#endif /* CONFIG_SMP */ 156#endif /* CONFIG_SMP */
147 157
148static inline void percpu_counter_inc(struct percpu_counter *fbc) 158static inline void percpu_counter_inc(struct percpu_counter *fbc)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d5c7aaadda59..09eec350054d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -141,6 +141,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
141 141
142int generic_writepages(struct address_space *mapping, 142int generic_writepages(struct address_space *mapping,
143 struct writeback_control *wbc); 143 struct writeback_control *wbc);
144void tag_pages_for_writeback(struct address_space *mapping,
145 pgoff_t start, pgoff_t end);
144int write_cache_pages(struct address_space *mapping, 146int write_cache_pages(struct address_space *mapping,
145 struct writeback_control *wbc, writepage_t writepage, 147 struct writeback_control *wbc, writepage_t writepage,
146 void *data); 148 void *data);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6bcb00645de4..289010d3270b 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -21,7 +21,8 @@ TRACE_EVENT(ext4_free_inode,
21 TP_ARGS(inode), 21 TP_ARGS(inode),
22 22
23 TP_STRUCT__entry( 23 TP_STRUCT__entry(
24 __field( dev_t, dev ) 24 __field( int, dev_major )
25 __field( int, dev_minor )
25 __field( ino_t, ino ) 26 __field( ino_t, ino )
26 __field( umode_t, mode ) 27 __field( umode_t, mode )
27 __field( uid_t, uid ) 28 __field( uid_t, uid )
@@ -30,7 +31,8 @@ TRACE_EVENT(ext4_free_inode,
30 ), 31 ),
31 32
32 TP_fast_assign( 33 TP_fast_assign(
33 __entry->dev = inode->i_sb->s_dev; 34 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
35 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
34 __entry->ino = inode->i_ino; 36 __entry->ino = inode->i_ino;
35 __entry->mode = inode->i_mode; 37 __entry->mode = inode->i_mode;
36 __entry->uid = inode->i_uid; 38 __entry->uid = inode->i_uid;
@@ -38,9 +40,10 @@ TRACE_EVENT(ext4_free_inode,
38 __entry->blocks = inode->i_blocks; 40 __entry->blocks = inode->i_blocks;
39 ), 41 ),
40 42
41 TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu", 43 TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
42 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 44 __entry->dev_major, __entry->dev_minor,
43 __entry->mode, __entry->uid, __entry->gid, 45 (unsigned long) __entry->ino, __entry->mode,
46 __entry->uid, __entry->gid,
44 (unsigned long long) __entry->blocks) 47 (unsigned long long) __entry->blocks)
45); 48);
46 49
@@ -50,20 +53,22 @@ TRACE_EVENT(ext4_request_inode,
50 TP_ARGS(dir, mode), 53 TP_ARGS(dir, mode),
51 54
52 TP_STRUCT__entry( 55 TP_STRUCT__entry(
53 __field( dev_t, dev ) 56 __field( int, dev_major )
57 __field( int, dev_minor )
54 __field( ino_t, dir ) 58 __field( ino_t, dir )
55 __field( umode_t, mode ) 59 __field( umode_t, mode )
56 ), 60 ),
57 61
58 TP_fast_assign( 62 TP_fast_assign(
59 __entry->dev = dir->i_sb->s_dev; 63 __entry->dev_major = MAJOR(dir->i_sb->s_dev);
64 __entry->dev_minor = MINOR(dir->i_sb->s_dev);
60 __entry->dir = dir->i_ino; 65 __entry->dir = dir->i_ino;
61 __entry->mode = mode; 66 __entry->mode = mode;
62 ), 67 ),
63 68
64 TP_printk("dev %s dir %lu mode 0%o", 69 TP_printk("dev %d,%d dir %lu mode 0%o",
65 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, 70 __entry->dev_major, __entry->dev_minor,
66 __entry->mode) 71 (unsigned long) __entry->dir, __entry->mode)
67); 72);
68 73
69TRACE_EVENT(ext4_allocate_inode, 74TRACE_EVENT(ext4_allocate_inode,
@@ -72,21 +77,24 @@ TRACE_EVENT(ext4_allocate_inode,
72 TP_ARGS(inode, dir, mode), 77 TP_ARGS(inode, dir, mode),
73 78
74 TP_STRUCT__entry( 79 TP_STRUCT__entry(
75 __field( dev_t, dev ) 80 __field( int, dev_major )
81 __field( int, dev_minor )
76 __field( ino_t, ino ) 82 __field( ino_t, ino )
77 __field( ino_t, dir ) 83 __field( ino_t, dir )
78 __field( umode_t, mode ) 84 __field( umode_t, mode )
79 ), 85 ),
80 86
81 TP_fast_assign( 87 TP_fast_assign(
82 __entry->dev = inode->i_sb->s_dev; 88 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
89 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
83 __entry->ino = inode->i_ino; 90 __entry->ino = inode->i_ino;
84 __entry->dir = dir->i_ino; 91 __entry->dir = dir->i_ino;
85 __entry->mode = mode; 92 __entry->mode = mode;
86 ), 93 ),
87 94
88 TP_printk("dev %s ino %lu dir %lu mode 0%o", 95 TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
89 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 96 __entry->dev_major, __entry->dev_minor,
97 (unsigned long) __entry->ino,
90 (unsigned long) __entry->dir, __entry->mode) 98 (unsigned long) __entry->dir, __entry->mode)
91); 99);
92 100
@@ -98,7 +106,8 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
98 TP_ARGS(inode, pos, len, flags), 106 TP_ARGS(inode, pos, len, flags),
99 107
100 TP_STRUCT__entry( 108 TP_STRUCT__entry(
101 __field( dev_t, dev ) 109 __field( int, dev_major )
110 __field( int, dev_minor )
102 __field( ino_t, ino ) 111 __field( ino_t, ino )
103 __field( loff_t, pos ) 112 __field( loff_t, pos )
104 __field( unsigned int, len ) 113 __field( unsigned int, len )
@@ -106,15 +115,17 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
106 ), 115 ),
107 116
108 TP_fast_assign( 117 TP_fast_assign(
109 __entry->dev = inode->i_sb->s_dev; 118 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
119 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
110 __entry->ino = inode->i_ino; 120 __entry->ino = inode->i_ino;
111 __entry->pos = pos; 121 __entry->pos = pos;
112 __entry->len = len; 122 __entry->len = len;
113 __entry->flags = flags; 123 __entry->flags = flags;
114 ), 124 ),
115 125
116 TP_printk("dev %s ino %lu pos %llu len %u flags %u", 126 TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
117 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 127 __entry->dev_major, __entry->dev_minor,
128 (unsigned long) __entry->ino,
118 __entry->pos, __entry->len, __entry->flags) 129 __entry->pos, __entry->len, __entry->flags)
119); 130);
120 131
@@ -141,7 +152,8 @@ DECLARE_EVENT_CLASS(ext4__write_end,
141 TP_ARGS(inode, pos, len, copied), 152 TP_ARGS(inode, pos, len, copied),
142 153
143 TP_STRUCT__entry( 154 TP_STRUCT__entry(
144 __field( dev_t, dev ) 155 __field( int, dev_major )
156 __field( int, dev_minor )
145 __field( ino_t, ino ) 157 __field( ino_t, ino )
146 __field( loff_t, pos ) 158 __field( loff_t, pos )
147 __field( unsigned int, len ) 159 __field( unsigned int, len )
@@ -149,16 +161,18 @@ DECLARE_EVENT_CLASS(ext4__write_end,
149 ), 161 ),
150 162
151 TP_fast_assign( 163 TP_fast_assign(
152 __entry->dev = inode->i_sb->s_dev; 164 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
165 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
153 __entry->ino = inode->i_ino; 166 __entry->ino = inode->i_ino;
154 __entry->pos = pos; 167 __entry->pos = pos;
155 __entry->len = len; 168 __entry->len = len;
156 __entry->copied = copied; 169 __entry->copied = copied;
157 ), 170 ),
158 171
159 TP_printk("dev %s ino %lu pos %llu len %u copied %u", 172 TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
160 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 173 __entry->dev_major, __entry->dev_minor,
161 __entry->pos, __entry->len, __entry->copied) 174 (unsigned long) __entry->ino, __entry->pos,
175 __entry->len, __entry->copied)
162); 176);
163 177
164DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, 178DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
@@ -199,21 +213,23 @@ TRACE_EVENT(ext4_writepage,
199 TP_ARGS(inode, page), 213 TP_ARGS(inode, page),
200 214
201 TP_STRUCT__entry( 215 TP_STRUCT__entry(
202 __field( dev_t, dev ) 216 __field( int, dev_major )
217 __field( int, dev_minor )
203 __field( ino_t, ino ) 218 __field( ino_t, ino )
204 __field( pgoff_t, index ) 219 __field( pgoff_t, index )
205 220
206 ), 221 ),
207 222
208 TP_fast_assign( 223 TP_fast_assign(
209 __entry->dev = inode->i_sb->s_dev; 224 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
225 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
210 __entry->ino = inode->i_ino; 226 __entry->ino = inode->i_ino;
211 __entry->index = page->index; 227 __entry->index = page->index;
212 ), 228 ),
213 229
214 TP_printk("dev %s ino %lu page_index %lu", 230 TP_printk("dev %d,%d ino %lu page_index %lu",
215 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 231 __entry->dev_major, __entry->dev_minor,
216 __entry->index) 232 (unsigned long) __entry->ino, __entry->index)
217); 233);
218 234
219TRACE_EVENT(ext4_da_writepages, 235TRACE_EVENT(ext4_da_writepages,
@@ -222,13 +238,13 @@ TRACE_EVENT(ext4_da_writepages,
222 TP_ARGS(inode, wbc), 238 TP_ARGS(inode, wbc),
223 239
224 TP_STRUCT__entry( 240 TP_STRUCT__entry(
225 __field( dev_t, dev ) 241 __field( int, dev_major )
242 __field( int, dev_minor )
226 __field( ino_t, ino ) 243 __field( ino_t, ino )
227 __field( long, nr_to_write ) 244 __field( long, nr_to_write )
228 __field( long, pages_skipped ) 245 __field( long, pages_skipped )
229 __field( loff_t, range_start ) 246 __field( loff_t, range_start )
230 __field( loff_t, range_end ) 247 __field( loff_t, range_end )
231 __field( char, nonblocking )
232 __field( char, for_kupdate ) 248 __field( char, for_kupdate )
233 __field( char, for_reclaim ) 249 __field( char, for_reclaim )
234 __field( char, range_cyclic ) 250 __field( char, range_cyclic )
@@ -236,7 +252,8 @@ TRACE_EVENT(ext4_da_writepages,
236 ), 252 ),
237 253
238 TP_fast_assign( 254 TP_fast_assign(
239 __entry->dev = inode->i_sb->s_dev; 255 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
256 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
240 __entry->ino = inode->i_ino; 257 __entry->ino = inode->i_ino;
241 __entry->nr_to_write = wbc->nr_to_write; 258 __entry->nr_to_write = wbc->nr_to_write;
242 __entry->pages_skipped = wbc->pages_skipped; 259 __entry->pages_skipped = wbc->pages_skipped;
@@ -248,11 +265,11 @@ TRACE_EVENT(ext4_da_writepages,
248 __entry->writeback_index = inode->i_mapping->writeback_index; 265 __entry->writeback_index = inode->i_mapping->writeback_index;
249 ), 266 ),
250 267
251 TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld " 268 TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
252 "range_start %llu range_end %llu " 269 "range_start %llu range_end %llu "
253 "for_kupdate %d for_reclaim %d " 270 "for_kupdate %d for_reclaim %d "
254 "range_cyclic %d writeback_index %lu", 271 "range_cyclic %d writeback_index %lu",
255 jbd2_dev_to_name(__entry->dev), 272 __entry->dev_major, __entry->dev_minor,
256 (unsigned long) __entry->ino, __entry->nr_to_write, 273 (unsigned long) __entry->ino, __entry->nr_to_write,
257 __entry->pages_skipped, __entry->range_start, 274 __entry->pages_skipped, __entry->range_start,
258 __entry->range_end, 275 __entry->range_end,
@@ -267,7 +284,8 @@ TRACE_EVENT(ext4_da_write_pages,
267 TP_ARGS(inode, mpd), 284 TP_ARGS(inode, mpd),
268 285
269 TP_STRUCT__entry( 286 TP_STRUCT__entry(
270 __field( dev_t, dev ) 287 __field( int, dev_major )
288 __field( int, dev_minor )
271 __field( ino_t, ino ) 289 __field( ino_t, ino )
272 __field( __u64, b_blocknr ) 290 __field( __u64, b_blocknr )
273 __field( __u32, b_size ) 291 __field( __u32, b_size )
@@ -278,7 +296,8 @@ TRACE_EVENT(ext4_da_write_pages,
278 ), 296 ),
279 297
280 TP_fast_assign( 298 TP_fast_assign(
281 __entry->dev = inode->i_sb->s_dev; 299 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
300 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
282 __entry->ino = inode->i_ino; 301 __entry->ino = inode->i_ino;
283 __entry->b_blocknr = mpd->b_blocknr; 302 __entry->b_blocknr = mpd->b_blocknr;
284 __entry->b_size = mpd->b_size; 303 __entry->b_size = mpd->b_size;
@@ -288,8 +307,9 @@ TRACE_EVENT(ext4_da_write_pages,
288 __entry->pages_written = mpd->pages_written; 307 __entry->pages_written = mpd->pages_written;
289 ), 308 ),
290 309
291 TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", 310 TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
292 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 311 __entry->dev_major, __entry->dev_minor,
312 (unsigned long) __entry->ino,
293 __entry->b_blocknr, __entry->b_size, 313 __entry->b_blocknr, __entry->b_size,
294 __entry->b_state, __entry->first_page, 314 __entry->b_state, __entry->first_page,
295 __entry->io_done, __entry->pages_written) 315 __entry->io_done, __entry->pages_written)
@@ -302,7 +322,8 @@ TRACE_EVENT(ext4_da_writepages_result,
302 TP_ARGS(inode, wbc, ret, pages_written), 322 TP_ARGS(inode, wbc, ret, pages_written),
303 323
304 TP_STRUCT__entry( 324 TP_STRUCT__entry(
305 __field( dev_t, dev ) 325 __field( int, dev_major )
326 __field( int, dev_minor )
306 __field( ino_t, ino ) 327 __field( ino_t, ino )
307 __field( int, ret ) 328 __field( int, ret )
308 __field( int, pages_written ) 329 __field( int, pages_written )
@@ -312,7 +333,8 @@ TRACE_EVENT(ext4_da_writepages_result,
312 ), 333 ),
313 334
314 TP_fast_assign( 335 TP_fast_assign(
315 __entry->dev = inode->i_sb->s_dev; 336 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
337 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
316 __entry->ino = inode->i_ino; 338 __entry->ino = inode->i_ino;
317 __entry->ret = ret; 339 __entry->ret = ret;
318 __entry->pages_written = pages_written; 340 __entry->pages_written = pages_written;
@@ -321,8 +343,8 @@ TRACE_EVENT(ext4_da_writepages_result,
321 __entry->writeback_index = inode->i_mapping->writeback_index; 343 __entry->writeback_index = inode->i_mapping->writeback_index;
322 ), 344 ),
323 345
324 TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu", 346 TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
325 jbd2_dev_to_name(__entry->dev), 347 __entry->dev_major, __entry->dev_minor,
326 (unsigned long) __entry->ino, __entry->ret, 348 (unsigned long) __entry->ino, __entry->ret,
327 __entry->pages_written, __entry->pages_skipped, 349 __entry->pages_written, __entry->pages_skipped,
328 __entry->more_io, 350 __entry->more_io,
@@ -336,20 +358,23 @@ TRACE_EVENT(ext4_discard_blocks,
336 TP_ARGS(sb, blk, count), 358 TP_ARGS(sb, blk, count),
337 359
338 TP_STRUCT__entry( 360 TP_STRUCT__entry(
339 __field( dev_t, dev ) 361 __field( int, dev_major )
362 __field( int, dev_minor )
340 __field( __u64, blk ) 363 __field( __u64, blk )
341 __field( __u64, count ) 364 __field( __u64, count )
342 365
343 ), 366 ),
344 367
345 TP_fast_assign( 368 TP_fast_assign(
346 __entry->dev = sb->s_dev; 369 __entry->dev_major = MAJOR(sb->s_dev);
370 __entry->dev_minor = MINOR(sb->s_dev);
347 __entry->blk = blk; 371 __entry->blk = blk;
348 __entry->count = count; 372 __entry->count = count;
349 ), 373 ),
350 374
351 TP_printk("dev %s blk %llu count %llu", 375 TP_printk("dev %d,%d blk %llu count %llu",
352 jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count) 376 __entry->dev_major, __entry->dev_minor,
377 __entry->blk, __entry->count)
353); 378);
354 379
355DECLARE_EVENT_CLASS(ext4__mb_new_pa, 380DECLARE_EVENT_CLASS(ext4__mb_new_pa,
@@ -359,7 +384,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
359 TP_ARGS(ac, pa), 384 TP_ARGS(ac, pa),
360 385
361 TP_STRUCT__entry( 386 TP_STRUCT__entry(
362 __field( dev_t, dev ) 387 __field( int, dev_major )
388 __field( int, dev_minor )
363 __field( ino_t, ino ) 389 __field( ino_t, ino )
364 __field( __u64, pa_pstart ) 390 __field( __u64, pa_pstart )
365 __field( __u32, pa_len ) 391 __field( __u32, pa_len )
@@ -368,16 +394,18 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
368 ), 394 ),
369 395
370 TP_fast_assign( 396 TP_fast_assign(
371 __entry->dev = ac->ac_sb->s_dev; 397 __entry->dev_major = MAJOR(ac->ac_sb->s_dev);
398 __entry->dev_minor = MINOR(ac->ac_sb->s_dev);
372 __entry->ino = ac->ac_inode->i_ino; 399 __entry->ino = ac->ac_inode->i_ino;
373 __entry->pa_pstart = pa->pa_pstart; 400 __entry->pa_pstart = pa->pa_pstart;
374 __entry->pa_len = pa->pa_len; 401 __entry->pa_len = pa->pa_len;
375 __entry->pa_lstart = pa->pa_lstart; 402 __entry->pa_lstart = pa->pa_lstart;
376 ), 403 ),
377 404
378 TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", 405 TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
379 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 406 __entry->dev_major, __entry->dev_minor,
380 __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) 407 (unsigned long) __entry->ino, __entry->pa_pstart,
408 __entry->pa_len, __entry->pa_lstart)
381); 409);
382 410
383DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, 411DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
@@ -398,14 +426,15 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
398 426
399TRACE_EVENT(ext4_mb_release_inode_pa, 427TRACE_EVENT(ext4_mb_release_inode_pa,
400 TP_PROTO(struct super_block *sb, 428 TP_PROTO(struct super_block *sb,
401 struct ext4_allocation_context *ac, 429 struct inode *inode,
402 struct ext4_prealloc_space *pa, 430 struct ext4_prealloc_space *pa,
403 unsigned long long block, unsigned int count), 431 unsigned long long block, unsigned int count),
404 432
405 TP_ARGS(sb, ac, pa, block, count), 433 TP_ARGS(sb, inode, pa, block, count),
406 434
407 TP_STRUCT__entry( 435 TP_STRUCT__entry(
408 __field( dev_t, dev ) 436 __field( int, dev_major )
437 __field( int, dev_minor )
409 __field( ino_t, ino ) 438 __field( ino_t, ino )
410 __field( __u64, block ) 439 __field( __u64, block )
411 __field( __u32, count ) 440 __field( __u32, count )
@@ -413,43 +442,42 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
413 ), 442 ),
414 443
415 TP_fast_assign( 444 TP_fast_assign(
416 __entry->dev = sb->s_dev; 445 __entry->dev_major = MAJOR(sb->s_dev);
417 __entry->ino = (ac && ac->ac_inode) ? 446 __entry->dev_minor = MINOR(sb->s_dev);
418 ac->ac_inode->i_ino : 0; 447 __entry->ino = inode->i_ino;
419 __entry->block = block; 448 __entry->block = block;
420 __entry->count = count; 449 __entry->count = count;
421 ), 450 ),
422 451
423 TP_printk("dev %s ino %lu block %llu count %u", 452 TP_printk("dev %d,%d ino %lu block %llu count %u",
424 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 453 __entry->dev_major, __entry->dev_minor,
425 __entry->block, __entry->count) 454 (unsigned long) __entry->ino, __entry->block, __entry->count)
426); 455);
427 456
428TRACE_EVENT(ext4_mb_release_group_pa, 457TRACE_EVENT(ext4_mb_release_group_pa,
429 TP_PROTO(struct super_block *sb, 458 TP_PROTO(struct super_block *sb,
430 struct ext4_allocation_context *ac,
431 struct ext4_prealloc_space *pa), 459 struct ext4_prealloc_space *pa),
432 460
433 TP_ARGS(sb, ac, pa), 461 TP_ARGS(sb, pa),
434 462
435 TP_STRUCT__entry( 463 TP_STRUCT__entry(
436 __field( dev_t, dev ) 464 __field( int, dev_major )
437 __field( ino_t, ino ) 465 __field( int, dev_minor )
438 __field( __u64, pa_pstart ) 466 __field( __u64, pa_pstart )
439 __field( __u32, pa_len ) 467 __field( __u32, pa_len )
440 468
441 ), 469 ),
442 470
443 TP_fast_assign( 471 TP_fast_assign(
444 __entry->dev = sb->s_dev; 472 __entry->dev_major = MAJOR(sb->s_dev);
445 __entry->ino = (ac && ac->ac_inode) ? 473 __entry->dev_minor = MINOR(sb->s_dev);
446 ac->ac_inode->i_ino : 0;
447 __entry->pa_pstart = pa->pa_pstart; 474 __entry->pa_pstart = pa->pa_pstart;
448 __entry->pa_len = pa->pa_len; 475 __entry->pa_len = pa->pa_len;
449 ), 476 ),
450 477
451 TP_printk("dev %s pstart %llu len %u", 478 TP_printk("dev %d,%d pstart %llu len %u",
452 jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len) 479 __entry->dev_major, __entry->dev_minor,
480 __entry->pa_pstart, __entry->pa_len)
453); 481);
454 482
455TRACE_EVENT(ext4_discard_preallocations, 483TRACE_EVENT(ext4_discard_preallocations,
@@ -458,18 +486,21 @@ TRACE_EVENT(ext4_discard_preallocations,
458 TP_ARGS(inode), 486 TP_ARGS(inode),
459 487
460 TP_STRUCT__entry( 488 TP_STRUCT__entry(
461 __field( dev_t, dev ) 489 __field( int, dev_major )
490 __field( int, dev_minor )
462 __field( ino_t, ino ) 491 __field( ino_t, ino )
463 492
464 ), 493 ),
465 494
466 TP_fast_assign( 495 TP_fast_assign(
467 __entry->dev = inode->i_sb->s_dev; 496 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
497 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
468 __entry->ino = inode->i_ino; 498 __entry->ino = inode->i_ino;
469 ), 499 ),
470 500
471 TP_printk("dev %s ino %lu", 501 TP_printk("dev %d,%d ino %lu",
472 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) 502 __entry->dev_major, __entry->dev_minor,
503 (unsigned long) __entry->ino)
473); 504);
474 505
475TRACE_EVENT(ext4_mb_discard_preallocations, 506TRACE_EVENT(ext4_mb_discard_preallocations,
@@ -478,18 +509,20 @@ TRACE_EVENT(ext4_mb_discard_preallocations,
478 TP_ARGS(sb, needed), 509 TP_ARGS(sb, needed),
479 510
480 TP_STRUCT__entry( 511 TP_STRUCT__entry(
481 __field( dev_t, dev ) 512 __field( int, dev_major )
513 __field( int, dev_minor )
482 __field( int, needed ) 514 __field( int, needed )
483 515
484 ), 516 ),
485 517
486 TP_fast_assign( 518 TP_fast_assign(
487 __entry->dev = sb->s_dev; 519 __entry->dev_major = MAJOR(sb->s_dev);
520 __entry->dev_minor = MINOR(sb->s_dev);
488 __entry->needed = needed; 521 __entry->needed = needed;
489 ), 522 ),
490 523
491 TP_printk("dev %s needed %d", 524 TP_printk("dev %d,%d needed %d",
492 jbd2_dev_to_name(__entry->dev), __entry->needed) 525 __entry->dev_major, __entry->dev_minor, __entry->needed)
493); 526);
494 527
495TRACE_EVENT(ext4_request_blocks, 528TRACE_EVENT(ext4_request_blocks,
@@ -498,7 +531,8 @@ TRACE_EVENT(ext4_request_blocks,
498 TP_ARGS(ar), 531 TP_ARGS(ar),
499 532
500 TP_STRUCT__entry( 533 TP_STRUCT__entry(
501 __field( dev_t, dev ) 534 __field( int, dev_major )
535 __field( int, dev_minor )
502 __field( ino_t, ino ) 536 __field( ino_t, ino )
503 __field( unsigned int, flags ) 537 __field( unsigned int, flags )
504 __field( unsigned int, len ) 538 __field( unsigned int, len )
@@ -511,7 +545,8 @@ TRACE_EVENT(ext4_request_blocks,
511 ), 545 ),
512 546
513 TP_fast_assign( 547 TP_fast_assign(
514 __entry->dev = ar->inode->i_sb->s_dev; 548 __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
549 __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
515 __entry->ino = ar->inode->i_ino; 550 __entry->ino = ar->inode->i_ino;
516 __entry->flags = ar->flags; 551 __entry->flags = ar->flags;
517 __entry->len = ar->len; 552 __entry->len = ar->len;
@@ -523,8 +558,9 @@ TRACE_EVENT(ext4_request_blocks,
523 __entry->pright = ar->pright; 558 __entry->pright = ar->pright;
524 ), 559 ),
525 560
526 TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", 561 TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
527 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 562 __entry->dev_major, __entry->dev_minor,
563 (unsigned long) __entry->ino,
528 __entry->flags, __entry->len, 564 __entry->flags, __entry->len,
529 (unsigned long long) __entry->logical, 565 (unsigned long long) __entry->logical,
530 (unsigned long long) __entry->goal, 566 (unsigned long long) __entry->goal,
@@ -540,7 +576,8 @@ TRACE_EVENT(ext4_allocate_blocks,
540 TP_ARGS(ar, block), 576 TP_ARGS(ar, block),
541 577
542 TP_STRUCT__entry( 578 TP_STRUCT__entry(
543 __field( dev_t, dev ) 579 __field( int, dev_major )
580 __field( int, dev_minor )
544 __field( ino_t, ino ) 581 __field( ino_t, ino )
545 __field( __u64, block ) 582 __field( __u64, block )
546 __field( unsigned int, flags ) 583 __field( unsigned int, flags )
@@ -554,7 +591,8 @@ TRACE_EVENT(ext4_allocate_blocks,
554 ), 591 ),
555 592
556 TP_fast_assign( 593 TP_fast_assign(
557 __entry->dev = ar->inode->i_sb->s_dev; 594 __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
595 __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
558 __entry->ino = ar->inode->i_ino; 596 __entry->ino = ar->inode->i_ino;
559 __entry->block = block; 597 __entry->block = block;
560 __entry->flags = ar->flags; 598 __entry->flags = ar->flags;
@@ -567,9 +605,10 @@ TRACE_EVENT(ext4_allocate_blocks,
567 __entry->pright = ar->pright; 605 __entry->pright = ar->pright;
568 ), 606 ),
569 607
570 TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", 608 TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
571 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 609 __entry->dev_major, __entry->dev_minor,
572 __entry->flags, __entry->len, __entry->block, 610 (unsigned long) __entry->ino, __entry->flags,
611 __entry->len, __entry->block,
573 (unsigned long long) __entry->logical, 612 (unsigned long long) __entry->logical,
574 (unsigned long long) __entry->goal, 613 (unsigned long long) __entry->goal,
575 (unsigned long long) __entry->lleft, 614 (unsigned long long) __entry->lleft,
@@ -585,7 +624,8 @@ TRACE_EVENT(ext4_free_blocks,
585 TP_ARGS(inode, block, count, flags), 624 TP_ARGS(inode, block, count, flags),
586 625
587 TP_STRUCT__entry( 626 TP_STRUCT__entry(
588 __field( dev_t, dev ) 627 __field( int, dev_major )
628 __field( int, dev_minor )
589 __field( ino_t, ino ) 629 __field( ino_t, ino )
590 __field( umode_t, mode ) 630 __field( umode_t, mode )
591 __field( __u64, block ) 631 __field( __u64, block )
@@ -594,7 +634,8 @@ TRACE_EVENT(ext4_free_blocks,
594 ), 634 ),
595 635
596 TP_fast_assign( 636 TP_fast_assign(
597 __entry->dev = inode->i_sb->s_dev; 637 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
638 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
598 __entry->ino = inode->i_ino; 639 __entry->ino = inode->i_ino;
599 __entry->mode = inode->i_mode; 640 __entry->mode = inode->i_mode;
600 __entry->block = block; 641 __entry->block = block;
@@ -602,8 +643,9 @@ TRACE_EVENT(ext4_free_blocks,
602 __entry->flags = flags; 643 __entry->flags = flags;
603 ), 644 ),
604 645
605 TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d", 646 TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
606 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 647 __entry->dev_major, __entry->dev_minor,
648 (unsigned long) __entry->ino,
607 __entry->mode, __entry->block, __entry->count, 649 __entry->mode, __entry->block, __entry->count,
608 __entry->flags) 650 __entry->flags)
609); 651);
@@ -614,7 +656,8 @@ TRACE_EVENT(ext4_sync_file,
614 TP_ARGS(file, datasync), 656 TP_ARGS(file, datasync),
615 657
616 TP_STRUCT__entry( 658 TP_STRUCT__entry(
617 __field( dev_t, dev ) 659 __field( int, dev_major )
660 __field( int, dev_minor )
618 __field( ino_t, ino ) 661 __field( ino_t, ino )
619 __field( ino_t, parent ) 662 __field( ino_t, parent )
620 __field( int, datasync ) 663 __field( int, datasync )
@@ -623,14 +666,16 @@ TRACE_EVENT(ext4_sync_file,
623 TP_fast_assign( 666 TP_fast_assign(
624 struct dentry *dentry = file->f_path.dentry; 667 struct dentry *dentry = file->f_path.dentry;
625 668
626 __entry->dev = dentry->d_inode->i_sb->s_dev; 669 __entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev);
670 __entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev);
627 __entry->ino = dentry->d_inode->i_ino; 671 __entry->ino = dentry->d_inode->i_ino;
628 __entry->datasync = datasync; 672 __entry->datasync = datasync;
629 __entry->parent = dentry->d_parent->d_inode->i_ino; 673 __entry->parent = dentry->d_parent->d_inode->i_ino;
630 ), 674 ),
631 675
632 TP_printk("dev %s ino %ld parent %ld datasync %d ", 676 TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
633 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 677 __entry->dev_major, __entry->dev_minor,
678 (unsigned long) __entry->ino,
634 (unsigned long) __entry->parent, __entry->datasync) 679 (unsigned long) __entry->parent, __entry->datasync)
635); 680);
636 681
@@ -640,18 +685,20 @@ TRACE_EVENT(ext4_sync_fs,
640 TP_ARGS(sb, wait), 685 TP_ARGS(sb, wait),
641 686
642 TP_STRUCT__entry( 687 TP_STRUCT__entry(
643 __field( dev_t, dev ) 688 __field( int, dev_major )
689 __field( int, dev_minor )
644 __field( int, wait ) 690 __field( int, wait )
645 691
646 ), 692 ),
647 693
648 TP_fast_assign( 694 TP_fast_assign(
649 __entry->dev = sb->s_dev; 695 __entry->dev_major = MAJOR(sb->s_dev);
696 __entry->dev_minor = MINOR(sb->s_dev);
650 __entry->wait = wait; 697 __entry->wait = wait;
651 ), 698 ),
652 699
653 TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev), 700 TP_printk("dev %d,%d wait %d", __entry->dev_major,
654 __entry->wait) 701 __entry->dev_minor, __entry->wait)
655); 702);
656 703
657TRACE_EVENT(ext4_alloc_da_blocks, 704TRACE_EVENT(ext4_alloc_da_blocks,
@@ -660,21 +707,24 @@ TRACE_EVENT(ext4_alloc_da_blocks,
660 TP_ARGS(inode), 707 TP_ARGS(inode),
661 708
662 TP_STRUCT__entry( 709 TP_STRUCT__entry(
663 __field( dev_t, dev ) 710 __field( int, dev_major )
711 __field( int, dev_minor )
664 __field( ino_t, ino ) 712 __field( ino_t, ino )
665 __field( unsigned int, data_blocks ) 713 __field( unsigned int, data_blocks )
666 __field( unsigned int, meta_blocks ) 714 __field( unsigned int, meta_blocks )
667 ), 715 ),
668 716
669 TP_fast_assign( 717 TP_fast_assign(
670 __entry->dev = inode->i_sb->s_dev; 718 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
719 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
671 __entry->ino = inode->i_ino; 720 __entry->ino = inode->i_ino;
672 __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; 721 __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
673 __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; 722 __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
674 ), 723 ),
675 724
676 TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", 725 TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
677 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 726 __entry->dev_major, __entry->dev_minor,
727 (unsigned long) __entry->ino,
678 __entry->data_blocks, __entry->meta_blocks) 728 __entry->data_blocks, __entry->meta_blocks)
679); 729);
680 730
@@ -684,7 +734,8 @@ TRACE_EVENT(ext4_mballoc_alloc,
684 TP_ARGS(ac), 734 TP_ARGS(ac),
685 735
686 TP_STRUCT__entry( 736 TP_STRUCT__entry(
687 __field( dev_t, dev ) 737 __field( int, dev_major )
738 __field( int, dev_minor )
688 __field( ino_t, ino ) 739 __field( ino_t, ino )
689 __field( __u16, found ) 740 __field( __u16, found )
690 __field( __u16, groups ) 741 __field( __u16, groups )
@@ -707,7 +758,8 @@ TRACE_EVENT(ext4_mballoc_alloc,
707 ), 758 ),
708 759
709 TP_fast_assign( 760 TP_fast_assign(
710 __entry->dev = ac->ac_inode->i_sb->s_dev; 761 __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
762 __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
711 __entry->ino = ac->ac_inode->i_ino; 763 __entry->ino = ac->ac_inode->i_ino;
712 __entry->found = ac->ac_found; 764 __entry->found = ac->ac_found;
713 __entry->flags = ac->ac_flags; 765 __entry->flags = ac->ac_flags;
@@ -729,10 +781,11 @@ TRACE_EVENT(ext4_mballoc_alloc,
729 __entry->result_len = ac->ac_f_ex.fe_len; 781 __entry->result_len = ac->ac_f_ex.fe_len;
730 ), 782 ),
731 783
732 TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " 784 TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
733 "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " 785 "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
734 "tail %u broken %u", 786 "tail %u broken %u",
735 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 787 __entry->dev_major, __entry->dev_minor,
788 (unsigned long) __entry->ino,
736 __entry->orig_group, __entry->orig_start, 789 __entry->orig_group, __entry->orig_start,
737 __entry->orig_len, __entry->orig_logical, 790 __entry->orig_len, __entry->orig_logical,
738 __entry->goal_group, __entry->goal_start, 791 __entry->goal_group, __entry->goal_start,
@@ -750,7 +803,8 @@ TRACE_EVENT(ext4_mballoc_prealloc,
750 TP_ARGS(ac), 803 TP_ARGS(ac),
751 804
752 TP_STRUCT__entry( 805 TP_STRUCT__entry(
753 __field( dev_t, dev ) 806 __field( int, dev_major )
807 __field( int, dev_minor )
754 __field( ino_t, ino ) 808 __field( ino_t, ino )
755 __field( __u32, orig_logical ) 809 __field( __u32, orig_logical )
756 __field( int, orig_start ) 810 __field( int, orig_start )
@@ -763,7 +817,8 @@ TRACE_EVENT(ext4_mballoc_prealloc,
763 ), 817 ),
764 818
765 TP_fast_assign( 819 TP_fast_assign(
766 __entry->dev = ac->ac_inode->i_sb->s_dev; 820 __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
821 __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
767 __entry->ino = ac->ac_inode->i_ino; 822 __entry->ino = ac->ac_inode->i_ino;
768 __entry->orig_logical = ac->ac_o_ex.fe_logical; 823 __entry->orig_logical = ac->ac_o_ex.fe_logical;
769 __entry->orig_start = ac->ac_o_ex.fe_start; 824 __entry->orig_start = ac->ac_o_ex.fe_start;
@@ -775,8 +830,9 @@ TRACE_EVENT(ext4_mballoc_prealloc,
775 __entry->result_len = ac->ac_b_ex.fe_len; 830 __entry->result_len = ac->ac_b_ex.fe_len;
776 ), 831 ),
777 832
778 TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", 833 TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
779 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 834 __entry->dev_major, __entry->dev_minor,
835 (unsigned long) __entry->ino,
780 __entry->orig_group, __entry->orig_start, 836 __entry->orig_group, __entry->orig_start,
781 __entry->orig_len, __entry->orig_logical, 837 __entry->orig_len, __entry->orig_logical,
782 __entry->result_group, __entry->result_start, 838 __entry->result_group, __entry->result_start,
@@ -784,46 +840,59 @@ TRACE_EVENT(ext4_mballoc_prealloc,
784); 840);
785 841
786DECLARE_EVENT_CLASS(ext4__mballoc, 842DECLARE_EVENT_CLASS(ext4__mballoc,
787 TP_PROTO(struct ext4_allocation_context *ac), 843 TP_PROTO(struct super_block *sb,
844 struct inode *inode,
845 ext4_group_t group,
846 ext4_grpblk_t start,
847 ext4_grpblk_t len),
788 848
789 TP_ARGS(ac), 849 TP_ARGS(sb, inode, group, start, len),
790 850
791 TP_STRUCT__entry( 851 TP_STRUCT__entry(
792 __field( dev_t, dev ) 852 __field( int, dev_major )
853 __field( int, dev_minor )
793 __field( ino_t, ino ) 854 __field( ino_t, ino )
794 __field( __u32, result_logical )
795 __field( int, result_start ) 855 __field( int, result_start )
796 __field( __u32, result_group ) 856 __field( __u32, result_group )
797 __field( int, result_len ) 857 __field( int, result_len )
798 ), 858 ),
799 859
800 TP_fast_assign( 860 TP_fast_assign(
801 __entry->dev = ac->ac_inode->i_sb->s_dev; 861 __entry->dev_major = MAJOR(sb->s_dev);
802 __entry->ino = ac->ac_inode->i_ino; 862 __entry->dev_minor = MINOR(sb->s_dev);
803 __entry->result_logical = ac->ac_b_ex.fe_logical; 863 __entry->ino = inode ? inode->i_ino : 0;
804 __entry->result_start = ac->ac_b_ex.fe_start; 864 __entry->result_start = start;
805 __entry->result_group = ac->ac_b_ex.fe_group; 865 __entry->result_group = group;
806 __entry->result_len = ac->ac_b_ex.fe_len; 866 __entry->result_len = len;
807 ), 867 ),
808 868
809 TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", 869 TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
810 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 870 __entry->dev_major, __entry->dev_minor,
871 (unsigned long) __entry->ino,
811 __entry->result_group, __entry->result_start, 872 __entry->result_group, __entry->result_start,
812 __entry->result_len, __entry->result_logical) 873 __entry->result_len)
813); 874);
814 875
815DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, 876DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,
816 877
817 TP_PROTO(struct ext4_allocation_context *ac), 878 TP_PROTO(struct super_block *sb,
879 struct inode *inode,
880 ext4_group_t group,
881 ext4_grpblk_t start,
882 ext4_grpblk_t len),
818 883
819 TP_ARGS(ac) 884 TP_ARGS(sb, inode, group, start, len)
820); 885);
821 886
822DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, 887DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,
823 888
824 TP_PROTO(struct ext4_allocation_context *ac), 889 TP_PROTO(struct super_block *sb,
890 struct inode *inode,
891 ext4_group_t group,
892 ext4_grpblk_t start,
893 ext4_grpblk_t len),
825 894
826 TP_ARGS(ac) 895 TP_ARGS(sb, inode, group, start, len)
827); 896);
828 897
829TRACE_EVENT(ext4_forget, 898TRACE_EVENT(ext4_forget,
@@ -832,7 +901,8 @@ TRACE_EVENT(ext4_forget,
832 TP_ARGS(inode, is_metadata, block), 901 TP_ARGS(inode, is_metadata, block),
833 902
834 TP_STRUCT__entry( 903 TP_STRUCT__entry(
835 __field( dev_t, dev ) 904 __field( int, dev_major )
905 __field( int, dev_minor )
836 __field( ino_t, ino ) 906 __field( ino_t, ino )
837 __field( umode_t, mode ) 907 __field( umode_t, mode )
838 __field( int, is_metadata ) 908 __field( int, is_metadata )
@@ -840,16 +910,18 @@ TRACE_EVENT(ext4_forget,
840 ), 910 ),
841 911
842 TP_fast_assign( 912 TP_fast_assign(
843 __entry->dev = inode->i_sb->s_dev; 913 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
914 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
844 __entry->ino = inode->i_ino; 915 __entry->ino = inode->i_ino;
845 __entry->mode = inode->i_mode; 916 __entry->mode = inode->i_mode;
846 __entry->is_metadata = is_metadata; 917 __entry->is_metadata = is_metadata;
847 __entry->block = block; 918 __entry->block = block;
848 ), 919 ),
849 920
850 TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu", 921 TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
851 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 922 __entry->dev_major, __entry->dev_minor,
852 __entry->mode, __entry->is_metadata, __entry->block) 923 (unsigned long) __entry->ino, __entry->mode,
924 __entry->is_metadata, __entry->block)
853); 925);
854 926
855TRACE_EVENT(ext4_da_update_reserve_space, 927TRACE_EVENT(ext4_da_update_reserve_space,
@@ -858,7 +930,8 @@ TRACE_EVENT(ext4_da_update_reserve_space,
858 TP_ARGS(inode, used_blocks), 930 TP_ARGS(inode, used_blocks),
859 931
860 TP_STRUCT__entry( 932 TP_STRUCT__entry(
861 __field( dev_t, dev ) 933 __field( int, dev_major )
934 __field( int, dev_minor )
862 __field( ino_t, ino ) 935 __field( ino_t, ino )
863 __field( umode_t, mode ) 936 __field( umode_t, mode )
864 __field( __u64, i_blocks ) 937 __field( __u64, i_blocks )
@@ -869,7 +942,8 @@ TRACE_EVENT(ext4_da_update_reserve_space,
869 ), 942 ),
870 943
871 TP_fast_assign( 944 TP_fast_assign(
872 __entry->dev = inode->i_sb->s_dev; 945 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
946 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
873 __entry->ino = inode->i_ino; 947 __entry->ino = inode->i_ino;
874 __entry->mode = inode->i_mode; 948 __entry->mode = inode->i_mode;
875 __entry->i_blocks = inode->i_blocks; 949 __entry->i_blocks = inode->i_blocks;
@@ -879,9 +953,10 @@ TRACE_EVENT(ext4_da_update_reserve_space,
879 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; 953 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
880 ), 954 ),
881 955
882 TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", 956 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
883 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 957 __entry->dev_major, __entry->dev_minor,
884 __entry->mode, (unsigned long long) __entry->i_blocks, 958 (unsigned long) __entry->ino, __entry->mode,
959 (unsigned long long) __entry->i_blocks,
885 __entry->used_blocks, __entry->reserved_data_blocks, 960 __entry->used_blocks, __entry->reserved_data_blocks,
886 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) 961 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
887); 962);
@@ -892,7 +967,8 @@ TRACE_EVENT(ext4_da_reserve_space,
892 TP_ARGS(inode, md_needed), 967 TP_ARGS(inode, md_needed),
893 968
894 TP_STRUCT__entry( 969 TP_STRUCT__entry(
895 __field( dev_t, dev ) 970 __field( int, dev_major )
971 __field( int, dev_minor )
896 __field( ino_t, ino ) 972 __field( ino_t, ino )
897 __field( umode_t, mode ) 973 __field( umode_t, mode )
898 __field( __u64, i_blocks ) 974 __field( __u64, i_blocks )
@@ -902,7 +978,8 @@ TRACE_EVENT(ext4_da_reserve_space,
902 ), 978 ),
903 979
904 TP_fast_assign( 980 TP_fast_assign(
905 __entry->dev = inode->i_sb->s_dev; 981 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
982 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
906 __entry->ino = inode->i_ino; 983 __entry->ino = inode->i_ino;
907 __entry->mode = inode->i_mode; 984 __entry->mode = inode->i_mode;
908 __entry->i_blocks = inode->i_blocks; 985 __entry->i_blocks = inode->i_blocks;
@@ -911,8 +988,9 @@ TRACE_EVENT(ext4_da_reserve_space,
911 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; 988 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
912 ), 989 ),
913 990
914 TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", 991 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
915 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 992 __entry->dev_major, __entry->dev_minor,
993 (unsigned long) __entry->ino,
916 __entry->mode, (unsigned long long) __entry->i_blocks, 994 __entry->mode, (unsigned long long) __entry->i_blocks,
917 __entry->md_needed, __entry->reserved_data_blocks, 995 __entry->md_needed, __entry->reserved_data_blocks,
918 __entry->reserved_meta_blocks) 996 __entry->reserved_meta_blocks)
@@ -924,7 +1002,8 @@ TRACE_EVENT(ext4_da_release_space,
924 TP_ARGS(inode, freed_blocks), 1002 TP_ARGS(inode, freed_blocks),
925 1003
926 TP_STRUCT__entry( 1004 TP_STRUCT__entry(
927 __field( dev_t, dev ) 1005 __field( int, dev_major )
1006 __field( int, dev_minor )
928 __field( ino_t, ino ) 1007 __field( ino_t, ino )
929 __field( umode_t, mode ) 1008 __field( umode_t, mode )
930 __field( __u64, i_blocks ) 1009 __field( __u64, i_blocks )
@@ -935,7 +1014,8 @@ TRACE_EVENT(ext4_da_release_space,
935 ), 1014 ),
936 1015
937 TP_fast_assign( 1016 TP_fast_assign(
938 __entry->dev = inode->i_sb->s_dev; 1017 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
1018 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
939 __entry->ino = inode->i_ino; 1019 __entry->ino = inode->i_ino;
940 __entry->mode = inode->i_mode; 1020 __entry->mode = inode->i_mode;
941 __entry->i_blocks = inode->i_blocks; 1021 __entry->i_blocks = inode->i_blocks;
@@ -945,8 +1025,9 @@ TRACE_EVENT(ext4_da_release_space,
945 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; 1025 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
946 ), 1026 ),
947 1027
948 TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", 1028 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
949 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 1029 __entry->dev_major, __entry->dev_minor,
1030 (unsigned long) __entry->ino,
950 __entry->mode, (unsigned long long) __entry->i_blocks, 1031 __entry->mode, (unsigned long long) __entry->i_blocks,
951 __entry->freed_blocks, __entry->reserved_data_blocks, 1032 __entry->freed_blocks, __entry->reserved_data_blocks,
952 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) 1033 __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
@@ -958,18 +1039,20 @@ DECLARE_EVENT_CLASS(ext4__bitmap_load,
958 TP_ARGS(sb, group), 1039 TP_ARGS(sb, group),
959 1040
960 TP_STRUCT__entry( 1041 TP_STRUCT__entry(
961 __field( dev_t, dev ) 1042 __field( int, dev_major )
1043 __field( int, dev_minor )
962 __field( __u32, group ) 1044 __field( __u32, group )
963 1045
964 ), 1046 ),
965 1047
966 TP_fast_assign( 1048 TP_fast_assign(
967 __entry->dev = sb->s_dev; 1049 __entry->dev_major = MAJOR(sb->s_dev);
1050 __entry->dev_minor = MINOR(sb->s_dev);
968 __entry->group = group; 1051 __entry->group = group;
969 ), 1052 ),
970 1053
971 TP_printk("dev %s group %u", 1054 TP_printk("dev %d,%d group %u",
972 jbd2_dev_to_name(__entry->dev), __entry->group) 1055 __entry->dev_major, __entry->dev_minor, __entry->group)
973); 1056);
974 1057
975DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, 1058DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index bf16545cc977..7447ea9305b5 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -17,17 +17,19 @@ TRACE_EVENT(jbd2_checkpoint,
17 TP_ARGS(journal, result), 17 TP_ARGS(journal, result),
18 18
19 TP_STRUCT__entry( 19 TP_STRUCT__entry(
20 __field( dev_t, dev ) 20 __field( int, dev_major )
21 __field( int, dev_minor )
21 __field( int, result ) 22 __field( int, result )
22 ), 23 ),
23 24
24 TP_fast_assign( 25 TP_fast_assign(
25 __entry->dev = journal->j_fs_dev->bd_dev; 26 __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
27 __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
26 __entry->result = result; 28 __entry->result = result;
27 ), 29 ),
28 30
29 TP_printk("dev %s result %d", 31 TP_printk("dev %d,%d result %d",
30 jbd2_dev_to_name(__entry->dev), __entry->result) 32 __entry->dev_major, __entry->dev_minor, __entry->result)
31); 33);
32 34
33DECLARE_EVENT_CLASS(jbd2_commit, 35DECLARE_EVENT_CLASS(jbd2_commit,
@@ -37,20 +39,22 @@ DECLARE_EVENT_CLASS(jbd2_commit,
37 TP_ARGS(journal, commit_transaction), 39 TP_ARGS(journal, commit_transaction),
38 40
39 TP_STRUCT__entry( 41 TP_STRUCT__entry(
40 __field( dev_t, dev ) 42 __field( int, dev_major )
43 __field( int, dev_minor )
41 __field( char, sync_commit ) 44 __field( char, sync_commit )
42 __field( int, transaction ) 45 __field( int, transaction )
43 ), 46 ),
44 47
45 TP_fast_assign( 48 TP_fast_assign(
46 __entry->dev = journal->j_fs_dev->bd_dev; 49 __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
50 __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
47 __entry->sync_commit = commit_transaction->t_synchronous_commit; 51 __entry->sync_commit = commit_transaction->t_synchronous_commit;
48 __entry->transaction = commit_transaction->t_tid; 52 __entry->transaction = commit_transaction->t_tid;
49 ), 53 ),
50 54
51 TP_printk("dev %s transaction %d sync %d", 55 TP_printk("dev %d,%d transaction %d sync %d",
52 jbd2_dev_to_name(__entry->dev), __entry->transaction, 56 __entry->dev_major, __entry->dev_minor,
53 __entry->sync_commit) 57 __entry->transaction, __entry->sync_commit)
54); 58);
55 59
56DEFINE_EVENT(jbd2_commit, jbd2_start_commit, 60DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -87,22 +91,24 @@ TRACE_EVENT(jbd2_end_commit,
87 TP_ARGS(journal, commit_transaction), 91 TP_ARGS(journal, commit_transaction),
88 92
89 TP_STRUCT__entry( 93 TP_STRUCT__entry(
90 __field( dev_t, dev ) 94 __field( int, dev_major )
95 __field( int, dev_minor )
91 __field( char, sync_commit ) 96 __field( char, sync_commit )
92 __field( int, transaction ) 97 __field( int, transaction )
93 __field( int, head ) 98 __field( int, head )
94 ), 99 ),
95 100
96 TP_fast_assign( 101 TP_fast_assign(
97 __entry->dev = journal->j_fs_dev->bd_dev; 102 __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
103 __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
98 __entry->sync_commit = commit_transaction->t_synchronous_commit; 104 __entry->sync_commit = commit_transaction->t_synchronous_commit;
99 __entry->transaction = commit_transaction->t_tid; 105 __entry->transaction = commit_transaction->t_tid;
100 __entry->head = journal->j_tail_sequence; 106 __entry->head = journal->j_tail_sequence;
101 ), 107 ),
102 108
103 TP_printk("dev %s transaction %d sync %d head %d", 109 TP_printk("dev %d,%d transaction %d sync %d head %d",
104 jbd2_dev_to_name(__entry->dev), __entry->transaction, 110 __entry->dev_major, __entry->dev_minor,
105 __entry->sync_commit, __entry->head) 111 __entry->transaction, __entry->sync_commit, __entry->head)
106); 112);
107 113
108TRACE_EVENT(jbd2_submit_inode_data, 114TRACE_EVENT(jbd2_submit_inode_data,
@@ -111,17 +117,20 @@ TRACE_EVENT(jbd2_submit_inode_data,
111 TP_ARGS(inode), 117 TP_ARGS(inode),
112 118
113 TP_STRUCT__entry( 119 TP_STRUCT__entry(
114 __field( dev_t, dev ) 120 __field( int, dev_major )
121 __field( int, dev_minor )
115 __field( ino_t, ino ) 122 __field( ino_t, ino )
116 ), 123 ),
117 124
118 TP_fast_assign( 125 TP_fast_assign(
119 __entry->dev = inode->i_sb->s_dev; 126 __entry->dev_major = MAJOR(inode->i_sb->s_dev);
127 __entry->dev_minor = MINOR(inode->i_sb->s_dev);
120 __entry->ino = inode->i_ino; 128 __entry->ino = inode->i_ino;
121 ), 129 ),
122 130
123 TP_printk("dev %s ino %lu", 131 TP_printk("dev %d,%d ino %lu",
124 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) 132 __entry->dev_major, __entry->dev_minor,
133 (unsigned long) __entry->ino)
125); 134);
126 135
127TRACE_EVENT(jbd2_run_stats, 136TRACE_EVENT(jbd2_run_stats,
@@ -131,7 +140,8 @@ TRACE_EVENT(jbd2_run_stats,
131 TP_ARGS(dev, tid, stats), 140 TP_ARGS(dev, tid, stats),
132 141
133 TP_STRUCT__entry( 142 TP_STRUCT__entry(
134 __field( dev_t, dev ) 143 __field( int, dev_major )
144 __field( int, dev_minor )
135 __field( unsigned long, tid ) 145 __field( unsigned long, tid )
136 __field( unsigned long, wait ) 146 __field( unsigned long, wait )
137 __field( unsigned long, running ) 147 __field( unsigned long, running )
@@ -144,7 +154,8 @@ TRACE_EVENT(jbd2_run_stats,
144 ), 154 ),
145 155
146 TP_fast_assign( 156 TP_fast_assign(
147 __entry->dev = dev; 157 __entry->dev_major = MAJOR(dev);
158 __entry->dev_minor = MINOR(dev);
148 __entry->tid = tid; 159 __entry->tid = tid;
149 __entry->wait = stats->rs_wait; 160 __entry->wait = stats->rs_wait;
150 __entry->running = stats->rs_running; 161 __entry->running = stats->rs_running;
@@ -156,9 +167,9 @@ TRACE_EVENT(jbd2_run_stats,
156 __entry->blocks_logged = stats->rs_blocks_logged; 167 __entry->blocks_logged = stats->rs_blocks_logged;
157 ), 168 ),
158 169
159 TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " 170 TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
160 "logging %u handle_count %u blocks %u blocks_logged %u", 171 "logging %u handle_count %u blocks %u blocks_logged %u",
161 jbd2_dev_to_name(__entry->dev), __entry->tid, 172 __entry->dev_major, __entry->dev_minor, __entry->tid,
162 jiffies_to_msecs(__entry->wait), 173 jiffies_to_msecs(__entry->wait),
163 jiffies_to_msecs(__entry->running), 174 jiffies_to_msecs(__entry->running),
164 jiffies_to_msecs(__entry->locked), 175 jiffies_to_msecs(__entry->locked),
@@ -175,7 +186,8 @@ TRACE_EVENT(jbd2_checkpoint_stats,
175 TP_ARGS(dev, tid, stats), 186 TP_ARGS(dev, tid, stats),
176 187
177 TP_STRUCT__entry( 188 TP_STRUCT__entry(
178 __field( dev_t, dev ) 189 __field( int, dev_major )
190 __field( int, dev_minor )
179 __field( unsigned long, tid ) 191 __field( unsigned long, tid )
180 __field( unsigned long, chp_time ) 192 __field( unsigned long, chp_time )
181 __field( __u32, forced_to_close ) 193 __field( __u32, forced_to_close )
@@ -184,7 +196,8 @@ TRACE_EVENT(jbd2_checkpoint_stats,
184 ), 196 ),
185 197
186 TP_fast_assign( 198 TP_fast_assign(
187 __entry->dev = dev; 199 __entry->dev_major = MAJOR(dev);
200 __entry->dev_minor = MINOR(dev);
188 __entry->tid = tid; 201 __entry->tid = tid;
189 __entry->chp_time = stats->cs_chp_time; 202 __entry->chp_time = stats->cs_chp_time;
190 __entry->forced_to_close= stats->cs_forced_to_close; 203 __entry->forced_to_close= stats->cs_forced_to_close;
@@ -192,9 +205,9 @@ TRACE_EVENT(jbd2_checkpoint_stats,
192 __entry->dropped = stats->cs_dropped; 205 __entry->dropped = stats->cs_dropped;
193 ), 206 ),
194 207
195 TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " 208 TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
196 "written %u dropped %u", 209 "written %u dropped %u",
197 jbd2_dev_to_name(__entry->dev), __entry->tid, 210 __entry->dev_major, __entry->dev_minor, __entry->tid,
198 jiffies_to_msecs(__entry->chp_time), 211 jiffies_to_msecs(__entry->chp_time),
199 __entry->forced_to_close, __entry->written, __entry->dropped) 212 __entry->forced_to_close, __entry->written, __entry->dropped)
200); 213);
@@ -207,7 +220,8 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
207 TP_ARGS(journal, first_tid, block_nr, freed), 220 TP_ARGS(journal, first_tid, block_nr, freed),
208 221
209 TP_STRUCT__entry( 222 TP_STRUCT__entry(
210 __field( dev_t, dev ) 223 __field( int, dev_major )
224 __field( int, dev_minor )
211 __field( tid_t, tail_sequence ) 225 __field( tid_t, tail_sequence )
212 __field( tid_t, first_tid ) 226 __field( tid_t, first_tid )
213 __field(unsigned long, block_nr ) 227 __field(unsigned long, block_nr )
@@ -215,16 +229,18 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
215 ), 229 ),
216 230
217 TP_fast_assign( 231 TP_fast_assign(
218 __entry->dev = journal->j_fs_dev->bd_dev; 232 __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
233 __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
219 __entry->tail_sequence = journal->j_tail_sequence; 234 __entry->tail_sequence = journal->j_tail_sequence;
220 __entry->first_tid = first_tid; 235 __entry->first_tid = first_tid;
221 __entry->block_nr = block_nr; 236 __entry->block_nr = block_nr;
222 __entry->freed = freed; 237 __entry->freed = freed;
223 ), 238 ),
224 239
225 TP_printk("dev %s from %u to %u offset %lu freed %lu", 240 TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
226 jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, 241 __entry->dev_major, __entry->dev_minor,
227 __entry->first_tid, __entry->block_nr, __entry->freed) 242 __entry->tail_sequence, __entry->first_tid,
243 __entry->block_nr, __entry->freed)
228); 244);
229 245
230#endif /* _TRACE_JBD2_H */ 246#endif /* _TRACE_JBD2_H */