aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-02-15 04:24:31 -0500
committerJiri Kosina <jkosina@suse.cz>2011-02-15 04:24:31 -0500
commit0a9d59a2461477bd9ed143c01af9df3f8f00fa81 (patch)
treedf997d1cfb0786427a0df1fbd6f0640fa4248cf4 /fs/ext4
parenta23ce6da9677d245aa0aadc99f4197030350ab54 (diff)
parent795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/extents.c21
-rw-r--r--fs/ext4/file.c62
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c91
6 files changed, 213 insertions, 109 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1de65f572033..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
849 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
850 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
851 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
851 852
852 spinlock_t i_block_reservation_lock; 853 spinlock_t i_block_reservation_lock;
853 854
@@ -2065,7 +2066,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
2065extern void ext4_ext_truncate(struct inode *); 2066extern void ext4_ext_truncate(struct inode *);
2066extern void ext4_ext_init(struct super_block *); 2067extern void ext4_ext_init(struct super_block *);
2067extern void ext4_ext_release(struct super_block *); 2068extern void ext4_ext_release(struct super_block *);
2068extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 2069extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
2069 loff_t len); 2070 loff_t len);
2070extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 2071extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
2071 ssize_t len); 2072 ssize_t len);
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2119 2120
2120#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2121#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2121 2122
2123/* For ioend & aio unwritten conversion wait queues */
2124#define EXT4_WQ_HASH_SZ 37
2125#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2126 EXT4_WQ_HASH_SZ])
2127#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2128 EXT4_WQ_HASH_SZ])
2129extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2130extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2131
2122#endif /* __KERNEL__ */ 2132#endif /* __KERNEL__ */
2123 2133
2124#endif /* _EXT4_H */ 2134#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 6b90b6825d32..686240e89df1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 * that this IO needs to convertion to written when IO is 3174 * that this IO needs to convertion to written when IO is
3175 * completed 3175 * completed
3176 */ 3176 */
3177 if (io) 3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3178 io->flag = EXT4_IO_END_UNWRITTEN; 3178 io->flag = EXT4_IO_END_UNWRITTEN;
3179 else 3179 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3180 } else
3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3181 if (ext4_should_dioread_nolock(inode)) 3182 if (ext4_should_dioread_nolock(inode))
3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3463 * that we need to perform convertion when IO is done. 3464 * that we need to perform convertion when IO is done.
3464 */ 3465 */
3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3466 if (io) 3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 io->flag = EXT4_IO_END_UNWRITTEN;
3468 else 3469 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3470 } else
3469 ext4_set_inode_state(inode, 3471 ext4_set_inode_state(inode,
3470 EXT4_STATE_DIO_UNWRITTEN); 3472 EXT4_STATE_DIO_UNWRITTEN);
3471 } 3473 }
@@ -3627,14 +3629,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
3627} 3629}
3628 3630
3629/* 3631/*
3630 * preallocate space for a file. This implements ext4's fallocate inode 3632 * preallocate space for a file. This implements ext4's fallocate file
3631 * operation, which gets called from sys_fallocate system call. 3633 * operation, which gets called from sys_fallocate system call.
3632 * For block-mapped files, posix_fallocate should fall back to the method 3634 * For block-mapped files, posix_fallocate should fall back to the method
3633 * of writing zeroes to the required new blocks (the same behavior which is 3635 * of writing zeroes to the required new blocks (the same behavior which is
3634 * expected for file systems which do not support fallocate() system call). 3636 * expected for file systems which do not support fallocate() system call).
3635 */ 3637 */
3636long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) 3638long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3637{ 3639{
3640 struct inode *inode = file->f_path.dentry->d_inode;
3638 handle_t *handle; 3641 handle_t *handle;
3639 loff_t new_size; 3642 loff_t new_size;
3640 unsigned int max_blocks; 3643 unsigned int max_blocks;
@@ -3645,7 +3648,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
3645 unsigned int credits, blkbits = inode->i_blkbits; 3648 unsigned int credits, blkbits = inode->i_blkbits;
3646 3649
3647 /* We only support the FALLOC_FL_KEEP_SIZE mode */ 3650 /* We only support the FALLOC_FL_KEEP_SIZE mode */
3648 if (mode && (mode != FALLOC_FL_KEEP_SIZE)) 3651 if (mode & ~FALLOC_FL_KEEP_SIZE)
3649 return -EOPNOTSUPP; 3652 return -EOPNOTSUPP;
3650 3653
3651 /* 3654 /*
@@ -3655,10 +3658,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
3655 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3658 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
3656 return -EOPNOTSUPP; 3659 return -EOPNOTSUPP;
3657 3660
3658 /* preallocation to directories is currently not supported */
3659 if (S_ISDIR(inode->i_mode))
3660 return -ENODEV;
3661
3662 map.m_lblk = offset >> blkbits; 3661 map.m_lblk = offset >> blkbits;
3663 /* 3662 /*
3664 * We can't just convert len to max_blocks because 3663 * We can't just convert len to max_blocks because
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bb003dc9ffff..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode)
59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
63}
64
65/*
66 * This tests whether the IO in question is block-aligned or not.
67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
68 * are converted to written only after the IO is complete. Until they are
69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
70 * it needs to zero out portions of the start and/or end block. If 2 AIO
71 * threads are at work on the same unwritten block, they must be synchronized
72 * or one thread will zero the other's data, causing corruption.
73 */
74static int
75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
76 unsigned long nr_segs, loff_t pos)
77{
78 struct super_block *sb = inode->i_sb;
79 int blockmask = sb->s_blocksize - 1;
80 size_t count = iov_length(iov, nr_segs);
81 loff_t final_size = pos + count;
82
83 if (pos >= inode->i_size)
84 return 0;
85
86 if ((pos & blockmask) || (final_size & blockmask))
87 return 1;
88
89 return 0;
90}
91
58static ssize_t 92static ssize_t
59ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
60 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
61{ 95{
62 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
97 int unaligned_aio = 0;
98 int ret;
63 99
64 /* 100 /*
65 * If we have encountered a bitmap-format file, the size limit 101 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
78 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 114 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
79 sbi->s_bitmap_maxbytes - pos); 115 sbi->s_bitmap_maxbytes - pos);
80 } 116 }
117 } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
118 !is_sync_kiocb(iocb))) {
119 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
120 }
121
122 /* Unaligned direct AIO must be serialized; see comment above */
123 if (unaligned_aio) {
124 static unsigned long unaligned_warn_time;
125
126 /* Warn about this once per day */
127 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
128 ext4_msg(inode->i_sb, KERN_WARNING,
129 "Unaligned AIO/DIO on inode %ld by %s; "
130 "performance will be poor.",
131 inode->i_ino, current->comm);
132 mutex_lock(ext4_aio_mutex(inode));
133 ext4_aiodio_wait(inode);
81 } 134 }
82 135
83 return generic_file_aio_write(iocb, iov, nr_segs, pos); 136 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
137
138 if (unaligned_aio)
139 mutex_unlock(ext4_aio_mutex(inode));
140
141 return ret;
84} 142}
85 143
86static const struct vm_operations_struct ext4_file_vm_ops = { 144static const struct vm_operations_struct ext4_file_vm_ops = {
@@ -210,6 +268,7 @@ const struct file_operations ext4_file_operations = {
210 .fsync = ext4_sync_file, 268 .fsync = ext4_sync_file,
211 .splice_read = generic_file_splice_read, 269 .splice_read = generic_file_splice_read,
212 .splice_write = generic_file_splice_write, 270 .splice_write = generic_file_splice_write,
271 .fallocate = ext4_fallocate,
213}; 272};
214 273
215const struct inode_operations ext4_file_inode_operations = { 274const struct inode_operations ext4_file_inode_operations = {
@@ -223,7 +282,6 @@ const struct inode_operations ext4_file_inode_operations = {
223 .removexattr = generic_removexattr, 282 .removexattr = generic_removexattr,
224#endif 283#endif
225 .check_acl = ext4_check_acl, 284 .check_acl = ext4_check_acl,
226 .fallocate = ext4_fallocate,
227 .fiemap = ext4_fiemap, 285 .fiemap = ext4_fiemap,
228}; 286};
229 287
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
342/* We create slab caches for groupinfo data structures based on the 342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for 343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */ 344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \ 345#define NR_GRPINFO_CACHES 8
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; 346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348 347
348static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
350 ext4_group_t group); 355 ext4_group_t group);
351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
2414 return -ENOMEM; 2419 return -ENOMEM;
2415} 2420}
2416 2421
2422static void ext4_groupinfo_destroy_slabs(void)
2423{
2424 int i;
2425
2426 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2427 if (ext4_groupinfo_caches[i])
2428 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2429 ext4_groupinfo_caches[i] = NULL;
2430 }
2431}
2432
2433static int ext4_groupinfo_create_slab(size_t size)
2434{
2435 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2436 int slab_size;
2437 int blocksize_bits = order_base_2(size);
2438 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2439 struct kmem_cache *cachep;
2440
2441 if (cache_index >= NR_GRPINFO_CACHES)
2442 return -EINVAL;
2443
2444 if (unlikely(cache_index < 0))
2445 cache_index = 0;
2446
2447 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2448 if (ext4_groupinfo_caches[cache_index]) {
2449 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2450 return 0; /* Already created */
2451 }
2452
2453 slab_size = offsetof(struct ext4_group_info,
2454 bb_counters[blocksize_bits + 2]);
2455
2456 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2457 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2458 NULL);
2459
2460 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2461 if (!cachep) {
2462 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2463 return -ENOMEM;
2464 }
2465
2466 ext4_groupinfo_caches[cache_index] = cachep;
2467
2468 return 0;
2469}
2470
2417int ext4_mb_init(struct super_block *sb, int needs_recovery) 2471int ext4_mb_init(struct super_block *sb, int needs_recovery)
2418{ 2472{
2419 struct ext4_sb_info *sbi = EXT4_SB(sb); 2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2421 unsigned offset; 2475 unsigned offset;
2422 unsigned max; 2476 unsigned max;
2423 int ret; 2477 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2427 2478
2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2479 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2429 2480
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 goto out; 2491 goto out;
2441 } 2492 }
2442 2493
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; 2494 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2444 cachep = ext4_groupinfo_caches[cache_index]; 2495 if (ret < 0)
2445 if (!cachep) { 2496 goto out;
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2466 }
2467 2497
2468 /* order 0 is regular bitmap */ 2498 /* order 0 is regular bitmap */
2469 sbi->s_mb_maxs[0] = sb->s_blocksize << 3; 2499 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
2520 if (ret) { 2550 if (ret) {
2521 kfree(sbi->s_mb_offsets); 2551 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs); 2552 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 } 2553 }
2525 return ret; 2554 return ret;
2526} 2555}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
2734 2763
2735void ext4_exit_mballoc(void) 2764void ext4_exit_mballoc(void)
2736{ 2765{
2737 int i;
2738 /* 2766 /*
2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2767 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2740 * before destroying the slab cache. 2768 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
2743 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2744 kmem_cache_destroy(ext4_ac_cachep); 2772 kmem_cache_destroy(ext4_ac_cachep);
2745 kmem_cache_destroy(ext4_free_ext_cachep); 2773 kmem_cache_destroy(ext4_free_ext_cachep);
2746 2774 ext4_groupinfo_destroy_slabs();
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2755 ext4_remove_debugfs_entry(); 2775 ext4_remove_debugfs_entry();
2756} 2776}
2757 2777
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
39int __init ext4_init_pageio(void) 35int __init ext4_init_pageio(void)
40{ 36{
41 int i;
42
43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
44 if (io_page_cachep == NULL) 38 if (io_page_cachep == NULL)
45 return -ENOMEM; 39 return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
48 kmem_cache_destroy(io_page_cachep); 42 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 43 return -ENOMEM;
50 } 44 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
53
54 return 0; 45 return 0;
55} 46}
56 47
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
62 53
63void ext4_ioend_wait(struct inode *inode) 54void ext4_ioend_wait(struct inode *inode)
64{ 55{
65 wait_queue_head_t *wq = to_ioend_wq(inode); 56 wait_queue_head_t *wq = ext4_ioend_wq(inode);
66 57
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 58 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68} 59}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
87 for (i = 0; i < io->num_io_pages; i++) 78 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]); 79 put_io_page(io->pages[i]);
89 io->num_io_pages = 0; 80 io->num_io_pages = 0;
90 wq = to_ioend_wq(io->inode); 81 wq = ext4_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq)) 83 waitqueue_active(wq))
93 wake_up_all(wq); 84 wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
102 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
103 loff_t offset = io->offset; 94 loff_t offset = io->offset;
104 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
105 int ret = 0; 97 int ret = 0;
106 98
107 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
126 if (io->iocb) 118 if (io->iocb)
127 aio_complete(io->iocb, io->result, 0); 119 aio_complete(io->iocb, io->result, 0);
128 /* clear the DIO AIO unwritten flag */ 120 /* clear the DIO AIO unwritten flag */
129 io->flag &= ~EXT4_IO_END_UNWRITTEN; 121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130
130 return ret; 131 return ret;
131} 132}
132 133
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 191 struct inode *inode;
191 unsigned long flags; 192 unsigned long flags;
192 int i; 193 int i;
194 sector_t bi_sector = bio->bi_sector;
193 195
194 BUG_ON(!io_end); 196 BUG_ON(!io_end);
195 bio->bi_private = NULL; 197 bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 209 if (error)
208 SetPageError(page); 210 SetPageError(page);
209 BUG_ON(!head); 211 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 212 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 213 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 214 loff_t io_end_offset = io_end->offset + io_end->size;
215 215
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 221 if (error)
222 buffer_io_error(bh); 222 buffer_io_error(bh);
223 223
224 clear_buffer_dirty(bh);
225 } 224 }
226 if (buffer_delay(bh)) 225 if (buffer_delay(bh))
227 partial_write = 1; 226 partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 256 (unsigned long long) io_end->offset,
258 (long) io_end->size, 257 (long) io_end->size,
259 (unsigned long long) 258 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
261 } 260 }
262 261
263 /* Add the io_end to per-inode completed io list*/ 262 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 379
381 blocksize = 1 << inode->i_blkbits; 380 blocksize = 1 << inode->i_blkbits;
382 381
382 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 383 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 384 set_page_writeback(page);
385 ClearPageError(page); 385 ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 397 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 398 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 399 block_start = block_end, bh = bh->b_this_page) {
400
400 block_end = block_start + blocksize; 401 block_end = block_start + blocksize;
401 if (block_start >= len) { 402 if (block_start >= len) {
402 clear_buffer_dirty(bh); 403 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 404 set_buffer_uptodate(bh);
404 continue; 405 continue;
405 } 406 }
407 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 408 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 409 if (ret) {
408 /* 410 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cb10a06775e4..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void);
80 81
81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
82static struct file_system_type ext3_fs_type = { 83static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
832 ei->i_sync_tid = 0; 833 ei->i_sync_tid = 0;
833 ei->i_datasync_tid = 0; 834 ei->i_datasync_tid = 0;
834 atomic_set(&ei->i_ioend_count, 0); 835 atomic_set(&ei->i_ioend_count, 0);
836 atomic_set(&ei->i_aiodio_unwritten, 0);
835 837
836 return &ei->vfs_inode; 838 return &ei->vfs_inode;
837} 839}
@@ -1161,7 +1163,7 @@ static int ext4_release_dquot(struct dquot *dquot);
1161static int ext4_mark_dquot_dirty(struct dquot *dquot); 1163static int ext4_mark_dquot_dirty(struct dquot *dquot);
1162static int ext4_write_info(struct super_block *sb, int type); 1164static int ext4_write_info(struct super_block *sb, int type);
1163static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1165static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1164 char *path); 1166 struct path *path);
1165static int ext4_quota_off(struct super_block *sb, int type); 1167static int ext4_quota_off(struct super_block *sb, int type);
1166static int ext4_quota_on_mount(struct super_block *sb, int type); 1168static int ext4_quota_on_mount(struct super_block *sb, int type);
1167static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1169static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2718 mutex_unlock(&ext4_li_info->li_list_mtx);
2717} 2719}
2718 2720
2721static struct task_struct *ext4_lazyinit_task;
2722
2719/* 2723/*
2720 * This is the function where ext4lazyinit thread lives. It walks 2724 * This is the function where ext4lazyinit thread lives. It walks
2721 * through the request list searching for next scheduled filesystem. 2725 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
2784 if (time_before(jiffies, next_wakeup)) 2788 if (time_before(jiffies, next_wakeup))
2785 schedule(); 2789 schedule();
2786 finish_wait(&eli->li_wait_daemon, &wait); 2790 finish_wait(&eli->li_wait_daemon, &wait);
2791 if (kthread_should_stop()) {
2792 ext4_clear_request_list();
2793 goto exit_thread;
2794 }
2787 } 2795 }
2788 2796
2789exit_thread: 2797exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
2808 wake_up(&eli->li_wait_task); 2816 wake_up(&eli->li_wait_task);
2809 2817
2810 kfree(ext4_li_info); 2818 kfree(ext4_li_info);
2819 ext4_lazyinit_task = NULL;
2811 ext4_li_info = NULL; 2820 ext4_li_info = NULL;
2812 mutex_unlock(&ext4_li_mtx); 2821 mutex_unlock(&ext4_li_mtx);
2813 2822
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
2830 2839
2831static int ext4_run_lazyinit_thread(void) 2840static int ext4_run_lazyinit_thread(void)
2832{ 2841{
2833 struct task_struct *t; 2842 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2834 2843 ext4_li_info, "ext4lazyinit");
2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2844 if (IS_ERR(ext4_lazyinit_task)) {
2836 if (IS_ERR(t)) { 2845 int err = PTR_ERR(ext4_lazyinit_task);
2837 int err = PTR_ERR(t);
2838 ext4_clear_request_list(); 2846 ext4_clear_request_list();
2839 del_timer_sync(&ext4_li_info->li_timer); 2847 del_timer_sync(&ext4_li_info->li_timer);
2840 kfree(ext4_li_info); 2848 kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
2985 * If thread exited earlier 2993 * If thread exited earlier
2986 * there's nothing to be done. 2994 * there's nothing to be done.
2987 */ 2995 */
2988 if (!ext4_li_info) 2996 if (!ext4_li_info || !ext4_lazyinit_task)
2989 return; 2997 return;
2990 2998
2991 ext4_clear_request_list(); 2999 kthread_stop(ext4_lazyinit_task);
2992
2993 while (ext4_li_info->li_task) {
2994 wake_up(&ext4_li_info->li_wait_daemon);
2995 wait_event(ext4_li_info->li_wait_task,
2996 ext4_li_info->li_task == NULL);
2997 }
2998} 3000}
2999 3001
3000static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3002static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4558,27 +4560,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
4558 * Standard function to be called on quota_on 4560 * Standard function to be called on quota_on
4559 */ 4561 */
4560static int ext4_quota_on(struct super_block *sb, int type, int format_id, 4562static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4561 char *name) 4563 struct path *path)
4562{ 4564{
4563 int err; 4565 int err;
4564 struct path path;
4565 4566
4566 if (!test_opt(sb, QUOTA)) 4567 if (!test_opt(sb, QUOTA))
4567 return -EINVAL; 4568 return -EINVAL;
4568 4569
4569 err = kern_path(name, LOOKUP_FOLLOW, &path);
4570 if (err)
4571 return err;
4572
4573 /* Quotafile not on the same filesystem? */ 4570 /* Quotafile not on the same filesystem? */
4574 if (path.mnt->mnt_sb != sb) { 4571 if (path->mnt->mnt_sb != sb)
4575 path_put(&path);
4576 return -EXDEV; 4572 return -EXDEV;
4577 }
4578 /* Journaling quota? */ 4573 /* Journaling quota? */
4579 if (EXT4_SB(sb)->s_qf_names[type]) { 4574 if (EXT4_SB(sb)->s_qf_names[type]) {
4580 /* Quotafile not in fs root? */ 4575 /* Quotafile not in fs root? */
4581 if (path.dentry->d_parent != sb->s_root) 4576 if (path->dentry->d_parent != sb->s_root)
4582 ext4_msg(sb, KERN_WARNING, 4577 ext4_msg(sb, KERN_WARNING,
4583 "Quota file not on filesystem root. " 4578 "Quota file not on filesystem root. "
4584 "Journaled quota will not work"); 4579 "Journaled quota will not work");
@@ -4589,7 +4584,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4589 * all updates to the file when we bypass pagecache... 4584 * all updates to the file when we bypass pagecache...
4590 */ 4585 */
4591 if (EXT4_SB(sb)->s_journal && 4586 if (EXT4_SB(sb)->s_journal &&
4592 ext4_should_journal_data(path.dentry->d_inode)) { 4587 ext4_should_journal_data(path->dentry->d_inode)) {
4593 /* 4588 /*
4594 * We don't need to lock updates but journal_flush() could 4589 * We don't need to lock updates but journal_flush() could
4595 * otherwise be livelocked... 4590 * otherwise be livelocked...
@@ -4597,15 +4592,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4597 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 4592 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
4598 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 4593 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
4599 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4594 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4600 if (err) { 4595 if (err)
4601 path_put(&path);
4602 return err; 4596 return err;
4603 }
4604 } 4597 }
4605 4598
4606 err = dquot_quota_on_path(sb, type, format_id, &path); 4599 return dquot_quota_on(sb, type, format_id, path);
4607 path_put(&path);
4608 return err;
4609} 4600}
4610 4601
4611static int ext4_quota_off(struct super_block *sb, int type) 4602static int ext4_quota_off(struct super_block *sb, int type)
@@ -4779,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
4779 .fs_flags = FS_REQUIRES_DEV, 4770 .fs_flags = FS_REQUIRES_DEV,
4780}; 4771};
4781 4772
4782int __init ext4_init_feat_adverts(void) 4773static int __init ext4_init_feat_adverts(void)
4783{ 4774{
4784 struct ext4_features *ef; 4775 struct ext4_features *ef;
4785 int ret = -ENOMEM; 4776 int ret = -ENOMEM;
@@ -4803,23 +4794,44 @@ out:
4803 return ret; 4794 return ret;
4804} 4795}
4805 4796
4797static void ext4_exit_feat_adverts(void)
4798{
4799 kobject_put(&ext4_feat->f_kobj);
4800 wait_for_completion(&ext4_feat->f_kobj_unregister);
4801 kfree(ext4_feat);
4802}
4803
4804/* Shared across all ext4 file systems */
4805wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
4806struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
4807
4806static int __init ext4_init_fs(void) 4808static int __init ext4_init_fs(void)
4807{ 4809{
4808 int err; 4810 int i, err;
4809 4811
4810 ext4_check_flag_values(); 4812 ext4_check_flag_values();
4813
4814 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
4815 mutex_init(&ext4__aio_mutex[i]);
4816 init_waitqueue_head(&ext4__ioend_wq[i]);
4817 }
4818
4811 err = ext4_init_pageio(); 4819 err = ext4_init_pageio();
4812 if (err) 4820 if (err)
4813 return err; 4821 return err;
4814 err = ext4_init_system_zone(); 4822 err = ext4_init_system_zone();
4815 if (err) 4823 if (err)
4816 goto out5; 4824 goto out7;
4817 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4825 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4818 if (!ext4_kset) 4826 if (!ext4_kset)
4819 goto out4; 4827 goto out6;
4820 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4828 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4829 if (!ext4_proc_root)
4830 goto out5;
4821 4831
4822 err = ext4_init_feat_adverts(); 4832 err = ext4_init_feat_adverts();
4833 if (err)
4834 goto out4;
4823 4835
4824 err = ext4_init_mballoc(); 4836 err = ext4_init_mballoc();
4825 if (err) 4837 if (err)
@@ -4849,12 +4861,14 @@ out1:
4849out2: 4861out2:
4850 ext4_exit_mballoc(); 4862 ext4_exit_mballoc();
4851out3: 4863out3:
4852 kfree(ext4_feat); 4864 ext4_exit_feat_adverts();
4865out4:
4853 remove_proc_entry("fs/ext4", NULL); 4866 remove_proc_entry("fs/ext4", NULL);
4867out5:
4854 kset_unregister(ext4_kset); 4868 kset_unregister(ext4_kset);
4855out4: 4869out6:
4856 ext4_exit_system_zone(); 4870 ext4_exit_system_zone();
4857out5: 4871out7:
4858 ext4_exit_pageio(); 4872 ext4_exit_pageio();
4859 return err; 4873 return err;
4860} 4874}
@@ -4868,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
4868 destroy_inodecache(); 4882 destroy_inodecache();
4869 ext4_exit_xattr(); 4883 ext4_exit_xattr();
4870 ext4_exit_mballoc(); 4884 ext4_exit_mballoc();
4885 ext4_exit_feat_adverts();
4871 remove_proc_entry("fs/ext4", NULL); 4886 remove_proc_entry("fs/ext4", NULL);
4872 kset_unregister(ext4_kset); 4887 kset_unregister(ext4_kset);
4873 ext4_exit_system_zone(); 4888 ext4_exit_system_zone();