aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-06-04 14:23:41 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-06-04 14:23:41 -0400
commitb0857d309faefaf5443752458e8af1a4b22b3e92 (patch)
treeb6b18b3d41831a205a5f90609abb567fd756e07c /fs/ext4
parent2e8fa54e3b48e4ce8c4e9ca4674ffbc973f58be5 (diff)
ext4: defer clearing of PageWriteback after extent conversion
Currently PageWriteback bit gets cleared from put_io_page() called from ext4_end_bio(). This is somewhat inconvenient as extent tree is not fully updated at that time (unwritten extents are not marked as written) so we cannot read the data back yet. This design was dictated by lock ordering as we cannot start a transaction while PageWriteback bit is set (we could easily deadlock with ext4_da_writepages()). But now that we use transaction reservation for extent conversion, locking issues are solved and we can move PageWriteback bit clearing after extent conversion is done. As a result we can remove wait for unwritten extent conversion from ext4_sync_file() because it already implicitly happens through wait_on_page_writeback(). We implement deferring of PageWriteback clearing by queueing completed bios to appropriate io_end and processing all the pages when io_end is going to be freed instead of at the moment ext4_io_end() is called. Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/page-io.c138
3 files changed, 82 insertions, 65 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b69a733b5b42..74db579bb482 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -180,8 +180,7 @@ struct ext4_map_blocks {
180 * Flags for ext4_io_end->flags 180 * Flags for ext4_io_end->flags
181 */ 181 */
182#define EXT4_IO_END_UNWRITTEN 0x0001 182#define EXT4_IO_END_UNWRITTEN 0x0001
183#define EXT4_IO_END_ERROR 0x0002 183#define EXT4_IO_END_DIRECT 0x0002
184#define EXT4_IO_END_DIRECT 0x0004
185 184
186/* 185/*
187 * For converting uninitialized extents on a work queue. 'handle' is used for 186 * For converting uninitialized extents on a work queue. 'handle' is used for
@@ -192,6 +191,8 @@ typedef struct ext4_io_end {
192 handle_t *handle; /* handle reserved for extent 191 handle_t *handle; /* handle reserved for extent
193 * conversion */ 192 * conversion */
194 struct inode *inode; /* file being written to */ 193 struct inode *inode; /* file being written to */
194 struct bio *bio; /* Linked list of completed
195 * bios covering the extent */
195 unsigned int flag; /* unwritten or not */ 196 unsigned int flag; /* unwritten or not */
196 loff_t offset; /* offset in the file */ 197 loff_t offset; /* offset in the file */
197 ssize_t size; /* size of the extent */ 198 ssize_t size; /* size of the extent */
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e0ba8a408def..dcc881b30849 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -132,10 +132,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
132 if (inode->i_sb->s_flags & MS_RDONLY) 132 if (inode->i_sb->s_flags & MS_RDONLY)
133 goto out; 133 goto out;
134 134
135 ret = ext4_flush_unwritten_io(inode);
136 if (ret < 0)
137 goto out;
138
139 if (!journal) { 135 if (!journal) {
140 ret = __sync_inode(inode, datasync); 136 ret = __sync_inode(inode, datasync);
141 if (!ret && !hlist_empty(&inode->i_dentry)) 137 if (!ret && !hlist_empty(&inode->i_dentry))
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index bcdfd6bdde06..755741c211a4 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -64,14 +64,83 @@ void ext4_ioend_shutdown(struct inode *inode)
64 cancel_work_sync(&EXT4_I(inode)->i_unrsv_conversion_work); 64 cancel_work_sync(&EXT4_I(inode)->i_unrsv_conversion_work);
65} 65}
66 66
67/*
68 * Print an buffer I/O error compatible with the fs/buffer.c. This
69 * provides compatibility with dmesg scrapers that look for a specific
70 * buffer I/O error message. We really need a unified error reporting
71 * structure to userspace ala Digital Unix's uerf system, but it's
72 * probably not going to happen in my lifetime, due to LKML politics...
73 */
74static void buffer_io_error(struct buffer_head *bh)
75{
76 char b[BDEVNAME_SIZE];
77 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
78 bdevname(bh->b_bdev, b),
79 (unsigned long long)bh->b_blocknr);
80}
81
82static void ext4_finish_bio(struct bio *bio)
83{
84 int i;
85 int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
86
87 for (i = 0; i < bio->bi_vcnt; i++) {
88 struct bio_vec *bvec = &bio->bi_io_vec[i];
89 struct page *page = bvec->bv_page;
90 struct buffer_head *bh, *head;
91 unsigned bio_start = bvec->bv_offset;
92 unsigned bio_end = bio_start + bvec->bv_len;
93 unsigned under_io = 0;
94 unsigned long flags;
95
96 if (!page)
97 continue;
98
99 if (error) {
100 SetPageError(page);
101 set_bit(AS_EIO, &page->mapping->flags);
102 }
103 bh = head = page_buffers(page);
104 /*
105 * We check all buffers in the page under BH_Uptodate_Lock
106 * to avoid races with other end io clearing async_write flags
107 */
108 local_irq_save(flags);
109 bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
110 do {
111 if (bh_offset(bh) < bio_start ||
112 bh_offset(bh) + bh->b_size > bio_end) {
113 if (buffer_async_write(bh))
114 under_io++;
115 continue;
116 }
117 clear_buffer_async_write(bh);
118 if (error)
119 buffer_io_error(bh);
120 } while ((bh = bh->b_this_page) != head);
121 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
122 local_irq_restore(flags);
123 if (!under_io)
124 end_page_writeback(page);
125 }
126}
127
67static void ext4_release_io_end(ext4_io_end_t *io_end) 128static void ext4_release_io_end(ext4_io_end_t *io_end)
68{ 129{
130 struct bio *bio, *next_bio;
131
69 BUG_ON(!list_empty(&io_end->list)); 132 BUG_ON(!list_empty(&io_end->list));
70 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); 133 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
71 WARN_ON(io_end->handle); 134 WARN_ON(io_end->handle);
72 135
73 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) 136 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
74 wake_up_all(ext4_ioend_wq(io_end->inode)); 137 wake_up_all(ext4_ioend_wq(io_end->inode));
138
139 for (bio = io_end->bio; bio; bio = next_bio) {
140 next_bio = bio->bi_private;
141 ext4_finish_bio(bio);
142 bio_put(bio);
143 }
75 if (io_end->flag & EXT4_IO_END_DIRECT) 144 if (io_end->flag & EXT4_IO_END_DIRECT)
76 inode_dio_done(io_end->inode); 145 inode_dio_done(io_end->inode);
77 if (io_end->iocb) 146 if (io_end->iocb)
@@ -267,79 +336,31 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
267 return io_end; 336 return io_end;
268} 337}
269 338
270/*
271 * Print an buffer I/O error compatible with the fs/buffer.c. This
272 * provides compatibility with dmesg scrapers that look for a specific
273 * buffer I/O error message. We really need a unified error reporting
274 * structure to userspace ala Digital Unix's uerf system, but it's
275 * probably not going to happen in my lifetime, due to LKML politics...
276 */
277static void buffer_io_error(struct buffer_head *bh)
278{
279 char b[BDEVNAME_SIZE];
280 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
281 bdevname(bh->b_bdev, b),
282 (unsigned long long)bh->b_blocknr);
283}
284
285static void ext4_end_bio(struct bio *bio, int error) 339static void ext4_end_bio(struct bio *bio, int error)
286{ 340{
287 ext4_io_end_t *io_end = bio->bi_private; 341 ext4_io_end_t *io_end = bio->bi_private;
288 struct inode *inode;
289 int i;
290 int blocksize;
291 sector_t bi_sector = bio->bi_sector; 342 sector_t bi_sector = bio->bi_sector;
292 343
293 BUG_ON(!io_end); 344 BUG_ON(!io_end);
294 inode = io_end->inode;
295 blocksize = 1 << inode->i_blkbits;
296 bio->bi_private = NULL;
297 bio->bi_end_io = NULL; 345 bio->bi_end_io = NULL;
298 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 346 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
299 error = 0; 347 error = 0;
300 for (i = 0; i < bio->bi_vcnt; i++) {
301 struct bio_vec *bvec = &bio->bi_io_vec[i];
302 struct page *page = bvec->bv_page;
303 struct buffer_head *bh, *head;
304 unsigned bio_start = bvec->bv_offset;
305 unsigned bio_end = bio_start + bvec->bv_len;
306 unsigned under_io = 0;
307 unsigned long flags;
308 348
309 if (!page) 349 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
310 continue;
311
312 if (error) {
313 SetPageError(page);
314 set_bit(AS_EIO, &page->mapping->flags);
315 }
316 bh = head = page_buffers(page);
317 /* 350 /*
318 * We check all buffers in the page under BH_Uptodate_Lock 351 * Link bio into list hanging from io_end. We have to do it
319 * to avoid races with other end io clearing async_write flags 352 * atomically as bio completions can be racing against each
353 * other.
320 */ 354 */
321 local_irq_save(flags); 355 bio->bi_private = xchg(&io_end->bio, bio);
322 bit_spin_lock(BH_Uptodate_Lock, &head->b_state); 356 } else {
323 do { 357 ext4_finish_bio(bio);
324 if (bh_offset(bh) < bio_start || 358 bio_put(bio);
325 bh_offset(bh) + blocksize > bio_end) {
326 if (buffer_async_write(bh))
327 under_io++;
328 continue;
329 }
330 clear_buffer_async_write(bh);
331 if (error)
332 buffer_io_error(bh);
333 } while ((bh = bh->b_this_page) != head);
334 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
335 local_irq_restore(flags);
336 if (!under_io)
337 end_page_writeback(page);
338 } 359 }
339 bio_put(bio);
340 360
341 if (error) { 361 if (error) {
342 io_end->flag |= EXT4_IO_END_ERROR; 362 struct inode *inode = io_end->inode;
363
343 ext4_warning(inode->i_sb, "I/O error writing to inode %lu " 364 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
344 "(offset %llu size %ld starting block %llu)", 365 "(offset %llu size %ld starting block %llu)",
345 inode->i_ino, 366 inode->i_ino,
@@ -348,7 +369,6 @@ static void ext4_end_bio(struct bio *bio, int error)
348 (unsigned long long) 369 (unsigned long long)
349 bi_sector >> (inode->i_blkbits - 9)); 370 bi_sector >> (inode->i_blkbits - 9));
350 } 371 }
351
352 ext4_put_io_end_defer(io_end); 372 ext4_put_io_end_defer(io_end);
353} 373}
354 374