From 81a052273998f94b098945c4c313e05246956eb2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Jan 2009 16:58:01 +0100 Subject: ext3: Use lowercase names of quota functions Use lowercase names of quota functions instead of old uppercase ones. Signed-off-by: Jan Kara CC: linux-ext4@vger.kernel.org --- fs/ext3/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a64..c8f9bd308821 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3055,7 +3055,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; if (error) { ext3_journal_stop(handle); return error; @@ -3146,7 +3146,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so + /* We know that structure was already allocated during vfs_dq_init so * we will be updating only the data blocks + inodes */ ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); #endif @@ -3237,7 +3237,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * Also, vfs_dq_alloc_space() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing -- cgit v1.2.2 From 9e80d407736161d9b8b0c5a0d44f786e44c322ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Mar 2009 13:08:04 +0100 Subject: ext3: Avoid starting a transaction in writepage when not necessary We don't have to start a transaction in writepage() when all the blocks are properly allocated. 
Even in ordered mode either the data has been written via write() and they are thus already added to transaction's list or the data was written via mmap and then it's random in which transaction they get written anyway. This should help VM to pageout dirty memory without blocking on transaction commits. Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a64..05e5c2e5c0d7 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1435,6 +1435,10 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) return 0; } +static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) +{ + return !buffer_mapped(bh); +} /* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within @@ -1505,6 +1509,15 @@ static int ext3_ordered_writepage(struct page *page, if (ext3_journal_current_handle()) goto out_fail; + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { + /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ + return block_write_full_page(page, NULL, wbc); + } + page_bufs = page_buffers(page); + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { @@ -1512,11 +1525,6 @@ static int ext3_ordered_writepage(struct page *page, goto out_fail; } - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - page_bufs = page_buffers(page); walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bget_one); -- cgit v1.2.2 From 
695f6ae0dcea3dd83bfbb9634ff067f780649ba8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Apr 2009 16:57:17 -0700 Subject: ext3: avoid false EIO errors Sometimes block_write_begin() can map buffers in a page but later we fail to copy data into those buffers (because the source page has been paged out in the meantime). We then end up with !uptodate mapped buffers. To add a bit more to the confusion, block_write_end() does not commit any data (and thus does not mark any buffers as uptodate) if we didn't succeed with copying all the data. Commit f4fc66a894546bdc88a775d0e83ad20a65210bcb (ext3: convert to new aops) missed these cases and thus we were inserting non-uptodate buffers to transaction's list which confuses JBD code and it reports IO errors, aborts a transaction and generally makes users afraid about their data ;-P. This patch fixes the problem by reorganizing ext3_..._write_end() code to first call block_write_end() to mark buffers with valid data uptodate and after that we file only uptodate buffers to transaction's lists. We also fix a problem where we could leave blocks allocated beyond i_size (i_disksize in fact) because of failed write. We now add inode to orphan list when write fails (to be safe in case we crash) and then truncate blocks beyond i_size in a separate transaction. 
Signed-off-by: Jan Kara Reviewed-by: Aneesh Kumar K.V Cc: Nick Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 139 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 74 insertions(+), 65 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 4a09ff169870..d3ef6566b019 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1149,12 +1149,15 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + int ret; handle_t *handle; int retries = 0; struct page *page; pgoff_t index; unsigned from, to; + /* Reserve one block more for addition to orphan list in case + * we allocate blocks but write fails for some reason */ + int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); @@ -1184,14 +1187,19 @@ retry: } write_begin_failed: if (ret) { - ext3_journal_stop(handle); - unlock_page(page); - page_cache_release(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. + * + * Add inode to orphan list in case we crash before truncate + * finishes. */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); if (pos + len > inode->i_size) vmtruncate(inode, inode->i_size); } @@ -1211,6 +1219,18 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) return err; } +/* For ordered writepage and write_end functions */ +static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +{ + /* + * Write could have mapped the buffer but it didn't copy the data in + * yet. So avoid filing such buffer into a transaction. 
+ */ + if (buffer_mapped(bh) && buffer_uptodate(bh)) + return ext3_journal_dirty_data(handle, bh); + return 0; +} + /* For write_end() in data=journal mode */ static int write_end_fn(handle_t *handle, struct buffer_head *bh) { @@ -1221,26 +1241,20 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) } /* - * Generic write_end handler for ordered and writeback ext3 journal modes. - * We can't use generic_write_end, because that unlocks the page and we need to - * unlock the page after ext3_journal_stop, but ext3_journal_stop must run - * after block_write_end. + * This is nasty and subtle: ext3_write_begin() could have allocated blocks + * for the whole page but later we failed to copy the data in. Update inode + * size according to what we managed to copy. The rest is going to be + * truncated in write_end function. */ -static int ext3_generic_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) { - struct inode *inode = file->f_mapping->host; - - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); + /* What matters to us is i_disksize. 
We don't write i_size anywhere */ + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + if (pos + copied > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = pos + copied; mark_inode_dirty(inode); } - - return copied; } /* @@ -1260,35 +1274,29 @@ static int ext3_ordered_write_end(struct file *file, unsigned from, to; int ret = 0, ret2; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + copied; ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext3_journal_dirty_data); + from, to, NULL, journal_dirty_data_fn); - if (ret == 0) { - /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. - */ - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; - } + if (ret == 0) + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? 
ret : copied; } @@ -1299,25 +1307,22 @@ static int ext3_writeback_write_end(struct file *file, { handle_t *handle = ext3_journal_current_handle(); struct inode *inode = file->f_mapping->host; - int ret = 0, ret2; - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; - - ret2 = ext3_generic_write_end(file, mapping, pos, len, copied, - page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; + int ret; - ret2 = ext3_journal_stop(handle); - if (!ret) - ret = ret2; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + update_file_sizes(inode, pos, copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. + */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); + ret = ext3_journal_stop(handle); unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@ -1338,15 +1343,23 @@ static int ext3_journalled_write_end(struct file *file, if (copied < len) { if (!PageUptodate(page)) copied = 0; - page_zero_new_buffers(page, from+copied, to); + page_zero_new_buffers(page, from + copied, to); + to = from + copied; } ret = walk_page_buffers(handle, page_buffers(page), from, to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos+copied > inode->i_size) - i_size_write(inode, pos+copied); + + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + /* + * There may be allocated blocks outside of i_size because + * we failed to copy some data. Prepare for truncate. 
+ */ + if (pos + len > inode->i_size) + ext3_orphan_add(handle, inode); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; @@ -1361,6 +1374,8 @@ static int ext3_journalled_write_end(struct file *file, unlock_page(page); page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); return ret ? ret : copied; } @@ -1428,17 +1443,11 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) return 0; } -static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -{ - if (buffer_mapped(bh)) - return ext3_journal_dirty_data(handle, bh); - return 0; -} - static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) { return !buffer_mapped(bh); } + /* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within -- cgit v1.2.2 From f7ab34ea723ed304b19698efca85d6f40cecd99b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 3 Apr 2009 01:34:35 -0400 Subject: ext3: Add replace-on-truncate hueristics for data=writeback mode In data=writeback mode, start an asynchronous flush when closing a file which had been previously truncated down to zero. This lowers the probability of data loss in the case of applications that attempt to replace a file using truncate. Signed-off-by: "Theodore Ts'o" --- fs/ext3/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a64..0f5bca0d82fc 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2346,6 +2346,9 @@ void ext3_truncate(struct inode *inode) if (!ext3_can_truncate(inode)) return; + if (inode->i_size == 0 && ext3_should_writeback_data(inode)) + ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + /* * We have to lock the EOF page here, because lock_page() nests * outside journal_start(). -- cgit v1.2.2