From 35121c9860316d7799cea0fbc359a9186e7c2747 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 00:00:00 -0400 Subject: ext4: fix quota accounting in case of fallocate allocated_meta_data is already included in 'used' variable. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 81d60541284..55bfcd94d1a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1126,7 +1126,8 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if (allocated_meta_blocks) dquot_claim_block(inode, allocated_meta_blocks); - dquot_release_reservation_block(inode, mdb_free + used); + dquot_release_reservation_block(inode, mdb_free + used - + allocated_meta_blocks); } /* -- cgit v1.2.2 From c445e3e0a5c2804524dec6e55f66d63f6bc5bc3e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 16 May 2010 04:00:00 -0400 Subject: ext4: don't scan/accumulate more pages than mballoc will allocate There was a bug reported on RHEL5 that a 10G dd on a 12G box had a very, very slow sync after that. At issue was the loop in write_cache_pages scanning all the way to the end of the 10G file, even though the subsequent call to mpage_da_submit_io would only actually write a smallish amt; then we went back to the write_cache_pages loop ... wasting tons of time in calling __mpage_da_writepage for thousands of pages we would just revisit (many times) later. Upstream it's not such a big issue for sys_sync because we get to the loop with a much smaller nr_to_write, which limits the loop. However, talking with Aneesh he realized that fsync upstream still gets here with a very large nr_to_write and we face the same problem. This patch makes mpage_add_bh_to_extent stop the loop after we've accumulated 2048 pages, by setting mpd->io_done = 1; which ultimately causes the write_cache_pages loop to break. Repeating the test with a dirty_ratio of 80 (to leave something for fsync to do), I don't see huge IO performance gains, but the reduction in cpu usage is striking: 80% usage with stock, and 2% with the below patch. Instrumenting the loop in write_cache_pages clearly shows that we are wasting time here. Eventually we need to change mpage_da_map_pages() also submit its I/O to the block layer, subsuming mpage_da_submit_io(), and then change it call ext4_get_blocks() multiple times. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 55bfcd94d1a..89a31e8869c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2350,6 +2350,15 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t next; int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; + /* + * XXX Don't go larger than mballoc is willing to allocate + * This is a stopgap solution. We eventually need to fold + * mpage_da_submit_io() into this function and then call + * ext4_get_blocks() multiple times in a loop + */ + if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) + goto flush_it; + /* check if thereserved journal credits might overflow */ if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { if (nrblocks >= EXT4_MAX_TRANS_DATA) { -- cgit v1.2.2 From 72b8ab9dde211ea518ff27e631b2046ef90c29a2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 16 May 2010 11:00:00 -0400 Subject: ext4: don't use quota reservation for speculative metadata Because we can badly over-reserve metadata when we calculate worst-case, it complicates things for quota, since we must reserve and then claim later, retry on EDQUOT, etc. Quota is also a generally smaller pool than fs free blocks, so this over-reservation hurts more, and more often. I'm of the opinion that it's not the worst thing to allow metadata to push a user slightly over quota. This simplifies the code and avoids the false quota rejections that result from worst-case speculation. This patch stops the speculative quota-charging for worst-case metadata requirements, and just charges quota when the blocks are allocated at writeout. It also is able to remove the try-again loop on EDQUOT. This patch has been tested indirectly by running the xfstests suite with a hack to mount & enable quota prior to the test. I also did a more specific test of fragmenting freespace and then doing a large delalloc write under quota; quota stopped me at the right amount of file IO, and then the writeout generated enough metadata (due to the fragmentation) that it put me slightly over quota, as expected. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 69 +++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 46 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89a31e8869c..df43217e4e7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1076,7 +1076,6 @@ void ext4_da_update_reserve_space(struct inode *inode, { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); trace_ext4_da_update_reserve_space(inode, used); @@ -1091,11 +1090,10 @@ void ext4_da_update_reserve_space(struct inode *inode, /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; - used += ei->i_allocated_meta_blocks; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; - allocated_meta_blocks = ei->i_allocated_meta_blocks; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + used + ei->i_allocated_meta_blocks); ei->i_allocated_meta_blocks = 0; - percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); if (ei->i_reserved_data_blocks == 0) { /* @@ -1103,31 +1101,23 @@ void ext4_da_update_reserve_space(struct inode *inode, * only when we have written all of the delayed * allocation blocks. */ - mdb_free = ei->i_reserved_meta_blocks; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + ei->i_reserved_meta_blocks); ei->i_reserved_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; - percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - /* Update quota subsystem */ - if (quota_claim) { + /* Update quota subsystem for data blocks */ + if (quota_claim) dquot_claim_block(inode, used); - if (mdb_free) - dquot_release_reservation_block(inode, mdb_free); - } else { + else { /* * We did fallocate with an offset that is already delayed * allocated. So on delayed allocated writeback we should - * not update the quota for allocated blocks. But then - * converting an fallocate region to initialized region would - * have caused a metadata allocation. So claim quota for - * that + * not re-claim the quota for fallocated blocks. */ - if (allocated_meta_blocks) - dquot_claim_block(inode, allocated_meta_blocks); - dquot_release_reservation_block(inode, mdb_free + used - - allocated_meta_blocks); + dquot_release_reservation_block(inode, used); } /* @@ -1861,7 +1851,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long md_needed, md_reserved; + unsigned long md_needed; int ret; /* @@ -1871,22 +1861,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) */ repeat: spin_lock(&ei->i_block_reservation_lock); - md_reserved = ei->i_reserved_meta_blocks; md_needed = ext4_calc_metadata_amount(inode, lblock); trace_ext4_da_reserve_space(inode, md_needed); spin_unlock(&ei->i_block_reservation_lock); /* - * Make quota reservation here to prevent quota overflow - * later. Real quota accounting is done at pages writeout - * time. + * We will charge metadata quota at writeout time; this saves + * us from metadata over-estimation, though we may go over by + * a small amount in the end. Here we just reserve for data. */ - ret = dquot_reserve_block(inode, md_needed + 1); + ret = dquot_reserve_block(inode, 1); if (ret) return ret; - + /* + * We do still charge estimated metadata to the sb though; + * we cannot afford to run out of free blocks. + */ if (ext4_claim_free_blocks(sbi, md_needed + 1)) { - dquot_release_reservation_block(inode, md_needed + 1); + dquot_release_reservation_block(inode, 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; @@ -1933,12 +1925,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free) * only when we have written all of the delayed * allocation blocks. */ - to_free += ei->i_reserved_meta_blocks; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + ei->i_reserved_meta_blocks); ei->i_reserved_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; } - /* update fs dirty blocks counter */ + /* update fs dirty data blocks counter */ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -3086,7 +3079,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret, retries = 0, quota_retries = 0; + int ret, retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -3145,22 +3138,6 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - - if ((ret == -EDQUOT) && - EXT4_I(inode)->i_reserved_meta_blocks && - (quota_retries++ < 3)) { - /* - * Since we often over-estimate the number of meta - * data blocks required, we may sometimes get a - * spurios out of quota error even though there would - * be enough space once we write the data blocks and - * find out how many meta data blocks were _really_ - * required. So try forcing the inode write to see if - * that helps. - */ - write_inode_now(inode, (quota_retries == 3)); - goto retry; - } out: return ret; } -- cgit v1.2.2 From fbe845ddf368f77f86aa7500f8fd2690f54c66a8 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sun, 16 May 2010 13:00:00 -0400 Subject: ext4: Remove extraneous newlines in ext4_msg() calls Addresses-Google-Bug: #2562325 Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index df43217e4e7..6aa0442811d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2276,7 +2276,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) ext4_msg(mpd->inode->i_sb, KERN_CRIT, "delayed block allocation failed for inode %lu at " "logical offset %llu with max blocks %zd with " - "error %d\n", mpd->inode->i_ino, + "error %d", mpd->inode->i_ino, (unsigned long long) next, mpd->b_size >> mpd->inode->i_blkbits, err); printk(KERN_CRIT "This should not happen!! " @@ -2944,7 +2944,7 @@ retry: if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " - "%ld pages, ino %lu; err %d\n", __func__, + "%ld pages, ino %lu; err %d", __func__, wbc->nr_to_write, inode->i_ino, ret); goto out_writepages; } @@ -3019,7 +3019,7 @@ retry: if (pages_skipped != wbc->pages_skipped) ext4_msg(inode->i_sb, KERN_CRIT, "This should not happen leaving %s " - "with nr_to_write = %ld ret = %d\n", + "with nr_to_write = %ld ret = %d", __func__, wbc->nr_to_write, ret); /* Update index */ -- cgit v1.2.2 From 8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 18:00:00 -0400 Subject: ext4: Use our own write_cache_pages() Make a copy of write_cache_pages() for the benefit of ext4_da_writepages(). This allows us to simplify the code some, and will allow us to further customize the code in future patches. There are some nasty hacks in write_cache_pages(), which Linus has (correctly) characterized as vile. I've just copied it into write_cache_pages_da(), without trying to clean those bits up lest I break something in the ext4's delalloc implementation, which is a bit fragile right now. This will allow Dave Chinner to clean up write_cache_pages() in mm/page-writeback.c, without worrying about breaking ext4. Eventually write_cache_pages_da() will go away when I rewrite ext4's delayed allocation and create a general ext4_writepages() which is used for all of ext4's writeback. Until now this is the lowest risk way to clean up the core write_cache_pages() function. Signed-off-by: "Theodore Ts'o" Cc: Dave Chinner --- fs/ext4/inode.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 119 insertions(+), 22 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6aa0442811d..830336d3911 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2426,17 +2426,6 @@ static int __mpage_da_writepage(struct page *page, struct buffer_head *bh, *head; sector_t logical; - if (mpd->io_done) { - /* - * Rest of the page in the page_vec - * redirty then and skip then. We will - * try to write them again after - * starting a new transaction - */ - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return MPAGE_DA_EXTENT_TAIL; - } /* * Can we merge this page to current extent? */ @@ -2831,6 +2820,124 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) return ext4_chunk_trans_blocks(inode, max_blocks); } +/* + * write_cache_pages_da - walk the list of dirty pages of the given + * address space and call the callback function (which usually writes + * the pages). + * + * This is a forked version of write_cache_pages(). Differences: + * Range cyclic is ignored. + * no_nrwrite_index_update is always presumed true + */ +static int write_cache_pages_da(struct address_space *mapping, + struct writeback_control *wbc, + struct mpage_da_data *mpd) +{ + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + long nr_to_write = wbc->nr_to_write; + + pagevec_init(&pvec, 0); + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + + while (!done && (index <= end)) { + int i; + + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), or + * even swizzled back from swapper_space to tmpfs file + * mapping. However, page->index will not change + * because we have a reference on the page. + */ + if (page->index > end) { + done = 1; + break; + } + + lock_page(page); + + /* + * Page truncated or invalidated. We can freely skip it + * then, even for data integrity operations: the page + * has disappeared concurrently, so there could be no + * real expectation of this data interity operation + * even if there is now a new, dirty page at the same + * pagecache address. + */ + if (unlikely(page->mapping != mapping)) { +continue_unlock: + unlock_page(page); + continue; + } + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (PageWriteback(page)) { + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + else + goto continue_unlock; + } + + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + ret = __mpage_da_writepage(page, wbc, mpd); + if (unlikely(ret)) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + } else { + done = 1; + break; + } + } + + if (nr_to_write > 0) { + nr_to_write--; + if (nr_to_write == 0 && + wbc->sync_mode == WB_SYNC_NONE) { + /* + * We stop writing back only if we are + * not doing integrity sync. In case of + * integrity sync we have to keep going + * because someone may be concurrently + * dirtying pages, and we might have + * synced a lot of newly appeared dirty + * pages, but have not synced all of the + * old dirty pages. + */ + done = 1; + break; + } + } + } + pagevec_release(&pvec); + cond_resched(); + } + return ret; +} + + static int ext4_da_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -2839,7 +2946,6 @@ static int ext4_da_writepages(struct address_space *mapping, handle_t *handle = NULL; struct mpage_da_data mpd; struct inode *inode = mapping->host; - int no_nrwrite_index_update; int pages_written = 0; long pages_skipped; unsigned int max_pages; @@ -2919,12 +3025,6 @@ static int ext4_da_writepages(struct address_space *mapping, mpd.wbc = wbc; mpd.inode = mapping->host; - /* - * we don't want write_cache_pages to update - * nr_to_write and writeback_index - */ - no_nrwrite_index_update = wbc->no_nrwrite_index_update; - wbc->no_nrwrite_index_update = 1; pages_skipped = wbc->pages_skipped; retry: @@ -2966,8 +3066,7 @@ retry: mpd.io_done = 0; mpd.pages_written = 0; mpd.retval = 0; - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, - &mpd); + ret = write_cache_pages_da(mapping, wbc, &mpd); /* * If we have a contiguous extent of pages and we * haven't done the I/O yet, map the blocks and submit @@ -3033,8 +3132,6 @@ retry: mapping->writeback_index = index; out_writepages: - if (!no_nrwrite_index_update) - wbc->no_nrwrite_index_update = 0; wbc->nr_to_write -= nr_to_writebump; wbc->range_start = range_start; trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); -- cgit v1.2.2 From e35fd6609b2fee54484d520deccb8f18bf7d38f3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 19:00:00 -0400 Subject: ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks() Jack up ext4_get_blocks() and add a new function, ext4_map_blocks() which uses a much smaller structure, struct ext4_map_blocks which is 20 bytes, as opposed to a struct buffer_head, which nearly 5 times bigger on an x86_64 machine. By switching things to use ext4_map_blocks(), we can save stack space by using ext4_map_blocks() since we can avoid allocating a struct buffer_head on the stack. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 102 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 44 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 830336d3911..ff2f5fd681b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, int ret; /* - * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this + * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by * i_mutex. So we can safely drop the i_data_sem here. @@ -890,9 +890,9 @@ err_out: } /* - * The ext4_ind_get_blocks() function handles non-extents inodes + * The ext4_ind_map_blocks() function handles non-extents inodes * (i.e., using the traditional indirect/double-indirect i_blocks - * scheme) for ext4_get_blocks(). + * scheme) for ext4_map_blocks(). * * Allocation strategy is simple: if we have to allocate something, we will * have to go the whole way to leaf. So let's do it before attaching anything @@ -917,9 +917,8 @@ err_out: * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system * blocks. */ -static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int maxblocks, - struct buffer_head *bh_result, +static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { int err = -EIO; @@ -935,7 +934,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); - depth = ext4_block_to_path(inode, iblock, offsets, + depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); if (depth == 0) @@ -946,10 +945,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); count++; /*map more blocks*/ - while (count < maxblocks && count <= blocks_to_boundary) { + while (count < map->m_len && count <= blocks_to_boundary) { ext4_fsblk_t blk; blk = le32_to_cpu(*(chain[depth-1].p + count)); @@ -969,7 +967,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, /* * Okay, we need to do block allocation. */ - goal = ext4_find_goal(inode, iblock, partial); + goal = ext4_find_goal(inode, map->m_lblk, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -979,11 +977,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, * direct blocks to allocate for this branch. */ count = ext4_blks_to_allocate(partial, indirect_blks, - maxblocks, blocks_to_boundary); + map->m_len, blocks_to_boundary); /* * Block out ext4_truncate while we alter the tree */ - err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, &count, goal, offsets + (partial - chain), partial); @@ -995,18 +993,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext4_splice_branch(handle, inode, iblock, + err = ext4_splice_branch(handle, inode, map->m_lblk, partial, indirect_blks, count); if (err) goto cleanup; - set_buffer_new(bh_result); + map->m_flags |= EXT4_MAP_NEW; ext4_update_inode_fsync_trans(handle, inode, 1); got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); + map->m_len = count; if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); + map->m_flags |= EXT4_MAP_BOUNDARY; err = count; /* Clean up and exit */ partial = chain + depth - 1; /* the whole chain */ @@ -1016,7 +1016,6 @@ cleanup: brelse(partial->bh); partial--; } - BUFFER_TRACE(bh_result, "returned"); out: return err; } @@ -1203,15 +1202,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, } /* - * The ext4_get_blocks() function tries to look up the requested blocks, + * The ext4_map_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. * * Otherwise it takes the write lock of the i_data_sem and allocate blocks * and store the allocated blocks in the result buffer head and mark it * mapped. * - * If file type is extents based, it will call ext4_ext_get_blocks(), - * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping + * If file type is extents based, it will call ext4_ext_map_blocks(), + * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping * based files * * On success, it returns the number of blocks being mapped or allocate. @@ -1224,35 +1223,30 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, * * It returns the error in case of allocation failure. */ -int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int flags) +int ext4_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { int retval; - clear_buffer_mapped(bh); - clear_buffer_unwritten(bh); - - ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," - "logical block %lu\n", inode->i_ino, flags, max_blocks, - (unsigned long)block); + map->m_flags = 0; + ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," + "logical block %lu\n", inode->i_ino, flags, map->m_len, + (unsigned long) map->m_lblk); /* * Try to see if we can get the block without requesting a new * file system block. */ down_read((&EXT4_I(inode)->i_data_sem)); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, 0); + retval = ext4_ext_map_blocks(handle, inode, map, 0); } else { - retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, - bh, 0); + retval = ext4_ind_map_blocks(handle, inode, map, 0); } up_read((&EXT4_I(inode)->i_data_sem)); - if (retval > 0 && buffer_mapped(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, "file system corruption", - block, bh->b_blocknr, retval); + map->m_lblk, map->m_pblk, retval); if (ret != 0) return ret; } @@ -1268,7 +1262,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * ext4_ext_get_block() returns th create = 0 * with buffer head unmapped. */ - if (retval > 0 && buffer_mapped(bh)) + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) return retval; /* @@ -1281,7 +1275,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * of BH_Unwritten and BH_Mapped flags being simultaneously * set on the buffer_head. */ - clear_buffer_unwritten(bh); + map->m_flags &= ~EXT4_MAP_UNWRITTEN; /* * New blocks allocate and/or writing to uninitialized extent @@ -1304,13 +1298,11 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * could have changed the inode type in between */ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, flags); + retval = ext4_ext_map_blocks(handle, inode, map, flags); } else { - retval = ext4_ind_get_blocks(handle, inode, block, - max_blocks, bh, flags); + retval = ext4_ind_map_blocks(handle, inode, map, flags); - if (retval > 0 && buffer_new(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { /* * We allocated new blocks which will result in * i_data's format changing. Force the migrate @@ -1333,16 +1325,38 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, EXT4_I(inode)->i_delalloc_reserved_flag = 0; up_write((&EXT4_I(inode)->i_data_sem)); - if (retval > 0 && buffer_mapped(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, "file system " "corruption after allocation", - block, bh->b_blocknr, retval); + map->m_lblk, map->m_pblk, + retval); if (ret != 0) return ret; } return retval; } +int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, + unsigned int max_blocks, struct buffer_head *bh, + int flags) +{ + struct ext4_map_blocks map; + int ret; + + map.m_lblk = block; + map.m_len = max_blocks; + + ret = ext4_map_blocks(handle, inode, &map, flags); + if (ret < 0) + return ret; + + bh->b_blocknr = map.m_pblk; + bh->b_size = inode->i_sb->s_blocksize * map.m_len; + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + return ret; +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -- cgit v1.2.2 From 2ed886852adfcb070bf350e66a0da0d98b2f3ab5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 20:00:00 -0400 Subject: ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks() This saves a huge amount of stack space by avoiding unnecesary struct buffer_head's from being allocated on the stack. In addition, to make the code easier to understand, collapse and refactor ext4_get_block(), ext4_get_block_write(), noalloc_get_block_write(), into a single function. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 327 ++++++++++++++++++++++---------------------------------- 1 file changed, 127 insertions(+), 200 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ff2f5fd681b..0b1d7c89f93 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1336,133 +1336,112 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, return retval; } -int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int flags) -{ - struct ext4_map_blocks map; - int ret; - - map.m_lblk = block; - map.m_len = max_blocks; - - ret = ext4_map_blocks(handle, inode, &map, flags); - if (ret < 0) - return ret; - - bh->b_blocknr = map.m_pblk; - bh->b_size = inode->i_sb->s_blocksize * map.m_len; - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; - return ret; -} - /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int _ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int flags) { handle_t *handle = ext4_journal_current_handle(); + struct ext4_map_blocks map; int ret = 0, started = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; int dio_credits; - if (create && !handle) { + map.m_lblk = iblock; + map.m_len = bh->b_size >> inode->i_blkbits; + + if (flags && !handle) { /* Direct IO write... */ - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + if (map.m_len > DIO_MAX_BLOCKS) + map.m_len = DIO_MAX_BLOCKS; + dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); handle = ext4_journal_start(inode, dio_credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out; + return ret; } started = 1; } - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, - create ? EXT4_GET_BLOCKS_CREATE : 0); + ret = ext4_map_blocks(handle, inode, &map, flags); if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + bh->b_size = inode->i_sb->s_blocksize * map.m_len; ret = 0; } if (started) ext4_journal_stop(handle); -out: return ret; } +int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + return _ext4_get_block(inode, iblock, bh, + create ? EXT4_GET_BLOCKS_CREATE : 0); +} + /* * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, ext4_lblk_t block, int create, int *errp) { - struct buffer_head dummy; + struct ext4_map_blocks map; + struct buffer_head *bh; int fatal = 0, err; - int flags = 0; J_ASSERT(handle != NULL || create == 0); - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); - if (create) - flags |= EXT4_GET_BLOCKS_CREATE; - err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); - /* - * ext4_get_blocks() returns number of blocks mapped. 0 in - * case of a HOLE. - */ - if (err > 0) { - if (err > 1) - WARN_ON(1); - err = 0; + map.m_lblk = block; + map.m_len = 1; + err = ext4_map_blocks(handle, inode, &map, + create ? EXT4_GET_BLOCKS_CREATE : 0); + + if (err < 0) + *errp = err; + if (err <= 0) + return NULL; + *errp = 0; + + bh = sb_getblk(inode->i_sb, map.m_pblk); + if (!bh) { + *errp = -EIO; + return NULL; } - *errp = err; - if (!err && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (!bh) { - *errp = -EIO; - goto err; - } - if (buffer_new(&dummy)) { - J_ASSERT(create != 0); - J_ASSERT(handle != NULL); + if (map.m_flags & EXT4_MAP_NEW) { + J_ASSERT(create != 0); + J_ASSERT(handle != NULL); - /* - * Now that we do not always journal data, we should - * keep in mind whether this should always journal the - * new buffer as metadata. For now, regular file - * writes use ext4_get_block instead, so it's not a - * problem. - */ - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext4_journal_get_create_access(handle, bh); - if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data, 0, inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (!fatal) - fatal = err; - } else { - BUFFER_TRACE(bh, "not a new buffer"); - } - if (fatal) { - *errp = fatal; - brelse(bh); - bh = NULL; + /* + * Now that we do not always journal data, we should + * keep in mind whether this should always journal the + * new buffer as metadata. For now, regular file + * writes use ext4_get_block instead, so it's not a + * problem. + */ + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + fatal = ext4_journal_get_create_access(handle, bh); + if (!fatal && !buffer_uptodate(bh)) { + memset(bh->b_data, 0, inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); } - return bh; + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (!fatal) + fatal = err; + } else { + BUFFER_TRACE(bh, "not a new buffer"); } -err: - return NULL; + if (fatal) { + *errp = fatal; + brelse(bh); + bh = NULL; + } + return bh; } struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, @@ -2050,28 +2029,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) /* * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers * - * @mpd->inode - inode to walk through - * @exbh->b_blocknr - first block on a disk - * @exbh->b_size - amount of space in bytes - * @logical - first logical block to start assignment with - * * the function goes through all passed space and put actual disk * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten */ -static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, - struct buffer_head *exbh) +static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, + struct ext4_map_blocks *map) { struct inode *inode = mpd->inode; struct address_space *mapping = inode->i_mapping; - int blocks = exbh->b_size >> inode->i_blkbits; - sector_t pblock = exbh->b_blocknr, cur_logical; + int blocks = map->m_len; + sector_t pblock = map->m_pblk, cur_logical; struct buffer_head *head, *bh; pgoff_t index, end; struct pagevec pvec; int nr_pages, i; - index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); pagevec_init(&pvec, 0); @@ -2098,17 +2072,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, /* skip blocks out of the range */ do { - if (cur_logical >= logical) + if (cur_logical >= map->m_lblk) break; cur_logical++; } while ((bh = bh->b_this_page) != head); do { - if (cur_logical >= logical + blocks) + if (cur_logical >= map->m_lblk + blocks) break; - if (buffer_delay(bh) || - buffer_unwritten(bh)) { + if (buffer_delay(bh) || buffer_unwritten(bh)) { BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); @@ -2127,7 +2100,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); - if (buffer_uninit(exbh)) + if (map->m_flags & EXT4_MAP_UNINIT) set_buffer_uninit(bh); cur_logical++; pblock++; @@ -2138,21 +2111,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } -/* - * __unmap_underlying_blocks - just a helper function to unmap - * set of blocks described by @bh - */ -static inline void __unmap_underlying_blocks(struct inode *inode, - struct buffer_head *bh) -{ - struct block_device *bdev = inode->i_sb->s_bdev; - int blocks, i; - - blocks = bh->b_size >> inode->i_blkbits; - for (i = 0; i < blocks; i++) - unmap_underlying_metadata(bdev, bh->b_blocknr + i); -} - static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, sector_t logical, long blk_cnt) { @@ -2214,7 +2172,7 @@ static void ext4_print_free_blocks(struct inode *inode) static int mpage_da_map_blocks(struct mpage_da_data *mpd) { int err, blks, get_blocks_flags; - struct buffer_head new; + struct ext4_map_blocks map; sector_t next = mpd->b_blocknr; unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; loff_t disksize = EXT4_I(mpd->inode)->i_disksize; @@ -2255,15 +2213,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting * variables are updated after the blocks have been allocated. */ - new.b_state = 0; + map.m_lblk = next; + map.m_len = max_blocks; get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (ext4_should_dioread_nolock(mpd->inode)) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (mpd->b_state & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; - blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, - &new, get_blocks_flags); + blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); if (blks < 0) { err = blks; /* @@ -2305,10 +2263,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) } BUG_ON(blks == 0); - new.b_size = (blks << mpd->inode->i_blkbits); + if (map.m_flags & EXT4_MAP_NEW) { + struct block_device *bdev = mpd->inode->i_sb->s_bdev; + int i; - if (buffer_new(&new)) - __unmap_underlying_blocks(mpd->inode, &new); + for (i = 0; i < map.m_len; i++) + unmap_underlying_metadata(bdev, map.m_pblk + i); + } /* * If blocks are delayed marked, we need to @@ -2316,7 +2277,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) */ if ((mpd->b_state & (1 << BH_Delay)) || (mpd->b_state & (1 << BH_Unwritten))) - mpage_put_bnr_to_bhs(mpd, next, &new); + mpage_put_bnr_to_bhs(mpd, &map); if (ext4_should_order_data(mpd->inode)) { err = ext4_jbd2_file_inode(handle, mpd->inode); @@ -2534,8 +2495,9 @@ static int __mpage_da_writepage(struct page *page, * initialized properly. */ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) + struct buffer_head *bh, int create) { + struct ext4_map_blocks map; int ret = 0; sector_t invalid_block = ~((sector_t) 0xffff); @@ -2543,16 +2505,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, invalid_block = ~0; BUG_ON(create == 0); - BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); + BUG_ON(bh->b_size != inode->i_sb->s_blocksize); + + map.m_lblk = iblock; + map.m_len = 1; /* * first, we need to know whether the block is allocated already * preallocated blocks are unmapped but should treated * the same as allocated blocks. */ - ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); - if ((ret == 0) && !buffer_delay(bh_result)) { - /* the block isn't (pre)allocated yet, let's reserve space */ + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; + if (ret == 0) { + if (buffer_delay(bh)) + return 0; /* Not sure this could or should happen */ /* * XXX: __block_prepare_write() unmaps passed block, * is it OK? @@ -2562,26 +2530,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* not enough space to reserve */ return ret; - map_bh(bh_result, inode->i_sb, invalid_block); - set_buffer_new(bh_result); - set_buffer_delay(bh_result); - } else if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - if (buffer_unwritten(bh_result)) { - /* A delayed write to unwritten bh should - * be marked new and mapped. Mapped ensures - * that we don't do get_block multiple times - * when we write to the same offset and new - * ensures that we do proper zero out for - * partial write. - */ - set_buffer_new(bh_result); - set_buffer_mapped(bh_result); - } - ret = 0; + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + return 0; } - return ret; + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + + if (buffer_unwritten(bh)) { + /* A delayed write to unwritten bh should be marked + * new and mapped. Mapped ensures that we don't do + * get_block multiple times when we write to the same + * offset and new ensures that we do proper zero out + * for partial write. + */ + set_buffer_new(bh); + set_buffer_mapped(bh); + } + return 0; } /* @@ -2603,21 +2571,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, static int noalloc_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - int ret = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); - - /* - * we don't want to do block allocation in writepage - * so call get_block_wrap with create = 0 - */ - ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - return ret; + return _ext4_get_block(inode, iblock, bh_result, 0); } static int bget_one(handle_t *handle, struct buffer_head *bh) @@ -3644,46 +3599,18 @@ out: return ret; } +/* + * ext4_get_block used when preparing for a DIO write or buffer write. + * We allocate an uinitialized extent if blocks haven't been allocated. + * The extent will be converted to initialized after the IO is complete. + */ static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle = ext4_journal_current_handle(); - int ret = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - int dio_credits; - int started = 0; - ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", inode->i_ino, create); - /* - * ext4_get_block in prepare for a DIO write or buffer write. - * We allocate an uinitialized extent if blocks haven't been allocated. - * The extent will be converted to initialized after IO complete. - */ - create = EXT4_GET_BLOCKS_IO_CREATE_EXT; - - if (!handle) { - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); - handle = ext4_journal_start(inode, dio_credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - started = 1; - } - - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, - create); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - if (started) - ext4_journal_stop(handle); -out: - return ret; + return _ext4_get_block(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); } static void dump_completed_IO(struct inode * inode) -- cgit v1.2.2 From 24676da469f50f433baa347845639662c561d1f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 21:00:00 -0400 Subject: ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE() EXT4_ERROR_INODE() tends to provide better error information and in a more consistent format. Some errors were not even identifying the inode or directory which was corrupted, which made them not very useful. Addresses-Google-Bug: #2507977 Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 83 ++++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 45 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0b1d7c89f93..3b687725758 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - __ext4_error(inode->i_sb, function, - "invalid block reference %u " - "in inode #%lu", blk, inode->i_ino); + ext4_error_inode(function, inode, + "invalid block reference %u", blk); return -EIO; } } @@ -1129,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode, ext4_discard_preallocations(inode); } -static int check_block_validity(struct inode *inode, const char *msg, - sector_t logical, sector_t phys, int len) +static int check_block_validity(struct inode *inode, const char *func, + struct ext4_map_blocks *map) { - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { - __ext4_error(inode->i_sb, msg, - "inode #%lu logical block %llu mapped to %llu " - "(size %d)", inode->i_ino, - (unsigned long long) logical, - (unsigned long long) phys, len); + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, + map->m_len)) { + ext4_error_inode(func, inode, + "lblock %lu mapped to illegal pblock %llu " + "(length %d)", (unsigned long) map->m_lblk, + map->m_pblk, map->m_len); return -EIO; } return 0; @@ -1245,8 +1244,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_read((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, "file system corruption", - map->m_lblk, map->m_pblk, retval); + int ret = check_block_validity(inode, __func__, map); if (ret != 0) return ret; } @@ -1326,10 +1324,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, "file system " - "corruption after allocation", - map->m_lblk, map->m_pblk, - retval); + int ret = check_block_validity(inode, + "ext4_map_blocks_after_alloc", + map); if (ret != 0) return ret; } @@ -4327,10 +4324,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, count)) { - ext4_error(inode->i_sb, "inode #%lu: " - "attempt to clear blocks %llu len %lu, invalid", - inode->i_ino, (unsigned long long) block_to_free, - count); + EXT4_ERROR_INODE(inode, "attempt to clear invalid " + "blocks %llu len %lu", + (unsigned long long) block_to_free, count); return 1; } @@ -4435,11 +4431,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else - ext4_error(inode->i_sb, - "circular indirect block detected, " - "inode=%lu, block=%llu", - inode->i_ino, - (unsigned long long) this_bh->b_blocknr); + EXT4_ERROR_INODE(inode, + "circular indirect block detected at " + "block %llu", + (unsigned long long) this_bh->b_blocknr); } } @@ -4477,11 +4472,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), nr, 1)) { - ext4_error(inode->i_sb, - "indirect mapped block in inode " - "#%lu invalid (level %d, blk #%lu)", - inode->i_ino, depth, - (unsigned long) nr); + EXT4_ERROR_INODE(inode, + "invalid indirect mapped " + "block %lu (level %d)", + (unsigned long) nr, depth); break; } @@ -4493,9 +4487,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - ext4_error(inode->i_sb, - "Read failure, inode=%lu, block=%llu", - inode->i_ino, nr); + EXT4_ERROR_INODE(inode, + "Read failure block=%llu", + (unsigned long long) nr); continue; } @@ -4810,8 +4804,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (!bh) { - ext4_error(sb, "unable to read inode block - " - "inode=%lu, block=%llu", inode->i_ino, block); + EXT4_ERROR_INODE(inode, "unable to read inode block - " + "block %llu", block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -4909,8 +4903,8 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(sb, "unable to read inode block - inode=%lu," - " block=%llu", inode->i_ino, block); + EXT4_ERROR_INODE(inode, "unable to read inode " + "block %llu", block); brelse(bh); return -EIO; } @@ -5121,8 +5115,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - ext4_error(sb, "bad extended attribute block %llu inode #%lu", - ei->i_file_acl, inode->i_ino); + EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", + ei->i_file_acl); ret = -EIO; goto bad_inode; } else if (ei->i_flags & EXT4_EXTENTS_FL) { @@ -5167,8 +5161,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { ret = -EIO; - ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", - inode->i_mode, inode->i_ino); + EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); goto bad_inode; } brelse(iloc.bh); @@ -5406,9 +5399,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_error(inode->i_sb, "IO error syncing inode, " - "inode=%lu, block=%llu", inode->i_ino, - (unsigned long long)iloc.bh->b_blocknr); + EXT4_ERROR_INODE(inode, + "IO error syncing inode (block=%llu)", + (unsigned long long) iloc.bh->b_blocknr); err = -EIO; } brelse(iloc.bh); -- cgit v1.2.2 From 12e9b892002d9af057655d35b44db8ee9243b0dc Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 22:00:00 -0400 Subject: ext4: Use bitops to read/modify i_flags in struct ext4_inode_info At several places we modify EXT4_I(inode)->i_flags without holding i_mutex (ext4_do_update_inode, ...). These modifications are racy and we can lose updates to i_flags. So convert handling of i_flags to use bitops which are atomic. https://bugzilla.kernel.org/show_bug.cgi?id=15792 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3b687725758..c2b0724e8ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -931,7 +931,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, int count = 0; ext4_fsblk_t first_block = 0; - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); @@ -1059,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, */ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) { - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_calc_metadata_amount(inode, lblock); return ext4_indirect_calc_metadata_amount(inode, lblock); @@ -1236,7 +1236,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * file system block. */ down_read((&EXT4_I(inode)->i_data_sem)); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, 0); } else { retval = ext4_ind_map_blocks(handle, inode, map, 0); @@ -1295,7 +1295,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * We need to check for EXT4 here because migrate * could have changed the inode type in between */ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags); } else { retval = ext4_ind_map_blocks(handle, inode, map, flags); @@ -2325,7 +2325,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, goto flush_it; /* check if thereserved journal credits might overflow */ - if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { if (nrblocks >= EXT4_MAX_TRANS_DATA) { /* * With non-extent format we are limited by the journal @@ -2779,7 +2779,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) * number of contiguous block. So we will limit * number of contiguous block to a sane value */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && (max_blocks > EXT4_MAX_TRANS_DATA)) max_blocks = EXT4_MAX_TRANS_DATA; @@ -3995,7 +3995,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); @@ -4631,12 +4631,12 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ext4_ext_truncate(inode); return; } @@ -5473,7 +5473,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (attr->ia_size > sbi->s_bitmap_maxbytes) { @@ -5486,7 +5486,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE && (attr->ia_size < inode->i_size || - (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { + (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { handle_t *handle; handle = ext4_journal_start(inode, 3); @@ -5518,7 +5518,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } } /* ext4_truncate will clear the flag */ - if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) + if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) ext4_truncate(inode); } @@ -5594,7 +5594,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return ext4_indirect_trans_blocks(inode, nrblocks, chunk); return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } @@ -5929,9 +5929,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) */ if (val) - EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; + ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); else - EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); -- cgit v1.2.2 From 5a58ec8766e0ce98fd585eb404b3e56935afafe6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 May 2010 02:00:00 -0400 Subject: ext4: Add a missing trace hook Commit f8ec9d6837241865cf99bed97bb99f4399fd5a03 added a trace event ext4_da_release_space, but didn't add some corresponding trace hook. Signed-off-by: Li Zefan Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2b0724e8ad..8e45331b656 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1893,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + trace_ext4_da_release_space(inode, to_free); if (unlikely(to_free > ei->i_reserved_data_blocks)) { /* * if there aren't enough reserved blocks, then the -- cgit v1.2.2 From 60e6679e28518ccd67169c4a539d8cc7490eb8a6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 May 2010 07:00:00 -0400 Subject: ext4: Drop whitespace at end of lines This patch was generated using: #!/usr/bin/perl -i while (<>) { s/[ ]+$//; print; } Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8e45331b656..502b07dc70d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -784,7 +784,7 @@ failed: /* Allocation failed, free what we already allocated */ ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); for (i = 1; i <= n ; i++) { - /* + /* * branch[i].bh is newly allocated, so there is no * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. @@ -874,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { - /* + /* * branch[i].bh is newly allocated, so there is no * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. -- cgit v1.2.2