diff options
Diffstat (limited to 'fs/ext4/inode.c')
| -rw-r--r-- | fs/ext4/inode.c | 258 |
1 files changed, 141 insertions, 117 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 42272d67955a..4b8debeb3965 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
| 167 | /* | 167 | /* |
| 168 | * Called at the last iput() if i_nlink is zero. | 168 | * Called at the last iput() if i_nlink is zero. |
| 169 | */ | 169 | */ |
| 170 | void ext4_delete_inode(struct inode *inode) | 170 | void ext4_evict_inode(struct inode *inode) |
| 171 | { | 171 | { |
| 172 | handle_t *handle; | 172 | handle_t *handle; |
| 173 | int err; | 173 | int err; |
| 174 | 174 | ||
| 175 | if (inode->i_nlink) { | ||
| 176 | truncate_inode_pages(&inode->i_data, 0); | ||
| 177 | goto no_delete; | ||
| 178 | } | ||
| 179 | |||
| 175 | if (!is_bad_inode(inode)) | 180 | if (!is_bad_inode(inode)) |
| 176 | dquot_initialize(inode); | 181 | dquot_initialize(inode); |
| 177 | 182 | ||
| @@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode) | |||
| 221 | "couldn't extend journal (err %d)", err); | 226 | "couldn't extend journal (err %d)", err); |
| 222 | stop_handle: | 227 | stop_handle: |
| 223 | ext4_journal_stop(handle); | 228 | ext4_journal_stop(handle); |
| 229 | ext4_orphan_del(NULL, inode); | ||
| 224 | goto no_delete; | 230 | goto no_delete; |
| 225 | } | 231 | } |
| 226 | } | 232 | } |
| @@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode) | |||
| 245 | */ | 251 | */ |
| 246 | if (ext4_mark_inode_dirty(handle, inode)) | 252 | if (ext4_mark_inode_dirty(handle, inode)) |
| 247 | /* If that failed, just do the required in-core inode clear. */ | 253 | /* If that failed, just do the required in-core inode clear. */ |
| 248 | clear_inode(inode); | 254 | ext4_clear_inode(inode); |
| 249 | else | 255 | else |
| 250 | ext4_free_inode(handle, inode); | 256 | ext4_free_inode(handle, inode); |
| 251 | ext4_journal_stop(handle); | 257 | ext4_journal_stop(handle); |
| 252 | return; | 258 | return; |
| 253 | no_delete: | 259 | no_delete: |
| 254 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 260 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
| 255 | } | 261 | } |
| 256 | 262 | ||
| 257 | typedef struct { | 263 | typedef struct { |
| @@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
| 337 | return n; | 343 | return n; |
| 338 | } | 344 | } |
| 339 | 345 | ||
| 340 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 346 | static int __ext4_check_blockref(const char *function, unsigned int line, |
| 347 | struct inode *inode, | ||
| 341 | __le32 *p, unsigned int max) | 348 | __le32 *p, unsigned int max) |
| 342 | { | 349 | { |
| 350 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
| 343 | __le32 *bref = p; | 351 | __le32 *bref = p; |
| 344 | unsigned int blk; | 352 | unsigned int blk; |
| 345 | 353 | ||
| @@ -348,8 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
| 348 | if (blk && | 356 | if (blk && |
| 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 357 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
| 350 | blk, 1))) { | 358 | blk, 1))) { |
| 351 | ext4_error_inode(function, inode, | 359 | es->s_last_error_block = cpu_to_le64(blk); |
| 352 | "invalid block reference %u", blk); | 360 | ext4_error_inode(inode, function, line, blk, |
| 361 | "invalid block"); | ||
| 353 | return -EIO; | 362 | return -EIO; |
| 354 | } | 363 | } |
| 355 | } | 364 | } |
| @@ -358,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
| 358 | 367 | ||
| 359 | 368 | ||
| 360 | #define ext4_check_indirect_blockref(inode, bh) \ | 369 | #define ext4_check_indirect_blockref(inode, bh) \ |
| 361 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | 370 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
| 371 | (__le32 *)(bh)->b_data, \ | ||
| 362 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 372 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
| 363 | 373 | ||
| 364 | #define ext4_check_inode_blockref(inode) \ | 374 | #define ext4_check_inode_blockref(inode) \ |
| 365 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | 375 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
| 376 | EXT4_I(inode)->i_data, \ | ||
| 366 | EXT4_NDIR_BLOCKS) | 377 | EXT4_NDIR_BLOCKS) |
| 367 | 378 | ||
| 368 | /** | 379 | /** |
| @@ -1128,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 1128 | ext4_discard_preallocations(inode); | 1139 | ext4_discard_preallocations(inode); |
| 1129 | } | 1140 | } |
| 1130 | 1141 | ||
| 1131 | static int check_block_validity(struct inode *inode, const char *func, | 1142 | static int __check_block_validity(struct inode *inode, const char *func, |
| 1143 | unsigned int line, | ||
| 1132 | struct ext4_map_blocks *map) | 1144 | struct ext4_map_blocks *map) |
| 1133 | { | 1145 | { |
| 1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 1146 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
| 1135 | map->m_len)) { | 1147 | map->m_len)) { |
| 1136 | ext4_error_inode(func, inode, | 1148 | ext4_error_inode(inode, func, line, map->m_pblk, |
| 1137 | "lblock %lu mapped to illegal pblock %llu " | 1149 | "lblock %lu mapped to illegal pblock " |
| 1138 | "(length %d)", (unsigned long) map->m_lblk, | 1150 | "(length %d)", (unsigned long) map->m_lblk, |
| 1139 | map->m_pblk, map->m_len); | 1151 | map->m_len); |
| 1140 | return -EIO; | 1152 | return -EIO; |
| 1141 | } | 1153 | } |
| 1142 | return 0; | 1154 | return 0; |
| 1143 | } | 1155 | } |
| 1144 | 1156 | ||
| 1157 | #define check_block_validity(inode, map) \ | ||
| 1158 | __check_block_validity((inode), __func__, __LINE__, (map)) | ||
| 1159 | |||
| 1145 | /* | 1160 | /* |
| 1146 | * Return the number of contiguous dirty pages in a given inode | 1161 | * Return the number of contiguous dirty pages in a given inode |
| 1147 | * starting at page frame idx. | 1162 | * starting at page frame idx. |
| @@ -1244,7 +1259,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 1244 | up_read((&EXT4_I(inode)->i_data_sem)); | 1259 | up_read((&EXT4_I(inode)->i_data_sem)); |
| 1245 | 1260 | ||
| 1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1261 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 1247 | int ret = check_block_validity(inode, __func__, map); | 1262 | int ret = check_block_validity(inode, map); |
| 1248 | if (ret != 0) | 1263 | if (ret != 0) |
| 1249 | return ret; | 1264 | return ret; |
| 1250 | } | 1265 | } |
| @@ -1324,9 +1339,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 1324 | 1339 | ||
| 1325 | up_write((&EXT4_I(inode)->i_data_sem)); | 1340 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1341 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 1327 | int ret = check_block_validity(inode, | 1342 | int ret = check_block_validity(inode, map); |
| 1328 | "ext4_map_blocks_after_alloc", | ||
| 1329 | map); | ||
| 1330 | if (ret != 0) | 1343 | if (ret != 0) |
| 1331 | return ret; | 1344 | return ret; |
| 1332 | } | 1345 | } |
| @@ -1519,9 +1532,25 @@ static int walk_page_buffers(handle_t *handle, | |||
| 1519 | static int do_journal_get_write_access(handle_t *handle, | 1532 | static int do_journal_get_write_access(handle_t *handle, |
| 1520 | struct buffer_head *bh) | 1533 | struct buffer_head *bh) |
| 1521 | { | 1534 | { |
| 1535 | int dirty = buffer_dirty(bh); | ||
| 1536 | int ret; | ||
| 1537 | |||
| 1522 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
| 1523 | return 0; | 1539 | return 0; |
| 1524 | return ext4_journal_get_write_access(handle, bh); | 1540 | /* |
| 1541 | * __block_prepare_write() could have dirtied some buffers. Clean | ||
| 1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | ||
| 1543 | * otherwise about fs integrity issues. Setting of the dirty bit | ||
| 1544 | * by __block_prepare_write() isn't a real problem here as we clear | ||
| 1545 | * the bit before releasing a page lock and thus writeback cannot | ||
| 1546 | * ever write the buffer. | ||
| 1547 | */ | ||
| 1548 | if (dirty) | ||
| 1549 | clear_buffer_dirty(bh); | ||
| 1550 | ret = ext4_journal_get_write_access(handle, bh); | ||
| 1551 | if (!ret && dirty) | ||
| 1552 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
| 1553 | return ret; | ||
| 1525 | } | 1554 | } |
| 1526 | 1555 | ||
| 1527 | /* | 1556 | /* |
| @@ -1578,11 +1607,9 @@ retry: | |||
| 1578 | *pagep = page; | 1607 | *pagep = page; |
| 1579 | 1608 | ||
| 1580 | if (ext4_should_dioread_nolock(inode)) | 1609 | if (ext4_should_dioread_nolock(inode)) |
| 1581 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1610 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
| 1582 | fsdata, ext4_get_block_write); | ||
| 1583 | else | 1611 | else |
| 1584 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1612 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
| 1585 | fsdata, ext4_get_block); | ||
| 1586 | 1613 | ||
| 1587 | if (!ret && ext4_should_journal_data(inode)) { | 1614 | if (!ret && ext4_should_journal_data(inode)) { |
| 1588 | ret = walk_page_buffers(handle, page_buffers(page), | 1615 | ret = walk_page_buffers(handle, page_buffers(page), |
| @@ -1593,7 +1620,7 @@ retry: | |||
| 1593 | unlock_page(page); | 1620 | unlock_page(page); |
| 1594 | page_cache_release(page); | 1621 | page_cache_release(page); |
| 1595 | /* | 1622 | /* |
| 1596 | * block_write_begin may have instantiated a few blocks | 1623 | * __block_write_begin may have instantiated a few blocks |
| 1597 | * outside i_size. Trim these off again. Don't need | 1624 | * outside i_size. Trim these off again. Don't need |
| 1598 | * i_size_read because we hold i_mutex. | 1625 | * i_size_read because we hold i_mutex. |
| 1599 | * | 1626 | * |
| @@ -2194,7 +2221,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2194 | BUG_ON(!handle); | 2221 | BUG_ON(!handle); |
| 2195 | 2222 | ||
| 2196 | /* | 2223 | /* |
| 2197 | * Call ext4_get_blocks() to allocate any delayed allocation | 2224 | * Call ext4_map_blocks() to allocate any delayed allocation |
| 2198 | * blocks, or to convert an uninitialized extent to be | 2225 | * blocks, or to convert an uninitialized extent to be |
| 2199 | * initialized (in the case where we have written into | 2226 | * initialized (in the case where we have written into |
| 2200 | * one or more preallocated blocks). | 2227 | * one or more preallocated blocks). |
| @@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2203 | * indicate that we are on the delayed allocation path. This | 2230 | * indicate that we are on the delayed allocation path. This |
| 2204 | * affects functions in many different parts of the allocation | 2231 | * affects functions in many different parts of the allocation |
| 2205 | * call path. This flag exists primarily because we don't | 2232 | * call path. This flag exists primarily because we don't |
| 2206 | * want to change *many* call functions, so ext4_get_blocks() | 2233 | * want to change *many* call functions, so ext4_map_blocks() |
| 2207 | * will set the magic i_delalloc_reserved_flag once the | 2234 | * will set the magic i_delalloc_reserved_flag once the |
| 2208 | * inode's allocation semaphore is taken. | 2235 | * inode's allocation semaphore is taken. |
| 2209 | * | 2236 | * |
| @@ -2221,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2221 | 2248 | ||
| 2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | 2249 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
| 2223 | if (blks < 0) { | 2250 | if (blks < 0) { |
| 2251 | struct super_block *sb = mpd->inode->i_sb; | ||
| 2252 | |||
| 2224 | err = blks; | 2253 | err = blks; |
| 2225 | /* | 2254 | /* |
| 2226 | * If get block returns with error we simply | 2255 | * If get block returns with error we simply |
| @@ -2231,7 +2260,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2231 | return 0; | 2260 | return 0; |
| 2232 | 2261 | ||
| 2233 | if (err == -ENOSPC && | 2262 | if (err == -ENOSPC && |
| 2234 | ext4_count_free_blocks(mpd->inode->i_sb)) { | 2263 | ext4_count_free_blocks(sb)) { |
| 2235 | mpd->retval = err; | 2264 | mpd->retval = err; |
| 2236 | return 0; | 2265 | return 0; |
| 2237 | } | 2266 | } |
| @@ -2243,16 +2272,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2243 | * writepage and writepages will again try to write | 2272 | * writepage and writepages will again try to write |
| 2244 | * the same. | 2273 | * the same. |
| 2245 | */ | 2274 | */ |
| 2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2275 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
| 2247 | "delayed block allocation failed for inode %lu at " | 2276 | ext4_msg(sb, KERN_CRIT, |
| 2248 | "logical offset %llu with max blocks %zd with " | 2277 | "delayed block allocation failed for inode %lu " |
| 2249 | "error %d", mpd->inode->i_ino, | 2278 | "at logical offset %llu with max blocks %zd " |
| 2250 | (unsigned long long) next, | 2279 | "with error %d", mpd->inode->i_ino, |
| 2251 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2280 | (unsigned long long) next, |
| 2252 | printk(KERN_CRIT "This should not happen!! " | 2281 | mpd->b_size >> mpd->inode->i_blkbits, err); |
| 2253 | "Data will be lost\n"); | 2282 | ext4_msg(sb, KERN_CRIT, |
| 2254 | if (err == -ENOSPC) { | 2283 | "This should not happen!! Data will be lost\n"); |
| 2255 | ext4_print_free_blocks(mpd->inode); | 2284 | if (err == -ENOSPC) |
| 2285 | ext4_print_free_blocks(mpd->inode); | ||
| 2256 | } | 2286 | } |
| 2257 | /* invalidate all the pages */ | 2287 | /* invalidate all the pages */ |
| 2258 | ext4_da_block_invalidatepages(mpd, next, | 2288 | ext4_da_block_invalidatepages(mpd, next, |
| @@ -2320,7 +2350,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
| 2320 | * XXX Don't go larger than mballoc is willing to allocate | 2350 | * XXX Don't go larger than mballoc is willing to allocate |
| 2321 | * This is a stopgap solution. We eventually need to fold | 2351 | * This is a stopgap solution. We eventually need to fold |
| 2322 | * mpage_da_submit_io() into this function and then call | 2352 | * mpage_da_submit_io() into this function and then call |
| 2323 | * ext4_get_blocks() multiple times in a loop | 2353 | * ext4_map_blocks() multiple times in a loop |
| 2324 | */ | 2354 | */ |
| 2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | 2355 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) |
| 2326 | goto flush_it; | 2356 | goto flush_it; |
| @@ -2553,18 +2583,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
| 2553 | /* | 2583 | /* |
| 2554 | * This function is used as a standard get_block_t callback function | 2584 | * This function is used as a standard get_block_t callback function |
| 2555 | * when there is no desire to allocate any blocks. It is used as a | 2585 | * when there is no desire to allocate any blocks. It is used as a |
| 2556 | * callback function for block_prepare_write(), nobh_writepage(), and | 2586 | * callback function for block_prepare_write() and block_write_full_page(). |
| 2557 | * block_write_full_page(). These functions should only try to map a | 2587 | * These functions should only try to map a single block at a time. |
| 2558 | * single block at a time. | ||
| 2559 | * | 2588 | * |
| 2560 | * Since this function doesn't do block allocations even if the caller | 2589 | * Since this function doesn't do block allocations even if the caller |
| 2561 | * requests it by passing in create=1, it is critically important that | 2590 | * requests it by passing in create=1, it is critically important that |
| 2562 | * any caller checks to make sure that any buffer heads are returned | 2591 | * any caller checks to make sure that any buffer heads are returned |
| 2563 | * by this function are either all already mapped or marked for | 2592 | * by this function are either all already mapped or marked for |
| 2564 | * delayed allocation before calling nobh_writepage() or | 2593 | * delayed allocation before calling block_write_full_page(). Otherwise, |
| 2565 | * block_write_full_page(). Otherwise, b_blocknr could be left | 2594 | * b_blocknr could be left uninitialized, and the page write functions will |
| 2566 | * uninitialized, and the page write functions will be taken by | 2595 | * be taken by surprise. |
| 2567 | * surprise. | ||
| 2568 | */ | 2596 | */ |
| 2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
| 2570 | struct buffer_head *bh_result, int create) | 2598 | struct buffer_head *bh_result, int create) |
| @@ -2749,9 +2777,7 @@ static int ext4_writepage(struct page *page, | |||
| 2749 | return __ext4_journalled_writepage(page, len); | 2777 | return __ext4_journalled_writepage(page, len); |
| 2750 | } | 2778 | } |
| 2751 | 2779 | ||
| 2752 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2780 | if (page_bufs && buffer_uninit(page_bufs)) { |
| 2753 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | ||
| 2754 | else if (page_bufs && buffer_uninit(page_bufs)) { | ||
| 2755 | ext4_set_bh_endio(page_bufs, inode); | 2781 | ext4_set_bh_endio(page_bufs, inode); |
| 2756 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
| 2757 | wbc, ext4_end_io_buffer_write); | 2783 | wbc, ext4_end_io_buffer_write); |
| @@ -3146,13 +3172,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
| 3146 | int ret, retries = 0; | 3172 | int ret, retries = 0; |
| 3147 | struct page *page; | 3173 | struct page *page; |
| 3148 | pgoff_t index; | 3174 | pgoff_t index; |
| 3149 | unsigned from, to; | ||
| 3150 | struct inode *inode = mapping->host; | 3175 | struct inode *inode = mapping->host; |
| 3151 | handle_t *handle; | 3176 | handle_t *handle; |
| 3152 | 3177 | ||
| 3153 | index = pos >> PAGE_CACHE_SHIFT; | 3178 | index = pos >> PAGE_CACHE_SHIFT; |
| 3154 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
| 3155 | to = from + len; | ||
| 3156 | 3179 | ||
| 3157 | if (ext4_nonda_switch(inode->i_sb)) { | 3180 | if (ext4_nonda_switch(inode->i_sb)) { |
| 3158 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 3181 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
| @@ -3185,8 +3208,7 @@ retry: | |||
| 3185 | } | 3208 | } |
| 3186 | *pagep = page; | 3209 | *pagep = page; |
| 3187 | 3210 | ||
| 3188 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 3211 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
| 3189 | ext4_da_get_block_prep); | ||
| 3190 | if (ret < 0) { | 3212 | if (ret < 0) { |
| 3191 | unlock_page(page); | 3213 | unlock_page(page); |
| 3192 | ext4_journal_stop(handle); | 3214 | ext4_journal_stop(handle); |
| @@ -3545,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
| 3545 | 3567 | ||
| 3546 | retry: | 3568 | retry: |
| 3547 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 3569 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
| 3548 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 3570 | ret = __blockdev_direct_IO(rw, iocb, inode, |
| 3549 | inode->i_sb->s_bdev, iov, | 3571 | inode->i_sb->s_bdev, iov, |
| 3550 | offset, nr_segs, | 3572 | offset, nr_segs, |
| 3551 | ext4_get_block, NULL); | 3573 | ext4_get_block, NULL, NULL, 0); |
| 3552 | else | 3574 | else { |
| 3553 | ret = blockdev_direct_IO(rw, iocb, inode, | 3575 | ret = blockdev_direct_IO(rw, iocb, inode, |
| 3554 | inode->i_sb->s_bdev, iov, | 3576 | inode->i_sb->s_bdev, iov, |
| 3555 | offset, nr_segs, | 3577 | offset, nr_segs, |
| 3556 | ext4_get_block, NULL); | 3578 | ext4_get_block, NULL); |
| 3579 | |||
| 3580 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
| 3581 | loff_t isize = i_size_read(inode); | ||
| 3582 | loff_t end = offset + iov_length(iov, nr_segs); | ||
| 3583 | |||
| 3584 | if (end > isize) | ||
| 3585 | vmtruncate(inode, isize); | ||
| 3586 | } | ||
| 3587 | } | ||
| 3557 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3588 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
| 3558 | goto retry; | 3589 | goto retry; |
| 3559 | 3590 | ||
| @@ -3668,6 +3699,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) | |||
| 3668 | return ret; | 3699 | return ret; |
| 3669 | } | 3700 | } |
| 3670 | 3701 | ||
| 3702 | if (io->iocb) | ||
| 3703 | aio_complete(io->iocb, io->result, 0); | ||
| 3671 | /* clear the DIO AIO unwritten flag */ | 3704 | /* clear the DIO AIO unwritten flag */ |
| 3672 | io->flag = 0; | 3705 | io->flag = 0; |
| 3673 | return ret; | 3706 | return ret; |
| @@ -3767,6 +3800,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | |||
| 3767 | io->offset = 0; | 3800 | io->offset = 0; |
| 3768 | io->size = 0; | 3801 | io->size = 0; |
| 3769 | io->page = NULL; | 3802 | io->page = NULL; |
| 3803 | io->iocb = NULL; | ||
| 3804 | io->result = 0; | ||
| 3770 | INIT_WORK(&io->work, ext4_end_io_work); | 3805 | INIT_WORK(&io->work, ext4_end_io_work); |
| 3771 | INIT_LIST_HEAD(&io->list); | 3806 | INIT_LIST_HEAD(&io->list); |
| 3772 | } | 3807 | } |
| @@ -3775,7 +3810,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | |||
| 3775 | } | 3810 | } |
| 3776 | 3811 | ||
| 3777 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
| 3778 | ssize_t size, void *private) | 3813 | ssize_t size, void *private, int ret, |
| 3814 | bool is_async) | ||
| 3779 | { | 3815 | { |
| 3780 | ext4_io_end_t *io_end = iocb->private; | 3816 | ext4_io_end_t *io_end = iocb->private; |
| 3781 | struct workqueue_struct *wq; | 3817 | struct workqueue_struct *wq; |
| @@ -3784,7 +3820,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3784 | 3820 | ||
| 3785 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3821 | /* if not async direct IO or dio with 0 bytes write, just return */ |
| 3786 | if (!io_end || !size) | 3822 | if (!io_end || !size) |
| 3787 | return; | 3823 | goto out; |
| 3788 | 3824 | ||
| 3789 | ext_debug("ext4_end_io_dio(): io_end 0x%p" | 3825 | ext_debug("ext4_end_io_dio(): io_end 0x%p" |
| 3790 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 3826 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
| @@ -3795,12 +3831,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3795 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
| 3796 | ext4_free_io_end(io_end); | 3832 | ext4_free_io_end(io_end); |
| 3797 | iocb->private = NULL; | 3833 | iocb->private = NULL; |
| 3834 | out: | ||
| 3835 | if (is_async) | ||
| 3836 | aio_complete(iocb, ret, 0); | ||
| 3798 | return; | 3837 | return; |
| 3799 | } | 3838 | } |
| 3800 | 3839 | ||
| 3801 | io_end->offset = offset; | 3840 | io_end->offset = offset; |
| 3802 | io_end->size = size; | 3841 | io_end->size = size; |
| 3803 | io_end->flag = EXT4_IO_UNWRITTEN; | 3842 | if (is_async) { |
| 3843 | io_end->iocb = iocb; | ||
| 3844 | io_end->result = ret; | ||
| 3845 | } | ||
| 3804 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
| 3805 | 3847 | ||
| 3806 | /* queue the work to convert unwritten extents to written */ | 3848 | /* queue the work to convert unwritten extents to written */ |
| @@ -3937,7 +3979,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3937 | return -ENOMEM; | 3979 | return -ENOMEM; |
| 3938 | /* | 3980 | /* |
| 3939 | * we save the io structure for current async | 3981 | * we save the io structure for current async |
| 3940 | * direct IO, so that later ext4_get_blocks() | 3982 | * direct IO, so that later ext4_map_blocks() |
| 3941 | * could flag the io structure whether there | 3983 | * could flag the io structure whether there |
| 3942 | * is an unwritten extent that needs to be converted | 3984 | * is an unwritten extent that needs to be converted |
| 3943 | * when IO is completed. | 3985 | * when IO is completed. |
| @@ -4128,17 +4170,6 @@ int ext4_block_truncate_page(handle_t *handle, | |||
| 4128 | length = blocksize - (offset & (blocksize - 1)); | 4170 | length = blocksize - (offset & (blocksize - 1)); |
| 4129 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 4171 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
| 4130 | 4172 | ||
| 4131 | /* | ||
| 4132 | * For "nobh" option, we can only work if we don't need to | ||
| 4133 | * read-in the page - otherwise we create buffers to do the IO. | ||
| 4134 | */ | ||
| 4135 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | ||
| 4136 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | ||
| 4137 | zero_user(page, offset, length); | ||
| 4138 | set_page_dirty(page); | ||
| 4139 | goto unlock; | ||
| 4140 | } | ||
| 4141 | |||
| 4142 | if (!page_has_buffers(page)) | 4173 | if (!page_has_buffers(page)) |
| 4143 | create_empty_buffers(page, blocksize, 0); | 4174 | create_empty_buffers(page, blocksize, 0); |
| 4144 | 4175 | ||
| @@ -4488,9 +4519,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 4488 | * (should be rare). | 4519 | * (should be rare). |
| 4489 | */ | 4520 | */ |
| 4490 | if (!bh) { | 4521 | if (!bh) { |
| 4491 | EXT4_ERROR_INODE(inode, | 4522 | EXT4_ERROR_INODE_BLOCK(inode, nr, |
| 4492 | "Read failure block=%llu", | 4523 | "Read failure"); |
| 4493 | (unsigned long long) nr); | ||
| 4494 | continue; | 4524 | continue; |
| 4495 | } | 4525 | } |
| 4496 | 4526 | ||
| @@ -4502,27 +4532,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 4502 | depth); | 4532 | depth); |
| 4503 | 4533 | ||
| 4504 | /* | 4534 | /* |
| 4505 | * We've probably journalled the indirect block several | ||
| 4506 | * times during the truncate. But it's no longer | ||
| 4507 | * needed and we now drop it from the transaction via | ||
| 4508 | * jbd2_journal_revoke(). | ||
| 4509 | * | ||
| 4510 | * That's easy if it's exclusively part of this | ||
| 4511 | * transaction. But if it's part of the committing | ||
| 4512 | * transaction then jbd2_journal_forget() will simply | ||
| 4513 | * brelse() it. That means that if the underlying | ||
| 4514 | * block is reallocated in ext4_get_block(), | ||
| 4515 | * unmap_underlying_metadata() will find this block | ||
| 4516 | * and will try to get rid of it. damn, damn. | ||
| 4517 | * | ||
| 4518 | * If this block has already been committed to the | ||
| 4519 | * journal, a revoke record will be written. And | ||
| 4520 | * revoke records must be emitted *before* clearing | ||
| 4521 | * this block's bit in the bitmaps. | ||
| 4522 | */ | ||
| 4523 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
| 4524 | |||
| 4525 | /* | ||
| 4526 | * Everything below this pointer has been | 4535 | * Everything below this pointer has been |
| 4527 | * released. Now let this top-of-subtree go. | 4536 | * released. Now let this top-of-subtree go. |
| 4528 | * | 4537 | * |
| @@ -4546,8 +4555,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 4546 | blocks_for_truncate(inode)); | 4555 | blocks_for_truncate(inode)); |
| 4547 | } | 4556 | } |
| 4548 | 4557 | ||
| 4558 | /* | ||
| 4559 | * The forget flag here is critical because if | ||
| 4560 | * we are journaling (and not doing data | ||
| 4561 | * journaling), we have to make sure a revoke | ||
| 4562 | * record is written to prevent the journal | ||
| 4563 | * replay from overwriting the (former) | ||
| 4564 | * indirect block if it gets reallocated as a | ||
| 4565 | * data block. This must happen in the same | ||
| 4566 | * transaction where the data blocks are | ||
| 4567 | * actually freed. | ||
| 4568 | */ | ||
| 4549 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4569 | ext4_free_blocks(handle, inode, 0, nr, 1, |
| 4550 | EXT4_FREE_BLOCKS_METADATA); | 4570 | EXT4_FREE_BLOCKS_METADATA| |
| 4571 | EXT4_FREE_BLOCKS_FORGET); | ||
| 4551 | 4572 | ||
| 4552 | if (parent_bh) { | 4573 | if (parent_bh) { |
| 4553 | /* | 4574 | /* |
| @@ -4805,8 +4826,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
| 4805 | 4826 | ||
| 4806 | bh = sb_getblk(sb, block); | 4827 | bh = sb_getblk(sb, block); |
| 4807 | if (!bh) { | 4828 | if (!bh) { |
| 4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " | 4829 | EXT4_ERROR_INODE_BLOCK(inode, block, |
| 4809 | "block %llu", block); | 4830 | "unable to read itable block"); |
| 4810 | return -EIO; | 4831 | return -EIO; |
| 4811 | } | 4832 | } |
| 4812 | if (!buffer_uptodate(bh)) { | 4833 | if (!buffer_uptodate(bh)) { |
| @@ -4904,8 +4925,8 @@ make_io: | |||
| 4904 | submit_bh(READ_META, bh); | 4925 | submit_bh(READ_META, bh); |
| 4905 | wait_on_buffer(bh); | 4926 | wait_on_buffer(bh); |
| 4906 | if (!buffer_uptodate(bh)) { | 4927 | if (!buffer_uptodate(bh)) { |
| 4907 | EXT4_ERROR_INODE(inode, "unable to read inode " | 4928 | EXT4_ERROR_INODE_BLOCK(inode, block, |
| 4908 | "block %llu", block); | 4929 | "unable to read itable block"); |
| 4909 | brelse(bh); | 4930 | brelse(bh); |
| 4910 | return -EIO; | 4931 | return -EIO; |
| 4911 | } | 4932 | } |
| @@ -4976,7 +4997,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
| 4976 | /* we are using combined 48 bit field */ | 4997 | /* we are using combined 48 bit field */ |
| 4977 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 4998 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
| 4978 | le32_to_cpu(raw_inode->i_blocks_lo); | 4999 | le32_to_cpu(raw_inode->i_blocks_lo); |
| 4979 | if (ei->i_flags & EXT4_HUGE_FILE_FL) { | 5000 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
| 4980 | /* i_blocks represent file system block size */ | 5001 | /* i_blocks represent file system block size */ |
| 4981 | return i_blocks << (inode->i_blkbits - 9); | 5002 | return i_blocks << (inode->i_blkbits - 9); |
| 4982 | } else { | 5003 | } else { |
| @@ -5072,7 +5093,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 5072 | transaction_t *transaction; | 5093 | transaction_t *transaction; |
| 5073 | tid_t tid; | 5094 | tid_t tid; |
| 5074 | 5095 | ||
| 5075 | spin_lock(&journal->j_state_lock); | 5096 | read_lock(&journal->j_state_lock); |
| 5076 | if (journal->j_running_transaction) | 5097 | if (journal->j_running_transaction) |
| 5077 | transaction = journal->j_running_transaction; | 5098 | transaction = journal->j_running_transaction; |
| 5078 | else | 5099 | else |
| @@ -5081,7 +5102,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 5081 | tid = transaction->t_tid; | 5102 | tid = transaction->t_tid; |
| 5082 | else | 5103 | else |
| 5083 | tid = journal->j_commit_sequence; | 5104 | tid = journal->j_commit_sequence; |
| 5084 | spin_unlock(&journal->j_state_lock); | 5105 | read_unlock(&journal->j_state_lock); |
| 5085 | ei->i_sync_tid = tid; | 5106 | ei->i_sync_tid = tid; |
| 5086 | ei->i_datasync_tid = tid; | 5107 | ei->i_datasync_tid = tid; |
| 5087 | } | 5108 | } |
| @@ -5126,7 +5147,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 5126 | ei->i_file_acl); | 5147 | ei->i_file_acl); |
| 5127 | ret = -EIO; | 5148 | ret = -EIO; |
| 5128 | goto bad_inode; | 5149 | goto bad_inode; |
| 5129 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5150 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
| 5130 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 5151 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
| 5131 | (S_ISLNK(inode->i_mode) && | 5152 | (S_ISLNK(inode->i_mode) && |
| 5132 | !ext4_inode_is_fast_symlink(inode))) | 5153 | !ext4_inode_is_fast_symlink(inode))) |
| @@ -5406,9 +5427,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 5406 | if (wbc->sync_mode == WB_SYNC_ALL) | 5427 | if (wbc->sync_mode == WB_SYNC_ALL) |
| 5407 | sync_dirty_buffer(iloc.bh); | 5428 | sync_dirty_buffer(iloc.bh); |
| 5408 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5429 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
| 5409 | EXT4_ERROR_INODE(inode, | 5430 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
| 5410 | "IO error syncing inode (block=%llu)", | 5431 | "IO error syncing inode"); |
| 5411 | (unsigned long long) iloc.bh->b_blocknr); | ||
| 5412 | err = -EIO; | 5432 | err = -EIO; |
| 5413 | } | 5433 | } |
| 5414 | brelse(iloc.bh); | 5434 | brelse(iloc.bh); |
| @@ -5483,10 +5503,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5483 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 5503 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
| 5484 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5504 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 5485 | 5505 | ||
| 5486 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5506 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
| 5487 | error = -EFBIG; | 5507 | return -EFBIG; |
| 5488 | goto err_out; | ||
| 5489 | } | ||
| 5490 | } | 5508 | } |
| 5491 | } | 5509 | } |
| 5492 | 5510 | ||
| @@ -5529,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5529 | ext4_truncate(inode); | 5547 | ext4_truncate(inode); |
| 5530 | } | 5548 | } |
| 5531 | 5549 | ||
| 5532 | rc = inode_setattr(inode, attr); | 5550 | if ((attr->ia_valid & ATTR_SIZE) && |
| 5551 | attr->ia_size != i_size_read(inode)) | ||
| 5552 | rc = vmtruncate(inode, attr->ia_size); | ||
| 5553 | |||
| 5554 | if (!rc) { | ||
| 5555 | setattr_copy(inode, attr); | ||
| 5556 | mark_inode_dirty(inode); | ||
| 5557 | } | ||
| 5533 | 5558 | ||
| 5534 | /* If inode_setattr's call to ext4_truncate failed to get a | 5559 | /* |
| 5535 | * transaction handle at all, we need to clean up the in-core | 5560 | * If the call to ext4_truncate failed to get a transaction handle at |
| 5536 | * orphan list manually. */ | 5561 | * all, we need to clean up the in-core orphan list manually. |
| 5562 | */ | ||
| 5537 | if (inode->i_nlink) | 5563 | if (inode->i_nlink) |
| 5538 | ext4_orphan_del(NULL, inode); | 5564 | ext4_orphan_del(NULL, inode); |
| 5539 | 5565 | ||
| @@ -5688,7 +5714,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
| 5688 | * Calculate the journal credits for a chunk of data modification. | 5714 | * Calculate the journal credits for a chunk of data modification. |
| 5689 | * | 5715 | * |
| 5690 | * This is called from DIO, fallocate or whoever calls | 5716 | * This is called from DIO, fallocate or whoever calls |
| 5691 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. | 5717 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. |
| 5692 | * | 5718 | * |
| 5693 | * journal buffers for data blocks are not included here, as DIO | 5719 | * journal buffers for data blocks are not included here, as DIO |
| 5694 | and fallocate do not need to journal data buffers. | 5720 | and fallocate do not need to journal data buffers. |
| @@ -5754,7 +5780,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
| 5754 | { | 5780 | { |
| 5755 | struct ext4_inode *raw_inode; | 5781 | struct ext4_inode *raw_inode; |
| 5756 | struct ext4_xattr_ibody_header *header; | 5782 | struct ext4_xattr_ibody_header *header; |
| 5757 | struct ext4_xattr_entry *entry; | ||
| 5758 | 5783 | ||
| 5759 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 5784 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
| 5760 | return 0; | 5785 | return 0; |
| @@ -5762,7 +5787,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
| 5762 | raw_inode = ext4_raw_inode(&iloc); | 5787 | raw_inode = ext4_raw_inode(&iloc); |
| 5763 | 5788 | ||
| 5764 | header = IHDR(inode, raw_inode); | 5789 | header = IHDR(inode, raw_inode); |
| 5765 | entry = IFIRST(header); | ||
| 5766 | 5790 | ||
| 5767 | /* No extended attributes present */ | 5791 | /* No extended attributes present */ |
| 5768 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 5792 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
