Diffstat (limited to 'fs/ext4/inode.c')
 -rw-r--r--  fs/ext4/inode.c | 530
 1 file changed, 453 insertions(+), 77 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c4da98a959ae..fffec40d5996 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "ext4_extents.h"
 #include "truncate.h"
 
 #include <trace/events/ext4.h>
@@ -121,9 +120,6 @@ void ext4_evict_inode(struct inode *inode)
 
 	trace_ext4_evict_inode(inode);
 
-	mutex_lock(&inode->i_mutex);
-	ext4_flush_completed_IO(inode);
-	mutex_unlock(&inode->i_mutex);
 	ext4_ioend_wait(inode);
 
 	if (inode->i_nlink) {
@@ -271,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	spin_lock(&ei->i_block_reservation_lock);
-	trace_ext4_da_update_reserve_space(inode, used);
+	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
 		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
 			 "with only %d reserved data blocks\n",
@@ -284,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	/* Update per-inode reservations */
 	ei->i_reserved_data_blocks -= used;
 	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+	percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 			   used + ei->i_allocated_meta_blocks);
 	ei->i_allocated_meta_blocks = 0;
 
@@ -294,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		 * only when we have written all of the delayed
 		 * allocation blocks.
 		 */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
@@ -303,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
 	/* Update quota subsystem for data blocks */
 	if (quota_claim)
-		dquot_claim_block(inode, used);
+		dquot_claim_block(inode, EXT4_C2B(sbi, used));
 	else {
 		/*
 		 * We did fallocate with an offset that is already delayed
 		 * allocated. So on delayed allocated writeback we should
 		 * not re-claim the quota for fallocated blocks.
 		 */
-		dquot_release_reservation_block(inode, used);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
 	}
 
 	/*
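
Note: the EXT4_C2B()/EXT4_NUM_B2C() helpers used throughout these hunks
convert between bigalloc clusters and filesystem blocks. Roughly as they
are defined in fs/ext4/ext4.h in this series (reproduced here for
reference only; not part of this diff):

	#define EXT4_C2B(sbi, cluster)	((cluster) << (sbi)->s_cluster_bits)
	#define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
	/* round a block count up to the number of clusters it touches */
	#define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
					 (sbi)->s_cluster_bits)
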
@@ -402,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 }
 
 /*
+ * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
+ */
+static void set_buffers_da_mapped(struct inode *inode,
+				   struct ext4_map_blocks *map)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	int i, nr_pages;
+	pgoff_t index, end;
+
+	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end = (map->m_lblk + map->m_len - 1) >>
+		(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	pagevec_init(&pvec, 0);
+	while (index <= end) {
+		nr_pages = pagevec_lookup(&pvec, mapping, index,
+					  min(end - index + 1,
+					      (pgoff_t)PAGEVEC_SIZE));
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			struct buffer_head *bh, *head;
+
+			if (unlikely(page->mapping != mapping) ||
+			    !PageDirty(page))
+				break;
+
+			if (page_has_buffers(page)) {
+				bh = head = page_buffers(page);
+				do {
+					set_buffer_da_mapped(bh);
+					bh = bh->b_this_page;
+				} while (bh != head);
+			}
+			index++;
+		}
+		pagevec_release(&pvec);
+	}
+}
+
+/*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
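
Note: set_buffer_da_mapped() and friends come from the BH_Da_Mapped buffer
state bit this series adds in fs/ext4/ext4.h; a rough sketch of that
declaration (for reference only, not part of this diff):

	enum ext4_state_bits {
		BH_Uninit		/* blocks allocated but uninitialized on disk */
		  = BH_JBDPrivateStart,
		BH_AllocFromCluster,	/* allocated from an already allocated cluster */
		BH_Da_Mapped,		/* delayed-allocated block that now has
					 * a real mapping */
	};

	BUFFER_FNS(Da_Mapped, da_mapped)	/* generates the set/clear/test helpers */
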
@@ -419,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
  * the buffer head is mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
- * that casem, buffer head is unmapped
+ * that case, buffer head is unmapped
  *
  * It returns the error in case of allocation failure.
  */
@@ -438,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		retval = ext4_ext_map_blocks(handle, inode, map, 0);
+		retval = ext4_ext_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	} else {
-		retval = ext4_ind_map_blocks(handle, inode, map, 0);
+		retval = ext4_ind_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -458,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		 * Returns if the blocks have already allocated
 		 *
 		 * Note that if blocks have been preallocated
-		 * ext4_ext_get_block() returns th create = 0
+		 * ext4_ext_get_block() returns the create = 0
 		 * with buffer head unmapped.
 		 */
 		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -520,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
+		/* If we have successfully mapped the delayed allocated blocks,
+		 * set the BH_Da_Mapped bit on them. It's important to do this
+		 * under the protection of i_data_sem.
+		 */
+		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+			set_buffers_da_mapped(inode, map);
+	}
+
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
 		int ret = check_block_validity(inode, map);
@@ -650,7 +699,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 		return bh;
 	if (buffer_uptodate(bh))
 		return bh;
-	ll_rw_block(READ_META, 1, &bh);
+	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
@@ -912,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
 			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
+	} else {
+		unlock_page(page);
+		page_cache_release(page);
 	}
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1040,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
 */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long md_needed;
+	unsigned int md_needed;
 	int ret;
 
 	/*
@@ -1057,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 	 */
repeat:
 	spin_lock(&ei->i_block_reservation_lock);
-	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
 	trace_ext4_da_reserve_space(inode, md_needed);
 	spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1066,15 +1120,15 @@ repeat:
 	 * us from metadata over-estimation, though we may go over by
 	 * a small amount in the end. Here we just reserve for data.
 	 */
-	ret = dquot_reserve_block(inode, 1);
+	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
 	if (ret)
 		return ret;
 	/*
 	 * We do still charge estimated metadata to the sb though;
 	 * we cannot afford to run out of free blocks.
 	 */
-	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-		dquot_release_reservation_block(inode, 1);
+	if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1121,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 * We can release all of the reserved metadata blocks
 		 * only when we have written all of the delayed
 		 * allocation blocks.
+		 * Note that in case of bigalloc, i_reserved_meta_blocks,
+		 * i_reserved_data_blocks, etc. refer to number of clusters.
 		 */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
 	}
 
 	/* update fs dirty data blocks counter */
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
+	percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	dquot_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1142,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
 	int to_release = 0;
 	struct buffer_head *head, *bh;
 	unsigned int curr_off = 0;
+	struct inode *inode = page->mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int num_clusters;
 
 	head = page_buffers(page);
 	bh = head;
@@ -1151,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
 		if ((offset <= curr_off) && (buffer_delay(bh))) {
 			to_release++;
 			clear_buffer_delay(bh);
+			clear_buffer_da_mapped(bh);
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
-	ext4_da_release_space(page->mapping->host, to_release);
+
+	/* If we have released all the blocks belonging to a cluster, then we
+	 * need to release the reserved space for that cluster. */
+	num_clusters = EXT4_NUM_B2C(sbi, to_release);
+	while (num_clusters > 0) {
+		ext4_fsblk_t lblk;
+		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+			((num_clusters - 1) << sbi->s_cluster_bits);
+		if (sbi->s_cluster_ratio == 1 ||
+		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+			ext4_da_release_space(inode, 1);
+
+		num_clusters--;
+	}
 }
 
 /*
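
Worked example of the release loop above, under hypothetical geometry:
1KiB blocks (i_blkbits = 10), 4KiB pages and a bigalloc cluster ratio of
16 (s_cluster_bits = 4). A page at index 3 covers logical blocks 12..15;
if all four were delayed, to_release = 4 and EXT4_NUM_B2C(sbi, 4) =
(4 + 15) >> 4 = 1 candidate cluster, whose probe block is

	lblk = (3 << (PAGE_CACHE_SHIFT - 10)) + ((1 - 1) << 4) = 12

so the single reserved cluster is dropped only if
ext4_find_delalloc_cluster() finds no other delayed block still pending
in the cluster spanning blocks 0..15.
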
@@ -1256,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 						clear_buffer_delay(bh);
 						bh->b_blocknr = pblock;
 					}
+					if (buffer_da_mapped(bh))
+						clear_buffer_da_mapped(bh);
 					if (buffer_unwritten(bh) ||
 					    buffer_mapped(bh))
 						BUG_ON(bh->b_blocknr != pblock);
@@ -1349,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	printk(KERN_CRIT "Total free blocks count %lld\n",
-	       ext4_count_free_blocks(inode->i_sb));
+	       EXT4_C2B(EXT4_SB(inode->i_sb),
+			ext4_count_free_clusters(inode->i_sb)));
 	printk(KERN_CRIT "Free/Dirty block details\n");
 	printk(KERN_CRIT "free_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_freeclusters_counter)));
 	printk(KERN_CRIT "dirty_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 	printk(KERN_CRIT "Block reservation details\n");
 	printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
 	       EXT4_I(inode)->i_reserved_data_blocks);
@@ -1433,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		if (err == -EAGAIN)
 			goto submit_io;
 
-		if (err == -ENOSPC &&
-		    ext4_count_free_blocks(sb)) {
+		if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
 			mpd->retval = err;
 			goto submit_io;
 		}
@@ -1474,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 
 		for (i = 0; i < map.m_len; i++)
 			unmap_underlying_metadata(bdev, map.m_pblk + i);
-	}
 
 		if (ext4_should_order_data(mpd->inode)) {
 			err = ext4_jbd2_file_inode(handle, mpd->inode);
-			if (err)
-				/* This only happens if the journal is aborted */
-				return;
+			if (err) {
+				/* Only if the journal is aborted */
+				mpd->retval = err;
+				goto submit_io;
+			}
+		}
 	}
 
 	/*
@@ -1587,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
+ * This function grabs code from the very beginning of
+ * ext4_map_blocks, but assumes that the caller is from delayed write
+ * time. This function looks up the requested blocks and sets the
+ * buffer delay bit under the protection of i_data_sem.
+ */
+static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+			      struct ext4_map_blocks *map,
+			      struct buffer_head *bh)
+{
+	int retval;
+	sector_t invalid_block = ~((sector_t) 0xffff);
+
+	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+		invalid_block = ~0;
+
+	map->m_flags = 0;
+	ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
+		  "logical block %lu\n", inode->i_ino, map->m_len,
+		  (unsigned long) map->m_lblk);
+	/*
+	 * Try to see if we can get the block without requesting a new
+	 * file system block.
+	 */
+	down_read((&EXT4_I(inode)->i_data_sem));
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+	else
+		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+
+	if (retval == 0) {
+		/*
+		 * XXX: __block_prepare_write() unmaps passed block,
+		 * is it OK?
+		 */
+		/* If the block was allocated from a previously allocated cluster,
+		 * then we don't need to reserve it again. */
+		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+			retval = ext4_da_reserve_space(inode, iblock);
+			if (retval)
+				/* not enough space to reserve */
+				goto out_unlock;
+		}
+
+		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
+		 * and it should not appear on the bh->b_state.
+		 */
+		map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+
+		map_bh(bh, inode->i_sb, invalid_block);
+		set_buffer_new(bh);
+		set_buffer_delay(bh);
+	}
+
+out_unlock:
+	up_read((&EXT4_I(inode)->i_data_sem));
+
+	return retval;
+}
+
+/*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin(). It will either return mapped block or
  * reserve space for a single block.
@@ -1603,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 {
 	struct ext4_map_blocks map;
 	int ret = 0;
-	sector_t invalid_block = ~((sector_t) 0xffff);
-
-	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
-		invalid_block = ~0;
 
 	BUG_ON(create == 0);
 	BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1619,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_map_blocks(NULL, inode, &map, 0);
-	if (ret < 0)
+	ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+	if (ret <= 0)
 		return ret;
-	if (ret == 0) {
-		if (buffer_delay(bh))
-			return 0; /* Not sure this could or should happen */
-		/*
-		 * XXX: __block_write_begin() unmaps passed block, is it OK?
-		 */
-		ret = ext4_da_reserve_space(inode, iblock);
-		if (ret)
-			/* not enough space to reserve */
-			return ret;
-
-		map_bh(bh, inode->i_sb, invalid_block);
-		set_buffer_new(bh);
-		set_buffer_delay(bh);
-		return 0;
-	}
 
 	map_bh(bh, inode->i_sb, map.m_pblk);
 	bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -1814,8 +1933,12 @@ static int ext4_writepage(struct page *page,
 		 * We don't want to do block allocation, so redirty
 		 * the page and return. We may reach here when we do
 		 * a journal commit via journal_submit_inode_data_buffers.
-		 * We can also reach here via shrink_page_list
+		 * We can also reach here via shrink_page_list but it
+		 * should never be for direct reclaim so warn if that
+		 * happens
 		 */
+		WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
+								PF_MEMALLOC);
 		goto redirty_page;
 	}
 	if (commit_write)
@@ -2049,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 	pgoff_t done_index = 0;
 	pgoff_t end;
+	struct blk_plug plug;
 
 	trace_ext4_da_writepages(inode, wbc);
 
@@ -2127,6 +2251,7 @@ retry:
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 		tag_pages_for_writeback(mapping, index, end);
 
+	blk_start_plug(&plug);
 	while (!ret && wbc->nr_to_write > 0) {
 
 		/*
@@ -2145,6 +2270,7 @@ retry:
 			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
 			       "%ld pages, ino %lu; err %d", __func__,
 				wbc->nr_to_write, inode->i_ino, ret);
+			blk_finish_plug(&plug);
 			goto out_writepages;
 		}
 
@@ -2177,11 +2303,12 @@ retry:
 			ret = 0;
 		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
-			 * got one extent now try with
-			 * rest of the pages
+			 * Got one extent now try with rest of the pages.
+			 * If mpd.retval is set to -EIO, the journal is aborted.
+			 * So we don't need to write any more.
 			 */
 			pages_written += mpd.pages_written;
-			ret = 0;
+			ret = mpd.retval;
 			io_done = 1;
 		} else if (wbc->nr_to_write)
 			/*
@@ -2191,6 +2318,7 @@ retry:
 			 */
 			break;
 	}
+	blk_finish_plug(&plug);
 	if (!io_done && !cycled) {
 		cycled = 1;
 		index = 0;
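
Note: the plugging added above follows the standard block-layer pattern,
sketched here assuming only the usual <linux/blkdev.h> API:

	struct blk_plug plug;

	blk_start_plug(&plug);
	/* submit the writeback bios for this batch of pages */
	blk_finish_plug(&plug);	/* hand the queued bios to the device in one go */

so the I/O built up across repeated mpage_da_map_and_submit() calls is
dispatched as a batch rather than page by page.
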
@@ -2229,10 +2357,11 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
 	 */
-	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+	free_blocks = EXT4_C2B(sbi,
+		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
 	if (2 * free_blocks < 3 * dirty_blocks ||
-	    free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+	    free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
 		/*
 		 * free block count is less than 150% of dirty blocks
 		 * or free blocks is less than watermark
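
In concrete (hypothetical) numbers: with free_blocks = 140 and
dirty_blocks = 100, the test 2 * 140 < 3 * 100 (280 < 300) holds, i.e.
free space has fallen below 150% of the outstanding dirty blocks, so the
write falls back to non-delalloc. Note that after this hunk only
free_blocks is scaled from the cluster counter with EXT4_C2B() before
the comparison.
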
@@ -2244,7 +2373,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
 	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb);
+		writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
 
 	return 0;
 }
@@ -2258,6 +2387,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index;
 	struct inode *inode = mapping->host;
 	handle_t *handle;
+	loff_t page_len;
 
 	index = pos >> PAGE_CACHE_SHIFT;
 
@@ -2304,6 +2434,13 @@ retry:
 		 */
 		if (pos + len > inode->i_size)
 			ext4_truncate_failed_write(inode);
+	} else {
+		page_len = pos & (PAGE_CACHE_SIZE - 1);
+		if (page_len > 0) {
+			ret = ext4_discard_partial_page_buffers_no_lock(handle,
+				inode, page, pos - page_len, page_len,
+				EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+		}
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2346,6 +2483,7 @@ static int ext4_da_write_end(struct file *file,
 	loff_t new_i_size;
 	unsigned long start, end;
 	int write_mode = (int)(unsigned long)fsdata;
+	loff_t page_len;
 
 	if (write_mode == FALL_BACK_TO_NONDELALLOC) {
 		if (ext4_should_order_data(inode)) {
@@ -2394,6 +2532,16 @@ static int ext4_da_write_end(struct file *file,
 	}
 	ret2 = generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
+
+	page_len = PAGE_CACHE_SIZE -
+			((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
+
+	if (page_len > 0) {
+		ret = ext4_discard_partial_page_buffers_no_lock(handle,
+			inode, page, pos + copied - 1, page_len,
+			EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+	}
+
 	copied = ret2;
 	if (ret2 < 0)
 		ret = ret2;
@@ -2688,10 +2836,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 	 * but being more careful is always safe for the future change.
 	 */
 	inode = io_end->inode;
-	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		io_end->flag |= EXT4_IO_END_UNWRITTEN;
-		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-	}
+	ext4_set_io_unwritten_flag(inode, io_end);
 
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
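
Note: ext4_set_io_unwritten_flag() is a new inline helper added in
fs/ext4/ext4.h by this series; its body matches the lines removed here:

	static inline void ext4_set_io_unwritten_flag(struct inode *inode,
						      struct ext4_io_end *io_end)
	{
		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
			io_end->flag |= EXT4_IO_END_UNWRITTEN;
			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
		}
	}
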
@@ -2857,6 +3002,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
+	/*
+	 * If we are doing data journalling we don't support O_DIRECT
+	 */
+	if (ext4_should_journal_data(inode))
+		return 0;
+
 	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2926,6 +3077,7 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
 	.is_partially_uptodate	= block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 };
@@ -2962,6 +3114,227 @@ void ext4_set_aops(struct inode *inode)
 		inode->i_mapping->a_ops = &ext4_journalled_aops;
 }
 
+
+/*
+ * ext4_discard_partial_page_buffers()
+ * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
+ * This function finds and locks the page containing the offset
+ * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
+ * Calling functions that already have the page locked should call
+ * ext4_discard_partial_page_buffers_no_lock directly.
+ */
+int ext4_discard_partial_page_buffers(handle_t *handle,
+		struct address_space *mapping, loff_t from,
+		loff_t length, int flags)
+{
+	struct inode *inode = mapping->host;
+	struct page *page;
+	int err = 0;
+
+	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+				   mapping_gfp_mask(mapping) & ~__GFP_FS);
+	if (!page)
+		return -ENOMEM;
+
+	err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
+							from, length, flags);
+
+	unlock_page(page);
+	page_cache_release(page);
+	return err;
+}
+
+/*
+ * ext4_discard_partial_page_buffers_no_lock()
+ * Zeros a page range of length 'length' starting from offset 'from'.
+ * Buffer heads that correspond to the block aligned regions of the
+ * zeroed range will be unmapped. Non-block-aligned regions
+ * will have the corresponding buffer head mapped if needed so that
+ * that region of the page can be updated with the partial zero out.
+ *
+ * This function assumes that the page has already been locked.
+ * The range to be discarded must be contained within the given page.
+ * If the specified range exceeds the end of the page it will be shortened
+ * to the end of the page that corresponds to 'from'. This function is
+ * appropriate for updating a page and its buffer heads to be unmapped and
+ * zeroed for blocks that have been either released, or are going to be
+ * released.
+ *
+ * handle: The journal handle
+ * inode:  The file's inode
+ * page:   A locked page that contains the offset "from"
+ * from:   The starting byte offset (from the beginning of the file)
+ *         to begin discarding
+ * length: The length of bytes to discard
+ * flags:  Optional flags that may be used:
+ *
+ *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
+ *         Only zero the regions of the page whose buffer heads
+ *         have already been unmapped. This flag is appropriate
+ *         for updating the contents of a page whose blocks may
+ *         have already been released, and we only want to zero
+ *         out the regions that correspond to those released blocks.
+ *
+ * Returns zero on success or negative on failure.
+ */
+int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+		struct inode *inode, struct page *page, loff_t from,
+		loff_t length, int flags)
+{
+	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned int offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned int blocksize, max, pos;
+	ext4_lblk_t iblock;
+	struct buffer_head *bh;
+	int err = 0;
+
+	blocksize = inode->i_sb->s_blocksize;
+	max = PAGE_CACHE_SIZE - offset;
+
+	if (index != page->index)
+		return -EINVAL;
+
+	/*
+	 * correct length if it does not fall between
+	 * 'from' and the end of the page
+	 */
+	if (length > max || length < 0)
+		length = max;
+
+	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+	if (!page_has_buffers(page)) {
+		/*
+		 * If the range to be discarded covers a partial block
+		 * we need to get the page buffers. This is because
+		 * partial blocks cannot be released and the page needs
+		 * to be updated with the contents of the block before
+		 * we write the zeros on top of it.
+		 */
+		if ((from & (blocksize - 1)) ||
+		    ((from + length) & (blocksize - 1))) {
+			create_empty_buffers(page, blocksize, 0);
+		} else {
+			/*
+			 * If there are no partial blocks,
+			 * there is nothing to update,
+			 * so we can return now
+			 */
+			return 0;
+		}
+	}
+
+	/* Find the buffer that contains "offset" */
+	bh = page_buffers(page);
+	pos = blocksize;
+	while (offset >= pos) {
+		bh = bh->b_this_page;
+		iblock++;
+		pos += blocksize;
+	}
+
+	pos = offset;
+	while (pos < offset + length) {
+		unsigned int end_of_block, range_to_discard;
+
+		err = 0;
+
+		/* The length of space left to zero and unmap */
+		range_to_discard = offset + length - pos;
+
+		/* The length of space until the end of the block */
+		end_of_block = blocksize - (pos & (blocksize-1));
+
+		/*
+		 * Do not unmap or zero past end of block
+		 * for this buffer head
+		 */
+		if (range_to_discard > end_of_block)
+			range_to_discard = end_of_block;
+
+
+		/*
+		 * Skip this buffer head if we are only zeroing unmapped
+		 * regions of the page
+		 */
+		if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
+			buffer_mapped(bh))
+			goto next;
+
+		/* If the range is block aligned, unmap */
+		if (range_to_discard == blocksize) {
+			clear_buffer_dirty(bh);
+			bh->b_bdev = NULL;
+			clear_buffer_mapped(bh);
+			clear_buffer_req(bh);
+			clear_buffer_new(bh);
+			clear_buffer_delay(bh);
+			clear_buffer_unwritten(bh);
+			clear_buffer_uptodate(bh);
+			zero_user(page, pos, range_to_discard);
+			BUFFER_TRACE(bh, "Buffer discarded");
+			goto next;
+		}
+
+		/*
+		 * If this block is not completely contained in the range
+		 * to be discarded, then it is not going to be released. Because
+		 * we need to keep this block, we need to make sure this part
+		 * of the page is uptodate before we modify it by writing
+		 * partial zeros on it.
+		 */
+		if (!buffer_mapped(bh)) {
+			/*
+			 * Buffer head must be mapped before we can read
+			 * from the block
+			 */
+			BUFFER_TRACE(bh, "unmapped");
+			ext4_get_block(inode, iblock, bh, 0);
+			/* unmapped? It's a hole - nothing to do */
+			if (!buffer_mapped(bh)) {
+				BUFFER_TRACE(bh, "still unmapped");
+				goto next;
+			}
+		}
+
+		/* Ok, it's mapped. Make sure it's up-to-date */
+		if (PageUptodate(page))
+			set_buffer_uptodate(bh);
+
+		if (!buffer_uptodate(bh)) {
+			err = -EIO;
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			/* Uhhuh. Read error. Complain and punt.*/
+			if (!buffer_uptodate(bh))
+				goto next;
+		}
+
+		if (ext4_should_journal_data(inode)) {
+			BUFFER_TRACE(bh, "get write access");
+			err = ext4_journal_get_write_access(handle, bh);
+			if (err)
+				goto next;
+		}
+
+		zero_user(page, pos, range_to_discard);
+
+		err = 0;
+		if (ext4_should_journal_data(inode)) {
+			err = ext4_handle_dirty_metadata(handle, inode, bh);
+		} else
+			mark_buffer_dirty(bh);
+
+		BUFFER_TRACE(bh, "Partial buffer zeroed");
+next:
+		bh = bh->b_this_page;
+		iblock++;
+		pos += range_to_discard;
+	}
+
+	return err;
+}
+
 /*
  * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
  * up to the end of the block which corresponds to `from'.
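
Usage sketch for the new helpers (hypothetical caller, mirroring the
ext4_da_write_end() hunk earlier in this diff): zero whatever part of the
last written page lies past the copied data, touching only buffer heads
that are already unmapped:

	loff_t page_len = PAGE_CACHE_SIZE -
			((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));

	if (page_len > 0)
		err = ext4_discard_partial_page_buffers(handle,
				inode->i_mapping, pos + copied - 1, page_len,
				EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);

Callers that already hold the page lock use the _no_lock variant directly,
as the wrapper's comment above notes.
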
@@ -3004,7 +3377,7 @@ int ext4_block_zero_page_range(handle_t *handle,
 	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
 				   mapping_gfp_mask(mapping) & ~__GFP_FS);
 	if (!page)
-		return -EINVAL;
+		return -ENOMEM;
 
 	blocksize = inode->i_sb->s_blocksize;
 	max = blocksize - (offset & (blocksize - 1));
@@ -3073,11 +3446,8 @@ int ext4_block_zero_page_range(handle_t *handle,
 	err = 0;
 	if (ext4_should_journal_data(inode)) {
 		err = ext4_handle_dirty_metadata(handle, inode, bh);
-	} else {
-		if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
-			err = ext4_jbd2_file_inode(handle, inode);
+	} else
 		mark_buffer_dirty(bh);
-	}
 
unlock:
 	unlock_page(page);
@@ -3118,6 +3488,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 		return -ENOTSUPP;
 	}
 
+	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+		/* TODO: Add support for bigalloc file systems */
+		return -ENOTSUPP;
+	}
+
 	return ext4_ext_punch_hole(file, offset, length);
 }
 
@@ -3301,7 +3676,7 @@ make_io:
 		trace_ext4_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(READ_META, bh);
+		submit_bh(READ | REQ_META | REQ_PRIO, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			EXT4_ERROR_INODE_BLOCK(inode, block,
@@ -3417,7 +3792,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
 		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
 	}
-	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
 	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
 	ei->i_dir_start_lookup = 0;
@@ -4419,6 +4794,7 @@ retry_alloc:
 				PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
 			unlock_page(page);
 			ret = VM_FAULT_SIGBUS;
+			ext4_journal_stop(handle);
 			goto out;
 		}
 		ext4_set_inode_state(inode, EXT4_STATE_JDATA);