diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_flat.c | 4 | ||||
-rw-r--r-- | fs/binfmt_misc.c | 4 | ||||
-rw-r--r-- | fs/cramfs/inode.c | 84 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/dir.c | 20 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 4 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 4 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 8 | ||||
-rw-r--r-- | fs/ext4/extents.c | 113 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 478 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 53 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 3 | ||||
-rw-r--r-- | fs/ext4/resize.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 1 | ||||
-rw-r--r-- | fs/fat/inode.c | 10 | ||||
-rw-r--r-- | fs/ioprio.c | 8 | ||||
-rw-r--r-- | fs/jffs2/jffs2_fs_i.h | 1 | ||||
-rw-r--r-- | fs/proc/nommu.c | 4 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_dmapi.h | 1 |
21 files changed, 508 insertions, 304 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 56372ecf1690..dfc0197905ca 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
914 | /* Stash our initial stack pointer into the mm structure */ | 914 | /* Stash our initial stack pointer into the mm structure */ |
915 | current->mm->start_stack = (unsigned long )sp; | 915 | current->mm->start_stack = (unsigned long )sp; |
916 | 916 | ||
917 | 917 | #ifdef FLAT_PLAT_INIT | |
918 | FLAT_PLAT_INIT(regs); | ||
919 | #endif | ||
918 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", | 920 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", |
919 | (int)regs, (int)start_addr, (int)current->mm->start_stack); | 921 | (int)regs, (int)start_addr, (int)current->mm->start_stack); |
920 | 922 | ||
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 756205314c24..8d7e88e02e0f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
120 | if (bprm->misc_bang) | 120 | if (bprm->misc_bang) |
121 | goto _ret; | 121 | goto _ret; |
122 | 122 | ||
123 | bprm->misc_bang = 1; | ||
124 | |||
125 | /* to keep locking time low, we copy the interpreter string */ | 123 | /* to keep locking time low, we copy the interpreter string */ |
126 | read_lock(&entries_lock); | 124 | read_lock(&entries_lock); |
127 | fmt = check_file(bprm); | 125 | fmt = check_file(bprm); |
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
199 | if (retval < 0) | 197 | if (retval < 0) |
200 | goto _error; | 198 | goto _error; |
201 | 199 | ||
200 | bprm->misc_bang = 1; | ||
201 | |||
202 | retval = search_binary_handler (bprm, regs); | 202 | retval = search_binary_handler (bprm, regs); |
203 | if (retval < 0) | 203 | if (retval < 0) |
204 | goto _error; | 204 | goto _error; |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 0c3b618c15b3..f40423eb1a14 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex); | |||
43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) | 43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) |
44 | { | 44 | { |
45 | struct cramfs_inode *cramfs_inode = opaque; | 45 | struct cramfs_inode *cramfs_inode = opaque; |
46 | 46 | return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; | |
47 | if (inode->i_ino != CRAMINO(cramfs_inode)) | ||
48 | return 0; /* does not match */ | ||
49 | |||
50 | if (inode->i_ino != 1) | ||
51 | return 1; | ||
52 | |||
53 | /* all empty directories, char, block, pipe, and sock, share inode #1 */ | ||
54 | |||
55 | if ((inode->i_mode != cramfs_inode->mode) || | ||
56 | (inode->i_gid != cramfs_inode->gid) || | ||
57 | (inode->i_uid != cramfs_inode->uid)) | ||
58 | return 0; /* does not match */ | ||
59 | |||
60 | if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && | ||
61 | (inode->i_rdev != old_decode_dev(cramfs_inode->size))) | ||
62 | return 0; /* does not match */ | ||
63 | |||
64 | return 1; /* matches */ | ||
65 | } | 47 | } |
66 | 48 | ||
67 | static int cramfs_iget5_set(struct inode *inode, void *opaque) | 49 | static int cramfs_iget5_set(struct inode *inode, void *opaque) |
68 | { | 50 | { |
69 | static struct timespec zerotime; | ||
70 | struct cramfs_inode *cramfs_inode = opaque; | 51 | struct cramfs_inode *cramfs_inode = opaque; |
71 | inode->i_mode = cramfs_inode->mode; | ||
72 | inode->i_uid = cramfs_inode->uid; | ||
73 | inode->i_size = cramfs_inode->size; | ||
74 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
75 | inode->i_gid = cramfs_inode->gid; | ||
76 | /* Struct copy intentional */ | ||
77 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
78 | inode->i_ino = CRAMINO(cramfs_inode); | 52 | inode->i_ino = CRAMINO(cramfs_inode); |
79 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
80 | but it's the best we can do without reading the directory | ||
81 | contents. 1 yields the right result in GNU find, even | ||
82 | without -noleaf option. */ | ||
83 | if (S_ISREG(inode->i_mode)) { | ||
84 | inode->i_fop = &generic_ro_fops; | ||
85 | inode->i_data.a_ops = &cramfs_aops; | ||
86 | } else if (S_ISDIR(inode->i_mode)) { | ||
87 | inode->i_op = &cramfs_dir_inode_operations; | ||
88 | inode->i_fop = &cramfs_directory_operations; | ||
89 | } else if (S_ISLNK(inode->i_mode)) { | ||
90 | inode->i_op = &page_symlink_inode_operations; | ||
91 | inode->i_data.a_ops = &cramfs_aops; | ||
92 | } else { | ||
93 | inode->i_size = 0; | ||
94 | inode->i_blocks = 0; | ||
95 | init_special_inode(inode, inode->i_mode, | ||
96 | old_decode_dev(cramfs_inode->size)); | ||
97 | } | ||
98 | return 0; | 53 | return 0; |
99 | } | 54 | } |
100 | 55 | ||
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb, | |||
104 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), | 59 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), |
105 | cramfs_iget5_test, cramfs_iget5_set, | 60 | cramfs_iget5_test, cramfs_iget5_set, |
106 | cramfs_inode); | 61 | cramfs_inode); |
62 | static struct timespec zerotime; | ||
63 | |||
107 | if (inode && (inode->i_state & I_NEW)) { | 64 | if (inode && (inode->i_state & I_NEW)) { |
65 | inode->i_mode = cramfs_inode->mode; | ||
66 | inode->i_uid = cramfs_inode->uid; | ||
67 | inode->i_size = cramfs_inode->size; | ||
68 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
69 | inode->i_gid = cramfs_inode->gid; | ||
70 | /* Struct copy intentional */ | ||
71 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
72 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
73 | but it's the best we can do without reading the directory | ||
74 | contents. 1 yields the right result in GNU find, even | ||
75 | without -noleaf option. */ | ||
76 | if (S_ISREG(inode->i_mode)) { | ||
77 | inode->i_fop = &generic_ro_fops; | ||
78 | inode->i_data.a_ops = &cramfs_aops; | ||
79 | } else if (S_ISDIR(inode->i_mode)) { | ||
80 | inode->i_op = &cramfs_dir_inode_operations; | ||
81 | inode->i_fop = &cramfs_directory_operations; | ||
82 | } else if (S_ISLNK(inode->i_mode)) { | ||
83 | inode->i_op = &page_symlink_inode_operations; | ||
84 | inode->i_data.a_ops = &cramfs_aops; | ||
85 | } else { | ||
86 | inode->i_size = 0; | ||
87 | inode->i_blocks = 0; | ||
88 | init_special_inode(inode, inode->i_mode, | ||
89 | old_decode_dev(cramfs_inode->size)); | ||
90 | } | ||
108 | unlock_new_inode(inode); | 91 | unlock_new_inode(inode); |
109 | } | 92 | } |
110 | return inode; | 93 | return inode; |
111 | } | 94 | } |
112 | 95 | ||
96 | static void cramfs_drop_inode(struct inode *inode) | ||
97 | { | ||
98 | if (inode->i_ino == 1) | ||
99 | generic_delete_inode(inode); | ||
100 | else | ||
101 | generic_drop_inode(inode); | ||
102 | } | ||
103 | |||
113 | /* | 104 | /* |
114 | * We have our own block cache: don't fill up the buffer cache | 105 | * We have our own block cache: don't fill up the buffer cache |
115 | * with the rom-image, because the way the filesystem is set | 106 | * with the rom-image, because the way the filesystem is set |
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = { | |||
534 | .put_super = cramfs_put_super, | 525 | .put_super = cramfs_put_super, |
535 | .remount_fs = cramfs_remount, | 526 | .remount_fs = cramfs_remount, |
536 | .statfs = cramfs_statfs, | 527 | .statfs = cramfs_statfs, |
528 | .drop_inode = cramfs_drop_inode, | ||
537 | }; | 529 | }; |
538 | 530 | ||
539 | static int cramfs_get_sb(struct file_system_type *fs_type, | 531 | static int cramfs_get_sb(struct file_system_type *fs_type, |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..e9fa960ba6da 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | |||
1626 | free_blocks = | 1626 | free_blocks = |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); |
1628 | #endif | 1628 | #endif |
1629 | if (free_blocks <= root_blocks) | ||
1630 | /* we don't have free space */ | ||
1631 | return 0; | ||
1629 | if (free_blocks - root_blocks < nblocks) | 1632 | if (free_blocks - root_blocks < nblocks) |
1630 | return free_blocks - root_blocks; | 1633 | return free_blocks - root_blocks; |
1631 | return nblocks; | 1634 | return nblocks; |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..ec8e33b45219 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
411 | get_dtype(sb, fname->file_type)); | 411 | get_dtype(sb, fname->file_type)); |
412 | if (error) { | 412 | if (error) { |
413 | filp->f_pos = curr_pos; | 413 | filp->f_pos = curr_pos; |
414 | info->extra_fname = fname->next; | 414 | info->extra_fname = fname; |
415 | return error; | 415 | return error; |
416 | } | 416 | } |
417 | fname = fname->next; | 417 | fname = fname->next; |
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp, | |||
450 | * If there are any leftover names on the hash collision | 450 | * If there are any leftover names on the hash collision |
451 | * chain, return them first. | 451 | * chain, return them first. |
452 | */ | 452 | */ |
453 | if (info->extra_fname && | 453 | if (info->extra_fname) { |
454 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 454 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
455 | goto finished; | 455 | goto finished; |
456 | 456 | ||
457 | if (!info->curr_node) | 457 | info->extra_fname = NULL; |
458 | info->curr_node = rb_next(info->curr_node); | ||
459 | if (!info->curr_node) { | ||
460 | if (info->next_hash == ~0) { | ||
461 | filp->f_pos = EXT4_HTREE_EOF; | ||
462 | goto finished; | ||
463 | } | ||
464 | info->curr_hash = info->next_hash; | ||
465 | info->curr_minor_hash = 0; | ||
466 | } | ||
467 | } else if (!info->curr_node) | ||
458 | info->curr_node = rb_first(&info->root); | 468 | info->curr_node = rb_first(&info->root); |
459 | 469 | ||
460 | while (1) { | 470 | while (1) { |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *); | |||
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1073 | extern void ext4_set_aops(struct inode *inode); |
1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1074 | extern int ext4_writepage_trans_blocks(struct inode *); |
1075 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1076 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
1075 | extern int ext4_block_truncate_page(handle_t *handle, | 1077 | extern int ext4_block_truncate_page(handle_t *handle, |
1076 | struct address_space *mapping, loff_t from); | 1078 | struct address_space *mapping, loff_t from); |
1077 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1079 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
@@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1227 | /* extents.c */ | 1229 | /* extents.c */ |
1228 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1230 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1229 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1231 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1232 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | ||
1233 | int chunk); | ||
1230 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1234 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1231 | ext4_lblk_t iblock, | 1235 | ext4_lblk_t iblock, |
1232 | unsigned long max_blocks, struct buffer_head *bh_result, | 1236 | unsigned long max_blocks, struct buffer_head *bh_result, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..d33dc56d6986 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | |||
216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
219 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 219 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
220 | int num, | ||
221 | struct ext4_ext_path *path); | ||
220 | extern int ext4_ext_try_to_merge(struct inode *inode, | 222 | extern int ext4_ext_try_to_merge(struct inode *inode, |
221 | struct ext4_ext_path *path, | 223 | struct ext4_ext_path *path, |
222 | struct ext4_extent *); | 224 | struct ext4_extent *); |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -51,6 +51,14 @@ | |||
51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
53 | 53 | ||
54 | /* | ||
55 | * Define the number of metadata blocks we need to account to modify data. | ||
56 | * | ||
57 | * This include super block, inode block, quota blocks and xattr blocks | ||
58 | */ | ||
59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | ||
60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
61 | |||
54 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
55 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
56 | * generous. We can grow the delete transaction later if necessary. */ | 64 | * generous. We can grow the delete transaction later if necessary. */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1747 | } | 1747 | } |
1748 | 1748 | ||
1749 | /* | 1749 | /* |
1750 | * ext4_ext_calc_credits_for_insert: | 1750 | * ext4_ext_calc_credits_for_single_extent: |
1751 | * This routine returns max. credits that the extent tree can consume. | 1751 | * This routine returns max. credits that needed to insert an extent |
1752 | * It should be OK for low-performance paths like ->writepage() | 1752 | * to the extent tree. |
1753 | * To allow many writing processes to fit into a single transaction, | 1753 | * When pass the actual path, the caller should calculate credits |
1754 | * the caller should calculate credits under i_data_sem and | 1754 | * under i_data_sem. |
1755 | * pass the actual path. | ||
1756 | */ | 1755 | */ |
1757 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1756 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
1758 | struct ext4_ext_path *path) | 1757 | struct ext4_ext_path *path) |
1759 | { | 1758 | { |
1760 | int depth, needed; | ||
1761 | |||
1762 | if (path) { | 1759 | if (path) { |
1760 | int depth = ext_depth(inode); | ||
1761 | int ret = 0; | ||
1762 | |||
1763 | /* probably there is space in leaf? */ | 1763 | /* probably there is space in leaf? */ |
1764 | depth = ext_depth(inode); | ||
1765 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1764 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
1766 | < le16_to_cpu(path[depth].p_hdr->eh_max)) | 1765 | < le16_to_cpu(path[depth].p_hdr->eh_max)) { |
1767 | return 1; | ||
1768 | } | ||
1769 | 1766 | ||
1770 | /* | 1767 | /* |
1771 | * given 32-bit logical block (4294967296 blocks), max. tree | 1768 | * There are some space in the leaf tree, no |
1772 | * can be 4 levels in depth -- 4 * 340^4 == 53453440000. | 1769 | * need to account for leaf block credit |
1773 | * Let's also add one more level for imbalance. | 1770 | * |
1774 | */ | 1771 | * bitmaps and block group descriptor blocks |
1775 | depth = 5; | 1772 | * and other metadat blocks still need to be |
1776 | 1773 | * accounted. | |
1777 | /* allocation of new data block(s) */ | 1774 | */ |
1778 | needed = 2; | 1775 | /* 1 bitmap, 1 block group descriptor */ |
1776 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
1777 | } | ||
1778 | } | ||
1779 | 1779 | ||
1780 | /* | 1780 | return ext4_chunk_trans_blocks(inode, nrblocks); |
1781 | * tree can be full, so it would need to grow in depth: | 1781 | } |
1782 | * we need one credit to modify old root, credits for | ||
1783 | * new root will be added in split accounting | ||
1784 | */ | ||
1785 | needed += 1; | ||
1786 | 1782 | ||
1787 | /* | 1783 | /* |
1788 | * Index split can happen, we would need: | 1784 | * How many index/leaf blocks need to change/allocate to modify nrblocks? |
1789 | * allocate intermediate indexes (bitmap + group) | 1785 | * |
1790 | * + change two blocks at each level, but root (already included) | 1786 | * if nrblocks are fit in a single extent (chunk flag is 1), then |
1791 | */ | 1787 | * in the worse case, each tree level index/leaf need to be changed |
1792 | needed += (depth * 2) + (depth * 2); | 1788 | * if the tree split due to insert a new extent, then the old tree |
1789 | * index/leaf need to be updated too | ||
1790 | * | ||
1791 | * If the nrblocks are discontiguous, they could cause | ||
1792 | * the whole tree split more than once, but this is really rare. | ||
1793 | */ | ||
1794 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
1795 | { | ||
1796 | int index; | ||
1797 | int depth = ext_depth(inode); | ||
1793 | 1798 | ||
1794 | /* any allocation modifies superblock */ | 1799 | if (chunk) |
1795 | needed += 1; | 1800 | index = depth * 2; |
1801 | else | ||
1802 | index = depth * 3; | ||
1796 | 1803 | ||
1797 | return needed; | 1804 | return index; |
1798 | } | 1805 | } |
1799 | 1806 | ||
1800 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1807 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
1921 | correct_index = 1; | 1928 | correct_index = 1; |
1922 | credits += (ext_depth(inode)) + 1; | 1929 | credits += (ext_depth(inode)) + 1; |
1923 | } | 1930 | } |
1924 | #ifdef CONFIG_QUOTA | ||
1925 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 1931 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
1926 | #endif | ||
1927 | 1932 | ||
1928 | err = ext4_ext_journal_restart(handle, credits); | 1933 | err = ext4_ext_journal_restart(handle, credits); |
1929 | if (err) | 1934 | if (err) |
@@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2805 | /* | 2810 | /* |
2806 | * probably first extent we're gonna free will be last in block | 2811 | * probably first extent we're gonna free will be last in block |
2807 | */ | 2812 | */ |
2808 | err = ext4_writepage_trans_blocks(inode) + 3; | 2813 | err = ext4_writepage_trans_blocks(inode); |
2809 | handle = ext4_journal_start(inode, err); | 2814 | handle = ext4_journal_start(inode, err); |
2810 | if (IS_ERR(handle)) | 2815 | if (IS_ERR(handle)) |
2811 | return; | 2816 | return; |
@@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2819 | down_write(&EXT4_I(inode)->i_data_sem); | 2824 | down_write(&EXT4_I(inode)->i_data_sem); |
2820 | ext4_ext_invalidate_cache(inode); | 2825 | ext4_ext_invalidate_cache(inode); |
2821 | 2826 | ||
2822 | ext4_mb_discard_inode_preallocations(inode); | 2827 | ext4_discard_reservation(inode); |
2823 | 2828 | ||
2824 | /* | 2829 | /* |
2825 | * TODO: optimization is possible here. | 2830 | * TODO: optimization is possible here. |
@@ -2858,27 +2863,6 @@ out_stop: | |||
2858 | ext4_journal_stop(handle); | 2863 | ext4_journal_stop(handle); |
2859 | } | 2864 | } |
2860 | 2865 | ||
2861 | /* | ||
2862 | * ext4_ext_writepage_trans_blocks: | ||
2863 | * calculate max number of blocks we could modify | ||
2864 | * in order to allocate new block for an inode | ||
2865 | */ | ||
2866 | int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | ||
2867 | { | ||
2868 | int needed; | ||
2869 | |||
2870 | needed = ext4_ext_calc_credits_for_insert(inode, NULL); | ||
2871 | |||
2872 | /* caller wants to allocate num blocks, but note it includes sb */ | ||
2873 | needed = needed * num - (num - 1); | ||
2874 | |||
2875 | #ifdef CONFIG_QUOTA | ||
2876 | needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
2877 | #endif | ||
2878 | |||
2879 | return needed; | ||
2880 | } | ||
2881 | |||
2882 | static void ext4_falloc_update_inode(struct inode *inode, | 2866 | static void ext4_falloc_update_inode(struct inode *inode, |
2883 | int mode, loff_t new_size, int update_ctime) | 2867 | int mode, loff_t new_size, int update_ctime) |
2884 | { | 2868 | { |
@@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2939 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 2923 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
2940 | - block; | 2924 | - block; |
2941 | /* | 2925 | /* |
2942 | * credits to insert 1 extent into extent tree + buffers to be able to | 2926 | * credits to insert 1 extent into extent tree |
2943 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
2944 | */ | 2927 | */ |
2945 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2928 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
2946 | mutex_lock(&inode->i_mutex); | 2929 | mutex_lock(&inode->i_mutex); |
2947 | retry: | 2930 | retry: |
2948 | while (ret >= 0 && ret < max_blocks) { | 2931 | while (ret >= 0 && ret < max_blocks) { |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..f344834bbf58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -351,7 +351,7 @@ find_close_to_parent: | |||
351 | goto found_flexbg; | 351 | goto found_flexbg; |
352 | } | 352 | } |
353 | 353 | ||
354 | if (best_flex < 0 || | 354 | if (flex_group[best_flex].free_inodes == 0 || |
355 | (flex_group[i].free_blocks > | 355 | (flex_group[i].free_blocks > |
356 | flex_group[best_flex].free_blocks && | 356 | flex_group[best_flex].free_blocks && |
357 | flex_group[i].free_inodes)) | 357 | flex_group[i].free_inodes)) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -41,6 +41,8 @@ | |||
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | #include "ext4_extents.h" | 42 | #include "ext4_extents.h" |
43 | 43 | ||
44 | #define MPAGE_DA_EXTENT_TAIL 0x01 | ||
45 | |||
44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 46 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
45 | loff_t new_size) | 47 | loff_t new_size) |
46 | { | 48 | { |
@@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | |||
1005 | */ | 1007 | */ |
1006 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1008 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) |
1007 | { | 1009 | { |
1010 | if (!blocks) | ||
1011 | return 0; | ||
1012 | |||
1008 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1013 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1009 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1014 | return ext4_ext_calc_metadata_amount(inode, blocks); |
1010 | 1015 | ||
@@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1041 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1042 | } | 1047 | } |
1043 | 1048 | ||
1044 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1045 | #define DIO_MAX_BLOCKS 4096 | ||
1046 | /* | ||
1047 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
1048 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
1049 | * For B blocks with A block pointers per block we need: | ||
1050 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
1051 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
1052 | */ | ||
1053 | #define DIO_CREDITS 25 | ||
1054 | |||
1055 | |||
1056 | /* | 1049 | /* |
1057 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1050 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
1058 | * and returns if the blocks are already mapped. | 1051 | * and returns if the blocks are already mapped. |
@@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1164 | return retval; | 1157 | return retval; |
1165 | } | 1158 | } |
1166 | 1159 | ||
1160 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1161 | #define DIO_MAX_BLOCKS 4096 | ||
1162 | |||
1167 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
1168 | struct buffer_head *bh_result, int create) | 1164 | struct buffer_head *bh_result, int create) |
1169 | { | 1165 | { |
1170 | handle_t *handle = ext4_journal_current_handle(); | 1166 | handle_t *handle = ext4_journal_current_handle(); |
1171 | int ret = 0, started = 0; | 1167 | int ret = 0, started = 0; |
1172 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1168 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
1169 | int dio_credits; | ||
1173 | 1170 | ||
1174 | if (create && !handle) { | 1171 | if (create && !handle) { |
1175 | /* Direct IO write... */ | 1172 | /* Direct IO write... */ |
1176 | if (max_blocks > DIO_MAX_BLOCKS) | 1173 | if (max_blocks > DIO_MAX_BLOCKS) |
1177 | max_blocks = DIO_MAX_BLOCKS; | 1174 | max_blocks = DIO_MAX_BLOCKS; |
1178 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1175 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
1179 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1176 | handle = ext4_journal_start(inode, dio_credits); |
1180 | if (IS_ERR(handle)) { | 1177 | if (IS_ERR(handle)) { |
1181 | ret = PTR_ERR(handle); | 1178 | ret = PTR_ERR(handle); |
1182 | goto out; | 1179 | goto out; |
@@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1559 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1556 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1560 | int total, mdb, mdb_free, release; | 1557 | int total, mdb, mdb_free, release; |
1561 | 1558 | ||
1559 | if (!to_free) | ||
1560 | return; /* Nothing to release, exit */ | ||
1561 | |||
1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1563 | |||
1564 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | ||
1565 | /* | ||
1566 | * if there is no reserved blocks, but we try to free some | ||
1567 | * then the counter is messed up somewhere. | ||
1568 | * but since this function is called from invalidate | ||
1569 | * page, it's harmless to return without any action | ||
1570 | */ | ||
1571 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | ||
1572 | "blocks for inode %lu, but there is no reserved " | ||
1573 | "data blocks\n", to_free, inode->i_ino); | ||
1574 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1575 | return; | ||
1576 | } | ||
1577 | |||
1563 | /* recalculate the number of metablocks still need to be reserved */ | 1578 | /* recalculate the number of metablocks still need to be reserved */ |
1564 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1579 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; |
1565 | mdb = ext4_calc_metadata_amount(inode, total); | 1580 | mdb = ext4_calc_metadata_amount(inode, total); |
@@ -1613,11 +1628,13 @@ struct mpage_da_data { | |||
1613 | unsigned long first_page, next_page; /* extent of pages */ | 1628 | unsigned long first_page, next_page; /* extent of pages */ |
1614 | get_block_t *get_block; | 1629 | get_block_t *get_block; |
1615 | struct writeback_control *wbc; | 1630 | struct writeback_control *wbc; |
1631 | int io_done; | ||
1632 | long pages_written; | ||
1616 | }; | 1633 | }; |
1617 | 1634 | ||
1618 | /* | 1635 | /* |
1619 | * mpage_da_submit_io - walks through extent of pages and try to write | 1636 | * mpage_da_submit_io - walks through extent of pages and try to write |
1620 | * them with __mpage_writepage() | 1637 | * them with writepage() call back |
1621 | * | 1638 | * |
1622 | * @mpd->inode: inode | 1639 | * @mpd->inode: inode |
1623 | * @mpd->first_page: first page of the extent | 1640 | * @mpd->first_page: first page of the extent |
@@ -1632,18 +1649,11 @@ struct mpage_da_data { | |||
1632 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 1649 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
1633 | { | 1650 | { |
1634 | struct address_space *mapping = mpd->inode->i_mapping; | 1651 | struct address_space *mapping = mpd->inode->i_mapping; |
1635 | struct mpage_data mpd_pp = { | ||
1636 | .bio = NULL, | ||
1637 | .last_block_in_bio = 0, | ||
1638 | .get_block = mpd->get_block, | ||
1639 | .use_writepage = 1, | ||
1640 | }; | ||
1641 | int ret = 0, err, nr_pages, i; | 1652 | int ret = 0, err, nr_pages, i; |
1642 | unsigned long index, end; | 1653 | unsigned long index, end; |
1643 | struct pagevec pvec; | 1654 | struct pagevec pvec; |
1644 | 1655 | ||
1645 | BUG_ON(mpd->next_page <= mpd->first_page); | 1656 | BUG_ON(mpd->next_page <= mpd->first_page); |
1646 | |||
1647 | pagevec_init(&pvec, 0); | 1657 | pagevec_init(&pvec, 0); |
1648 | index = mpd->first_page; | 1658 | index = mpd->first_page; |
1649 | end = mpd->next_page - 1; | 1659 | end = mpd->next_page - 1; |
@@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1661 | break; | 1671 | break; |
1662 | index++; | 1672 | index++; |
1663 | 1673 | ||
1664 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | 1674 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1665 | 1675 | if (!err) | |
1676 | mpd->pages_written++; | ||
1666 | /* | 1677 | /* |
1667 | * In error case, we have to continue because | 1678 | * In error case, we have to continue because |
1668 | * remaining pages are still locked | 1679 | * remaining pages are still locked |
@@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1673 | } | 1684 | } |
1674 | pagevec_release(&pvec); | 1685 | pagevec_release(&pvec); |
1675 | } | 1686 | } |
1676 | if (mpd_pp.bio) | ||
1677 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
1678 | |||
1679 | return ret; | 1687 | return ret; |
1680 | } | 1688 | } |
1681 | 1689 | ||
@@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1698 | int blocks = exbh->b_size >> inode->i_blkbits; | 1706 | int blocks = exbh->b_size >> inode->i_blkbits; |
1699 | sector_t pblock = exbh->b_blocknr, cur_logical; | 1707 | sector_t pblock = exbh->b_blocknr, cur_logical; |
1700 | struct buffer_head *head, *bh; | 1708 | struct buffer_head *head, *bh; |
1701 | unsigned long index, end; | 1709 | pgoff_t index, end; |
1702 | struct pagevec pvec; | 1710 | struct pagevec pvec; |
1703 | int nr_pages, i; | 1711 | int nr_pages, i; |
1704 | 1712 | ||
@@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1741 | if (buffer_delay(bh)) { | 1749 | if (buffer_delay(bh)) { |
1742 | bh->b_blocknr = pblock; | 1750 | bh->b_blocknr = pblock; |
1743 | clear_buffer_delay(bh); | 1751 | clear_buffer_delay(bh); |
1752 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1753 | } else if (buffer_unwritten(bh)) { | ||
1754 | bh->b_blocknr = pblock; | ||
1755 | clear_buffer_unwritten(bh); | ||
1756 | set_buffer_mapped(bh); | ||
1757 | set_buffer_new(bh); | ||
1758 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1744 | } else if (buffer_mapped(bh)) | 1759 | } else if (buffer_mapped(bh)) |
1745 | BUG_ON(bh->b_blocknr != pblock); | 1760 | BUG_ON(bh->b_blocknr != pblock); |
1746 | 1761 | ||
@@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1776 | * | 1791 | * |
1777 | * The function skips space we know is already mapped to disk blocks. | 1792 | * The function skips space we know is already mapped to disk blocks. |
1778 | * | 1793 | * |
1779 | * The function ignores errors ->get_block() returns, thus real | ||
1780 | * error handling is postponed to __mpage_writepage() | ||
1781 | */ | 1794 | */ |
1782 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) |
1783 | { | 1796 | { |
1797 | int err = 0; | ||
1784 | struct buffer_head *lbh = &mpd->lbh; | 1798 | struct buffer_head *lbh = &mpd->lbh; |
1785 | int err = 0, remain = lbh->b_size; | ||
1786 | sector_t next = lbh->b_blocknr; | 1799 | sector_t next = lbh->b_blocknr; |
1787 | struct buffer_head new; | 1800 | struct buffer_head new; |
1788 | 1801 | ||
@@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1792 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1793 | return; | 1806 | return; |
1794 | 1807 | ||
1795 | while (remain) { | 1808 | new.b_state = lbh->b_state; |
1796 | new.b_state = lbh->b_state; | 1809 | new.b_blocknr = 0; |
1797 | new.b_blocknr = 0; | 1810 | new.b_size = lbh->b_size; |
1798 | new.b_size = remain; | ||
1799 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1800 | if (err) { | ||
1801 | /* | ||
1802 | * Rather than implement own error handling | ||
1803 | * here, we just leave remaining blocks | ||
1804 | * unallocated and try again with ->writepage() | ||
1805 | */ | ||
1806 | break; | ||
1807 | } | ||
1808 | BUG_ON(new.b_size == 0); | ||
1809 | 1811 | ||
1810 | if (buffer_new(&new)) | 1812 | /* |
1811 | __unmap_underlying_blocks(mpd->inode, &new); | 1813 | * If we didn't accumulate anything |
1814 | * to write simply return | ||
1815 | */ | ||
1816 | if (!new.b_size) | ||
1817 | return; | ||
1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1819 | if (err) | ||
1820 | return; | ||
1821 | BUG_ON(new.b_size == 0); | ||
1812 | 1822 | ||
1813 | /* | 1823 | if (buffer_new(&new)) |
1814 | * If blocks are delayed marked, we need to | 1824 | __unmap_underlying_blocks(mpd->inode, &new); |
1815 | * put actual blocknr and drop delayed bit | ||
1816 | */ | ||
1817 | if (buffer_delay(lbh)) | ||
1818 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1819 | 1825 | ||
1820 | /* go for the remaining blocks */ | 1826 | /* |
1821 | next += new.b_size >> mpd->inode->i_blkbits; | 1827 | * If blocks are delayed marked, we need to |
1822 | remain -= new.b_size; | 1828 | * put actual blocknr and drop delayed bit |
1823 | } | 1829 | */ |
1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | ||
1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1832 | |||
1833 | return; | ||
1824 | } | 1834 | } |
1825 | 1835 | ||
1826 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | 1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
1837 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1827 | 1838 | ||
1828 | /* | 1839 | /* |
1829 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 1840 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
@@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1837 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1848 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
1838 | sector_t logical, struct buffer_head *bh) | 1849 | sector_t logical, struct buffer_head *bh) |
1839 | { | 1850 | { |
1840 | struct buffer_head *lbh = &mpd->lbh; | ||
1841 | sector_t next; | 1851 | sector_t next; |
1852 | size_t b_size = bh->b_size; | ||
1853 | struct buffer_head *lbh = &mpd->lbh; | ||
1854 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
1842 | 1855 | ||
1843 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1856 | /* check if thereserved journal credits might overflow */ |
1844 | 1857 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
1858 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1859 | /* | ||
1860 | * With non-extent format we are limited by the journal | ||
1861 | * credit available. Total credit needed to insert | ||
1862 | * nrblocks contiguous blocks is dependent on the | ||
1863 | * nrblocks. So limit nrblocks. | ||
1864 | */ | ||
1865 | goto flush_it; | ||
1866 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
1867 | EXT4_MAX_TRANS_DATA) { | ||
1868 | /* | ||
1869 | * Adding the new buffer_head would make it cross the | ||
1870 | * allowed limit for which we have journal credit | ||
1871 | * reserved. So limit the new bh->b_size | ||
1872 | */ | ||
1873 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
1874 | mpd->inode->i_blkbits; | ||
1875 | /* we will do mpage_da_submit_io in the next loop */ | ||
1876 | } | ||
1877 | } | ||
1845 | /* | 1878 | /* |
1846 | * First block in the extent | 1879 | * First block in the extent |
1847 | */ | 1880 | */ |
1848 | if (lbh->b_size == 0) { | 1881 | if (lbh->b_size == 0) { |
1849 | lbh->b_blocknr = logical; | 1882 | lbh->b_blocknr = logical; |
1850 | lbh->b_size = bh->b_size; | 1883 | lbh->b_size = b_size; |
1851 | lbh->b_state = bh->b_state & BH_FLAGS; | 1884 | lbh->b_state = bh->b_state & BH_FLAGS; |
1852 | return; | 1885 | return; |
1853 | } | 1886 | } |
1854 | 1887 | ||
1888 | next = lbh->b_blocknr + nrblocks; | ||
1855 | /* | 1889 | /* |
1856 | * Can we merge the block to our big extent? | 1890 | * Can we merge the block to our big extent? |
1857 | */ | 1891 | */ |
1858 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1892 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
1859 | lbh->b_size += bh->b_size; | 1893 | lbh->b_size += b_size; |
1860 | return; | 1894 | return; |
1861 | } | 1895 | } |
1862 | 1896 | ||
1897 | flush_it: | ||
1863 | /* | 1898 | /* |
1864 | * We couldn't merge the block to our extent, so we | 1899 | * We couldn't merge the block to our extent, so we |
1865 | * need to flush current extent and start new one | 1900 | * need to flush current extent and start new one |
1866 | */ | 1901 | */ |
1867 | mpage_da_map_blocks(mpd); | 1902 | mpage_da_map_blocks(mpd); |
1868 | 1903 | mpage_da_submit_io(mpd); | |
1869 | /* | 1904 | mpd->io_done = 1; |
1870 | * Now start a new extent | 1905 | return; |
1871 | */ | ||
1872 | lbh->b_size = bh->b_size; | ||
1873 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
1874 | lbh->b_blocknr = logical; | ||
1875 | } | 1906 | } |
1876 | 1907 | ||
1877 | /* | 1908 | /* |
@@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page, | |||
1891 | struct buffer_head *bh, *head, fake; | 1922 | struct buffer_head *bh, *head, fake; |
1892 | sector_t logical; | 1923 | sector_t logical; |
1893 | 1924 | ||
1925 | if (mpd->io_done) { | ||
1926 | /* | ||
1927 | * Rest of the page in the page_vec | ||
1928 | * redirty then and skip then. We will | ||
1929 | * try to to write them again after | ||
1930 | * starting a new transaction | ||
1931 | */ | ||
1932 | redirty_page_for_writepage(wbc, page); | ||
1933 | unlock_page(page); | ||
1934 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } | ||
1894 | /* | 1936 | /* |
1895 | * Can we merge this page to current extent? | 1937 | * Can we merge this page to current extent? |
1896 | */ | 1938 | */ |
1897 | if (mpd->next_page != page->index) { | 1939 | if (mpd->next_page != page->index) { |
1898 | /* | 1940 | /* |
1899 | * Nope, we can't. So, we map non-allocated blocks | 1941 | * Nope, we can't. So, we map non-allocated blocks |
1900 | * and start IO on them using __mpage_writepage() | 1942 | * and start IO on them using writepage() |
1901 | */ | 1943 | */ |
1902 | if (mpd->next_page != mpd->first_page) { | 1944 | if (mpd->next_page != mpd->first_page) { |
1903 | mpage_da_map_blocks(mpd); | 1945 | mpage_da_map_blocks(mpd); |
1904 | mpage_da_submit_io(mpd); | 1946 | mpage_da_submit_io(mpd); |
1947 | /* | ||
1948 | * skip rest of the page in the page_vec | ||
1949 | */ | ||
1950 | mpd->io_done = 1; | ||
1951 | redirty_page_for_writepage(wbc, page); | ||
1952 | unlock_page(page); | ||
1953 | return MPAGE_DA_EXTENT_TAIL; | ||
1905 | } | 1954 | } |
1906 | 1955 | ||
1907 | /* | 1956 | /* |
@@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1932 | set_buffer_dirty(bh); | 1981 | set_buffer_dirty(bh); |
1933 | set_buffer_uptodate(bh); | 1982 | set_buffer_uptodate(bh); |
1934 | mpage_add_bh_to_extent(mpd, logical, bh); | 1983 | mpage_add_bh_to_extent(mpd, logical, bh); |
1984 | if (mpd->io_done) | ||
1985 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } else { | 1986 | } else { |
1936 | /* | 1987 | /* |
1937 | * Page with regular buffer heads, just add all dirty ones | 1988 | * Page with regular buffer heads, just add all dirty ones |
@@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page, | |||
1940 | bh = head; | 1991 | bh = head; |
1941 | do { | 1992 | do { |
1942 | BUG_ON(buffer_locked(bh)); | 1993 | BUG_ON(buffer_locked(bh)); |
1943 | if (buffer_dirty(bh)) | 1994 | if (buffer_dirty(bh) && |
1995 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
1944 | mpage_add_bh_to_extent(mpd, logical, bh); | 1996 | mpage_add_bh_to_extent(mpd, logical, bh); |
1997 | if (mpd->io_done) | ||
1998 | return MPAGE_DA_EXTENT_TAIL; | ||
1999 | } | ||
1945 | logical++; | 2000 | logical++; |
1946 | } while ((bh = bh->b_this_page) != head); | 2001 | } while ((bh = bh->b_this_page) != head); |
1947 | } | 2002 | } |
@@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page, | |||
1960 | * | 2015 | * |
1961 | * This is a library function, which implements the writepages() | 2016 | * This is a library function, which implements the writepages() |
1962 | * address_space_operation. | 2017 | * address_space_operation. |
1963 | * | ||
1964 | * In order to avoid duplication of logic that deals with partial pages, | ||
1965 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
1966 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
1967 | * | ||
1968 | * It's important that we call __mpage_writepage() only once for each | ||
1969 | * involved page, otherwise we'd have to implement more complicated logic | ||
1970 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
1971 | * | ||
1972 | * See comments to mpage_writepages() | ||
1973 | */ | 2018 | */ |
1974 | static int mpage_da_writepages(struct address_space *mapping, | 2019 | static int mpage_da_writepages(struct address_space *mapping, |
1975 | struct writeback_control *wbc, | 2020 | struct writeback_control *wbc, |
1976 | get_block_t get_block) | 2021 | get_block_t get_block) |
1977 | { | 2022 | { |
1978 | struct mpage_da_data mpd; | 2023 | struct mpage_da_data mpd; |
2024 | long to_write; | ||
1979 | int ret; | 2025 | int ret; |
1980 | 2026 | ||
1981 | if (!get_block) | 2027 | if (!get_block) |
@@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
1989 | mpd.first_page = 0; | 2035 | mpd.first_page = 0; |
1990 | mpd.next_page = 0; | 2036 | mpd.next_page = 0; |
1991 | mpd.get_block = get_block; | 2037 | mpd.get_block = get_block; |
2038 | mpd.io_done = 0; | ||
2039 | mpd.pages_written = 0; | ||
2040 | |||
2041 | to_write = wbc->nr_to_write; | ||
1992 | 2042 | ||
1993 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); |
1994 | 2044 | ||
1995 | /* | 2045 | /* |
1996 | * Handle last extent of pages | 2046 | * Handle last extent of pages |
1997 | */ | 2047 | */ |
1998 | if (mpd.next_page != mpd.first_page) { | 2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
1999 | mpage_da_map_blocks(&mpd); | 2049 | mpage_da_map_blocks(&mpd); |
2000 | mpage_da_submit_io(&mpd); | 2050 | mpage_da_submit_io(&mpd); |
2001 | } | 2051 | } |
2002 | 2052 | ||
2053 | wbc->nr_to_write = to_write - mpd.pages_written; | ||
2003 | return ret; | 2054 | return ret; |
2004 | } | 2055 | } |
2005 | 2056 | ||
@@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page, | |||
2204 | } | 2255 | } |
2205 | 2256 | ||
2206 | /* | 2257 | /* |
2207 | * For now just follow the DIO way to estimate the max credits | 2258 | * This is called via ext4_da_writepages() to |
2208 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2259 | * calulate the total number of credits to reserve to fit |
2209 | * todo: need to calculate the max credits need for | 2260 | * a single extent allocation into a single transaction, |
2210 | * extent based files, currently the DIO credits is based on | 2261 | * ext4_da_writpeages() will loop calling this before |
2211 | * indirect-blocks mapping way. | 2262 | * the block allocation. |
2212 | * | ||
2213 | * Probably should have a generic way to calculate credits | ||
2214 | * for DIO, writepages, and truncate | ||
2215 | */ | 2263 | */ |
2216 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2264 | |
2217 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2265 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2266 | { | ||
2267 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
2268 | |||
2269 | /* | ||
2270 | * With non-extent format the journal credit needed to | ||
2271 | * insert nrblocks contiguous block is dependent on | ||
2272 | * number of contiguous block. So we will limit | ||
2273 | * number of contiguous block to a sane value | ||
2274 | */ | ||
2275 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
2276 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
2277 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
2278 | |||
2279 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
2280 | } | ||
2218 | 2281 | ||
2219 | static int ext4_da_writepages(struct address_space *mapping, | 2282 | static int ext4_da_writepages(struct address_space *mapping, |
2220 | struct writeback_control *wbc) | 2283 | struct writeback_control *wbc) |
2221 | { | 2284 | { |
2222 | struct inode *inode = mapping->host; | ||
2223 | handle_t *handle = NULL; | 2285 | handle_t *handle = NULL; |
2224 | int needed_blocks; | ||
2225 | int ret = 0; | ||
2226 | long to_write; | ||
2227 | loff_t range_start = 0; | 2286 | loff_t range_start = 0; |
2287 | struct inode *inode = mapping->host; | ||
2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
2289 | long to_write, pages_skipped = 0; | ||
2290 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
2228 | 2291 | ||
2229 | /* | 2292 | /* |
2230 | * No pages to write? This is mainly a kludge to avoid starting | 2293 | * No pages to write? This is mainly a kludge to avoid starting |
2231 | * a transaction for special inodes like journal inode on last iput() | 2294 | * a transaction for special inodes like journal inode on last iput() |
2232 | * because that could violate lock ordering on umount | 2295 | * because that could violate lock ordering on umount |
2233 | */ | 2296 | */ |
2234 | if (!mapping->nrpages) | 2297 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2235 | return 0; | 2298 | return 0; |
2236 | |||
2237 | /* | 2299 | /* |
2238 | * Estimate the worse case needed credits to write out | 2300 | * Make sure nr_to_write is >= sbi->s_mb_stream_request |
2239 | * EXT4_MAX_BUF_BLOCKS pages | 2301 | * This make sure small files blocks are allocated in |
2302 | * single attempt. This ensure that small files | ||
2303 | * get less fragmented. | ||
2240 | */ | 2304 | */ |
2241 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2305 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { |
2306 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2307 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2308 | } | ||
2242 | 2309 | ||
2243 | to_write = wbc->nr_to_write; | 2310 | if (!wbc->range_cyclic) |
2244 | if (!wbc->range_cyclic) { | ||
2245 | /* | 2311 | /* |
2246 | * If range_cyclic is not set force range_cont | 2312 | * If range_cyclic is not set force range_cont |
2247 | * and save the old writeback_index | 2313 | * and save the old writeback_index |
2248 | */ | 2314 | */ |
2249 | wbc->range_cont = 1; | 2315 | wbc->range_cont = 1; |
2250 | range_start = wbc->range_start; | ||
2251 | } | ||
2252 | 2316 | ||
2253 | while (!ret && to_write) { | 2317 | range_start = wbc->range_start; |
2318 | pages_skipped = wbc->pages_skipped; | ||
2319 | |||
2320 | restart_loop: | ||
2321 | to_write = wbc->nr_to_write; | ||
2322 | while (!ret && to_write > 0) { | ||
2323 | |||
2324 | /* | ||
2325 | * we insert one extent at a time. So we need | ||
2326 | * credit needed for single extent allocation. | ||
2327 | * journalled mode is currently not supported | ||
2328 | * by delalloc | ||
2329 | */ | ||
2330 | BUG_ON(ext4_should_journal_data(inode)); | ||
2331 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | ||
2332 | |||
2254 | /* start a new transaction*/ | 2333 | /* start a new transaction*/ |
2255 | handle = ext4_journal_start(inode, needed_blocks); | 2334 | handle = ext4_journal_start(inode, needed_blocks); |
2256 | if (IS_ERR(handle)) { | 2335 | if (IS_ERR(handle)) { |
2257 | ret = PTR_ERR(handle); | 2336 | ret = PTR_ERR(handle); |
2337 | printk(KERN_EMERG "%s: jbd2_start: " | ||
2338 | "%ld pages, ino %lu; err %d\n", __func__, | ||
2339 | wbc->nr_to_write, inode->i_ino, ret); | ||
2340 | dump_stack(); | ||
2258 | goto out_writepages; | 2341 | goto out_writepages; |
2259 | } | 2342 | } |
2260 | if (ext4_should_order_data(inode)) { | 2343 | if (ext4_should_order_data(inode)) { |
2261 | /* | 2344 | /* |
2262 | * With ordered mode we need to add | 2345 | * With ordered mode we need to add |
2263 | * the inode to the journal handle | 2346 | * the inode to the journal handl |
2264 | * when we do block allocation. | 2347 | * when we do block allocation. |
2265 | */ | 2348 | */ |
2266 | ret = ext4_jbd2_file_inode(handle, inode); | 2349 | ret = ext4_jbd2_file_inode(handle, inode); |
@@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2268 | ext4_journal_stop(handle); | 2351 | ext4_journal_stop(handle); |
2269 | goto out_writepages; | 2352 | goto out_writepages; |
2270 | } | 2353 | } |
2271 | |||
2272 | } | 2354 | } |
2273 | /* | ||
2274 | * set the max dirty pages could be write at a time | ||
2275 | * to fit into the reserved transaction credits | ||
2276 | */ | ||
2277 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
2278 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
2279 | 2355 | ||
2280 | to_write -= wbc->nr_to_write; | 2356 | to_write -= wbc->nr_to_write; |
2281 | ret = mpage_da_writepages(mapping, wbc, | 2357 | ret = mpage_da_writepages(mapping, wbc, |
2282 | ext4_da_get_block_write); | 2358 | ext4_da_get_block_write); |
2283 | ext4_journal_stop(handle); | 2359 | ext4_journal_stop(handle); |
2284 | if (wbc->nr_to_write) { | 2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { |
2361 | /* | ||
2362 | * got one extent now try with | ||
2363 | * rest of the pages | ||
2364 | */ | ||
2365 | to_write += wbc->nr_to_write; | ||
2366 | ret = 0; | ||
2367 | } else if (wbc->nr_to_write) { | ||
2285 | /* | 2368 | /* |
2286 | * There is no more writeout needed | 2369 | * There is no more writeout needed |
2287 | * or we requested for a noblocking writeout | 2370 | * or we requested for a noblocking writeout |
@@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2293 | wbc->nr_to_write = to_write; | 2376 | wbc->nr_to_write = to_write; |
2294 | } | 2377 | } |
2295 | 2378 | ||
2296 | out_writepages: | 2379 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { |
2297 | wbc->nr_to_write = to_write; | 2380 | /* We skipped pages in this loop */ |
2298 | if (range_start) | ||
2299 | wbc->range_start = range_start; | 2381 | wbc->range_start = range_start; |
2382 | wbc->nr_to_write = to_write + | ||
2383 | wbc->pages_skipped - pages_skipped; | ||
2384 | wbc->pages_skipped = pages_skipped; | ||
2385 | goto restart_loop; | ||
2386 | } | ||
2387 | |||
2388 | out_writepages: | ||
2389 | wbc->nr_to_write = to_write - nr_to_writebump; | ||
2390 | wbc->range_start = range_start; | ||
2300 | return ret; | 2391 | return ret; |
2301 | } | 2392 | } |
2302 | 2393 | ||
@@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode) | |||
3486 | * modify the block allocation tree. | 3577 | * modify the block allocation tree. |
3487 | */ | 3578 | */ |
3488 | down_write(&ei->i_data_sem); | 3579 | down_write(&ei->i_data_sem); |
3580 | |||
3581 | ext4_discard_reservation(inode); | ||
3582 | |||
3489 | /* | 3583 | /* |
3490 | * The orphan list entry will now protect us from any crash which | 3584 | * The orphan list entry will now protect us from any crash which |
3491 | * occurs before the truncate completes, so it is now safe to propagate | 3585 | * occurs before the truncate completes, so it is now safe to propagate |
@@ -3555,8 +3649,6 @@ do_indirects: | |||
3555 | ; | 3649 | ; |
3556 | } | 3650 | } |
3557 | 3651 | ||
3558 | ext4_discard_reservation(inode); | ||
3559 | |||
3560 | up_write(&ei->i_data_sem); | 3652 | up_write(&ei->i_data_sem); |
3561 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3653 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3562 | ext4_mark_inode_dirty(handle, inode); | 3654 | ext4_mark_inode_dirty(handle, inode); |
@@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4324 | return 0; | 4416 | return 0; |
4325 | } | 4417 | } |
4326 | 4418 | ||
4419 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
4420 | int chunk) | ||
4421 | { | ||
4422 | int indirects; | ||
4423 | |||
4424 | /* if nrblocks are contiguous */ | ||
4425 | if (chunk) { | ||
4426 | /* | ||
4427 | * With N contiguous data blocks, it need at most | ||
4428 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | ||
4429 | * 2 dindirect blocks | ||
4430 | * 1 tindirect block | ||
4431 | */ | ||
4432 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4433 | return indirects + 3; | ||
4434 | } | ||
4435 | /* | ||
4436 | * if nrblocks are not contiguous, worse case, each block touch | ||
4437 | * a indirect block, and each indirect block touch a double indirect | ||
4438 | * block, plus a triple indirect block | ||
4439 | */ | ||
4440 | indirects = nrblocks * 2 + 1; | ||
4441 | return indirects; | ||
4442 | } | ||
4443 | |||
4444 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4445 | { | ||
4446 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
4447 | return ext4_indirect_trans_blocks(inode, nrblocks, 0); | ||
4448 | return ext4_ext_index_trans_blocks(inode, nrblocks, 0); | ||
4449 | } | ||
4327 | /* | 4450 | /* |
4328 | * How many blocks doth make a writepage()? | 4451 | * Account for index blocks, block groups bitmaps and block group |
4329 | * | 4452 | * descriptor blocks if modify datablocks and index blocks |
4330 | * With N blocks per page, it may be: | 4453 | * worse case, the indexs blocks spread over different block groups |
4331 | * N data blocks | ||
4332 | * 2 indirect block | ||
4333 | * 2 dindirect | ||
4334 | * 1 tindirect | ||
4335 | * N+5 bitmap blocks (from the above) | ||
4336 | * N+5 group descriptor summary blocks | ||
4337 | * 1 inode block | ||
4338 | * 1 superblock. | ||
4339 | * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files | ||
4340 | * | 4454 | * |
4341 | * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS | 4455 | * If datablocks are discontiguous, they are possible to spread over |
4456 | * different block groups too. If they are contiugous, with flexbg, | ||
4457 | * they could still across block group boundary. | ||
4342 | * | 4458 | * |
4343 | * With ordered or writeback data it's the same, less the N data blocks. | 4459 | * Also account for superblock, inode, quota and xattr blocks |
4460 | */ | ||
4461 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4462 | { | ||
4463 | int groups, gdpblocks; | ||
4464 | int idxblocks; | ||
4465 | int ret = 0; | ||
4466 | |||
4467 | /* | ||
4468 | * How many index blocks need to touch to modify nrblocks? | ||
4469 | * The "Chunk" flag indicating whether the nrblocks is | ||
4470 | * physically contiguous on disk | ||
4471 | * | ||
4472 | * For Direct IO and fallocate, they calls get_block to allocate | ||
4473 | * one single extent at a time, so they could set the "Chunk" flag | ||
4474 | */ | ||
4475 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | ||
4476 | |||
4477 | ret = idxblocks; | ||
4478 | |||
4479 | /* | ||
4480 | * Now let's see how many group bitmaps and group descriptors need | ||
4481 | * to account | ||
4482 | */ | ||
4483 | groups = idxblocks; | ||
4484 | if (chunk) | ||
4485 | groups += 1; | ||
4486 | else | ||
4487 | groups += nrblocks; | ||
4488 | |||
4489 | gdpblocks = groups; | ||
4490 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | ||
4491 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | ||
4492 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | ||
4493 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | ||
4494 | |||
4495 | /* bitmaps and block group descriptor blocks */ | ||
4496 | ret += groups + gdpblocks; | ||
4497 | |||
4498 | /* Blocks for super block, inode, quota and xattr blocks */ | ||
4499 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
4500 | |||
4501 | return ret; | ||
4502 | } | ||
4503 | |||
4504 | /* | ||
4505 | * Calulate the total number of credits to reserve to fit | ||
4506 | * the modification of a single pages into a single transaction, | ||
4507 | * which may include multiple chunks of block allocations. | ||
4344 | * | 4508 | * |
4345 | * If the inode's direct blocks can hold an integral number of pages then a | 4509 | * This could be called via ext4_write_begin() |
4346 | * page cannot straddle two indirect blocks, and we can only touch one indirect | ||
4347 | * and dindirect block, and the "5" above becomes "3". | ||
4348 | * | 4510 | * |
4349 | * This still overestimates under most circumstances. If we were to pass the | 4511 | * We need to consider the worse case, when |
4350 | * start and end offsets in here as well we could do block_to_path() on each | 4512 | * one new block per extent. |
4351 | * block and work out the exact number of indirects which are touched. Pah. | ||
4352 | */ | 4513 | */ |
4353 | |||
4354 | int ext4_writepage_trans_blocks(struct inode *inode) | 4514 | int ext4_writepage_trans_blocks(struct inode *inode) |
4355 | { | 4515 | { |
4356 | int bpp = ext4_journal_blocks_per_page(inode); | 4516 | int bpp = ext4_journal_blocks_per_page(inode); |
4357 | int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; | ||
4358 | int ret; | 4517 | int ret; |
4359 | 4518 | ||
4360 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 4519 | ret = ext4_meta_trans_blocks(inode, bpp, 0); |
4361 | return ext4_ext_writepage_trans_blocks(inode, bpp); | ||
4362 | 4520 | ||
4521 | /* Account for data blocks for journalled mode */ | ||
4363 | if (ext4_should_journal_data(inode)) | 4522 | if (ext4_should_journal_data(inode)) |
4364 | ret = 3 * (bpp + indirects) + 2; | 4523 | ret += bpp; |
4365 | else | ||
4366 | ret = 2 * (bpp + indirects) + 2; | ||
4367 | |||
4368 | #ifdef CONFIG_QUOTA | ||
4369 | /* We know that structure was already allocated during DQUOT_INIT so | ||
4370 | * we will be updating only the data blocks + inodes */ | ||
4371 | ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
4372 | #endif | ||
4373 | |||
4374 | return ret; | 4524 | return ret; |
4375 | } | 4525 | } |
4376 | 4526 | ||
4377 | /* | 4527 | /* |
4528 | * Calculate the journal credits for a chunk of data modification. | ||
4529 | * | ||
4530 | * This is called from DIO, fallocate or whoever calling | ||
4531 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | ||
4532 | * | ||
4533 | * journal buffers for data blocks are not included here, as DIO | ||
4534 | * and fallocate do no need to journal data buffers. | ||
4535 | */ | ||
4536 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
4537 | { | ||
4538 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
4539 | } | ||
4540 | |||
4541 | /* | ||
4378 | * The caller must have previously called ext4_reserve_inode_write(). | 4542 | * The caller must have previously called ext4_reserve_inode_write(). |
4379 | * Give this, we know that the caller already has write access to iloc->bh. | 4543 | * Give this, we know that the caller already has write access to iloc->bh. |
4380 | */ | 4544 | */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3282 | } |
3283 | 3283 | ||
3284 | /* | 3284 | /* |
3285 | * Return the prealloc space that have minimal distance | ||
3286 | * from the goal block. @cpa is the prealloc | ||
3287 | * space that is having currently known minimal distance | ||
3288 | * from the goal block. | ||
3289 | */ | ||
3290 | static struct ext4_prealloc_space * | ||
3291 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3292 | struct ext4_prealloc_space *pa, | ||
3293 | struct ext4_prealloc_space *cpa) | ||
3294 | { | ||
3295 | ext4_fsblk_t cur_distance, new_distance; | ||
3296 | |||
3297 | if (cpa == NULL) { | ||
3298 | atomic_inc(&pa->pa_count); | ||
3299 | return pa; | ||
3300 | } | ||
3301 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3302 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3303 | |||
3304 | if (cur_distance < new_distance) | ||
3305 | return cpa; | ||
3306 | |||
3307 | /* drop the previous reference */ | ||
3308 | atomic_dec(&cpa->pa_count); | ||
3309 | atomic_inc(&pa->pa_count); | ||
3310 | return pa; | ||
3311 | } | ||
3312 | |||
3313 | /* | ||
3285 | * search goal blocks in preallocated space | 3314 | * search goal blocks in preallocated space |
3286 | */ | 3315 | */ |
3287 | static noinline_for_stack int | 3316 | static noinline_for_stack int |
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3319 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3320 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3321 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3322 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3323 | ext4_fsblk_t goal_block; | ||
3294 | 3324 | ||
3295 | /* only data can be preallocated */ | 3325 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3326 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3363 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3364 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3365 | ||
3366 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3367 | ac->ac_g_ex.fe_start + | ||
3368 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3369 | /* | ||
3370 | * search for the prealloc space that is having | ||
3371 | * minimal distance from the goal block. | ||
3372 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3373 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3374 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3375 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3377 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3378 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3379 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3380 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3381 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3382 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3383 | } |
3350 | spin_unlock(&pa->pa_lock); | 3384 | spin_unlock(&pa->pa_lock); |
3351 | } | 3385 | } |
3352 | rcu_read_unlock(); | 3386 | rcu_read_unlock(); |
3353 | } | 3387 | } |
3388 | if (cpa) { | ||
3389 | ext4_mb_use_group_pa(ac, cpa); | ||
3390 | ac->ac_criteria = 20; | ||
3391 | return 1; | ||
3392 | } | ||
3354 | return 0; | 3393 | return 0; |
3355 | } | 3394 | } |
3356 | 3395 | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..46fc0b5b12ba 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
53 | * credit. But below we try to not accumalate too much | 53 | * credit. But below we try to not accumalate too much |
54 | * of them by restarting the journal. | 54 | * of them by restarting the journal. |
55 | */ | 55 | */ |
56 | needed = ext4_ext_calc_credits_for_insert(inode, path); | 56 | needed = ext4_ext_calc_credits_for_single_extent(inode, |
57 | lb->last_block - lb->first_block + 1, path); | ||
57 | 58 | ||
58 | /* | 59 | /* |
59 | * Make sure the credit we accumalated is not really high | 60 | * Make sure the credit we accumalated is not really high |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b3d35604ea18 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
773 | 773 | ||
774 | if (reserved_gdb || gdb_off == 0) { | 774 | if (reserved_gdb || gdb_off == 0) { |
775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
776 | EXT4_FEATURE_COMPAT_RESIZE_INODE)){ | 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
777 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
777 | ext4_warning(sb, __func__, | 778 | ext4_warning(sb, __func__, |
778 | "No reserved GDT blocks, can't resize"); | 779 | "No reserved GDT blocks, can't resize"); |
779 | return -EPERM; | 780 | return -EPERM; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..566344b926b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
568 | #endif | 568 | #endif |
569 | ei->i_block_alloc_info = NULL; | 569 | ei->i_block_alloc_info = NULL; |
570 | ei->vfs_inode.i_version = 1; | 570 | ei->vfs_inode.i_version = 1; |
571 | ei->vfs_inode.i_data.writeback_index = 0; | ||
571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 573 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
573 | spin_lock_init(&ei->i_prealloc_lock); | 574 | spin_lock_init(&ei->i_prealloc_lock); |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6d266d793e2c..80ff3381fa21 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) | |||
562 | struct buffer_head *bh; | 562 | struct buffer_head *bh; |
563 | struct msdos_dir_entry *raw_entry; | 563 | struct msdos_dir_entry *raw_entry; |
564 | loff_t i_pos; | 564 | loff_t i_pos; |
565 | int err = 0; | 565 | int err; |
566 | 566 | ||
567 | retry: | 567 | retry: |
568 | i_pos = MSDOS_I(inode)->i_pos; | 568 | i_pos = MSDOS_I(inode)->i_pos; |
569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
570 | return 0; | 570 | return 0; |
571 | 571 | ||
572 | lock_super(sb); | ||
573 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 572 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
574 | if (!bh) { | 573 | if (!bh) { |
575 | printk(KERN_ERR "FAT: unable to read inode block " | 574 | printk(KERN_ERR "FAT: unable to read inode block " |
576 | "for updating (i_pos %lld)\n", i_pos); | 575 | "for updating (i_pos %lld)\n", i_pos); |
577 | err = -EIO; | 576 | return -EIO; |
578 | goto out; | ||
579 | } | 577 | } |
580 | spin_lock(&sbi->inode_hash_lock); | 578 | spin_lock(&sbi->inode_hash_lock); |
581 | if (i_pos != MSDOS_I(inode)->i_pos) { | 579 | if (i_pos != MSDOS_I(inode)->i_pos) { |
582 | spin_unlock(&sbi->inode_hash_lock); | 580 | spin_unlock(&sbi->inode_hash_lock); |
583 | brelse(bh); | 581 | brelse(bh); |
584 | unlock_super(sb); | ||
585 | goto retry; | 582 | goto retry; |
586 | } | 583 | } |
587 | 584 | ||
@@ -607,11 +604,10 @@ retry: | |||
607 | } | 604 | } |
608 | spin_unlock(&sbi->inode_hash_lock); | 605 | spin_unlock(&sbi->inode_hash_lock); |
609 | mark_buffer_dirty(bh); | 606 | mark_buffer_dirty(bh); |
607 | err = 0; | ||
610 | if (wait) | 608 | if (wait) |
611 | err = sync_dirty_buffer(bh); | 609 | err = sync_dirty_buffer(bh); |
612 | brelse(bh); | 610 | brelse(bh); |
613 | out: | ||
614 | unlock_super(sb); | ||
615 | return err; | 611 | return err; |
616 | } | 612 | } |
617 | 613 | ||
diff --git a/fs/ioprio.c b/fs/ioprio.c index c4a1c3c65aac..da3cc460d4df 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
115 | pgrp = task_pgrp(current); | 115 | pgrp = task_pgrp(current); |
116 | else | 116 | else |
117 | pgrp = find_vpid(who); | 117 | pgrp = find_vpid(who); |
118 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 118 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
119 | ret = set_task_ioprio(p, ioprio); | 119 | ret = set_task_ioprio(p, ioprio); |
120 | if (ret) | 120 | if (ret) |
121 | break; | 121 | break; |
122 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 122 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
123 | break; | 123 | break; |
124 | case IOPRIO_WHO_USER: | 124 | case IOPRIO_WHO_USER: |
125 | if (!who) | 125 | if (!who) |
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
204 | pgrp = task_pgrp(current); | 204 | pgrp = task_pgrp(current); |
205 | else | 205 | else |
206 | pgrp = find_vpid(who); | 206 | pgrp = find_vpid(who); |
207 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 207 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
208 | tmpio = get_task_ioprio(p); | 208 | tmpio = get_task_ioprio(p); |
209 | if (tmpio < 0) | 209 | if (tmpio < 0) |
210 | continue; | 210 | continue; |
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
212 | ret = tmpio; | 212 | ret = tmpio; |
213 | else | 213 | else |
214 | ret = ioprio_best(ret, tmpio); | 214 | ret = ioprio_best(ret, tmpio); |
215 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 215 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
216 | break; | 216 | break; |
217 | case IOPRIO_WHO_USER: | 217 | case IOPRIO_WHO_USER: |
218 | if (!who) | 218 | if (!who) |
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #ifndef _JFFS2_FS_I | 12 | #ifndef _JFFS2_FS_I |
13 | #define _JFFS2_FS_I | 13 | #define _JFFS2_FS_I |
14 | 14 | ||
15 | #include <linux/version.h> | ||
16 | #include <linux/rbtree.h> | 15 | #include <linux/rbtree.h> |
17 | #include <linux/posix_acl.h> | 16 | #include <linux/posix_acl.h> |
18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 79ecd281d2cb..3f87d2632947 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c | |||
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
52 | } | 52 | } |
53 | 53 | ||
54 | seq_printf(m, | 54 | seq_printf(m, |
55 | "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 55 | "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
56 | vma->vm_start, | 56 | vma->vm_start, |
57 | vma->vm_end, | 57 | vma->vm_end, |
58 | flags & VM_READ ? 'r' : '-', | 58 | flags & VM_READ ? 'r' : '-', |
59 | flags & VM_WRITE ? 'w' : '-', | 59 | flags & VM_WRITE ? 'w' : '-', |
60 | flags & VM_EXEC ? 'x' : '-', | 60 | flags & VM_EXEC ? 'x' : '-', |
61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
62 | vma->vm_pgoff << PAGE_SHIFT, | 62 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
63 | MAJOR(dev), MINOR(dev), ino, &len); | 63 | MAJOR(dev), MINOR(dev), ino, &len); |
64 | 64 | ||
65 | if (file) { | 65 | if (file) { |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7546a918f790..73d1891ee625 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v) | |||
219 | ino = inode->i_ino; | 219 | ino = inode->i_ino; |
220 | } | 220 | } |
221 | 221 | ||
222 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 222 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
223 | vma->vm_start, | 223 | vma->vm_start, |
224 | vma->vm_end, | 224 | vma->vm_end, |
225 | flags & VM_READ ? 'r' : '-', | 225 | flags & VM_READ ? 'r' : '-', |
226 | flags & VM_WRITE ? 'w' : '-', | 226 | flags & VM_WRITE ? 'w' : '-', |
227 | flags & VM_EXEC ? 'x' : '-', | 227 | flags & VM_EXEC ? 'x' : '-', |
228 | flags & VM_MAYSHARE ? 's' : 'p', | 228 | flags & VM_MAYSHARE ? 's' : 'p', |
229 | vma->vm_pgoff << PAGE_SHIFT, | 229 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
230 | MAJOR(dev), MINOR(dev), ino, &len); | 230 | MAJOR(dev), MINOR(dev), ino, &len); |
231 | 231 | ||
232 | /* | 232 | /* |
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #ifndef __XFS_DMAPI_H__ | 18 | #ifndef __XFS_DMAPI_H__ |
19 | #define __XFS_DMAPI_H__ | 19 | #define __XFS_DMAPI_H__ |
20 | 20 | ||
21 | #include <linux/version.h> | ||
22 | /* Values used to define the on-disk version of dm_attrname_t. All | 21 | /* Values used to define the on-disk version of dm_attrname_t. All |
23 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". | 22 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". |
24 | * | 23 | * |