diff options
-rw-r--r-- | Documentation/filesystems/ext4/bigalloc.rst | 32 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/blockgroup.rst | 10 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/blocks.rst | 4 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/directory.rst | 2 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/group_descr.rst | 9 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/inodes.rst | 4 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/super.rst | 20 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 189 | ||||
-rw-r--r-- | fs/ext4/dir.c | 7 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 64 | ||||
-rw-r--r-- | fs/ext4/extents.c | 98 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 521 | ||||
-rw-r--r-- | fs/ext4/extents_status.h | 8 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/ext4/hash.c | 2 | ||||
-rw-r--r-- | fs/ext4/inline.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 103 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 98 | ||||
-rw-r--r-- | fs/ext4/namei.c | 4 | ||||
-rw-r--r-- | fs/ext4/super.c | 7 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 4 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 3 | ||||
-rw-r--r-- | fs/unicode/utf8-core.c | 2 | ||||
-rw-r--r-- | fs/unicode/utf8-selftest.c | 4 |
24 files changed, 890 insertions, 309 deletions
diff --git a/Documentation/filesystems/ext4/bigalloc.rst b/Documentation/filesystems/ext4/bigalloc.rst index c6d88557553c..72075aa608e4 100644 --- a/Documentation/filesystems/ext4/bigalloc.rst +++ b/Documentation/filesystems/ext4/bigalloc.rst | |||
@@ -9,14 +9,26 @@ ext4 code is not prepared to handle the case where the block size | |||
9 | exceeds the page size. However, for a filesystem of mostly huge files, | 9 | exceeds the page size. However, for a filesystem of mostly huge files, |
10 | it is desirable to be able to allocate disk blocks in units of multiple | 10 | it is desirable to be able to allocate disk blocks in units of multiple |
11 | blocks to reduce both fragmentation and metadata overhead. The | 11 | blocks to reduce both fragmentation and metadata overhead. The |
12 | `bigalloc <Bigalloc>`__ feature provides exactly this ability. The | 12 | bigalloc feature provides exactly this ability. |
13 | administrator can set a block cluster size at mkfs time (which is stored | 13 | |
14 | in the s\_log\_cluster\_size field in the superblock); from then on, the | 14 | The bigalloc feature (EXT4_FEATURE_RO_COMPAT_BIGALLOC) changes ext4 to |
15 | block bitmaps track clusters, not individual blocks. This means that | 15 | use clustered allocation, so that each bit in the ext4 block allocation |
16 | block groups can be several gigabytes in size (instead of just 128MiB); | 16 | bitmap addresses a power of two number of blocks. For example, if the |
17 | however, the minimum allocation unit becomes a cluster, not a block, | 17 | file system is mainly going to be storing large files in the 4-32 |
18 | even for directories. TaoBao had a patchset to extend the “use units of | 18 | megabyte range, it might make sense to set a cluster size of 1 megabyte. |
19 | clusters instead of blocks” to the extent tree, though it is not clear | 19 | This means that each bit in the block allocation bitmap now addresses |
20 | where those patches went-- they eventually morphed into “extent tree v2” | 20 | 256 4k blocks. This shrinks the total size of the block allocation |
21 | but that code has not landed as of May 2015. | 21 | bitmaps for a 2T file system from 64 megabytes to 256 kilobytes. It also |
22 | means that a block group addresses 32 gigabytes instead of 128 megabytes, | ||
23 | also shrinking the amount of file system overhead for metadata. | ||
24 | |||
25 | The administrator can set a block cluster size at mkfs time (which is | ||
26 | stored in the s\_log\_cluster\_size field in the superblock); from then | ||
27 | on, the block bitmaps track clusters, not individual blocks. This means | ||
28 | that block groups can be several gigabytes in size (instead of just | ||
29 | 128MiB); however, the minimum allocation unit becomes a cluster, not a | ||
30 | block, even for directories. TaoBao had a patchset to extend the “use | ||
31 | units of clusters instead of blocks” to the extent tree, though it is | ||
32 | not clear where those patches went-- they eventually morphed into | ||
33 | “extent tree v2” but that code has not landed as of May 2015. | ||
22 | 34 | ||
diff --git a/Documentation/filesystems/ext4/blockgroup.rst b/Documentation/filesystems/ext4/blockgroup.rst index baf888e4c06a..3da156633339 100644 --- a/Documentation/filesystems/ext4/blockgroup.rst +++ b/Documentation/filesystems/ext4/blockgroup.rst | |||
@@ -71,11 +71,11 @@ if the flex\_bg size is 4, then group 0 will contain (in order) the | |||
71 | superblock, group descriptors, data block bitmaps for groups 0-3, inode | 71 | superblock, group descriptors, data block bitmaps for groups 0-3, inode |
72 | bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining | 72 | bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining |
73 | space in group 0 is for file data. The effect of this is to group the | 73 | space in group 0 is for file data. The effect of this is to group the |
74 | block metadata close together for faster loading, and to enable large | 74 | block group metadata close together for faster loading, and to enable |
75 | files to be continuous on disk. Backup copies of the superblock and | 75 | large files to be continuous on disk. Backup copies of the superblock |
76 | group descriptors are always at the beginning of block groups, even if | 76 | and group descriptors are always at the beginning of block groups, even |
77 | flex\_bg is enabled. The number of block groups that make up a flex\_bg | 77 | if flex\_bg is enabled. The number of block groups that make up a |
78 | is given by 2 ^ ``sb.s_log_groups_per_flex``. | 78 | flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``. |
79 | 79 | ||
80 | Meta Block Groups | 80 | Meta Block Groups |
81 | ----------------- | 81 | ----------------- |
diff --git a/Documentation/filesystems/ext4/blocks.rst b/Documentation/filesystems/ext4/blocks.rst index 73d4dc0f7bda..bd722ecd92d6 100644 --- a/Documentation/filesystems/ext4/blocks.rst +++ b/Documentation/filesystems/ext4/blocks.rst | |||
@@ -10,7 +10,9 @@ block groups. Block size is specified at mkfs time and typically is | |||
10 | 4KiB. You may experience mounting problems if block size is greater than | 10 | 4KiB. You may experience mounting problems if block size is greater than |
11 | page size (i.e. 64KiB blocks on an i386 which only has 4KiB memory | 11 | page size (i.e. 64KiB blocks on an i386 which only has 4KiB memory |
12 | pages). By default a filesystem can contain 2^32 blocks; if the '64bit' | 12 | pages). By default a filesystem can contain 2^32 blocks; if the '64bit' |
13 | feature is enabled, then a filesystem can have 2^64 blocks. | 13 | feature is enabled, then a filesystem can have 2^64 blocks. The location |
14 | of structures is stored in terms of the block number the structure lives | ||
15 | in and not the absolute offset on disk. | ||
14 | 16 | ||
15 | For 32-bit filesystems, limits are as follows: | 17 | For 32-bit filesystems, limits are as follows: |
16 | 18 | ||
diff --git a/Documentation/filesystems/ext4/directory.rst b/Documentation/filesystems/ext4/directory.rst index 614034e24669..073940cc64ed 100644 --- a/Documentation/filesystems/ext4/directory.rst +++ b/Documentation/filesystems/ext4/directory.rst | |||
@@ -59,7 +59,7 @@ is at most 263 bytes long, though on disk you'll need to reference | |||
59 | - File name. | 59 | - File name. |
60 | 60 | ||
61 | Since file names cannot be longer than 255 bytes, the new directory | 61 | Since file names cannot be longer than 255 bytes, the new directory |
62 | entry format shortens the rec\_len field and uses the space for a file | 62 | entry format shortens the name\_len field and uses the space for a file |
63 | type flag, probably to avoid having to load every inode during directory | 63 | type flag, probably to avoid having to load every inode during directory |
64 | tree traversal. This format is ``ext4_dir_entry_2``, which is at most | 64 | tree traversal. This format is ``ext4_dir_entry_2``, which is at most |
65 | 263 bytes long, though on disk you'll need to reference | 65 | 263 bytes long, though on disk you'll need to reference |
diff --git a/Documentation/filesystems/ext4/group_descr.rst b/Documentation/filesystems/ext4/group_descr.rst index 0f783ed88592..7ba6114e7f5c 100644 --- a/Documentation/filesystems/ext4/group_descr.rst +++ b/Documentation/filesystems/ext4/group_descr.rst | |||
@@ -99,9 +99,12 @@ The block group descriptor is laid out in ``struct ext4_group_desc``. | |||
99 | * - 0x1E | 99 | * - 0x1E |
100 | - \_\_le16 | 100 | - \_\_le16 |
101 | - bg\_checksum | 101 | - bg\_checksum |
102 | - Group descriptor checksum; crc16(sb\_uuid+group+desc) if the | 102 | - Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the |
103 | RO\_COMPAT\_GDT\_CSUM feature is set, or crc32c(sb\_uuid+group\_desc) & | 103 | RO\_COMPAT\_GDT\_CSUM feature is set, or |
104 | 0xFFFF if the RO\_COMPAT\_METADATA\_CSUM feature is set. | 104 | crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the |
105 | RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum | ||
106 | field in bg\_desc is skipped when calculating crc16 checksum, | ||
107 | and set to zero if crc32c checksum is used. | ||
105 | * - | 108 | * - |
106 | - | 109 | - |
107 | - | 110 | - |
diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst index e851e6ca31fa..a65baffb4ebf 100644 --- a/Documentation/filesystems/ext4/inodes.rst +++ b/Documentation/filesystems/ext4/inodes.rst | |||
@@ -472,8 +472,8 @@ inode, which allows struct ext4\_inode to grow for a new kernel without | |||
472 | having to upgrade all of the on-disk inodes. Access to fields beyond | 472 | having to upgrade all of the on-disk inodes. Access to fields beyond |
473 | EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within | 473 | EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within |
474 | ``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as | 474 | ``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as |
475 | of October 2013) the inode structure is 156 bytes | 475 | of August 2019) the inode structure is 160 bytes |
476 | (``i_extra_isize = 28``). The extra space between the end of the inode | 476 | (``i_extra_isize = 32``). The extra space between the end of the inode |
477 | structure and the end of the inode record can be used to store extended | 477 | structure and the end of the inode record can be used to store extended |
478 | attributes. Each inode record can be as large as the filesystem block | 478 | attributes. Each inode record can be as large as the filesystem block |
479 | size, though this is not terribly efficient. | 479 | size, though this is not terribly efficient. |
diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index 6eae92054827..93e55d7c1d40 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst | |||
@@ -58,7 +58,7 @@ The ext4 superblock is laid out as follows in | |||
58 | * - 0x1C | 58 | * - 0x1C |
59 | - \_\_le32 | 59 | - \_\_le32 |
60 | - s\_log\_cluster\_size | 60 | - s\_log\_cluster\_size |
61 | - Cluster size is (2 ^ s\_log\_cluster\_size) blocks if bigalloc is | 61 | - Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is |
62 | enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size. | 62 | enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size. |
63 | * - 0x20 | 63 | * - 0x20 |
64 | - \_\_le32 | 64 | - \_\_le32 |
@@ -447,7 +447,7 @@ The ext4 superblock is laid out as follows in | |||
447 | - Upper 8 bits of the s_wtime field. | 447 | - Upper 8 bits of the s_wtime field. |
448 | * - 0x275 | 448 | * - 0x275 |
449 | - \_\_u8 | 449 | - \_\_u8 |
450 | - s\_wtime_hi | 450 | - s\_mtime_hi |
451 | - Upper 8 bits of the s_mtime field. | 451 | - Upper 8 bits of the s_mtime field. |
452 | * - 0x276 | 452 | * - 0x276 |
453 | - \_\_u8 | 453 | - \_\_u8 |
@@ -466,12 +466,20 @@ The ext4 superblock is laid out as follows in | |||
466 | - s\_last_error_time_hi | 466 | - s\_last_error_time_hi |
467 | - Upper 8 bits of the s_last_error_time field. | 467 | - Upper 8 bits of the s_last_error_time field. |
468 | * - 0x27A | 468 | * - 0x27A |
469 | - \_\_u8[2] | 469 | - \_\_u8 |
470 | - s\_pad | 470 | - s\_pad[2] |
471 | - Zero padding. | 471 | - Zero padding. |
472 | * - 0x27C | 472 | * - 0x27C |
473 | - \_\_le16 | ||
474 | - s\_encoding | ||
475 | - Filename charset encoding. | ||
476 | * - 0x27E | ||
477 | - \_\_le16 | ||
478 | - s\_encoding_flags | ||
479 | - Filename charset encoding flags. | ||
480 | * - 0x280 | ||
473 | - \_\_le32 | 481 | - \_\_le32 |
474 | - s\_reserved[96] | 482 | - s\_reserved[95] |
475 | - Padding to the end of the block. | 483 | - Padding to the end of the block. |
476 | * - 0x3FC | 484 | * - 0x3FC |
477 | - \_\_le32 | 485 | - \_\_le32 |
@@ -617,7 +625,7 @@ following: | |||
617 | * - 0x80 | 625 | * - 0x80 |
618 | - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT). | 626 | - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT). |
619 | * - 0x100 | 627 | * - 0x100 |
620 | - Multiple mount protection. Not implemented (INCOMPAT\_MMP). | 628 | - Multiple mount protection (INCOMPAT\_MMP). |
621 | * - 0x200 | 629 | * - 0x200 |
622 | - Flexible block groups. See the earlier discussion of this feature | 630 | - Flexible block groups. See the earlier discussion of this feature |
623 | (INCOMPAT\_FLEX\_BG). | 631 | (INCOMPAT\_FLEX\_BG). |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8e83741b02e0..d4d4fdfac1a6 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -38,6 +38,7 @@ int __init ext4_init_system_zone(void) | |||
38 | 38 | ||
39 | void ext4_exit_system_zone(void) | 39 | void ext4_exit_system_zone(void) |
40 | { | 40 | { |
41 | rcu_barrier(); | ||
41 | kmem_cache_destroy(ext4_system_zone_cachep); | 42 | kmem_cache_destroy(ext4_system_zone_cachep); |
42 | } | 43 | } |
43 | 44 | ||
@@ -49,17 +50,26 @@ static inline int can_merge(struct ext4_system_zone *entry1, | |||
49 | return 0; | 50 | return 0; |
50 | } | 51 | } |
51 | 52 | ||
53 | static void release_system_zone(struct ext4_system_blocks *system_blks) | ||
54 | { | ||
55 | struct ext4_system_zone *entry, *n; | ||
56 | |||
57 | rbtree_postorder_for_each_entry_safe(entry, n, | ||
58 | &system_blks->root, node) | ||
59 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
60 | } | ||
61 | |||
52 | /* | 62 | /* |
53 | * Mark a range of blocks as belonging to the "system zone" --- that | 63 | * Mark a range of blocks as belonging to the "system zone" --- that |
54 | * is, filesystem metadata blocks which should never be used by | 64 | * is, filesystem metadata blocks which should never be used by |
55 | * inodes. | 65 | * inodes. |
56 | */ | 66 | */ |
57 | static int add_system_zone(struct ext4_sb_info *sbi, | 67 | static int add_system_zone(struct ext4_system_blocks *system_blks, |
58 | ext4_fsblk_t start_blk, | 68 | ext4_fsblk_t start_blk, |
59 | unsigned int count) | 69 | unsigned int count) |
60 | { | 70 | { |
61 | struct ext4_system_zone *new_entry = NULL, *entry; | 71 | struct ext4_system_zone *new_entry = NULL, *entry; |
62 | struct rb_node **n = &sbi->system_blks.rb_node, *node; | 72 | struct rb_node **n = &system_blks->root.rb_node, *node; |
63 | struct rb_node *parent = NULL, *new_node = NULL; | 73 | struct rb_node *parent = NULL, *new_node = NULL; |
64 | 74 | ||
65 | while (*n) { | 75 | while (*n) { |
@@ -91,7 +101,7 @@ static int add_system_zone(struct ext4_sb_info *sbi, | |||
91 | new_node = &new_entry->node; | 101 | new_node = &new_entry->node; |
92 | 102 | ||
93 | rb_link_node(new_node, parent, n); | 103 | rb_link_node(new_node, parent, n); |
94 | rb_insert_color(new_node, &sbi->system_blks); | 104 | rb_insert_color(new_node, &system_blks->root); |
95 | } | 105 | } |
96 | 106 | ||
97 | /* Can we merge to the left? */ | 107 | /* Can we merge to the left? */ |
@@ -101,7 +111,7 @@ static int add_system_zone(struct ext4_sb_info *sbi, | |||
101 | if (can_merge(entry, new_entry)) { | 111 | if (can_merge(entry, new_entry)) { |
102 | new_entry->start_blk = entry->start_blk; | 112 | new_entry->start_blk = entry->start_blk; |
103 | new_entry->count += entry->count; | 113 | new_entry->count += entry->count; |
104 | rb_erase(node, &sbi->system_blks); | 114 | rb_erase(node, &system_blks->root); |
105 | kmem_cache_free(ext4_system_zone_cachep, entry); | 115 | kmem_cache_free(ext4_system_zone_cachep, entry); |
106 | } | 116 | } |
107 | } | 117 | } |
@@ -112,7 +122,7 @@ static int add_system_zone(struct ext4_sb_info *sbi, | |||
112 | entry = rb_entry(node, struct ext4_system_zone, node); | 122 | entry = rb_entry(node, struct ext4_system_zone, node); |
113 | if (can_merge(new_entry, entry)) { | 123 | if (can_merge(new_entry, entry)) { |
114 | new_entry->count += entry->count; | 124 | new_entry->count += entry->count; |
115 | rb_erase(node, &sbi->system_blks); | 125 | rb_erase(node, &system_blks->root); |
116 | kmem_cache_free(ext4_system_zone_cachep, entry); | 126 | kmem_cache_free(ext4_system_zone_cachep, entry); |
117 | } | 127 | } |
118 | } | 128 | } |
@@ -126,7 +136,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi) | |||
126 | int first = 1; | 136 | int first = 1; |
127 | 137 | ||
128 | printk(KERN_INFO "System zones: "); | 138 | printk(KERN_INFO "System zones: "); |
129 | node = rb_first(&sbi->system_blks); | 139 | node = rb_first(&sbi->system_blks->root); |
130 | while (node) { | 140 | while (node) { |
131 | entry = rb_entry(node, struct ext4_system_zone, node); | 141 | entry = rb_entry(node, struct ext4_system_zone, node); |
132 | printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ", | 142 | printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ", |
@@ -137,7 +147,47 @@ static void debug_print_tree(struct ext4_sb_info *sbi) | |||
137 | printk(KERN_CONT "\n"); | 147 | printk(KERN_CONT "\n"); |
138 | } | 148 | } |
139 | 149 | ||
140 | static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) | 150 | /* |
151 | * Returns 1 if the passed-in block region (start_blk, | ||
152 | * start_blk+count) is valid; 0 if some part of the block region | ||
153 | * overlaps with filesystem metadata blocks. | ||
154 | */ | ||
155 | static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, | ||
156 | struct ext4_system_blocks *system_blks, | ||
157 | ext4_fsblk_t start_blk, | ||
158 | unsigned int count) | ||
159 | { | ||
160 | struct ext4_system_zone *entry; | ||
161 | struct rb_node *n; | ||
162 | |||
163 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | ||
164 | (start_blk + count < start_blk) || | ||
165 | (start_blk + count > ext4_blocks_count(sbi->s_es))) { | ||
166 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | if (system_blks == NULL) | ||
171 | return 1; | ||
172 | |||
173 | n = system_blks->root.rb_node; | ||
174 | while (n) { | ||
175 | entry = rb_entry(n, struct ext4_system_zone, node); | ||
176 | if (start_blk + count - 1 < entry->start_blk) | ||
177 | n = n->rb_left; | ||
178 | else if (start_blk >= (entry->start_blk + entry->count)) | ||
179 | n = n->rb_right; | ||
180 | else { | ||
181 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
182 | return 0; | ||
183 | } | ||
184 | } | ||
185 | return 1; | ||
186 | } | ||
187 | |||
188 | static int ext4_protect_reserved_inode(struct super_block *sb, | ||
189 | struct ext4_system_blocks *system_blks, | ||
190 | u32 ino) | ||
141 | { | 191 | { |
142 | struct inode *inode; | 192 | struct inode *inode; |
143 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 193 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -163,14 +213,15 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) | |||
163 | if (n == 0) { | 213 | if (n == 0) { |
164 | i++; | 214 | i++; |
165 | } else { | 215 | } else { |
166 | if (!ext4_data_block_valid(sbi, map.m_pblk, n)) { | 216 | if (!ext4_data_block_valid_rcu(sbi, system_blks, |
217 | map.m_pblk, n)) { | ||
167 | ext4_error(sb, "blocks %llu-%llu from inode %u " | 218 | ext4_error(sb, "blocks %llu-%llu from inode %u " |
168 | "overlap system zone", map.m_pblk, | 219 | "overlap system zone", map.m_pblk, |
169 | map.m_pblk + map.m_len - 1, ino); | 220 | map.m_pblk + map.m_len - 1, ino); |
170 | err = -EFSCORRUPTED; | 221 | err = -EFSCORRUPTED; |
171 | break; | 222 | break; |
172 | } | 223 | } |
173 | err = add_system_zone(sbi, map.m_pblk, n); | 224 | err = add_system_zone(system_blks, map.m_pblk, n); |
174 | if (err < 0) | 225 | if (err < 0) |
175 | break; | 226 | break; |
176 | i += n; | 227 | i += n; |
@@ -180,94 +231,130 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) | |||
180 | return err; | 231 | return err; |
181 | } | 232 | } |
182 | 233 | ||
234 | static void ext4_destroy_system_zone(struct rcu_head *rcu) | ||
235 | { | ||
236 | struct ext4_system_blocks *system_blks; | ||
237 | |||
238 | system_blks = container_of(rcu, struct ext4_system_blocks, rcu); | ||
239 | release_system_zone(system_blks); | ||
240 | kfree(system_blks); | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Build system zone rbtree which is used for block validity checking. | ||
245 | * | ||
246 | * The update of system_blks pointer in this function is protected by | ||
247 | * sb->s_umount semaphore. However we have to be careful as we can be | ||
248 | * racing with ext4_data_block_valid() calls reading system_blks rbtree | ||
249 | * protected only by RCU. That's why we first build the rbtree and then | ||
250 | * swap it in place. | ||
251 | */ | ||
183 | int ext4_setup_system_zone(struct super_block *sb) | 252 | int ext4_setup_system_zone(struct super_block *sb) |
184 | { | 253 | { |
185 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 254 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
186 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 255 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
256 | struct ext4_system_blocks *system_blks; | ||
187 | struct ext4_group_desc *gdp; | 257 | struct ext4_group_desc *gdp; |
188 | ext4_group_t i; | 258 | ext4_group_t i; |
189 | int flex_size = ext4_flex_bg_size(sbi); | 259 | int flex_size = ext4_flex_bg_size(sbi); |
190 | int ret; | 260 | int ret; |
191 | 261 | ||
192 | if (!test_opt(sb, BLOCK_VALIDITY)) { | 262 | if (!test_opt(sb, BLOCK_VALIDITY)) { |
193 | if (sbi->system_blks.rb_node) | 263 | if (sbi->system_blks) |
194 | ext4_release_system_zone(sb); | 264 | ext4_release_system_zone(sb); |
195 | return 0; | 265 | return 0; |
196 | } | 266 | } |
197 | if (sbi->system_blks.rb_node) | 267 | if (sbi->system_blks) |
198 | return 0; | 268 | return 0; |
199 | 269 | ||
270 | system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); | ||
271 | if (!system_blks) | ||
272 | return -ENOMEM; | ||
273 | |||
200 | for (i=0; i < ngroups; i++) { | 274 | for (i=0; i < ngroups; i++) { |
201 | cond_resched(); | 275 | cond_resched(); |
202 | if (ext4_bg_has_super(sb, i) && | 276 | if (ext4_bg_has_super(sb, i) && |
203 | ((i < 5) || ((i % flex_size) == 0))) | 277 | ((i < 5) || ((i % flex_size) == 0))) |
204 | add_system_zone(sbi, ext4_group_first_block_no(sb, i), | 278 | add_system_zone(system_blks, |
279 | ext4_group_first_block_no(sb, i), | ||
205 | ext4_bg_num_gdb(sb, i) + 1); | 280 | ext4_bg_num_gdb(sb, i) + 1); |
206 | gdp = ext4_get_group_desc(sb, i, NULL); | 281 | gdp = ext4_get_group_desc(sb, i, NULL); |
207 | ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); | 282 | ret = add_system_zone(system_blks, |
283 | ext4_block_bitmap(sb, gdp), 1); | ||
208 | if (ret) | 284 | if (ret) |
209 | return ret; | 285 | goto err; |
210 | ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1); | 286 | ret = add_system_zone(system_blks, |
287 | ext4_inode_bitmap(sb, gdp), 1); | ||
211 | if (ret) | 288 | if (ret) |
212 | return ret; | 289 | goto err; |
213 | ret = add_system_zone(sbi, ext4_inode_table(sb, gdp), | 290 | ret = add_system_zone(system_blks, |
291 | ext4_inode_table(sb, gdp), | ||
214 | sbi->s_itb_per_group); | 292 | sbi->s_itb_per_group); |
215 | if (ret) | 293 | if (ret) |
216 | return ret; | 294 | goto err; |
217 | } | 295 | } |
218 | if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) { | 296 | if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) { |
219 | ret = ext4_protect_reserved_inode(sb, | 297 | ret = ext4_protect_reserved_inode(sb, system_blks, |
220 | le32_to_cpu(sbi->s_es->s_journal_inum)); | 298 | le32_to_cpu(sbi->s_es->s_journal_inum)); |
221 | if (ret) | 299 | if (ret) |
222 | return ret; | 300 | goto err; |
223 | } | 301 | } |
224 | 302 | ||
303 | /* | ||
304 | * System blks rbtree complete, announce it once to prevent racing | ||
305 | * with ext4_data_block_valid() accessing the rbtree at the same | ||
306 | * time. | ||
307 | */ | ||
308 | rcu_assign_pointer(sbi->system_blks, system_blks); | ||
309 | |||
225 | if (test_opt(sb, DEBUG)) | 310 | if (test_opt(sb, DEBUG)) |
226 | debug_print_tree(sbi); | 311 | debug_print_tree(sbi); |
227 | return 0; | 312 | return 0; |
313 | err: | ||
314 | release_system_zone(system_blks); | ||
315 | kfree(system_blks); | ||
316 | return ret; | ||
228 | } | 317 | } |
229 | 318 | ||
230 | /* Called when the filesystem is unmounted */ | 319 | /* |
320 | * Called when the filesystem is unmounted or when remounting it with | ||
321 | * noblock_validity specified. | ||
322 | * | ||
323 | * The update of system_blks pointer in this function is protected by | ||
324 | * sb->s_umount semaphore. However we have to be careful as we can be | ||
325 | * racing with ext4_data_block_valid() calls reading system_blks rbtree | ||
326 | * protected only by RCU. So we first clear the system_blks pointer and | ||
327 | * then free the rbtree only after RCU grace period expires. | ||
328 | */ | ||
231 | void ext4_release_system_zone(struct super_block *sb) | 329 | void ext4_release_system_zone(struct super_block *sb) |
232 | { | 330 | { |
233 | struct ext4_system_zone *entry, *n; | 331 | struct ext4_system_blocks *system_blks; |
234 | 332 | ||
235 | rbtree_postorder_for_each_entry_safe(entry, n, | 333 | system_blks = rcu_dereference_protected(EXT4_SB(sb)->system_blks, |
236 | &EXT4_SB(sb)->system_blks, node) | 334 | lockdep_is_held(&sb->s_umount)); |
237 | kmem_cache_free(ext4_system_zone_cachep, entry); | 335 | rcu_assign_pointer(EXT4_SB(sb)->system_blks, NULL); |
238 | 336 | ||
239 | EXT4_SB(sb)->system_blks = RB_ROOT; | 337 | if (system_blks) |
338 | call_rcu(&system_blks->rcu, ext4_destroy_system_zone); | ||
240 | } | 339 | } |
241 | 340 | ||
242 | /* | ||
243 | * Returns 1 if the passed-in block region (start_blk, | ||
244 | * start_blk+count) is valid; 0 if some part of the block region | ||
245 | * overlaps with filesystem metadata blocks. | ||
246 | */ | ||
247 | int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | 341 | int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, |
248 | unsigned int count) | 342 | unsigned int count) |
249 | { | 343 | { |
250 | struct ext4_system_zone *entry; | 344 | struct ext4_system_blocks *system_blks; |
251 | struct rb_node *n = sbi->system_blks.rb_node; | 345 | int ret; |
252 | 346 | ||
253 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | 347 | /* |
254 | (start_blk + count < start_blk) || | 348 | * Lock the system zone to prevent it being released concurrently |
255 | (start_blk + count > ext4_blocks_count(sbi->s_es))) { | 349 | * when doing a remount which inverse current "[no]block_validity" |
256 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | 350 | * mount option. |
257 | return 0; | 351 | */ |
258 | } | 352 | rcu_read_lock(); |
259 | while (n) { | 353 | system_blks = rcu_dereference(sbi->system_blks); |
260 | entry = rb_entry(n, struct ext4_system_zone, node); | 354 | ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk, |
261 | if (start_blk + count - 1 < entry->start_blk) | 355 | count); |
262 | n = n->rb_left; | 356 | rcu_read_unlock(); |
263 | else if (start_blk >= (entry->start_blk + entry->count)) | 357 | return ret; |
264 | n = n->rb_right; | ||
265 | else { | ||
266 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
267 | return 0; | ||
268 | } | ||
269 | } | ||
270 | return 1; | ||
271 | } | 358 | } |
272 | 359 | ||
273 | int ext4_check_blockref(const char *function, unsigned int line, | 360 | int ext4_check_blockref(const char *function, unsigned int line, |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 86054f31fe4d..9fdd2b269d61 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -668,14 +668,15 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, | |||
668 | const char *str, const struct qstr *name) | 668 | const char *str, const struct qstr *name) |
669 | { | 669 | { |
670 | struct qstr qstr = {.name = str, .len = len }; | 670 | struct qstr qstr = {.name = str, .len = len }; |
671 | struct inode *inode = dentry->d_parent->d_inode; | ||
671 | 672 | ||
672 | if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { | 673 | if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { |
673 | if (len != name->len) | 674 | if (len != name->len) |
674 | return -1; | 675 | return -1; |
675 | return memcmp(str, name->name, len); | 676 | return memcmp(str, name->name, len); |
676 | } | 677 | } |
677 | 678 | ||
678 | return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); | 679 | return ext4_ci_compare(inode, name, &qstr, false); |
679 | } | 680 | } |
680 | 681 | ||
681 | static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) | 682 | static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) |
@@ -685,7 +686,7 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) | |||
685 | unsigned char *norm; | 686 | unsigned char *norm; |
686 | int len, ret = 0; | 687 | int len, ret = 0; |
687 | 688 | ||
688 | if (!IS_CASEFOLDED(dentry->d_inode)) | 689 | if (!IS_CASEFOLDED(dentry->d_inode) || !um) |
689 | return 0; | 690 | return 0; |
690 | 691 | ||
691 | norm = kmalloc(PATH_MAX, GFP_ATOMIC); | 692 | norm = kmalloc(PATH_MAX, GFP_ATOMIC); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 42c6e4a5e673..03db3e71676c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -186,6 +186,14 @@ struct ext4_map_blocks { | |||
186 | }; | 186 | }; |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * Block validity checking, system zone rbtree. | ||
190 | */ | ||
191 | struct ext4_system_blocks { | ||
192 | struct rb_root root; | ||
193 | struct rcu_head rcu; | ||
194 | }; | ||
195 | |||
196 | /* | ||
189 | * Flags for ext4_io_end->flags | 197 | * Flags for ext4_io_end->flags |
190 | */ | 198 | */ |
191 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 199 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
@@ -285,6 +293,9 @@ struct ext4_io_submit { | |||
285 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | 293 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) |
286 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ | 294 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ |
287 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | 295 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) |
296 | /* Fill in the low bits to get the last block of the cluster */ | ||
297 | #define EXT4_LBLK_CFILL(sbi, lblk) ((lblk) | \ | ||
298 | ((ext4_lblk_t) (sbi)->s_cluster_ratio - 1)) | ||
288 | /* Get the cluster offset */ | 299 | /* Get the cluster offset */ |
289 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ | 300 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ |
290 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | 301 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) |
@@ -653,6 +664,10 @@ enum { | |||
653 | #define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY | 664 | #define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY |
654 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT | 665 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT |
655 | #define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY | 666 | #define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY |
667 | /* ioctl codes 19--39 are reserved for fscrypt */ | ||
668 | #define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40) | ||
669 | #define EXT4_IOC_GETSTATE _IOW('f', 41, __u32) | ||
670 | #define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap) | ||
656 | 671 | ||
657 | #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR | 672 | #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR |
658 | #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR | 673 | #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR |
@@ -666,6 +681,16 @@ enum { | |||
666 | #define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ | 681 | #define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ |
667 | #define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ | 682 | #define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ |
668 | 683 | ||
684 | /* | ||
685 | * Flags returned by EXT4_IOC_GETSTATE | ||
686 | * | ||
687 | * We only expose to userspace a subset of the state flags in | ||
688 | * i_state_flags | ||
689 | */ | ||
690 | #define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001 | ||
691 | #define EXT4_STATE_FLAG_NEW 0x00000002 | ||
692 | #define EXT4_STATE_FLAG_NEWENTRY 0x00000004 | ||
693 | #define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008 | ||
669 | 694 | ||
670 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 695 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
671 | /* | 696 | /* |
@@ -683,6 +708,12 @@ enum { | |||
683 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION | 708 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION |
684 | #endif | 709 | #endif |
685 | 710 | ||
711 | /* | ||
712 | * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag. | ||
713 | * It indicates that the entry in extent status cache is for a hole. | ||
714 | */ | ||
715 | #define EXT4_FIEMAP_EXTENT_HOLE 0x08000000 | ||
716 | |||
686 | /* Max physical block we can address w/o extents */ | 717 | /* Max physical block we can address w/o extents */ |
687 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | 718 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF |
688 | 719 | ||
@@ -812,21 +843,8 @@ static inline __le32 ext4_encode_extra_time(struct timespec64 *time) | |||
812 | static inline void ext4_decode_extra_time(struct timespec64 *time, | 843 | static inline void ext4_decode_extra_time(struct timespec64 *time, |
813 | __le32 extra) | 844 | __le32 extra) |
814 | { | 845 | { |
815 | if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) { | 846 | if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) |
816 | |||
817 | #if 1 | ||
818 | /* Handle legacy encoding of pre-1970 dates with epoch | ||
819 | * bits 1,1. (This backwards compatibility may be removed | ||
820 | * at the discretion of the ext4 developers.) | ||
821 | */ | ||
822 | u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; | ||
823 | if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) | ||
824 | extra_bits = 0; | ||
825 | time->tv_sec += extra_bits << 32; | ||
826 | #else | ||
827 | time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; | 847 | time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; |
828 | #endif | ||
829 | } | ||
830 | time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; | 848 | time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; |
831 | } | 849 | } |
832 | 850 | ||
@@ -1427,7 +1445,7 @@ struct ext4_sb_info { | |||
1427 | int s_jquota_fmt; /* Format of quota to use */ | 1445 | int s_jquota_fmt; /* Format of quota to use */ |
1428 | #endif | 1446 | #endif |
1429 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | 1447 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ |
1430 | struct rb_root system_blks; | 1448 | struct ext4_system_blocks __rcu *system_blks; |
1431 | 1449 | ||
1432 | #ifdef EXTENTS_STATS | 1450 | #ifdef EXTENTS_STATS |
1433 | /* ext4 extents stats */ | 1451 | /* ext4 extents stats */ |
@@ -3267,6 +3285,9 @@ extern int ext4_ext_check_inode(struct inode *inode); | |||
3267 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); | 3285 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); |
3268 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 3286 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
3269 | __u64 start, __u64 len); | 3287 | __u64 start, __u64 len); |
3288 | extern int ext4_get_es_cache(struct inode *inode, | ||
3289 | struct fiemap_extent_info *fieinfo, | ||
3290 | __u64 start, __u64 len); | ||
3270 | extern int ext4_ext_precache(struct inode *inode); | 3291 | extern int ext4_ext_precache(struct inode *inode); |
3271 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | 3292 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); |
3272 | extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); | 3293 | extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); |
@@ -3359,6 +3380,19 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) | |||
3359 | 3380 | ||
3360 | extern const struct iomap_ops ext4_iomap_ops; | 3381 | extern const struct iomap_ops ext4_iomap_ops; |
3361 | 3382 | ||
3383 | static inline int ext4_buffer_uptodate(struct buffer_head *bh) | ||
3384 | { | ||
3385 | /* | ||
3386 | * If the buffer has the write error flag, we have failed | ||
3387 | * to write out data in the block. In this case, we don't | ||
3388 | * have to read the block because we may read the old data | ||
3389 | * successfully. | ||
3390 | */ | ||
3391 | if (!buffer_uptodate(bh) && buffer_write_io_error(bh)) | ||
3392 | set_buffer_uptodate(bh); | ||
3393 | return buffer_uptodate(bh); | ||
3394 | } | ||
3395 | |||
3362 | #endif /* __KERNEL__ */ | 3396 | #endif /* __KERNEL__ */ |
3363 | 3397 | ||
3364 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ | 3398 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 92266a2da7d6..fb0f99dc8c22 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2315,6 +2315,52 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2315 | return err; | 2315 | return err; |
2316 | } | 2316 | } |
2317 | 2317 | ||
2318 | static int ext4_fill_es_cache_info(struct inode *inode, | ||
2319 | ext4_lblk_t block, ext4_lblk_t num, | ||
2320 | struct fiemap_extent_info *fieinfo) | ||
2321 | { | ||
2322 | ext4_lblk_t next, end = block + num - 1; | ||
2323 | struct extent_status es; | ||
2324 | unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; | ||
2325 | unsigned int flags; | ||
2326 | int err; | ||
2327 | |||
2328 | while (block <= end) { | ||
2329 | next = 0; | ||
2330 | flags = 0; | ||
2331 | if (!ext4_es_lookup_extent(inode, block, &next, &es)) | ||
2332 | break; | ||
2333 | if (ext4_es_is_unwritten(&es)) | ||
2334 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
2335 | if (ext4_es_is_delayed(&es)) | ||
2336 | flags |= (FIEMAP_EXTENT_DELALLOC | | ||
2337 | FIEMAP_EXTENT_UNKNOWN); | ||
2338 | if (ext4_es_is_hole(&es)) | ||
2339 | flags |= EXT4_FIEMAP_EXTENT_HOLE; | ||
2340 | if (next == 0) | ||
2341 | flags |= FIEMAP_EXTENT_LAST; | ||
2342 | if (flags & (FIEMAP_EXTENT_DELALLOC| | ||
2343 | EXT4_FIEMAP_EXTENT_HOLE)) | ||
2344 | es.es_pblk = 0; | ||
2345 | else | ||
2346 | es.es_pblk = ext4_es_pblock(&es); | ||
2347 | err = fiemap_fill_next_extent(fieinfo, | ||
2348 | (__u64)es.es_lblk << blksize_bits, | ||
2349 | (__u64)es.es_pblk << blksize_bits, | ||
2350 | (__u64)es.es_len << blksize_bits, | ||
2351 | flags); | ||
2352 | if (next == 0) | ||
2353 | break; | ||
2354 | block = next; | ||
2355 | if (err < 0) | ||
2356 | return err; | ||
2357 | if (err == 1) | ||
2358 | return 0; | ||
2359 | } | ||
2360 | return 0; | ||
2361 | } | ||
2362 | |||
2363 | |||
2318 | /* | 2364 | /* |
2319 | * ext4_ext_determine_hole - determine hole around given block | 2365 | * ext4_ext_determine_hole - determine hole around given block |
2320 | * @inode: inode we lookup in | 2366 | * @inode: inode we lookup in |
@@ -3813,8 +3859,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3813 | * illegal. | 3859 | * illegal. |
3814 | */ | 3860 | */ |
3815 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | 3861 | if (ee_block != map->m_lblk || ee_len > map->m_len) { |
3816 | #ifdef EXT4_DEBUG | 3862 | #ifdef CONFIG_EXT4_DEBUG |
3817 | ext4_warning("Inode (%ld) finished: extent logical block %llu," | 3863 | ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu," |
3818 | " len %u; IO logical block %llu, len %u", | 3864 | " len %u; IO logical block %llu, len %u", |
3819 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3865 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
3820 | (unsigned long long)map->m_lblk, map->m_len); | 3866 | (unsigned long long)map->m_lblk, map->m_len); |
@@ -5017,8 +5063,6 @@ static int ext4_find_delayed_extent(struct inode *inode, | |||
5017 | 5063 | ||
5018 | return next_del; | 5064 | return next_del; |
5019 | } | 5065 | } |
5020 | /* fiemap flags we can handle specified here */ | ||
5021 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
5022 | 5066 | ||
5023 | static int ext4_xattr_fiemap(struct inode *inode, | 5067 | static int ext4_xattr_fiemap(struct inode *inode, |
5024 | struct fiemap_extent_info *fieinfo) | 5068 | struct fiemap_extent_info *fieinfo) |
@@ -5055,10 +5099,16 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
5055 | return (error < 0 ? error : 0); | 5099 | return (error < 0 ? error : 0); |
5056 | } | 5100 | } |
5057 | 5101 | ||
5058 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5102 | static int _ext4_fiemap(struct inode *inode, |
5059 | __u64 start, __u64 len) | 5103 | struct fiemap_extent_info *fieinfo, |
5104 | __u64 start, __u64 len, | ||
5105 | int (*fill)(struct inode *, ext4_lblk_t, | ||
5106 | ext4_lblk_t, | ||
5107 | struct fiemap_extent_info *)) | ||
5060 | { | 5108 | { |
5061 | ext4_lblk_t start_blk; | 5109 | ext4_lblk_t start_blk; |
5110 | u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR; | ||
5111 | |||
5062 | int error = 0; | 5112 | int error = 0; |
5063 | 5113 | ||
5064 | if (ext4_has_inline_data(inode)) { | 5114 | if (ext4_has_inline_data(inode)) { |
@@ -5075,14 +5125,18 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5075 | error = ext4_ext_precache(inode); | 5125 | error = ext4_ext_precache(inode); |
5076 | if (error) | 5126 | if (error) |
5077 | return error; | 5127 | return error; |
5128 | fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; | ||
5078 | } | 5129 | } |
5079 | 5130 | ||
5080 | /* fallback to generic here if not in extents fmt */ | 5131 | /* fallback to generic here if not in extents fmt */ |
5081 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 5132 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && |
5133 | fill == ext4_fill_fiemap_extents) | ||
5082 | return generic_block_fiemap(inode, fieinfo, start, len, | 5134 | return generic_block_fiemap(inode, fieinfo, start, len, |
5083 | ext4_get_block); | 5135 | ext4_get_block); |
5084 | 5136 | ||
5085 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | 5137 | if (fill == ext4_fill_es_cache_info) |
5138 | ext4_fiemap_flags &= FIEMAP_FLAG_XATTR; | ||
5139 | if (fiemap_check_flags(fieinfo, ext4_fiemap_flags)) | ||
5086 | return -EBADR; | 5140 | return -EBADR; |
5087 | 5141 | ||
5088 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | 5142 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { |
@@ -5101,12 +5155,36 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5101 | * Walk the extent tree gathering extent information | 5155 | * Walk the extent tree gathering extent information |
5102 | * and pushing extents back to the user. | 5156 | * and pushing extents back to the user. |
5103 | */ | 5157 | */ |
5104 | error = ext4_fill_fiemap_extents(inode, start_blk, | 5158 | error = fill(inode, start_blk, len_blks, fieinfo); |
5105 | len_blks, fieinfo); | ||
5106 | } | 5159 | } |
5107 | return error; | 5160 | return error; |
5108 | } | 5161 | } |
5109 | 5162 | ||
5163 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
5164 | __u64 start, __u64 len) | ||
5165 | { | ||
5166 | return _ext4_fiemap(inode, fieinfo, start, len, | ||
5167 | ext4_fill_fiemap_extents); | ||
5168 | } | ||
5169 | |||
5170 | int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
5171 | __u64 start, __u64 len) | ||
5172 | { | ||
5173 | if (ext4_has_inline_data(inode)) { | ||
5174 | int has_inline; | ||
5175 | |||
5176 | down_read(&EXT4_I(inode)->xattr_sem); | ||
5177 | has_inline = ext4_has_inline_data(inode); | ||
5178 | up_read(&EXT4_I(inode)->xattr_sem); | ||
5179 | if (has_inline) | ||
5180 | return 0; | ||
5181 | } | ||
5182 | |||
5183 | return _ext4_fiemap(inode, fieinfo, start, len, | ||
5184 | ext4_fill_es_cache_info); | ||
5185 | } | ||
5186 | |||
5187 | |||
5110 | /* | 5188 | /* |
5111 | * ext4_access_path: | 5189 | * ext4_access_path: |
5112 | * Function to access the path buffer for marking it dirty. | 5190 | * Function to access the path buffer for marking it dirty. |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 7521de2dcf3a..d996b44d2265 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -146,7 +146,7 @@ static struct kmem_cache *ext4_pending_cachep; | |||
146 | 146 | ||
147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); | 147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); |
148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
149 | ext4_lblk_t end); | 149 | ext4_lblk_t end, int *reserved); |
150 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); | 150 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); |
151 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 151 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
152 | struct ext4_inode_info *locked_ei); | 152 | struct ext4_inode_info *locked_ei); |
@@ -836,7 +836,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
836 | ext4_es_insert_extent_check(inode, &newes); | 836 | ext4_es_insert_extent_check(inode, &newes); |
837 | 837 | ||
838 | write_lock(&EXT4_I(inode)->i_es_lock); | 838 | write_lock(&EXT4_I(inode)->i_es_lock); |
839 | err = __es_remove_extent(inode, lblk, end); | 839 | err = __es_remove_extent(inode, lblk, end, NULL); |
840 | if (err != 0) | 840 | if (err != 0) |
841 | goto error; | 841 | goto error; |
842 | retry: | 842 | retry: |
@@ -899,6 +899,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | |||
899 | * Return: 1 on found, 0 on not | 899 | * Return: 1 on found, 0 on not |
900 | */ | 900 | */ |
901 | int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 901 | int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
902 | ext4_lblk_t *next_lblk, | ||
902 | struct extent_status *es) | 903 | struct extent_status *es) |
903 | { | 904 | { |
904 | struct ext4_es_tree *tree; | 905 | struct ext4_es_tree *tree; |
@@ -947,9 +948,18 @@ out: | |||
947 | es->es_pblk = es1->es_pblk; | 948 | es->es_pblk = es1->es_pblk; |
948 | if (!ext4_es_is_referenced(es1)) | 949 | if (!ext4_es_is_referenced(es1)) |
949 | ext4_es_set_referenced(es1); | 950 | ext4_es_set_referenced(es1); |
950 | stats->es_stats_cache_hits++; | 951 | percpu_counter_inc(&stats->es_stats_cache_hits); |
952 | if (next_lblk) { | ||
953 | node = rb_next(&es1->rb_node); | ||
954 | if (node) { | ||
955 | es1 = rb_entry(node, struct extent_status, | ||
956 | rb_node); | ||
957 | *next_lblk = es1->es_lblk; | ||
958 | } else | ||
959 | *next_lblk = 0; | ||
960 | } | ||
951 | } else { | 961 | } else { |
952 | stats->es_stats_cache_misses++; | 962 | percpu_counter_inc(&stats->es_stats_cache_misses); |
953 | } | 963 | } |
954 | 964 | ||
955 | read_unlock(&EXT4_I(inode)->i_es_lock); | 965 | read_unlock(&EXT4_I(inode)->i_es_lock); |
@@ -958,8 +968,322 @@ out: | |||
958 | return found; | 968 | return found; |
959 | } | 969 | } |
960 | 970 | ||
971 | struct rsvd_count { | ||
972 | int ndelonly; | ||
973 | bool first_do_lblk_found; | ||
974 | ext4_lblk_t first_do_lblk; | ||
975 | ext4_lblk_t last_do_lblk; | ||
976 | struct extent_status *left_es; | ||
977 | bool partial; | ||
978 | ext4_lblk_t lclu; | ||
979 | }; | ||
980 | |||
981 | /* | ||
982 | * init_rsvd - initialize reserved count data before removing block range | ||
983 | * in file from extent status tree | ||
984 | * | ||
985 | * @inode - file containing range | ||
986 | * @lblk - first block in range | ||
987 | * @es - pointer to first extent in range | ||
988 | * @rc - pointer to reserved count data | ||
989 | * | ||
990 | * Assumes es is not NULL | ||
991 | */ | ||
992 | static void init_rsvd(struct inode *inode, ext4_lblk_t lblk, | ||
993 | struct extent_status *es, struct rsvd_count *rc) | ||
994 | { | ||
995 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
996 | struct rb_node *node; | ||
997 | |||
998 | rc->ndelonly = 0; | ||
999 | |||
1000 | /* | ||
1001 | * for bigalloc, note the first delonly block in the range has not | ||
1002 | * been found, record the extent containing the block to the left of | ||
1003 | * the region to be removed, if any, and note that there's no partial | ||
1004 | * cluster to track | ||
1005 | */ | ||
1006 | if (sbi->s_cluster_ratio > 1) { | ||
1007 | rc->first_do_lblk_found = false; | ||
1008 | if (lblk > es->es_lblk) { | ||
1009 | rc->left_es = es; | ||
1010 | } else { | ||
1011 | node = rb_prev(&es->rb_node); | ||
1012 | rc->left_es = node ? rb_entry(node, | ||
1013 | struct extent_status, | ||
1014 | rb_node) : NULL; | ||
1015 | } | ||
1016 | rc->partial = false; | ||
1017 | } | ||
1018 | } | ||
1019 | |||
1020 | /* | ||
1021 | * count_rsvd - count the clusters containing delayed and not unwritten | ||
1022 | * (delonly) blocks in a range within an extent and add to | ||
1023 | * the running tally in rsvd_count | ||
1024 | * | ||
1025 | * @inode - file containing extent | ||
1026 | * @lblk - first block in range | ||
1027 | * @len - length of range in blocks | ||
1028 | * @es - pointer to extent containing clusters to be counted | ||
1029 | * @rc - pointer to reserved count data | ||
1030 | * | ||
1031 | * Tracks partial clusters found at the beginning and end of extents so | ||
1032 | * they aren't overcounted when they span adjacent extents | ||
1033 | */ | ||
1034 | static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len, | ||
1035 | struct extent_status *es, struct rsvd_count *rc) | ||
1036 | { | ||
1037 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1038 | ext4_lblk_t i, end, nclu; | ||
1039 | |||
1040 | if (!ext4_es_is_delonly(es)) | ||
1041 | return; | ||
1042 | |||
1043 | WARN_ON(len <= 0); | ||
1044 | |||
1045 | if (sbi->s_cluster_ratio == 1) { | ||
1046 | rc->ndelonly += (int) len; | ||
1047 | return; | ||
1048 | } | ||
1049 | |||
1050 | /* bigalloc */ | ||
1051 | |||
1052 | i = (lblk < es->es_lblk) ? es->es_lblk : lblk; | ||
1053 | end = lblk + (ext4_lblk_t) len - 1; | ||
1054 | end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end; | ||
1055 | |||
1056 | /* record the first block of the first delonly extent seen */ | ||
1057 | if (rc->first_do_lblk_found == false) { | ||
1058 | rc->first_do_lblk = i; | ||
1059 | rc->first_do_lblk_found = true; | ||
1060 | } | ||
1061 | |||
1062 | /* update the last lblk in the region seen so far */ | ||
1063 | rc->last_do_lblk = end; | ||
1064 | |||
1065 | /* | ||
1066 | * if we're tracking a partial cluster and the current extent | ||
1067 | * doesn't start with it, count it and stop tracking | ||
1068 | */ | ||
1069 | if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) { | ||
1070 | rc->ndelonly++; | ||
1071 | rc->partial = false; | ||
1072 | } | ||
1073 | |||
1074 | /* | ||
1075 | * if the first cluster doesn't start on a cluster boundary but | ||
1076 | * ends on one, count it | ||
1077 | */ | ||
1078 | if (EXT4_LBLK_COFF(sbi, i) != 0) { | ||
1079 | if (end >= EXT4_LBLK_CFILL(sbi, i)) { | ||
1080 | rc->ndelonly++; | ||
1081 | rc->partial = false; | ||
1082 | i = EXT4_LBLK_CFILL(sbi, i) + 1; | ||
1083 | } | ||
1084 | } | ||
1085 | |||
1086 | /* | ||
1087 | * if the current cluster starts on a cluster boundary, count the | ||
1088 | * number of whole delonly clusters in the extent | ||
1089 | */ | ||
1090 | if ((i + sbi->s_cluster_ratio - 1) <= end) { | ||
1091 | nclu = (end - i + 1) >> sbi->s_cluster_bits; | ||
1092 | rc->ndelonly += nclu; | ||
1093 | i += nclu << sbi->s_cluster_bits; | ||
1094 | } | ||
1095 | |||
1096 | /* | ||
1097 | * start tracking a partial cluster if there's a partial at the end | ||
1098 | * of the current extent and we're not already tracking one | ||
1099 | */ | ||
1100 | if (!rc->partial && i <= end) { | ||
1101 | rc->partial = true; | ||
1102 | rc->lclu = EXT4_B2C(sbi, i); | ||
1103 | } | ||
1104 | } | ||
1105 | |||
1106 | /* | ||
1107 | * __pr_tree_search - search for a pending cluster reservation | ||
1108 | * | ||
1109 | * @root - root of pending reservation tree | ||
1110 | * @lclu - logical cluster to search for | ||
1111 | * | ||
1112 | * Returns the pending reservation for the cluster identified by @lclu | ||
1113 | * if found. If not, returns a reservation for the next cluster if any, | ||
1114 | * and if not, returns NULL. | ||
1115 | */ | ||
1116 | static struct pending_reservation *__pr_tree_search(struct rb_root *root, | ||
1117 | ext4_lblk_t lclu) | ||
1118 | { | ||
1119 | struct rb_node *node = root->rb_node; | ||
1120 | struct pending_reservation *pr = NULL; | ||
1121 | |||
1122 | while (node) { | ||
1123 | pr = rb_entry(node, struct pending_reservation, rb_node); | ||
1124 | if (lclu < pr->lclu) | ||
1125 | node = node->rb_left; | ||
1126 | else if (lclu > pr->lclu) | ||
1127 | node = node->rb_right; | ||
1128 | else | ||
1129 | return pr; | ||
1130 | } | ||
1131 | if (pr && lclu < pr->lclu) | ||
1132 | return pr; | ||
1133 | if (pr && lclu > pr->lclu) { | ||
1134 | node = rb_next(&pr->rb_node); | ||
1135 | return node ? rb_entry(node, struct pending_reservation, | ||
1136 | rb_node) : NULL; | ||
1137 | } | ||
1138 | return NULL; | ||
1139 | } | ||
1140 | |||
1141 | /* | ||
1142 | * get_rsvd - calculates and returns the number of cluster reservations to be | ||
1143 | * released when removing a block range from the extent status tree | ||
1144 | * and releases any pending reservations within the range | ||
1145 | * | ||
1146 | * @inode - file containing block range | ||
1147 | * @end - last block in range | ||
1148 | * @right_es - pointer to extent containing next block beyond end or NULL | ||
1149 | * @rc - pointer to reserved count data | ||
1150 | * | ||
1151 | * The number of reservations to be released is equal to the number of | ||
1152 | * clusters containing delayed and not unwritten (delonly) blocks within | ||
1153 | * the range, minus the number of clusters still containing delonly blocks | ||
1154 | * at the ends of the range, and minus the number of pending reservations | ||
1155 | * within the range. | ||
1156 | */ | ||
1157 | static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, | ||
1158 | struct extent_status *right_es, | ||
1159 | struct rsvd_count *rc) | ||
1160 | { | ||
1161 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1162 | struct pending_reservation *pr; | ||
1163 | struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; | ||
1164 | struct rb_node *node; | ||
1165 | ext4_lblk_t first_lclu, last_lclu; | ||
1166 | bool left_delonly, right_delonly, count_pending; | ||
1167 | struct extent_status *es; | ||
1168 | |||
1169 | if (sbi->s_cluster_ratio > 1) { | ||
1170 | /* count any remaining partial cluster */ | ||
1171 | if (rc->partial) | ||
1172 | rc->ndelonly++; | ||
1173 | |||
1174 | if (rc->ndelonly == 0) | ||
1175 | return 0; | ||
1176 | |||
1177 | first_lclu = EXT4_B2C(sbi, rc->first_do_lblk); | ||
1178 | last_lclu = EXT4_B2C(sbi, rc->last_do_lblk); | ||
1179 | |||
1180 | /* | ||
1181 | * decrease the delonly count by the number of clusters at the | ||
1182 | * ends of the range that still contain delonly blocks - | ||
1183 | * these clusters still need to be reserved | ||
1184 | */ | ||
1185 | left_delonly = right_delonly = false; | ||
1186 | |||
1187 | es = rc->left_es; | ||
1188 | while (es && ext4_es_end(es) >= | ||
1189 | EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) { | ||
1190 | if (ext4_es_is_delonly(es)) { | ||
1191 | rc->ndelonly--; | ||
1192 | left_delonly = true; | ||
1193 | break; | ||
1194 | } | ||
1195 | node = rb_prev(&es->rb_node); | ||
1196 | if (!node) | ||
1197 | break; | ||
1198 | es = rb_entry(node, struct extent_status, rb_node); | ||
1199 | } | ||
1200 | if (right_es && (!left_delonly || first_lclu != last_lclu)) { | ||
1201 | if (end < ext4_es_end(right_es)) { | ||
1202 | es = right_es; | ||
1203 | } else { | ||
1204 | node = rb_next(&right_es->rb_node); | ||
1205 | es = node ? rb_entry(node, struct extent_status, | ||
1206 | rb_node) : NULL; | ||
1207 | } | ||
1208 | while (es && es->es_lblk <= | ||
1209 | EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) { | ||
1210 | if (ext4_es_is_delonly(es)) { | ||
1211 | rc->ndelonly--; | ||
1212 | right_delonly = true; | ||
1213 | break; | ||
1214 | } | ||
1215 | node = rb_next(&es->rb_node); | ||
1216 | if (!node) | ||
1217 | break; | ||
1218 | es = rb_entry(node, struct extent_status, | ||
1219 | rb_node); | ||
1220 | } | ||
1221 | } | ||
1222 | |||
1223 | /* | ||
1224 | * Determine the block range that should be searched for | ||
1225 | * pending reservations, if any. Clusters on the ends of the | ||
1226 | * original removed range containing delonly blocks are | ||
1227 | * excluded. They've already been accounted for and it's not | ||
1228 | * possible to determine if an associated pending reservation | ||
1229 | * should be released with the information available in the | ||
1230 | * extents status tree. | ||
1231 | */ | ||
1232 | if (first_lclu == last_lclu) { | ||
1233 | if (left_delonly | right_delonly) | ||
1234 | count_pending = false; | ||
1235 | else | ||
1236 | count_pending = true; | ||
1237 | } else { | ||
1238 | if (left_delonly) | ||
1239 | first_lclu++; | ||
1240 | if (right_delonly) | ||
1241 | last_lclu--; | ||
1242 | if (first_lclu <= last_lclu) | ||
1243 | count_pending = true; | ||
1244 | else | ||
1245 | count_pending = false; | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * a pending reservation found between first_lclu and last_lclu | ||
1250 | * represents an allocated cluster that contained at least one | ||
1251 | * delonly block, so the delonly total must be reduced by one | ||
1252 | * for each pending reservation found and released | ||
1253 | */ | ||
1254 | if (count_pending) { | ||
1255 | pr = __pr_tree_search(&tree->root, first_lclu); | ||
1256 | while (pr && pr->lclu <= last_lclu) { | ||
1257 | rc->ndelonly--; | ||
1258 | node = rb_next(&pr->rb_node); | ||
1259 | rb_erase(&pr->rb_node, &tree->root); | ||
1260 | kmem_cache_free(ext4_pending_cachep, pr); | ||
1261 | if (!node) | ||
1262 | break; | ||
1263 | pr = rb_entry(node, struct pending_reservation, | ||
1264 | rb_node); | ||
1265 | } | ||
1266 | } | ||
1267 | } | ||
1268 | return rc->ndelonly; | ||
1269 | } | ||
1270 | |||
1271 | |||
1272 | /* | ||
1273 | * __es_remove_extent - removes block range from extent status tree | ||
1274 | * | ||
1275 | * @inode - file containing range | ||
1276 | * @lblk - first block in range | ||
1277 | * @end - last block in range | ||
1278 | * @reserved - number of cluster reservations released | ||
1279 | * | ||
1280 | * If @reserved is not NULL and delayed allocation is enabled, counts | ||
1281 | * block/cluster reservations freed by removing range and if bigalloc | ||
1282 | * enabled cancels pending reservations as needed. Returns 0 on success, | ||
1283 | * error code on failure. | ||
1284 | */ | ||
961 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 1285 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
962 | ext4_lblk_t end) | 1286 | ext4_lblk_t end, int *reserved) |
963 | { | 1287 | { |
964 | struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 1288 | struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; |
965 | struct rb_node *node; | 1289 | struct rb_node *node; |
@@ -968,9 +1292,14 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
968 | ext4_lblk_t len1, len2; | 1292 | ext4_lblk_t len1, len2; |
969 | ext4_fsblk_t block; | 1293 | ext4_fsblk_t block; |
970 | int err; | 1294 | int err; |
1295 | bool count_reserved = true; | ||
1296 | struct rsvd_count rc; | ||
971 | 1297 | ||
1298 | if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC)) | ||
1299 | count_reserved = false; | ||
972 | retry: | 1300 | retry: |
973 | err = 0; | 1301 | err = 0; |
1302 | |||
974 | es = __es_tree_search(&tree->root, lblk); | 1303 | es = __es_tree_search(&tree->root, lblk); |
975 | if (!es) | 1304 | if (!es) |
976 | goto out; | 1305 | goto out; |
@@ -979,6 +1308,8 @@ retry: | |||
979 | 1308 | ||
980 | /* Simply invalidate cache_es. */ | 1309 | /* Simply invalidate cache_es. */ |
981 | tree->cache_es = NULL; | 1310 | tree->cache_es = NULL; |
1311 | if (count_reserved) | ||
1312 | init_rsvd(inode, lblk, es, &rc); | ||
982 | 1313 | ||
983 | orig_es.es_lblk = es->es_lblk; | 1314 | orig_es.es_lblk = es->es_lblk; |
984 | orig_es.es_len = es->es_len; | 1315 | orig_es.es_len = es->es_len; |
@@ -1020,10 +1351,16 @@ retry: | |||
1020 | ext4_es_store_pblock(es, block); | 1351 | ext4_es_store_pblock(es, block); |
1021 | } | 1352 | } |
1022 | } | 1353 | } |
1354 | if (count_reserved) | ||
1355 | count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, | ||
1356 | &orig_es, &rc); | ||
1023 | goto out; | 1357 | goto out; |
1024 | } | 1358 | } |
1025 | 1359 | ||
1026 | if (len1 > 0) { | 1360 | if (len1 > 0) { |
1361 | if (count_reserved) | ||
1362 | count_rsvd(inode, lblk, orig_es.es_len - len1, | ||
1363 | &orig_es, &rc); | ||
1027 | node = rb_next(&es->rb_node); | 1364 | node = rb_next(&es->rb_node); |
1028 | if (node) | 1365 | if (node) |
1029 | es = rb_entry(node, struct extent_status, rb_node); | 1366 | es = rb_entry(node, struct extent_status, rb_node); |
@@ -1032,6 +1369,8 @@ retry: | |||
1032 | } | 1369 | } |
1033 | 1370 | ||
1034 | while (es && ext4_es_end(es) <= end) { | 1371 | while (es && ext4_es_end(es) <= end) { |
1372 | if (count_reserved) | ||
1373 | count_rsvd(inode, es->es_lblk, es->es_len, es, &rc); | ||
1035 | node = rb_next(&es->rb_node); | 1374 | node = rb_next(&es->rb_node); |
1036 | rb_erase(&es->rb_node, &tree->root); | 1375 | rb_erase(&es->rb_node, &tree->root); |
1037 | ext4_es_free_extent(inode, es); | 1376 | ext4_es_free_extent(inode, es); |
@@ -1046,6 +1385,9 @@ retry: | |||
1046 | ext4_lblk_t orig_len = es->es_len; | 1385 | ext4_lblk_t orig_len = es->es_len; |
1047 | 1386 | ||
1048 | len1 = ext4_es_end(es) - end; | 1387 | len1 = ext4_es_end(es) - end; |
1388 | if (count_reserved) | ||
1389 | count_rsvd(inode, es->es_lblk, orig_len - len1, | ||
1390 | es, &rc); | ||
1049 | es->es_lblk = end + 1; | 1391 | es->es_lblk = end + 1; |
1050 | es->es_len = len1; | 1392 | es->es_len = len1; |
1051 | if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { | 1393 | if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { |
@@ -1054,20 +1396,28 @@ retry: | |||
1054 | } | 1396 | } |
1055 | } | 1397 | } |
1056 | 1398 | ||
1399 | if (count_reserved) | ||
1400 | *reserved = get_rsvd(inode, end, es, &rc); | ||
1057 | out: | 1401 | out: |
1058 | return err; | 1402 | return err; |
1059 | } | 1403 | } |
1060 | 1404 | ||
1061 | /* | 1405 | /* |
1062 | * ext4_es_remove_extent() removes a space from a extent status tree. | 1406 | * ext4_es_remove_extent - removes block range from extent status tree |
1063 | * | 1407 | * |
1064 | * Return 0 on success, error code on failure. | 1408 | * @inode - file containing range |
1409 | * @lblk - first block in range | ||
1410 | * @len - number of blocks to remove | ||
1411 | * | ||
1412 | * Reduces block/cluster reservation count and for bigalloc cancels pending | ||
1413 | * reservations as needed. Returns 0 on success, error code on failure. | ||
1065 | */ | 1414 | */ |
1066 | int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 1415 | int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
1067 | ext4_lblk_t len) | 1416 | ext4_lblk_t len) |
1068 | { | 1417 | { |
1069 | ext4_lblk_t end; | 1418 | ext4_lblk_t end; |
1070 | int err = 0; | 1419 | int err = 0; |
1420 | int reserved = 0; | ||
1071 | 1421 | ||
1072 | trace_ext4_es_remove_extent(inode, lblk, len); | 1422 | trace_ext4_es_remove_extent(inode, lblk, len); |
1073 | es_debug("remove [%u/%u) from extent status tree of inode %lu\n", | 1423 | es_debug("remove [%u/%u) from extent status tree of inode %lu\n", |
@@ -1085,9 +1435,10 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
1085 | * is reclaimed. | 1435 | * is reclaimed. |
1086 | */ | 1436 | */ |
1087 | write_lock(&EXT4_I(inode)->i_es_lock); | 1437 | write_lock(&EXT4_I(inode)->i_es_lock); |
1088 | err = __es_remove_extent(inode, lblk, end); | 1438 | err = __es_remove_extent(inode, lblk, end, &reserved); |
1089 | write_unlock(&EXT4_I(inode)->i_es_lock); | 1439 | write_unlock(&EXT4_I(inode)->i_es_lock); |
1090 | ext4_es_print_tree(inode); | 1440 | ext4_es_print_tree(inode); |
1441 | ext4_da_release_space(inode, reserved); | ||
1091 | return err; | 1442 | return err; |
1092 | } | 1443 | } |
1093 | 1444 | ||
@@ -1235,9 +1586,9 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v) | |||
1235 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | 1586 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", |
1236 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | 1587 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), |
1237 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); | 1588 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); |
1238 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | 1589 | seq_printf(seq, " %lld/%lld cache hits/misses\n", |
1239 | es_stats->es_stats_cache_hits, | 1590 | percpu_counter_sum_positive(&es_stats->es_stats_cache_hits), |
1240 | es_stats->es_stats_cache_misses); | 1591 | percpu_counter_sum_positive(&es_stats->es_stats_cache_misses)); |
1241 | if (inode_cnt) | 1592 | if (inode_cnt) |
1242 | seq_printf(seq, " %d inodes on list\n", inode_cnt); | 1593 | seq_printf(seq, " %d inodes on list\n", inode_cnt); |
1243 | 1594 | ||
@@ -1264,35 +1615,46 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1264 | sbi->s_es_nr_inode = 0; | 1615 | sbi->s_es_nr_inode = 0; |
1265 | spin_lock_init(&sbi->s_es_lock); | 1616 | spin_lock_init(&sbi->s_es_lock); |
1266 | sbi->s_es_stats.es_stats_shrunk = 0; | 1617 | sbi->s_es_stats.es_stats_shrunk = 0; |
1267 | sbi->s_es_stats.es_stats_cache_hits = 0; | 1618 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0, |
1268 | sbi->s_es_stats.es_stats_cache_misses = 0; | 1619 | GFP_KERNEL); |
1620 | if (err) | ||
1621 | return err; | ||
1622 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0, | ||
1623 | GFP_KERNEL); | ||
1624 | if (err) | ||
1625 | goto err1; | ||
1269 | sbi->s_es_stats.es_stats_scan_time = 0; | 1626 | sbi->s_es_stats.es_stats_scan_time = 0; |
1270 | sbi->s_es_stats.es_stats_max_scan_time = 0; | 1627 | sbi->s_es_stats.es_stats_max_scan_time = 0; |
1271 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | 1628 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); |
1272 | if (err) | 1629 | if (err) |
1273 | return err; | 1630 | goto err2; |
1274 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); | 1631 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); |
1275 | if (err) | 1632 | if (err) |
1276 | goto err1; | 1633 | goto err3; |
1277 | 1634 | ||
1278 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; | 1635 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; |
1279 | sbi->s_es_shrinker.count_objects = ext4_es_count; | 1636 | sbi->s_es_shrinker.count_objects = ext4_es_count; |
1280 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 1637 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
1281 | err = register_shrinker(&sbi->s_es_shrinker); | 1638 | err = register_shrinker(&sbi->s_es_shrinker); |
1282 | if (err) | 1639 | if (err) |
1283 | goto err2; | 1640 | goto err4; |
1284 | 1641 | ||
1285 | return 0; | 1642 | return 0; |
1286 | 1643 | err4: | |
1287 | err2: | ||
1288 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); | 1644 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1289 | err1: | 1645 | err3: |
1290 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1646 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1647 | err2: | ||
1648 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); | ||
1649 | err1: | ||
1650 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits); | ||
1291 | return err; | 1651 | return err; |
1292 | } | 1652 | } |
1293 | 1653 | ||
1294 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | 1654 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
1295 | { | 1655 | { |
1656 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits); | ||
1657 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses); | ||
1296 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1658 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1297 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); | 1659 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1298 | unregister_shrinker(&sbi->s_es_shrinker); | 1660 | unregister_shrinker(&sbi->s_es_shrinker); |
@@ -1317,6 +1679,7 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, | |||
1317 | es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); | 1679 | es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); |
1318 | if (!es) | 1680 | if (!es) |
1319 | goto out_wrap; | 1681 | goto out_wrap; |
1682 | |||
1320 | while (*nr_to_scan > 0) { | 1683 | while (*nr_to_scan > 0) { |
1321 | if (es->es_lblk > end) { | 1684 | if (es->es_lblk > end) { |
1322 | ei->i_es_shrink_lblk = end + 1; | 1685 | ei->i_es_shrink_lblk = end + 1; |
@@ -1374,6 +1737,34 @@ static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan) | |||
1374 | return nr_shrunk; | 1737 | return nr_shrunk; |
1375 | } | 1738 | } |
1376 | 1739 | ||
1740 | /* | ||
1741 | * Called to support EXT4_IOC_CLEAR_ES_CACHE. We can only remove | ||
1742 | * discretionary entries from the extent status cache. (Some entries | ||
1743 | * must be present for proper operations.) | ||
1744 | */ | ||
1745 | void ext4_clear_inode_es(struct inode *inode) | ||
1746 | { | ||
1747 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1748 | struct extent_status *es; | ||
1749 | struct ext4_es_tree *tree; | ||
1750 | struct rb_node *node; | ||
1751 | |||
1752 | write_lock(&ei->i_es_lock); | ||
1753 | tree = &EXT4_I(inode)->i_es_tree; | ||
1754 | tree->cache_es = NULL; | ||
1755 | node = rb_first(&tree->root); | ||
1756 | while (node) { | ||
1757 | es = rb_entry(node, struct extent_status, rb_node); | ||
1758 | node = rb_next(node); | ||
1759 | if (!ext4_es_is_delayed(es)) { | ||
1760 | rb_erase(&es->rb_node, &tree->root); | ||
1761 | ext4_es_free_extent(inode, es); | ||
1762 | } | ||
1763 | } | ||
1764 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED); | ||
1765 | write_unlock(&ei->i_es_lock); | ||
1766 | } | ||
1767 | |||
1377 | #ifdef ES_DEBUG__ | 1768 | #ifdef ES_DEBUG__ |
1378 | static void ext4_print_pending_tree(struct inode *inode) | 1769 | static void ext4_print_pending_tree(struct inode *inode) |
1379 | { | 1770 | { |
@@ -1590,7 +1981,7 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, | |||
1590 | 1981 | ||
1591 | write_lock(&EXT4_I(inode)->i_es_lock); | 1982 | write_lock(&EXT4_I(inode)->i_es_lock); |
1592 | 1983 | ||
1593 | err = __es_remove_extent(inode, lblk, lblk); | 1984 | err = __es_remove_extent(inode, lblk, lblk, NULL); |
1594 | if (err != 0) | 1985 | if (err != 0) |
1595 | goto error; | 1986 | goto error; |
1596 | retry: | 1987 | retry: |
@@ -1779,93 +2170,3 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, | |||
1779 | __remove_pending(inode, last); | 2170 | __remove_pending(inode, last); |
1780 | } | 2171 | } |
1781 | } | 2172 | } |
1782 | |||
1783 | /* | ||
1784 | * ext4_es_remove_blks - remove block range from extents status tree and | ||
1785 | * reduce reservation count or cancel pending | ||
1786 | * reservation as needed | ||
1787 | * | ||
1788 | * @inode - file containing range | ||
1789 | * @lblk - first block in range | ||
1790 | * @len - number of blocks to remove | ||
1791 | * | ||
1792 | */ | ||
1793 | void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk, | ||
1794 | ext4_lblk_t len) | ||
1795 | { | ||
1796 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1797 | unsigned int clu_size, reserved = 0; | ||
1798 | ext4_lblk_t last_lclu, first, length, remainder, last; | ||
1799 | bool delonly; | ||
1800 | int err = 0; | ||
1801 | struct pending_reservation *pr; | ||
1802 | struct ext4_pending_tree *tree; | ||
1803 | |||
1804 | /* | ||
1805 | * Process cluster by cluster for bigalloc - there may be up to | ||
1806 | * two clusters in a 4k page with a 1k block size and two blocks | ||
1807 | * per cluster. Also necessary for systems with larger page sizes | ||
1808 | * and potentially larger block sizes. | ||
1809 | */ | ||
1810 | clu_size = sbi->s_cluster_ratio; | ||
1811 | last_lclu = EXT4_B2C(sbi, lblk + len - 1); | ||
1812 | |||
1813 | write_lock(&EXT4_I(inode)->i_es_lock); | ||
1814 | |||
1815 | for (first = lblk, remainder = len; | ||
1816 | remainder > 0; | ||
1817 | first += length, remainder -= length) { | ||
1818 | |||
1819 | if (EXT4_B2C(sbi, first) == last_lclu) | ||
1820 | length = remainder; | ||
1821 | else | ||
1822 | length = clu_size - EXT4_LBLK_COFF(sbi, first); | ||
1823 | |||
1824 | /* | ||
1825 | * The BH_Delay flag, which triggers calls to this function, | ||
1826 | * and the contents of the extents status tree can be | ||
1827 | * inconsistent due to writepages activity. So, note whether | ||
1828 | * the blocks to be removed actually belong to an extent with | ||
1829 | * delayed only status. | ||
1830 | */ | ||
1831 | delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first); | ||
1832 | |||
1833 | /* | ||
1834 | * because of the writepages effect, written and unwritten | ||
1835 | * blocks could be removed here | ||
1836 | */ | ||
1837 | last = first + length - 1; | ||
1838 | err = __es_remove_extent(inode, first, last); | ||
1839 | if (err) | ||
1840 | ext4_warning(inode->i_sb, | ||
1841 | "%s: couldn't remove page (err = %d)", | ||
1842 | __func__, err); | ||
1843 | |||
1844 | /* non-bigalloc case: simply count the cluster for release */ | ||
1845 | if (sbi->s_cluster_ratio == 1 && delonly) { | ||
1846 | reserved++; | ||
1847 | continue; | ||
1848 | } | ||
1849 | |||
1850 | /* | ||
1851 | * bigalloc case: if all delayed allocated only blocks have | ||
1852 | * just been removed from a cluster, either cancel a pending | ||
1853 | * reservation if it exists or count a cluster for release | ||
1854 | */ | ||
1855 | if (delonly && | ||
1856 | !__es_scan_clu(inode, &ext4_es_is_delonly, first)) { | ||
1857 | pr = __get_pending(inode, EXT4_B2C(sbi, first)); | ||
1858 | if (pr != NULL) { | ||
1859 | tree = &EXT4_I(inode)->i_pending_tree; | ||
1860 | rb_erase(&pr->rb_node, &tree->root); | ||
1861 | kmem_cache_free(ext4_pending_cachep, pr); | ||
1862 | } else { | ||
1863 | reserved++; | ||
1864 | } | ||
1865 | } | ||
1866 | } | ||
1867 | |||
1868 | write_unlock(&EXT4_I(inode)->i_es_lock); | ||
1869 | |||
1870 | ext4_da_release_space(inode, reserved); | ||
1871 | } | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 131a8b7df265..825313c59752 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -70,8 +70,8 @@ struct ext4_es_tree { | |||
70 | 70 | ||
71 | struct ext4_es_stats { | 71 | struct ext4_es_stats { |
72 | unsigned long es_stats_shrunk; | 72 | unsigned long es_stats_shrunk; |
73 | unsigned long es_stats_cache_hits; | 73 | struct percpu_counter es_stats_cache_hits; |
74 | unsigned long es_stats_cache_misses; | 74 | struct percpu_counter es_stats_cache_misses; |
75 | u64 es_stats_scan_time; | 75 | u64 es_stats_scan_time; |
76 | u64 es_stats_max_scan_time; | 76 | u64 es_stats_max_scan_time; |
77 | struct percpu_counter es_stats_all_cnt; | 77 | struct percpu_counter es_stats_all_cnt; |
@@ -140,6 +140,7 @@ extern void ext4_es_find_extent_range(struct inode *inode, | |||
140 | ext4_lblk_t lblk, ext4_lblk_t end, | 140 | ext4_lblk_t lblk, ext4_lblk_t end, |
141 | struct extent_status *es); | 141 | struct extent_status *es); |
142 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 142 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
143 | ext4_lblk_t *next_lblk, | ||
143 | struct extent_status *es); | 144 | struct extent_status *es); |
144 | extern bool ext4_es_scan_range(struct inode *inode, | 145 | extern bool ext4_es_scan_range(struct inode *inode, |
145 | int (*matching_fn)(struct extent_status *es), | 146 | int (*matching_fn)(struct extent_status *es), |
@@ -246,7 +247,6 @@ extern int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, | |||
246 | bool allocated); | 247 | bool allocated); |
247 | extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, | 248 | extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, |
248 | ext4_lblk_t len); | 249 | ext4_lblk_t len); |
249 | extern void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk, | 250 | extern void ext4_clear_inode_es(struct inode *inode); |
250 | ext4_lblk_t len); | ||
251 | 251 | ||
252 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 252 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b8a20bb9a145..8d2bbcc2d813 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -230,8 +230,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
230 | if (IS_DAX(inode)) | 230 | if (IS_DAX(inode)) |
231 | return ext4_dax_write_iter(iocb, from); | 231 | return ext4_dax_write_iter(iocb, from); |
232 | #endif | 232 | #endif |
233 | if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT)) | ||
234 | return -EOPNOTSUPP; | ||
235 | 233 | ||
236 | if (!inode_trylock(inode)) { | 234 | if (!inode_trylock(inode)) { |
237 | if (iocb->ki_flags & IOCB_NOWAIT) | 235 | if (iocb->ki_flags & IOCB_NOWAIT) |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index d358bfcb6b3f..3e133793a5a3 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -280,7 +280,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, | |||
280 | unsigned char *buff; | 280 | unsigned char *buff; |
281 | struct qstr qstr = {.name = name, .len = len }; | 281 | struct qstr qstr = {.name = name, .len = len }; |
282 | 282 | ||
283 | if (len && IS_CASEFOLDED(dir)) { | 283 | if (len && IS_CASEFOLDED(dir) && um) { |
284 | buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); | 284 | buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); |
285 | if (!buff) | 285 | if (!buff) |
286 | return -ENOMEM; | 286 | return -ENOMEM; |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 88cdf3c90bd1..2fec62d764fa 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -1416,7 +1416,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file, | |||
1416 | err = ext4_htree_store_dirent(dir_file, hinfo->hash, | 1416 | err = ext4_htree_store_dirent(dir_file, hinfo->hash, |
1417 | hinfo->minor_hash, de, &tmp_str); | 1417 | hinfo->minor_hash, de, &tmp_str); |
1418 | if (err) { | 1418 | if (err) { |
1419 | count = err; | 1419 | ret = err; |
1420 | goto out; | 1420 | goto out; |
1421 | } | 1421 | } |
1422 | count++; | 1422 | count++; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d0dc0e3463db..123e3dee7733 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -527,7 +527,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
527 | return -EFSCORRUPTED; | 527 | return -EFSCORRUPTED; |
528 | 528 | ||
529 | /* Lookup extent status tree firstly */ | 529 | /* Lookup extent status tree firstly */ |
530 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 530 | if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { |
531 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 531 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
532 | map->m_pblk = ext4_es_pblock(&es) + | 532 | map->m_pblk = ext4_es_pblock(&es) + |
533 | map->m_lblk - es.es_lblk; | 533 | map->m_lblk - es.es_lblk; |
@@ -695,7 +695,7 @@ found: | |||
695 | * extent status tree. | 695 | * extent status tree. |
696 | */ | 696 | */ |
697 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && | 697 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && |
698 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 698 | ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { |
699 | if (ext4_es_is_written(&es)) | 699 | if (ext4_es_is_written(&es)) |
700 | goto out_sem; | 700 | goto out_sem; |
701 | } | 701 | } |
@@ -1024,7 +1024,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
1024 | bh = ext4_getblk(handle, inode, block, map_flags); | 1024 | bh = ext4_getblk(handle, inode, block, map_flags); |
1025 | if (IS_ERR(bh)) | 1025 | if (IS_ERR(bh)) |
1026 | return bh; | 1026 | return bh; |
1027 | if (!bh || buffer_uptodate(bh)) | 1027 | if (!bh || ext4_buffer_uptodate(bh)) |
1028 | return bh; | 1028 | return bh; |
1029 | ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); | 1029 | ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); |
1030 | wait_on_buffer(bh); | 1030 | wait_on_buffer(bh); |
@@ -1051,7 +1051,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, | |||
1051 | 1051 | ||
1052 | for (i = 0; i < bh_count; i++) | 1052 | for (i = 0; i < bh_count; i++) |
1053 | /* Note that NULL bhs[i] is valid because of holes. */ | 1053 | /* Note that NULL bhs[i] is valid because of holes. */ |
1054 | if (bhs[i] && !buffer_uptodate(bhs[i])) | 1054 | if (bhs[i] && !ext4_buffer_uptodate(bhs[i])) |
1055 | ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, | 1055 | ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, |
1056 | &bhs[i]); | 1056 | &bhs[i]); |
1057 | 1057 | ||
@@ -1656,49 +1656,6 @@ void ext4_da_release_space(struct inode *inode, int to_free) | |||
1656 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); | 1656 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | static void ext4_da_page_release_reservation(struct page *page, | ||
1660 | unsigned int offset, | ||
1661 | unsigned int length) | ||
1662 | { | ||
1663 | int contiguous_blks = 0; | ||
1664 | struct buffer_head *head, *bh; | ||
1665 | unsigned int curr_off = 0; | ||
1666 | struct inode *inode = page->mapping->host; | ||
1667 | unsigned int stop = offset + length; | ||
1668 | ext4_fsblk_t lblk; | ||
1669 | |||
1670 | BUG_ON(stop > PAGE_SIZE || stop < length); | ||
1671 | |||
1672 | head = page_buffers(page); | ||
1673 | bh = head; | ||
1674 | do { | ||
1675 | unsigned int next_off = curr_off + bh->b_size; | ||
1676 | |||
1677 | if (next_off > stop) | ||
1678 | break; | ||
1679 | |||
1680 | if ((offset <= curr_off) && (buffer_delay(bh))) { | ||
1681 | contiguous_blks++; | ||
1682 | clear_buffer_delay(bh); | ||
1683 | } else if (contiguous_blks) { | ||
1684 | lblk = page->index << | ||
1685 | (PAGE_SHIFT - inode->i_blkbits); | ||
1686 | lblk += (curr_off >> inode->i_blkbits) - | ||
1687 | contiguous_blks; | ||
1688 | ext4_es_remove_blks(inode, lblk, contiguous_blks); | ||
1689 | contiguous_blks = 0; | ||
1690 | } | ||
1691 | curr_off = next_off; | ||
1692 | } while ((bh = bh->b_this_page) != head); | ||
1693 | |||
1694 | if (contiguous_blks) { | ||
1695 | lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); | ||
1696 | lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; | ||
1697 | ext4_es_remove_blks(inode, lblk, contiguous_blks); | ||
1698 | } | ||
1699 | |||
1700 | } | ||
1701 | |||
1702 | /* | 1659 | /* |
1703 | * Delayed allocation stuff | 1660 | * Delayed allocation stuff |
1704 | */ | 1661 | */ |
@@ -1878,7 +1835,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1878 | (unsigned long) map->m_lblk); | 1835 | (unsigned long) map->m_lblk); |
1879 | 1836 | ||
1880 | /* Lookup extent status tree firstly */ | 1837 | /* Lookup extent status tree firstly */ |
1881 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1838 | if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { |
1882 | if (ext4_es_is_hole(&es)) { | 1839 | if (ext4_es_is_hole(&es)) { |
1883 | retval = 0; | 1840 | retval = 0; |
1884 | down_read(&EXT4_I(inode)->i_data_sem); | 1841 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -2800,15 +2757,6 @@ static int ext4_writepages(struct address_space *mapping, | |||
2800 | goto out_writepages; | 2757 | goto out_writepages; |
2801 | } | 2758 | } |
2802 | 2759 | ||
2803 | if (ext4_should_dioread_nolock(inode)) { | ||
2804 | /* | ||
2805 | * We may need to convert up to one extent per block in | ||
2806 | * the page and we may dirty the inode. | ||
2807 | */ | ||
2808 | rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, | ||
2809 | PAGE_SIZE >> inode->i_blkbits); | ||
2810 | } | ||
2811 | |||
2812 | /* | 2760 | /* |
2813 | * If we have inline data and arrive here, it means that | 2761 | * If we have inline data and arrive here, it means that |
2814 | * we will soon create the block for the 1st page, so | 2762 | * we will soon create the block for the 1st page, so |
@@ -2827,6 +2775,15 @@ static int ext4_writepages(struct address_space *mapping, | |||
2827 | ext4_journal_stop(handle); | 2775 | ext4_journal_stop(handle); |
2828 | } | 2776 | } |
2829 | 2777 | ||
2778 | if (ext4_should_dioread_nolock(inode)) { | ||
2779 | /* | ||
2780 | * We may need to convert up to one extent per block in | ||
2781 | * the page and we may dirty the inode. | ||
2782 | */ | ||
2783 | rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, | ||
2784 | PAGE_SIZE >> inode->i_blkbits); | ||
2785 | } | ||
2786 | |||
2830 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2787 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2831 | range_whole = 1; | 2788 | range_whole = 1; |
2832 | 2789 | ||
@@ -3242,24 +3199,6 @@ static int ext4_da_write_end(struct file *file, | |||
3242 | return ret ? ret : copied; | 3199 | return ret ? ret : copied; |
3243 | } | 3200 | } |
3244 | 3201 | ||
3245 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, | ||
3246 | unsigned int length) | ||
3247 | { | ||
3248 | /* | ||
3249 | * Drop reserved blocks | ||
3250 | */ | ||
3251 | BUG_ON(!PageLocked(page)); | ||
3252 | if (!page_has_buffers(page)) | ||
3253 | goto out; | ||
3254 | |||
3255 | ext4_da_page_release_reservation(page, offset, length); | ||
3256 | |||
3257 | out: | ||
3258 | ext4_invalidatepage(page, offset, length); | ||
3259 | |||
3260 | return; | ||
3261 | } | ||
3262 | |||
3263 | /* | 3202 | /* |
3264 | * Force all delayed allocation blocks to be allocated for a given inode. | 3203 | * Force all delayed allocation blocks to be allocated for a given inode. |
3265 | */ | 3204 | */ |
@@ -4002,7 +3941,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
4002 | .write_end = ext4_da_write_end, | 3941 | .write_end = ext4_da_write_end, |
4003 | .set_page_dirty = ext4_set_page_dirty, | 3942 | .set_page_dirty = ext4_set_page_dirty, |
4004 | .bmap = ext4_bmap, | 3943 | .bmap = ext4_bmap, |
4005 | .invalidatepage = ext4_da_invalidatepage, | 3944 | .invalidatepage = ext4_invalidatepage, |
4006 | .releasepage = ext4_releasepage, | 3945 | .releasepage = ext4_releasepage, |
4007 | .direct_IO = ext4_direct_IO, | 3946 | .direct_IO = ext4_direct_IO, |
4008 | .migratepage = buffer_migrate_page, | 3947 | .migratepage = buffer_migrate_page, |
@@ -4314,6 +4253,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
4314 | 4253 | ||
4315 | trace_ext4_punch_hole(inode, offset, length, 0); | 4254 | trace_ext4_punch_hole(inode, offset, length, 0); |
4316 | 4255 | ||
4256 | ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); | ||
4257 | if (ext4_has_inline_data(inode)) { | ||
4258 | down_write(&EXT4_I(inode)->i_mmap_sem); | ||
4259 | ret = ext4_convert_inline_data(inode); | ||
4260 | up_write(&EXT4_I(inode)->i_mmap_sem); | ||
4261 | if (ret) | ||
4262 | return ret; | ||
4263 | } | ||
4264 | |||
4317 | /* | 4265 | /* |
4318 | * Write out all dirty pages to avoid race conditions | 4266 | * Write out all dirty pages to avoid race conditions |
4319 | * Then release them. | 4267 | * Then release them. |
@@ -5137,6 +5085,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, | |||
5137 | "iget: bogus i_mode (%o)", inode->i_mode); | 5085 | "iget: bogus i_mode (%o)", inode->i_mode); |
5138 | goto bad_inode; | 5086 | goto bad_inode; |
5139 | } | 5087 | } |
5088 | if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) | ||
5089 | ext4_error_inode(inode, function, line, 0, | ||
5090 | "casefold flag without casefold feature"); | ||
5140 | brelse(iloc.bh); | 5091 | brelse(iloc.bh); |
5141 | 5092 | ||
5142 | unlock_new_inode(inode); | 5093 | unlock_new_inode(inode); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5444d49cbf09..0b7f316fd30f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -745,6 +745,74 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa) | |||
745 | fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); | 745 | fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); |
746 | } | 746 | } |
747 | 747 | ||
748 | /* copied from fs/ioctl.c */ | ||
749 | static int fiemap_check_ranges(struct super_block *sb, | ||
750 | u64 start, u64 len, u64 *new_len) | ||
751 | { | ||
752 | u64 maxbytes = (u64) sb->s_maxbytes; | ||
753 | |||
754 | *new_len = len; | ||
755 | |||
756 | if (len == 0) | ||
757 | return -EINVAL; | ||
758 | |||
759 | if (start > maxbytes) | ||
760 | return -EFBIG; | ||
761 | |||
762 | /* | ||
763 | * Shrink request scope to what the fs can actually handle. | ||
764 | */ | ||
765 | if (len > maxbytes || (maxbytes - len) < start) | ||
766 | *new_len = maxbytes - start; | ||
767 | |||
768 | return 0; | ||
769 | } | ||
770 | |||
771 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
772 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
773 | |||
774 | static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) | ||
775 | { | ||
776 | struct fiemap fiemap; | ||
777 | struct fiemap __user *ufiemap = (struct fiemap __user *) arg; | ||
778 | struct fiemap_extent_info fieinfo = { 0, }; | ||
779 | struct inode *inode = file_inode(filp); | ||
780 | struct super_block *sb = inode->i_sb; | ||
781 | u64 len; | ||
782 | int error; | ||
783 | |||
784 | if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) | ||
785 | return -EFAULT; | ||
786 | |||
787 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
788 | return -EINVAL; | ||
789 | |||
790 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
791 | &len); | ||
792 | if (error) | ||
793 | return error; | ||
794 | |||
795 | fieinfo.fi_flags = fiemap.fm_flags; | ||
796 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
797 | fieinfo.fi_extents_start = ufiemap->fm_extents; | ||
798 | |||
799 | if (fiemap.fm_extent_count != 0 && | ||
800 | !access_ok(fieinfo.fi_extents_start, | ||
801 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
802 | return -EFAULT; | ||
803 | |||
804 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
805 | filemap_write_and_wait(inode->i_mapping); | ||
806 | |||
807 | error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, len); | ||
808 | fiemap.fm_flags = fieinfo.fi_flags; | ||
809 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
810 | if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) | ||
811 | error = -EFAULT; | ||
812 | |||
813 | return error; | ||
814 | } | ||
815 | |||
748 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 816 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
749 | { | 817 | { |
750 | struct inode *inode = file_inode(filp); | 818 | struct inode *inode = file_inode(filp); |
@@ -1142,6 +1210,33 @@ resizefs_out: | |||
1142 | return -EOPNOTSUPP; | 1210 | return -EOPNOTSUPP; |
1143 | return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); | 1211 | return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); |
1144 | 1212 | ||
1213 | case EXT4_IOC_CLEAR_ES_CACHE: | ||
1214 | { | ||
1215 | if (!inode_owner_or_capable(inode)) | ||
1216 | return -EACCES; | ||
1217 | ext4_clear_inode_es(inode); | ||
1218 | return 0; | ||
1219 | } | ||
1220 | |||
1221 | case EXT4_IOC_GETSTATE: | ||
1222 | { | ||
1223 | __u32 state = 0; | ||
1224 | |||
1225 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED)) | ||
1226 | state |= EXT4_STATE_FLAG_EXT_PRECACHED; | ||
1227 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) | ||
1228 | state |= EXT4_STATE_FLAG_NEW; | ||
1229 | if (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) | ||
1230 | state |= EXT4_STATE_FLAG_NEWENTRY; | ||
1231 | if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) | ||
1232 | state |= EXT4_STATE_FLAG_DA_ALLOC_CLOSE; | ||
1233 | |||
1234 | return put_user(state, (__u32 __user *) arg); | ||
1235 | } | ||
1236 | |||
1237 | case EXT4_IOC_GET_ES_CACHE: | ||
1238 | return ext4_ioctl_get_es_cache(filp, arg); | ||
1239 | |||
1145 | case EXT4_IOC_FSGETXATTR: | 1240 | case EXT4_IOC_FSGETXATTR: |
1146 | { | 1241 | { |
1147 | struct fsxattr fa; | 1242 | struct fsxattr fa; |
@@ -1278,6 +1373,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1278 | case FS_IOC_GETFSMAP: | 1373 | case FS_IOC_GETFSMAP: |
1279 | case FS_IOC_ENABLE_VERITY: | 1374 | case FS_IOC_ENABLE_VERITY: |
1280 | case FS_IOC_MEASURE_VERITY: | 1375 | case FS_IOC_MEASURE_VERITY: |
1376 | case EXT4_IOC_CLEAR_ES_CACHE: | ||
1377 | case EXT4_IOC_GETSTATE: | ||
1378 | case EXT4_IOC_GET_ES_CACHE: | ||
1281 | break; | 1379 | break; |
1282 | default: | 1380 | default: |
1283 | return -ENOIOCTLCMD; | 1381 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 129029534075..a427d2031a8d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1312,7 +1312,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, | |||
1312 | { | 1312 | { |
1313 | int len; | 1313 | int len; |
1314 | 1314 | ||
1315 | if (!IS_CASEFOLDED(dir)) { | 1315 | if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) { |
1316 | cf_name->name = NULL; | 1316 | cf_name->name = NULL; |
1317 | return; | 1317 | return; |
1318 | } | 1318 | } |
@@ -2183,7 +2183,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
2183 | 2183 | ||
2184 | #ifdef CONFIG_UNICODE | 2184 | #ifdef CONFIG_UNICODE |
2185 | if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && | 2185 | if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && |
2186 | utf8_validate(sbi->s_encoding, &dentry->d_name)) | 2186 | sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name)) |
2187 | return -EINVAL; | 2187 | return -EINVAL; |
2188 | #endif | 2188 | #endif |
2189 | 2189 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3db5f17228b7..dd654e53ba3d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1878,6 +1878,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1878 | } else if (token == Opt_commit) { | 1878 | } else if (token == Opt_commit) { |
1879 | if (arg == 0) | 1879 | if (arg == 0) |
1880 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; | 1880 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; |
1881 | else if (arg > INT_MAX / HZ) { | ||
1882 | ext4_msg(sb, KERN_ERR, | ||
1883 | "Invalid commit interval %d, " | ||
1884 | "must be smaller than %d", | ||
1885 | arg, INT_MAX / HZ); | ||
1886 | return -1; | ||
1887 | } | ||
1881 | sbi->s_commit_interval = HZ * arg; | 1888 | sbi->s_commit_interval = HZ * arg; |
1882 | } else if (token == Opt_debug_want_extra_isize) { | 1889 | } else if (token == Opt_debug_want_extra_isize) { |
1883 | sbi->s_want_extra_isize = arg; | 1890 | sbi->s_want_extra_isize = arg; |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 69b9bc329964..f08073d7bbf5 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -638,10 +638,8 @@ static void flush_descriptor(journal_t *journal, | |||
638 | { | 638 | { |
639 | jbd2_journal_revoke_header_t *header; | 639 | jbd2_journal_revoke_header_t *header; |
640 | 640 | ||
641 | if (is_journal_aborted(journal)) { | 641 | if (is_journal_aborted(journal)) |
642 | put_bh(descriptor); | ||
643 | return; | 642 | return; |
644 | } | ||
645 | 643 | ||
646 | header = (jbd2_journal_revoke_header_t *)descriptor->b_data; | 644 | header = (jbd2_journal_revoke_header_t *)descriptor->b_data; |
647 | header->r_count = cpu_to_be32(offset); | 645 | header->r_count = cpu_to_be32(offset); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 990e7b5062e7..afc06daee5bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -569,6 +569,9 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, | |||
569 | } | 569 | } |
570 | handle->h_type = type; | 570 | handle->h_type = type; |
571 | handle->h_line_no = line_no; | 571 | handle->h_line_no = line_no; |
572 | trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, | ||
573 | handle->h_transaction->t_tid, type, | ||
574 | line_no, handle->h_buffer_credits); | ||
572 | return 0; | 575 | return 0; |
573 | } | 576 | } |
574 | EXPORT_SYMBOL(jbd2_journal_start_reserved); | 577 | EXPORT_SYMBOL(jbd2_journal_start_reserved); |
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 71ca4d047d65..2a878b739115 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c | |||
@@ -154,7 +154,7 @@ static int utf8_parse_version(const char *version, unsigned int *maj, | |||
154 | { | 154 | { |
155 | substring_t args[3]; | 155 | substring_t args[3]; |
156 | char version_string[12]; | 156 | char version_string[12]; |
157 | const struct match_token token[] = { | 157 | static const struct match_token token[] = { |
158 | {1, "%d.%d.%d"}, | 158 | {1, "%d.%d.%d"}, |
159 | {0, NULL} | 159 | {0, NULL} |
160 | }; | 160 | }; |
diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c index 6c1a36bbf6ad..6fe8af7edccb 100644 --- a/fs/unicode/utf8-selftest.c +++ b/fs/unicode/utf8-selftest.c | |||
@@ -35,7 +35,7 @@ unsigned int total_tests; | |||
35 | #define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__) | 35 | #define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__) |
36 | #define test(cond) _test(cond, __func__, __LINE__, "") | 36 | #define test(cond) _test(cond, __func__, __LINE__, "") |
37 | 37 | ||
38 | const static struct { | 38 | static const struct { |
39 | /* UTF-8 strings in this vector _must_ be NULL-terminated. */ | 39 | /* UTF-8 strings in this vector _must_ be NULL-terminated. */ |
40 | unsigned char str[10]; | 40 | unsigned char str[10]; |
41 | unsigned char dec[10]; | 41 | unsigned char dec[10]; |
@@ -89,7 +89,7 @@ const static struct { | |||
89 | 89 | ||
90 | }; | 90 | }; |
91 | 91 | ||
92 | const static struct { | 92 | static const struct { |
93 | /* UTF-8 strings in this vector _must_ be NULL-terminated. */ | 93 | /* UTF-8 strings in this vector _must_ be NULL-terminated. */ |
94 | unsigned char str[30]; | 94 | unsigned char str[30]; |
95 | unsigned char ncf[30]; | 95 | unsigned char ncf[30]; |