diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-11 16:23:48 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-11 16:23:48 -0400 |
commit | fd048088306656824958e7783ffcee27e241b361 (patch) | |
tree | be11bebe3bbd2cac88ff27bd3c7450339d21bdc7 | |
parent | 5c3c4d9b5810c9aabd8c05219c62ca088aa83eb0 (diff) | |
parent | 03010a3350301baac2154fa66de925ae2981b7e3 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (43 commits)
ext4: Rename ext4dev to ext4
ext4: Avoid double dirtying of super block in ext4_put_super()
Update ext4 MAINTAINERS file
Hook ext4 to the vfs fiemap interface.
generic block based fiemap implementation
ocfs2: fiemap support
vfs: vfs-level fiemap interface
ext4: fix xattr deadlock
jbd2: Fix buffer head leak when writing the commit block
ext4: Add debugging markers that can be used by systemtap
jbd2: abort instead of waiting for nonexistent transaction
ext4: fix initialization of UNINIT bitmap blocks
ext4: Remove old legacy block allocator
ext4: Use readahead when reading an inode from the inode table
ext4: Improve the documentation for ext4's /proc tunables
ext4: Combine proc file handling into a single set of functions
ext4: move /proc setup and teardown out of mballoc.c
ext4: Don't use 'struct dentry' for internal lookups
ext4/jbd2: Avoid WARN() messages when failing to write to the superblock
ext4: use percpu data structures for lg_prealloc_list
...
51 files changed, 2581 insertions, 2533 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 0d5394920a31..74484e696405 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org | |||
32 | you will need to merge your changes with the version from e2fsprogs | 32 | you will need to merge your changes with the version from e2fsprogs |
33 | 1.41.x. | 33 | 1.41.x. |
34 | 34 | ||
35 | - Create a new filesystem using the ext4dev filesystem type: | 35 | - Create a new filesystem using the ext4 filesystem type: |
36 | 36 | ||
37 | # mke2fs -t ext4dev /dev/hda1 | 37 | # mke2fs -t ext4 /dev/hda1 |
38 | 38 | ||
39 | Or configure an existing ext3 filesystem to support extents and set | 39 | Or configure an existing ext3 filesystem to support extents and set |
40 | the test_fs flag to indicate that it's ok for an in-development | 40 | the test_fs flag to indicate that it's ok for an in-development |
@@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org | |||
47 | 47 | ||
48 | # tune2fs -I 256 /dev/hda1 | 48 | # tune2fs -I 256 /dev/hda1 |
49 | 49 | ||
50 | (Note: we currently do not have tools to convert an ext4dev | 50 | (Note: we currently do not have tools to convert an ext4 |
51 | filesystem back to ext3; so please do not do try this on production | 51 | filesystem back to ext3; so please do not do try this on production |
52 | filesystems.) | 52 | filesystems.) |
53 | 53 | ||
54 | - Mounting: | 54 | - Mounting: |
55 | 55 | ||
56 | # mount -t ext4dev /dev/hda1 /wherever | 56 | # mount -t ext4 /dev/hda1 /wherever |
57 | 57 | ||
58 | - When comparing performance with other filesystems, remember that | 58 | - When comparing performance with other filesystems, remember that |
59 | ext3/4 by default offers higher data integrity guarantees than most. | 59 | ext3/4 by default offers higher data integrity guarantees than most. |
@@ -177,6 +177,11 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in | |||
177 | your disks are battery-backed in one way or another, | 177 | your disks are battery-backed in one way or another, |
178 | disabling barriers may safely improve performance. | 178 | disabling barriers may safely improve performance. |
179 | 179 | ||
180 | inode_readahead=n This tuning parameter controls the maximum | ||
181 | number of inode table blocks that ext4's inode | ||
182 | table readahead algorithm will pre-read into | ||
183 | the buffer cache. The default value is 32 blocks. | ||
184 | |||
180 | orlov (*) This enables the new Orlov block allocator. It is | 185 | orlov (*) This enables the new Orlov block allocator. It is |
181 | enabled by default. | 186 | enabled by default. |
182 | 187 | ||
@@ -252,6 +257,7 @@ stripe=n Number of filesystem blocks that mballoc will try | |||
252 | delalloc (*) Deferring block allocation until write-out time. | 257 | delalloc (*) Deferring block allocation until write-out time. |
253 | nodelalloc Disable delayed allocation. Blocks are allocation | 258 | nodelalloc Disable delayed allocation. Blocks are allocation |
254 | when data is copied from user to page cache. | 259 | when data is copied from user to page cache. |
260 | |||
255 | Data Mode | 261 | Data Mode |
256 | ========= | 262 | ========= |
257 | There are 3 different data modes: | 263 | There are 3 different data modes: |
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 000000000000..1e3defcfe50b --- /dev/null +++ b/Documentation/filesystems/fiemap.txt | |||
@@ -0,0 +1,228 @@ | |||
1 | ============ | ||
2 | Fiemap Ioctl | ||
3 | ============ | ||
4 | |||
5 | The fiemap ioctl is an efficient method for userspace to get file | ||
6 | extent mappings. Instead of block-by-block mapping (such as bmap), fiemap | ||
7 | returns a list of extents. | ||
8 | |||
9 | |||
10 | Request Basics | ||
11 | -------------- | ||
12 | |||
13 | A fiemap request is encoded within struct fiemap: | ||
14 | |||
15 | struct fiemap { | ||
16 | __u64 fm_start; /* logical offset (inclusive) at | ||
17 | * which to start mapping (in) */ | ||
18 | __u64 fm_length; /* logical length of mapping which | ||
19 | * userspace cares about (in) */ | ||
20 | __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ | ||
21 | __u32 fm_mapped_extents; /* number of extents that were | ||
22 | * mapped (out) */ | ||
23 | __u32 fm_extent_count; /* size of fm_extents array (in) */ | ||
24 | __u32 fm_reserved; | ||
25 | struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ | ||
26 | }; | ||
27 | |||
28 | |||
29 | fm_start, and fm_length specify the logical range within the file | ||
30 | which the process would like mappings for. Extents returned mirror | ||
31 | those on disk - that is, the logical offset of the 1st returned extent | ||
32 | may start before fm_start, and the range covered by the last returned | ||
33 | extent may end after fm_length. All offsets and lengths are in bytes. | ||
34 | |||
35 | Certain flags to modify the way in which mappings are looked up can be | ||
36 | set in fm_flags. If the kernel doesn't understand some particular | ||
37 | flags, it will return EBADR and the contents of fm_flags will contain | ||
38 | the set of flags which caused the error. If the kernel is compatible | ||
39 | with all flags passed, the contents of fm_flags will be unmodified. | ||
40 | It is up to userspace to determine whether rejection of a particular | ||
41 | flag is fatal to it's operation. This scheme is intended to allow the | ||
42 | fiemap interface to grow in the future but without losing | ||
43 | compatibility with old software. | ||
44 | |||
45 | fm_extent_count specifies the number of elements in the fm_extents[] array | ||
46 | that can be used to return extents. If fm_extent_count is zero, then the | ||
47 | fm_extents[] array is ignored (no extents will be returned), and the | ||
48 | fm_mapped_extents count will hold the number of extents needed in | ||
49 | fm_extents[] to hold the file's current mapping. Note that there is | ||
50 | nothing to prevent the file from changing between calls to FIEMAP. | ||
51 | |||
52 | The following flags can be set in fm_flags: | ||
53 | |||
54 | * FIEMAP_FLAG_SYNC | ||
55 | If this flag is set, the kernel will sync the file before mapping extents. | ||
56 | |||
57 | * FIEMAP_FLAG_XATTR | ||
58 | If this flag is set, the extents returned will describe the inodes | ||
59 | extended attribute lookup tree, instead of it's data tree. | ||
60 | |||
61 | |||
62 | Extent Mapping | ||
63 | -------------- | ||
64 | |||
65 | Extent information is returned within the embedded fm_extents array | ||
66 | which userspace must allocate along with the fiemap structure. The | ||
67 | number of elements in the fiemap_extents[] array should be passed via | ||
68 | fm_extent_count. The number of extents mapped by kernel will be | ||
69 | returned via fm_mapped_extents. If the number of fiemap_extents | ||
70 | allocated is less than would be required to map the requested range, | ||
71 | the maximum number of extents that can be mapped in the fm_extent[] | ||
72 | array will be returned and fm_mapped_extents will be equal to | ||
73 | fm_extent_count. In that case, the last extent in the array will not | ||
74 | complete the requested range and will not have the FIEMAP_EXTENT_LAST | ||
75 | flag set (see the next section on extent flags). | ||
76 | |||
77 | Each extent is described by a single fiemap_extent structure as | ||
78 | returned in fm_extents. | ||
79 | |||
80 | struct fiemap_extent { | ||
81 | __u64 fe_logical; /* logical offset in bytes for the start of | ||
82 | * the extent */ | ||
83 | __u64 fe_physical; /* physical offset in bytes for the start | ||
84 | * of the extent */ | ||
85 | __u64 fe_length; /* length in bytes for the extent */ | ||
86 | __u64 fe_reserved64[2]; | ||
87 | __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ | ||
88 | __u32 fe_reserved[3]; | ||
89 | }; | ||
90 | |||
91 | All offsets and lengths are in bytes and mirror those on disk. It is valid | ||
92 | for an extents logical offset to start before the request or it's logical | ||
93 | length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is | ||
94 | returned, fe_logical, fe_physical, and fe_length will be aligned to the | ||
95 | block size of the file system. With the exception of extents flagged as | ||
96 | FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. | ||
97 | |||
98 | The fe_flags field contains flags which describe the extent returned. | ||
99 | A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in | ||
100 | the file so that the process making fiemap calls can determine when no | ||
101 | more extents are available, without having to call the ioctl again. | ||
102 | |||
103 | Some flags are intentionally vague and will always be set in the | ||
104 | presence of other more specific flags. This way a program looking for | ||
105 | a general property does not have to know all existing and future flags | ||
106 | which imply that property. | ||
107 | |||
108 | For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL | ||
109 | are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking | ||
110 | for inline or tail-packed data can key on the specific flag. Software | ||
111 | which simply cares not to try operating on non-aligned extents | ||
112 | however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to | ||
113 | worry about all present and future flags which might imply unaligned | ||
114 | data. Note that the opposite is not true - it would be valid for | ||
115 | FIEMAP_EXTENT_NOT_ALIGNED to appear alone. | ||
116 | |||
117 | * FIEMAP_EXTENT_LAST | ||
118 | This is the last extent in the file. A mapping attempt past this | ||
119 | extent will return nothing. | ||
120 | |||
121 | * FIEMAP_EXTENT_UNKNOWN | ||
122 | The location of this extent is currently unknown. This may indicate | ||
123 | the data is stored on an inaccessible volume or that no storage has | ||
124 | been allocated for the file yet. | ||
125 | |||
126 | * FIEMAP_EXTENT_DELALLOC | ||
127 | - This will also set FIEMAP_EXTENT_UNKNOWN. | ||
128 | Delayed allocation - while there is data for this extent, it's | ||
129 | physical location has not been allocated yet. | ||
130 | |||
131 | * FIEMAP_EXTENT_ENCODED | ||
132 | This extent does not consist of plain filesystem blocks but is | ||
133 | encoded (e.g. encrypted or compressed). Reading the data in this | ||
134 | extent via I/O to the block device will have undefined results. | ||
135 | |||
136 | Note that it is *always* undefined to try to update the data | ||
137 | in-place by writing to the indicated location without the | ||
138 | assistance of the filesystem, or to access the data using the | ||
139 | information returned by the FIEMAP interface while the filesystem | ||
140 | is mounted. In other words, user applications may only read the | ||
141 | extent data via I/O to the block device while the filesystem is | ||
142 | unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is | ||
143 | clear; user applications must not try reading or writing to the | ||
144 | filesystem via the block device under any other circumstances. | ||
145 | |||
146 | * FIEMAP_EXTENT_DATA_ENCRYPTED | ||
147 | - This will also set FIEMAP_EXTENT_ENCODED | ||
148 | The data in this extent has been encrypted by the file system. | ||
149 | |||
150 | * FIEMAP_EXTENT_NOT_ALIGNED | ||
151 | Extent offsets and length are not guaranteed to be block aligned. | ||
152 | |||
153 | * FIEMAP_EXTENT_DATA_INLINE | ||
154 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
155 | Data is located within a meta data block. | ||
156 | |||
157 | * FIEMAP_EXTENT_DATA_TAIL | ||
158 | This will also set FIEMAP_EXTENT_NOT_ALIGNED | ||
159 | Data is packed into a block with data from other files. | ||
160 | |||
161 | * FIEMAP_EXTENT_UNWRITTEN | ||
162 | Unwritten extent - the extent is allocated but it's data has not been | ||
163 | initialized. This indicates the extent's data will be all zero if read | ||
164 | through the filesystem but the contents are undefined if read directly from | ||
165 | the device. | ||
166 | |||
167 | * FIEMAP_EXTENT_MERGED | ||
168 | This will be set when a file does not support extents, i.e., it uses a block | ||
169 | based addressing scheme. Since returning an extent for each block back to | ||
170 | userspace would be highly inefficient, the kernel will try to merge most | ||
171 | adjacent blocks into 'extents'. | ||
172 | |||
173 | |||
174 | VFS -> File System Implementation | ||
175 | --------------------------------- | ||
176 | |||
177 | File systems wishing to support fiemap must implement a ->fiemap callback on | ||
178 | their inode_operations structure. The fs ->fiemap call is responsible for | ||
179 | defining it's set of supported fiemap flags, and calling a helper function on | ||
180 | each discovered extent: | ||
181 | |||
182 | struct inode_operations { | ||
183 | ... | ||
184 | |||
185 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | ||
186 | u64 len); | ||
187 | |||
188 | ->fiemap is passed struct fiemap_extent_info which describes the | ||
189 | fiemap request: | ||
190 | |||
191 | struct fiemap_extent_info { | ||
192 | unsigned int fi_flags; /* Flags as passed from user */ | ||
193 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | ||
194 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | ||
195 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ | ||
196 | }; | ||
197 | |||
198 | It is intended that the file system should not need to access any of this | ||
199 | structure directly. | ||
200 | |||
201 | |||
202 | Flag checking should be done at the beginning of the ->fiemap callback via the | ||
203 | fiemap_check_flags() helper: | ||
204 | |||
205 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | ||
206 | |||
207 | The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The | ||
208 | set of fiemap flags which the fs understands should be passed via fs_flags. If | ||
209 | fiemap_check_flags finds invalid user flags, it will place the bad values in | ||
210 | fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from | ||
211 | fiemap_check_flags(), it should immediately exit, returning that error back to | ||
212 | ioctl_fiemap(). | ||
213 | |||
214 | |||
215 | For each extent in the request range, the file system should call | ||
216 | the helper function, fiemap_fill_next_extent(): | ||
217 | |||
218 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | ||
219 | u64 phys, u64 len, u32 flags, u32 dev); | ||
220 | |||
221 | fiemap_fill_next_extent() will use the passed values to populate the | ||
222 | next free extent in the fm_extents array. 'General' extent flags will | ||
223 | automatically be set from specific flags on behalf of the calling file | ||
224 | system so that the userspace API is not broken. | ||
225 | |||
226 | fiemap_fill_next_extent() returns 0 on success, and 1 when the | ||
227 | user-supplied fm_extents array is full. If an error is encountered | ||
228 | while copying the extent to user memory, -EFAULT will be returned. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f566ad9bcb7b..d831d24d2a6c 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -923,45 +923,44 @@ CPUs. | |||
923 | The "procs_blocked" line gives the number of processes currently blocked, | 923 | The "procs_blocked" line gives the number of processes currently blocked, |
924 | waiting for I/O to complete. | 924 | waiting for I/O to complete. |
925 | 925 | ||
926 | |||
926 | 1.9 Ext4 file system parameters | 927 | 1.9 Ext4 file system parameters |
927 | ------------------------------ | 928 | ------------------------------ |
928 | Ext4 file system have one directory per partition under /proc/fs/ext4/ | 929 | |
929 | # ls /proc/fs/ext4/hdc/ | 930 | Information about mounted ext4 file systems can be found in |
930 | group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req | 931 | /proc/fs/ext4. Each mounted filesystem will have a directory in |
931 | stats stream_req | 932 | /proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or |
932 | 933 | /proc/fs/ext4/dm-0). The files in each per-device directory are shown | |
933 | mb_groups: | 934 | in Table 1-10, below. |
934 | This file gives the details of multiblock allocator buddy cache of free blocks | 935 | |
935 | 936 | Table 1-10: Files in /proc/fs/ext4/<devname> | |
936 | mb_history: | 937 | .............................................................................. |
937 | Multiblock allocation history. | 938 | File Content |
938 | 939 | mb_groups details of multiblock allocator buddy cache of free blocks | |
939 | stats: | 940 | mb_history multiblock allocation history |
940 | This file indicate whether the multiblock allocator should start collecting | 941 | stats controls whether the multiblock allocator should start |
941 | statistics. The statistics are shown during unmount | 942 | collecting statistics, which are shown during the unmount |
942 | 943 | group_prealloc the multiblock allocator will round up allocation | |
943 | group_prealloc: | 944 | requests to a multiple of this tuning parameter if the |
944 | The multiblock allocator normalize the block allocation request to | 945 | stripe size is not set in the ext4 superblock |
945 | group_prealloc filesystem blocks if we don't have strip value set. | 946 | max_to_scan The maximum number of extents the multiblock allocator |
946 | The stripe value can be specified at mount time or during mke2fs. | 947 | will search to find the best extent |
947 | 948 | min_to_scan The minimum number of extents the multiblock allocator | |
948 | max_to_scan: | 949 | will search to find the best extent |
949 | How long multiblock allocator can look for a best extent (in found extents) | 950 | order2_req Tuning parameter which controls the minimum size for |
950 | 951 | requests (as a power of 2) where the buddy cache is | |
951 | min_to_scan: | 952 | used |
952 | How long multiblock allocator must look for a best extent | 953 | stream_req Files which have fewer blocks than this tunable |
953 | 954 | parameter will have their blocks allocated out of a | |
954 | order2_req: | 955 | block group specific preallocation pool, so that small |
955 | Multiblock allocator use 2^N search using buddies only for requests greater | 956 | files are packed closely together. Each large file |
956 | than or equal to order2_req. The request size is specfied in file system | 957 | will have its blocks allocated out of its own unique |
957 | blocks. A value of 2 indicate only if the requests are greater than or equal | 958 | preallocation pool. |
958 | to 4 blocks. | 959 | inode_readahead Tuning parameter which controls the maximum number of |
959 | 960 | inode table blocks that ext4's inode table readahead | |
960 | stream_req: | 961 | algorithm will pre-read into the buffer cache |
961 | Files smaller than stream_req are served by the stream allocator, whose | 962 | .............................................................................. |
962 | purpose is to pack requests as close each to other as possible to | 963 | |
963 | produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 | ||
964 | filesystem block size will use group based preallocation. | ||
965 | 964 | ||
966 | ------------------------------------------------------------------------------ | 965 | ------------------------------------------------------------------------------ |
967 | Summary | 966 | Summary |
diff --git a/MAINTAINERS b/MAINTAINERS index 68781ed2b734..587f418ed00d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1659,9 +1659,10 @@ L: linux-ext4@vger.kernel.org | |||
1659 | S: Maintained | 1659 | S: Maintained |
1660 | 1660 | ||
1661 | EXT4 FILE SYSTEM | 1661 | EXT4 FILE SYSTEM |
1662 | P: Stephen Tweedie, Andrew Morton | 1662 | P: Theodore Ts'o |
1663 | M: sct@redhat.com, akpm@linux-foundation.org, adilger@sun.com | 1663 | M: tytso@mit.edu, adilger@sun.com |
1664 | L: linux-ext4@vger.kernel.org | 1664 | L: linux-ext4@vger.kernel.org |
1665 | W: http://ext4.wiki.kernel.org | ||
1665 | S: Maintained | 1666 | S: Maintained |
1666 | 1667 | ||
1667 | F71805F HARDWARE MONITORING DRIVER | 1668 | F71805F HARDWARE MONITORING DRIVER |
diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a8..40183d94b683 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY | |||
136 | If you are not using a security module that requires using | 136 | If you are not using a security module that requires using |
137 | extended attributes for file security labels, say N. | 137 | extended attributes for file security labels, say N. |
138 | 138 | ||
139 | config EXT4DEV_FS | 139 | config EXT4_FS |
140 | tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" | 140 | tristate "The Extended 4 (ext4) filesystem" |
141 | depends on EXPERIMENTAL | ||
142 | select JBD2 | 141 | select JBD2 |
143 | select CRC16 | 142 | select CRC16 |
144 | help | 143 | help |
145 | Ext4dev is a predecessor filesystem of the next generation | 144 | This is the next generation of the ext3 filesystem. |
146 | extended fs ext4, based on ext3 filesystem code. It will be | ||
147 | renamed ext4 fs later, once ext4dev is mature and stabilized. | ||
148 | 145 | ||
149 | Unlike the change from ext2 filesystem to ext3 filesystem, | 146 | Unlike the change from ext2 filesystem to ext3 filesystem, |
150 | the on-disk format of ext4dev is not the same as ext3 any more: | 147 | the on-disk format of ext4 is not forwards compatible with |
151 | it is based on extent maps and it supports 48-bit physical block | 148 | ext3; it is based on extent maps and it supports 48-bit |
152 | numbers. These combined on-disk format changes will allow | 149 | physical block numbers. The ext4 filesystem also supports delayed |
153 | ext4dev/ext4 to handle more than 16 TB filesystem volumes -- | 150 | allocation, persistent preallocation, high resolution time stamps, |
154 | a hard limit that ext3 cannot overcome without changing the | 151 | and a number of other features to improve performance and speed |
155 | on-disk format. | 152 | up fsck time. For more information, please see the web pages at |
156 | 153 | http://ext4.wiki.kernel.org. | |
157 | Other than extent maps and 48-bit block numbers, ext4dev also is | 154 | |
158 | likely to have other new features such as persistent preallocation, | 155 | The ext4 filesystem will support mounting an ext3 |
159 | high resolution time stamps, and larger file support etc. These | 156 | filesystem; while there will be some performance gains from |
160 | features will be added to ext4dev gradually. | 157 | the delayed allocation and inode table readahead, the best |
158 | performance gains will require enabling ext4 features in the | ||
159 | filesystem, or formating a new filesystem as an ext4 | ||
160 | filesystem initially. | ||
161 | 161 | ||
162 | To compile this file system support as a module, choose M here. The | 162 | To compile this file system support as a module, choose M here. The |
163 | module will be called ext4dev. | 163 | module will be called ext4dev. |
164 | 164 | ||
165 | If unsure, say N. | 165 | If unsure, say N. |
166 | 166 | ||
167 | config EXT4DEV_FS_XATTR | 167 | config EXT4DEV_COMPAT |
168 | bool "Ext4dev extended attributes" | 168 | bool "Enable ext4dev compatibility" |
169 | depends on EXT4DEV_FS | 169 | depends on EXT4_FS |
170 | help | ||
171 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
172 | renamed from ext4dev to ext4. Unfortunately there are some | ||
173 | lagecy userspace programs (such as klibc's fstype) have | ||
174 | "ext4dev" hardcoded. | ||
175 | |||
176 | To enable backwards compatibility so that systems that are | ||
177 | still expecting to mount ext4 filesystems using ext4dev, | ||
178 | chose Y here. This feature will go away by 2.6.31, so | ||
179 | please arrange to get your userspace programs fixed! | ||
180 | |||
181 | config EXT4_FS_XATTR | ||
182 | bool "Ext4 extended attributes" | ||
183 | depends on EXT4_FS | ||
170 | default y | 184 | default y |
171 | help | 185 | help |
172 | Extended attributes are name:value pairs associated with inodes by | 186 | Extended attributes are name:value pairs associated with inodes by |
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR | |||
175 | 189 | ||
176 | If unsure, say N. | 190 | If unsure, say N. |
177 | 191 | ||
178 | You need this for POSIX ACL support on ext4dev/ext4. | 192 | You need this for POSIX ACL support on ext4. |
179 | 193 | ||
180 | config EXT4DEV_FS_POSIX_ACL | 194 | config EXT4_FS_POSIX_ACL |
181 | bool "Ext4dev POSIX Access Control Lists" | 195 | bool "Ext4 POSIX Access Control Lists" |
182 | depends on EXT4DEV_FS_XATTR | 196 | depends on EXT4_FS_XATTR |
183 | select FS_POSIX_ACL | 197 | select FS_POSIX_ACL |
184 | help | 198 | help |
185 | POSIX Access Control Lists (ACLs) support permissions for users and | 199 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL | |||
190 | 204 | ||
191 | If you don't know what Access Control Lists are, say N | 205 | If you don't know what Access Control Lists are, say N |
192 | 206 | ||
193 | config EXT4DEV_FS_SECURITY | 207 | config EXT4_FS_SECURITY |
194 | bool "Ext4dev Security Labels" | 208 | bool "Ext4 Security Labels" |
195 | depends on EXT4DEV_FS_XATTR | 209 | depends on EXT4_FS_XATTR |
196 | help | 210 | help |
197 | Security labels support alternative access control models | 211 | Security labels support alternative access control models |
198 | implemented by security modules like SELinux. This option | 212 | implemented by security modules like SELinux. This option |
199 | enables an extended attribute handler for file security | 213 | enables an extended attribute handler for file security |
200 | labels in the ext4dev/ext4 filesystem. | 214 | labels in the ext4 filesystem. |
201 | 215 | ||
202 | If you are not using a security module that requires using | 216 | If you are not using a security module that requires using |
203 | extended attributes for file security labels, say N. | 217 | extended attributes for file security labels, say N. |
@@ -240,22 +254,22 @@ config JBD2 | |||
240 | help | 254 | help |
241 | This is a generic journaling layer for block devices that support | 255 | This is a generic journaling layer for block devices that support |
242 | both 32-bit and 64-bit block numbers. It is currently used by | 256 | both 32-bit and 64-bit block numbers. It is currently used by |
243 | the ext4dev/ext4 filesystem, but it could also be used to add | 257 | the ext4 filesystem, but it could also be used to add |
244 | journal support to other file systems or block devices such | 258 | journal support to other file systems or block devices such |
245 | as RAID or LVM. | 259 | as RAID or LVM. |
246 | 260 | ||
247 | If you are using ext4dev/ext4, you need to say Y here. If you are not | 261 | If you are using ext4, you need to say Y here. If you are not |
248 | using ext4dev/ext4 then you will probably want to say N. | 262 | using ext4 then you will probably want to say N. |
249 | 263 | ||
250 | To compile this device as a module, choose M here. The module will be | 264 | To compile this device as a module, choose M here. The module will be |
251 | called jbd2. If you are compiling ext4dev/ext4 into the kernel, | 265 | called jbd2. If you are compiling ext4 into the kernel, |
252 | you cannot compile this code as a module. | 266 | you cannot compile this code as a module. |
253 | 267 | ||
254 | config JBD2_DEBUG | 268 | config JBD2_DEBUG |
255 | bool "JBD2 (ext4dev/ext4) debugging support" | 269 | bool "JBD2 (ext4) debugging support" |
256 | depends on JBD2 && DEBUG_FS | 270 | depends on JBD2 && DEBUG_FS |
257 | help | 271 | help |
258 | If you are using the ext4dev/ext4 journaled file system (or | 272 | If you are using the ext4 journaled file system (or |
259 | potentially any other filesystem/device using JBD2), this option | 273 | potentially any other filesystem/device using JBD2), this option |
260 | allows you to enable debugging output while the system is running, | 274 | allows you to enable debugging output while the system is running, |
261 | in order to help track down any problems you are having. | 275 | in order to help track down any problems you are having. |
@@ -270,9 +284,9 @@ config JBD2_DEBUG | |||
270 | config FS_MBCACHE | 284 | config FS_MBCACHE |
271 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) | 285 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) |
272 | tristate | 286 | tristate |
273 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR | 287 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR |
274 | default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y | 288 | default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y |
275 | default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m | 289 | default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m |
276 | 290 | ||
277 | config REISERFS_FS | 291 | config REISERFS_FS |
278 | tristate "Reiserfs support" | 292 | tristate "Reiserfs support" |
diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..de404b00eb0c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -69,7 +69,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
69 | # Do not add any filesystems before this line | 69 | # Do not add any filesystems before this line |
70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
72 | obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 72 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev |
73 | obj-$(CONFIG_JBD) += jbd/ | 73 | obj-$(CONFIG_JBD) += jbd/ |
74 | obj-$(CONFIG_JBD2) += jbd2/ | 74 | obj-$(CONFIG_JBD2) += jbd2/ |
75 | obj-$(CONFIG_EXT2_FS) += ext2/ | 75 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33b..bae998c1e44e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); | |||
133 | extern int ext2_setattr (struct dentry *, struct iattr *); | 133 | extern int ext2_setattr (struct dentry *, struct iattr *); |
134 | extern void ext2_set_inode_flags(struct inode *inode); | 134 | extern void ext2_set_inode_flags(struct inode *inode); |
135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
136 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
137 | u64 start, u64 len); | ||
136 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | 138 | int __ext2_write_begin(struct file *file, struct address_space *mapping, |
137 | loff_t pos, unsigned len, unsigned flags, | 139 | loff_t pos, unsigned len, unsigned flags, |
138 | struct page **pagep, void **fsdata); | 140 | struct page **pagep, void **fsdata); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c36293..45ed07122182 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { | |||
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .permission = ext2_permission, |
89 | .fiemap = ext2_fiemap, | ||
89 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51f..7658b33e2653 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
33 | #include <linux/mpage.h> | 33 | #include <linux/mpage.h> |
34 | #include <linux/fiemap.h> | ||
34 | #include "ext2.h" | 35 | #include "ext2.h" |
35 | #include "acl.h" | 36 | #include "acl.h" |
36 | #include "xip.h" | 37 | #include "xip.h" |
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ | |||
704 | 705 | ||
705 | } | 706 | } |
706 | 707 | ||
708 | int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
709 | u64 start, u64 len) | ||
710 | { | ||
711 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
712 | ext2_get_block); | ||
713 | } | ||
714 | |||
707 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | 715 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) |
708 | { | 716 | { |
709 | return block_write_full_page(page, ext2_get_block, wbc); | 717 | return block_write_full_page(page, ext2_get_block, wbc); |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d3019..3be1e0689c9a 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { | |||
134 | .removexattr = generic_removexattr, | 134 | .removexattr = generic_removexattr, |
135 | #endif | 135 | #endif |
136 | .permission = ext3_permission, | 136 | .permission = ext3_permission, |
137 | .fiemap = ext3_fiemap, | ||
137 | }; | 138 | }; |
138 | 139 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b111..ebfec4d0148e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | ||
39 | #include "xattr.h" | 40 | #include "xattr.h" |
40 | #include "acl.h" | 41 | #include "acl.h" |
41 | 42 | ||
@@ -981,6 +982,13 @@ out: | |||
981 | return ret; | 982 | return ret; |
982 | } | 983 | } |
983 | 984 | ||
985 | int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
986 | u64 start, u64 len) | ||
987 | { | ||
988 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
989 | ext3_get_block); | ||
990 | } | ||
991 | |||
984 | /* | 992 | /* |
985 | * `handle' can be NULL if create is zero | 993 | * `handle' can be NULL if create is zero |
986 | */ | 994 | */ |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2f..a8ff003a00f7 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -2,12 +2,12 @@ | |||
2 | # Makefile for the linux ext4-filesystem routines. | 2 | # Makefile for the linux ext4-filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
10 | 10 | ||
11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
13 | ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o | 13 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d6..cb45257a246e 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl | 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl |
57 | if the ACL has not been cached */ | 57 | if the ACL has not been cached */ |
58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) | 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) |
59 | 59 | ||
60 | /* acl.c */ | 60 | /* acl.c */ |
61 | extern int ext4_permission (struct inode *, int); | 61 | extern int ext4_permission(struct inode *, int); |
62 | extern int ext4_acl_chmod (struct inode *); | 62 | extern int ext4_acl_chmod(struct inode *); |
63 | extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
64 | 64 | ||
65 | #else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
66 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
67 | #define ext4_permission NULL | 67 | #define ext4_permission NULL |
68 | 68 | ||
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
77 | { | 77 | { |
78 | return 0; | 78 | return 0; |
79 | } | 79 | } |
80 | #endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 80 | #endif /* CONFIG_EXT4_FS_POSIX_ACL */ |
81 | 81 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e9fa960ba6da..bd2ece228827 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, | |||
83 | } | 83 | } |
84 | return used_blocks; | 84 | return used_blocks; |
85 | } | 85 | } |
86 | |||
86 | /* Initializes an uninitialized block bitmap if given, and returns the | 87 | /* Initializes an uninitialized block bitmap if given, and returns the |
87 | * number of blocks free in the group. */ | 88 | * number of blocks free in the group. */ |
88 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 89 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
132 | */ | 133 | */ |
133 | group_blocks = ext4_blocks_count(sbi->s_es) - | 134 | group_blocks = ext4_blocks_count(sbi->s_es) - |
134 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 135 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
135 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); | 136 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); |
136 | } else { | 137 | } else { |
137 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 138 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
138 | } | 139 | } |
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
200 | * @bh: pointer to the buffer head to store the block | 201 | * @bh: pointer to the buffer head to store the block |
201 | * group descriptor | 202 | * group descriptor |
202 | */ | 203 | */ |
203 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 204 | struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, |
204 | ext4_group_t block_group, | 205 | ext4_group_t block_group, |
205 | struct buffer_head ** bh) | 206 | struct buffer_head **bh) |
206 | { | 207 | { |
207 | unsigned long group_desc; | 208 | unsigned long group_desc; |
208 | unsigned long offset; | 209 | unsigned long offset; |
209 | struct ext4_group_desc * desc; | 210 | struct ext4_group_desc *desc; |
210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 211 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
211 | 212 | ||
212 | if (block_group >= sbi->s_groups_count) { | 213 | if (block_group >= sbi->s_groups_count) { |
213 | ext4_error (sb, "ext4_get_group_desc", | 214 | ext4_error(sb, "ext4_get_group_desc", |
214 | "block_group >= groups_count - " | 215 | "block_group >= groups_count - " |
215 | "block_group = %lu, groups_count = %lu", | 216 | "block_group = %lu, groups_count = %lu", |
216 | block_group, sbi->s_groups_count); | 217 | block_group, sbi->s_groups_count); |
217 | 218 | ||
218 | return NULL; | 219 | return NULL; |
219 | } | 220 | } |
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
222 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 223 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
223 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 224 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
224 | if (!sbi->s_group_desc[group_desc]) { | 225 | if (!sbi->s_group_desc[group_desc]) { |
225 | ext4_error (sb, "ext4_get_group_desc", | 226 | ext4_error(sb, "ext4_get_group_desc", |
226 | "Group descriptor not loaded - " | 227 | "Group descriptor not loaded - " |
227 | "block_group = %lu, group_desc = %lu, desc = %lu", | 228 | "block_group = %lu, group_desc = %lu, desc = %lu", |
228 | block_group, group_desc, offset); | 229 | block_group, group_desc, offset); |
229 | return NULL; | 230 | return NULL; |
230 | } | 231 | } |
231 | 232 | ||
@@ -302,8 +303,8 @@ err_out: | |||
302 | struct buffer_head * | 303 | struct buffer_head * |
303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 304 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
304 | { | 305 | { |
305 | struct ext4_group_desc * desc; | 306 | struct ext4_group_desc *desc; |
306 | struct buffer_head * bh = NULL; | 307 | struct buffer_head *bh = NULL; |
307 | ext4_fsblk_t bitmap_blk; | 308 | ext4_fsblk_t bitmap_blk; |
308 | 309 | ||
309 | desc = ext4_get_group_desc(sb, block_group, NULL); | 310 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
318 | block_group, bitmap_blk); | 319 | block_group, bitmap_blk); |
319 | return NULL; | 320 | return NULL; |
320 | } | 321 | } |
321 | if (bh_uptodate_or_lock(bh)) | 322 | if (buffer_uptodate(bh) && |
323 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
322 | return bh; | 324 | return bh; |
323 | 325 | ||
326 | lock_buffer(bh); | ||
324 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 327 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
325 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 328 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
326 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 329 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
345 | */ | 348 | */ |
346 | return bh; | 349 | return bh; |
347 | } | 350 | } |
348 | /* | ||
349 | * The reservation window structure operations | ||
350 | * -------------------------------------------- | ||
351 | * Operations include: | ||
352 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | ||
353 | * | ||
354 | * We use a red-black tree to represent per-filesystem reservation | ||
355 | * windows. | ||
356 | * | ||
357 | */ | ||
358 | |||
359 | /** | ||
360 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
361 | * @rb_root: root of per-filesystem reservation rb tree | ||
362 | * @verbose: verbose mode | ||
363 | * @fn: function which wishes to dump the reservation map | ||
364 | * | ||
365 | * If verbose is turned on, it will print the whole block reservation | ||
366 | * windows(start, end). Otherwise, it will only print out the "bad" windows, | ||
367 | * those windows that overlap with their immediate neighbors. | ||
368 | */ | ||
369 | #if 1 | ||
370 | static void __rsv_window_dump(struct rb_root *root, int verbose, | ||
371 | const char *fn) | ||
372 | { | ||
373 | struct rb_node *n; | ||
374 | struct ext4_reserve_window_node *rsv, *prev; | ||
375 | int bad; | ||
376 | |||
377 | restart: | ||
378 | n = rb_first(root); | ||
379 | bad = 0; | ||
380 | prev = NULL; | ||
381 | |||
382 | printk("Block Allocation Reservation Windows Map (%s):\n", fn); | ||
383 | while (n) { | ||
384 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
385 | if (verbose) | ||
386 | printk("reservation window 0x%p " | ||
387 | "start: %llu, end: %llu\n", | ||
388 | rsv, rsv->rsv_start, rsv->rsv_end); | ||
389 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | ||
390 | printk("Bad reservation %p (start >= end)\n", | ||
391 | rsv); | ||
392 | bad = 1; | ||
393 | } | ||
394 | if (prev && prev->rsv_end >= rsv->rsv_start) { | ||
395 | printk("Bad reservation %p (prev->end >= start)\n", | ||
396 | rsv); | ||
397 | bad = 1; | ||
398 | } | ||
399 | if (bad) { | ||
400 | if (!verbose) { | ||
401 | printk("Restarting reservation walk in verbose mode\n"); | ||
402 | verbose = 1; | ||
403 | goto restart; | ||
404 | } | ||
405 | } | ||
406 | n = rb_next(n); | ||
407 | prev = rsv; | ||
408 | } | ||
409 | printk("Window map complete.\n"); | ||
410 | BUG_ON(bad); | ||
411 | } | ||
412 | #define rsv_window_dump(root, verbose) \ | ||
413 | __rsv_window_dump((root), (verbose), __func__) | ||
414 | #else | ||
415 | #define rsv_window_dump(root, verbose) do {} while (0) | ||
416 | #endif | ||
417 | |||
418 | /** | ||
419 | * goal_in_my_reservation() | ||
420 | * @rsv: inode's reservation window | ||
421 | * @grp_goal: given goal block relative to the allocation block group | ||
422 | * @group: the current allocation block group | ||
423 | * @sb: filesystem super block | ||
424 | * | ||
425 | * Test if the given goal block (group relative) is within the file's | ||
426 | * own block reservation window range. | ||
427 | * | ||
428 | * If the reservation window is outside the goal allocation group, return 0; | ||
429 | * grp_goal (given goal block) could be -1, which means no specific | ||
430 | * goal block. In this case, always return 1. | ||
431 | * If the goal block is within the reservation window, return 1; | ||
432 | * otherwise, return 0; | ||
433 | */ | ||
434 | static int | ||
435 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | ||
436 | ext4_group_t group, struct super_block *sb) | ||
437 | { | ||
438 | ext4_fsblk_t group_first_block, group_last_block; | ||
439 | |||
440 | group_first_block = ext4_group_first_block_no(sb, group); | ||
441 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
442 | |||
443 | if ((rsv->_rsv_start > group_last_block) || | ||
444 | (rsv->_rsv_end < group_first_block)) | ||
445 | return 0; | ||
446 | if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) | ||
447 | || (grp_goal + group_first_block > rsv->_rsv_end))) | ||
448 | return 0; | ||
449 | return 1; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * search_reserve_window() | ||
454 | * @rb_root: root of reservation tree | ||
455 | * @goal: target allocation block | ||
456 | * | ||
457 | * Find the reserved window which includes the goal, or the previous one | ||
458 | * if the goal is not in any window. | ||
459 | * Returns NULL if there are no windows or if all windows start after the goal. | ||
460 | */ | ||
461 | static struct ext4_reserve_window_node * | ||
462 | search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) | ||
463 | { | ||
464 | struct rb_node *n = root->rb_node; | ||
465 | struct ext4_reserve_window_node *rsv; | ||
466 | |||
467 | if (!n) | ||
468 | return NULL; | ||
469 | |||
470 | do { | ||
471 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
472 | |||
473 | if (goal < rsv->rsv_start) | ||
474 | n = n->rb_left; | ||
475 | else if (goal > rsv->rsv_end) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return rsv; | ||
479 | } while (n); | ||
480 | /* | ||
481 | * We've fallen off the end of the tree: the goal wasn't inside | ||
482 | * any particular node. OK, the previous node must be to one | ||
483 | * side of the interval containing the goal. If it's the RHS, | ||
484 | * we need to back up one. | ||
485 | */ | ||
486 | if (rsv->rsv_start > goal) { | ||
487 | n = rb_prev(&rsv->rsv_node); | ||
488 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
489 | } | ||
490 | return rsv; | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
495 | * @sb: super block | ||
496 | * @rsv: reservation window to add | ||
497 | * | ||
498 | * Must be called with rsv_lock hold. | ||
499 | */ | ||
500 | void ext4_rsv_window_add(struct super_block *sb, | ||
501 | struct ext4_reserve_window_node *rsv) | ||
502 | { | ||
503 | struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; | ||
504 | struct rb_node *node = &rsv->rsv_node; | ||
505 | ext4_fsblk_t start = rsv->rsv_start; | ||
506 | |||
507 | struct rb_node ** p = &root->rb_node; | ||
508 | struct rb_node * parent = NULL; | ||
509 | struct ext4_reserve_window_node *this; | ||
510 | |||
511 | while (*p) | ||
512 | { | ||
513 | parent = *p; | ||
514 | this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); | ||
515 | |||
516 | if (start < this->rsv_start) | ||
517 | p = &(*p)->rb_left; | ||
518 | else if (start > this->rsv_end) | ||
519 | p = &(*p)->rb_right; | ||
520 | else { | ||
521 | rsv_window_dump(root, 1); | ||
522 | BUG(); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | rb_link_node(node, parent, p); | ||
527 | rb_insert_color(node, root); | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
532 | * @sb: super block | ||
533 | * @rsv: reservation window to remove | ||
534 | * | ||
535 | * Mark the block reservation window as not allocated, and unlink it | ||
536 | * from the filesystem reservation window rb tree. Must be called with | ||
537 | * rsv_lock hold. | ||
538 | */ | ||
539 | static void rsv_window_remove(struct super_block *sb, | ||
540 | struct ext4_reserve_window_node *rsv) | ||
541 | { | ||
542 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
543 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
544 | rsv->rsv_alloc_hit = 0; | ||
545 | rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); | ||
546 | } | ||
547 | |||
548 | /* | ||
549 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
550 | * @rsv: given reservation window to check | ||
551 | * | ||
552 | * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. | ||
553 | */ | ||
554 | static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | ||
555 | { | ||
556 | /* a valid reservation end block could not be 0 */ | ||
557 | return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
558 | } | ||
559 | |||
560 | /** | ||
561 | * ext4_init_block_alloc_info() | ||
562 | * @inode: file inode structure | ||
563 | * | ||
564 | * Allocate and initialize the reservation window structure, and | ||
565 | * link the window to the ext4 inode structure at last | ||
566 | * | ||
567 | * The reservation window structure is only dynamically allocated | ||
568 | * and linked to ext4 inode the first time the open file | ||
569 | * needs a new block. So, before every ext4_new_block(s) call, for | ||
570 | * regular files, we should check whether the reservation window | ||
571 | * structure exists or not. In the latter case, this function is called. | ||
572 | * Fail to do so will result in block reservation being turned off for that | ||
573 | * open file. | ||
574 | * | ||
575 | * This function is called from ext4_get_blocks_handle(), also called | ||
576 | * when setting the reservation window size through ioctl before the file | ||
577 | * is open for write (needs block allocation). | ||
578 | * | ||
579 | * Needs down_write(i_data_sem) protection prior to call this function. | ||
580 | */ | ||
581 | void ext4_init_block_alloc_info(struct inode *inode) | ||
582 | { | ||
583 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
584 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
585 | struct super_block *sb = inode->i_sb; | ||
586 | |||
587 | block_i = kmalloc(sizeof(*block_i), GFP_NOFS); | ||
588 | if (block_i) { | ||
589 | struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; | ||
590 | |||
591 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
592 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
593 | |||
594 | /* | ||
595 | * if filesystem is mounted with NORESERVATION, the goal | ||
596 | * reservation window size is set to zero to indicate | ||
597 | * block reservation is off | ||
598 | */ | ||
599 | if (!test_opt(sb, RESERVATION)) | ||
600 | rsv->rsv_goal_size = 0; | ||
601 | else | ||
602 | rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; | ||
603 | rsv->rsv_alloc_hit = 0; | ||
604 | block_i->last_alloc_logical_block = 0; | ||
605 | block_i->last_alloc_physical_block = 0; | ||
606 | } | ||
607 | ei->i_block_alloc_info = block_i; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * ext4_discard_reservation() | ||
612 | * @inode: inode | ||
613 | * | ||
614 | * Discard(free) block reservation window on last file close, or truncate | ||
615 | * or at last iput(). | ||
616 | * | ||
617 | * It is being called in three cases: | ||
618 | * ext4_release_file(): last writer close the file | ||
619 | * ext4_clear_inode(): last iput(), when nobody link to this file. | ||
620 | * ext4_truncate(): when the block indirect map is about to change. | ||
621 | * | ||
622 | */ | ||
623 | void ext4_discard_reservation(struct inode *inode) | ||
624 | { | ||
625 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
626 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
627 | struct ext4_reserve_window_node *rsv; | ||
628 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||
629 | |||
630 | ext4_mb_discard_inode_preallocations(inode); | ||
631 | |||
632 | if (!block_i) | ||
633 | return; | ||
634 | |||
635 | rsv = &block_i->rsv_window_node; | ||
636 | if (!rsv_is_empty(&rsv->rsv_window)) { | ||
637 | spin_lock(rsv_lock); | ||
638 | if (!rsv_is_empty(&rsv->rsv_window)) | ||
639 | rsv_window_remove(inode->i_sb, rsv); | ||
640 | spin_unlock(rsv_lock); | ||
641 | } | ||
642 | } | ||
643 | 351 | ||
644 | /** | 352 | /** |
645 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 353 | * ext4_free_blocks_sb() -- Free given blocks and update quota |
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) | |||
648 | * @block: start physcial block to free | 356 | * @block: start physcial block to free |
649 | * @count: number of blocks to free | 357 | * @count: number of blocks to free |
650 | * @pdquot_freed_blocks: pointer to quota | 358 | * @pdquot_freed_blocks: pointer to quota |
359 | * | ||
360 | * XXX This function is only used by the on-line resizing code, which | ||
361 | * should probably be fixed up to call the mballoc variant. There | ||
362 | * this needs to be cleaned up later; in fact, I'm not convinced this | ||
363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
365 | * with the mballoc code. | ||
651 | */ | 366 | */ |
652 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
653 | ext4_fsblk_t block, unsigned long count, | 368 | ext4_fsblk_t block, unsigned long count, |
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
659 | ext4_grpblk_t bit; | 374 | ext4_grpblk_t bit; |
660 | unsigned long i; | 375 | unsigned long i; |
661 | unsigned long overflow; | 376 | unsigned long overflow; |
662 | struct ext4_group_desc * desc; | 377 | struct ext4_group_desc *desc; |
663 | struct ext4_super_block * es; | 378 | struct ext4_super_block *es; |
664 | struct ext4_sb_info *sbi; | 379 | struct ext4_sb_info *sbi; |
665 | int err = 0, ret; | 380 | int err = 0, ret; |
666 | ext4_grpblk_t group_freed; | 381 | ext4_grpblk_t group_freed; |
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
671 | if (block < le32_to_cpu(es->s_first_data_block) || | 386 | if (block < le32_to_cpu(es->s_first_data_block) || |
672 | block + count < block || | 387 | block + count < block || |
673 | block + count > ext4_blocks_count(es)) { | 388 | block + count > ext4_blocks_count(es)) { |
674 | ext4_error (sb, "ext4_free_blocks", | 389 | ext4_error(sb, "ext4_free_blocks", |
675 | "Freeing blocks not in datazone - " | 390 | "Freeing blocks not in datazone - " |
676 | "block = %llu, count = %lu", block, count); | 391 | "block = %llu, count = %lu", block, count); |
677 | goto error_return; | 392 | goto error_return; |
678 | } | 393 | } |
679 | 394 | ||
680 | ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); | 395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); |
681 | 396 | ||
682 | do_more: | 397 | do_more: |
683 | overflow = 0; | 398 | overflow = 0; |
@@ -694,7 +409,7 @@ do_more: | |||
694 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
695 | if (!bitmap_bh) | 410 | if (!bitmap_bh) |
696 | goto error_return; | 411 | goto error_return; |
697 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 412 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
698 | if (!desc) | 413 | if (!desc) |
699 | goto error_return; | 414 | goto error_return; |
700 | 415 | ||
@@ -703,10 +418,10 @@ do_more: | |||
703 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
704 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 419 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
705 | sbi->s_itb_per_group)) { | 420 | sbi->s_itb_per_group)) { |
706 | ext4_error (sb, "ext4_free_blocks", | 421 | ext4_error(sb, "ext4_free_blocks", |
707 | "Freeing blocks in system zones - " | 422 | "Freeing blocks in system zones - " |
708 | "Block = %llu, count = %lu", | 423 | "Block = %llu, count = %lu", |
709 | block, count); | 424 | block, count); |
710 | goto error_return; | 425 | goto error_return; |
711 | } | 426 | } |
712 | 427 | ||
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
848 | ext4_fsblk_t block, unsigned long count, | 563 | ext4_fsblk_t block, unsigned long count, |
849 | int metadata) | 564 | int metadata) |
850 | { | 565 | { |
851 | struct super_block * sb; | 566 | struct super_block *sb; |
852 | unsigned long dquot_freed_blocks; | 567 | unsigned long dquot_freed_blocks; |
853 | 568 | ||
854 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
@@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
859 | 574 | ||
860 | sb = inode->i_sb; | 575 | sb = inode->i_sb; |
861 | 576 | ||
862 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | 577 | ext4_mb_free_blocks(handle, inode, block, count, |
863 | ext4_free_blocks_sb(handle, sb, block, count, | 578 | metadata, &dquot_freed_blocks); |
864 | &dquot_freed_blocks); | ||
865 | else | ||
866 | ext4_mb_free_blocks(handle, inode, block, count, | ||
867 | metadata, &dquot_freed_blocks); | ||
868 | if (dquot_freed_blocks) | 579 | if (dquot_freed_blocks) |
869 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 580 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
870 | return; | 581 | return; |
871 | } | 582 | } |
872 | 583 | ||
873 | /** | 584 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
874 | * ext4_test_allocatable() | 585 | s64 nblocks) |
875 | * @nr: given allocation block group | ||
876 | * @bh: bufferhead contains the bitmap of the given block group | ||
877 | * | ||
878 | * For ext4 allocations, we must not reuse any blocks which are | ||
879 | * allocated in the bitmap buffer's "last committed data" copy. This | ||
880 | * prevents deletes from freeing up the page for reuse until we have | ||
881 | * committed the delete transaction. | ||
882 | * | ||
883 | * If we didn't do this, then deleting something and reallocating it as | ||
884 | * data would allow the old block to be overwritten before the | ||
885 | * transaction committed (because we force data to disk before commit). | ||
886 | * This would lead to corruption if we crashed between overwriting the | ||
887 | * data and committing the delete. | ||
888 | * | ||
889 | * @@@ We may want to make this allocation behaviour conditional on | ||
890 | * data-writes at some point, and disable it for metadata allocations or | ||
891 | * sync-data inodes. | ||
892 | */ | ||
893 | static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) | ||
894 | { | ||
895 | int ret; | ||
896 | struct journal_head *jh = bh2jh(bh); | ||
897 | |||
898 | if (ext4_test_bit(nr, bh->b_data)) | ||
899 | return 0; | ||
900 | |||
901 | jbd_lock_bh_state(bh); | ||
902 | if (!jh->b_committed_data) | ||
903 | ret = 1; | ||
904 | else | ||
905 | ret = !ext4_test_bit(nr, jh->b_committed_data); | ||
906 | jbd_unlock_bh_state(bh); | ||
907 | return ret; | ||
908 | } | ||
909 | |||
910 | /** | ||
911 | * bitmap_search_next_usable_block() | ||
912 | * @start: the starting block (group relative) of the search | ||
913 | * @bh: bufferhead contains the block group bitmap | ||
914 | * @maxblocks: the ending block (group relative) of the reservation | ||
915 | * | ||
916 | * The bitmap search --- search forward alternately through the actual | ||
917 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
918 | * bit free in both bitmaps. | ||
919 | */ | ||
920 | static ext4_grpblk_t | ||
921 | bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
922 | ext4_grpblk_t maxblocks) | ||
923 | { | 586 | { |
924 | ext4_grpblk_t next; | 587 | s64 free_blocks, dirty_blocks; |
925 | struct journal_head *jh = bh2jh(bh); | 588 | s64 root_blocks = 0; |
926 | 589 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | |
927 | while (start < maxblocks) { | 590 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; |
928 | next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); | ||
929 | if (next >= maxblocks) | ||
930 | return -1; | ||
931 | if (ext4_test_allocatable(next, bh)) | ||
932 | return next; | ||
933 | jbd_lock_bh_state(bh); | ||
934 | if (jh->b_committed_data) | ||
935 | start = ext4_find_next_zero_bit(jh->b_committed_data, | ||
936 | maxblocks, next); | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | ||
939 | return -1; | ||
940 | } | ||
941 | 591 | ||
942 | /** | 592 | free_blocks = percpu_counter_read_positive(fbc); |
943 | * find_next_usable_block() | 593 | dirty_blocks = percpu_counter_read_positive(dbc); |
944 | * @start: the starting block (group relative) to find next | ||
945 | * allocatable block in bitmap. | ||
946 | * @bh: bufferhead contains the block group bitmap | ||
947 | * @maxblocks: the ending block (group relative) for the search | ||
948 | * | ||
949 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
950 | * its last-committed copy (if that exists), and perform the "most | ||
951 | * appropriate allocation" algorithm of looking for a free block near | ||
952 | * the initial goal; then for a free byte somewhere in the bitmap; then | ||
953 | * for any free bit in the bitmap. | ||
954 | */ | ||
955 | static ext4_grpblk_t | ||
956 | find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
957 | ext4_grpblk_t maxblocks) | ||
958 | { | ||
959 | ext4_grpblk_t here, next; | ||
960 | char *p, *r; | ||
961 | |||
962 | if (start > 0) { | ||
963 | /* | ||
964 | * The goal was occupied; search forward for a free | ||
965 | * block within the next XX blocks. | ||
966 | * | ||
967 | * end_goal is more or less random, but it has to be | ||
968 | * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the | ||
969 | * next 64-bit boundary is simple.. | ||
970 | */ | ||
971 | ext4_grpblk_t end_goal = (start + 63) & ~63; | ||
972 | if (end_goal > maxblocks) | ||
973 | end_goal = maxblocks; | ||
974 | here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); | ||
975 | if (here < end_goal && ext4_test_allocatable(here, bh)) | ||
976 | return here; | ||
977 | ext4_debug("Bit not found near goal\n"); | ||
978 | } | ||
979 | |||
980 | here = start; | ||
981 | if (here < 0) | ||
982 | here = 0; | ||
983 | |||
984 | p = ((char *)bh->b_data) + (here >> 3); | ||
985 | r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); | ||
986 | next = (r - ((char *)bh->b_data)) << 3; | ||
987 | |||
988 | if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) | ||
989 | return next; | ||
990 | |||
991 | /* | ||
992 | * The bitmap search --- search forward alternately through the actual | ||
993 | * bitmap and the last-committed copy until we find a bit free in | ||
994 | * both | ||
995 | */ | ||
996 | here = bitmap_search_next_usable_block(here, bh, maxblocks); | ||
997 | return here; | ||
998 | } | ||
999 | |||
1000 | /** | ||
1001 | * claim_block() | ||
1002 | * @block: the free block (group relative) to allocate | ||
1003 | * @bh: the bufferhead containts the block group bitmap | ||
1004 | * | ||
1005 | * We think we can allocate this block in this bitmap. Try to set the bit. | ||
1006 | * If that succeeds then check that nobody has allocated and then freed the | ||
1007 | * block since we saw that is was not marked in b_committed_data. If it _was_ | ||
1008 | * allocated and freed then clear the bit in the bitmap again and return | ||
1009 | * zero (failure). | ||
1010 | */ | ||
1011 | static inline int | ||
1012 | claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | ||
1013 | { | ||
1014 | struct journal_head *jh = bh2jh(bh); | ||
1015 | int ret; | ||
1016 | |||
1017 | if (ext4_set_bit_atomic(lock, block, bh->b_data)) | ||
1018 | return 0; | ||
1019 | jbd_lock_bh_state(bh); | ||
1020 | if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { | ||
1021 | ext4_clear_bit_atomic(lock, block, bh->b_data); | ||
1022 | ret = 0; | ||
1023 | } else { | ||
1024 | ret = 1; | ||
1025 | } | ||
1026 | jbd_unlock_bh_state(bh); | ||
1027 | return ret; | ||
1028 | } | ||
1029 | 594 | ||
1030 | /** | 595 | if (!capable(CAP_SYS_RESOURCE) && |
1031 | * ext4_try_to_allocate() | 596 | sbi->s_resuid != current->fsuid && |
1032 | * @sb: superblock | 597 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1033 | * @handle: handle to this transaction | 598 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1034 | * @group: given allocation block group | ||
1035 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1036 | * @grp_goal: given target block within the group | ||
1037 | * @count: target number of blocks to allocate | ||
1038 | * @my_rsv: reservation window | ||
1039 | * | ||
1040 | * Attempt to allocate blocks within a give range. Set the range of allocation | ||
1041 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
1042 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
1043 | * | ||
1044 | * To set the range of this allocation: | ||
1045 | * if there is a reservation window, only try to allocate block(s) from the | ||
1046 | * file's own reservation window; | ||
1047 | * Otherwise, the allocation range starts from the give goal block, ends at | ||
1048 | * the block group's last block. | ||
1049 | * | ||
1050 | * If we failed to allocate the desired block then we may end up crossing to a | ||
1051 | * new bitmap. In that case we must release write access to the old one via | ||
1052 | * ext4_journal_release_buffer(), else we'll run out of credits. | ||
1053 | */ | ||
1054 | static ext4_grpblk_t | ||
1055 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, | ||
1056 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1057 | ext4_grpblk_t grp_goal, unsigned long *count, | ||
1058 | struct ext4_reserve_window *my_rsv) | ||
1059 | { | ||
1060 | ext4_fsblk_t group_first_block; | ||
1061 | ext4_grpblk_t start, end; | ||
1062 | unsigned long num = 0; | ||
1063 | |||
1064 | /* we do allocation within the reservation window if we have a window */ | ||
1065 | if (my_rsv) { | ||
1066 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1067 | if (my_rsv->_rsv_start >= group_first_block) | ||
1068 | start = my_rsv->_rsv_start - group_first_block; | ||
1069 | else | ||
1070 | /* reservation window cross group boundary */ | ||
1071 | start = 0; | ||
1072 | end = my_rsv->_rsv_end - group_first_block + 1; | ||
1073 | if (end > EXT4_BLOCKS_PER_GROUP(sb)) | ||
1074 | /* reservation window crosses group boundary */ | ||
1075 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1076 | if ((start <= grp_goal) && (grp_goal < end)) | ||
1077 | start = grp_goal; | ||
1078 | else | ||
1079 | grp_goal = -1; | ||
1080 | } else { | ||
1081 | if (grp_goal > 0) | ||
1082 | start = grp_goal; | ||
1083 | else | ||
1084 | start = 0; | ||
1085 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1086 | } | ||
1087 | |||
1088 | BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); | ||
1089 | |||
1090 | repeat: | ||
1091 | if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { | ||
1092 | grp_goal = find_next_usable_block(start, bitmap_bh, end); | ||
1093 | if (grp_goal < 0) | ||
1094 | goto fail_access; | ||
1095 | if (!my_rsv) { | ||
1096 | int i; | ||
1097 | |||
1098 | for (i = 0; i < 7 && grp_goal > start && | ||
1099 | ext4_test_allocatable(grp_goal - 1, | ||
1100 | bitmap_bh); | ||
1101 | i++, grp_goal--) | ||
1102 | ; | ||
1103 | } | ||
1104 | } | ||
1105 | start = grp_goal; | ||
1106 | |||
1107 | if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1108 | grp_goal, bitmap_bh)) { | ||
1109 | /* | ||
1110 | * The block was allocated by another thread, or it was | ||
1111 | * allocated and then freed by another thread | ||
1112 | */ | ||
1113 | start++; | ||
1114 | grp_goal++; | ||
1115 | if (start >= end) | ||
1116 | goto fail_access; | ||
1117 | goto repeat; | ||
1118 | } | ||
1119 | num++; | ||
1120 | grp_goal++; | ||
1121 | while (num < *count && grp_goal < end | ||
1122 | && ext4_test_allocatable(grp_goal, bitmap_bh) | ||
1123 | && claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1124 | grp_goal, bitmap_bh)) { | ||
1125 | num++; | ||
1126 | grp_goal++; | ||
1127 | } | ||
1128 | *count = num; | ||
1129 | return grp_goal - num; | ||
1130 | fail_access: | ||
1131 | *count = num; | ||
1132 | return -1; | ||
1133 | } | ||
1134 | |||
1135 | /** | ||
1136 | * find_next_reservable_window(): | ||
1137 | * find a reservable space within the given range. | ||
1138 | * It does not allocate the reservation window for now: | ||
1139 | * alloc_new_reservation() will do the work later. | ||
1140 | * | ||
1141 | * @search_head: the head of the searching list; | ||
1142 | * This is not necessarily the list head of the whole filesystem | ||
1143 | * | ||
1144 | * We have both head and start_block to assist the search | ||
1145 | * for the reservable space. The list starts from head, | ||
1146 | * but we will shift to the place where start_block is, | ||
1147 | * then start from there, when looking for a reservable space. | ||
1148 | * | ||
1149 | * @size: the target new reservation window size | ||
1150 | * | ||
1151 | * @group_first_block: the first block we consider to start | ||
1152 | * the real search from | ||
1153 | * | ||
1154 | * @last_block: | ||
1155 | * the maximum block number that our goal reservable space | ||
1156 | * could start from. This is normally the last block in this | ||
1157 | * group. The search will end when we found the start of next | ||
1158 | * possible reservable space is out of this boundary. | ||
1159 | * This could handle the cross boundary reservation window | ||
1160 | * request. | ||
1161 | * | ||
1162 | * basically we search from the given range, rather than the whole | ||
1163 | * reservation double linked list, (start_block, last_block) | ||
1164 | * to find a free region that is of my size and has not | ||
1165 | * been reserved. | ||
1166 | * | ||
1167 | */ | ||
1168 | static int find_next_reservable_window( | ||
1169 | struct ext4_reserve_window_node *search_head, | ||
1170 | struct ext4_reserve_window_node *my_rsv, | ||
1171 | struct super_block * sb, | ||
1172 | ext4_fsblk_t start_block, | ||
1173 | ext4_fsblk_t last_block) | ||
1174 | { | ||
1175 | struct rb_node *next; | ||
1176 | struct ext4_reserve_window_node *rsv, *prev; | ||
1177 | ext4_fsblk_t cur; | ||
1178 | int size = my_rsv->rsv_goal_size; | ||
1179 | |||
1180 | /* TODO: make the start of the reservation window byte-aligned */ | ||
1181 | /* cur = *start_block & ~7;*/ | ||
1182 | cur = start_block; | ||
1183 | rsv = search_head; | ||
1184 | if (!rsv) | ||
1185 | return -1; | ||
1186 | |||
1187 | while (1) { | ||
1188 | if (cur <= rsv->rsv_end) | ||
1189 | cur = rsv->rsv_end + 1; | ||
1190 | |||
1191 | /* TODO? | ||
1192 | * in the case we could not find a reservable space | ||
1193 | * that is what is expected, during the re-search, we could | ||
1194 | * remember what's the largest reservable space we could have | ||
1195 | * and return that one. | ||
1196 | * | ||
1197 | * For now it will fail if we could not find the reservable | ||
1198 | * space with expected-size (or more)... | ||
1199 | */ | ||
1200 | if (cur > last_block) | ||
1201 | return -1; /* fail */ | ||
1202 | |||
1203 | prev = rsv; | ||
1204 | next = rb_next(&rsv->rsv_node); | ||
1205 | rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); | ||
1206 | 599 | ||
1207 | /* | 600 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1208 | * Reached the last reservation, we can just append to the | 601 | EXT4_FREEBLOCKS_WATERMARK) { |
1209 | * previous one. | 602 | free_blocks = percpu_counter_sum(fbc); |
1210 | */ | 603 | dirty_blocks = percpu_counter_sum(dbc); |
1211 | if (!next) | 604 | if (dirty_blocks < 0) { |
1212 | break; | 605 | printk(KERN_CRIT "Dirty block accounting " |
1213 | 606 | "went wrong %lld\n", | |
1214 | if (cur + size <= rsv->rsv_start) { | 607 | dirty_blocks); |
1215 | /* | ||
1216 | * Found a reserveable space big enough. We could | ||
1217 | * have a reservation across the group boundary here | ||
1218 | */ | ||
1219 | break; | ||
1220 | } | 608 | } |
1221 | } | 609 | } |
1222 | /* | 610 | /* Check whether we have space after |
1223 | * we come here either : | 611 | * accounting for current dirty blocks |
1224 | * when we reach the end of the whole list, | ||
1225 | * and there is empty reservable space after last entry in the list. | ||
1226 | * append it to the end of the list. | ||
1227 | * | ||
1228 | * or we found one reservable space in the middle of the list, | ||
1229 | * return the reservation window that we could append to. | ||
1230 | * succeed. | ||
1231 | */ | 612 | */ |
613 | if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) | ||
614 | /* we don't have free space */ | ||
615 | return -ENOSPC; | ||
1232 | 616 | ||
1233 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) | 617 | /* Add the blocks to nblocks */ |
1234 | rsv_window_remove(sb, my_rsv); | 618 | percpu_counter_add(dbc, nblocks); |
1235 | |||
1236 | /* | ||
1237 | * Let's book the whole avaliable window for now. We will check the | ||
1238 | * disk bitmap later and then, if there are free blocks then we adjust | ||
1239 | * the window size if it's larger than requested. | ||
1240 | * Otherwise, we will remove this node from the tree next time | ||
1241 | * call find_next_reservable_window. | ||
1242 | */ | ||
1243 | my_rsv->rsv_start = cur; | ||
1244 | my_rsv->rsv_end = cur + size - 1; | ||
1245 | my_rsv->rsv_alloc_hit = 0; | ||
1246 | |||
1247 | if (prev != my_rsv) | ||
1248 | ext4_rsv_window_add(sb, my_rsv); | ||
1249 | |||
1250 | return 0; | 619 | return 0; |
1251 | } | 620 | } |
1252 | 621 | ||
1253 | /** | 622 | /** |
1254 | * alloc_new_reservation()--allocate a new reservation window | ||
1255 | * | ||
1256 | * To make a new reservation, we search part of the filesystem | ||
1257 | * reservation list (the list that inside the group). We try to | ||
1258 | * allocate a new reservation window near the allocation goal, | ||
1259 | * or the beginning of the group, if there is no goal. | ||
1260 | * | ||
1261 | * We first find a reservable space after the goal, then from | ||
1262 | * there, we check the bitmap for the first free block after | ||
1263 | * it. If there is no free block until the end of group, then the | ||
1264 | * whole group is full, we failed. Otherwise, check if the free | ||
1265 | * block is inside the expected reservable space, if so, we | ||
1266 | * succeed. | ||
1267 | * If the first free block is outside the reservable space, then | ||
1268 | * start from the first free block, we search for next available | ||
1269 | * space, and go on. | ||
1270 | * | ||
1271 | * on succeed, a new reservation will be found and inserted into the list | ||
1272 | * It contains at least one free block, and it does not overlap with other | ||
1273 | * reservation windows. | ||
1274 | * | ||
1275 | * failed: we failed to find a reservation window in this group | ||
1276 | * | ||
1277 | * @rsv: the reservation | ||
1278 | * | ||
1279 | * @grp_goal: The goal (group-relative). It is where the search for a | ||
1280 | * free reservable space should start from. | ||
1281 | * if we have a grp_goal(grp_goal >0 ), then start from there, | ||
1282 | * no grp_goal(grp_goal = -1), we start from the first block | ||
1283 | * of the group. | ||
1284 | * | ||
1285 | * @sb: the super block | ||
1286 | * @group: the group we are trying to allocate in | ||
1287 | * @bitmap_bh: the block group block bitmap | ||
1288 | * | ||
1289 | */ | ||
1290 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1291 | ext4_grpblk_t grp_goal, struct super_block *sb, | ||
1292 | ext4_group_t group, struct buffer_head *bitmap_bh) | ||
1293 | { | ||
1294 | struct ext4_reserve_window_node *search_head; | ||
1295 | ext4_fsblk_t group_first_block, group_end_block, start_block; | ||
1296 | ext4_grpblk_t first_free_block; | ||
1297 | struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; | ||
1298 | unsigned long size; | ||
1299 | int ret; | ||
1300 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1301 | |||
1302 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1303 | group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1304 | |||
1305 | if (grp_goal < 0) | ||
1306 | start_block = group_first_block; | ||
1307 | else | ||
1308 | start_block = grp_goal + group_first_block; | ||
1309 | |||
1310 | size = my_rsv->rsv_goal_size; | ||
1311 | |||
1312 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | ||
1313 | /* | ||
1314 | * if the old reservation is cross group boundary | ||
1315 | * and if the goal is inside the old reservation window, | ||
1316 | * we will come here when we just failed to allocate from | ||
1317 | * the first part of the window. We still have another part | ||
1318 | * that belongs to the next group. In this case, there is no | ||
1319 | * point to discard our window and try to allocate a new one | ||
1320 | * in this group(which will fail). we should | ||
1321 | * keep the reservation window, just simply move on. | ||
1322 | * | ||
1323 | * Maybe we could shift the start block of the reservation | ||
1324 | * window to the first block of next group. | ||
1325 | */ | ||
1326 | |||
1327 | if ((my_rsv->rsv_start <= group_end_block) && | ||
1328 | (my_rsv->rsv_end > group_end_block) && | ||
1329 | (start_block >= my_rsv->rsv_start)) | ||
1330 | return -1; | ||
1331 | |||
1332 | if ((my_rsv->rsv_alloc_hit > | ||
1333 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | ||
1334 | /* | ||
1335 | * if the previously allocation hit ratio is | ||
1336 | * greater than 1/2, then we double the size of | ||
1337 | * the reservation window the next time, | ||
1338 | * otherwise we keep the same size window | ||
1339 | */ | ||
1340 | size = size * 2; | ||
1341 | if (size > EXT4_MAX_RESERVE_BLOCKS) | ||
1342 | size = EXT4_MAX_RESERVE_BLOCKS; | ||
1343 | my_rsv->rsv_goal_size= size; | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | spin_lock(rsv_lock); | ||
1348 | /* | ||
1349 | * shift the search start to the window near the goal block | ||
1350 | */ | ||
1351 | search_head = search_reserve_window(fs_rsv_root, start_block); | ||
1352 | |||
1353 | /* | ||
1354 | * find_next_reservable_window() simply finds a reservable window | ||
1355 | * inside the given range(start_block, group_end_block). | ||
1356 | * | ||
1357 | * To make sure the reservation window has a free bit inside it, we | ||
1358 | * need to check the bitmap after we found a reservable window. | ||
1359 | */ | ||
1360 | retry: | ||
1361 | ret = find_next_reservable_window(search_head, my_rsv, sb, | ||
1362 | start_block, group_end_block); | ||
1363 | |||
1364 | if (ret == -1) { | ||
1365 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1366 | rsv_window_remove(sb, my_rsv); | ||
1367 | spin_unlock(rsv_lock); | ||
1368 | return -1; | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * On success, find_next_reservable_window() returns the | ||
1373 | * reservation window where there is a reservable space after it. | ||
1374 | * Before we reserve this reservable space, we need | ||
1375 | * to make sure there is at least a free block inside this region. | ||
1376 | * | ||
1377 | * searching the first free bit on the block bitmap and copy of | ||
1378 | * last committed bitmap alternatively, until we found a allocatable | ||
1379 | * block. Search start from the start block of the reservable space | ||
1380 | * we just found. | ||
1381 | */ | ||
1382 | spin_unlock(rsv_lock); | ||
1383 | first_free_block = bitmap_search_next_usable_block( | ||
1384 | my_rsv->rsv_start - group_first_block, | ||
1385 | bitmap_bh, group_end_block - group_first_block + 1); | ||
1386 | |||
1387 | if (first_free_block < 0) { | ||
1388 | /* | ||
1389 | * no free block left on the bitmap, no point | ||
1390 | * to reserve the space. return failed. | ||
1391 | */ | ||
1392 | spin_lock(rsv_lock); | ||
1393 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1394 | rsv_window_remove(sb, my_rsv); | ||
1395 | spin_unlock(rsv_lock); | ||
1396 | return -1; /* failed */ | ||
1397 | } | ||
1398 | |||
1399 | start_block = first_free_block + group_first_block; | ||
1400 | /* | ||
1401 | * check if the first free block is within the | ||
1402 | * free space we just reserved | ||
1403 | */ | ||
1404 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | ||
1405 | return 0; /* success */ | ||
1406 | /* | ||
1407 | * if the first free bit we found is out of the reservable space | ||
1408 | * continue search for next reservable space, | ||
1409 | * start from where the free block is, | ||
1410 | * we also shift the list head to where we stopped last time | ||
1411 | */ | ||
1412 | search_head = my_rsv; | ||
1413 | spin_lock(rsv_lock); | ||
1414 | goto retry; | ||
1415 | } | ||
1416 | |||
1417 | /** | ||
1418 | * try_to_extend_reservation() | ||
1419 | * @my_rsv: given reservation window | ||
1420 | * @sb: super block | ||
1421 | * @size: the delta to extend | ||
1422 | * | ||
1423 | * Attempt to expand the reservation window large enough to have | ||
1424 | * required number of free blocks | ||
1425 | * | ||
1426 | * Since ext4_try_to_allocate() will always allocate blocks within | ||
1427 | * the reservation window range, if the window size is too small, | ||
1428 | * multiple blocks allocation has to stop at the end of the reservation | ||
1429 | * window. To make this more efficient, given the total number of | ||
1430 | * blocks needed and the current size of the window, we try to | ||
1431 | * expand the reservation window size if necessary on a best-effort | ||
1432 | * basis before ext4_new_blocks() tries to allocate blocks, | ||
1433 | */ | ||
1434 | static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1435 | struct super_block *sb, int size) | ||
1436 | { | ||
1437 | struct ext4_reserve_window_node *next_rsv; | ||
1438 | struct rb_node *next; | ||
1439 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1440 | |||
1441 | if (!spin_trylock(rsv_lock)) | ||
1442 | return; | ||
1443 | |||
1444 | next = rb_next(&my_rsv->rsv_node); | ||
1445 | |||
1446 | if (!next) | ||
1447 | my_rsv->rsv_end += size; | ||
1448 | else { | ||
1449 | next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); | ||
1450 | |||
1451 | if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) | ||
1452 | my_rsv->rsv_end += size; | ||
1453 | else | ||
1454 | my_rsv->rsv_end = next_rsv->rsv_start - 1; | ||
1455 | } | ||
1456 | spin_unlock(rsv_lock); | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * ext4_try_to_allocate_with_rsv() | ||
1461 | * @sb: superblock | ||
1462 | * @handle: handle to this transaction | ||
1463 | * @group: given allocation block group | ||
1464 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1465 | * @grp_goal: given target block within the group | ||
1466 | * @count: target number of blocks to allocate | ||
1467 | * @my_rsv: reservation window | ||
1468 | * @errp: pointer to store the error code | ||
1469 | * | ||
1470 | * This is the main function used to allocate a new block and its reservation | ||
1471 | * window. | ||
1472 | * | ||
1473 | * Each time when a new block allocation is need, first try to allocate from | ||
1474 | * its own reservation. If it does not have a reservation window, instead of | ||
1475 | * looking for a free bit on bitmap first, then look up the reservation list to | ||
1476 | * see if it is inside somebody else's reservation window, we try to allocate a | ||
1477 | * reservation window for it starting from the goal first. Then do the block | ||
1478 | * allocation within the reservation window. | ||
1479 | * | ||
1480 | * This will avoid keeping on searching the reservation list again and | ||
1481 | * again when somebody is looking for a free block (without | ||
1482 | * reservation), and there are lots of free blocks, but they are all | ||
1483 | * being reserved. | ||
1484 | * | ||
1485 | * We use a red-black tree for the per-filesystem reservation list. | ||
1486 | * | ||
1487 | */ | ||
1488 | static ext4_grpblk_t | ||
1489 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | ||
1490 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1491 | ext4_grpblk_t grp_goal, | ||
1492 | struct ext4_reserve_window_node * my_rsv, | ||
1493 | unsigned long *count, int *errp) | ||
1494 | { | ||
1495 | ext4_fsblk_t group_first_block, group_last_block; | ||
1496 | ext4_grpblk_t ret = 0; | ||
1497 | int fatal; | ||
1498 | unsigned long num = *count; | ||
1499 | |||
1500 | *errp = 0; | ||
1501 | |||
1502 | /* | ||
1503 | * Make sure we use undo access for the bitmap, because it is critical | ||
1504 | * that we do the frozen_data COW on bitmap buffers in all cases even | ||
1505 | * if the buffer is in BJ_Forget state in the committing transaction. | ||
1506 | */ | ||
1507 | BUFFER_TRACE(bitmap_bh, "get undo access for new block"); | ||
1508 | fatal = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
1509 | if (fatal) { | ||
1510 | *errp = fatal; | ||
1511 | return -1; | ||
1512 | } | ||
1513 | |||
1514 | /* | ||
1515 | * we don't deal with reservation when | ||
1516 | * filesystem is mounted without reservation | ||
1517 | * or the file is not a regular file | ||
1518 | * or last attempt to allocate a block with reservation turned on failed | ||
1519 | */ | ||
1520 | if (my_rsv == NULL ) { | ||
1521 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1522 | grp_goal, count, NULL); | ||
1523 | goto out; | ||
1524 | } | ||
1525 | /* | ||
1526 | * grp_goal is a group relative block number (if there is a goal) | ||
1527 | * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) | ||
1528 | * first block is a filesystem wide block number | ||
1529 | * first block is the block number of the first block in this group | ||
1530 | */ | ||
1531 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1532 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1533 | |||
1534 | /* | ||
1535 | * Basically we will allocate a new block from inode's reservation | ||
1536 | * window. | ||
1537 | * | ||
1538 | * We need to allocate a new reservation window, if: | ||
1539 | * a) inode does not have a reservation window; or | ||
1540 | * b) last attempt to allocate a block from existing reservation | ||
1541 | * failed; or | ||
1542 | * c) we come here with a goal and with a reservation window | ||
1543 | * | ||
1544 | * We do not need to allocate a new reservation window if we come here | ||
1545 | * at the beginning with a goal and the goal is inside the window, or | ||
1546 | * we don't have a goal but already have a reservation window. | ||
1547 | * then we could go to allocate from the reservation window directly. | ||
1548 | */ | ||
1549 | while (1) { | ||
1550 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | ||
1551 | !goal_in_my_reservation(&my_rsv->rsv_window, | ||
1552 | grp_goal, group, sb)) { | ||
1553 | if (my_rsv->rsv_goal_size < *count) | ||
1554 | my_rsv->rsv_goal_size = *count; | ||
1555 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | ||
1556 | group, bitmap_bh); | ||
1557 | if (ret < 0) | ||
1558 | break; /* failed */ | ||
1559 | |||
1560 | if (!goal_in_my_reservation(&my_rsv->rsv_window, | ||
1561 | grp_goal, group, sb)) | ||
1562 | grp_goal = -1; | ||
1563 | } else if (grp_goal >= 0) { | ||
1564 | int curr = my_rsv->rsv_end - | ||
1565 | (grp_goal + group_first_block) + 1; | ||
1566 | |||
1567 | if (curr < *count) | ||
1568 | try_to_extend_reservation(my_rsv, sb, | ||
1569 | *count - curr); | ||
1570 | } | ||
1571 | |||
1572 | if ((my_rsv->rsv_start > group_last_block) || | ||
1573 | (my_rsv->rsv_end < group_first_block)) { | ||
1574 | rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); | ||
1575 | BUG(); | ||
1576 | } | ||
1577 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1578 | grp_goal, &num, &my_rsv->rsv_window); | ||
1579 | if (ret >= 0) { | ||
1580 | my_rsv->rsv_alloc_hit += num; | ||
1581 | *count = num; | ||
1582 | break; /* succeed */ | ||
1583 | } | ||
1584 | num = *count; | ||
1585 | } | ||
1586 | out: | ||
1587 | if (ret >= 0) { | ||
1588 | BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " | ||
1589 | "bitmap block"); | ||
1590 | fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
1591 | if (fatal) { | ||
1592 | *errp = fatal; | ||
1593 | return -1; | ||
1594 | } | ||
1595 | return ret; | ||
1596 | } | ||
1597 | |||
1598 | BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); | ||
1599 | ext4_journal_release_buffer(handle, bitmap_bh); | ||
1600 | return ret; | ||
1601 | } | ||
1602 | |||
1603 | /** | ||
1604 | * ext4_has_free_blocks() | 623 | * ext4_has_free_blocks() |
1605 | * @sbi: in-core super block structure. | 624 | * @sbi: in-core super block structure. |
1606 | * @nblocks: number of neeed blocks | 625 | * @nblocks: number of neeed blocks |
@@ -1610,29 +629,34 @@ out: | |||
1610 | * On success, return nblocks | 629 | * On success, return nblocks |
1611 | */ | 630 | */ |
1612 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 631 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
1613 | ext4_fsblk_t nblocks) | 632 | s64 nblocks) |
1614 | { | 633 | { |
1615 | ext4_fsblk_t free_blocks; | 634 | s64 free_blocks, dirty_blocks; |
1616 | ext4_fsblk_t root_blocks = 0; | 635 | s64 root_blocks = 0; |
636 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | ||
637 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; | ||
1617 | 638 | ||
1618 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 639 | free_blocks = percpu_counter_read_positive(fbc); |
640 | dirty_blocks = percpu_counter_read_positive(dbc); | ||
1619 | 641 | ||
1620 | if (!capable(CAP_SYS_RESOURCE) && | 642 | if (!capable(CAP_SYS_RESOURCE) && |
1621 | sbi->s_resuid != current->fsuid && | 643 | sbi->s_resuid != current->fsuid && |
1622 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) | 644 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1623 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 645 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1624 | #ifdef CONFIG_SMP | 646 | |
1625 | if (free_blocks - root_blocks < FBC_BATCH) | 647 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1626 | free_blocks = | 648 | EXT4_FREEBLOCKS_WATERMARK) { |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 649 | free_blocks = percpu_counter_sum(fbc); |
1628 | #endif | 650 | dirty_blocks = percpu_counter_sum(dbc); |
1629 | if (free_blocks <= root_blocks) | 651 | } |
652 | if (free_blocks <= (root_blocks + dirty_blocks)) | ||
1630 | /* we don't have free space */ | 653 | /* we don't have free space */ |
1631 | return 0; | 654 | return 0; |
1632 | if (free_blocks - root_blocks < nblocks) | 655 | |
1633 | return free_blocks - root_blocks; | 656 | if (free_blocks - (root_blocks + dirty_blocks) < nblocks) |
657 | return free_blocks - (root_blocks + dirty_blocks); | ||
1634 | return nblocks; | 658 | return nblocks; |
1635 | } | 659 | } |
1636 | 660 | ||
1637 | 661 | ||
1638 | /** | 662 | /** |
@@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1657 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 681 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
1658 | } | 682 | } |
1659 | 683 | ||
1660 | /** | ||
1661 | * ext4_old_new_blocks() -- core block bitmap based block allocation function | ||
1662 | * | ||
1663 | * @handle: handle to this transaction | ||
1664 | * @inode: file inode | ||
1665 | * @goal: given target block(filesystem wide) | ||
1666 | * @count: target number of blocks to allocate | ||
1667 | * @errp: error code | ||
1668 | * | ||
1669 | * ext4_old_new_blocks uses a goal block to assist allocation and look up | ||
1670 | * the block bitmap directly to do block allocation. It tries to | ||
1671 | * allocate block(s) from the block group contains the goal block first. If | ||
1672 | * that fails, it will try to allocate block(s) from other block groups | ||
1673 | * without any specific goal block. | ||
1674 | * | ||
1675 | * This function is called when -o nomballoc mount option is enabled | ||
1676 | * | ||
1677 | */ | ||
1678 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
1679 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
1680 | { | ||
1681 | struct buffer_head *bitmap_bh = NULL; | ||
1682 | struct buffer_head *gdp_bh; | ||
1683 | ext4_group_t group_no; | ||
1684 | ext4_group_t goal_group; | ||
1685 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | ||
1686 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | ||
1687 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ | ||
1688 | ext4_group_t bgi; /* blockgroup iteration index */ | ||
1689 | int fatal = 0, err; | ||
1690 | int performed_allocation = 0; | ||
1691 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | ||
1692 | struct super_block *sb; | ||
1693 | struct ext4_group_desc *gdp; | ||
1694 | struct ext4_super_block *es; | ||
1695 | struct ext4_sb_info *sbi; | ||
1696 | struct ext4_reserve_window_node *my_rsv = NULL; | ||
1697 | struct ext4_block_alloc_info *block_i; | ||
1698 | unsigned short windowsz = 0; | ||
1699 | ext4_group_t ngroups; | ||
1700 | unsigned long num = *count; | ||
1701 | |||
1702 | sb = inode->i_sb; | ||
1703 | if (!sb) { | ||
1704 | *errp = -ENODEV; | ||
1705 | printk("ext4_new_block: nonexistent device"); | ||
1706 | return 0; | ||
1707 | } | ||
1708 | |||
1709 | sbi = EXT4_SB(sb); | ||
1710 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
1711 | /* | ||
1712 | * With delalloc we already reserved the blocks | ||
1713 | */ | ||
1714 | *count = ext4_has_free_blocks(sbi, *count); | ||
1715 | } | ||
1716 | if (*count == 0) { | ||
1717 | *errp = -ENOSPC; | ||
1718 | return 0; /*return with ENOSPC error */ | ||
1719 | } | ||
1720 | num = *count; | ||
1721 | |||
1722 | /* | ||
1723 | * Check quota for allocation of this block. | ||
1724 | */ | ||
1725 | if (DQUOT_ALLOC_BLOCK(inode, num)) { | ||
1726 | *errp = -EDQUOT; | ||
1727 | return 0; | ||
1728 | } | ||
1729 | |||
1730 | sbi = EXT4_SB(sb); | ||
1731 | es = EXT4_SB(sb)->s_es; | ||
1732 | ext4_debug("goal=%llu.\n", goal); | ||
1733 | /* | ||
1734 | * Allocate a block from reservation only when | ||
1735 | * filesystem is mounted with reservation(default,-o reservation), and | ||
1736 | * it's a regular file, and | ||
1737 | * the desired window size is greater than 0 (One could use ioctl | ||
1738 | * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off | ||
1739 | * reservation on that particular file) | ||
1740 | */ | ||
1741 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
1742 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | ||
1743 | my_rsv = &block_i->rsv_window_node; | ||
1744 | |||
1745 | /* | ||
1746 | * First, test whether the goal block is free. | ||
1747 | */ | ||
1748 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
1749 | goal >= ext4_blocks_count(es)) | ||
1750 | goal = le32_to_cpu(es->s_first_data_block); | ||
1751 | ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); | ||
1752 | goal_group = group_no; | ||
1753 | retry_alloc: | ||
1754 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1755 | if (!gdp) | ||
1756 | goto io_error; | ||
1757 | |||
1758 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1759 | /* | ||
1760 | * if there is not enough free blocks to make a new resevation | ||
1761 | * turn off reservation for this allocation | ||
1762 | */ | ||
1763 | if (my_rsv && (free_blocks < windowsz) | ||
1764 | && (rsv_is_empty(&my_rsv->rsv_window))) | ||
1765 | my_rsv = NULL; | ||
1766 | |||
1767 | if (free_blocks > 0) { | ||
1768 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1769 | if (!bitmap_bh) | ||
1770 | goto io_error; | ||
1771 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1772 | group_no, bitmap_bh, grp_target_blk, | ||
1773 | my_rsv, &num, &fatal); | ||
1774 | if (fatal) | ||
1775 | goto out; | ||
1776 | if (grp_alloc_blk >= 0) | ||
1777 | goto allocated; | ||
1778 | } | ||
1779 | |||
1780 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
1781 | smp_rmb(); | ||
1782 | |||
1783 | /* | ||
1784 | * Now search the rest of the groups. We assume that | ||
1785 | * group_no and gdp correctly point to the last group visited. | ||
1786 | */ | ||
1787 | for (bgi = 0; bgi < ngroups; bgi++) { | ||
1788 | group_no++; | ||
1789 | if (group_no >= ngroups) | ||
1790 | group_no = 0; | ||
1791 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1792 | if (!gdp) | ||
1793 | goto io_error; | ||
1794 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1795 | /* | ||
1796 | * skip this group if the number of | ||
1797 | * free blocks is less than half of the reservation | ||
1798 | * window size. | ||
1799 | */ | ||
1800 | if (free_blocks <= (windowsz/2)) | ||
1801 | continue; | ||
1802 | |||
1803 | brelse(bitmap_bh); | ||
1804 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1805 | if (!bitmap_bh) | ||
1806 | goto io_error; | ||
1807 | /* | ||
1808 | * try to allocate block(s) from this group, without a goal(-1). | ||
1809 | */ | ||
1810 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1811 | group_no, bitmap_bh, -1, my_rsv, | ||
1812 | &num, &fatal); | ||
1813 | if (fatal) | ||
1814 | goto out; | ||
1815 | if (grp_alloc_blk >= 0) | ||
1816 | goto allocated; | ||
1817 | } | ||
1818 | /* | ||
1819 | * We may end up a bogus ealier ENOSPC error due to | ||
1820 | * filesystem is "full" of reservations, but | ||
1821 | * there maybe indeed free blocks avaliable on disk | ||
1822 | * In this case, we just forget about the reservations | ||
1823 | * just do block allocation as without reservations. | ||
1824 | */ | ||
1825 | if (my_rsv) { | ||
1826 | my_rsv = NULL; | ||
1827 | windowsz = 0; | ||
1828 | group_no = goal_group; | ||
1829 | goto retry_alloc; | ||
1830 | } | ||
1831 | /* No space left on the device */ | ||
1832 | *errp = -ENOSPC; | ||
1833 | goto out; | ||
1834 | |||
1835 | allocated: | ||
1836 | |||
1837 | ext4_debug("using block group %lu(%d)\n", | ||
1838 | group_no, gdp->bg_free_blocks_count); | ||
1839 | |||
1840 | BUFFER_TRACE(gdp_bh, "get_write_access"); | ||
1841 | fatal = ext4_journal_get_write_access(handle, gdp_bh); | ||
1842 | if (fatal) | ||
1843 | goto out; | ||
1844 | |||
1845 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | ||
1846 | |||
1847 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | ||
1848 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || | ||
1849 | in_range(ret_block, ext4_inode_table(sb, gdp), | ||
1850 | EXT4_SB(sb)->s_itb_per_group) || | ||
1851 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | ||
1852 | EXT4_SB(sb)->s_itb_per_group)) { | ||
1853 | ext4_error(sb, "ext4_new_block", | ||
1854 | "Allocating block in system zone - " | ||
1855 | "blocks from %llu, length %lu", | ||
1856 | ret_block, num); | ||
1857 | /* | ||
1858 | * claim_block marked the blocks we allocated | ||
1859 | * as in use. So we may want to selectively | ||
1860 | * mark some of the blocks as free | ||
1861 | */ | ||
1862 | goto retry_alloc; | ||
1863 | } | ||
1864 | |||
1865 | performed_allocation = 1; | ||
1866 | |||
1867 | #ifdef CONFIG_JBD2_DEBUG | ||
1868 | { | ||
1869 | struct buffer_head *debug_bh; | ||
1870 | |||
1871 | /* Record bitmap buffer state in the newly allocated block */ | ||
1872 | debug_bh = sb_find_get_block(sb, ret_block); | ||
1873 | if (debug_bh) { | ||
1874 | BUFFER_TRACE(debug_bh, "state when allocated"); | ||
1875 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); | ||
1876 | brelse(debug_bh); | ||
1877 | } | ||
1878 | } | ||
1879 | jbd_lock_bh_state(bitmap_bh); | ||
1880 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1881 | if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { | ||
1882 | int i; | ||
1883 | |||
1884 | for (i = 0; i < num; i++) { | ||
1885 | if (ext4_test_bit(grp_alloc_blk+i, | ||
1886 | bh2jh(bitmap_bh)->b_committed_data)) { | ||
1887 | printk("%s: block was unexpectedly set in " | ||
1888 | "b_committed_data\n", __func__); | ||
1889 | } | ||
1890 | } | ||
1891 | } | ||
1892 | ext4_debug("found bit %d\n", grp_alloc_blk); | ||
1893 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1894 | jbd_unlock_bh_state(bitmap_bh); | ||
1895 | #endif | ||
1896 | |||
1897 | if (ret_block + num - 1 >= ext4_blocks_count(es)) { | ||
1898 | ext4_error(sb, "ext4_new_block", | ||
1899 | "block(%llu) >= blocks count(%llu) - " | ||
1900 | "block_group = %lu, es == %p ", ret_block, | ||
1901 | ext4_blocks_count(es), group_no, es); | ||
1902 | goto out; | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * It is up to the caller to add the new buffer to a journal | ||
1907 | * list of some description. We don't know in advance whether | ||
1908 | * the caller wants to use it as metadata or data. | ||
1909 | */ | ||
1910 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1911 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1912 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
1913 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | ||
1914 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | ||
1915 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1916 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) | ||
1917 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
1918 | |||
1919 | if (sbi->s_log_groups_per_flex) { | ||
1920 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
1921 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
1922 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
1923 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
1924 | } | ||
1925 | |||
1926 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | ||
1927 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
1928 | if (!fatal) | ||
1929 | fatal = err; | ||
1930 | |||
1931 | sb->s_dirt = 1; | ||
1932 | if (fatal) | ||
1933 | goto out; | ||
1934 | |||
1935 | *errp = 0; | ||
1936 | brelse(bitmap_bh); | ||
1937 | DQUOT_FREE_BLOCK(inode, *count-num); | ||
1938 | *count = num; | ||
1939 | return ret_block; | ||
1940 | |||
1941 | io_error: | ||
1942 | *errp = -EIO; | ||
1943 | out: | ||
1944 | if (fatal) { | ||
1945 | *errp = fatal; | ||
1946 | ext4_std_error(sb, fatal); | ||
1947 | } | ||
1948 | /* | ||
1949 | * Undo the block allocation | ||
1950 | */ | ||
1951 | if (!performed_allocation) | ||
1952 | DQUOT_FREE_BLOCK(inode, *count); | ||
1953 | brelse(bitmap_bh); | ||
1954 | return 0; | ||
1955 | } | ||
1956 | |||
1957 | #define EXT4_META_BLOCK 0x1 | 684 | #define EXT4_META_BLOCK 0x1 |
1958 | 685 | ||
1959 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | 686 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, |
@@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | |||
1963 | struct ext4_allocation_request ar; | 690 | struct ext4_allocation_request ar; |
1964 | ext4_fsblk_t ret; | 691 | ext4_fsblk_t ret; |
1965 | 692 | ||
1966 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
1967 | return ext4_old_new_blocks(handle, inode, goal, count, errp); | ||
1968 | } | ||
1969 | |||
1970 | memset(&ar, 0, sizeof(ar)); | 693 | memset(&ar, 0, sizeof(ar)); |
1971 | /* Fill with neighbour allocated blocks */ | 694 | /* Fill with neighbour allocated blocks */ |
1972 | 695 | ||
@@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
2008 | /* | 731 | /* |
2009 | * Account for the allocated meta blocks | 732 | * Account for the allocated meta blocks |
2010 | */ | 733 | */ |
2011 | if (!(*errp)) { | 734 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
2012 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 735 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2013 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | 736 | EXT4_I(inode)->i_allocated_meta_blocks += *count; |
2014 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 737 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
2093 | bitmap_count += x; | 816 | bitmap_count += x; |
2094 | } | 817 | } |
2095 | brelse(bitmap_bh); | 818 | brelse(bitmap_bh); |
2096 | printk("ext4_count_free_blocks: stored = %llu" | 819 | printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" |
2097 | ", computed = %llu, %llu\n", | 820 | ", computed = %llu, %llu\n", ext4_free_blocks_count(es), |
2098 | ext4_free_blocks_count(es), | 821 | desc_count, bitmap_count); |
2099 | desc_count, bitmap_count); | ||
2100 | return bitmap_count; | 822 | return bitmap_count; |
2101 | #else | 823 | #else |
2102 | desc_count = 0; | 824 | desc_count = 0; |
@@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
2183 | 905 | ||
2184 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || | 906 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || |
2185 | metagroup < first_meta_bg) | 907 | metagroup < first_meta_bg) |
2186 | return ext4_bg_num_gdb_nometa(sb,group); | 908 | return ext4_bg_num_gdb_nometa(sb, group); |
2187 | 909 | ||
2188 | return ext4_bg_num_gdb_meta(sb,group); | 910 | return ext4_bg_num_gdb_meta(sb,group); |
2189 | 911 | ||
2190 | } | 912 | } |
913 | |||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea6750454..0a7a6663c190 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -15,17 +15,17 @@ | |||
15 | 15 | ||
16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; |
17 | 17 | ||
18 | unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) | 18 | unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) |
19 | { | 19 | { |
20 | unsigned int i; | 20 | unsigned int i; |
21 | unsigned long sum = 0; | 21 | unsigned long sum = 0; |
22 | 22 | ||
23 | if (!map) | 23 | if (!map) |
24 | return (0); | 24 | return 0; |
25 | for (i = 0; i < numchars; i++) | 25 | for (i = 0; i < numchars; i++) |
26 | sum += nibblemap[map->b_data[i] & 0xf] + | 26 | sum += nibblemap[map->b_data[i] & 0xf] + |
27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; |
28 | return (sum); | 28 | return sum; |
29 | } | 29 | } |
30 | 30 | ||
31 | #endif /* EXT4FS_DEBUG */ | 31 | #endif /* EXT4FS_DEBUG */ |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ec8e33b45219..3ca6a2b7632d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | 35 | static int ext4_readdir(struct file *, void *, filldir_t); |
36 | static int ext4_dx_readdir(struct file * filp, | 36 | static int ext4_dx_readdir(struct file *filp, |
37 | void * dirent, filldir_t filldir); | 37 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir (struct inode * inode, | 38 | static int ext4_release_dir(struct inode *inode, |
39 | struct file * filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry (const char * function, struct inode * dir, | 64 | int ext4_check_dir_entry(const char *function, struct inode *dir, |
65 | struct ext4_dir_entry_2 * de, | 65 | struct ext4_dir_entry_2 *de, |
66 | struct buffer_head * bh, | 66 | struct buffer_head *bh, |
67 | unsigned long offset) | 67 | unsigned long offset) |
68 | { | 68 | { |
69 | const char * error_msg = NULL; | 69 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
71 | 71 | ||
72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
82 | error_msg = "inode out of bounds"; | 82 | error_msg = "inode out of bounds"; |
83 | 83 | ||
84 | if (error_msg != NULL) | 84 | if (error_msg != NULL) |
85 | ext4_error (dir->i_sb, function, | 85 | ext4_error(dir->i_sb, function, |
86 | "bad entry in directory #%lu: %s - " | 86 | "bad entry in directory #%lu: %s - " |
87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
88 | dir->i_ino, error_msg, offset, | 88 | dir->i_ino, error_msg, offset, |
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
91 | return error_msg == NULL ? 1 : 0; | 91 | return error_msg == NULL ? 1 : 0; |
92 | } | 92 | } |
93 | 93 | ||
94 | static int ext4_readdir(struct file * filp, | 94 | static int ext4_readdir(struct file *filp, |
95 | void * dirent, filldir_t filldir) | 95 | void *dirent, filldir_t filldir) |
96 | { | 96 | { |
97 | int error = 0; | 97 | int error = 0; |
98 | unsigned long offset; | 98 | unsigned long offset; |
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, | |||
102 | int err; | 102 | int err; |
103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
104 | int ret = 0; | 104 | int ret = 0; |
105 | int dir_has_error = 0; | ||
105 | 106 | ||
106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
107 | 108 | ||
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, | |||
148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
149 | */ | 150 | */ |
150 | if (!bh) { | 151 | if (!bh) { |
151 | ext4_error (sb, "ext4_readdir", | 152 | if (!dir_has_error) { |
152 | "directory #%lu contains a hole at offset %lu", | 153 | ext4_error(sb, __func__, "directory #%lu " |
153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %Lu", |
155 | inode->i_ino, | ||
156 | (unsigned long long) filp->f_pos); | ||
157 | dir_has_error = 1; | ||
158 | } | ||
154 | /* corrupt size? Maybe no more blocks to read */ | 159 | /* corrupt size? Maybe no more blocks to read */ |
155 | if (filp->f_pos > inode->i_blocks << 9) | 160 | if (filp->f_pos > inode->i_blocks << 9) |
156 | break; | 161 | break; |
@@ -187,14 +192,14 @@ revalidate: | |||
187 | while (!error && filp->f_pos < inode->i_size | 192 | while (!error && filp->f_pos < inode->i_size |
188 | && offset < sb->s_blocksize) { | 193 | && offset < sb->s_blocksize) { |
189 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 194 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
190 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | 195 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, |
191 | bh, offset)) { | 196 | bh, offset)) { |
192 | /* | 197 | /* |
193 | * On error, skip the f_pos to the next block | 198 | * On error, skip the f_pos to the next block |
194 | */ | 199 | */ |
195 | filp->f_pos = (filp->f_pos | | 200 | filp->f_pos = (filp->f_pos | |
196 | (sb->s_blocksize - 1)) + 1; | 201 | (sb->s_blocksize - 1)) + 1; |
197 | brelse (bh); | 202 | brelse(bh); |
198 | ret = stored; | 203 | ret = stored; |
199 | goto out; | 204 | goto out; |
200 | } | 205 | } |
@@ -218,12 +223,12 @@ revalidate: | |||
218 | break; | 223 | break; |
219 | if (version != filp->f_version) | 224 | if (version != filp->f_version) |
220 | goto revalidate; | 225 | goto revalidate; |
221 | stored ++; | 226 | stored++; |
222 | } | 227 | } |
223 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); | 228 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
224 | } | 229 | } |
225 | offset = 0; | 230 | offset = 0; |
226 | brelse (bh); | 231 | brelse(bh); |
227 | } | 232 | } |
228 | out: | 233 | out: |
229 | return ret; | 234 | return ret; |
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
290 | parent = rb_parent(n); | 295 | parent = rb_parent(n); |
291 | fname = rb_entry(n, struct fname, rb_hash); | 296 | fname = rb_entry(n, struct fname, rb_hash); |
292 | while (fname) { | 297 | while (fname) { |
293 | struct fname * old = fname; | 298 | struct fname *old = fname; |
294 | fname = fname->next; | 299 | fname = fname->next; |
295 | kfree (old); | 300 | kfree(old); |
296 | } | 301 | } |
297 | if (!parent) | 302 | if (!parent) |
298 | root->rb_node = NULL; | 303 | root->rb_node = NULL; |
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
331 | struct ext4_dir_entry_2 *dirent) | 336 | struct ext4_dir_entry_2 *dirent) |
332 | { | 337 | { |
333 | struct rb_node **p, *parent = NULL; | 338 | struct rb_node **p, *parent = NULL; |
334 | struct fname * fname, *new_fn; | 339 | struct fname *fname, *new_fn; |
335 | struct dir_private_info *info; | 340 | struct dir_private_info *info; |
336 | int len; | 341 | int len; |
337 | 342 | ||
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
388 | * for all entres on the fname linked list. (Normally there is only | 393 | * for all entres on the fname linked list. (Normally there is only |
389 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 394 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
390 | */ | 395 | */ |
391 | static int call_filldir(struct file * filp, void * dirent, | 396 | static int call_filldir(struct file *filp, void *dirent, |
392 | filldir_t filldir, struct fname *fname) | 397 | filldir_t filldir, struct fname *fname) |
393 | { | 398 | { |
394 | struct dir_private_info *info = filp->private_data; | 399 | struct dir_private_info *info = filp->private_data; |
395 | loff_t curr_pos; | 400 | loff_t curr_pos; |
396 | struct inode *inode = filp->f_path.dentry->d_inode; | 401 | struct inode *inode = filp->f_path.dentry->d_inode; |
397 | struct super_block * sb; | 402 | struct super_block *sb; |
398 | int error; | 403 | int error; |
399 | 404 | ||
400 | sb = inode->i_sb; | 405 | sb = inode->i_sb; |
401 | 406 | ||
402 | if (!fname) { | 407 | if (!fname) { |
403 | printk("call_filldir: called with null fname?!?\n"); | 408 | printk(KERN_ERR "ext4: call_filldir: called with " |
409 | "null fname?!?\n"); | ||
404 | return 0; | 410 | return 0; |
405 | } | 411 | } |
406 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, | |||
419 | return 0; | 425 | return 0; |
420 | } | 426 | } |
421 | 427 | ||
422 | static int ext4_dx_readdir(struct file * filp, | 428 | static int ext4_dx_readdir(struct file *filp, |
423 | void * dirent, filldir_t filldir) | 429 | void *dirent, filldir_t filldir) |
424 | { | 430 | { |
425 | struct dir_private_info *info = filp->private_data; | 431 | struct dir_private_info *info = filp->private_data; |
426 | struct inode *inode = filp->f_path.dentry->d_inode; | 432 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -511,7 +517,7 @@ finished: | |||
511 | return 0; | 517 | return 0; |
512 | } | 518 | } |
513 | 519 | ||
514 | static int ext4_release_dir (struct inode * inode, struct file * filp) | 520 | static int ext4_release_dir(struct inode *inode, struct file *filp) |
515 | { | 521 | { |
516 | if (filp->private_data) | 522 | if (filp->private_data) |
517 | ext4_htree_free_dir_info(filp->private_data); | 523 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 295003241d3d..f46a513a5157 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -44,9 +44,9 @@ | |||
44 | #ifdef EXT4FS_DEBUG | 44 | #ifdef EXT4FS_DEBUG |
45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
46 | do { \ | 46 | do { \ |
47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
48 | __FILE__, __LINE__, __func__); \ | 48 | __FILE__, __LINE__, __func__); \ |
49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk(KERN_DEBUG f, ## a); \ |
50 | } while (0) | 50 | } while (0) |
51 | #else | 51 | #else |
52 | #define ext4_debug(f, a...) do {} while (0) | 52 | #define ext4_debug(f, a...) do {} while (0) |
@@ -128,7 +128,7 @@ struct ext4_allocation_request { | |||
128 | #else | 128 | #else |
129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
130 | #endif | 130 | #endif |
131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) | 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
134 | #else | 134 | #else |
@@ -245,7 +245,7 @@ struct flex_groups { | |||
245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
246 | 246 | ||
247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
248 | #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 248 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Inode dynamic state flags | 251 | * Inode dynamic state flags |
@@ -291,8 +291,6 @@ struct ext4_new_group_data { | |||
291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
294 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | ||
295 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) | ||
296 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 294 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
297 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 295 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
298 | #ifdef CONFIG_JBD2_DEBUG | 296 | #ifdef CONFIG_JBD2_DEBUG |
@@ -300,7 +298,10 @@ struct ext4_new_group_data { | |||
300 | #endif | 298 | #endif |
301 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 299 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
302 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 300 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
303 | #define EXT4_IOC_MIGRATE _IO('f', 7) | 301 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
302 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | ||
303 | #define EXT4_IOC_MIGRATE _IO('f', 9) | ||
304 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | ||
304 | 305 | ||
305 | /* | 306 | /* |
306 | * ioctl commands in 32 bit emulation | 307 | * ioctl commands in 32 bit emulation |
@@ -538,7 +539,6 @@ do { \ | |||
538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 539 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 540 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 541 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | ||
542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
544 | #ifndef _LINUX_EXT2_FS_H | 544 | #ifndef _LINUX_EXT2_FS_H |
@@ -667,7 +667,7 @@ struct ext4_super_block { | |||
667 | }; | 667 | }; |
668 | 668 | ||
669 | #ifdef __KERNEL__ | 669 | #ifdef __KERNEL__ |
670 | static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) | 670 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
671 | { | 671 | { |
672 | return sb->s_fs_info; | 672 | return sb->s_fs_info; |
673 | } | 673 | } |
@@ -725,11 +725,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
725 | */ | 725 | */ |
726 | 726 | ||
727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
728 | ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) | 728 | (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) |
729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
730 | ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) | 730 | (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) |
731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
732 | ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) | 732 | (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) |
733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
@@ -789,6 +789,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
789 | #define EXT4_DEF_RESUID 0 | 789 | #define EXT4_DEF_RESUID 0 |
790 | #define EXT4_DEF_RESGID 0 | 790 | #define EXT4_DEF_RESGID 0 |
791 | 791 | ||
792 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | ||
793 | |||
792 | /* | 794 | /* |
793 | * Default mount options | 795 | * Default mount options |
794 | */ | 796 | */ |
@@ -954,6 +956,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
954 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 956 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
955 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); | 957 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); |
956 | 958 | ||
959 | extern struct proc_dir_entry *ext4_proc_root; | ||
960 | |||
961 | #ifdef CONFIG_PROC_FS | ||
962 | extern const struct file_operations ext4_ui_proc_fops; | ||
963 | |||
964 | #define EXT4_PROC_HANDLER(name, var) \ | ||
965 | do { \ | ||
966 | proc = proc_create_data(name, mode, sbi->s_proc, \ | ||
967 | &ext4_ui_proc_fops, &sbi->s_##var); \ | ||
968 | if (proc == NULL) { \ | ||
969 | printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ | ||
970 | goto err_out; \ | ||
971 | } \ | ||
972 | } while (0) | ||
973 | #else | ||
974 | #define EXT4_PROC_HANDLER(name, var) | ||
975 | #endif | ||
976 | |||
957 | /* | 977 | /* |
958 | * Function prototypes | 978 | * Function prototypes |
959 | */ | 979 | */ |
@@ -981,23 +1001,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1001 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 1002 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
983 | unsigned long *count, int *errp); | 1003 | unsigned long *count, int *errp); |
984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | 1004 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
985 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 1005 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
987 | ext4_fsblk_t nblocks); | 1006 | s64 nblocks); |
988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 1007 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
989 | ext4_fsblk_t block, unsigned long count, int metadata); | 1008 | ext4_fsblk_t block, unsigned long count, int metadata); |
990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 1009 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
991 | ext4_fsblk_t block, unsigned long count, | 1010 | ext4_fsblk_t block, unsigned long count, |
992 | unsigned long *pdquot_freed_blocks); | 1011 | unsigned long *pdquot_freed_blocks); |
993 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); | 1012 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
994 | extern void ext4_check_blocks_bitmap (struct super_block *); | 1013 | extern void ext4_check_blocks_bitmap(struct super_block *); |
995 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1014 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
996 | ext4_group_t block_group, | 1015 | ext4_group_t block_group, |
997 | struct buffer_head ** bh); | 1016 | struct buffer_head ** bh); |
998 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1017 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
999 | extern void ext4_init_block_alloc_info(struct inode *); | ||
1000 | extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); | ||
1001 | 1018 | ||
1002 | /* dir.c */ | 1019 | /* dir.c */ |
1003 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1020 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1009,20 +1026,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1009 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1026 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1010 | 1027 | ||
1011 | /* fsync.c */ | 1028 | /* fsync.c */ |
1012 | extern int ext4_sync_file (struct file *, struct dentry *, int); | 1029 | extern int ext4_sync_file(struct file *, struct dentry *, int); |
1013 | 1030 | ||
1014 | /* hash.c */ | 1031 | /* hash.c */ |
1015 | extern int ext4fs_dirhash(const char *name, int len, struct | 1032 | extern int ext4fs_dirhash(const char *name, int len, struct |
1016 | dx_hash_info *hinfo); | 1033 | dx_hash_info *hinfo); |
1017 | 1034 | ||
1018 | /* ialloc.c */ | 1035 | /* ialloc.c */ |
1019 | extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); | 1036 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); |
1020 | extern void ext4_free_inode (handle_t *, struct inode *); | 1037 | extern void ext4_free_inode(handle_t *, struct inode *); |
1021 | extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); | 1038 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
1022 | extern unsigned long ext4_count_free_inodes (struct super_block *); | 1039 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1023 | extern unsigned long ext4_count_dirs (struct super_block *); | 1040 | extern unsigned long ext4_count_dirs(struct super_block *); |
1024 | extern void ext4_check_inodes_bitmap (struct super_block *); | 1041 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1025 | extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | 1042 | extern unsigned long ext4_count_free(struct buffer_head *, unsigned); |
1026 | 1043 | ||
1027 | /* mballoc.c */ | 1044 | /* mballoc.c */ |
1028 | extern long ext4_mb_stats; | 1045 | extern long ext4_mb_stats; |
@@ -1032,7 +1049,7 @@ extern int ext4_mb_release(struct super_block *); | |||
1032 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1049 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
1033 | struct ext4_allocation_request *, int *); | 1050 | struct ext4_allocation_request *, int *); |
1034 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1051 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1035 | extern void ext4_mb_discard_inode_preallocations(struct inode *); | 1052 | extern void ext4_discard_preallocations(struct inode *); |
1036 | extern int __init init_ext4_mballoc(void); | 1053 | extern int __init init_ext4_mballoc(void); |
1037 | extern void exit_ext4_mballoc(void); | 1054 | extern void exit_ext4_mballoc(void); |
1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1055 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
@@ -1050,24 +1067,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, | |||
1050 | ext4_lblk_t, int, int *); | 1067 | ext4_lblk_t, int, int *); |
1051 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1068 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
1052 | ext4_lblk_t, int, int *); | 1069 | ext4_lblk_t, int, int *); |
1070 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1071 | struct buffer_head *bh_result, int create); | ||
1053 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 1072 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
1054 | ext4_lblk_t iblock, unsigned long maxblocks, | 1073 | ext4_lblk_t iblock, unsigned long maxblocks, |
1055 | struct buffer_head *bh_result, | 1074 | struct buffer_head *bh_result, |
1056 | int create, int extend_disksize); | 1075 | int create, int extend_disksize); |
1057 | 1076 | ||
1058 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1077 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
1059 | extern int ext4_write_inode (struct inode *, int); | 1078 | extern int ext4_write_inode(struct inode *, int); |
1060 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1079 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1061 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1080 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1062 | struct kstat *stat); | 1081 | struct kstat *stat); |
1063 | extern void ext4_delete_inode (struct inode *); | 1082 | extern void ext4_delete_inode(struct inode *); |
1064 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1083 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1065 | extern void ext4_discard_reservation (struct inode *); | ||
1066 | extern void ext4_dirty_inode(struct inode *); | 1084 | extern void ext4_dirty_inode(struct inode *); |
1067 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1085 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
1068 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1086 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1069 | extern int ext4_can_truncate(struct inode *inode); | 1087 | extern int ext4_can_truncate(struct inode *inode); |
1070 | extern void ext4_truncate (struct inode *); | 1088 | extern void ext4_truncate(struct inode *); |
1071 | extern void ext4_set_inode_flags(struct inode *); | 1089 | extern void ext4_set_inode_flags(struct inode *); |
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1090 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1091 | extern void ext4_set_aops(struct inode *inode); |
@@ -1080,11 +1098,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | |||
1080 | 1098 | ||
1081 | /* ioctl.c */ | 1099 | /* ioctl.c */ |
1082 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1100 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1083 | extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); | 1101 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
1084 | 1102 | ||
1085 | /* migrate.c */ | 1103 | /* migrate.c */ |
1086 | extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, | 1104 | extern int ext4_ext_migrate(struct inode *); |
1087 | unsigned long); | ||
1088 | /* namei.c */ | 1105 | /* namei.c */ |
1089 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1106 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1090 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1107 | extern int ext4_orphan_del(handle_t *, struct inode *); |
@@ -1099,14 +1116,14 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1099 | ext4_fsblk_t n_blocks_count); | 1116 | ext4_fsblk_t n_blocks_count); |
1100 | 1117 | ||
1101 | /* super.c */ | 1118 | /* super.c */ |
1102 | extern void ext4_error (struct super_block *, const char *, const char *, ...) | 1119 | extern void ext4_error(struct super_block *, const char *, const char *, ...) |
1103 | __attribute__ ((format (printf, 3, 4))); | 1120 | __attribute__ ((format (printf, 3, 4))); |
1104 | extern void __ext4_std_error (struct super_block *, const char *, int); | 1121 | extern void __ext4_std_error(struct super_block *, const char *, int); |
1105 | extern void ext4_abort (struct super_block *, const char *, const char *, ...) | 1122 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
1106 | __attribute__ ((format (printf, 3, 4))); | 1123 | __attribute__ ((format (printf, 3, 4))); |
1107 | extern void ext4_warning (struct super_block *, const char *, const char *, ...) | 1124 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1108 | __attribute__ ((format (printf, 3, 4))); | 1125 | __attribute__ ((format (printf, 3, 4))); |
1109 | extern void ext4_update_dynamic_rev (struct super_block *sb); | 1126 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1110 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1127 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1111 | __u32 compat); | 1128 | __u32 compat); |
1112 | extern int ext4_update_rocompat_feature(handle_t *handle, | 1129 | extern int ext4_update_rocompat_feature(handle_t *handle, |
@@ -1179,7 +1196,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | |||
1179 | 1196 | ||
1180 | static inline | 1197 | static inline |
1181 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 1198 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
1182 | ext4_group_t group) | 1199 | ext4_group_t group) |
1183 | { | 1200 | { |
1184 | struct ext4_group_info ***grp_info; | 1201 | struct ext4_group_info ***grp_info; |
1185 | long indexv, indexh; | 1202 | long indexv, indexh; |
@@ -1207,6 +1224,28 @@ do { \ | |||
1207 | __ext4_std_error((sb), __func__, (errno)); \ | 1224 | __ext4_std_error((sb), __func__, (errno)); \ |
1208 | } while (0) | 1225 | } while (0) |
1209 | 1226 | ||
1227 | #ifdef CONFIG_SMP | ||
1228 | /* Each CPU can accumulate FBC_BATCH blocks in their local | ||
1229 | * counters. So we need to make sure we have free blocks more | ||
1230 | * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. | ||
1231 | */ | ||
1232 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) | ||
1233 | #else | ||
1234 | #define EXT4_FREEBLOCKS_WATERMARK 0 | ||
1235 | #endif | ||
1236 | |||
1237 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | ||
1238 | { | ||
1239 | /* | ||
1240 | * XXX: replace with spinlock if seen contended -bzzz | ||
1241 | */ | ||
1242 | down_write(&EXT4_I(inode)->i_data_sem); | ||
1243 | if (newsize > EXT4_I(inode)->i_disksize) | ||
1244 | EXT4_I(inode)->i_disksize = newsize; | ||
1245 | up_write(&EXT4_I(inode)->i_data_sem); | ||
1246 | return ; | ||
1247 | } | ||
1248 | |||
1210 | /* | 1249 | /* |
1211 | * Inodes and files operations | 1250 | * Inodes and files operations |
1212 | */ | 1251 | */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index d33dc56d6986..bec7ce59fc0d 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -124,6 +124,19 @@ struct ext4_ext_path { | |||
124 | #define EXT4_EXT_CACHE_GAP 1 | 124 | #define EXT4_EXT_CACHE_GAP 1 |
125 | #define EXT4_EXT_CACHE_EXTENT 2 | 125 | #define EXT4_EXT_CACHE_EXTENT 2 |
126 | 126 | ||
127 | /* | ||
128 | * to be called by ext4_ext_walk_space() | ||
129 | * negative retcode - error | ||
130 | * positive retcode - signal for ext4_ext_walk_space(), see below | ||
131 | * callback must return valid extent (passed or newly created) | ||
132 | */ | ||
133 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | ||
134 | struct ext4_ext_cache *, | ||
135 | struct ext4_extent *, void *); | ||
136 | |||
137 | #define EXT_CONTINUE 0 | ||
138 | #define EXT_BREAK 1 | ||
139 | #define EXT_REPEAT 2 | ||
127 | 140 | ||
128 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
129 | 142 | ||
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode, | |||
224 | struct ext4_extent *); | 237 | struct ext4_extent *); |
225 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 238 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
226 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 239 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
240 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
241 | ext_prepare_callback, void *); | ||
227 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 242 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
228 | struct ext4_ext_path *); | 243 | struct ext4_ext_path *); |
229 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | 244 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e4..5c124c0ac6d3 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; | |||
33 | /* data type for block group number */ | 33 | /* data type for block group number */ |
34 | typedef unsigned long ext4_group_t; | 34 | typedef unsigned long ext4_group_t; |
35 | 35 | ||
36 | struct ext4_reserve_window { | ||
37 | ext4_fsblk_t _rsv_start; /* First byte reserved */ | ||
38 | ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ | ||
39 | }; | ||
40 | |||
41 | struct ext4_reserve_window_node { | ||
42 | struct rb_node rsv_node; | ||
43 | __u32 rsv_goal_size; | ||
44 | __u32 rsv_alloc_hit; | ||
45 | struct ext4_reserve_window rsv_window; | ||
46 | }; | ||
47 | |||
48 | struct ext4_block_alloc_info { | ||
49 | /* information about reservation window */ | ||
50 | struct ext4_reserve_window_node rsv_window_node; | ||
51 | /* | ||
52 | * was i_next_alloc_block in ext4_inode_info | ||
53 | * is the logical (file-relative) number of the | ||
54 | * most-recently-allocated block in this file. | ||
55 | * We use this for detecting linearly ascending allocation requests. | ||
56 | */ | ||
57 | ext4_lblk_t last_alloc_logical_block; | ||
58 | /* | ||
59 | * Was i_next_alloc_goal in ext4_inode_info | ||
60 | * is the *physical* companion to i_next_alloc_block. | ||
61 | * it the physical block number of the block which was most-recentl | ||
62 | * allocated to this file. This give us the goal (target) for the next | ||
63 | * allocation when we detect linearly ascending requests. | ||
64 | */ | ||
65 | ext4_fsblk_t last_alloc_physical_block; | ||
66 | }; | ||
67 | |||
68 | #define rsv_start rsv_window._rsv_start | 36 | #define rsv_start rsv_window._rsv_start |
69 | #define rsv_end rsv_window._rsv_end | 37 | #define rsv_end rsv_window._rsv_end |
70 | 38 | ||
@@ -97,11 +65,8 @@ struct ext4_inode_info { | |||
97 | ext4_group_t i_block_group; | 65 | ext4_group_t i_block_group; |
98 | __u32 i_state; /* Dynamic state flags for ext4 */ | 66 | __u32 i_state; /* Dynamic state flags for ext4 */ |
99 | 67 | ||
100 | /* block reservation info */ | ||
101 | struct ext4_block_alloc_info *i_block_alloc_info; | ||
102 | |||
103 | ext4_lblk_t i_dir_start_lookup; | 68 | ext4_lblk_t i_dir_start_lookup; |
104 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 69 | #ifdef CONFIG_EXT4_FS_XATTR |
105 | /* | 70 | /* |
106 | * Extended attributes can be read independently of the main file | 71 | * Extended attributes can be read independently of the main file |
107 | * data. Taking i_mutex even when reading would cause contention | 72 | * data. Taking i_mutex even when reading would cause contention |
@@ -111,7 +76,7 @@ struct ext4_inode_info { | |||
111 | */ | 76 | */ |
112 | struct rw_semaphore xattr_sem; | 77 | struct rw_semaphore xattr_sem; |
113 | #endif | 78 | #endif |
114 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 79 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
115 | struct posix_acl *i_acl; | 80 | struct posix_acl *i_acl; |
116 | struct posix_acl *i_default_acl; | 81 | struct posix_acl *i_default_acl; |
117 | #endif | 82 | #endif |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d5531..6a0b40d43264 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -40,8 +40,8 @@ struct ext4_sb_info { | |||
40 | unsigned long s_blocks_last; /* Last seen block count */ | 40 | unsigned long s_blocks_last; /* Last seen block count */ |
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
43 | struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ | 43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
44 | struct buffer_head ** s_group_desc; | 44 | struct buffer_head **s_group_desc; |
45 | unsigned long s_mount_opt; | 45 | unsigned long s_mount_opt; |
46 | ext4_fsblk_t s_sb_block; | 46 | ext4_fsblk_t s_sb_block; |
47 | uid_t s_resuid; | 47 | uid_t s_resuid; |
@@ -52,6 +52,7 @@ struct ext4_sb_info { | |||
52 | int s_desc_per_block_bits; | 52 | int s_desc_per_block_bits; |
53 | int s_inode_size; | 53 | int s_inode_size; |
54 | int s_first_ino; | 54 | int s_first_ino; |
55 | unsigned int s_inode_readahead_blks; | ||
55 | spinlock_t s_next_gen_lock; | 56 | spinlock_t s_next_gen_lock; |
56 | u32 s_next_generation; | 57 | u32 s_next_generation; |
57 | u32 s_hash_seed[4]; | 58 | u32 s_hash_seed[4]; |
@@ -59,16 +60,17 @@ struct ext4_sb_info { | |||
59 | struct percpu_counter s_freeblocks_counter; | 60 | struct percpu_counter s_freeblocks_counter; |
60 | struct percpu_counter s_freeinodes_counter; | 61 | struct percpu_counter s_freeinodes_counter; |
61 | struct percpu_counter s_dirs_counter; | 62 | struct percpu_counter s_dirs_counter; |
63 | struct percpu_counter s_dirtyblocks_counter; | ||
62 | struct blockgroup_lock s_blockgroup_lock; | 64 | struct blockgroup_lock s_blockgroup_lock; |
65 | struct proc_dir_entry *s_proc; | ||
63 | 66 | ||
64 | /* root of the per fs reservation window tree */ | 67 | /* root of the per fs reservation window tree */ |
65 | spinlock_t s_rsv_window_lock; | 68 | spinlock_t s_rsv_window_lock; |
66 | struct rb_root s_rsv_window_root; | 69 | struct rb_root s_rsv_window_root; |
67 | struct ext4_reserve_window_node s_rsv_window_head; | ||
68 | 70 | ||
69 | /* Journaling */ | 71 | /* Journaling */ |
70 | struct inode * s_journal_inode; | 72 | struct inode *s_journal_inode; |
71 | struct journal_s * s_journal; | 73 | struct journal_s *s_journal; |
72 | struct list_head s_orphan; | 74 | struct list_head s_orphan; |
73 | unsigned long s_commit_interval; | 75 | unsigned long s_commit_interval; |
74 | struct block_device *journal_bdev; | 76 | struct block_device *journal_bdev; |
@@ -106,12 +108,12 @@ struct ext4_sb_info { | |||
106 | 108 | ||
107 | /* tunables */ | 109 | /* tunables */ |
108 | unsigned long s_stripe; | 110 | unsigned long s_stripe; |
109 | unsigned long s_mb_stream_request; | 111 | unsigned int s_mb_stream_request; |
110 | unsigned long s_mb_max_to_scan; | 112 | unsigned int s_mb_max_to_scan; |
111 | unsigned long s_mb_min_to_scan; | 113 | unsigned int s_mb_min_to_scan; |
112 | unsigned long s_mb_stats; | 114 | unsigned int s_mb_stats; |
113 | unsigned long s_mb_order2_reqs; | 115 | unsigned int s_mb_order2_reqs; |
114 | unsigned long s_mb_group_prealloc; | 116 | unsigned int s_mb_group_prealloc; |
115 | /* where last allocation was done - for stream allocation */ | 117 | /* where last allocation was done - for stream allocation */ |
116 | unsigned long s_mb_last_group; | 118 | unsigned long s_mb_last_group; |
117 | unsigned long s_mb_last_start; | 119 | unsigned long s_mb_last_start; |
@@ -121,7 +123,6 @@ struct ext4_sb_info { | |||
121 | int s_mb_history_cur; | 123 | int s_mb_history_cur; |
122 | int s_mb_history_max; | 124 | int s_mb_history_max; |
123 | int s_mb_history_num; | 125 | int s_mb_history_num; |
124 | struct proc_dir_entry *s_mb_proc; | ||
125 | spinlock_t s_mb_history_lock; | 126 | spinlock_t s_mb_history_lock; |
126 | int s_mb_history_filter; | 127 | int s_mb_history_filter; |
127 | 128 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b24d3c53f20c..ea2ce3c0ae66 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/falloc.h> | 41 | #include <linux/falloc.h> |
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <linux/fiemap.h> | ||
43 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
44 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
45 | 46 | ||
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
383 | ext_debug("\n"); | 384 | ext_debug("\n"); |
384 | } | 385 | } |
385 | #else | 386 | #else |
386 | #define ext4_ext_show_path(inode,path) | 387 | #define ext4_ext_show_path(inode, path) |
387 | #define ext4_ext_show_leaf(inode,path) | 388 | #define ext4_ext_show_leaf(inode, path) |
388 | #endif | 389 | #endif |
389 | 390 | ||
390 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 391 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
440 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { | 441 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { |
441 | if (k != 0 && | 442 | if (k != 0 && |
442 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { | 443 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { |
443 | printk("k=%d, ix=0x%p, first=0x%p\n", k, | 444 | printk(KERN_DEBUG "k=%d, ix=0x%p, " |
444 | ix, EXT_FIRST_INDEX(eh)); | 445 | "first=0x%p\n", k, |
445 | printk("%u <= %u\n", | 446 | ix, EXT_FIRST_INDEX(eh)); |
447 | printk(KERN_DEBUG "%u <= %u\n", | ||
446 | le32_to_cpu(ix->ei_block), | 448 | le32_to_cpu(ix->ei_block), |
447 | le32_to_cpu(ix[-1].ei_block)); | 449 | le32_to_cpu(ix[-1].ei_block)); |
448 | } | 450 | } |
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1475 | struct ext4_ext_path *path, | 1477 | struct ext4_ext_path *path, |
1476 | struct ext4_extent *newext) | 1478 | struct ext4_extent *newext) |
1477 | { | 1479 | { |
1478 | struct ext4_extent_header * eh; | 1480 | struct ext4_extent_header *eh; |
1479 | struct ext4_extent *ex, *fex; | 1481 | struct ext4_extent *ex, *fex; |
1480 | struct ext4_extent *nearex; /* nearest extent */ | 1482 | struct ext4_extent *nearex; /* nearest extent */ |
1481 | struct ext4_ext_path *npath = NULL; | 1483 | struct ext4_ext_path *npath = NULL; |
@@ -1625,6 +1627,113 @@ cleanup: | |||
1625 | return err; | 1627 | return err; |
1626 | } | 1628 | } |
1627 | 1629 | ||
1630 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | ||
1631 | ext4_lblk_t num, ext_prepare_callback func, | ||
1632 | void *cbdata) | ||
1633 | { | ||
1634 | struct ext4_ext_path *path = NULL; | ||
1635 | struct ext4_ext_cache cbex; | ||
1636 | struct ext4_extent *ex; | ||
1637 | ext4_lblk_t next, start = 0, end = 0; | ||
1638 | ext4_lblk_t last = block + num; | ||
1639 | int depth, exists, err = 0; | ||
1640 | |||
1641 | BUG_ON(func == NULL); | ||
1642 | BUG_ON(inode == NULL); | ||
1643 | |||
1644 | while (block < last && block != EXT_MAX_BLOCK) { | ||
1645 | num = last - block; | ||
1646 | /* find extent for this block */ | ||
1647 | path = ext4_ext_find_extent(inode, block, path); | ||
1648 | if (IS_ERR(path)) { | ||
1649 | err = PTR_ERR(path); | ||
1650 | path = NULL; | ||
1651 | break; | ||
1652 | } | ||
1653 | |||
1654 | depth = ext_depth(inode); | ||
1655 | BUG_ON(path[depth].p_hdr == NULL); | ||
1656 | ex = path[depth].p_ext; | ||
1657 | next = ext4_ext_next_allocated_block(path); | ||
1658 | |||
1659 | exists = 0; | ||
1660 | if (!ex) { | ||
1661 | /* there is no extent yet, so try to allocate | ||
1662 | * all requested space */ | ||
1663 | start = block; | ||
1664 | end = block + num; | ||
1665 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
1666 | /* need to allocate space before found extent */ | ||
1667 | start = block; | ||
1668 | end = le32_to_cpu(ex->ee_block); | ||
1669 | if (block + num < end) | ||
1670 | end = block + num; | ||
1671 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
1672 | + ext4_ext_get_actual_len(ex)) { | ||
1673 | /* need to allocate space after found extent */ | ||
1674 | start = block; | ||
1675 | end = block + num; | ||
1676 | if (end >= next) | ||
1677 | end = next; | ||
1678 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
1679 | /* | ||
1680 | * some part of requested space is covered | ||
1681 | * by found extent | ||
1682 | */ | ||
1683 | start = block; | ||
1684 | end = le32_to_cpu(ex->ee_block) | ||
1685 | + ext4_ext_get_actual_len(ex); | ||
1686 | if (block + num < end) | ||
1687 | end = block + num; | ||
1688 | exists = 1; | ||
1689 | } else { | ||
1690 | BUG(); | ||
1691 | } | ||
1692 | BUG_ON(end <= start); | ||
1693 | |||
1694 | if (!exists) { | ||
1695 | cbex.ec_block = start; | ||
1696 | cbex.ec_len = end - start; | ||
1697 | cbex.ec_start = 0; | ||
1698 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1699 | } else { | ||
1700 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
1701 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
1702 | cbex.ec_start = ext_pblock(ex); | ||
1703 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1704 | } | ||
1705 | |||
1706 | BUG_ON(cbex.ec_len == 0); | ||
1707 | err = func(inode, path, &cbex, ex, cbdata); | ||
1708 | ext4_ext_drop_refs(path); | ||
1709 | |||
1710 | if (err < 0) | ||
1711 | break; | ||
1712 | |||
1713 | if (err == EXT_REPEAT) | ||
1714 | continue; | ||
1715 | else if (err == EXT_BREAK) { | ||
1716 | err = 0; | ||
1717 | break; | ||
1718 | } | ||
1719 | |||
1720 | if (ext_depth(inode) != depth) { | ||
1721 | /* depth was changed. we have to realloc path */ | ||
1722 | kfree(path); | ||
1723 | path = NULL; | ||
1724 | } | ||
1725 | |||
1726 | block = cbex.ec_block + cbex.ec_len; | ||
1727 | } | ||
1728 | |||
1729 | if (path) { | ||
1730 | ext4_ext_drop_refs(path); | ||
1731 | kfree(path); | ||
1732 | } | ||
1733 | |||
1734 | return err; | ||
1735 | } | ||
1736 | |||
1628 | static void | 1737 | static void |
1629 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1738 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1630 | __u32 len, ext4_fsblk_t start, int type) | 1739 | __u32 len, ext4_fsblk_t start, int type) |
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) | |||
2142 | */ | 2251 | */ |
2143 | 2252 | ||
2144 | if (test_opt(sb, EXTENTS)) { | 2253 | if (test_opt(sb, EXTENTS)) { |
2145 | printk("EXT4-fs: file extents enabled"); | 2254 | printk(KERN_INFO "EXT4-fs: file extents enabled"); |
2146 | #ifdef AGGRESSIVE_TEST | 2255 | #ifdef AGGRESSIVE_TEST |
2147 | printk(", aggressive tests"); | 2256 | printk(", aggressive tests"); |
2148 | #endif | 2257 | #endif |
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2696 | goto out2; | 2805 | goto out2; |
2697 | } | 2806 | } |
2698 | /* | 2807 | /* |
2699 | * Okay, we need to do block allocation. Lazily initialize the block | 2808 | * Okay, we need to do block allocation. |
2700 | * allocation info here if necessary. | ||
2701 | */ | 2809 | */ |
2702 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||
2703 | ext4_init_block_alloc_info(inode); | ||
2704 | 2810 | ||
2705 | /* find neighbour allocated blocks */ | 2811 | /* find neighbour allocated blocks */ |
2706 | ar.lleft = iblock; | 2812 | ar.lleft = iblock; |
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2760 | /* free data blocks we just allocated */ | 2866 | /* free data blocks we just allocated */ |
2761 | /* not a good idea to call discard here directly, | 2867 | /* not a good idea to call discard here directly, |
2762 | * but otherwise we'd need to call it every free() */ | 2868 | * but otherwise we'd need to call it every free() */ |
2763 | ext4_mb_discard_inode_preallocations(inode); | 2869 | ext4_discard_preallocations(inode); |
2764 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2870 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
2765 | ext4_ext_get_actual_len(&newex), 0); | 2871 | ext4_ext_get_actual_len(&newex), 0); |
2766 | goto out2; | 2872 | goto out2; |
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2824 | down_write(&EXT4_I(inode)->i_data_sem); | 2930 | down_write(&EXT4_I(inode)->i_data_sem); |
2825 | ext4_ext_invalidate_cache(inode); | 2931 | ext4_ext_invalidate_cache(inode); |
2826 | 2932 | ||
2827 | ext4_discard_reservation(inode); | 2933 | ext4_discard_preallocations(inode); |
2828 | 2934 | ||
2829 | /* | 2935 | /* |
2830 | * TODO: optimization is possible here. | 2936 | * TODO: optimization is possible here. |
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
2877 | * Update only when preallocation was requested beyond | 2983 | * Update only when preallocation was requested beyond |
2878 | * the file size. | 2984 | * the file size. |
2879 | */ | 2985 | */ |
2880 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 2986 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
2881 | new_size > i_size_read(inode)) { | 2987 | if (new_size > i_size_read(inode)) |
2882 | i_size_write(inode, new_size); | 2988 | i_size_write(inode, new_size); |
2883 | EXT4_I(inode)->i_disksize = new_size; | 2989 | if (new_size > EXT4_I(inode)->i_disksize) |
2990 | ext4_update_i_disksize(inode, new_size); | ||
2884 | } | 2991 | } |
2885 | 2992 | ||
2886 | } | 2993 | } |
@@ -2972,3 +3079,143 @@ retry: | |||
2972 | mutex_unlock(&inode->i_mutex); | 3079 | mutex_unlock(&inode->i_mutex); |
2973 | return ret > 0 ? ret2 : ret; | 3080 | return ret > 0 ? ret2 : ret; |
2974 | } | 3081 | } |
3082 | |||
3083 | /* | ||
3084 | * Callback function called for each extent to gather FIEMAP information. | ||
3085 | */ | ||
3086 | int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | ||
3087 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | ||
3088 | void *data) | ||
3089 | { | ||
3090 | struct fiemap_extent_info *fieinfo = data; | ||
3091 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3092 | __u64 logical; | ||
3093 | __u64 physical; | ||
3094 | __u64 length; | ||
3095 | __u32 flags = 0; | ||
3096 | int error; | ||
3097 | |||
3098 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3099 | |||
3100 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | ||
3101 | pgoff_t offset; | ||
3102 | struct page *page; | ||
3103 | struct buffer_head *bh = NULL; | ||
3104 | |||
3105 | offset = logical >> PAGE_SHIFT; | ||
3106 | page = find_get_page(inode->i_mapping, offset); | ||
3107 | if (!page || !page_has_buffers(page)) | ||
3108 | return EXT_CONTINUE; | ||
3109 | |||
3110 | bh = page_buffers(page); | ||
3111 | |||
3112 | if (!bh) | ||
3113 | return EXT_CONTINUE; | ||
3114 | |||
3115 | if (buffer_delay(bh)) { | ||
3116 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
3117 | page_cache_release(page); | ||
3118 | } else { | ||
3119 | page_cache_release(page); | ||
3120 | return EXT_CONTINUE; | ||
3121 | } | ||
3122 | } | ||
3123 | |||
3124 | physical = (__u64)newex->ec_start << blksize_bits; | ||
3125 | length = (__u64)newex->ec_len << blksize_bits; | ||
3126 | |||
3127 | if (ex && ext4_ext_is_uninitialized(ex)) | ||
3128 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
3129 | |||
3130 | /* | ||
3131 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
3132 | * | ||
3133 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3134 | * this also indicates no more allocated blocks. | ||
3135 | * | ||
3136 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3137 | */ | ||
3138 | if (logical + length - 1 == EXT_MAX_BLOCK || | ||
3139 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | ||
3140 | flags |= FIEMAP_EXTENT_LAST; | ||
3141 | |||
3142 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | ||
3143 | length, flags); | ||
3144 | if (error < 0) | ||
3145 | return error; | ||
3146 | if (error == 1) | ||
3147 | return EXT_BREAK; | ||
3148 | |||
3149 | return EXT_CONTINUE; | ||
3150 | } | ||
3151 | |||
3152 | /* fiemap flags we can handle specified here */ | ||
3153 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
3154 | |||
3155 | int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) | ||
3156 | { | ||
3157 | __u64 physical = 0; | ||
3158 | __u64 length; | ||
3159 | __u32 flags = FIEMAP_EXTENT_LAST; | ||
3160 | int blockbits = inode->i_sb->s_blocksize_bits; | ||
3161 | int error = 0; | ||
3162 | |||
3163 | /* in-inode? */ | ||
3164 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | ||
3165 | struct ext4_iloc iloc; | ||
3166 | int offset; /* offset of xattr in inode */ | ||
3167 | |||
3168 | error = ext4_get_inode_loc(inode, &iloc); | ||
3169 | if (error) | ||
3170 | return error; | ||
3171 | physical = iloc.bh->b_blocknr << blockbits; | ||
3172 | offset = EXT4_GOOD_OLD_INODE_SIZE + | ||
3173 | EXT4_I(inode)->i_extra_isize; | ||
3174 | physical += offset; | ||
3175 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | ||
3176 | flags |= FIEMAP_EXTENT_DATA_INLINE; | ||
3177 | } else { /* external block */ | ||
3178 | physical = EXT4_I(inode)->i_file_acl << blockbits; | ||
3179 | length = inode->i_sb->s_blocksize; | ||
3180 | } | ||
3181 | |||
3182 | if (physical) | ||
3183 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | ||
3184 | length, flags); | ||
3185 | return (error < 0 ? error : 0); | ||
3186 | } | ||
3187 | |||
3188 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
3189 | __u64 start, __u64 len) | ||
3190 | { | ||
3191 | ext4_lblk_t start_blk; | ||
3192 | ext4_lblk_t len_blks; | ||
3193 | int error = 0; | ||
3194 | |||
3195 | /* fallback to generic here if not in extents fmt */ | ||
3196 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
3197 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
3198 | ext4_get_block); | ||
3199 | |||
3200 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | ||
3201 | return -EBADR; | ||
3202 | |||
3203 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | ||
3204 | error = ext4_xattr_fiemap(inode, fieinfo); | ||
3205 | } else { | ||
3206 | start_blk = start >> inode->i_sb->s_blocksize_bits; | ||
3207 | len_blks = len >> inode->i_sb->s_blocksize_bits; | ||
3208 | |||
3209 | /* | ||
3210 | * Walk the extent tree gathering extent information. | ||
3211 | * ext4_ext_fiemap_cb will push extents back to user. | ||
3212 | */ | ||
3213 | down_write(&EXT4_I(inode)->i_data_sem); | ||
3214 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | ||
3215 | ext4_ext_fiemap_cb, fieinfo); | ||
3216 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3217 | } | ||
3218 | |||
3219 | return error; | ||
3220 | } | ||
3221 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db4..6bd11fba71f7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -31,14 +31,14 @@ | |||
31 | * from ext4_file_open: open gets called at every open, but release | 31 | * from ext4_file_open: open gets called at every open, but release |
32 | * gets called only when /all/ the files are closed. | 32 | * gets called only when /all/ the files are closed. |
33 | */ | 33 | */ |
34 | static int ext4_release_file (struct inode * inode, struct file * filp) | 34 | static int ext4_release_file(struct inode *inode, struct file *filp) |
35 | { | 35 | { |
36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | down_write(&EXT4_I(inode)->i_data_sem); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_preallocations(inode); |
42 | up_write(&EXT4_I(inode)->i_data_sem); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||
143 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
144 | __u64 start, __u64 len); | ||
145 | |||
143 | const struct file_operations ext4_file_operations = { | 146 | const struct file_operations ext4_file_operations = { |
144 | .llseek = generic_file_llseek, | 147 | .llseek = generic_file_llseek, |
145 | .read = do_sync_read, | 148 | .read = do_sync_read, |
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
162 | .truncate = ext4_truncate, | 165 | .truncate = ext4_truncate, |
163 | .setattr = ext4_setattr, | 166 | .setattr = ext4_setattr, |
164 | .getattr = ext4_getattr, | 167 | .getattr = ext4_getattr, |
165 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 168 | #ifdef CONFIG_EXT4_FS_XATTR |
166 | .setxattr = generic_setxattr, | 169 | .setxattr = generic_setxattr, |
167 | .getxattr = generic_getxattr, | 170 | .getxattr = generic_getxattr, |
168 | .listxattr = ext4_listxattr, | 171 | .listxattr = ext4_listxattr, |
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
170 | #endif | 173 | #endif |
171 | .permission = ext4_permission, | 174 | .permission = ext4_permission, |
172 | .fallocate = ext4_fallocate, | 175 | .fallocate = ext4_fallocate, |
176 | .fiemap = ext4_fiemap, | ||
173 | }; | 177 | }; |
174 | 178 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad31..5afe4370840b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/marker.h> | ||
31 | #include "ext4.h" | 32 | #include "ext4.h" |
32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
33 | 34 | ||
@@ -43,7 +44,7 @@ | |||
43 | * inode to disk. | 44 | * inode to disk. |
44 | */ | 45 | */ |
45 | 46 | ||
46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 47 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
47 | { | 48 | { |
48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
51 | 52 | ||
52 | J_ASSERT(ext4_journal_current_handle() == NULL); | 53 | J_ASSERT(ext4_journal_current_handle() == NULL); |
53 | 54 | ||
55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | ||
56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
57 | dentry->d_parent->d_inode->i_ino); | ||
58 | |||
54 | /* | 59 | /* |
55 | * data=writeback: | 60 | * data=writeback: |
56 | * The caller's filemap_fdatawrite()/wait will sync the data. | 61 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe390..556ca8eba3db 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
27 | sum += DELTA; | 27 | sum += DELTA; |
28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); |
29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); |
30 | } while(--n); | 30 | } while (--n); |
31 | 31 | ||
32 | buf[0] += b0; | 32 | buf[0] += b0; |
33 | buf[1] += b1; | 33 | buf[1] += b1; |
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
35 | 35 | ||
36 | 36 | ||
37 | /* The old legacy hash */ | 37 | /* The old legacy hash */ |
38 | static __u32 dx_hack_hash (const char *name, int len) | 38 | static __u32 dx_hack_hash(const char *name, int len) |
39 | { | 39 | { |
40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; | 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
41 | while (len--) { | 41 | while (len--) { |
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | |||
59 | val = pad; | 59 | val = pad; |
60 | if (len > num*4) | 60 | if (len > num*4) |
61 | len = num * 4; | 61 | len = num * 4; |
62 | for (i=0; i < len; i++) { | 62 | for (i = 0; i < len; i++) { |
63 | if ((i % 4) == 0) | 63 | if ((i % 4) == 0) |
64 | val = pad; | 64 | val = pad; |
65 | val = msg[i] + (val << 8); | 65 | val = msg[i] + (val << 8); |
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
104 | 104 | ||
105 | /* Check to see if the seed is all zero's */ | 105 | /* Check to see if the seed is all zero's */ |
106 | if (hinfo->seed) { | 106 | if (hinfo->seed) { |
107 | for (i=0; i < 4; i++) { | 107 | for (i = 0; i < 4; i++) { |
108 | if (hinfo->seed[i]) | 108 | if (hinfo->seed[i]) |
109 | break; | 109 | break; |
110 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f344834bbf58..fe34d74cfb19 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
115 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
116 | return NULL; | 116 | return NULL; |
117 | } | 117 | } |
118 | if (bh_uptodate_or_lock(bh)) | 118 | if (buffer_uptodate(bh) && |
119 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
119 | return bh; | 120 | return bh; |
120 | 121 | ||
122 | lock_buffer(bh); | ||
121 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 123 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
122 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 124 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
123 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 125 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
154 | * though), and then we'd have two inodes sharing the | 156 | * though), and then we'd have two inodes sharing the |
155 | * same inode number and space on the harddisk. | 157 | * same inode number and space on the harddisk. |
156 | */ | 158 | */ |
157 | void ext4_free_inode (handle_t *handle, struct inode * inode) | 159 | void ext4_free_inode(handle_t *handle, struct inode *inode) |
158 | { | 160 | { |
159 | struct super_block * sb = inode->i_sb; | 161 | struct super_block *sb = inode->i_sb; |
160 | int is_directory; | 162 | int is_directory; |
161 | unsigned long ino; | 163 | unsigned long ino; |
162 | struct buffer_head *bitmap_bh = NULL; | 164 | struct buffer_head *bitmap_bh = NULL; |
163 | struct buffer_head *bh2; | 165 | struct buffer_head *bh2; |
164 | ext4_group_t block_group; | 166 | ext4_group_t block_group; |
165 | unsigned long bit; | 167 | unsigned long bit; |
166 | struct ext4_group_desc * gdp; | 168 | struct ext4_group_desc *gdp; |
167 | struct ext4_super_block * es; | 169 | struct ext4_super_block *es; |
168 | struct ext4_sb_info *sbi; | 170 | struct ext4_sb_info *sbi; |
169 | int fatal = 0, err; | 171 | int fatal = 0, err; |
170 | ext4_group_t flex_group; | 172 | ext4_group_t flex_group; |
171 | 173 | ||
172 | if (atomic_read(&inode->i_count) > 1) { | 174 | if (atomic_read(&inode->i_count) > 1) { |
173 | printk ("ext4_free_inode: inode has count=%d\n", | 175 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", |
174 | atomic_read(&inode->i_count)); | 176 | atomic_read(&inode->i_count)); |
175 | return; | 177 | return; |
176 | } | 178 | } |
177 | if (inode->i_nlink) { | 179 | if (inode->i_nlink) { |
178 | printk ("ext4_free_inode: inode has nlink=%d\n", | 180 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", |
179 | inode->i_nlink); | 181 | inode->i_nlink); |
180 | return; | 182 | return; |
181 | } | 183 | } |
182 | if (!sb) { | 184 | if (!sb) { |
183 | printk("ext4_free_inode: inode on nonexistent device\n"); | 185 | printk(KERN_ERR "ext4_free_inode: inode on " |
186 | "nonexistent device\n"); | ||
184 | return; | 187 | return; |
185 | } | 188 | } |
186 | sbi = EXT4_SB(sb); | 189 | sbi = EXT4_SB(sb); |
187 | 190 | ||
188 | ino = inode->i_ino; | 191 | ino = inode->i_ino; |
189 | ext4_debug ("freeing inode %lu\n", ino); | 192 | ext4_debug("freeing inode %lu\n", ino); |
190 | 193 | ||
191 | /* | 194 | /* |
192 | * Note: we must free any quota before locking the superblock, | 195 | * Note: we must free any quota before locking the superblock, |
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
200 | is_directory = S_ISDIR(inode->i_mode); | 203 | is_directory = S_ISDIR(inode->i_mode); |
201 | 204 | ||
202 | /* Do this BEFORE marking the inode not in use or returning an error */ | 205 | /* Do this BEFORE marking the inode not in use or returning an error */ |
203 | clear_inode (inode); | 206 | clear_inode(inode); |
204 | 207 | ||
205 | es = EXT4_SB(sb)->s_es; | 208 | es = EXT4_SB(sb)->s_es; |
206 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 209 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
207 | ext4_error (sb, "ext4_free_inode", | 210 | ext4_error(sb, "ext4_free_inode", |
208 | "reserved or nonexistent inode %lu", ino); | 211 | "reserved or nonexistent inode %lu", ino); |
209 | goto error_return; | 212 | goto error_return; |
210 | } | 213 | } |
211 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 214 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
222 | /* Ok, now we can actually update the inode bitmaps.. */ | 225 | /* Ok, now we can actually update the inode bitmaps.. */ |
223 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 226 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
224 | bit, bitmap_bh->b_data)) | 227 | bit, bitmap_bh->b_data)) |
225 | ext4_error (sb, "ext4_free_inode", | 228 | ext4_error(sb, "ext4_free_inode", |
226 | "bit already cleared for inode %lu", ino); | 229 | "bit already cleared for inode %lu", ino); |
227 | else { | 230 | else { |
228 | gdp = ext4_get_group_desc (sb, block_group, &bh2); | 231 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
229 | 232 | ||
230 | BUFFER_TRACE(bh2, "get_write_access"); | 233 | BUFFER_TRACE(bh2, "get_write_access"); |
231 | fatal = ext4_journal_get_write_access(handle, bh2); | 234 | fatal = ext4_journal_get_write_access(handle, bh2); |
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
287 | avefreei = freei / ngroups; | 290 | avefreei = freei / ngroups; |
288 | 291 | ||
289 | for (group = 0; group < ngroups; group++) { | 292 | for (group = 0; group < ngroups; group++) { |
290 | desc = ext4_get_group_desc (sb, group, NULL); | 293 | desc = ext4_get_group_desc(sb, group, NULL); |
291 | if (!desc || !desc->bg_free_inodes_count) | 294 | if (!desc || !desc->bg_free_inodes_count) |
292 | continue; | 295 | continue; |
293 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | 296 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) |
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
576 | * For other inodes, search forward from the parent directory's block | 579 | * For other inodes, search forward from the parent directory's block |
577 | * group to find a free inode. | 580 | * group to find a free inode. |
578 | */ | 581 | */ |
579 | struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | 582 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) |
580 | { | 583 | { |
581 | struct super_block *sb; | 584 | struct super_block *sb; |
582 | struct buffer_head *bitmap_bh = NULL; | 585 | struct buffer_head *bitmap_bh = NULL; |
583 | struct buffer_head *bh2; | 586 | struct buffer_head *bh2; |
584 | ext4_group_t group = 0; | 587 | ext4_group_t group = 0; |
585 | unsigned long ino = 0; | 588 | unsigned long ino = 0; |
586 | struct inode * inode; | 589 | struct inode *inode; |
587 | struct ext4_group_desc * gdp = NULL; | 590 | struct ext4_group_desc *gdp = NULL; |
588 | struct ext4_super_block * es; | 591 | struct ext4_super_block *es; |
589 | struct ext4_inode_info *ei; | 592 | struct ext4_inode_info *ei; |
590 | struct ext4_sb_info *sbi; | 593 | struct ext4_sb_info *sbi; |
591 | int ret2, err = 0; | 594 | int ret2, err = 0; |
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
613 | } | 616 | } |
614 | 617 | ||
615 | if (S_ISDIR(mode)) { | 618 | if (S_ISDIR(mode)) { |
616 | if (test_opt (sb, OLDALLOC)) | 619 | if (test_opt(sb, OLDALLOC)) |
617 | ret2 = find_group_dir(sb, dir, &group); | 620 | ret2 = find_group_dir(sb, dir, &group); |
618 | else | 621 | else |
619 | ret2 = find_group_orlov(sb, dir, &group); | 622 | ret2 = find_group_orlov(sb, dir, &group); |
@@ -783,7 +786,7 @@ got: | |||
783 | } | 786 | } |
784 | 787 | ||
785 | inode->i_uid = current->fsuid; | 788 | inode->i_uid = current->fsuid; |
786 | if (test_opt (sb, GRPID)) | 789 | if (test_opt(sb, GRPID)) |
787 | inode->i_gid = dir->i_gid; | 790 | inode->i_gid = dir->i_gid; |
788 | else if (dir->i_mode & S_ISGID) { | 791 | else if (dir->i_mode & S_ISGID) { |
789 | inode->i_gid = dir->i_gid; | 792 | inode->i_gid = dir->i_gid; |
@@ -816,7 +819,6 @@ got: | |||
816 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 819 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
817 | ei->i_file_acl = 0; | 820 | ei->i_file_acl = 0; |
818 | ei->i_dtime = 0; | 821 | ei->i_dtime = 0; |
819 | ei->i_block_alloc_info = NULL; | ||
820 | ei->i_block_group = group; | 822 | ei->i_block_group = group; |
821 | 823 | ||
822 | ext4_set_inode_flags(inode); | 824 | ext4_set_inode_flags(inode); |
@@ -832,7 +834,7 @@ got: | |||
832 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 834 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
833 | 835 | ||
834 | ret = inode; | 836 | ret = inode; |
835 | if(DQUOT_ALLOC_INODE(inode)) { | 837 | if (DQUOT_ALLOC_INODE(inode)) { |
836 | err = -EDQUOT; | 838 | err = -EDQUOT; |
837 | goto fail_drop; | 839 | goto fail_drop; |
838 | } | 840 | } |
@@ -841,7 +843,7 @@ got: | |||
841 | if (err) | 843 | if (err) |
842 | goto fail_free_drop; | 844 | goto fail_free_drop; |
843 | 845 | ||
844 | err = ext4_init_security(handle,inode, dir); | 846 | err = ext4_init_security(handle, inode, dir); |
845 | if (err) | 847 | if (err) |
846 | goto fail_free_drop; | 848 | goto fail_free_drop; |
847 | 849 | ||
@@ -959,7 +961,7 @@ error: | |||
959 | return ERR_PTR(err); | 961 | return ERR_PTR(err); |
960 | } | 962 | } |
961 | 963 | ||
962 | unsigned long ext4_count_free_inodes (struct super_block * sb) | 964 | unsigned long ext4_count_free_inodes(struct super_block *sb) |
963 | { | 965 | { |
964 | unsigned long desc_count; | 966 | unsigned long desc_count; |
965 | struct ext4_group_desc *gdp; | 967 | struct ext4_group_desc *gdp; |
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
974 | bitmap_count = 0; | 976 | bitmap_count = 0; |
975 | gdp = NULL; | 977 | gdp = NULL; |
976 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 978 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
977 | gdp = ext4_get_group_desc (sb, i, NULL); | 979 | gdp = ext4_get_group_desc(sb, i, NULL); |
978 | if (!gdp) | 980 | if (!gdp) |
979 | continue; | 981 | continue; |
980 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 982 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
989 | bitmap_count += x; | 991 | bitmap_count += x; |
990 | } | 992 | } |
991 | brelse(bitmap_bh); | 993 | brelse(bitmap_bh); |
992 | printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", | 994 | printk(KERN_DEBUG "ext4_count_free_inodes: " |
993 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | 995 | "stored = %u, computed = %lu, %lu\n", |
996 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | ||
994 | return desc_count; | 997 | return desc_count; |
995 | #else | 998 | #else |
996 | desc_count = 0; | 999 | desc_count = 0; |
997 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1000 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
998 | gdp = ext4_get_group_desc (sb, i, NULL); | 1001 | gdp = ext4_get_group_desc(sb, i, NULL); |
999 | if (!gdp) | 1002 | if (!gdp) |
1000 | continue; | 1003 | continue; |
1001 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 1004 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
1006 | } | 1009 | } |
1007 | 1010 | ||
1008 | /* Called at mount-time, super-block is locked */ | 1011 | /* Called at mount-time, super-block is locked */ |
1009 | unsigned long ext4_count_dirs (struct super_block * sb) | 1012 | unsigned long ext4_count_dirs(struct super_block * sb) |
1010 | { | 1013 | { |
1011 | unsigned long count = 0; | 1014 | unsigned long count = 0; |
1012 | ext4_group_t i; | 1015 | ext4_group_t i; |
1013 | 1016 | ||
1014 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1017 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
1015 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 1018 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1016 | if (!gdp) | 1019 | if (!gdp) |
1017 | continue; | 1020 | continue; |
1018 | count += le16_to_cpu(gdp->bg_used_dirs_count); | 1021 | count += le16_to_cpu(gdp->bg_used_dirs_count); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7e91913e325b..9b4ec9decfd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | |||
190 | /* | 190 | /* |
191 | * Called at the last iput() if i_nlink is zero. | 191 | * Called at the last iput() if i_nlink is zero. |
192 | */ | 192 | */ |
193 | void ext4_delete_inode (struct inode * inode) | 193 | void ext4_delete_inode(struct inode *inode) |
194 | { | 194 | { |
195 | handle_t *handle; | 195 | handle_t *handle; |
196 | int err; | 196 | int err; |
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
330 | int final = 0; | 330 | int final = 0; |
331 | 331 | ||
332 | if (i_block < 0) { | 332 | if (i_block < 0) { |
333 | ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); | 333 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); |
334 | } else if (i_block < direct_blocks) { | 334 | } else if (i_block < direct_blocks) { |
335 | offsets[n++] = i_block; | 335 | offsets[n++] = i_block; |
336 | final = direct_blocks; | 336 | final = direct_blocks; |
337 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 337 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
338 | offsets[n++] = EXT4_IND_BLOCK; | 338 | offsets[n++] = EXT4_IND_BLOCK; |
339 | offsets[n++] = i_block; | 339 | offsets[n++] = i_block; |
340 | final = ptrs; | 340 | final = ptrs; |
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
400 | 400 | ||
401 | *err = 0; | 401 | *err = 0; |
402 | /* i_data is not going away, no lock needed */ | 402 | /* i_data is not going away, no lock needed */ |
403 | add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); | 403 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
404 | if (!p->key) | 404 | if (!p->key) |
405 | goto no_block; | 405 | goto no_block; |
406 | while (--depth) { | 406 | while (--depth) { |
407 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
408 | if (!bh) | 408 | if (!bh) |
409 | goto failure; | 409 | goto failure; |
410 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 410 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
411 | /* Reader: end */ | 411 | /* Reader: end */ |
412 | if (!p->key) | 412 | if (!p->key) |
413 | goto no_block; | 413 | goto no_block; |
@@ -443,7 +443,7 @@ no_block: | |||
443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
444 | { | 444 | { |
445 | struct ext4_inode_info *ei = EXT4_I(inode); | 445 | struct ext4_inode_info *ei = EXT4_I(inode); |
446 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
447 | __le32 *p; | 447 | __le32 *p; |
448 | ext4_fsblk_t bg_start; | 448 | ext4_fsblk_t bg_start; |
449 | ext4_fsblk_t last_block; | 449 | ext4_fsblk_t last_block; |
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
487 | Indirect *partial) | 487 | Indirect *partial) |
488 | { | 488 | { |
489 | struct ext4_block_alloc_info *block_i; | ||
490 | |||
491 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
492 | |||
493 | /* | 489 | /* |
494 | * try the heuristic for sequential allocation, | 490 | * XXX need to get goal block from mballoc's data structures |
495 | * failing that at least try to get decent locality. | ||
496 | */ | 491 | */ |
497 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | ||
498 | && (block_i->last_alloc_physical_block != 0)) { | ||
499 | return block_i->last_alloc_physical_block + 1; | ||
500 | } | ||
501 | 492 | ||
502 | return ext4_find_near(inode, partial); | 493 | return ext4_find_near(inode, partial); |
503 | } | 494 | } |
@@ -630,7 +621,7 @@ allocated: | |||
630 | *err = 0; | 621 | *err = 0; |
631 | return ret; | 622 | return ret; |
632 | failed_out: | 623 | failed_out: |
633 | for (i = 0; i <index; i++) | 624 | for (i = 0; i < index; i++) |
634 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 625 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
635 | return ret; | 626 | return ret; |
636 | } | 627 | } |
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
703 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 694 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
704 | branch[n].key = cpu_to_le32(new_blocks[n]); | 695 | branch[n].key = cpu_to_le32(new_blocks[n]); |
705 | *branch[n].p = branch[n].key; | 696 | *branch[n].p = branch[n].key; |
706 | if ( n == indirect_blks) { | 697 | if (n == indirect_blks) { |
707 | current_block = new_blocks[n]; | 698 | current_block = new_blocks[n]; |
708 | /* | 699 | /* |
709 | * End of chain, update the last new metablock of | 700 | * End of chain, update the last new metablock of |
@@ -730,7 +721,7 @@ failed: | |||
730 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 721 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); |
731 | ext4_journal_forget(handle, branch[i].bh); | 722 | ext4_journal_forget(handle, branch[i].bh); |
732 | } | 723 | } |
733 | for (i = 0; i <indirect_blks; i++) | 724 | for (i = 0; i < indirect_blks; i++) |
734 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 725 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
735 | 726 | ||
736 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 727 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
757 | { | 748 | { |
758 | int i; | 749 | int i; |
759 | int err = 0; | 750 | int err = 0; |
760 | struct ext4_block_alloc_info *block_i; | ||
761 | ext4_fsblk_t current_block; | 751 | ext4_fsblk_t current_block; |
762 | 752 | ||
763 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
764 | /* | 753 | /* |
765 | * If we're splicing into a [td]indirect block (as opposed to the | 754 | * If we're splicing into a [td]indirect block (as opposed to the |
766 | * inode) then we need to get write access to the [td]indirect block | 755 | * inode) then we need to get write access to the [td]indirect block |
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
783 | if (num == 0 && blks > 1) { | 772 | if (num == 0 && blks > 1) { |
784 | current_block = le32_to_cpu(where->key) + 1; | 773 | current_block = le32_to_cpu(where->key) + 1; |
785 | for (i = 1; i < blks; i++) | 774 | for (i = 1; i < blks; i++) |
786 | *(where->p + i ) = cpu_to_le32(current_block++); | 775 | *(where->p + i) = cpu_to_le32(current_block++); |
787 | } | ||
788 | |||
789 | /* | ||
790 | * update the most recently allocated logical & physical block | ||
791 | * in i_block_alloc_info, to assist find the proper goal block for next | ||
792 | * allocation | ||
793 | */ | ||
794 | if (block_i) { | ||
795 | block_i->last_alloc_logical_block = block + blks - 1; | ||
796 | block_i->last_alloc_physical_block = | ||
797 | le32_to_cpu(where[num].key) + blks - 1; | ||
798 | } | 776 | } |
799 | 777 | ||
800 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 778 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
914 | goto cleanup; | 892 | goto cleanup; |
915 | 893 | ||
916 | /* | 894 | /* |
917 | * Okay, we need to do block allocation. Lazily initialize the block | 895 | * Okay, we need to do block allocation. |
918 | * allocation info here if necessary | ||
919 | */ | 896 | */ |
920 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | ||
921 | ext4_init_block_alloc_info(inode); | ||
922 | |||
923 | goal = ext4_find_goal(inode, iblock, partial); | 897 | goal = ext4_find_goal(inode, iblock, partial); |
924 | 898 | ||
925 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 899 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1030 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1004 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
1031 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1005 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; |
1032 | 1006 | ||
1033 | /* Account for allocated meta_blocks */ | 1007 | if (mdb_free) { |
1034 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1008 | /* Account for allocated meta_blocks */ |
1009 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
1035 | 1010 | ||
1036 | /* update fs free blocks counter for truncate case */ | 1011 | /* update fs dirty blocks counter */ |
1037 | percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); | 1012 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1013 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1014 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1015 | } | ||
1038 | 1016 | ||
1039 | /* update per-inode reservations */ | 1017 | /* update per-inode reservations */ |
1040 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1018 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
1041 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1019 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
1042 | 1020 | ||
1043 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1044 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1045 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1021 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1047 | } | 1022 | } |
1048 | 1023 | ||
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1160 | /* Maximum number of blocks we map for direct IO at once. */ | 1135 | /* Maximum number of blocks we map for direct IO at once. */ |
1161 | #define DIO_MAX_BLOCKS 4096 | 1136 | #define DIO_MAX_BLOCKS 4096 |
1162 | 1137 | ||
1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1138 | int ext4_get_block(struct inode *inode, sector_t iblock, |
1164 | struct buffer_head *bh_result, int create) | 1139 | struct buffer_head *bh_result, int create) |
1165 | { | 1140 | { |
1166 | handle_t *handle = ext4_journal_current_handle(); | 1141 | handle_t *handle = ext4_journal_current_handle(); |
1167 | int ret = 0, started = 0; | 1142 | int ret = 0, started = 0; |
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1241 | BUFFER_TRACE(bh, "call get_create_access"); | 1216 | BUFFER_TRACE(bh, "call get_create_access"); |
1242 | fatal = ext4_journal_get_create_access(handle, bh); | 1217 | fatal = ext4_journal_get_create_access(handle, bh); |
1243 | if (!fatal && !buffer_uptodate(bh)) { | 1218 | if (!fatal && !buffer_uptodate(bh)) { |
1244 | memset(bh->b_data,0,inode->i_sb->s_blocksize); | 1219 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1245 | set_buffer_uptodate(bh); | 1220 | set_buffer_uptodate(bh); |
1246 | } | 1221 | } |
1247 | unlock_buffer(bh); | 1222 | unlock_buffer(bh); |
@@ -1266,7 +1241,7 @@ err: | |||
1266 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1241 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1267 | ext4_lblk_t block, int create, int *err) | 1242 | ext4_lblk_t block, int create, int *err) |
1268 | { | 1243 | { |
1269 | struct buffer_head * bh; | 1244 | struct buffer_head *bh; |
1270 | 1245 | ||
1271 | bh = ext4_getblk(handle, inode, block, create, err); | 1246 | bh = ext4_getblk(handle, inode, block, create, err); |
1272 | if (!bh) | 1247 | if (!bh) |
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
1282 | return NULL; | 1257 | return NULL; |
1283 | } | 1258 | } |
1284 | 1259 | ||
1285 | static int walk_page_buffers( handle_t *handle, | 1260 | static int walk_page_buffers(handle_t *handle, |
1286 | struct buffer_head *head, | 1261 | struct buffer_head *head, |
1287 | unsigned from, | 1262 | unsigned from, |
1288 | unsigned to, | 1263 | unsigned to, |
1289 | int *partial, | 1264 | int *partial, |
1290 | int (*fn)( handle_t *handle, | 1265 | int (*fn)(handle_t *handle, |
1291 | struct buffer_head *bh)) | 1266 | struct buffer_head *bh)) |
1292 | { | 1267 | { |
1293 | struct buffer_head *bh; | 1268 | struct buffer_head *bh; |
1294 | unsigned block_start, block_end; | 1269 | unsigned block_start, block_end; |
@@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, | |||
1296 | int err, ret = 0; | 1271 | int err, ret = 0; |
1297 | struct buffer_head *next; | 1272 | struct buffer_head *next; |
1298 | 1273 | ||
1299 | for ( bh = head, block_start = 0; | 1274 | for (bh = head, block_start = 0; |
1300 | ret == 0 && (bh != head || !block_start); | 1275 | ret == 0 && (bh != head || !block_start); |
1301 | block_start = block_end, bh = next) | 1276 | block_start = block_end, bh = next) |
1302 | { | 1277 | { |
1303 | next = bh->b_this_page; | 1278 | next = bh->b_this_page; |
1304 | block_end = block_start + blocksize; | 1279 | block_end = block_start + blocksize; |
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1351 | loff_t pos, unsigned len, unsigned flags, | 1326 | loff_t pos, unsigned len, unsigned flags, |
1352 | struct page **pagep, void **fsdata) | 1327 | struct page **pagep, void **fsdata) |
1353 | { | 1328 | { |
1354 | struct inode *inode = mapping->host; | 1329 | struct inode *inode = mapping->host; |
1355 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1330 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); |
1356 | handle_t *handle; | 1331 | handle_t *handle; |
1357 | int retries = 0; | 1332 | int retries = 0; |
1358 | struct page *page; | 1333 | struct page *page; |
1359 | pgoff_t index; | 1334 | pgoff_t index; |
1360 | unsigned from, to; | 1335 | unsigned from, to; |
1361 | 1336 | ||
1362 | index = pos >> PAGE_CACHE_SHIFT; | 1337 | index = pos >> PAGE_CACHE_SHIFT; |
1363 | from = pos & (PAGE_CACHE_SIZE - 1); | 1338 | from = pos & (PAGE_CACHE_SIZE - 1); |
1364 | to = from + len; | 1339 | to = from + len; |
1365 | 1340 | ||
1366 | retry: | 1341 | retry: |
1367 | handle = ext4_journal_start(inode, needed_blocks); | 1342 | handle = ext4_journal_start(inode, needed_blocks); |
1368 | if (IS_ERR(handle)) { | 1343 | if (IS_ERR(handle)) { |
1369 | ret = PTR_ERR(handle); | 1344 | ret = PTR_ERR(handle); |
1370 | goto out; | 1345 | goto out; |
1371 | } | 1346 | } |
1372 | 1347 | ||
1373 | page = __grab_cache_page(mapping, index); | 1348 | page = __grab_cache_page(mapping, index); |
@@ -1387,9 +1362,16 @@ retry: | |||
1387 | } | 1362 | } |
1388 | 1363 | ||
1389 | if (ret) { | 1364 | if (ret) { |
1390 | unlock_page(page); | 1365 | unlock_page(page); |
1391 | ext4_journal_stop(handle); | 1366 | ext4_journal_stop(handle); |
1392 | page_cache_release(page); | 1367 | page_cache_release(page); |
1368 | /* | ||
1369 | * block_write_begin may have instantiated a few blocks | ||
1370 | * outside i_size. Trim these off again. Don't need | ||
1371 | * i_size_read because we hold i_mutex. | ||
1372 | */ | ||
1373 | if (pos + len > inode->i_size) | ||
1374 | vmtruncate(inode, inode->i_size); | ||
1393 | } | 1375 | } |
1394 | 1376 | ||
1395 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1426 | ret = ext4_jbd2_file_inode(handle, inode); | 1408 | ret = ext4_jbd2_file_inode(handle, inode); |
1427 | 1409 | ||
1428 | if (ret == 0) { | 1410 | if (ret == 0) { |
1429 | /* | ||
1430 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
1431 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
1432 | * into that. | ||
1433 | */ | ||
1434 | loff_t new_i_size; | 1411 | loff_t new_i_size; |
1435 | 1412 | ||
1436 | new_i_size = pos + copied; | 1413 | new_i_size = pos + copied; |
1437 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1414 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1438 | EXT4_I(inode)->i_disksize = new_i_size; | 1415 | ext4_update_i_disksize(inode, new_i_size); |
1416 | /* We need to mark inode dirty even if | ||
1417 | * new_i_size is less that inode->i_size | ||
1418 | * bu greater than i_disksize.(hint delalloc) | ||
1419 | */ | ||
1420 | ext4_mark_inode_dirty(handle, inode); | ||
1421 | } | ||
1422 | |||
1439 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1423 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1440 | page, fsdata); | 1424 | page, fsdata); |
1441 | copied = ret2; | 1425 | copied = ret2; |
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, | |||
1460 | loff_t new_i_size; | 1444 | loff_t new_i_size; |
1461 | 1445 | ||
1462 | new_i_size = pos + copied; | 1446 | new_i_size = pos + copied; |
1463 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1447 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1464 | EXT4_I(inode)->i_disksize = new_i_size; | 1448 | ext4_update_i_disksize(inode, new_i_size); |
1449 | /* We need to mark inode dirty even if | ||
1450 | * new_i_size is less that inode->i_size | ||
1451 | * bu greater than i_disksize.(hint delalloc) | ||
1452 | */ | ||
1453 | ext4_mark_inode_dirty(handle, inode); | ||
1454 | } | ||
1465 | 1455 | ||
1466 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1456 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1467 | page, fsdata); | 1457 | page, fsdata); |
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1486 | int ret = 0, ret2; | 1476 | int ret = 0, ret2; |
1487 | int partial = 0; | 1477 | int partial = 0; |
1488 | unsigned from, to; | 1478 | unsigned from, to; |
1479 | loff_t new_i_size; | ||
1489 | 1480 | ||
1490 | from = pos & (PAGE_CACHE_SIZE - 1); | 1481 | from = pos & (PAGE_CACHE_SIZE - 1); |
1491 | to = from + len; | 1482 | to = from + len; |
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, | |||
1500 | to, &partial, write_end_fn); | 1491 | to, &partial, write_end_fn); |
1501 | if (!partial) | 1492 | if (!partial) |
1502 | SetPageUptodate(page); | 1493 | SetPageUptodate(page); |
1503 | if (pos+copied > inode->i_size) | 1494 | new_i_size = pos + copied; |
1495 | if (new_i_size > inode->i_size) | ||
1504 | i_size_write(inode, pos+copied); | 1496 | i_size_write(inode, pos+copied); |
1505 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1497 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; |
1506 | if (inode->i_size > EXT4_I(inode)->i_disksize) { | 1498 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1507 | EXT4_I(inode)->i_disksize = inode->i_size; | 1499 | ext4_update_i_disksize(inode, new_i_size); |
1508 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1500 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1509 | if (!ret) | 1501 | if (!ret) |
1510 | ret = ret2; | 1502 | ret = ret2; |
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1521 | 1513 | ||
1522 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1514 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
1523 | { | 1515 | { |
1516 | int retries = 0; | ||
1524 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1517 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1525 | unsigned long md_needed, mdblocks, total = 0; | 1518 | unsigned long md_needed, mdblocks, total = 0; |
1526 | 1519 | ||
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1529 | * in order to allocate nrblocks | 1522 | * in order to allocate nrblocks |
1530 | * worse case is one extent per block | 1523 | * worse case is one extent per block |
1531 | */ | 1524 | */ |
1525 | repeat: | ||
1532 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1526 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1533 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1527 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; |
1534 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1528 | mdblocks = ext4_calc_metadata_amount(inode, total); |
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1537 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1531 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
1538 | total = md_needed + nrblocks; | 1532 | total = md_needed + nrblocks; |
1539 | 1533 | ||
1540 | if (ext4_has_free_blocks(sbi, total) < total) { | 1534 | if (ext4_claim_free_blocks(sbi, total)) { |
1541 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1535 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1537 | yield(); | ||
1538 | goto repeat; | ||
1539 | } | ||
1542 | return -ENOSPC; | 1540 | return -ENOSPC; |
1543 | } | 1541 | } |
1544 | /* reduce fs free blocks counter */ | ||
1545 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
1546 | |||
1547 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1542 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
1548 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1543 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; |
1549 | 1544 | ||
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1585 | 1580 | ||
1586 | release = to_free + mdb_free; | 1581 | release = to_free + mdb_free; |
1587 | 1582 | ||
1588 | /* update fs free blocks counter for truncate case */ | 1583 | /* update fs dirty blocks counter for truncate case */ |
1589 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | 1584 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); |
1590 | 1585 | ||
1591 | /* update per-inode reservations */ | 1586 | /* update per-inode reservations */ |
1592 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1587 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1630,6 +1625,7 @@ struct mpage_da_data { | |||
1630 | struct writeback_control *wbc; | 1625 | struct writeback_control *wbc; |
1631 | int io_done; | 1626 | int io_done; |
1632 | long pages_written; | 1627 | long pages_written; |
1628 | int retval; | ||
1633 | }; | 1629 | }; |
1634 | 1630 | ||
1635 | /* | 1631 | /* |
@@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1783 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | 1779 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
1784 | } | 1780 | } |
1785 | 1781 | ||
1782 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
1783 | sector_t logical, long blk_cnt) | ||
1784 | { | ||
1785 | int nr_pages, i; | ||
1786 | pgoff_t index, end; | ||
1787 | struct pagevec pvec; | ||
1788 | struct inode *inode = mpd->inode; | ||
1789 | struct address_space *mapping = inode->i_mapping; | ||
1790 | |||
1791 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1792 | end = (logical + blk_cnt - 1) >> | ||
1793 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1794 | while (index <= end) { | ||
1795 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1796 | if (nr_pages == 0) | ||
1797 | break; | ||
1798 | for (i = 0; i < nr_pages; i++) { | ||
1799 | struct page *page = pvec.pages[i]; | ||
1800 | index = page->index; | ||
1801 | if (index > end) | ||
1802 | break; | ||
1803 | index++; | ||
1804 | |||
1805 | BUG_ON(!PageLocked(page)); | ||
1806 | BUG_ON(PageWriteback(page)); | ||
1807 | block_invalidatepage(page, 0); | ||
1808 | ClearPageUptodate(page); | ||
1809 | unlock_page(page); | ||
1810 | } | ||
1811 | } | ||
1812 | return; | ||
1813 | } | ||
1814 | |||
1815 | static void ext4_print_free_blocks(struct inode *inode) | ||
1816 | { | ||
1817 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1818 | printk(KERN_EMERG "Total free blocks count %lld\n", | ||
1819 | ext4_count_free_blocks(inode->i_sb)); | ||
1820 | printk(KERN_EMERG "Free/Dirty block details\n"); | ||
1821 | printk(KERN_EMERG "free_blocks=%lld\n", | ||
1822 | percpu_counter_sum(&sbi->s_freeblocks_counter)); | ||
1823 | printk(KERN_EMERG "dirty_blocks=%lld\n", | ||
1824 | percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | ||
1825 | printk(KERN_EMERG "Block reservation details\n"); | ||
1826 | printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", | ||
1827 | EXT4_I(inode)->i_reserved_data_blocks); | ||
1828 | printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", | ||
1829 | EXT4_I(inode)->i_reserved_meta_blocks); | ||
1830 | return; | ||
1831 | } | ||
1832 | |||
1786 | /* | 1833 | /* |
1787 | * mpage_da_map_blocks - go through given space | 1834 | * mpage_da_map_blocks - go through given space |
1788 | * | 1835 | * |
@@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1792 | * The function skips space we know is already mapped to disk blocks. | 1839 | * The function skips space we know is already mapped to disk blocks. |
1793 | * | 1840 | * |
1794 | */ | 1841 | */ |
1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1842 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
1796 | { | 1843 | { |
1797 | int err = 0; | 1844 | int err = 0; |
1798 | struct buffer_head *lbh = &mpd->lbh; | ||
1799 | sector_t next = lbh->b_blocknr; | ||
1800 | struct buffer_head new; | 1845 | struct buffer_head new; |
1846 | struct buffer_head *lbh = &mpd->lbh; | ||
1847 | sector_t next; | ||
1801 | 1848 | ||
1802 | /* | 1849 | /* |
1803 | * We consider only non-mapped and non-allocated blocks | 1850 | * We consider only non-mapped and non-allocated blocks |
1804 | */ | 1851 | */ |
1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1852 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1806 | return; | 1853 | return 0; |
1807 | |||
1808 | new.b_state = lbh->b_state; | 1854 | new.b_state = lbh->b_state; |
1809 | new.b_blocknr = 0; | 1855 | new.b_blocknr = 0; |
1810 | new.b_size = lbh->b_size; | 1856 | new.b_size = lbh->b_size; |
1811 | 1857 | next = lbh->b_blocknr; | |
1812 | /* | 1858 | /* |
1813 | * If we didn't accumulate anything | 1859 | * If we didn't accumulate anything |
1814 | * to write simply return | 1860 | * to write simply return |
1815 | */ | 1861 | */ |
1816 | if (!new.b_size) | 1862 | if (!new.b_size) |
1817 | return; | 1863 | return 0; |
1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | 1864 | err = mpd->get_block(mpd->inode, next, &new, 1); |
1819 | if (err) | 1865 | if (err) { |
1820 | return; | 1866 | |
1867 | /* If get block returns with error | ||
1868 | * we simply return. Later writepage | ||
1869 | * will redirty the page and writepages | ||
1870 | * will find the dirty page again | ||
1871 | */ | ||
1872 | if (err == -EAGAIN) | ||
1873 | return 0; | ||
1874 | |||
1875 | if (err == -ENOSPC && | ||
1876 | ext4_count_free_blocks(mpd->inode->i_sb)) { | ||
1877 | mpd->retval = err; | ||
1878 | return 0; | ||
1879 | } | ||
1880 | |||
1881 | /* | ||
1882 | * get block failure will cause us | ||
1883 | * to loop in writepages. Because | ||
1884 | * a_ops->writepage won't be able to | ||
1885 | * make progress. The page will be redirtied | ||
1886 | * by writepage and writepages will again | ||
1887 | * try to write the same. | ||
1888 | */ | ||
1889 | printk(KERN_EMERG "%s block allocation failed for inode %lu " | ||
1890 | "at logical offset %llu with max blocks " | ||
1891 | "%zd with error %d\n", | ||
1892 | __func__, mpd->inode->i_ino, | ||
1893 | (unsigned long long)next, | ||
1894 | lbh->b_size >> mpd->inode->i_blkbits, err); | ||
1895 | printk(KERN_EMERG "This should not happen.!! " | ||
1896 | "Data will be lost\n"); | ||
1897 | if (err == -ENOSPC) { | ||
1898 | ext4_print_free_blocks(mpd->inode); | ||
1899 | } | ||
1900 | /* invlaidate all the pages */ | ||
1901 | ext4_da_block_invalidatepages(mpd, next, | ||
1902 | lbh->b_size >> mpd->inode->i_blkbits); | ||
1903 | return err; | ||
1904 | } | ||
1821 | BUG_ON(new.b_size == 0); | 1905 | BUG_ON(new.b_size == 0); |
1822 | 1906 | ||
1823 | if (buffer_new(&new)) | 1907 | if (buffer_new(&new)) |
@@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | 1914 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) |
1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | 1915 | mpage_put_bnr_to_bhs(mpd, next, &new); |
1832 | 1916 | ||
1833 | return; | 1917 | return 0; |
1834 | } | 1918 | } |
1835 | 1919 | ||
1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 1920 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -1899,8 +1983,8 @@ flush_it: | |||
1899 | * We couldn't merge the block to our extent, so we | 1983 | * We couldn't merge the block to our extent, so we |
1900 | * need to flush current extent and start new one | 1984 | * need to flush current extent and start new one |
1901 | */ | 1985 | */ |
1902 | mpage_da_map_blocks(mpd); | 1986 | if (mpage_da_map_blocks(mpd) == 0) |
1903 | mpage_da_submit_io(mpd); | 1987 | mpage_da_submit_io(mpd); |
1904 | mpd->io_done = 1; | 1988 | mpd->io_done = 1; |
1905 | return; | 1989 | return; |
1906 | } | 1990 | } |
@@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1942 | * and start IO on them using writepage() | 2026 | * and start IO on them using writepage() |
1943 | */ | 2027 | */ |
1944 | if (mpd->next_page != mpd->first_page) { | 2028 | if (mpd->next_page != mpd->first_page) { |
1945 | mpage_da_map_blocks(mpd); | 2029 | if (mpage_da_map_blocks(mpd) == 0) |
1946 | mpage_da_submit_io(mpd); | 2030 | mpage_da_submit_io(mpd); |
1947 | /* | 2031 | /* |
1948 | * skip rest of the page in the page_vec | 2032 | * skip rest of the page in the page_vec |
1949 | */ | 2033 | */ |
@@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page, | |||
2018 | */ | 2102 | */ |
2019 | static int mpage_da_writepages(struct address_space *mapping, | 2103 | static int mpage_da_writepages(struct address_space *mapping, |
2020 | struct writeback_control *wbc, | 2104 | struct writeback_control *wbc, |
2021 | get_block_t get_block) | 2105 | struct mpage_da_data *mpd) |
2022 | { | 2106 | { |
2023 | struct mpage_da_data mpd; | ||
2024 | long to_write; | 2107 | long to_write; |
2025 | int ret; | 2108 | int ret; |
2026 | 2109 | ||
2027 | if (!get_block) | 2110 | if (!mpd->get_block) |
2028 | return generic_writepages(mapping, wbc); | 2111 | return generic_writepages(mapping, wbc); |
2029 | 2112 | ||
2030 | mpd.wbc = wbc; | 2113 | mpd->lbh.b_size = 0; |
2031 | mpd.inode = mapping->host; | 2114 | mpd->lbh.b_state = 0; |
2032 | mpd.lbh.b_size = 0; | 2115 | mpd->lbh.b_blocknr = 0; |
2033 | mpd.lbh.b_state = 0; | 2116 | mpd->first_page = 0; |
2034 | mpd.lbh.b_blocknr = 0; | 2117 | mpd->next_page = 0; |
2035 | mpd.first_page = 0; | 2118 | mpd->io_done = 0; |
2036 | mpd.next_page = 0; | 2119 | mpd->pages_written = 0; |
2037 | mpd.get_block = get_block; | 2120 | mpd->retval = 0; |
2038 | mpd.io_done = 0; | ||
2039 | mpd.pages_written = 0; | ||
2040 | 2121 | ||
2041 | to_write = wbc->nr_to_write; | 2122 | to_write = wbc->nr_to_write; |
2042 | 2123 | ||
2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2124 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); |
2044 | 2125 | ||
2045 | /* | 2126 | /* |
2046 | * Handle last extent of pages | 2127 | * Handle last extent of pages |
2047 | */ | 2128 | */ |
2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2129 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
2049 | mpage_da_map_blocks(&mpd); | 2130 | if (mpage_da_map_blocks(mpd) == 0) |
2050 | mpage_da_submit_io(&mpd); | 2131 | mpage_da_submit_io(mpd); |
2051 | } | 2132 | } |
2052 | 2133 | ||
2053 | wbc->nr_to_write = to_write - mpd.pages_written; | 2134 | wbc->nr_to_write = to_write - mpd->pages_written; |
2054 | return ret; | 2135 | return ret; |
2055 | } | 2136 | } |
2056 | 2137 | ||
@@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2103 | handle_t *handle = NULL; | 2184 | handle_t *handle = NULL; |
2104 | 2185 | ||
2105 | handle = ext4_journal_current_handle(); | 2186 | handle = ext4_journal_current_handle(); |
2106 | if (!handle) { | 2187 | BUG_ON(!handle); |
2107 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | 2188 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, |
2108 | bh_result, 0, 0, 0); | 2189 | bh_result, create, 0, EXT4_DELALLOC_RSVED); |
2109 | BUG_ON(!ret); | ||
2110 | } else { | ||
2111 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2112 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2113 | } | ||
2114 | |||
2115 | if (ret > 0) { | 2190 | if (ret > 0) { |
2191 | |||
2116 | bh_result->b_size = (ret << inode->i_blkbits); | 2192 | bh_result->b_size = (ret << inode->i_blkbits); |
2117 | 2193 | ||
2194 | if (ext4_should_order_data(inode)) { | ||
2195 | int retval; | ||
2196 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2197 | if (retval) | ||
2198 | /* | ||
2199 | * Failed to add inode for ordered | ||
2200 | * mode. Don't update file size | ||
2201 | */ | ||
2202 | return retval; | ||
2203 | } | ||
2204 | |||
2118 | /* | 2205 | /* |
2119 | * Update on-disk size along with block allocation | 2206 | * Update on-disk size along with block allocation |
2120 | * we don't use 'extend_disksize' as size may change | 2207 | * we don't use 'extend_disksize' as size may change |
@@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2124 | if (disksize > i_size_read(inode)) | 2211 | if (disksize > i_size_read(inode)) |
2125 | disksize = i_size_read(inode); | 2212 | disksize = i_size_read(inode); |
2126 | if (disksize > EXT4_I(inode)->i_disksize) { | 2213 | if (disksize > EXT4_I(inode)->i_disksize) { |
2127 | /* | 2214 | ext4_update_i_disksize(inode, disksize); |
2128 | * XXX: replace with spinlock if seen contended -bzzz | 2215 | ret = ext4_mark_inode_dirty(handle, inode); |
2129 | */ | 2216 | return ret; |
2130 | down_write(&EXT4_I(inode)->i_data_sem); | ||
2131 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2132 | EXT4_I(inode)->i_disksize = disksize; | ||
2133 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2134 | |||
2135 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
2136 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2137 | return ret; | ||
2138 | } | ||
2139 | } | 2217 | } |
2140 | ret = 0; | 2218 | ret = 0; |
2141 | } | 2219 | } |
@@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2284 | { | 2362 | { |
2285 | handle_t *handle = NULL; | 2363 | handle_t *handle = NULL; |
2286 | loff_t range_start = 0; | 2364 | loff_t range_start = 0; |
2365 | struct mpage_da_data mpd; | ||
2287 | struct inode *inode = mapping->host; | 2366 | struct inode *inode = mapping->host; |
2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2367 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2289 | long to_write, pages_skipped = 0; | 2368 | long to_write, pages_skipped = 0; |
@@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2317 | range_start = wbc->range_start; | 2396 | range_start = wbc->range_start; |
2318 | pages_skipped = wbc->pages_skipped; | 2397 | pages_skipped = wbc->pages_skipped; |
2319 | 2398 | ||
2399 | mpd.wbc = wbc; | ||
2400 | mpd.inode = mapping->host; | ||
2401 | |||
2320 | restart_loop: | 2402 | restart_loop: |
2321 | to_write = wbc->nr_to_write; | 2403 | to_write = wbc->nr_to_write; |
2322 | while (!ret && to_write > 0) { | 2404 | while (!ret && to_write > 0) { |
@@ -2340,23 +2422,17 @@ restart_loop: | |||
2340 | dump_stack(); | 2422 | dump_stack(); |
2341 | goto out_writepages; | 2423 | goto out_writepages; |
2342 | } | 2424 | } |
2343 | if (ext4_should_order_data(inode)) { | ||
2344 | /* | ||
2345 | * With ordered mode we need to add | ||
2346 | * the inode to the journal handl | ||
2347 | * when we do block allocation. | ||
2348 | */ | ||
2349 | ret = ext4_jbd2_file_inode(handle, inode); | ||
2350 | if (ret) { | ||
2351 | ext4_journal_stop(handle); | ||
2352 | goto out_writepages; | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | to_write -= wbc->nr_to_write; | 2425 | to_write -= wbc->nr_to_write; |
2357 | ret = mpage_da_writepages(mapping, wbc, | 2426 | |
2358 | ext4_da_get_block_write); | 2427 | mpd.get_block = ext4_da_get_block_write; |
2428 | ret = mpage_da_writepages(mapping, wbc, &mpd); | ||
2429 | |||
2359 | ext4_journal_stop(handle); | 2430 | ext4_journal_stop(handle); |
2431 | |||
2432 | if (mpd.retval == -ENOSPC) | ||
2433 | jbd2_journal_force_commit_nested(sbi->s_journal); | ||
2434 | |||
2435 | /* reset the retry count */ | ||
2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { | 2436 | if (ret == MPAGE_DA_EXTENT_TAIL) { |
2361 | /* | 2437 | /* |
2362 | * got one extent now try with | 2438 | * got one extent now try with |
@@ -2391,6 +2467,33 @@ out_writepages: | |||
2391 | return ret; | 2467 | return ret; |
2392 | } | 2468 | } |
2393 | 2469 | ||
2470 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2471 | static int ext4_nonda_switch(struct super_block *sb) | ||
2472 | { | ||
2473 | s64 free_blocks, dirty_blocks; | ||
2474 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2475 | |||
2476 | /* | ||
2477 | * switch to non delalloc mode if we are running low | ||
2478 | * on free block. The free block accounting via percpu | ||
2479 | * counters can get slightly wrong with FBC_BATCH getting | ||
2480 | * accumulated on each CPU without updating global counters | ||
2481 | * Delalloc need an accurate free block accounting. So switch | ||
2482 | * to non delalloc when we are near to error range. | ||
2483 | */ | ||
2484 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
2485 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
2486 | if (2 * free_blocks < 3 * dirty_blocks || | ||
2487 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
2488 | /* | ||
2489 | * free block count is less that 150% of dirty blocks | ||
2490 | * or free blocks is less that watermark | ||
2491 | */ | ||
2492 | return 1; | ||
2493 | } | ||
2494 | return 0; | ||
2495 | } | ||
2496 | |||
2394 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2497 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2395 | loff_t pos, unsigned len, unsigned flags, | 2498 | loff_t pos, unsigned len, unsigned flags, |
2396 | struct page **pagep, void **fsdata) | 2499 | struct page **pagep, void **fsdata) |
@@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2406 | from = pos & (PAGE_CACHE_SIZE - 1); | 2509 | from = pos & (PAGE_CACHE_SIZE - 1); |
2407 | to = from + len; | 2510 | to = from + len; |
2408 | 2511 | ||
2512 | if (ext4_nonda_switch(inode->i_sb)) { | ||
2513 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
2514 | return ext4_write_begin(file, mapping, pos, | ||
2515 | len, flags, pagep, fsdata); | ||
2516 | } | ||
2517 | *fsdata = (void *)0; | ||
2409 | retry: | 2518 | retry: |
2410 | /* | 2519 | /* |
2411 | * With delayed allocation, we don't log the i_disksize update | 2520 | * With delayed allocation, we don't log the i_disksize update |
@@ -2433,6 +2542,13 @@ retry: | |||
2433 | unlock_page(page); | 2542 | unlock_page(page); |
2434 | ext4_journal_stop(handle); | 2543 | ext4_journal_stop(handle); |
2435 | page_cache_release(page); | 2544 | page_cache_release(page); |
2545 | /* | ||
2546 | * block_write_begin may have instantiated a few blocks | ||
2547 | * outside i_size. Trim these off again. Don't need | ||
2548 | * i_size_read because we hold i_mutex. | ||
2549 | */ | ||
2550 | if (pos + len > inode->i_size) | ||
2551 | vmtruncate(inode, inode->i_size); | ||
2436 | } | 2552 | } |
2437 | 2553 | ||
2438 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2554 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2456 | bh = page_buffers(page); | 2572 | bh = page_buffers(page); |
2457 | idx = offset >> inode->i_blkbits; | 2573 | idx = offset >> inode->i_blkbits; |
2458 | 2574 | ||
2459 | for (i=0; i < idx; i++) | 2575 | for (i = 0; i < idx; i++) |
2460 | bh = bh->b_this_page; | 2576 | bh = bh->b_this_page; |
2461 | 2577 | ||
2462 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2578 | if (!buffer_mapped(bh) || (buffer_delay(bh))) |
@@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file, | |||
2474 | handle_t *handle = ext4_journal_current_handle(); | 2590 | handle_t *handle = ext4_journal_current_handle(); |
2475 | loff_t new_i_size; | 2591 | loff_t new_i_size; |
2476 | unsigned long start, end; | 2592 | unsigned long start, end; |
2593 | int write_mode = (int)(unsigned long)fsdata; | ||
2594 | |||
2595 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
2596 | if (ext4_should_order_data(inode)) { | ||
2597 | return ext4_ordered_write_end(file, mapping, pos, | ||
2598 | len, copied, page, fsdata); | ||
2599 | } else if (ext4_should_writeback_data(inode)) { | ||
2600 | return ext4_writeback_write_end(file, mapping, pos, | ||
2601 | len, copied, page, fsdata); | ||
2602 | } else { | ||
2603 | BUG(); | ||
2604 | } | ||
2605 | } | ||
2477 | 2606 | ||
2478 | start = pos & (PAGE_CACHE_SIZE - 1); | 2607 | start = pos & (PAGE_CACHE_SIZE - 1); |
2479 | end = start + copied -1; | 2608 | end = start + copied - 1; |
2480 | 2609 | ||
2481 | /* | 2610 | /* |
2482 | * generic_write_end() will run mark_inode_dirty() if i_size | 2611 | * generic_write_end() will run mark_inode_dirty() if i_size |
@@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file, | |||
2500 | EXT4_I(inode)->i_disksize = new_i_size; | 2629 | EXT4_I(inode)->i_disksize = new_i_size; |
2501 | } | 2630 | } |
2502 | up_write(&EXT4_I(inode)->i_data_sem); | 2631 | up_write(&EXT4_I(inode)->i_data_sem); |
2632 | /* We need to mark inode dirty even if | ||
2633 | * new_i_size is less that inode->i_size | ||
2634 | * bu greater than i_disksize.(hint delalloc) | ||
2635 | */ | ||
2636 | ext4_mark_inode_dirty(handle, inode); | ||
2503 | } | 2637 | } |
2504 | } | 2638 | } |
2505 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2639 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
@@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2591 | return 0; | 2725 | return 0; |
2592 | } | 2726 | } |
2593 | 2727 | ||
2594 | return generic_block_bmap(mapping,block,ext4_get_block); | 2728 | return generic_block_bmap(mapping, block, ext4_get_block); |
2595 | } | 2729 | } |
2596 | 2730 | ||
2597 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2731 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3197 | if (!partial->key && *partial->p) | 3331 | if (!partial->key && *partial->p) |
3198 | /* Writer: end */ | 3332 | /* Writer: end */ |
3199 | goto no_top; | 3333 | goto no_top; |
3200 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | 3334 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) |
3201 | ; | 3335 | ; |
3202 | /* | 3336 | /* |
3203 | * OK, we've found the last block that must survive. The rest of our | 3337 | * OK, we've found the last block that must survive. The rest of our |
@@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3216 | } | 3350 | } |
3217 | /* Writer: end */ | 3351 | /* Writer: end */ |
3218 | 3352 | ||
3219 | while(partial > p) { | 3353 | while (partial > p) { |
3220 | brelse(partial->bh); | 3354 | brelse(partial->bh); |
3221 | partial--; | 3355 | partial--; |
3222 | } | 3356 | } |
@@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3408 | /* This zaps the entire block. Bottom up. */ | 3542 | /* This zaps the entire block. Bottom up. */ |
3409 | BUFFER_TRACE(bh, "free child branches"); | 3543 | BUFFER_TRACE(bh, "free child branches"); |
3410 | ext4_free_branches(handle, inode, bh, | 3544 | ext4_free_branches(handle, inode, bh, |
3411 | (__le32*)bh->b_data, | 3545 | (__le32 *) bh->b_data, |
3412 | (__le32*)bh->b_data + addr_per_block, | 3546 | (__le32 *) bh->b_data + addr_per_block, |
3413 | depth); | 3547 | depth); |
3414 | 3548 | ||
3415 | /* | 3549 | /* |
3416 | * We've probably journalled the indirect block several | 3550 | * We've probably journalled the indirect block several |
@@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode) | |||
3578 | */ | 3712 | */ |
3579 | down_write(&ei->i_data_sem); | 3713 | down_write(&ei->i_data_sem); |
3580 | 3714 | ||
3581 | ext4_discard_reservation(inode); | 3715 | ext4_discard_preallocations(inode); |
3582 | 3716 | ||
3583 | /* | 3717 | /* |
3584 | * The orphan list entry will now protect us from any crash which | 3718 | * The orphan list entry will now protect us from any crash which |
@@ -3673,41 +3807,6 @@ out_stop: | |||
3673 | ext4_journal_stop(handle); | 3807 | ext4_journal_stop(handle); |
3674 | } | 3808 | } |
3675 | 3809 | ||
3676 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
3677 | unsigned long ino, struct ext4_iloc *iloc) | ||
3678 | { | ||
3679 | ext4_group_t block_group; | ||
3680 | unsigned long offset; | ||
3681 | ext4_fsblk_t block; | ||
3682 | struct ext4_group_desc *gdp; | ||
3683 | |||
3684 | if (!ext4_valid_inum(sb, ino)) { | ||
3685 | /* | ||
3686 | * This error is already checked for in namei.c unless we are | ||
3687 | * looking at an NFS filehandle, in which case no error | ||
3688 | * report is needed | ||
3689 | */ | ||
3690 | return 0; | ||
3691 | } | ||
3692 | |||
3693 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
3694 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
3695 | if (!gdp) | ||
3696 | return 0; | ||
3697 | |||
3698 | /* | ||
3699 | * Figure out the offset within the block group inode table | ||
3700 | */ | ||
3701 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
3702 | EXT4_INODE_SIZE(sb); | ||
3703 | block = ext4_inode_table(sb, gdp) + | ||
3704 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
3705 | |||
3706 | iloc->block_group = block_group; | ||
3707 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
3708 | return block; | ||
3709 | } | ||
3710 | |||
3711 | /* | 3810 | /* |
3712 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3811 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3713 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3812 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
3717 | static int __ext4_get_inode_loc(struct inode *inode, | 3816 | static int __ext4_get_inode_loc(struct inode *inode, |
3718 | struct ext4_iloc *iloc, int in_mem) | 3817 | struct ext4_iloc *iloc, int in_mem) |
3719 | { | 3818 | { |
3720 | ext4_fsblk_t block; | 3819 | struct ext4_group_desc *gdp; |
3721 | struct buffer_head *bh; | 3820 | struct buffer_head *bh; |
3821 | struct super_block *sb = inode->i_sb; | ||
3822 | ext4_fsblk_t block; | ||
3823 | int inodes_per_block, inode_offset; | ||
3824 | |||
3825 | iloc->bh = 0; | ||
3826 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
3827 | return -EIO; | ||
3722 | 3828 | ||
3723 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3829 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3724 | if (!block) | 3830 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3831 | if (!gdp) | ||
3725 | return -EIO; | 3832 | return -EIO; |
3726 | 3833 | ||
3727 | bh = sb_getblk(inode->i_sb, block); | 3834 | /* |
3835 | * Figure out the offset within the block group inode table | ||
3836 | */ | ||
3837 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
3838 | inode_offset = ((inode->i_ino - 1) % | ||
3839 | EXT4_INODES_PER_GROUP(sb)); | ||
3840 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
3841 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
3842 | |||
3843 | bh = sb_getblk(sb, block); | ||
3728 | if (!bh) { | 3844 | if (!bh) { |
3729 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3845 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
3730 | "unable to read inode block - " | 3846 | "inode block - inode=%lu, block=%llu", |
3731 | "inode=%lu, block=%llu", | 3847 | inode->i_ino, block); |
3732 | inode->i_ino, block); | ||
3733 | return -EIO; | 3848 | return -EIO; |
3734 | } | 3849 | } |
3735 | if (!buffer_uptodate(bh)) { | 3850 | if (!buffer_uptodate(bh)) { |
@@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3757 | */ | 3872 | */ |
3758 | if (in_mem) { | 3873 | if (in_mem) { |
3759 | struct buffer_head *bitmap_bh; | 3874 | struct buffer_head *bitmap_bh; |
3760 | struct ext4_group_desc *desc; | 3875 | int i, start; |
3761 | int inodes_per_buffer; | ||
3762 | int inode_offset, i; | ||
3763 | ext4_group_t block_group; | ||
3764 | int start; | ||
3765 | |||
3766 | block_group = (inode->i_ino - 1) / | ||
3767 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
3768 | inodes_per_buffer = bh->b_size / | ||
3769 | EXT4_INODE_SIZE(inode->i_sb); | ||
3770 | inode_offset = ((inode->i_ino - 1) % | ||
3771 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
3772 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
3773 | 3876 | ||
3774 | /* Is the inode bitmap in cache? */ | 3877 | start = inode_offset & ~(inodes_per_block - 1); |
3775 | desc = ext4_get_group_desc(inode->i_sb, | ||
3776 | block_group, NULL); | ||
3777 | if (!desc) | ||
3778 | goto make_io; | ||
3779 | 3878 | ||
3780 | bitmap_bh = sb_getblk(inode->i_sb, | 3879 | /* Is the inode bitmap in cache? */ |
3781 | ext4_inode_bitmap(inode->i_sb, desc)); | 3880 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3782 | if (!bitmap_bh) | 3881 | if (!bitmap_bh) |
3783 | goto make_io; | 3882 | goto make_io; |
3784 | 3883 | ||
@@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3791 | brelse(bitmap_bh); | 3890 | brelse(bitmap_bh); |
3792 | goto make_io; | 3891 | goto make_io; |
3793 | } | 3892 | } |
3794 | for (i = start; i < start + inodes_per_buffer; i++) { | 3893 | for (i = start; i < start + inodes_per_block; i++) { |
3795 | if (i == inode_offset) | 3894 | if (i == inode_offset) |
3796 | continue; | 3895 | continue; |
3797 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3896 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3798 | break; | 3897 | break; |
3799 | } | 3898 | } |
3800 | brelse(bitmap_bh); | 3899 | brelse(bitmap_bh); |
3801 | if (i == start + inodes_per_buffer) { | 3900 | if (i == start + inodes_per_block) { |
3802 | /* all other inodes are free, so skip I/O */ | 3901 | /* all other inodes are free, so skip I/O */ |
3803 | memset(bh->b_data, 0, bh->b_size); | 3902 | memset(bh->b_data, 0, bh->b_size); |
3804 | set_buffer_uptodate(bh); | 3903 | set_buffer_uptodate(bh); |
@@ -3809,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3809 | 3908 | ||
3810 | make_io: | 3909 | make_io: |
3811 | /* | 3910 | /* |
3911 | * If we need to do any I/O, try to pre-readahead extra | ||
3912 | * blocks from the inode table. | ||
3913 | */ | ||
3914 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
3915 | ext4_fsblk_t b, end, table; | ||
3916 | unsigned num; | ||
3917 | |||
3918 | table = ext4_inode_table(sb, gdp); | ||
3919 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
3920 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3921 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
3922 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
3923 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3924 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
3925 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
3926 | if (table > b) | ||
3927 | b = table; | ||
3928 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
3929 | num = EXT4_INODES_PER_GROUP(sb); | ||
3930 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3931 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3932 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
3933 | table += num / inodes_per_block; | ||
3934 | if (end > table) | ||
3935 | end = table; | ||
3936 | while (b <= end) | ||
3937 | sb_breadahead(sb, b++); | ||
3938 | } | ||
3939 | |||
3940 | /* | ||
3812 | * There are other valid inodes in the buffer, this inode | 3941 | * There are other valid inodes in the buffer, this inode |
3813 | * has in-inode xattrs, or we don't have this inode in memory. | 3942 | * has in-inode xattrs, or we don't have this inode in memory. |
3814 | * Read the block from disk. | 3943 | * Read the block from disk. |
@@ -3818,10 +3947,9 @@ make_io: | |||
3818 | submit_bh(READ_META, bh); | 3947 | submit_bh(READ_META, bh); |
3819 | wait_on_buffer(bh); | 3948 | wait_on_buffer(bh); |
3820 | if (!buffer_uptodate(bh)) { | 3949 | if (!buffer_uptodate(bh)) { |
3821 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3950 | ext4_error(sb, __func__, |
3822 | "unable to read inode block - " | 3951 | "unable to read inode block - inode=%lu, " |
3823 | "inode=%lu, block=%llu", | 3952 | "block=%llu", inode->i_ino, block); |
3824 | inode->i_ino, block); | ||
3825 | brelse(bh); | 3953 | brelse(bh); |
3826 | return -EIO; | 3954 | return -EIO; |
3827 | } | 3955 | } |
@@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3913 | return inode; | 4041 | return inode; |
3914 | 4042 | ||
3915 | ei = EXT4_I(inode); | 4043 | ei = EXT4_I(inode); |
3916 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 4044 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3917 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 4045 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
3918 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 4046 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
3919 | #endif | 4047 | #endif |
3920 | ei->i_block_alloc_info = NULL; | ||
3921 | 4048 | ||
3922 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4049 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
3923 | if (ret < 0) | 4050 | if (ret < 0) |
@@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3927 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4054 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3928 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4055 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3929 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4056 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
3930 | if(!(test_opt (inode->i_sb, NO_UID32))) { | 4057 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
3931 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4058 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3932 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4059 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3933 | } | 4060 | } |
@@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3945 | if (inode->i_mode == 0 || | 4072 | if (inode->i_mode == 0 || |
3946 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 4073 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
3947 | /* this inode is deleted */ | 4074 | /* this inode is deleted */ |
3948 | brelse (bh); | 4075 | brelse(bh); |
3949 | ret = -ESTALE; | 4076 | ret = -ESTALE; |
3950 | goto bad_inode; | 4077 | goto bad_inode; |
3951 | } | 4078 | } |
@@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3978 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4105 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
3979 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4106 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
3980 | EXT4_INODE_SIZE(inode->i_sb)) { | 4107 | EXT4_INODE_SIZE(inode->i_sb)) { |
3981 | brelse (bh); | 4108 | brelse(bh); |
3982 | ret = -EIO; | 4109 | ret = -EIO; |
3983 | goto bad_inode; | 4110 | goto bad_inode; |
3984 | } | 4111 | } |
@@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4031 | init_special_inode(inode, inode->i_mode, | 4158 | init_special_inode(inode, inode->i_mode, |
4032 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4159 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4033 | } | 4160 | } |
4034 | brelse (iloc.bh); | 4161 | brelse(iloc.bh); |
4035 | ext4_set_inode_flags(inode); | 4162 | ext4_set_inode_flags(inode); |
4036 | unlock_new_inode(inode); | 4163 | unlock_new_inode(inode); |
4037 | return inode; | 4164 | return inode; |
@@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4113 | 4240 | ||
4114 | ext4_get_inode_flags(ei); | 4241 | ext4_get_inode_flags(ei); |
4115 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4242 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4116 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 4243 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4117 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 4244 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
4118 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 4245 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
4119 | /* | 4246 | /* |
4120 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4247 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
4121 | * re-used with the upper 16 bits of the uid/gid intact | 4248 | * re-used with the upper 16 bits of the uid/gid intact |
4122 | */ | 4249 | */ |
4123 | if(!ei->i_dtime) { | 4250 | if (!ei->i_dtime) { |
4124 | raw_inode->i_uid_high = | 4251 | raw_inode->i_uid_high = |
4125 | cpu_to_le16(high_16_bits(inode->i_uid)); | 4252 | cpu_to_le16(high_16_bits(inode->i_uid)); |
4126 | raw_inode->i_gid_high = | 4253 | raw_inode->i_gid_high = |
@@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4208 | ei->i_state &= ~EXT4_STATE_NEW; | 4335 | ei->i_state &= ~EXT4_STATE_NEW; |
4209 | 4336 | ||
4210 | out_brelse: | 4337 | out_brelse: |
4211 | brelse (bh); | 4338 | brelse(bh); |
4212 | ext4_std_error(inode->i_sb, err); | 4339 | ext4_std_error(inode->i_sb, err); |
4213 | return err; | 4340 | return err; |
4214 | } | 4341 | } |
@@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4811 | loff_t size; | 4938 | loff_t size; |
4812 | unsigned long len; | 4939 | unsigned long len; |
4813 | int ret = -EINVAL; | 4940 | int ret = -EINVAL; |
4941 | void *fsdata; | ||
4814 | struct file *file = vma->vm_file; | 4942 | struct file *file = vma->vm_file; |
4815 | struct inode *inode = file->f_path.dentry->d_inode; | 4943 | struct inode *inode = file->f_path.dentry->d_inode; |
4816 | struct address_space *mapping = inode->i_mapping; | 4944 | struct address_space *mapping = inode->i_mapping; |
@@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4849 | * on the same page though | 4977 | * on the same page though |
4850 | */ | 4978 | */ |
4851 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4979 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
4852 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4980 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
4853 | if (ret < 0) | 4981 | if (ret < 0) |
4854 | goto out_unlock; | 4982 | goto out_unlock; |
4855 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4983 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
4856 | len, len, page, NULL); | 4984 | len, len, page, fsdata); |
4857 | if (ret < 0) | 4985 | if (ret < 0) |
4858 | goto out_unlock; | 4986 | goto out_unlock; |
4859 | ret = 0; | 4987 | ret = 0; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba6..ea27eaa0cfe5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
24 | struct ext4_inode_info *ei = EXT4_I(inode); | 24 | struct ext4_inode_info *ei = EXT4_I(inode); |
25 | unsigned int flags; | 25 | unsigned int flags; |
26 | unsigned short rsv_window_size; | ||
27 | 26 | ||
28 | ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 27 | ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); |
29 | 28 | ||
30 | switch (cmd) { | 29 | switch (cmd) { |
31 | case EXT4_IOC_GETFLAGS: | 30 | case EXT4_IOC_GETFLAGS: |
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
34 | return put_user(flags, (int __user *) arg); | 33 | return put_user(flags, (int __user *) arg); |
35 | case EXT4_IOC_SETFLAGS: { | 34 | case EXT4_IOC_SETFLAGS: { |
36 | handle_t *handle = NULL; | 35 | handle_t *handle = NULL; |
37 | int err; | 36 | int err, migrate = 0; |
38 | struct ext4_iloc iloc; | 37 | struct ext4_iloc iloc; |
39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
40 | unsigned int jflag; | 39 | unsigned int jflag; |
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
82 | if (!capable(CAP_SYS_RESOURCE)) | 81 | if (!capable(CAP_SYS_RESOURCE)) |
83 | goto flags_out; | 82 | goto flags_out; |
84 | } | 83 | } |
84 | if (oldflags & EXT4_EXTENTS_FL) { | ||
85 | /* We don't support clearning extent flags */ | ||
86 | if (!(flags & EXT4_EXTENTS_FL)) { | ||
87 | err = -EOPNOTSUPP; | ||
88 | goto flags_out; | ||
89 | } | ||
90 | } else if (flags & EXT4_EXTENTS_FL) { | ||
91 | /* migrate the file */ | ||
92 | migrate = 1; | ||
93 | flags &= ~EXT4_EXTENTS_FL; | ||
94 | } | ||
85 | 95 | ||
86 | handle = ext4_journal_start(inode, 1); | 96 | handle = ext4_journal_start(inode, 1); |
87 | if (IS_ERR(handle)) { | 97 | if (IS_ERR(handle)) { |
@@ -109,6 +119,10 @@ flags_err: | |||
109 | 119 | ||
110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 120 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
111 | err = ext4_change_inode_journal_flag(inode, jflag); | 121 | err = ext4_change_inode_journal_flag(inode, jflag); |
122 | if (err) | ||
123 | goto flags_out; | ||
124 | if (migrate) | ||
125 | err = ext4_ext_migrate(inode); | ||
112 | flags_out: | 126 | flags_out: |
113 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
114 | mnt_drop_write(filp->f_path.mnt); | 128 | mnt_drop_write(filp->f_path.mnt); |
@@ -175,49 +189,6 @@ setversion_out: | |||
175 | return ret; | 189 | return ret; |
176 | } | 190 | } |
177 | #endif | 191 | #endif |
178 | case EXT4_IOC_GETRSVSZ: | ||
179 | if (test_opt(inode->i_sb, RESERVATION) | ||
180 | && S_ISREG(inode->i_mode) | ||
181 | && ei->i_block_alloc_info) { | ||
182 | rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; | ||
183 | return put_user(rsv_window_size, (int __user *)arg); | ||
184 | } | ||
185 | return -ENOTTY; | ||
186 | case EXT4_IOC_SETRSVSZ: { | ||
187 | int err; | ||
188 | |||
189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | ||
190 | return -ENOTTY; | ||
191 | |||
192 | if (!is_owner_or_cap(inode)) | ||
193 | return -EACCES; | ||
194 | |||
195 | if (get_user(rsv_window_size, (int __user *)arg)) | ||
196 | return -EFAULT; | ||
197 | |||
198 | err = mnt_want_write(filp->f_path.mnt); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | ||
203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | ||
204 | |||
205 | /* | ||
206 | * need to allocate reservation structure for this inode | ||
207 | * before set the window size | ||
208 | */ | ||
209 | down_write(&ei->i_data_sem); | ||
210 | if (!ei->i_block_alloc_info) | ||
211 | ext4_init_block_alloc_info(inode); | ||
212 | |||
213 | if (ei->i_block_alloc_info){ | ||
214 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | ||
215 | rsv->rsv_goal_size = rsv_window_size; | ||
216 | } | ||
217 | up_write(&ei->i_data_sem); | ||
218 | mnt_drop_write(filp->f_path.mnt); | ||
219 | return 0; | ||
220 | } | ||
221 | case EXT4_IOC_GROUP_EXTEND: { | 192 | case EXT4_IOC_GROUP_EXTEND: { |
222 | ext4_fsblk_t n_blocks_count; | 193 | ext4_fsblk_t n_blocks_count; |
223 | struct super_block *sb = inode->i_sb; | 194 | struct super_block *sb = inode->i_sb; |
@@ -267,7 +238,26 @@ setversion_out: | |||
267 | } | 238 | } |
268 | 239 | ||
269 | case EXT4_IOC_MIGRATE: | 240 | case EXT4_IOC_MIGRATE: |
270 | return ext4_ext_migrate(inode, filp, cmd, arg); | 241 | { |
242 | int err; | ||
243 | if (!is_owner_or_cap(inode)) | ||
244 | return -EACCES; | ||
245 | |||
246 | err = mnt_want_write(filp->f_path.mnt); | ||
247 | if (err) | ||
248 | return err; | ||
249 | /* | ||
250 | * inode_mutex prevent write and truncate on the file. | ||
251 | * Read still goes through. We take i_data_sem in | ||
252 | * ext4_ext_swap_inode_data before we switch the | ||
253 | * inode format to prevent read. | ||
254 | */ | ||
255 | mutex_lock(&(inode->i_mutex)); | ||
256 | err = ext4_ext_migrate(inode); | ||
257 | mutex_unlock(&(inode->i_mutex)); | ||
258 | mnt_drop_write(filp->f_path.mnt); | ||
259 | return err; | ||
260 | } | ||
271 | 261 | ||
272 | default: | 262 | default: |
273 | return -ENOTTY; | 263 | return -ENOTTY; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e0e3a5eb1ddb..b580714f0d85 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
483 | BUG(); | 484 | BUG(); |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
533 | void *buddy; | 534 | void *buddy; |
534 | void *buddy2; | 535 | void *buddy2; |
535 | 536 | ||
536 | if (!test_opt(sb, MBALLOC)) | ||
537 | return 0; | ||
538 | |||
539 | { | 537 | { |
540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
785 | goto out; | 783 | goto out; |
786 | 784 | ||
787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
788 | continue; | 787 | continue; |
789 | 788 | ||
789 | lock_buffer(bh[i]); | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2169 | { | 2169 | { |
2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2171 | 2171 | ||
2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
2175 | } | ||
2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
2176 | } | 2177 | } |
2177 | 2178 | ||
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2181 | int i; | 2182 | int i; |
2182 | 2183 | ||
2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
2188 | } | 2189 | } |
2189 | 2190 | ||
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2485 | unsigned max; | 2486 | unsigned max; |
2486 | int ret; | 2487 | int ret; |
2487 | 2488 | ||
2488 | if (!test_opt(sb, MBALLOC)) | ||
2489 | return 0; | ||
2490 | |||
2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2489 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
2492 | 2490 | ||
2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2491 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2494 | if (sbi->s_mb_offsets == NULL) { | 2492 | if (sbi->s_mb_offsets == NULL) { |
2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2496 | return -ENOMEM; | 2493 | return -ENOMEM; |
2497 | } | 2494 | } |
2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2495 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2499 | if (sbi->s_mb_maxs == NULL) { | 2496 | if (sbi->s_mb_maxs == NULL) { |
2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2501 | kfree(sbi->s_mb_maxs); | 2497 | kfree(sbi->s_mb_maxs); |
2502 | return -ENOMEM; | 2498 | return -ENOMEM; |
2503 | } | 2499 | } |
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2520 | /* init file for buddy data */ | 2516 | /* init file for buddy data */ |
2521 | ret = ext4_mb_init_backend(sb); | 2517 | ret = ext4_mb_init_backend(sb); |
2522 | if (ret != 0) { | 2518 | if (ret != 0) { |
2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2524 | kfree(sbi->s_mb_offsets); | 2519 | kfree(sbi->s_mb_offsets); |
2525 | kfree(sbi->s_mb_maxs); | 2520 | kfree(sbi->s_mb_maxs); |
2526 | return ret; | 2521 | return ret; |
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2535 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2536 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2542 | 2537 | ||
2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2538 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2545 | if (sbi->s_locality_groups == NULL) { | 2539 | if (sbi->s_locality_groups == NULL) { |
2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2547 | kfree(sbi->s_mb_offsets); | 2540 | kfree(sbi->s_mb_offsets); |
2548 | kfree(sbi->s_mb_maxs); | 2541 | kfree(sbi->s_mb_maxs); |
2549 | return -ENOMEM; | 2542 | return -ENOMEM; |
2550 | } | 2543 | } |
2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2544 | for_each_possible_cpu(i) { |
2552 | struct ext4_locality_group *lg; | 2545 | struct ext4_locality_group *lg; |
2553 | lg = &sbi->s_locality_groups[i]; | 2546 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
2554 | mutex_init(&lg->lg_mutex); | 2547 | mutex_init(&lg->lg_mutex); |
2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2548 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2549 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2560 | ext4_mb_init_per_dev_proc(sb); | 2553 | ext4_mb_init_per_dev_proc(sb); |
2561 | ext4_mb_history_init(sb); | 2554 | ext4_mb_history_init(sb); |
2562 | 2555 | ||
2563 | printk("EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2564 | return 0; | 2557 | return 0; |
2565 | } | 2558 | } |
2566 | 2559 | ||
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2591 | 2584 | ||
2592 | if (!test_opt(sb, MBALLOC)) | ||
2593 | return 0; | ||
2594 | |||
2595 | /* release freed, non-committed blocks */ | 2585 | /* release freed, non-committed blocks */ |
2596 | spin_lock(&sbi->s_md_lock); | 2586 | spin_lock(&sbi->s_md_lock); |
2597 | list_splice_init(&sbi->s_closed_transaction, | 2587 | list_splice_init(&sbi->s_closed_transaction, |
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2647 | atomic_read(&sbi->s_mb_discarded)); | 2637 | atomic_read(&sbi->s_mb_discarded)); |
2648 | } | 2638 | } |
2649 | 2639 | ||
2650 | kfree(sbi->s_locality_groups); | 2640 | free_percpu(sbi->s_locality_groups); |
2651 | |||
2652 | ext4_mb_history_release(sb); | 2641 | ext4_mb_history_release(sb); |
2653 | ext4_mb_destroy_per_dev_proc(sb); | 2642 | ext4_mb_destroy_per_dev_proc(sb); |
2654 | 2643 | ||
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2710 | #define EXT4_MB_STREAM_REQ "stream_req" |
2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2711 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
2723 | 2712 | ||
2724 | |||
2725 | |||
2726 | #define MB_PROC_FOPS(name) \ | ||
2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
2728 | { \ | ||
2729 | struct ext4_sb_info *sbi = m->private; \ | ||
2730 | \ | ||
2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
2732 | return 0; \ | ||
2733 | } \ | ||
2734 | \ | ||
2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
2736 | { \ | ||
2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
2738 | } \ | ||
2739 | \ | ||
2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
2742 | { \ | ||
2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
2744 | char str[32]; \ | ||
2745 | long value; \ | ||
2746 | if (cnt >= sizeof(str)) \ | ||
2747 | return -EINVAL; \ | ||
2748 | if (copy_from_user(str, buf, cnt)) \ | ||
2749 | return -EFAULT; \ | ||
2750 | value = simple_strtol(str, NULL, 0); \ | ||
2751 | if (value <= 0) \ | ||
2752 | return -ERANGE; \ | ||
2753 | sbi->s_mb_##name = value; \ | ||
2754 | return cnt; \ | ||
2755 | } \ | ||
2756 | \ | ||
2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
2758 | .owner = THIS_MODULE, \ | ||
2759 | .open = ext4_mb_##name##_proc_open, \ | ||
2760 | .read = seq_read, \ | ||
2761 | .llseek = seq_lseek, \ | ||
2762 | .release = single_release, \ | ||
2763 | .write = ext4_mb_##name##_proc_write, \ | ||
2764 | }; | ||
2765 | |||
2766 | MB_PROC_FOPS(stats); | ||
2767 | MB_PROC_FOPS(max_to_scan); | ||
2768 | MB_PROC_FOPS(min_to_scan); | ||
2769 | MB_PROC_FOPS(order2_reqs); | ||
2770 | MB_PROC_FOPS(stream_request); | ||
2771 | MB_PROC_FOPS(group_prealloc); | ||
2772 | |||
2773 | #define MB_PROC_HANDLER(name, var) \ | ||
2774 | do { \ | ||
2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
2777 | if (proc == NULL) { \ | ||
2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
2779 | goto err_out; \ | ||
2780 | } \ | ||
2781 | } while (0) | ||
2782 | |||
2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2713 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2784 | { | 2714 | { |
2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2715 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2716 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2787 | struct proc_dir_entry *proc; | 2717 | struct proc_dir_entry *proc; |
2788 | char devname[64]; | ||
2789 | 2718 | ||
2790 | if (proc_root_ext4 == NULL) { | 2719 | if (sbi->s_proc == NULL) |
2791 | sbi->s_mb_proc = NULL; | ||
2792 | return -EINVAL; | 2720 | return -EINVAL; |
2793 | } | ||
2794 | bdevname(sb->s_bdev, devname); | ||
2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2796 | |||
2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2803 | 2721 | ||
2722 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2723 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2724 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2725 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2726 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2727 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2804 | return 0; | 2728 | return 0; |
2805 | 2729 | ||
2806 | err_out: | 2730 | err_out: |
2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2731 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2732 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2733 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2734 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2735 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2736 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2814 | remove_proc_entry(devname, proc_root_ext4); | ||
2815 | sbi->s_mb_proc = NULL; | ||
2816 | |||
2817 | return -ENOMEM; | 2737 | return -ENOMEM; |
2818 | } | 2738 | } |
2819 | 2739 | ||
2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2740 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2821 | { | 2741 | { |
2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2742 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2823 | char devname[64]; | ||
2824 | 2743 | ||
2825 | if (sbi->s_mb_proc == NULL) | 2744 | if (sbi->s_proc == NULL) |
2826 | return -EINVAL; | 2745 | return -EINVAL; |
2827 | 2746 | ||
2828 | bdevname(sb->s_bdev, devname); | 2747 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2748 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2749 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2750 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2751 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2752 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2835 | remove_proc_entry(devname, proc_root_ext4); | ||
2836 | 2753 | ||
2837 | return 0; | 2754 | return 0; |
2838 | } | 2755 | } |
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void) | |||
2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2771 | kmem_cache_destroy(ext4_pspace_cachep); |
2855 | return -ENOMEM; | 2772 | return -ENOMEM; |
2856 | } | 2773 | } |
2857 | #ifdef CONFIG_PROC_FS | ||
2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | ||
2859 | if (proc_root_ext4 == NULL) | ||
2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | ||
2861 | #endif | ||
2862 | return 0; | 2774 | return 0; |
2863 | } | 2775 | } |
2864 | 2776 | ||
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void) | |||
2867 | /* XXX: synchronize_rcu(); */ | 2779 | /* XXX: synchronize_rcu(); */ |
2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2780 | kmem_cache_destroy(ext4_pspace_cachep); |
2869 | kmem_cache_destroy(ext4_ac_cachep); | 2781 | kmem_cache_destroy(ext4_ac_cachep); |
2870 | #ifdef CONFIG_PROC_FS | ||
2871 | remove_proc_entry("fs/ext4", NULL); | ||
2872 | #endif | ||
2873 | } | 2782 | } |
2874 | 2783 | ||
2875 | 2784 | ||
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void) | |||
2879 | */ | 2788 | */ |
2880 | static noinline_for_stack int | 2789 | static noinline_for_stack int |
2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2790 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2882 | handle_t *handle) | 2791 | handle_t *handle, unsigned long reserv_blks) |
2883 | { | 2792 | { |
2884 | struct buffer_head *bitmap_bh = NULL; | 2793 | struct buffer_head *bitmap_bh = NULL; |
2885 | struct ext4_super_block *es; | 2794 | struct ext4_super_block *es; |
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2877 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2878 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2879 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2971 | 2880 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
2972 | /* | 2881 | /* |
2973 | * free blocks account has already be reduced/reserved | 2882 | * Now reduce the dirty block count also. Should not go negative |
2974 | * at write_begin() time for delayed allocation | ||
2975 | * do not double accounting | ||
2976 | */ | 2883 | */ |
2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2884 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2885 | /* release all the reserved blocks if non delalloc */ |
2979 | ac->ac_b_ex.fe_len); | 2886 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2887 | else | ||
2888 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2889 | ac->ac_b_ex.fe_len); | ||
2980 | 2890 | ||
2981 | if (sbi->s_log_groups_per_flex) { | 2891 | if (sbi->s_log_groups_per_flex) { |
2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2892 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3884,7 +3794,7 @@ out: | |||
3884 | * | 3794 | * |
3885 | * FIXME!! Make sure it is valid at all the call sites | 3795 | * FIXME!! Make sure it is valid at all the call sites |
3886 | */ | 3796 | */ |
3887 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3797 | void ext4_discard_preallocations(struct inode *inode) |
3888 | { | 3798 | { |
3889 | struct ext4_inode_info *ei = EXT4_I(inode); | 3799 | struct ext4_inode_info *ei = EXT4_I(inode); |
3890 | struct super_block *sb = inode->i_sb; | 3800 | struct super_block *sb = inode->i_sb; |
@@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
3896 | struct ext4_buddy e4b; | 3806 | struct ext4_buddy e4b; |
3897 | int err; | 3807 | int err; |
3898 | 3808 | ||
3899 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3809 | if (!S_ISREG(inode->i_mode)) { |
3900 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3810 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
3901 | return; | 3811 | return; |
3902 | } | 3812 | } |
@@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4094 | * per cpu locality group is to reduce the contention between block | 4004 | * per cpu locality group is to reduce the contention between block |
4095 | * request from multiple CPUs. | 4005 | * request from multiple CPUs. |
4096 | */ | 4006 | */ |
4097 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4007 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
4098 | put_cpu(); | ||
4099 | 4008 | ||
4100 | /* we're going to use group allocation */ | 4009 | /* we're going to use group allocation */ |
4101 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4010 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4369 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4278 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4370 | struct ext4_allocation_request *ar, int *errp) | 4279 | struct ext4_allocation_request *ar, int *errp) |
4371 | { | 4280 | { |
4281 | int freed; | ||
4372 | struct ext4_allocation_context *ac = NULL; | 4282 | struct ext4_allocation_context *ac = NULL; |
4373 | struct ext4_sb_info *sbi; | 4283 | struct ext4_sb_info *sbi; |
4374 | struct super_block *sb; | 4284 | struct super_block *sb; |
4375 | ext4_fsblk_t block = 0; | 4285 | ext4_fsblk_t block = 0; |
4376 | int freed; | 4286 | unsigned long inquota; |
4377 | int inquota; | 4287 | unsigned long reserv_blks = 0; |
4378 | 4288 | ||
4379 | sb = ar->inode->i_sb; | 4289 | sb = ar->inode->i_sb; |
4380 | sbi = EXT4_SB(sb); | 4290 | sbi = EXT4_SB(sb); |
4381 | 4291 | ||
4382 | if (!test_opt(sb, MBALLOC)) { | ||
4383 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
4384 | &(ar->len), errp); | ||
4385 | return block; | ||
4386 | } | ||
4387 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4292 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4388 | /* | 4293 | /* |
4389 | * With delalloc we already reserved the blocks | 4294 | * With delalloc we already reserved the blocks |
4390 | */ | 4295 | */ |
4391 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4296 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4392 | } | 4297 | /* let others to free the space */ |
4393 | 4298 | yield(); | |
4394 | if (ar->len == 0) { | 4299 | ar->len = ar->len >> 1; |
4395 | *errp = -ENOSPC; | 4300 | } |
4396 | return 0; | 4301 | if (!ar->len) { |
4302 | *errp = -ENOSPC; | ||
4303 | return 0; | ||
4304 | } | ||
4305 | reserv_blks = ar->len; | ||
4397 | } | 4306 | } |
4398 | |||
4399 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4307 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
4400 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4308 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4401 | ar->len--; | 4309 | ar->len--; |
@@ -4441,7 +4349,7 @@ repeat: | |||
4441 | } | 4349 | } |
4442 | 4350 | ||
4443 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4351 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4444 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4352 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4445 | if (*errp == -EAGAIN) { | 4353 | if (*errp == -EAGAIN) { |
4446 | ac->ac_b_ex.fe_group = 0; | 4354 | ac->ac_b_ex.fe_group = 0; |
4447 | ac->ac_b_ex.fe_start = 0; | 4355 | ac->ac_b_ex.fe_start = 0; |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a75..b3b4828f8b89 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |||
257 | 257 | ||
258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
259 | 259 | ||
260 | static struct proc_dir_entry *proc_root_ext4; | ||
261 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 260 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
262 | 261 | ||
263 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 262 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 46fc0b5b12ba..f2a9cf498ecd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) | |||
447 | 447 | ||
448 | } | 448 | } |
449 | 449 | ||
450 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | 450 | int ext4_ext_migrate(struct inode *inode) |
451 | unsigned int cmd, unsigned long arg) | ||
452 | { | 451 | { |
453 | handle_t *handle; | 452 | handle_t *handle; |
454 | int retval = 0, i; | 453 | int retval = 0, i; |
@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
516 | * when we add extents we extent the journal | 515 | * when we add extents we extent the journal |
517 | */ | 516 | */ |
518 | /* | 517 | /* |
519 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
520 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
521 | * switch the inode format to prevent read. | ||
522 | */ | ||
523 | mutex_lock(&(inode->i_mutex)); | ||
524 | /* | ||
525 | * Even though we take i_mutex we can still cause block allocation | 518 | * Even though we take i_mutex we can still cause block allocation |
526 | * via mmap write to holes. If we have allocated new blocks we fail | 519 | * via mmap write to holes. If we have allocated new blocks we fail |
527 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 520 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. |
@@ -623,7 +616,6 @@ err_out: | |||
623 | tmp_inode->i_nlink = 0; | 616 | tmp_inode->i_nlink = 0; |
624 | 617 | ||
625 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
626 | mutex_unlock(&(inode->i_mutex)); | ||
627 | 619 | ||
628 | if (tmp_inode) | 620 | if (tmp_inode) |
629 | iput(tmp_inode); | 621 | iput(tmp_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c3..92db9e945147 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -151,34 +151,36 @@ struct dx_map_entry | |||
151 | 151 | ||
152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
154 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 154 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
155 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 155 | static void dx_set_hash(struct dx_entry *entry, unsigned value); |
156 | static unsigned dx_get_count (struct dx_entry *entries); | 156 | static unsigned dx_get_count(struct dx_entry *entries); |
157 | static unsigned dx_get_limit (struct dx_entry *entries); | 157 | static unsigned dx_get_limit(struct dx_entry *entries); |
158 | static void dx_set_count (struct dx_entry *entries, unsigned value); | 158 | static void dx_set_count(struct dx_entry *entries, unsigned value); |
159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit(struct dx_entry *entries, unsigned value); |
160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit(struct inode *dir, unsigned infosize); |
161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit(struct inode *dir); |
162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
163 | struct inode *dir, | 163 | struct inode *dir, |
164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
166 | int *err); | 166 | int *err); |
167 | static void dx_release (struct dx_frame *frames); | 167 | static void dx_release(struct dx_frame *frames); |
168 | static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | 168 | static int dx_make_map(struct ext4_dir_entry_2 *de, int size, |
169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
171 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 171 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
172 | struct dx_map_entry *offsets, int count); | 172 | struct dx_map_entry *offsets, int count); |
173 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 173 | static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); |
174 | static void dx_insert_block(struct dx_frame *frame, | 174 | static void dx_insert_block(struct dx_frame *frame, |
175 | u32 hash, ext4_lblk_t block); | 175 | u32 hash, ext4_lblk_t block); |
176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
177 | struct dx_frame *frame, | 177 | struct dx_frame *frame, |
178 | struct dx_frame *frames, | 178 | struct dx_frame *frames, |
179 | __u32 *start_hash); | 179 | __u32 *start_hash); |
180 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 180 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
181 | struct ext4_dir_entry_2 **res_dir, int *err); | 181 | const struct qstr *d_name, |
182 | struct ext4_dir_entry_2 **res_dir, | ||
183 | int *err); | ||
182 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 184 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
183 | struct inode *inode); | 185 | struct inode *inode); |
184 | 186 | ||
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) | |||
207 | entry->block = cpu_to_le32(value); | 209 | entry->block = cpu_to_le32(value); |
208 | } | 210 | } |
209 | 211 | ||
210 | static inline unsigned dx_get_hash (struct dx_entry *entry) | 212 | static inline unsigned dx_get_hash(struct dx_entry *entry) |
211 | { | 213 | { |
212 | return le32_to_cpu(entry->hash); | 214 | return le32_to_cpu(entry->hash); |
213 | } | 215 | } |
214 | 216 | ||
215 | static inline void dx_set_hash (struct dx_entry *entry, unsigned value) | 217 | static inline void dx_set_hash(struct dx_entry *entry, unsigned value) |
216 | { | 218 | { |
217 | entry->hash = cpu_to_le32(value); | 219 | entry->hash = cpu_to_le32(value); |
218 | } | 220 | } |
219 | 221 | ||
220 | static inline unsigned dx_get_count (struct dx_entry *entries) | 222 | static inline unsigned dx_get_count(struct dx_entry *entries) |
221 | { | 223 | { |
222 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); | 224 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); |
223 | } | 225 | } |
224 | 226 | ||
225 | static inline unsigned dx_get_limit (struct dx_entry *entries) | 227 | static inline unsigned dx_get_limit(struct dx_entry *entries) |
226 | { | 228 | { |
227 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); | 229 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); |
228 | } | 230 | } |
229 | 231 | ||
230 | static inline void dx_set_count (struct dx_entry *entries, unsigned value) | 232 | static inline void dx_set_count(struct dx_entry *entries, unsigned value) |
231 | { | 233 | { |
232 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); | 234 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); |
233 | } | 235 | } |
234 | 236 | ||
235 | static inline void dx_set_limit (struct dx_entry *entries, unsigned value) | 237 | static inline void dx_set_limit(struct dx_entry *entries, unsigned value) |
236 | { | 238 | { |
237 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); | 239 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); |
238 | } | 240 | } |
239 | 241 | ||
240 | static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | 242 | static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) |
241 | { | 243 | { |
242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 244 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
243 | EXT4_DIR_REC_LEN(2) - infosize; | 245 | EXT4_DIR_REC_LEN(2) - infosize; |
244 | return entry_space / sizeof(struct dx_entry); | 246 | return entry_space / sizeof(struct dx_entry); |
245 | } | 247 | } |
246 | 248 | ||
247 | static inline unsigned dx_node_limit (struct inode *dir) | 249 | static inline unsigned dx_node_limit(struct inode *dir) |
248 | { | 250 | { |
249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 251 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
250 | return entry_space / sizeof(struct dx_entry); | 252 | return entry_space / sizeof(struct dx_entry); |
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) | |||
254 | * Debug | 256 | * Debug |
255 | */ | 257 | */ |
256 | #ifdef DX_DEBUG | 258 | #ifdef DX_DEBUG |
257 | static void dx_show_index (char * label, struct dx_entry *entries) | 259 | static void dx_show_index(char * label, struct dx_entry *entries) |
258 | { | 260 | { |
259 | int i, n = dx_get_count (entries); | 261 | int i, n = dx_get_count (entries); |
260 | printk("%s index ", label); | 262 | printk(KERN_DEBUG "%s index ", label); |
261 | for (i = 0; i < n; i++) { | 263 | for (i = 0; i < n; i++) { |
262 | printk("%x->%lu ", i? dx_get_hash(entries + i) : | 264 | printk("%x->%lu ", i ? dx_get_hash(entries + i) : |
263 | 0, (unsigned long)dx_get_block(entries + i)); | 265 | 0, (unsigned long)dx_get_block(entries + i)); |
264 | } | 266 | } |
265 | printk("\n"); | 267 | printk("\n"); |
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
306 | struct dx_entry *entries, int levels) | 308 | struct dx_entry *entries, int levels) |
307 | { | 309 | { |
308 | unsigned blocksize = dir->i_sb->s_blocksize; | 310 | unsigned blocksize = dir->i_sb->s_blocksize; |
309 | unsigned count = dx_get_count (entries), names = 0, space = 0, i; | 311 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
310 | unsigned bcount = 0; | 312 | unsigned bcount = 0; |
311 | struct buffer_head *bh; | 313 | struct buffer_head *bh; |
312 | int err; | 314 | int err; |
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
325 | names += stats.names; | 327 | names += stats.names; |
326 | space += stats.space; | 328 | space += stats.space; |
327 | bcount += stats.bcount; | 329 | bcount += stats.bcount; |
328 | brelse (bh); | 330 | brelse(bh); |
329 | } | 331 | } |
330 | if (bcount) | 332 | if (bcount) |
331 | printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", | 333 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
332 | names, space/bcount,(space/bcount)*100/blocksize); | 334 | levels ? "" : " ", names, space/bcount, |
335 | (space/bcount)*100/blocksize); | ||
333 | return (struct stats) { names, space, bcount}; | 336 | return (struct stats) { names, space, bcount}; |
334 | } | 337 | } |
335 | #endif /* DX_DEBUG */ | 338 | #endif /* DX_DEBUG */ |
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
344 | * back to userspace. | 347 | * back to userspace. |
345 | */ | 348 | */ |
346 | static struct dx_frame * | 349 | static struct dx_frame * |
347 | dx_probe(struct dentry *dentry, struct inode *dir, | 350 | dx_probe(const struct qstr *d_name, struct inode *dir, |
348 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 351 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
349 | { | 352 | { |
350 | unsigned count, indirect; | 353 | unsigned count, indirect; |
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
355 | u32 hash; | 358 | u32 hash; |
356 | 359 | ||
357 | frame->bh = NULL; | 360 | frame->bh = NULL; |
358 | if (dentry) | ||
359 | dir = dentry->d_parent->d_inode; | ||
360 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 361 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) |
361 | goto fail; | 362 | goto fail; |
362 | root = (struct dx_root *) bh->b_data; | 363 | root = (struct dx_root *) bh->b_data; |
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
372 | } | 373 | } |
373 | hinfo->hash_version = root->info.hash_version; | 374 | hinfo->hash_version = root->info.hash_version; |
374 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 375 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
375 | if (dentry) | 376 | if (d_name) |
376 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 377 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
377 | hash = hinfo->hash; | 378 | hash = hinfo->hash; |
378 | 379 | ||
379 | if (root->info.unused_flags & 1) { | 380 | if (root->info.unused_flags & 1) { |
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
406 | goto fail; | 407 | goto fail; |
407 | } | 408 | } |
408 | 409 | ||
409 | dxtrace (printk("Look up %x", hash)); | 410 | dxtrace(printk("Look up %x", hash)); |
410 | while (1) | 411 | while (1) |
411 | { | 412 | { |
412 | count = dx_get_count(entries); | 413 | count = dx_get_count(entries); |
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
555 | 0, &err))) | 556 | 0, &err))) |
556 | return err; /* Failure */ | 557 | return err; /* Failure */ |
557 | p++; | 558 | p++; |
558 | brelse (p->bh); | 559 | brelse(p->bh); |
559 | p->bh = bh; | 560 | p->bh = bh; |
560 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; | 561 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; |
561 | } | 562 | } |
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
593 | /* On error, skip the f_pos to the next block. */ | 594 | /* On error, skip the f_pos to the next block. */ |
594 | dir_file->f_pos = (dir_file->f_pos | | 595 | dir_file->f_pos = (dir_file->f_pos | |
595 | (dir->i_sb->s_blocksize - 1)) + 1; | 596 | (dir->i_sb->s_blocksize - 1)) + 1; |
596 | brelse (bh); | 597 | brelse(bh); |
597 | return count; | 598 | return count; |
598 | } | 599 | } |
599 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 600 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
635 | int ret, err; | 636 | int ret, err; |
636 | __u32 hashval; | 637 | __u32 hashval; |
637 | 638 | ||
638 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 639 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
639 | start_minor_hash)); | 640 | start_hash, start_minor_hash)); |
640 | dir = dir_file->f_path.dentry->d_inode; | 641 | dir = dir_file->f_path.dentry->d_inode; |
641 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 642 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
642 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 643 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
648 | } | 649 | } |
649 | hinfo.hash = start_hash; | 650 | hinfo.hash = start_hash; |
650 | hinfo.minor_hash = 0; | 651 | hinfo.minor_hash = 0; |
651 | frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); | 652 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); |
652 | if (!frame) | 653 | if (!frame) |
653 | return err; | 654 | return err; |
654 | 655 | ||
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
694 | break; | 695 | break; |
695 | } | 696 | } |
696 | dx_release(frames); | 697 | dx_release(frames); |
697 | dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", | 698 | dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " |
698 | count, *next_hash)); | 699 | "next hash: %x\n", count, *next_hash)); |
699 | return count; | 700 | return count; |
700 | errout: | 701 | errout: |
701 | dx_release(frames); | 702 | dx_release(frames); |
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, | |||
802 | /* | 803 | /* |
803 | * Returns 0 if not found, -1 on failure, and 1 on success | 804 | * Returns 0 if not found, -1 on failure, and 1 on success |
804 | */ | 805 | */ |
805 | static inline int search_dirblock(struct buffer_head * bh, | 806 | static inline int search_dirblock(struct buffer_head *bh, |
806 | struct inode *dir, | 807 | struct inode *dir, |
807 | struct dentry *dentry, | 808 | const struct qstr *d_name, |
808 | unsigned long offset, | 809 | unsigned long offset, |
809 | struct ext4_dir_entry_2 ** res_dir) | 810 | struct ext4_dir_entry_2 ** res_dir) |
810 | { | 811 | { |
811 | struct ext4_dir_entry_2 * de; | 812 | struct ext4_dir_entry_2 * de; |
812 | char * dlimit; | 813 | char * dlimit; |
813 | int de_len; | 814 | int de_len; |
814 | const char *name = dentry->d_name.name; | 815 | const char *name = d_name->name; |
815 | int namelen = dentry->d_name.len; | 816 | int namelen = d_name->len; |
816 | 817 | ||
817 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 818 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
818 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 819 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
851 | * The returned buffer_head has ->b_count elevated. The caller is expected | 852 | * The returned buffer_head has ->b_count elevated. The caller is expected |
852 | * to brelse() it when appropriate. | 853 | * to brelse() it when appropriate. |
853 | */ | 854 | */ |
854 | static struct buffer_head * ext4_find_entry (struct dentry *dentry, | 855 | static struct buffer_head * ext4_find_entry (struct inode *dir, |
856 | const struct qstr *d_name, | ||
855 | struct ext4_dir_entry_2 ** res_dir) | 857 | struct ext4_dir_entry_2 ** res_dir) |
856 | { | 858 | { |
857 | struct super_block * sb; | 859 | struct super_block *sb; |
858 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 860 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
859 | struct buffer_head * bh, *ret = NULL; | 861 | struct buffer_head *bh, *ret = NULL; |
860 | ext4_lblk_t start, block, b; | 862 | ext4_lblk_t start, block, b; |
861 | int ra_max = 0; /* Number of bh's in the readahead | 863 | int ra_max = 0; /* Number of bh's in the readahead |
862 | buffer, bh_use[] */ | 864 | buffer, bh_use[] */ |
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
865 | int num = 0; | 867 | int num = 0; |
866 | ext4_lblk_t nblocks; | 868 | ext4_lblk_t nblocks; |
867 | int i, err; | 869 | int i, err; |
868 | struct inode *dir = dentry->d_parent->d_inode; | ||
869 | int namelen; | 870 | int namelen; |
870 | 871 | ||
871 | *res_dir = NULL; | 872 | *res_dir = NULL; |
872 | sb = dir->i_sb; | 873 | sb = dir->i_sb; |
873 | namelen = dentry->d_name.len; | 874 | namelen = d_name->len; |
874 | if (namelen > EXT4_NAME_LEN) | 875 | if (namelen > EXT4_NAME_LEN) |
875 | return NULL; | 876 | return NULL; |
876 | if (is_dx(dir)) { | 877 | if (is_dx(dir)) { |
877 | bh = ext4_dx_find_entry(dentry, res_dir, &err); | 878 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
878 | /* | 879 | /* |
879 | * On success, or if the error was file not found, | 880 | * On success, or if the error was file not found, |
880 | * return. Otherwise, fall back to doing a search the | 881 | * return. Otherwise, fall back to doing a search the |
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
882 | */ | 883 | */ |
883 | if (bh || (err != ERR_BAD_DX_DIR)) | 884 | if (bh || (err != ERR_BAD_DX_DIR)) |
884 | return bh; | 885 | return bh; |
885 | dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); | 886 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
887 | "falling back\n")); | ||
886 | } | 888 | } |
887 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); | 889 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); |
888 | start = EXT4_I(dir)->i_dir_start_lookup; | 890 | start = EXT4_I(dir)->i_dir_start_lookup; |
@@ -926,7 +928,7 @@ restart: | |||
926 | brelse(bh); | 928 | brelse(bh); |
927 | goto next; | 929 | goto next; |
928 | } | 930 | } |
929 | i = search_dirblock(bh, dir, dentry, | 931 | i = search_dirblock(bh, dir, d_name, |
930 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 932 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
931 | if (i == 1) { | 933 | if (i == 1) { |
932 | EXT4_I(dir)->i_dir_start_lookup = block; | 934 | EXT4_I(dir)->i_dir_start_lookup = block; |
@@ -956,11 +958,11 @@ restart: | |||
956 | cleanup_and_exit: | 958 | cleanup_and_exit: |
957 | /* Clean up the read-ahead blocks */ | 959 | /* Clean up the read-ahead blocks */ |
958 | for (; ra_ptr < ra_max; ra_ptr++) | 960 | for (; ra_ptr < ra_max; ra_ptr++) |
959 | brelse (bh_use[ra_ptr]); | 961 | brelse(bh_use[ra_ptr]); |
960 | return ret; | 962 | return ret; |
961 | } | 963 | } |
962 | 964 | ||
963 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 965 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
964 | struct ext4_dir_entry_2 **res_dir, int *err) | 966 | struct ext4_dir_entry_2 **res_dir, int *err) |
965 | { | 967 | { |
966 | struct super_block * sb; | 968 | struct super_block * sb; |
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
971 | struct buffer_head *bh; | 973 | struct buffer_head *bh; |
972 | ext4_lblk_t block; | 974 | ext4_lblk_t block; |
973 | int retval; | 975 | int retval; |
974 | int namelen = dentry->d_name.len; | 976 | int namelen = d_name->len; |
975 | const u8 *name = dentry->d_name.name; | 977 | const u8 *name = d_name->name; |
976 | struct inode *dir = dentry->d_parent->d_inode; | ||
977 | 978 | ||
978 | sb = dir->i_sb; | 979 | sb = dir->i_sb; |
979 | /* NFS may look up ".." - look at dx_root directory block */ | 980 | /* NFS may look up ".." - look at dx_root directory block */ |
980 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 981 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ |
981 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
982 | return NULL; | 983 | return NULL; |
983 | } else { | 984 | } else { |
984 | frame = frames; | 985 | frame = frames; |
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1010 | return bh; | 1011 | return bh; |
1011 | } | 1012 | } |
1012 | } | 1013 | } |
1013 | brelse (bh); | 1014 | brelse(bh); |
1014 | /* Check to see if we should continue to search */ | 1015 | /* Check to see if we should continue to search */ |
1015 | retval = ext4_htree_next_block(dir, hash, frame, | 1016 | retval = ext4_htree_next_block(dir, hash, frame, |
1016 | frames, NULL); | 1017 | frames, NULL); |
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1025 | 1026 | ||
1026 | *err = -ENOENT; | 1027 | *err = -ENOENT; |
1027 | errout: | 1028 | errout: |
1028 | dxtrace(printk("%s not found\n", name)); | 1029 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); |
1029 | dx_release (frames); | 1030 | dx_release (frames); |
1030 | return NULL; | 1031 | return NULL; |
1031 | } | 1032 | } |
1032 | 1033 | ||
1033 | static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 1034 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
1034 | { | 1035 | { |
1035 | struct inode * inode; | 1036 | struct inode *inode; |
1036 | struct ext4_dir_entry_2 * de; | 1037 | struct ext4_dir_entry_2 *de; |
1037 | struct buffer_head * bh; | 1038 | struct buffer_head *bh; |
1038 | 1039 | ||
1039 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1040 | if (dentry->d_name.len > EXT4_NAME_LEN) |
1040 | return ERR_PTR(-ENAMETOOLONG); | 1041 | return ERR_PTR(-ENAMETOOLONG); |
1041 | 1042 | ||
1042 | bh = ext4_find_entry(dentry, &de); | 1043 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
1043 | inode = NULL; | 1044 | inode = NULL; |
1044 | if (bh) { | 1045 | if (bh) { |
1045 | unsigned long ino = le32_to_cpu(de->inode); | 1046 | unsigned long ino = le32_to_cpu(de->inode); |
1046 | brelse (bh); | 1047 | brelse(bh); |
1047 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1048 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1048 | ext4_error(dir->i_sb, "ext4_lookup", | 1049 | ext4_error(dir->i_sb, "ext4_lookup", |
1049 | "bad inode number: %lu", ino); | 1050 | "bad inode number: %lu", ino); |
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1062 | unsigned long ino; | 1063 | unsigned long ino; |
1063 | struct dentry *parent; | 1064 | struct dentry *parent; |
1064 | struct inode *inode; | 1065 | struct inode *inode; |
1065 | struct dentry dotdot; | 1066 | static const struct qstr dotdot = { |
1067 | .name = "..", | ||
1068 | .len = 2, | ||
1069 | }; | ||
1066 | struct ext4_dir_entry_2 * de; | 1070 | struct ext4_dir_entry_2 * de; |
1067 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
1068 | 1072 | ||
1069 | dotdot.d_name.name = ".."; | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1070 | dotdot.d_name.len = 2; | ||
1071 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
1072 | |||
1073 | bh = ext4_find_entry(&dotdot, &de); | ||
1074 | inode = NULL; | 1074 | inode = NULL; |
1075 | if (!bh) | 1075 | if (!bh) |
1076 | return ERR_PTR(-ENOENT); | 1076 | return ERR_PTR(-ENOENT); |
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1201 | 1201 | ||
1202 | /* create map in the end of data2 block */ | 1202 | /* create map in the end of data2 block */ |
1203 | map = (struct dx_map_entry *) (data2 + blocksize); | 1203 | map = (struct dx_map_entry *) (data2 + blocksize); |
1204 | count = dx_make_map ((struct ext4_dir_entry_2 *) data1, | 1204 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, |
1205 | blocksize, hinfo, map); | 1205 | blocksize, hinfo, map); |
1206 | map -= count; | 1206 | map -= count; |
1207 | dx_sort_map (map, count); | 1207 | dx_sort_map(map, count); |
1208 | /* Split the existing block in the middle, size-wise */ | 1208 | /* Split the existing block in the middle, size-wise */ |
1209 | size = 0; | 1209 | size = 0; |
1210 | move = 0; | 1210 | move = 0; |
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1225 | 1225 | ||
1226 | /* Fancy dance to stay within two buffers */ | 1226 | /* Fancy dance to stay within two buffers */ |
1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
1228 | de = dx_pack_dirents(data1,blocksize); | 1228 | de = dx_pack_dirents(data1, blocksize); |
1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); | 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); | 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1237 | swap(*bh, bh2); | 1237 | swap(*bh, bh2); |
1238 | de = de2; | 1238 | de = de2; |
1239 | } | 1239 | } |
1240 | dx_insert_block (frame, hash2 + continued, newblock); | 1240 | dx_insert_block(frame, hash2 + continued, newblock); |
1241 | err = ext4_journal_dirty_metadata (handle, bh2); | 1241 | err = ext4_journal_dirty_metadata(handle, bh2); |
1242 | if (err) | 1242 | if (err) |
1243 | goto journal_error; | 1243 | goto journal_error; |
1244 | err = ext4_journal_dirty_metadata (handle, frame->bh); | 1244 | err = ext4_journal_dirty_metadata(handle, frame->bh); |
1245 | if (err) | 1245 | if (err) |
1246 | goto journal_error; | 1246 | goto journal_error; |
1247 | brelse (bh2); | 1247 | brelse(bh2); |
1248 | dxtrace(dx_show_index ("frame", frame->entries)); | 1248 | dxtrace(dx_show_index("frame", frame->entries)); |
1249 | return de; | 1249 | return de; |
1250 | 1250 | ||
1251 | journal_error: | 1251 | journal_error: |
@@ -1271,7 +1271,7 @@ errout: | |||
1271 | */ | 1271 | */ |
1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
1273 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, |
1274 | struct buffer_head * bh) | 1274 | struct buffer_head *bh) |
1275 | { | 1275 | { |
1276 | struct inode *dir = dentry->d_parent->d_inode; | 1276 | struct inode *dir = dentry->d_parent->d_inode; |
1277 | const char *name = dentry->d_name.name; | 1277 | const char *name = dentry->d_name.name; |
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1288 | while ((char *) de <= top) { | 1288 | while ((char *) de <= top) { |
1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
1290 | bh, offset)) { | 1290 | bh, offset)) { |
1291 | brelse (bh); | 1291 | brelse(bh); |
1292 | return -EIO; | 1292 | return -EIO; |
1293 | } | 1293 | } |
1294 | if (ext4_match (namelen, name, de)) { | 1294 | if (ext4_match(namelen, name, de)) { |
1295 | brelse (bh); | 1295 | brelse(bh); |
1296 | return -EEXIST; | 1296 | return -EEXIST; |
1297 | } | 1297 | } |
1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1329 | } else | 1329 | } else |
1330 | de->inode = 0; | 1330 | de->inode = 0; |
1331 | de->name_len = namelen; | 1331 | de->name_len = namelen; |
1332 | memcpy (de->name, name, namelen); | 1332 | memcpy(de->name, name, namelen); |
1333 | /* | 1333 | /* |
1334 | * XXX shouldn't update any times until successful | 1334 | * XXX shouldn't update any times until successful |
1335 | * completion of syscall, but too many callers depend | 1335 | * completion of syscall, but too many callers depend |
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1377 | struct fake_dirent *fde; | 1377 | struct fake_dirent *fde; |
1378 | 1378 | ||
1379 | blocksize = dir->i_sb->s_blocksize; | 1379 | blocksize = dir->i_sb->s_blocksize; |
1380 | dxtrace(printk("Creating index\n")); | 1380 | dxtrace(printk(KERN_DEBUG "Creating index\n")); |
1381 | retval = ext4_journal_get_write_access(handle, bh); | 1381 | retval = ext4_journal_get_write_access(handle, bh); |
1382 | if (retval) { | 1382 | if (retval) { |
1383 | ext4_std_error(dir->i_sb, retval); | 1383 | ext4_std_error(dir->i_sb, retval); |
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1386 | } | 1386 | } |
1387 | root = (struct dx_root *) bh->b_data; | 1387 | root = (struct dx_root *) bh->b_data; |
1388 | 1388 | ||
1389 | bh2 = ext4_append (handle, dir, &block, &retval); | 1389 | bh2 = ext4_append(handle, dir, &block, &retval); |
1390 | if (!(bh2)) { | 1390 | if (!(bh2)) { |
1391 | brelse(bh); | 1391 | brelse(bh); |
1392 | return retval; | 1392 | return retval; |
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1412 | root->info.info_length = sizeof(root->info); | 1412 | root->info.info_length = sizeof(root->info); |
1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
1414 | entries = root->entries; | 1414 | entries = root->entries; |
1415 | dx_set_block (entries, 1); | 1415 | dx_set_block(entries, 1); |
1416 | dx_set_count (entries, 1); | 1416 | dx_set_count(entries, 1); |
1417 | dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); | 1417 | dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); |
1418 | 1418 | ||
1419 | /* Initialize as for dx_probe */ | 1419 | /* Initialize as for dx_probe */ |
1420 | hinfo.hash_version = root->info.hash_version; | 1420 | hinfo.hash_version = root->info.hash_version; |
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1443 | * may not sleep between calling this and putting something into | 1443 | * may not sleep between calling this and putting something into |
1444 | * the entry, as someone else might have used it while you slept. | 1444 | * the entry, as someone else might have used it while you slept. |
1445 | */ | 1445 | */ |
1446 | static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | 1446 | static int ext4_add_entry(handle_t *handle, struct dentry *dentry, |
1447 | struct inode *inode) | 1447 | struct inode *inode) |
1448 | { | 1448 | { |
1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
1450 | unsigned long offset; | 1450 | unsigned long offset; |
1451 | struct buffer_head * bh; | 1451 | struct buffer_head *bh; |
1452 | struct ext4_dir_entry_2 *de; | 1452 | struct ext4_dir_entry_2 *de; |
1453 | struct super_block * sb; | 1453 | struct super_block *sb; |
1454 | int retval; | 1454 | int retval; |
1455 | int dx_fallback=0; | 1455 | int dx_fallback=0; |
1456 | unsigned blocksize; | 1456 | unsigned blocksize; |
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1500 | struct dx_frame frames[2], *frame; | 1500 | struct dx_frame frames[2], *frame; |
1501 | struct dx_entry *entries, *at; | 1501 | struct dx_entry *entries, *at; |
1502 | struct dx_hash_info hinfo; | 1502 | struct dx_hash_info hinfo; |
1503 | struct buffer_head * bh; | 1503 | struct buffer_head *bh; |
1504 | struct inode *dir = dentry->d_parent->d_inode; | 1504 | struct inode *dir = dentry->d_parent->d_inode; |
1505 | struct super_block * sb = dir->i_sb; | 1505 | struct super_block *sb = dir->i_sb; |
1506 | struct ext4_dir_entry_2 *de; | 1506 | struct ext4_dir_entry_2 *de; |
1507 | int err; | 1507 | int err; |
1508 | 1508 | ||
1509 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1509 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
1510 | if (!frame) | 1510 | if (!frame) |
1511 | return err; | 1511 | return err; |
1512 | entries = frame->entries; | 1512 | entries = frame->entries; |
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | /* Block full, should compress but for now just split */ | 1529 | /* Block full, should compress but for now just split */ |
1530 | dxtrace(printk("using %u of %u node entries\n", | 1530 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
1531 | dx_get_count(entries), dx_get_limit(entries))); | 1531 | dx_get_count(entries), dx_get_limit(entries))); |
1532 | /* Need to split index? */ | 1532 | /* Need to split index? */ |
1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1559 | if (levels) { | 1559 | if (levels) { |
1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; | 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; |
1561 | unsigned hash2 = dx_get_hash(entries + icount1); | 1561 | unsigned hash2 = dx_get_hash(entries + icount1); |
1562 | dxtrace(printk("Split index %i/%i\n", icount1, icount2)); | 1562 | dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", |
1563 | icount1, icount2)); | ||
1563 | 1564 | ||
1564 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ | 1565 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ |
1565 | err = ext4_journal_get_write_access(handle, | 1566 | err = ext4_journal_get_write_access(handle, |
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1567 | if (err) | 1568 | if (err) |
1568 | goto journal_error; | 1569 | goto journal_error; |
1569 | 1570 | ||
1570 | memcpy ((char *) entries2, (char *) (entries + icount1), | 1571 | memcpy((char *) entries2, (char *) (entries + icount1), |
1571 | icount2 * sizeof(struct dx_entry)); | 1572 | icount2 * sizeof(struct dx_entry)); |
1572 | dx_set_count (entries, icount1); | 1573 | dx_set_count(entries, icount1); |
1573 | dx_set_count (entries2, icount2); | 1574 | dx_set_count(entries2, icount2); |
1574 | dx_set_limit (entries2, dx_node_limit(dir)); | 1575 | dx_set_limit(entries2, dx_node_limit(dir)); |
1575 | 1576 | ||
1576 | /* Which index block gets the new entry? */ | 1577 | /* Which index block gets the new entry? */ |
1577 | if (at - entries >= icount1) { | 1578 | if (at - entries >= icount1) { |
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1579 | frame->entries = entries = entries2; | 1580 | frame->entries = entries = entries2; |
1580 | swap(frame->bh, bh2); | 1581 | swap(frame->bh, bh2); |
1581 | } | 1582 | } |
1582 | dx_insert_block (frames + 0, hash2, newblock); | 1583 | dx_insert_block(frames + 0, hash2, newblock); |
1583 | dxtrace(dx_show_index ("node", frames[1].entries)); | 1584 | dxtrace(dx_show_index("node", frames[1].entries)); |
1584 | dxtrace(dx_show_index ("node", | 1585 | dxtrace(dx_show_index("node", |
1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1586 | ((struct dx_node *) bh2->b_data)->entries)); |
1586 | err = ext4_journal_dirty_metadata(handle, bh2); | 1587 | err = ext4_journal_dirty_metadata(handle, bh2); |
1587 | if (err) | 1588 | if (err) |
1588 | goto journal_error; | 1589 | goto journal_error; |
1589 | brelse (bh2); | 1590 | brelse (bh2); |
1590 | } else { | 1591 | } else { |
1591 | dxtrace(printk("Creating second level index...\n")); | 1592 | dxtrace(printk(KERN_DEBUG |
1593 | "Creating second level index...\n")); | ||
1592 | memcpy((char *) entries2, (char *) entries, | 1594 | memcpy((char *) entries2, (char *) entries, |
1593 | icount * sizeof(struct dx_entry)); | 1595 | icount * sizeof(struct dx_entry)); |
1594 | dx_set_limit(entries2, dx_node_limit(dir)); | 1596 | dx_set_limit(entries2, dx_node_limit(dir)); |
@@ -1630,12 +1632,12 @@ cleanup: | |||
1630 | * ext4_delete_entry deletes a directory entry by merging it with the | 1632 | * ext4_delete_entry deletes a directory entry by merging it with the |
1631 | * previous entry | 1633 | * previous entry |
1632 | */ | 1634 | */ |
1633 | static int ext4_delete_entry (handle_t *handle, | 1635 | static int ext4_delete_entry(handle_t *handle, |
1634 | struct inode * dir, | 1636 | struct inode *dir, |
1635 | struct ext4_dir_entry_2 * de_del, | 1637 | struct ext4_dir_entry_2 *de_del, |
1636 | struct buffer_head * bh) | 1638 | struct buffer_head *bh) |
1637 | { | 1639 | { |
1638 | struct ext4_dir_entry_2 * de, * pde; | 1640 | struct ext4_dir_entry_2 *de, *pde; |
1639 | int i; | 1641 | int i; |
1640 | 1642 | ||
1641 | i = 0; | 1643 | i = 0; |
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, | |||
1716 | * If the create succeeds, we fill in the inode information | 1718 | * If the create succeeds, we fill in the inode information |
1717 | * with d_instantiate(). | 1719 | * with d_instantiate(). |
1718 | */ | 1720 | */ |
1719 | static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, | 1721 | static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, |
1720 | struct nameidata *nd) | 1722 | struct nameidata *nd) |
1721 | { | 1723 | { |
1722 | handle_t *handle; | 1724 | handle_t *handle; |
1723 | struct inode * inode; | 1725 | struct inode *inode; |
1724 | int err, retries = 0; | 1726 | int err, retries = 0; |
1725 | 1727 | ||
1726 | retry: | 1728 | retry: |
@@ -1747,8 +1749,8 @@ retry: | |||
1747 | return err; | 1749 | return err; |
1748 | } | 1750 | } |
1749 | 1751 | ||
1750 | static int ext4_mknod (struct inode * dir, struct dentry *dentry, | 1752 | static int ext4_mknod(struct inode *dir, struct dentry *dentry, |
1751 | int mode, dev_t rdev) | 1753 | int mode, dev_t rdev) |
1752 | { | 1754 | { |
1753 | handle_t *handle; | 1755 | handle_t *handle; |
1754 | struct inode *inode; | 1756 | struct inode *inode; |
@@ -1767,11 +1769,11 @@ retry: | |||
1767 | if (IS_DIRSYNC(dir)) | 1769 | if (IS_DIRSYNC(dir)) |
1768 | handle->h_sync = 1; | 1770 | handle->h_sync = 1; |
1769 | 1771 | ||
1770 | inode = ext4_new_inode (handle, dir, mode); | 1772 | inode = ext4_new_inode(handle, dir, mode); |
1771 | err = PTR_ERR(inode); | 1773 | err = PTR_ERR(inode); |
1772 | if (!IS_ERR(inode)) { | 1774 | if (!IS_ERR(inode)) { |
1773 | init_special_inode(inode, inode->i_mode, rdev); | 1775 | init_special_inode(inode, inode->i_mode, rdev); |
1774 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1776 | #ifdef CONFIG_EXT4_FS_XATTR |
1775 | inode->i_op = &ext4_special_inode_operations; | 1777 | inode->i_op = &ext4_special_inode_operations; |
1776 | #endif | 1778 | #endif |
1777 | err = ext4_add_nondir(handle, dentry, inode); | 1779 | err = ext4_add_nondir(handle, dentry, inode); |
@@ -1782,12 +1784,12 @@ retry: | |||
1782 | return err; | 1784 | return err; |
1783 | } | 1785 | } |
1784 | 1786 | ||
1785 | static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) | 1787 | static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
1786 | { | 1788 | { |
1787 | handle_t *handle; | 1789 | handle_t *handle; |
1788 | struct inode * inode; | 1790 | struct inode *inode; |
1789 | struct buffer_head * dir_block; | 1791 | struct buffer_head *dir_block; |
1790 | struct ext4_dir_entry_2 * de; | 1792 | struct ext4_dir_entry_2 *de; |
1791 | int err, retries = 0; | 1793 | int err, retries = 0; |
1792 | 1794 | ||
1793 | if (EXT4_DIR_LINK_MAX(dir)) | 1795 | if (EXT4_DIR_LINK_MAX(dir)) |
@@ -1803,7 +1805,7 @@ retry: | |||
1803 | if (IS_DIRSYNC(dir)) | 1805 | if (IS_DIRSYNC(dir)) |
1804 | handle->h_sync = 1; | 1806 | handle->h_sync = 1; |
1805 | 1807 | ||
1806 | inode = ext4_new_inode (handle, dir, S_IFDIR | mode); | 1808 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); |
1807 | err = PTR_ERR(inode); | 1809 | err = PTR_ERR(inode); |
1808 | if (IS_ERR(inode)) | 1810 | if (IS_ERR(inode)) |
1809 | goto out_stop; | 1811 | goto out_stop; |
@@ -1811,7 +1813,7 @@ retry: | |||
1811 | inode->i_op = &ext4_dir_inode_operations; | 1813 | inode->i_op = &ext4_dir_inode_operations; |
1812 | inode->i_fop = &ext4_dir_operations; | 1814 | inode->i_fop = &ext4_dir_operations; |
1813 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 1815 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
1814 | dir_block = ext4_bread (handle, inode, 0, 1, &err); | 1816 | dir_block = ext4_bread(handle, inode, 0, 1, &err); |
1815 | if (!dir_block) | 1817 | if (!dir_block) |
1816 | goto out_clear_inode; | 1818 | goto out_clear_inode; |
1817 | BUFFER_TRACE(dir_block, "get_write_access"); | 1819 | BUFFER_TRACE(dir_block, "get_write_access"); |
@@ -1820,26 +1822,26 @@ retry: | |||
1820 | de->inode = cpu_to_le32(inode->i_ino); | 1822 | de->inode = cpu_to_le32(inode->i_ino); |
1821 | de->name_len = 1; | 1823 | de->name_len = 1; |
1822 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); | 1824 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
1823 | strcpy (de->name, "."); | 1825 | strcpy(de->name, "."); |
1824 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1826 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1825 | de = ext4_next_entry(de); | 1827 | de = ext4_next_entry(de); |
1826 | de->inode = cpu_to_le32(dir->i_ino); | 1828 | de->inode = cpu_to_le32(dir->i_ino); |
1827 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - | 1829 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
1828 | EXT4_DIR_REC_LEN(1)); | 1830 | EXT4_DIR_REC_LEN(1)); |
1829 | de->name_len = 2; | 1831 | de->name_len = 2; |
1830 | strcpy (de->name, ".."); | 1832 | strcpy(de->name, ".."); |
1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1833 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1832 | inode->i_nlink = 2; | 1834 | inode->i_nlink = 2; |
1833 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); | 1835 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); |
1834 | ext4_journal_dirty_metadata(handle, dir_block); | 1836 | ext4_journal_dirty_metadata(handle, dir_block); |
1835 | brelse (dir_block); | 1837 | brelse(dir_block); |
1836 | ext4_mark_inode_dirty(handle, inode); | 1838 | ext4_mark_inode_dirty(handle, inode); |
1837 | err = ext4_add_entry (handle, dentry, inode); | 1839 | err = ext4_add_entry(handle, dentry, inode); |
1838 | if (err) { | 1840 | if (err) { |
1839 | out_clear_inode: | 1841 | out_clear_inode: |
1840 | clear_nlink(inode); | 1842 | clear_nlink(inode); |
1841 | ext4_mark_inode_dirty(handle, inode); | 1843 | ext4_mark_inode_dirty(handle, inode); |
1842 | iput (inode); | 1844 | iput(inode); |
1843 | goto out_stop; | 1845 | goto out_stop; |
1844 | } | 1846 | } |
1845 | ext4_inc_count(handle, dir); | 1847 | ext4_inc_count(handle, dir); |
@@ -1856,17 +1858,17 @@ out_stop: | |||
1856 | /* | 1858 | /* |
1857 | * routine to check that the specified directory is empty (for rmdir) | 1859 | * routine to check that the specified directory is empty (for rmdir) |
1858 | */ | 1860 | */ |
1859 | static int empty_dir (struct inode * inode) | 1861 | static int empty_dir(struct inode *inode) |
1860 | { | 1862 | { |
1861 | unsigned long offset; | 1863 | unsigned long offset; |
1862 | struct buffer_head * bh; | 1864 | struct buffer_head *bh; |
1863 | struct ext4_dir_entry_2 * de, * de1; | 1865 | struct ext4_dir_entry_2 *de, *de1; |
1864 | struct super_block * sb; | 1866 | struct super_block *sb; |
1865 | int err = 0; | 1867 | int err = 0; |
1866 | 1868 | ||
1867 | sb = inode->i_sb; | 1869 | sb = inode->i_sb; |
1868 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1870 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1869 | !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { | 1871 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1870 | if (err) | 1872 | if (err) |
1871 | ext4_error(inode->i_sb, __func__, | 1873 | ext4_error(inode->i_sb, __func__, |
1872 | "error %d reading directory #%lu offset 0", | 1874 | "error %d reading directory #%lu offset 0", |
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) | |||
1881 | de1 = ext4_next_entry(de); | 1883 | de1 = ext4_next_entry(de); |
1882 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1884 | if (le32_to_cpu(de->inode) != inode->i_ino || |
1883 | !le32_to_cpu(de1->inode) || | 1885 | !le32_to_cpu(de1->inode) || |
1884 | strcmp (".", de->name) || | 1886 | strcmp(".", de->name) || |
1885 | strcmp ("..", de1->name)) { | 1887 | strcmp("..", de1->name)) { |
1886 | ext4_warning (inode->i_sb, "empty_dir", | 1888 | ext4_warning(inode->i_sb, "empty_dir", |
1887 | "bad directory (dir #%lu) - no `.' or `..'", | 1889 | "bad directory (dir #%lu) - no `.' or `..'", |
1888 | inode->i_ino); | 1890 | inode->i_ino); |
1889 | brelse (bh); | 1891 | brelse(bh); |
1890 | return 1; | 1892 | return 1; |
1891 | } | 1893 | } |
1892 | offset = ext4_rec_len_from_disk(de->rec_len) + | 1894 | offset = ext4_rec_len_from_disk(de->rec_len) + |
1893 | ext4_rec_len_from_disk(de1->rec_len); | 1895 | ext4_rec_len_from_disk(de1->rec_len); |
1894 | de = ext4_next_entry(de1); | 1896 | de = ext4_next_entry(de1); |
1895 | while (offset < inode->i_size ) { | 1897 | while (offset < inode->i_size) { |
1896 | if (!bh || | 1898 | if (!bh || |
1897 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1899 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1898 | err = 0; | 1900 | err = 0; |
1899 | brelse (bh); | 1901 | brelse(bh); |
1900 | bh = ext4_bread (NULL, inode, | 1902 | bh = ext4_bread(NULL, inode, |
1901 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1903 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
1902 | if (!bh) { | 1904 | if (!bh) { |
1903 | if (err) | 1905 | if (err) |
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) | |||
1917 | continue; | 1919 | continue; |
1918 | } | 1920 | } |
1919 | if (le32_to_cpu(de->inode)) { | 1921 | if (le32_to_cpu(de->inode)) { |
1920 | brelse (bh); | 1922 | brelse(bh); |
1921 | return 0; | 1923 | return 0; |
1922 | } | 1924 | } |
1923 | offset += ext4_rec_len_from_disk(de->rec_len); | 1925 | offset += ext4_rec_len_from_disk(de->rec_len); |
1924 | de = ext4_next_entry(de); | 1926 | de = ext4_next_entry(de); |
1925 | } | 1927 | } |
1926 | brelse (bh); | 1928 | brelse(bh); |
1927 | return 1; | 1929 | return 1; |
1928 | } | 1930 | } |
1929 | 1931 | ||
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1954 | * ->i_nlink. For, say it, character device. Not a regular file, | 1956 | * ->i_nlink. For, say it, character device. Not a regular file, |
1955 | * not a directory, not a symlink and ->i_nlink > 0. | 1957 | * not a directory, not a symlink and ->i_nlink > 0. |
1956 | */ | 1958 | */ |
1957 | J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1959 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1958 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1960 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
1959 | 1961 | ||
1960 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); | 1962 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); |
1961 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 1963 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
@@ -2069,12 +2071,12 @@ out_brelse: | |||
2069 | goto out_err; | 2071 | goto out_err; |
2070 | } | 2072 | } |
2071 | 2073 | ||
2072 | static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | 2074 | static int ext4_rmdir(struct inode *dir, struct dentry *dentry) |
2073 | { | 2075 | { |
2074 | int retval; | 2076 | int retval; |
2075 | struct inode * inode; | 2077 | struct inode *inode; |
2076 | struct buffer_head * bh; | 2078 | struct buffer_head *bh; |
2077 | struct ext4_dir_entry_2 * de; | 2079 | struct ext4_dir_entry_2 *de; |
2078 | handle_t *handle; | 2080 | handle_t *handle; |
2079 | 2081 | ||
2080 | /* Initialize quotas before so that eventual writes go in | 2082 | /* Initialize quotas before so that eventual writes go in |
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2085 | return PTR_ERR(handle); | 2087 | return PTR_ERR(handle); |
2086 | 2088 | ||
2087 | retval = -ENOENT; | 2089 | retval = -ENOENT; |
2088 | bh = ext4_find_entry (dentry, &de); | 2090 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2089 | if (!bh) | 2091 | if (!bh) |
2090 | goto end_rmdir; | 2092 | goto end_rmdir; |
2091 | 2093 | ||
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2099 | goto end_rmdir; | 2101 | goto end_rmdir; |
2100 | 2102 | ||
2101 | retval = -ENOTEMPTY; | 2103 | retval = -ENOTEMPTY; |
2102 | if (!empty_dir (inode)) | 2104 | if (!empty_dir(inode)) |
2103 | goto end_rmdir; | 2105 | goto end_rmdir; |
2104 | 2106 | ||
2105 | retval = ext4_delete_entry(handle, dir, de, bh); | 2107 | retval = ext4_delete_entry(handle, dir, de, bh); |
2106 | if (retval) | 2108 | if (retval) |
2107 | goto end_rmdir; | 2109 | goto end_rmdir; |
2108 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2110 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
2109 | ext4_warning (inode->i_sb, "ext4_rmdir", | 2111 | ext4_warning(inode->i_sb, "ext4_rmdir", |
2110 | "empty directory has too many links (%d)", | 2112 | "empty directory has too many links (%d)", |
2111 | inode->i_nlink); | 2113 | inode->i_nlink); |
2112 | inode->i_version++; | 2114 | inode->i_version++; |
2113 | clear_nlink(inode); | 2115 | clear_nlink(inode); |
2114 | /* There's no need to set i_disksize: the fact that i_nlink is | 2116 | /* There's no need to set i_disksize: the fact that i_nlink is |
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2124 | 2126 | ||
2125 | end_rmdir: | 2127 | end_rmdir: |
2126 | ext4_journal_stop(handle); | 2128 | ext4_journal_stop(handle); |
2127 | brelse (bh); | 2129 | brelse(bh); |
2128 | return retval; | 2130 | return retval; |
2129 | } | 2131 | } |
2130 | 2132 | ||
2131 | static int ext4_unlink(struct inode * dir, struct dentry *dentry) | 2133 | static int ext4_unlink(struct inode *dir, struct dentry *dentry) |
2132 | { | 2134 | { |
2133 | int retval; | 2135 | int retval; |
2134 | struct inode * inode; | 2136 | struct inode *inode; |
2135 | struct buffer_head * bh; | 2137 | struct buffer_head *bh; |
2136 | struct ext4_dir_entry_2 * de; | 2138 | struct ext4_dir_entry_2 *de; |
2137 | handle_t *handle; | 2139 | handle_t *handle; |
2138 | 2140 | ||
2139 | /* Initialize quotas before so that eventual writes go | 2141 | /* Initialize quotas before so that eventual writes go |
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2147 | handle->h_sync = 1; | 2149 | handle->h_sync = 1; |
2148 | 2150 | ||
2149 | retval = -ENOENT; | 2151 | retval = -ENOENT; |
2150 | bh = ext4_find_entry (dentry, &de); | 2152 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2151 | if (!bh) | 2153 | if (!bh) |
2152 | goto end_unlink; | 2154 | goto end_unlink; |
2153 | 2155 | ||
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2158 | goto end_unlink; | 2160 | goto end_unlink; |
2159 | 2161 | ||
2160 | if (!inode->i_nlink) { | 2162 | if (!inode->i_nlink) { |
2161 | ext4_warning (inode->i_sb, "ext4_unlink", | 2163 | ext4_warning(inode->i_sb, "ext4_unlink", |
2162 | "Deleting nonexistent file (%lu), %d", | 2164 | "Deleting nonexistent file (%lu), %d", |
2163 | inode->i_ino, inode->i_nlink); | 2165 | inode->i_ino, inode->i_nlink); |
2164 | inode->i_nlink = 1; | 2166 | inode->i_nlink = 1; |
2165 | } | 2167 | } |
2166 | retval = ext4_delete_entry(handle, dir, de, bh); | 2168 | retval = ext4_delete_entry(handle, dir, de, bh); |
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2178 | 2180 | ||
2179 | end_unlink: | 2181 | end_unlink: |
2180 | ext4_journal_stop(handle); | 2182 | ext4_journal_stop(handle); |
2181 | brelse (bh); | 2183 | brelse(bh); |
2182 | return retval; | 2184 | return retval; |
2183 | } | 2185 | } |
2184 | 2186 | ||
2185 | static int ext4_symlink (struct inode * dir, | 2187 | static int ext4_symlink(struct inode *dir, |
2186 | struct dentry *dentry, const char * symname) | 2188 | struct dentry *dentry, const char *symname) |
2187 | { | 2189 | { |
2188 | handle_t *handle; | 2190 | handle_t *handle; |
2189 | struct inode * inode; | 2191 | struct inode *inode; |
2190 | int l, err, retries = 0; | 2192 | int l, err, retries = 0; |
2191 | 2193 | ||
2192 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
@@ -2203,12 +2205,12 @@ retry: | |||
2203 | if (IS_DIRSYNC(dir)) | 2205 | if (IS_DIRSYNC(dir)) |
2204 | handle->h_sync = 1; | 2206 | handle->h_sync = 1; |
2205 | 2207 | ||
2206 | inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); | 2208 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); |
2207 | err = PTR_ERR(inode); | 2209 | err = PTR_ERR(inode); |
2208 | if (IS_ERR(inode)) | 2210 | if (IS_ERR(inode)) |
2209 | goto out_stop; | 2211 | goto out_stop; |
2210 | 2212 | ||
2211 | if (l > sizeof (EXT4_I(inode)->i_data)) { | 2213 | if (l > sizeof(EXT4_I(inode)->i_data)) { |
2212 | inode->i_op = &ext4_symlink_inode_operations; | 2214 | inode->i_op = &ext4_symlink_inode_operations; |
2213 | ext4_set_aops(inode); | 2215 | ext4_set_aops(inode); |
2214 | /* | 2216 | /* |
@@ -2221,14 +2223,14 @@ retry: | |||
2221 | if (err) { | 2223 | if (err) { |
2222 | clear_nlink(inode); | 2224 | clear_nlink(inode); |
2223 | ext4_mark_inode_dirty(handle, inode); | 2225 | ext4_mark_inode_dirty(handle, inode); |
2224 | iput (inode); | 2226 | iput(inode); |
2225 | goto out_stop; | 2227 | goto out_stop; |
2226 | } | 2228 | } |
2227 | } else { | 2229 | } else { |
2228 | /* clear the extent format for fast symlink */ | 2230 | /* clear the extent format for fast symlink */ |
2229 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2231 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; |
2230 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2232 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2231 | memcpy((char*)&EXT4_I(inode)->i_data,symname,l); | 2233 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2232 | inode->i_size = l-1; | 2234 | inode->i_size = l-1; |
2233 | } | 2235 | } |
2234 | EXT4_I(inode)->i_disksize = inode->i_size; | 2236 | EXT4_I(inode)->i_disksize = inode->i_size; |
@@ -2240,8 +2242,8 @@ out_stop: | |||
2240 | return err; | 2242 | return err; |
2241 | } | 2243 | } |
2242 | 2244 | ||
2243 | static int ext4_link (struct dentry * old_dentry, | 2245 | static int ext4_link(struct dentry *old_dentry, |
2244 | struct inode * dir, struct dentry *dentry) | 2246 | struct inode *dir, struct dentry *dentry) |
2245 | { | 2247 | { |
2246 | handle_t *handle; | 2248 | handle_t *handle; |
2247 | struct inode *inode = old_dentry->d_inode; | 2249 | struct inode *inode = old_dentry->d_inode; |
@@ -2284,13 +2286,13 @@ retry: | |||
2284 | * Anybody can rename anything with this: the permission checks are left to the | 2286 | * Anybody can rename anything with this: the permission checks are left to the |
2285 | * higher-level routines. | 2287 | * higher-level routines. |
2286 | */ | 2288 | */ |
2287 | static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | 2289 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
2288 | struct inode * new_dir,struct dentry *new_dentry) | 2290 | struct inode *new_dir, struct dentry *new_dentry) |
2289 | { | 2291 | { |
2290 | handle_t *handle; | 2292 | handle_t *handle; |
2291 | struct inode * old_inode, * new_inode; | 2293 | struct inode *old_inode, *new_inode; |
2292 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2294 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
2293 | struct ext4_dir_entry_2 * old_de, * new_de; | 2295 | struct ext4_dir_entry_2 *old_de, *new_de; |
2294 | int retval; | 2296 | int retval; |
2295 | 2297 | ||
2296 | old_bh = new_bh = dir_bh = NULL; | 2298 | old_bh = new_bh = dir_bh = NULL; |
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2308 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2310 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
2309 | handle->h_sync = 1; | 2311 | handle->h_sync = 1; |
2310 | 2312 | ||
2311 | old_bh = ext4_find_entry (old_dentry, &old_de); | 2313 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); |
2312 | /* | 2314 | /* |
2313 | * Check for inode number is _not_ due to possible IO errors. | 2315 | * Check for inode number is _not_ due to possible IO errors. |
2314 | * We might rmdir the source, keep it as pwd of some process | 2316 | * We might rmdir the source, keep it as pwd of some process |
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2321 | goto end_rename; | 2323 | goto end_rename; |
2322 | 2324 | ||
2323 | new_inode = new_dentry->d_inode; | 2325 | new_inode = new_dentry->d_inode; |
2324 | new_bh = ext4_find_entry (new_dentry, &new_de); | 2326 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); |
2325 | if (new_bh) { | 2327 | if (new_bh) { |
2326 | if (!new_inode) { | 2328 | if (!new_inode) { |
2327 | brelse (new_bh); | 2329 | brelse(new_bh); |
2328 | new_bh = NULL; | 2330 | new_bh = NULL; |
2329 | } | 2331 | } |
2330 | } | 2332 | } |
2331 | if (S_ISDIR(old_inode->i_mode)) { | 2333 | if (S_ISDIR(old_inode->i_mode)) { |
2332 | if (new_inode) { | 2334 | if (new_inode) { |
2333 | retval = -ENOTEMPTY; | 2335 | retval = -ENOTEMPTY; |
2334 | if (!empty_dir (new_inode)) | 2336 | if (!empty_dir(new_inode)) |
2335 | goto end_rename; | 2337 | goto end_rename; |
2336 | } | 2338 | } |
2337 | retval = -EIO; | 2339 | retval = -EIO; |
2338 | dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); | 2340 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
2339 | if (!dir_bh) | 2341 | if (!dir_bh) |
2340 | goto end_rename; | 2342 | goto end_rename; |
2341 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) | 2343 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) |
2342 | goto end_rename; | 2344 | goto end_rename; |
2343 | retval = -EMLINK; | 2345 | retval = -EMLINK; |
2344 | if (!new_inode && new_dir!=old_dir && | 2346 | if (!new_inode && new_dir != old_dir && |
2345 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2347 | new_dir->i_nlink >= EXT4_LINK_MAX) |
2346 | goto end_rename; | 2348 | goto end_rename; |
2347 | } | 2349 | } |
2348 | if (!new_bh) { | 2350 | if (!new_bh) { |
2349 | retval = ext4_add_entry (handle, new_dentry, old_inode); | 2351 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
2350 | if (retval) | 2352 | if (retval) |
2351 | goto end_rename; | 2353 | goto end_rename; |
2352 | } else { | 2354 | } else { |
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2388 | struct buffer_head *old_bh2; | 2390 | struct buffer_head *old_bh2; |
2389 | struct ext4_dir_entry_2 *old_de2; | 2391 | struct ext4_dir_entry_2 *old_de2; |
2390 | 2392 | ||
2391 | old_bh2 = ext4_find_entry(old_dentry, &old_de2); | 2393 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); |
2392 | if (old_bh2) { | 2394 | if (old_bh2) { |
2393 | retval = ext4_delete_entry(handle, old_dir, | 2395 | retval = ext4_delete_entry(handle, old_dir, |
2394 | old_de2, old_bh2); | 2396 | old_de2, old_bh2); |
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2433 | retval = 0; | 2435 | retval = 0; |
2434 | 2436 | ||
2435 | end_rename: | 2437 | end_rename: |
2436 | brelse (dir_bh); | 2438 | brelse(dir_bh); |
2437 | brelse (old_bh); | 2439 | brelse(old_bh); |
2438 | brelse (new_bh); | 2440 | brelse(new_bh); |
2439 | ext4_journal_stop(handle); | 2441 | ext4_journal_stop(handle); |
2440 | return retval; | 2442 | return retval; |
2441 | } | 2443 | } |
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2454 | .mknod = ext4_mknod, | 2456 | .mknod = ext4_mknod, |
2455 | .rename = ext4_rename, | 2457 | .rename = ext4_rename, |
2456 | .setattr = ext4_setattr, | 2458 | .setattr = ext4_setattr, |
2457 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2459 | #ifdef CONFIG_EXT4_FS_XATTR |
2458 | .setxattr = generic_setxattr, | 2460 | .setxattr = generic_setxattr, |
2459 | .getxattr = generic_getxattr, | 2461 | .getxattr = generic_getxattr, |
2460 | .listxattr = ext4_listxattr, | 2462 | .listxattr = ext4_listxattr, |
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2465 | 2467 | ||
2466 | const struct inode_operations ext4_special_inode_operations = { | 2468 | const struct inode_operations ext4_special_inode_operations = { |
2467 | .setattr = ext4_setattr, | 2469 | .setattr = ext4_setattr, |
2468 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2470 | #ifdef CONFIG_EXT4_FS_XATTR |
2469 | .setxattr = generic_setxattr, | 2471 | .setxattr = generic_setxattr, |
2470 | .getxattr = generic_getxattr, | 2472 | .getxattr = generic_getxattr, |
2471 | .listxattr = ext4_listxattr, | 2473 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b3d35604ea18..b6ec1843a015 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", | 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", |
417 | gdb_num); | 417 | gdb_num); |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * If we are not using the primary superblock/GDT copy don't resize, | 420 | * If we are not using the primary superblock/GDT copy don't resize, |
421 | * because the user tools have no way of handling this. Probably a | 421 | * because the user tools have no way of handling this. Probably a |
422 | * bad time to do it anyways. | 422 | * bad time to do it anyways. |
423 | */ | 423 | */ |
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
870 | * We can allocate memory for mb_alloc based on the new group | 870 | * We can allocate memory for mb_alloc based on the new group |
871 | * descriptor | 871 | * descriptor |
872 | */ | 872 | */ |
873 | if (test_opt(sb, MBALLOC)) { | 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
874 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | 874 | if (err) |
875 | if (err) | 875 | goto exit_journal; |
876 | goto exit_journal; | 876 | |
877 | } | ||
878 | /* | 877 | /* |
879 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
880 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
929 | percpu_counter_add(&sbi->s_freeinodes_counter, | 928 | percpu_counter_add(&sbi->s_freeinodes_counter, |
930 | EXT4_INODES_PER_GROUP(sb)); | 929 | EXT4_INODES_PER_GROUP(sb)); |
931 | 930 | ||
931 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
932 | ext4_group_t flex_group; | ||
933 | flex_group = ext4_flex_group(sbi, input->group); | ||
934 | sbi->s_flex_groups[flex_group].free_blocks += | ||
935 | input->free_blocks_count; | ||
936 | sbi->s_flex_groups[flex_group].free_inodes += | ||
937 | EXT4_INODES_PER_GROUP(sb); | ||
938 | } | ||
939 | |||
932 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); | 940 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); |
933 | sb->s_dirt = 1; | 941 | sb->s_dirt = 1; |
934 | 942 | ||
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
964 | ext4_group_t o_groups_count; | 972 | ext4_group_t o_groups_count; |
965 | ext4_grpblk_t last; | 973 | ext4_grpblk_t last; |
966 | ext4_grpblk_t add; | 974 | ext4_grpblk_t add; |
967 | struct buffer_head * bh; | 975 | struct buffer_head *bh; |
968 | handle_t *handle; | 976 | handle_t *handle; |
969 | int err; | 977 | int err; |
970 | unsigned long freed_blocks; | 978 | unsigned long freed_blocks; |
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1077 | /* | 1085 | /* |
1078 | * Mark mballoc pages as not up to date so that they will be updated | 1086 | * Mark mballoc pages as not up to date so that they will be updated |
1079 | * next time they are loaded by ext4_mb_load_buddy. | 1087 | * next time they are loaded by ext4_mb_load_buddy. |
1088 | * | ||
1089 | * XXX Bad, Bad, BAD!!! We should not be overloading the | ||
1090 | * Uptodate flag, particularly on thte bitmap bh, as way of | ||
1091 | * hinting to ext4_mb_load_buddy() that it needs to be | ||
1092 | * overloaded. A user could take a LVM snapshot, then do an | ||
1093 | * on-line fsck, and clear the uptodate flag, and this would | ||
1094 | * not be a bug in userspace, but a bug in the kernel. FIXME!!! | ||
1080 | */ | 1095 | */ |
1081 | if (test_opt(sb, MBALLOC)) { | 1096 | { |
1082 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1097 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1083 | struct inode *inode = sbi->s_buddy_cache; | 1098 | struct inode *inode = sbi->s_buddy_cache; |
1084 | int blocks_per_page; | 1099 | int blocks_per_page; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 566344b926b7..0e661c569660 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/proc_fs.h> | ||
38 | #include <linux/marker.h> | ||
37 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
38 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
@@ -45,6 +47,8 @@ | |||
45 | #include "namei.h" | 47 | #include "namei.h" |
46 | #include "group.h" | 48 | #include "group.h" |
47 | 49 | ||
50 | struct proc_dir_entry *ext4_proc_root; | ||
51 | |||
48 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 52 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
49 | unsigned long journal_devnum); | 53 | unsigned long journal_devnum); |
50 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, | 54 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, |
@@ -508,10 +512,12 @@ static void ext4_put_super(struct super_block *sb) | |||
508 | if (!(sb->s_flags & MS_RDONLY)) { | 512 | if (!(sb->s_flags & MS_RDONLY)) { |
509 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 513 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
510 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 514 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
511 | BUFFER_TRACE(sbi->s_sbh, "marking dirty"); | ||
512 | mark_buffer_dirty(sbi->s_sbh); | ||
513 | ext4_commit_super(sb, es, 1); | 515 | ext4_commit_super(sb, es, 1); |
514 | } | 516 | } |
517 | if (sbi->s_proc) { | ||
518 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
519 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
520 | } | ||
515 | 521 | ||
516 | for (i = 0; i < sbi->s_gdb_count; i++) | 522 | for (i = 0; i < sbi->s_gdb_count; i++) |
517 | brelse(sbi->s_group_desc[i]); | 523 | brelse(sbi->s_group_desc[i]); |
@@ -520,6 +526,7 @@ static void ext4_put_super(struct super_block *sb) | |||
520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 526 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 527 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 528 | percpu_counter_destroy(&sbi->s_dirs_counter); |
529 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
523 | brelse(sbi->s_sbh); | 530 | brelse(sbi->s_sbh); |
524 | #ifdef CONFIG_QUOTA | 531 | #ifdef CONFIG_QUOTA |
525 | for (i = 0; i < MAXQUOTAS; i++) | 532 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -562,11 +569,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
562 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 569 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
563 | if (!ei) | 570 | if (!ei) |
564 | return NULL; | 571 | return NULL; |
565 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 572 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
566 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 573 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
567 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 574 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
568 | #endif | 575 | #endif |
569 | ei->i_block_alloc_info = NULL; | ||
570 | ei->vfs_inode.i_version = 1; | 576 | ei->vfs_inode.i_version = 1; |
571 | ei->vfs_inode.i_data.writeback_index = 0; | 577 | ei->vfs_inode.i_data.writeback_index = 0; |
572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 578 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
@@ -599,7 +605,7 @@ static void init_once(void *foo) | |||
599 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 605 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
600 | 606 | ||
601 | INIT_LIST_HEAD(&ei->i_orphan); | 607 | INIT_LIST_HEAD(&ei->i_orphan); |
602 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 608 | #ifdef CONFIG_EXT4_FS_XATTR |
603 | init_rwsem(&ei->xattr_sem); | 609 | init_rwsem(&ei->xattr_sem); |
604 | #endif | 610 | #endif |
605 | init_rwsem(&ei->i_data_sem); | 611 | init_rwsem(&ei->i_data_sem); |
@@ -625,8 +631,7 @@ static void destroy_inodecache(void) | |||
625 | 631 | ||
626 | static void ext4_clear_inode(struct inode *inode) | 632 | static void ext4_clear_inode(struct inode *inode) |
627 | { | 633 | { |
628 | struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; | 634 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
629 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | ||
630 | if (EXT4_I(inode)->i_acl && | 635 | if (EXT4_I(inode)->i_acl && |
631 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { | 636 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { |
632 | posix_acl_release(EXT4_I(inode)->i_acl); | 637 | posix_acl_release(EXT4_I(inode)->i_acl); |
@@ -638,10 +643,7 @@ static void ext4_clear_inode(struct inode *inode) | |||
638 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; | 643 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; |
639 | } | 644 | } |
640 | #endif | 645 | #endif |
641 | ext4_discard_reservation(inode); | 646 | ext4_discard_preallocations(inode); |
642 | EXT4_I(inode)->i_block_alloc_info = NULL; | ||
643 | if (unlikely(rsv)) | ||
644 | kfree(rsv); | ||
645 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 647 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
646 | &EXT4_I(inode)->jinode); | 648 | &EXT4_I(inode)->jinode); |
647 | } | 649 | } |
@@ -654,7 +656,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
654 | 656 | ||
655 | if (sbi->s_jquota_fmt) | 657 | if (sbi->s_jquota_fmt) |
656 | seq_printf(seq, ",jqfmt=%s", | 658 | seq_printf(seq, ",jqfmt=%s", |
657 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | 659 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); |
658 | 660 | ||
659 | if (sbi->s_qf_names[USRQUOTA]) | 661 | if (sbi->s_qf_names[USRQUOTA]) |
660 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 662 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
@@ -718,7 +720,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
718 | seq_puts(seq, ",debug"); | 720 | seq_puts(seq, ",debug"); |
719 | if (test_opt(sb, OLDALLOC)) | 721 | if (test_opt(sb, OLDALLOC)) |
720 | seq_puts(seq, ",oldalloc"); | 722 | seq_puts(seq, ",oldalloc"); |
721 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 723 | #ifdef CONFIG_EXT4_FS_XATTR |
722 | if (test_opt(sb, XATTR_USER) && | 724 | if (test_opt(sb, XATTR_USER) && |
723 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | 725 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) |
724 | seq_puts(seq, ",user_xattr"); | 726 | seq_puts(seq, ",user_xattr"); |
@@ -727,7 +729,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
727 | seq_puts(seq, ",nouser_xattr"); | 729 | seq_puts(seq, ",nouser_xattr"); |
728 | } | 730 | } |
729 | #endif | 731 | #endif |
730 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 732 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
731 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 733 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
732 | seq_puts(seq, ",acl"); | 734 | seq_puts(seq, ",acl"); |
733 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 735 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
@@ -752,8 +754,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
752 | seq_puts(seq, ",nobh"); | 754 | seq_puts(seq, ",nobh"); |
753 | if (!test_opt(sb, EXTENTS)) | 755 | if (!test_opt(sb, EXTENTS)) |
754 | seq_puts(seq, ",noextents"); | 756 | seq_puts(seq, ",noextents"); |
755 | if (!test_opt(sb, MBALLOC)) | ||
756 | seq_puts(seq, ",nomballoc"); | ||
757 | if (test_opt(sb, I_VERSION)) | 757 | if (test_opt(sb, I_VERSION)) |
758 | seq_puts(seq, ",i_version"); | 758 | seq_puts(seq, ",i_version"); |
759 | if (!test_opt(sb, DELALLOC)) | 759 | if (!test_opt(sb, DELALLOC)) |
@@ -773,6 +773,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
773 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 773 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
774 | seq_puts(seq, ",data=writeback"); | 774 | seq_puts(seq, ",data=writeback"); |
775 | 775 | ||
776 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
777 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
778 | sbi->s_inode_readahead_blks); | ||
779 | |||
776 | ext4_show_quota_options(seq, sb); | 780 | ext4_show_quota_options(seq, sb); |
777 | return 0; | 781 | return 0; |
778 | } | 782 | } |
@@ -822,7 +826,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
822 | } | 826 | } |
823 | 827 | ||
824 | #ifdef CONFIG_QUOTA | 828 | #ifdef CONFIG_QUOTA |
825 | #define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") | 829 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
826 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 830 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
827 | 831 | ||
828 | static int ext4_dquot_initialize(struct inode *inode, int type); | 832 | static int ext4_dquot_initialize(struct inode *inode, int type); |
@@ -907,6 +911,7 @@ enum { | |||
907 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 911 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
908 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 912 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
909 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 913 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
914 | Opt_inode_readahead_blks | ||
910 | }; | 915 | }; |
911 | 916 | ||
912 | static match_table_t tokens = { | 917 | static match_table_t tokens = { |
@@ -967,6 +972,7 @@ static match_table_t tokens = { | |||
967 | {Opt_resize, "resize"}, | 972 | {Opt_resize, "resize"}, |
968 | {Opt_delalloc, "delalloc"}, | 973 | {Opt_delalloc, "delalloc"}, |
969 | {Opt_nodelalloc, "nodelalloc"}, | 974 | {Opt_nodelalloc, "nodelalloc"}, |
975 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | ||
970 | {Opt_err, NULL}, | 976 | {Opt_err, NULL}, |
971 | }; | 977 | }; |
972 | 978 | ||
@@ -981,7 +987,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
981 | /*todo: use simple_strtoll with >32bit ext4 */ | 987 | /*todo: use simple_strtoll with >32bit ext4 */ |
982 | sb_block = simple_strtoul(options, &options, 0); | 988 | sb_block = simple_strtoul(options, &options, 0); |
983 | if (*options && *options != ',') { | 989 | if (*options && *options != ',') { |
984 | printk("EXT4-fs: Invalid sb specification: %s\n", | 990 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
985 | (char *) *data); | 991 | (char *) *data); |
986 | return 1; | 992 | return 1; |
987 | } | 993 | } |
@@ -1072,7 +1078,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1072 | case Opt_orlov: | 1078 | case Opt_orlov: |
1073 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1079 | clear_opt(sbi->s_mount_opt, OLDALLOC); |
1074 | break; | 1080 | break; |
1075 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1081 | #ifdef CONFIG_EXT4_FS_XATTR |
1076 | case Opt_user_xattr: | 1082 | case Opt_user_xattr: |
1077 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1083 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1078 | break; | 1084 | break; |
@@ -1082,10 +1088,11 @@ static int parse_options(char *options, struct super_block *sb, | |||
1082 | #else | 1088 | #else |
1083 | case Opt_user_xattr: | 1089 | case Opt_user_xattr: |
1084 | case Opt_nouser_xattr: | 1090 | case Opt_nouser_xattr: |
1085 | printk("EXT4 (no)user_xattr options not supported\n"); | 1091 | printk(KERN_ERR "EXT4 (no)user_xattr options " |
1092 | "not supported\n"); | ||
1086 | break; | 1093 | break; |
1087 | #endif | 1094 | #endif |
1088 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1095 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1089 | case Opt_acl: | 1096 | case Opt_acl: |
1090 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1097 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1091 | break; | 1098 | break; |
@@ -1095,7 +1102,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1095 | #else | 1102 | #else |
1096 | case Opt_acl: | 1103 | case Opt_acl: |
1097 | case Opt_noacl: | 1104 | case Opt_noacl: |
1098 | printk("EXT4 (no)acl options not supported\n"); | 1105 | printk(KERN_ERR "EXT4 (no)acl options " |
1106 | "not supported\n"); | ||
1099 | break; | 1107 | break; |
1100 | #endif | 1108 | #endif |
1101 | case Opt_reservation: | 1109 | case Opt_reservation: |
@@ -1189,8 +1197,8 @@ set_qf_name: | |||
1189 | sb_any_quota_suspended(sb)) && | 1197 | sb_any_quota_suspended(sb)) && |
1190 | !sbi->s_qf_names[qtype]) { | 1198 | !sbi->s_qf_names[qtype]) { |
1191 | printk(KERN_ERR | 1199 | printk(KERN_ERR |
1192 | "EXT4-fs: Cannot change journaled " | 1200 | "EXT4-fs: Cannot change journaled " |
1193 | "quota options when quota turned on.\n"); | 1201 | "quota options when quota turned on.\n"); |
1194 | return 0; | 1202 | return 0; |
1195 | } | 1203 | } |
1196 | qname = match_strdup(&args[0]); | 1204 | qname = match_strdup(&args[0]); |
@@ -1357,12 +1365,6 @@ set_qf_format: | |||
1357 | case Opt_nodelalloc: | 1365 | case Opt_nodelalloc: |
1358 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1366 | clear_opt(sbi->s_mount_opt, DELALLOC); |
1359 | break; | 1367 | break; |
1360 | case Opt_mballoc: | ||
1361 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1362 | break; | ||
1363 | case Opt_nomballoc: | ||
1364 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
1365 | break; | ||
1366 | case Opt_stripe: | 1368 | case Opt_stripe: |
1367 | if (match_int(&args[0], &option)) | 1369 | if (match_int(&args[0], &option)) |
1368 | return 0; | 1370 | return 0; |
@@ -1373,6 +1375,13 @@ set_qf_format: | |||
1373 | case Opt_delalloc: | 1375 | case Opt_delalloc: |
1374 | set_opt(sbi->s_mount_opt, DELALLOC); | 1376 | set_opt(sbi->s_mount_opt, DELALLOC); |
1375 | break; | 1377 | break; |
1378 | case Opt_inode_readahead_blks: | ||
1379 | if (match_int(&args[0], &option)) | ||
1380 | return 0; | ||
1381 | if (option < 0 || option > (1 << 30)) | ||
1382 | return 0; | ||
1383 | sbi->s_inode_readahead_blks = option; | ||
1384 | break; | ||
1376 | default: | 1385 | default: |
1377 | printk(KERN_ERR | 1386 | printk(KERN_ERR |
1378 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1387 | "EXT4-fs: Unrecognized mount option \"%s\" " |
@@ -1473,15 +1482,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1473 | EXT4_INODES_PER_GROUP(sb), | 1482 | EXT4_INODES_PER_GROUP(sb), |
1474 | sbi->s_mount_opt); | 1483 | sbi->s_mount_opt); |
1475 | 1484 | ||
1476 | printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); | 1485 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", |
1477 | if (EXT4_SB(sb)->s_journal->j_inode == NULL) { | 1486 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1478 | char b[BDEVNAME_SIZE]; | 1487 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1479 | |||
1480 | printk("external journal on %s\n", | ||
1481 | bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); | ||
1482 | } else { | ||
1483 | printk("internal journal\n"); | ||
1484 | } | ||
1485 | return res; | 1488 | return res; |
1486 | } | 1489 | } |
1487 | 1490 | ||
@@ -1504,8 +1507,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1504 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1507 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1505 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1508 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1506 | 1509 | ||
1507 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | 1510 | /* We allocate both existing and potentially added groups */ |
1508 | groups_per_flex; | 1511 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1512 | ((sbi->s_es->s_reserved_gdt_blocks +1 ) << | ||
1513 | EXT4_DESC_PER_BLOCK_BITS(sb))) / | ||
1514 | groups_per_flex; | ||
1509 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1515 | sbi->s_flex_groups = kzalloc(flex_group_count * |
1510 | sizeof(struct flex_groups), GFP_KERNEL); | 1516 | sizeof(struct flex_groups), GFP_KERNEL); |
1511 | if (sbi->s_flex_groups == NULL) { | 1517 | if (sbi->s_flex_groups == NULL) { |
@@ -1584,7 +1590,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1584 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1590 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1585 | flexbg_flag = 1; | 1591 | flexbg_flag = 1; |
1586 | 1592 | ||
1587 | ext4_debug ("Checking group descriptors"); | 1593 | ext4_debug("Checking group descriptors"); |
1588 | 1594 | ||
1589 | for (i = 0; i < sbi->s_groups_count; i++) { | 1595 | for (i = 0; i < sbi->s_groups_count; i++) { |
1590 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1596 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -1623,8 +1629,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1623 | "Checksum for group %lu failed (%u!=%u)\n", | 1629 | "Checksum for group %lu failed (%u!=%u)\n", |
1624 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1630 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1625 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1631 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1626 | if (!(sb->s_flags & MS_RDONLY)) | 1632 | if (!(sb->s_flags & MS_RDONLY)) { |
1633 | spin_unlock(sb_bgl_lock(sbi, i)); | ||
1627 | return 0; | 1634 | return 0; |
1635 | } | ||
1628 | } | 1636 | } |
1629 | spin_unlock(sb_bgl_lock(sbi, i)); | 1637 | spin_unlock(sb_bgl_lock(sbi, i)); |
1630 | if (!flexbg_flag) | 1638 | if (!flexbg_flag) |
@@ -1714,9 +1722,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1714 | DQUOT_INIT(inode); | 1722 | DQUOT_INIT(inode); |
1715 | if (inode->i_nlink) { | 1723 | if (inode->i_nlink) { |
1716 | printk(KERN_DEBUG | 1724 | printk(KERN_DEBUG |
1717 | "%s: truncating inode %lu to %Ld bytes\n", | 1725 | "%s: truncating inode %lu to %lld bytes\n", |
1718 | __func__, inode->i_ino, inode->i_size); | 1726 | __func__, inode->i_ino, inode->i_size); |
1719 | jbd_debug(2, "truncating inode %lu to %Ld bytes\n", | 1727 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1720 | inode->i_ino, inode->i_size); | 1728 | inode->i_ino, inode->i_size); |
1721 | ext4_truncate(inode); | 1729 | ext4_truncate(inode); |
1722 | nr_truncates++; | 1730 | nr_truncates++; |
@@ -1914,6 +1922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1914 | unsigned long journal_devnum = 0; | 1922 | unsigned long journal_devnum = 0; |
1915 | unsigned long def_mount_opts; | 1923 | unsigned long def_mount_opts; |
1916 | struct inode *root; | 1924 | struct inode *root; |
1925 | char *cp; | ||
1917 | int ret = -EINVAL; | 1926 | int ret = -EINVAL; |
1918 | int blocksize; | 1927 | int blocksize; |
1919 | int db_count; | 1928 | int db_count; |
@@ -1930,10 +1939,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1930 | sbi->s_mount_opt = 0; | 1939 | sbi->s_mount_opt = 0; |
1931 | sbi->s_resuid = EXT4_DEF_RESUID; | 1940 | sbi->s_resuid = EXT4_DEF_RESUID; |
1932 | sbi->s_resgid = EXT4_DEF_RESGID; | 1941 | sbi->s_resgid = EXT4_DEF_RESGID; |
1942 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | ||
1933 | sbi->s_sb_block = sb_block; | 1943 | sbi->s_sb_block = sb_block; |
1934 | 1944 | ||
1935 | unlock_kernel(); | 1945 | unlock_kernel(); |
1936 | 1946 | ||
1947 | /* Cleanup superblock name */ | ||
1948 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | ||
1949 | *cp = '!'; | ||
1950 | |||
1937 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 1951 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
1938 | if (!blocksize) { | 1952 | if (!blocksize) { |
1939 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 1953 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); |
@@ -1973,11 +1987,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1973 | set_opt(sbi->s_mount_opt, GRPID); | 1987 | set_opt(sbi->s_mount_opt, GRPID); |
1974 | if (def_mount_opts & EXT4_DEFM_UID16) | 1988 | if (def_mount_opts & EXT4_DEFM_UID16) |
1975 | set_opt(sbi->s_mount_opt, NO_UID32); | 1989 | set_opt(sbi->s_mount_opt, NO_UID32); |
1976 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1990 | #ifdef CONFIG_EXT4_FS_XATTR |
1977 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 1991 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) |
1978 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1992 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1979 | #endif | 1993 | #endif |
1980 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1994 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1981 | if (def_mount_opts & EXT4_DEFM_ACL) | 1995 | if (def_mount_opts & EXT4_DEFM_ACL) |
1982 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1996 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1983 | #endif | 1997 | #endif |
@@ -2012,11 +2026,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2012 | ext4_warning(sb, __func__, | 2026 | ext4_warning(sb, __func__, |
2013 | "extents feature not enabled on this filesystem, " | 2027 | "extents feature not enabled on this filesystem, " |
2014 | "use tune2fs.\n"); | 2028 | "use tune2fs.\n"); |
2015 | /* | ||
2016 | * turn on mballoc code by default in ext4 filesystem | ||
2017 | * Use -o nomballoc to turn it off | ||
2018 | */ | ||
2019 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
2020 | 2029 | ||
2021 | /* | 2030 | /* |
2022 | * enable delayed allocation by default | 2031 | * enable delayed allocation by default |
@@ -2041,16 +2050,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2041 | "running e2fsck is recommended\n"); | 2050 | "running e2fsck is recommended\n"); |
2042 | 2051 | ||
2043 | /* | 2052 | /* |
2044 | * Since ext4 is still considered development code, we require | ||
2045 | * that the TEST_FILESYS flag in s->flags be set. | ||
2046 | */ | ||
2047 | if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { | ||
2048 | printk(KERN_WARNING "EXT4-fs: %s: not marked " | ||
2049 | "OK to use with test code.\n", sb->s_id); | ||
2050 | goto failed_mount; | ||
2051 | } | ||
2052 | |||
2053 | /* | ||
2054 | * Check feature flags regardless of the revision level, since we | 2053 | * Check feature flags regardless of the revision level, since we |
2055 | * previously didn't change the revision level when setting the flags, | 2054 | * previously didn't change the revision level when setting the flags, |
2056 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2055 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
@@ -2219,6 +2218,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2219 | goto failed_mount; | 2218 | goto failed_mount; |
2220 | } | 2219 | } |
2221 | 2220 | ||
2221 | if (ext4_proc_root) | ||
2222 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | ||
2223 | |||
2224 | if (sbi->s_proc) | ||
2225 | proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, | ||
2226 | &ext4_ui_proc_fops, | ||
2227 | &sbi->s_inode_readahead_blks); | ||
2228 | |||
2222 | bgl_lock_init(&sbi->s_blockgroup_lock); | 2229 | bgl_lock_init(&sbi->s_blockgroup_lock); |
2223 | 2230 | ||
2224 | for (i = 0; i < db_count; i++) { | 2231 | for (i = 0; i < db_count; i++) { |
@@ -2257,24 +2264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2257 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2264 | err = percpu_counter_init(&sbi->s_dirs_counter, |
2258 | ext4_count_dirs(sb)); | 2265 | ext4_count_dirs(sb)); |
2259 | } | 2266 | } |
2267 | if (!err) { | ||
2268 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2269 | } | ||
2260 | if (err) { | 2270 | if (err) { |
2261 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2271 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); |
2262 | goto failed_mount3; | 2272 | goto failed_mount3; |
2263 | } | 2273 | } |
2264 | 2274 | ||
2265 | /* per fileystem reservation list head & lock */ | ||
2266 | spin_lock_init(&sbi->s_rsv_window_lock); | ||
2267 | sbi->s_rsv_window_root = RB_ROOT; | ||
2268 | /* Add a single, static dummy reservation to the start of the | ||
2269 | * reservation window list --- it gives us a placeholder for | ||
2270 | * append-at-start-of-list which makes the allocation logic | ||
2271 | * _much_ simpler. */ | ||
2272 | sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2273 | sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2274 | sbi->s_rsv_window_head.rsv_alloc_hit = 0; | ||
2275 | sbi->s_rsv_window_head.rsv_goal_size = 0; | ||
2276 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||
2277 | |||
2278 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2275 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2279 | 2276 | ||
2280 | /* | 2277 | /* |
@@ -2471,7 +2468,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2471 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | 2468 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); |
2472 | 2469 | ||
2473 | ext4_ext_init(sb); | 2470 | ext4_ext_init(sb); |
2474 | ext4_mb_init(sb, needs_recovery); | 2471 | err = ext4_mb_init(sb, needs_recovery); |
2472 | if (err) { | ||
2473 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2474 | err); | ||
2475 | goto failed_mount4; | ||
2476 | } | ||
2475 | 2477 | ||
2476 | lock_kernel(); | 2478 | lock_kernel(); |
2477 | return 0; | 2479 | return 0; |
@@ -2489,11 +2491,16 @@ failed_mount3: | |||
2489 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2491 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2490 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2492 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2491 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2493 | percpu_counter_destroy(&sbi->s_dirs_counter); |
2494 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
2492 | failed_mount2: | 2495 | failed_mount2: |
2493 | for (i = 0; i < db_count; i++) | 2496 | for (i = 0; i < db_count; i++) |
2494 | brelse(sbi->s_group_desc[i]); | 2497 | brelse(sbi->s_group_desc[i]); |
2495 | kfree(sbi->s_group_desc); | 2498 | kfree(sbi->s_group_desc); |
2496 | failed_mount: | 2499 | failed_mount: |
2500 | if (sbi->s_proc) { | ||
2501 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
2502 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
2503 | } | ||
2497 | #ifdef CONFIG_QUOTA | 2504 | #ifdef CONFIG_QUOTA |
2498 | for (i = 0; i < MAXQUOTAS; i++) | 2505 | for (i = 0; i < MAXQUOTAS; i++) |
2499 | kfree(sbi->s_qf_names[i]); | 2506 | kfree(sbi->s_qf_names[i]); |
@@ -2552,7 +2559,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2552 | return NULL; | 2559 | return NULL; |
2553 | } | 2560 | } |
2554 | 2561 | ||
2555 | jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", | 2562 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2556 | journal_inode, journal_inode->i_size); | 2563 | journal_inode, journal_inode->i_size); |
2557 | if (!S_ISREG(journal_inode->i_mode)) { | 2564 | if (!S_ISREG(journal_inode->i_mode)) { |
2558 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2565 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); |
@@ -2715,6 +2722,11 @@ static int ext4_load_journal(struct super_block *sb, | |||
2715 | return -EINVAL; | 2722 | return -EINVAL; |
2716 | } | 2723 | } |
2717 | 2724 | ||
2725 | if (journal->j_flags & JBD2_BARRIER) | ||
2726 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | ||
2727 | else | ||
2728 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | ||
2729 | |||
2718 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2730 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
2719 | err = jbd2_journal_update_format(journal); | 2731 | err = jbd2_journal_update_format(journal); |
2720 | if (err) { | 2732 | if (err) { |
@@ -2799,13 +2811,34 @@ static void ext4_commit_super(struct super_block *sb, | |||
2799 | 2811 | ||
2800 | if (!sbh) | 2812 | if (!sbh) |
2801 | return; | 2813 | return; |
2814 | if (buffer_write_io_error(sbh)) { | ||
2815 | /* | ||
2816 | * Oh, dear. A previous attempt to write the | ||
2817 | * superblock failed. This could happen because the | ||
2818 | * USB device was yanked out. Or it could happen to | ||
2819 | * be a transient write error and maybe the block will | ||
2820 | * be remapped. Nothing we can do but to retry the | ||
2821 | * write and hope for the best. | ||
2822 | */ | ||
2823 | printk(KERN_ERR "ext4: previous I/O error to " | ||
2824 | "superblock detected for %s.\n", sb->s_id); | ||
2825 | clear_buffer_write_io_error(sbh); | ||
2826 | set_buffer_uptodate(sbh); | ||
2827 | } | ||
2802 | es->s_wtime = cpu_to_le32(get_seconds()); | 2828 | es->s_wtime = cpu_to_le32(get_seconds()); |
2803 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2829 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); |
2804 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2830 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); |
2805 | BUFFER_TRACE(sbh, "marking dirty"); | 2831 | BUFFER_TRACE(sbh, "marking dirty"); |
2806 | mark_buffer_dirty(sbh); | 2832 | mark_buffer_dirty(sbh); |
2807 | if (sync) | 2833 | if (sync) { |
2808 | sync_dirty_buffer(sbh); | 2834 | sync_dirty_buffer(sbh); |
2835 | if (buffer_write_io_error(sbh)) { | ||
2836 | printk(KERN_ERR "ext4: I/O error while writing " | ||
2837 | "superblock for %s.\n", sb->s_id); | ||
2838 | clear_buffer_write_io_error(sbh); | ||
2839 | set_buffer_uptodate(sbh); | ||
2840 | } | ||
2841 | } | ||
2809 | } | 2842 | } |
2810 | 2843 | ||
2811 | 2844 | ||
@@ -2907,6 +2940,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
2907 | { | 2940 | { |
2908 | tid_t target; | 2941 | tid_t target; |
2909 | 2942 | ||
2943 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | ||
2910 | sb->s_dirt = 0; | 2944 | sb->s_dirt = 0; |
2911 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 2945 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
2912 | if (wait) | 2946 | if (wait) |
@@ -3162,7 +3196,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3162 | buf->f_type = EXT4_SUPER_MAGIC; | 3196 | buf->f_type = EXT4_SUPER_MAGIC; |
3163 | buf->f_bsize = sb->s_blocksize; | 3197 | buf->f_bsize = sb->s_blocksize; |
3164 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3198 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3165 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); | 3199 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3200 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | ||
3166 | ext4_free_blocks_count_set(es, buf->f_bfree); | 3201 | ext4_free_blocks_count_set(es, buf->f_bfree); |
3167 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3202 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3168 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3203 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
@@ -3432,7 +3467,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3432 | handle_t *handle = journal_current_handle(); | 3467 | handle_t *handle = journal_current_handle(); |
3433 | 3468 | ||
3434 | if (!handle) { | 3469 | if (!handle) { |
3435 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" | 3470 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" |
3436 | " cancelled because transaction is not started.\n", | 3471 | " cancelled because transaction is not started.\n", |
3437 | (unsigned long long)off, (unsigned long long)len); | 3472 | (unsigned long long)off, (unsigned long long)len); |
3438 | return -EIO; | 3473 | return -EIO; |
@@ -3493,18 +3528,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, | |||
3493 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3528 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); |
3494 | } | 3529 | } |
3495 | 3530 | ||
3531 | #ifdef CONFIG_PROC_FS | ||
3532 | static int ext4_ui_proc_show(struct seq_file *m, void *v) | ||
3533 | { | ||
3534 | unsigned int *p = m->private; | ||
3535 | |||
3536 | seq_printf(m, "%u\n", *p); | ||
3537 | return 0; | ||
3538 | } | ||
3539 | |||
3540 | static int ext4_ui_proc_open(struct inode *inode, struct file *file) | ||
3541 | { | ||
3542 | return single_open(file, ext4_ui_proc_show, PDE(inode)->data); | ||
3543 | } | ||
3544 | |||
3545 | static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, | ||
3546 | size_t cnt, loff_t *ppos) | ||
3547 | { | ||
3548 | unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; | ||
3549 | char str[32]; | ||
3550 | unsigned long value; | ||
3551 | |||
3552 | if (cnt >= sizeof(str)) | ||
3553 | return -EINVAL; | ||
3554 | if (copy_from_user(str, buf, cnt)) | ||
3555 | return -EFAULT; | ||
3556 | value = simple_strtol(str, NULL, 0); | ||
3557 | if (value < 0) | ||
3558 | return -ERANGE; | ||
3559 | *p = value; | ||
3560 | return cnt; | ||
3561 | } | ||
3562 | |||
3563 | const struct file_operations ext4_ui_proc_fops = { | ||
3564 | .owner = THIS_MODULE, | ||
3565 | .open = ext4_ui_proc_open, | ||
3566 | .read = seq_read, | ||
3567 | .llseek = seq_lseek, | ||
3568 | .release = single_release, | ||
3569 | .write = ext4_ui_proc_write, | ||
3570 | }; | ||
3571 | #endif | ||
3572 | |||
3573 | static struct file_system_type ext4_fs_type = { | ||
3574 | .owner = THIS_MODULE, | ||
3575 | .name = "ext4", | ||
3576 | .get_sb = ext4_get_sb, | ||
3577 | .kill_sb = kill_block_super, | ||
3578 | .fs_flags = FS_REQUIRES_DEV, | ||
3579 | }; | ||
3580 | |||
3581 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3582 | static int ext4dev_get_sb(struct file_system_type *fs_type, | ||
3583 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
3584 | { | ||
3585 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | ||
3586 | "to mount using ext4\n"); | ||
3587 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | ||
3588 | "will go away by 2.6.31\n"); | ||
3589 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | ||
3590 | } | ||
3591 | |||
3496 | static struct file_system_type ext4dev_fs_type = { | 3592 | static struct file_system_type ext4dev_fs_type = { |
3497 | .owner = THIS_MODULE, | 3593 | .owner = THIS_MODULE, |
3498 | .name = "ext4dev", | 3594 | .name = "ext4dev", |
3499 | .get_sb = ext4_get_sb, | 3595 | .get_sb = ext4dev_get_sb, |
3500 | .kill_sb = kill_block_super, | 3596 | .kill_sb = kill_block_super, |
3501 | .fs_flags = FS_REQUIRES_DEV, | 3597 | .fs_flags = FS_REQUIRES_DEV, |
3502 | }; | 3598 | }; |
3599 | MODULE_ALIAS("ext4dev"); | ||
3600 | #endif | ||
3503 | 3601 | ||
3504 | static int __init init_ext4_fs(void) | 3602 | static int __init init_ext4_fs(void) |
3505 | { | 3603 | { |
3506 | int err; | 3604 | int err; |
3507 | 3605 | ||
3606 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
3508 | err = init_ext4_mballoc(); | 3607 | err = init_ext4_mballoc(); |
3509 | if (err) | 3608 | if (err) |
3510 | return err; | 3609 | return err; |
@@ -3515,9 +3614,16 @@ static int __init init_ext4_fs(void) | |||
3515 | err = init_inodecache(); | 3614 | err = init_inodecache(); |
3516 | if (err) | 3615 | if (err) |
3517 | goto out1; | 3616 | goto out1; |
3518 | err = register_filesystem(&ext4dev_fs_type); | 3617 | err = register_filesystem(&ext4_fs_type); |
3519 | if (err) | 3618 | if (err) |
3520 | goto out; | 3619 | goto out; |
3620 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3621 | err = register_filesystem(&ext4dev_fs_type); | ||
3622 | if (err) { | ||
3623 | unregister_filesystem(&ext4_fs_type); | ||
3624 | goto out; | ||
3625 | } | ||
3626 | #endif | ||
3521 | return 0; | 3627 | return 0; |
3522 | out: | 3628 | out: |
3523 | destroy_inodecache(); | 3629 | destroy_inodecache(); |
@@ -3530,10 +3636,14 @@ out2: | |||
3530 | 3636 | ||
3531 | static void __exit exit_ext4_fs(void) | 3637 | static void __exit exit_ext4_fs(void) |
3532 | { | 3638 | { |
3639 | unregister_filesystem(&ext4_fs_type); | ||
3640 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3533 | unregister_filesystem(&ext4dev_fs_type); | 3641 | unregister_filesystem(&ext4dev_fs_type); |
3642 | #endif | ||
3534 | destroy_inodecache(); | 3643 | destroy_inodecache(); |
3535 | exit_ext4_xattr(); | 3644 | exit_ext4_xattr(); |
3536 | exit_ext4_mballoc(); | 3645 | exit_ext4_mballoc(); |
3646 | remove_proc_entry("fs/ext4", NULL); | ||
3537 | } | 3647 | } |
3538 | 3648 | ||
3539 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3649 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc01..00740cb32be3 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -23,10 +23,10 @@ | |||
23 | #include "ext4.h" | 23 | #include "ext4.h" |
24 | #include "xattr.h" | 24 | #include "xattr.h" |
25 | 25 | ||
26 | static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
27 | { | 27 | { |
28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
29 | nd_set_link(nd, (char*)ei->i_data); | 29 | nd_set_link(nd, (char *) ei->i_data); |
30 | return NULL; | 30 | return NULL; |
31 | } | 31 | } |
32 | 32 | ||
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 37 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 38 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 39 | .getxattr = generic_getxattr, |
40 | .listxattr = ext4_listxattr, | 40 | .listxattr = ext4_listxattr, |
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 46 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 47 | .follow_link = ext4_follow_link, |
48 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 48 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 49 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 50 | .getxattr = generic_getxattr, |
51 | .listxattr = ext4_listxattr, | 51 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b4893..80626d516fee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; | |||
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, | 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, |
105 | #endif | 105 | #endif |
106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, | 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, |
107 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 107 | #ifdef CONFIG_EXT4_FS_SECURITY |
108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, | 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, |
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
116 | &ext4_xattr_acl_access_handler, | 116 | &ext4_xattr_acl_access_handler, |
117 | &ext4_xattr_acl_default_handler, | 117 | &ext4_xattr_acl_default_handler, |
118 | #endif | 118 | #endif |
119 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 119 | #ifdef CONFIG_EXT4_FS_SECURITY |
120 | &ext4_xattr_security_handler, | 120 | &ext4_xattr_security_handler, |
121 | #endif | 121 | #endif |
122 | NULL | 122 | NULL |
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
959 | struct ext4_xattr_block_find bs = { | 959 | struct ext4_xattr_block_find bs = { |
960 | .s = { .not_found = -ENODATA, }, | 960 | .s = { .not_found = -ENODATA, }, |
961 | }; | 961 | }; |
962 | unsigned long no_expand; | ||
962 | int error; | 963 | int error; |
963 | 964 | ||
964 | if (!name) | 965 | if (!name) |
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
966 | if (strlen(name) > 255) | 967 | if (strlen(name) > 255) |
967 | return -ERANGE; | 968 | return -ERANGE; |
968 | down_write(&EXT4_I(inode)->xattr_sem); | 969 | down_write(&EXT4_I(inode)->xattr_sem); |
970 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | ||
971 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | ||
972 | |||
969 | error = ext4_get_inode_loc(inode, &is.iloc); | 973 | error = ext4_get_inode_loc(inode, &is.iloc); |
970 | if (error) | 974 | if (error) |
971 | goto cleanup; | 975 | goto cleanup; |
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1042 | cleanup: | 1046 | cleanup: |
1043 | brelse(is.iloc.bh); | 1047 | brelse(is.iloc.bh); |
1044 | brelse(bs.bh); | 1048 | brelse(bs.bh); |
1049 | if (no_expand == 0) | ||
1050 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | ||
1045 | up_write(&EXT4_I(inode)->xattr_sem); | 1051 | up_write(&EXT4_I(inode)->xattr_sem); |
1046 | return error; | 1052 | return error; |
1047 | } | 1053 | } |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb9..8ede88b18c29 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -51,8 +51,8 @@ struct ext4_xattr_entry { | |||
51 | (((name_len) + EXT4_XATTR_ROUND + \ | 51 | (((name_len) + EXT4_XATTR_ROUND + \ |
52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) | 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) |
53 | #define EXT4_XATTR_NEXT(entry) \ | 53 | #define EXT4_XATTR_NEXT(entry) \ |
54 | ( (struct ext4_xattr_entry *)( \ | 54 | ((struct ext4_xattr_entry *)( \ |
55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) | 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) |
56 | #define EXT4_XATTR_SIZE(size) \ | 56 | #define EXT4_XATTR_SIZE(size) \ |
57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) | 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) |
58 | 58 | ||
@@ -63,7 +63,7 @@ struct ext4_xattr_entry { | |||
63 | EXT4_I(inode)->i_extra_isize)) | 63 | EXT4_I(inode)->i_extra_isize)) |
64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) | 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) |
65 | 65 | ||
66 | # ifdef CONFIG_EXT4DEV_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern struct xattr_handler ext4_xattr_trusted_handler; |
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); | |||
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4DEV_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
93 | static inline int | 93 | static inline int |
94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, | 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, |
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
141 | 141 | ||
142 | #define ext4_xattr_handlers NULL | 142 | #define ext4_xattr_handlers NULL |
143 | 143 | ||
144 | # endif /* CONFIG_EXT4DEV_FS_XATTR */ | 144 | # endif /* CONFIG_EXT4_FS_XATTR */ |
145 | 145 | ||
146 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
148 | struct inode *dir); | 148 | struct inode *dir); |
149 | #else | 149 | #else |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d3..33a6b7ecb8b8 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -13,9 +13,14 @@ | |||
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/writeback.h> | ||
17 | #include <linux/buffer_head.h> | ||
16 | 18 | ||
17 | #include <asm/ioctls.h> | 19 | #include <asm/ioctls.h> |
18 | 20 | ||
21 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
22 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
23 | |||
19 | /** | 24 | /** |
20 | * vfs_ioctl - call filesystem specific ioctl methods | 25 | * vfs_ioctl - call filesystem specific ioctl methods |
21 | * @filp: open file to invoke ioctl method on | 26 | * @filp: open file to invoke ioctl method on |
@@ -71,6 +76,272 @@ static int ioctl_fibmap(struct file *filp, int __user *p) | |||
71 | return put_user(res, p); | 76 | return put_user(res, p); |
72 | } | 77 | } |
73 | 78 | ||
79 | /** | ||
80 | * fiemap_fill_next_extent - Fiemap helper function | ||
81 | * @fieinfo: Fiemap context passed into ->fiemap | ||
82 | * @logical: Extent logical start offset, in bytes | ||
83 | * @phys: Extent physical start offset, in bytes | ||
84 | * @len: Extent length, in bytes | ||
85 | * @flags: FIEMAP_EXTENT flags that describe this extent | ||
86 | * | ||
87 | * Called from file system ->fiemap callback. Will populate extent | ||
88 | * info as passed in via arguments and copy to user memory. On | ||
89 | * success, extent count on fieinfo is incremented. | ||
90 | * | ||
91 | * Returns 0 on success, -errno on error, 1 if this was the last | ||
92 | * extent that will fit in user array. | ||
93 | */ | ||
94 | #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) | ||
95 | #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) | ||
96 | #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) | ||
97 | int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | ||
98 | u64 phys, u64 len, u32 flags) | ||
99 | { | ||
100 | struct fiemap_extent extent; | ||
101 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | ||
102 | |||
103 | /* only count the extents */ | ||
104 | if (fieinfo->fi_extents_max == 0) { | ||
105 | fieinfo->fi_extents_mapped++; | ||
106 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
107 | } | ||
108 | |||
109 | if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) | ||
110 | return 1; | ||
111 | |||
112 | if (flags & SET_UNKNOWN_FLAGS) | ||
113 | flags |= FIEMAP_EXTENT_UNKNOWN; | ||
114 | if (flags & SET_NO_UNMOUNTED_IO_FLAGS) | ||
115 | flags |= FIEMAP_EXTENT_ENCODED; | ||
116 | if (flags & SET_NOT_ALIGNED_FLAGS) | ||
117 | flags |= FIEMAP_EXTENT_NOT_ALIGNED; | ||
118 | |||
119 | memset(&extent, 0, sizeof(extent)); | ||
120 | extent.fe_logical = logical; | ||
121 | extent.fe_physical = phys; | ||
122 | extent.fe_length = len; | ||
123 | extent.fe_flags = flags; | ||
124 | |||
125 | dest += fieinfo->fi_extents_mapped; | ||
126 | if (copy_to_user(dest, &extent, sizeof(extent))) | ||
127 | return -EFAULT; | ||
128 | |||
129 | fieinfo->fi_extents_mapped++; | ||
130 | if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) | ||
131 | return 1; | ||
132 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
133 | } | ||
134 | EXPORT_SYMBOL(fiemap_fill_next_extent); | ||
135 | |||
136 | /** | ||
137 | * fiemap_check_flags - check validity of requested flags for fiemap | ||
138 | * @fieinfo: Fiemap context passed into ->fiemap | ||
139 | * @fs_flags: Set of fiemap flags that the file system understands | ||
140 | * | ||
141 | * Called from file system ->fiemap callback. This will compute the | ||
142 | * intersection of valid fiemap flags and those that the fs supports. That | ||
143 | * value is then compared against the user supplied flags. In case of bad user | ||
144 | * flags, the invalid values will be written into the fieinfo structure, and | ||
145 | * -EBADR is returned, which tells ioctl_fiemap() to return those values to | ||
146 | * userspace. For this reason, a return code of -EBADR should be preserved. | ||
147 | * | ||
148 | * Returns 0 on success, -EBADR on bad flags. | ||
149 | */ | ||
150 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) | ||
151 | { | ||
152 | u32 incompat_flags; | ||
153 | |||
154 | incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); | ||
155 | if (incompat_flags) { | ||
156 | fieinfo->fi_flags = incompat_flags; | ||
157 | return -EBADR; | ||
158 | } | ||
159 | return 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(fiemap_check_flags); | ||
162 | |||
163 | static int fiemap_check_ranges(struct super_block *sb, | ||
164 | u64 start, u64 len, u64 *new_len) | ||
165 | { | ||
166 | *new_len = len; | ||
167 | |||
168 | if (len == 0) | ||
169 | return -EINVAL; | ||
170 | |||
171 | if (start > sb->s_maxbytes) | ||
172 | return -EFBIG; | ||
173 | |||
174 | /* | ||
175 | * Shrink request scope to what the fs can actually handle. | ||
176 | */ | ||
177 | if ((len > sb->s_maxbytes) || | ||
178 | (sb->s_maxbytes - len) < start) | ||
179 | *new_len = sb->s_maxbytes - start; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | ||
185 | { | ||
186 | struct fiemap fiemap; | ||
187 | struct fiemap_extent_info fieinfo = { 0, }; | ||
188 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
189 | struct super_block *sb = inode->i_sb; | ||
190 | u64 len; | ||
191 | int error; | ||
192 | |||
193 | if (!inode->i_op->fiemap) | ||
194 | return -EOPNOTSUPP; | ||
195 | |||
196 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | ||
197 | sizeof(struct fiemap))) | ||
198 | return -EFAULT; | ||
199 | |||
200 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
201 | return -EINVAL; | ||
202 | |||
203 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
204 | &len); | ||
205 | if (error) | ||
206 | return error; | ||
207 | |||
208 | fieinfo.fi_flags = fiemap.fm_flags; | ||
209 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
210 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | ||
211 | |||
212 | if (fiemap.fm_extent_count != 0 && | ||
213 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | ||
214 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
215 | return -EFAULT; | ||
216 | |||
217 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
218 | filemap_write_and_wait(inode->i_mapping); | ||
219 | |||
220 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | ||
221 | fiemap.fm_flags = fieinfo.fi_flags; | ||
222 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
223 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | ||
224 | error = -EFAULT; | ||
225 | |||
226 | return error; | ||
227 | } | ||
228 | |||
229 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | ||
230 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | ||
231 | |||
232 | /* | ||
233 | * @inode - the inode to map | ||
234 | * @arg - the pointer to userspace where we copy everything to | ||
235 | * @get_block - the fs's get_block function | ||
236 | * | ||
237 | * This does FIEMAP for block based inodes. Basically it will just loop | ||
238 | * through get_block until we hit the number of extents we want to map, or we | ||
239 | * go past the end of the file and hit a hole. | ||
240 | * | ||
241 | * If it is possible to have data blocks beyond a hole past @inode->i_size, then | ||
242 | * please do not use this function, it will stop at the first unmapped block | ||
243 | * beyond i_size | ||
244 | */ | ||
245 | int generic_block_fiemap(struct inode *inode, | ||
246 | struct fiemap_extent_info *fieinfo, u64 start, | ||
247 | u64 len, get_block_t *get_block) | ||
248 | { | ||
249 | struct buffer_head tmp; | ||
250 | unsigned int start_blk; | ||
251 | long long length = 0, map_len = 0; | ||
252 | u64 logical = 0, phys = 0, size = 0; | ||
253 | u32 flags = FIEMAP_EXTENT_MERGED; | ||
254 | int ret = 0; | ||
255 | |||
256 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | ||
257 | return ret; | ||
258 | |||
259 | start_blk = logical_to_blk(inode, start); | ||
260 | |||
261 | /* guard against change */ | ||
262 | mutex_lock(&inode->i_mutex); | ||
263 | |||
264 | length = (long long)min_t(u64, len, i_size_read(inode)); | ||
265 | map_len = length; | ||
266 | |||
267 | do { | ||
268 | /* | ||
269 | * we set b_size to the total size we want so it will map as | ||
270 | * many contiguous blocks as possible at once | ||
271 | */ | ||
272 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
273 | tmp.b_size = map_len; | ||
274 | |||
275 | ret = get_block(inode, start_blk, &tmp, 0); | ||
276 | if (ret) | ||
277 | break; | ||
278 | |||
279 | /* HOLE */ | ||
280 | if (!buffer_mapped(&tmp)) { | ||
281 | /* | ||
282 | * first hole after going past the EOF, this is our | ||
283 | * last extent | ||
284 | */ | ||
285 | if (length <= 0) { | ||
286 | flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; | ||
287 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
288 | phys, size, | ||
289 | flags); | ||
290 | break; | ||
291 | } | ||
292 | |||
293 | length -= blk_to_logical(inode, 1); | ||
294 | |||
295 | /* if we have holes up to/past EOF then we're done */ | ||
296 | if (length <= 0) | ||
297 | break; | ||
298 | |||
299 | start_blk++; | ||
300 | } else { | ||
301 | if (length <= 0 && size) { | ||
302 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
303 | phys, size, | ||
304 | flags); | ||
305 | if (ret) | ||
306 | break; | ||
307 | } | ||
308 | |||
309 | logical = blk_to_logical(inode, start_blk); | ||
310 | phys = blk_to_logical(inode, tmp.b_blocknr); | ||
311 | size = tmp.b_size; | ||
312 | flags = FIEMAP_EXTENT_MERGED; | ||
313 | |||
314 | length -= tmp.b_size; | ||
315 | start_blk += logical_to_blk(inode, size); | ||
316 | |||
317 | /* | ||
318 | * if we are past the EOF we need to loop again to see | ||
319 | * if there is a hole so we can mark this extent as the | ||
320 | * last one, and if not keep mapping things until we | ||
321 | * find a hole, or we run out of slots in the extent | ||
322 | * array | ||
323 | */ | ||
324 | if (length <= 0) | ||
325 | continue; | ||
326 | |||
327 | ret = fiemap_fill_next_extent(fieinfo, logical, phys, | ||
328 | size, flags); | ||
329 | if (ret) | ||
330 | break; | ||
331 | } | ||
332 | cond_resched(); | ||
333 | } while (1); | ||
334 | |||
335 | mutex_unlock(&inode->i_mutex); | ||
336 | |||
337 | /* if ret is 1 then we just hit the end of the extent array */ | ||
338 | if (ret == 1) | ||
339 | ret = 0; | ||
340 | |||
341 | return ret; | ||
342 | } | ||
343 | EXPORT_SYMBOL(generic_block_fiemap); | ||
344 | |||
74 | static int file_ioctl(struct file *filp, unsigned int cmd, | 345 | static int file_ioctl(struct file *filp, unsigned int cmd, |
75 | unsigned long arg) | 346 | unsigned long arg) |
76 | { | 347 | { |
@@ -80,6 +351,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
80 | switch (cmd) { | 351 | switch (cmd) { |
81 | case FIBMAP: | 352 | case FIBMAP: |
82 | return ioctl_fibmap(filp, p); | 353 | return ioctl_fibmap(filp, p); |
354 | case FS_IOC_FIEMAP: | ||
355 | return ioctl_fiemap(filp, arg); | ||
83 | case FIGETBSZ: | 356 | case FIGETBSZ: |
84 | return put_user(inode->i_sb->s_blocksize, p); | 357 | return put_user(inode->i_sb->s_blocksize, p); |
85 | case FIONREAD: | 358 | case FIONREAD: |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..42895d369458 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/marker.h> | ||
23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | 26 | ||
@@ -126,14 +127,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
126 | 127 | ||
127 | /* | 128 | /* |
128 | * Test again, another process may have checkpointed while we | 129 | * Test again, another process may have checkpointed while we |
129 | * were waiting for the checkpoint lock | 130 | * were waiting for the checkpoint lock. If there are no |
131 | * outstanding transactions there is nothing to checkpoint and | ||
132 | * we can't make progress. Abort the journal in this case. | ||
130 | */ | 133 | */ |
131 | spin_lock(&journal->j_state_lock); | 134 | spin_lock(&journal->j_state_lock); |
135 | spin_lock(&journal->j_list_lock); | ||
132 | nblocks = jbd_space_needed(journal); | 136 | nblocks = jbd_space_needed(journal); |
133 | if (__jbd2_log_space_left(journal) < nblocks) { | 137 | if (__jbd2_log_space_left(journal) < nblocks) { |
138 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
139 | |||
140 | spin_unlock(&journal->j_list_lock); | ||
134 | spin_unlock(&journal->j_state_lock); | 141 | spin_unlock(&journal->j_state_lock); |
135 | jbd2_log_do_checkpoint(journal); | 142 | if (chkpt) { |
143 | jbd2_log_do_checkpoint(journal); | ||
144 | } else { | ||
145 | printk(KERN_ERR "%s: no transactions\n", | ||
146 | __func__); | ||
147 | jbd2_journal_abort(journal, 0); | ||
148 | } | ||
149 | |||
136 | spin_lock(&journal->j_state_lock); | 150 | spin_lock(&journal->j_state_lock); |
151 | } else { | ||
152 | spin_unlock(&journal->j_list_lock); | ||
137 | } | 153 | } |
138 | mutex_unlock(&journal->j_checkpoint_mutex); | 154 | mutex_unlock(&journal->j_checkpoint_mutex); |
139 | } | 155 | } |
@@ -313,6 +329,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
313 | * journal straight away. | 329 | * journal straight away. |
314 | */ | 330 | */ |
315 | result = jbd2_cleanup_journal_tail(journal); | 331 | result = jbd2_cleanup_journal_tail(journal); |
332 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
333 | journal->j_devname, result); | ||
316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 334 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
317 | if (result <= 0) | 335 | if (result <= 0) |
318 | return result; | 336 | return result; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95ec..0d3814a35ed1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
19 | #include <linux/marker.h> | ||
19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
126 | 127 | ||
127 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
128 | lock_buffer(bh); | 129 | lock_buffer(bh); |
129 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
130 | set_buffer_dirty(bh); | ||
131 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
132 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
133 | 133 | ||
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
147 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
148 | */ | 148 | */ |
149 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
153 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
154 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
157 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
158 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
160 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
161 | lock_buffer(bh); | 158 | lock_buffer(bh); |
162 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
163 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
164 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
165 | } | 162 | } |
166 | *cbh = bh; | 163 | *cbh = bh; |
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
371 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
372 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
373 | 370 | ||
371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
372 | journal->j_devname, commit_transaction->t_tid); | ||
374 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
375 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
376 | 375 | ||
@@ -681,11 +680,9 @@ start_journal_io: | |||
681 | */ | 680 | */ |
682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 681 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
683 | if (err) { | 682 | if (err) { |
684 | char b[BDEVNAME_SIZE]; | ||
685 | |||
686 | printk(KERN_WARNING | 683 | printk(KERN_WARNING |
687 | "JBD2: Detected IO errors while flushing file data " | 684 | "JBD2: Detected IO errors while flushing file data " |
688 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 685 | "on %s\n", journal->j_devname); |
689 | err = 0; | 686 | err = 0; |
690 | } | 687 | } |
691 | 688 | ||
@@ -990,6 +987,9 @@ restart_loop: | |||
990 | } | 987 | } |
991 | spin_unlock(&journal->j_list_lock); | 988 | spin_unlock(&journal->j_list_lock); |
992 | 989 | ||
990 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
991 | journal->j_devname, commit_transaction->t_tid, | ||
992 | journal->j_tail_sequence); | ||
993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
994 | journal->j_commit_sequence, journal->j_tail_sequence); | 994 | journal->j_commit_sequence, journal->j_tail_sequence); |
995 | 995 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4edb..01c3901c3a07 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
597 | if (ret) | 597 | if (ret) |
598 | *retp = ret; | 598 | *retp = ret; |
599 | else { | 599 | else { |
600 | char b[BDEVNAME_SIZE]; | ||
601 | |||
602 | printk(KERN_ALERT "%s: journal block not found " | 600 | printk(KERN_ALERT "%s: journal block not found " |
603 | "at offset %lu on %s\n", | 601 | "at offset %lu on %s\n", |
604 | __func__, | 602 | __func__, blocknr, journal->j_devname); |
605 | blocknr, | ||
606 | bdevname(journal->j_dev, b)); | ||
607 | err = -EIO; | 603 | err = -EIO; |
608 | __journal_abort_soft(journal, err); | 604 | __journal_abort_soft(journal, err); |
609 | } | 605 | } |
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; | |||
901 | 897 | ||
902 | static void jbd2_stats_proc_init(journal_t *journal) | 898 | static void jbd2_stats_proc_init(journal_t *journal) |
903 | { | 899 | { |
904 | char name[BDEVNAME_SIZE]; | 900 | journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); |
905 | |||
906 | bdevname(journal->j_dev, name); | ||
907 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
908 | if (journal->j_proc_entry) { | 901 | if (journal->j_proc_entry) { |
909 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, | 902 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, |
910 | &jbd2_seq_history_fops, journal); | 903 | &jbd2_seq_history_fops, journal); |
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) | |||
915 | 908 | ||
916 | static void jbd2_stats_proc_exit(journal_t *journal) | 909 | static void jbd2_stats_proc_exit(journal_t *journal) |
917 | { | 910 | { |
918 | char name[BDEVNAME_SIZE]; | ||
919 | |||
920 | bdevname(journal->j_dev, name); | ||
921 | remove_proc_entry("info", journal->j_proc_entry); | 911 | remove_proc_entry("info", journal->j_proc_entry); |
922 | remove_proc_entry("history", journal->j_proc_entry); | 912 | remove_proc_entry("history", journal->j_proc_entry); |
923 | remove_proc_entry(name, proc_jbd2_stats); | 913 | remove_proc_entry(journal->j_devname, proc_jbd2_stats); |
924 | } | 914 | } |
925 | 915 | ||
926 | static void journal_init_stats(journal_t *journal) | 916 | static void journal_init_stats(journal_t *journal) |
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1018 | { | 1008 | { |
1019 | journal_t *journal = journal_init_common(); | 1009 | journal_t *journal = journal_init_common(); |
1020 | struct buffer_head *bh; | 1010 | struct buffer_head *bh; |
1011 | char *p; | ||
1021 | int n; | 1012 | int n; |
1022 | 1013 | ||
1023 | if (!journal) | 1014 | if (!journal) |
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1039 | journal->j_fs_dev = fs_dev; | 1030 | journal->j_fs_dev = fs_dev; |
1040 | journal->j_blk_offset = start; | 1031 | journal->j_blk_offset = start; |
1041 | journal->j_maxlen = len; | 1032 | journal->j_maxlen = len; |
1033 | bdevname(journal->j_dev, journal->j_devname); | ||
1034 | p = journal->j_devname; | ||
1035 | while ((p = strchr(p, '/'))) | ||
1036 | *p = '!'; | ||
1042 | jbd2_stats_proc_init(journal); | 1037 | jbd2_stats_proc_init(journal); |
1043 | 1038 | ||
1044 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1039 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1061 | { | 1056 | { |
1062 | struct buffer_head *bh; | 1057 | struct buffer_head *bh; |
1063 | journal_t *journal = journal_init_common(); | 1058 | journal_t *journal = journal_init_common(); |
1059 | char *p; | ||
1064 | int err; | 1060 | int err; |
1065 | int n; | 1061 | int n; |
1066 | unsigned long long blocknr; | 1062 | unsigned long long blocknr; |
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1070 | 1066 | ||
1071 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; | 1067 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; |
1072 | journal->j_inode = inode; | 1068 | journal->j_inode = inode; |
1069 | bdevname(journal->j_dev, journal->j_devname); | ||
1070 | p = journal->j_devname; | ||
1071 | while ((p = strchr(p, '/'))) | ||
1072 | *p = '!'; | ||
1073 | p = journal->j_devname + strlen(journal->j_devname); | ||
1074 | sprintf(p, ":%lu", journal->j_inode->i_ino); | ||
1073 | jbd_debug(1, | 1075 | jbd_debug(1, |
1074 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", | 1076 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", |
1075 | journal, inode->i_sb->s_id, inode->i_ino, | 1077 | journal, inode->i_sb->s_id, inode->i_ino, |
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1253 | goto out; | 1255 | goto out; |
1254 | } | 1256 | } |
1255 | 1257 | ||
1258 | if (buffer_write_io_error(bh)) { | ||
1259 | /* | ||
1260 | * Oh, dear. A previous attempt to write the journal | ||
1261 | * superblock failed. This could happen because the | ||
1262 | * USB device was yanked out. Or it could happen to | ||
1263 | * be a transient write error and maybe the block will | ||
1264 | * be remapped. Nothing we can do but to retry the | ||
1265 | * write and hope for the best. | ||
1266 | */ | ||
1267 | printk(KERN_ERR "JBD2: previous I/O error detected " | ||
1268 | "for journal superblock update for %s.\n", | ||
1269 | journal->j_devname); | ||
1270 | clear_buffer_write_io_error(bh); | ||
1271 | set_buffer_uptodate(bh); | ||
1272 | } | ||
1273 | |||
1256 | spin_lock(&journal->j_state_lock); | 1274 | spin_lock(&journal->j_state_lock); |
1257 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1275 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
1258 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1276 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1264 | 1282 | ||
1265 | BUFFER_TRACE(bh, "marking dirty"); | 1283 | BUFFER_TRACE(bh, "marking dirty"); |
1266 | mark_buffer_dirty(bh); | 1284 | mark_buffer_dirty(bh); |
1267 | if (wait) | 1285 | if (wait) { |
1268 | sync_dirty_buffer(bh); | 1286 | sync_dirty_buffer(bh); |
1269 | else | 1287 | if (buffer_write_io_error(bh)) { |
1288 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1289 | "when updating journal superblock for %s.\n", | ||
1290 | journal->j_devname); | ||
1291 | clear_buffer_write_io_error(bh); | ||
1292 | set_buffer_uptodate(bh); | ||
1293 | } | ||
1294 | } else | ||
1270 | ll_rw_block(SWRITE, 1, &bh); | 1295 | ll_rw_block(SWRITE, 1, &bh); |
1271 | 1296 | ||
1272 | out: | 1297 | out: |
@@ -1761,23 +1786,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1761 | } | 1786 | } |
1762 | 1787 | ||
1763 | /* | 1788 | /* |
1764 | * journal_dev_name: format a character string to describe on what | ||
1765 | * device this journal is present. | ||
1766 | */ | ||
1767 | |||
1768 | static const char *journal_dev_name(journal_t *journal, char *buffer) | ||
1769 | { | ||
1770 | struct block_device *bdev; | ||
1771 | |||
1772 | if (journal->j_inode) | ||
1773 | bdev = journal->j_inode->i_sb->s_bdev; | ||
1774 | else | ||
1775 | bdev = journal->j_dev; | ||
1776 | |||
1777 | return bdevname(bdev, buffer); | ||
1778 | } | ||
1779 | |||
1780 | /* | ||
1781 | * Journal abort has very specific semantics, which we describe | 1789 | * Journal abort has very specific semantics, which we describe |
1782 | * for journal abort. | 1790 | * for journal abort. |
1783 | * | 1791 | * |
@@ -1793,13 +1801,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) | |||
1793 | void __jbd2_journal_abort_hard(journal_t *journal) | 1801 | void __jbd2_journal_abort_hard(journal_t *journal) |
1794 | { | 1802 | { |
1795 | transaction_t *transaction; | 1803 | transaction_t *transaction; |
1796 | char b[BDEVNAME_SIZE]; | ||
1797 | 1804 | ||
1798 | if (journal->j_flags & JBD2_ABORT) | 1805 | if (journal->j_flags & JBD2_ABORT) |
1799 | return; | 1806 | return; |
1800 | 1807 | ||
1801 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1808 | printk(KERN_ERR "Aborting journal on device %s.\n", |
1802 | journal_dev_name(journal, b)); | 1809 | journal->j_devname); |
1803 | 1810 | ||
1804 | spin_lock(&journal->j_state_lock); | 1811 | spin_lock(&journal->j_state_lock); |
1805 | journal->j_flags |= JBD2_ABORT; | 1812 | journal->j_flags |= JBD2_ABORT; |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e068..29ff57ec5d1f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -990,15 +990,6 @@ out: | |||
990 | } | 990 | } |
991 | 991 | ||
992 | /* | 992 | /* |
993 | * This is only valid for leaf nodes, which are the only ones that can | ||
994 | * have empty extents anyway. | ||
995 | */ | ||
996 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
997 | { | ||
998 | return !rec->e_leaf_clusters; | ||
999 | } | ||
1000 | |||
1001 | /* | ||
1002 | * This function will discard the rightmost extent record. | 993 | * This function will discard the rightmost extent record. |
1003 | */ | 994 | */ |
1004 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) | 995 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd8011..60cd3d59230c 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, | |||
146 | return le16_to_cpu(rec->e_leaf_clusters); | 146 | return le16_to_cpu(rec->e_leaf_clusters); |
147 | } | 147 | } |
148 | 148 | ||
149 | /* | ||
150 | * This is only valid for leaf nodes, which are the only ones that can | ||
151 | * have empty extents anyway. | ||
152 | */ | ||
153 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
154 | { | ||
155 | return !rec->e_leaf_clusters; | ||
156 | } | ||
157 | |||
149 | #endif /* OCFS2_ALLOC_H */ | 158 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326fe..aed268e80b49 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/fiemap.h> | ||
28 | 29 | ||
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 30 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
30 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
33 | 34 | ||
34 | #include "alloc.h" | 35 | #include "alloc.h" |
36 | #include "dlmglue.h" | ||
35 | #include "extent_map.h" | 37 | #include "extent_map.h" |
36 | #include "inode.h" | 38 | #include "inode.h" |
37 | #include "super.h" | 39 | #include "super.h" |
@@ -282,6 +284,51 @@ out: | |||
282 | kfree(new_emi); | 284 | kfree(new_emi); |
283 | } | 285 | } |
284 | 286 | ||
287 | static int ocfs2_last_eb_is_empty(struct inode *inode, | ||
288 | struct ocfs2_dinode *di) | ||
289 | { | ||
290 | int ret, next_free; | ||
291 | u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); | ||
292 | struct buffer_head *eb_bh = NULL; | ||
293 | struct ocfs2_extent_block *eb; | ||
294 | struct ocfs2_extent_list *el; | ||
295 | |||
296 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, | ||
297 | &eb_bh, OCFS2_BH_CACHED, inode); | ||
298 | if (ret) { | ||
299 | mlog_errno(ret); | ||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
304 | el = &eb->h_list; | ||
305 | |||
306 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
307 | ret = -EROFS; | ||
308 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
309 | goto out; | ||
310 | } | ||
311 | |||
312 | if (el->l_tree_depth) { | ||
313 | ocfs2_error(inode->i_sb, | ||
314 | "Inode %lu has non zero tree depth in " | ||
315 | "leaf block %llu\n", inode->i_ino, | ||
316 | (unsigned long long)eb_bh->b_blocknr); | ||
317 | ret = -EROFS; | ||
318 | goto out; | ||
319 | } | ||
320 | |||
321 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
322 | |||
323 | if (next_free == 0 || | ||
324 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) | ||
325 | ret = 1; | ||
326 | |||
327 | out: | ||
328 | brelse(eb_bh); | ||
329 | return ret; | ||
330 | } | ||
331 | |||
285 | /* | 332 | /* |
286 | * Return the 1st index within el which contains an extent start | 333 | * Return the 1st index within el which contains an extent start |
287 | * larger than v_cluster. | 334 | * larger than v_cluster. |
@@ -373,42 +420,28 @@ out: | |||
373 | return ret; | 420 | return ret; |
374 | } | 421 | } |
375 | 422 | ||
376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 423 | static int ocfs2_get_clusters_nocache(struct inode *inode, |
377 | u32 *p_cluster, u32 *num_clusters, | 424 | struct buffer_head *di_bh, |
378 | unsigned int *extent_flags) | 425 | u32 v_cluster, unsigned int *hole_len, |
426 | struct ocfs2_extent_rec *ret_rec, | ||
427 | unsigned int *is_last) | ||
379 | { | 428 | { |
380 | int ret, i; | 429 | int i, ret, tree_height, len; |
381 | unsigned int flags = 0; | ||
382 | struct buffer_head *di_bh = NULL; | ||
383 | struct buffer_head *eb_bh = NULL; | ||
384 | struct ocfs2_dinode *di; | 430 | struct ocfs2_dinode *di; |
385 | struct ocfs2_extent_block *eb; | 431 | struct ocfs2_extent_block *uninitialized_var(eb); |
386 | struct ocfs2_extent_list *el; | 432 | struct ocfs2_extent_list *el; |
387 | struct ocfs2_extent_rec *rec; | 433 | struct ocfs2_extent_rec *rec; |
388 | u32 coff; | 434 | struct buffer_head *eb_bh = NULL; |
389 | |||
390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
391 | ret = -ERANGE; | ||
392 | mlog_errno(ret); | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
397 | num_clusters, extent_flags); | ||
398 | if (ret == 0) | ||
399 | goto out; | ||
400 | 435 | ||
401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 436 | memset(ret_rec, 0, sizeof(*ret_rec)); |
402 | &di_bh, OCFS2_BH_CACHED, inode); | 437 | if (is_last) |
403 | if (ret) { | 438 | *is_last = 0; |
404 | mlog_errno(ret); | ||
405 | goto out; | ||
406 | } | ||
407 | 439 | ||
408 | di = (struct ocfs2_dinode *) di_bh->b_data; | 440 | di = (struct ocfs2_dinode *) di_bh->b_data; |
409 | el = &di->id2.i_list; | 441 | el = &di->id2.i_list; |
442 | tree_height = le16_to_cpu(el->l_tree_depth); | ||
410 | 443 | ||
411 | if (el->l_tree_depth) { | 444 | if (tree_height > 0) { |
412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 445 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
413 | if (ret) { | 446 | if (ret) { |
414 | mlog_errno(ret); | 447 | mlog_errno(ret); |
@@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
431 | i = ocfs2_search_extent_list(el, v_cluster); | 464 | i = ocfs2_search_extent_list(el, v_cluster); |
432 | if (i == -1) { | 465 | if (i == -1) { |
433 | /* | 466 | /* |
434 | * A hole was found. Return some canned values that | 467 | * Holes can be larger than the maximum size of an |
435 | * callers can key on. If asked for, num_clusters will | 468 | * extent, so we return their lengths in a seperate |
436 | * be populated with the size of the hole. | 469 | * field. |
437 | */ | 470 | */ |
438 | *p_cluster = 0; | 471 | if (hole_len) { |
439 | if (num_clusters) { | ||
440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 472 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
441 | v_cluster, | 473 | v_cluster, &len); |
442 | num_clusters); | ||
443 | if (ret) { | 474 | if (ret) { |
444 | mlog_errno(ret); | 475 | mlog_errno(ret); |
445 | goto out; | 476 | goto out; |
446 | } | 477 | } |
478 | |||
479 | *hole_len = len; | ||
447 | } | 480 | } |
448 | } else { | 481 | goto out_hole; |
449 | rec = &el->l_recs[i]; | 482 | } |
450 | 483 | ||
451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 484 | rec = &el->l_recs[i]; |
452 | 485 | ||
453 | if (!rec->e_blkno) { | 486 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 487 | |
455 | "record (%u, %u, 0)", inode->i_ino, | 488 | if (!rec->e_blkno) { |
456 | le32_to_cpu(rec->e_cpos), | 489 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
457 | ocfs2_rec_clusters(el, rec)); | 490 | "record (%u, %u, 0)", inode->i_ino, |
458 | ret = -EROFS; | 491 | le32_to_cpu(rec->e_cpos), |
459 | goto out; | 492 | ocfs2_rec_clusters(el, rec)); |
493 | ret = -EROFS; | ||
494 | goto out; | ||
495 | } | ||
496 | |||
497 | *ret_rec = *rec; | ||
498 | |||
499 | /* | ||
500 | * Checking for last extent is potentially expensive - we | ||
501 | * might have to look at the next leaf over to see if it's | ||
502 | * empty. | ||
503 | * | ||
504 | * The first two checks are to see whether the caller even | ||
505 | * cares for this information, and if the extent is at least | ||
506 | * the last in it's list. | ||
507 | * | ||
508 | * If those hold true, then the extent is last if any of the | ||
509 | * additional conditions hold true: | ||
510 | * - Extent list is in-inode | ||
511 | * - Extent list is right-most | ||
512 | * - Extent list is 2nd to rightmost, with empty right-most | ||
513 | */ | ||
514 | if (is_last) { | ||
515 | if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
516 | if (tree_height == 0) | ||
517 | *is_last = 1; | ||
518 | else if (eb->h_blkno == di->i_last_eb_blk) | ||
519 | *is_last = 1; | ||
520 | else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { | ||
521 | ret = ocfs2_last_eb_is_empty(inode, di); | ||
522 | if (ret < 0) { | ||
523 | mlog_errno(ret); | ||
524 | goto out; | ||
525 | } | ||
526 | if (ret == 1) | ||
527 | *is_last = 1; | ||
528 | } | ||
460 | } | 529 | } |
530 | } | ||
531 | |||
532 | out_hole: | ||
533 | ret = 0; | ||
534 | out: | ||
535 | brelse(eb_bh); | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | static void ocfs2_relative_extent_offsets(struct super_block *sb, | ||
540 | u32 v_cluster, | ||
541 | struct ocfs2_extent_rec *rec, | ||
542 | u32 *p_cluster, u32 *num_clusters) | ||
543 | |||
544 | { | ||
545 | u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); | ||
546 | |||
547 | *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); | ||
548 | *p_cluster = *p_cluster + coff; | ||
549 | |||
550 | if (num_clusters) | ||
551 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; | ||
552 | } | ||
553 | |||
554 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | ||
555 | u32 *p_cluster, u32 *num_clusters, | ||
556 | unsigned int *extent_flags) | ||
557 | { | ||
558 | int ret; | ||
559 | unsigned int uninitialized_var(hole_len), flags = 0; | ||
560 | struct buffer_head *di_bh = NULL; | ||
561 | struct ocfs2_extent_rec rec; | ||
461 | 562 | ||
462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 563 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
564 | ret = -ERANGE; | ||
565 | mlog_errno(ret); | ||
566 | goto out; | ||
567 | } | ||
463 | 568 | ||
464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 569 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, |
465 | le64_to_cpu(rec->e_blkno)); | 570 | num_clusters, extent_flags); |
466 | *p_cluster = *p_cluster + coff; | 571 | if (ret == 0) |
572 | goto out; | ||
467 | 573 | ||
468 | if (num_clusters) | 574 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, |
469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 575 | &di_bh, OCFS2_BH_CACHED, inode); |
576 | if (ret) { | ||
577 | mlog_errno(ret); | ||
578 | goto out; | ||
579 | } | ||
470 | 580 | ||
471 | flags = rec->e_flags; | 581 | ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, |
582 | &rec, NULL); | ||
583 | if (ret) { | ||
584 | mlog_errno(ret); | ||
585 | goto out; | ||
586 | } | ||
472 | 587 | ||
473 | ocfs2_extent_map_insert_rec(inode, rec); | 588 | if (rec.e_blkno == 0ULL) { |
589 | /* | ||
590 | * A hole was found. Return some canned values that | ||
591 | * callers can key on. If asked for, num_clusters will | ||
592 | * be populated with the size of the hole. | ||
593 | */ | ||
594 | *p_cluster = 0; | ||
595 | if (num_clusters) { | ||
596 | *num_clusters = hole_len; | ||
597 | } | ||
598 | } else { | ||
599 | ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, | ||
600 | p_cluster, num_clusters); | ||
601 | flags = rec.e_flags; | ||
602 | |||
603 | ocfs2_extent_map_insert_rec(inode, &rec); | ||
474 | } | 604 | } |
475 | 605 | ||
476 | if (extent_flags) | 606 | if (extent_flags) |
@@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
478 | 608 | ||
479 | out: | 609 | out: |
480 | brelse(di_bh); | 610 | brelse(di_bh); |
481 | brelse(eb_bh); | ||
482 | return ret; | 611 | return ret; |
483 | } | 612 | } |
484 | 613 | ||
@@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
521 | out: | 650 | out: |
522 | return ret; | 651 | return ret; |
523 | } | 652 | } |
653 | |||
654 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | ||
655 | struct fiemap_extent_info *fieinfo, | ||
656 | u64 map_start) | ||
657 | { | ||
658 | int ret; | ||
659 | unsigned int id_count; | ||
660 | struct ocfs2_dinode *di; | ||
661 | u64 phys; | ||
662 | u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; | ||
663 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
664 | |||
665 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
666 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
667 | |||
668 | if (map_start < id_count) { | ||
669 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | ||
670 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
671 | |||
672 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | ||
673 | flags); | ||
674 | if (ret < 0) | ||
675 | return ret; | ||
676 | } | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
682 | |||
683 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
684 | u64 map_start, u64 map_len) | ||
685 | { | ||
686 | int ret, is_last; | ||
687 | u32 mapping_end, cpos; | ||
688 | unsigned int hole_size; | ||
689 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
690 | u64 len_bytes, phys_bytes, virt_bytes; | ||
691 | struct buffer_head *di_bh = NULL; | ||
692 | struct ocfs2_extent_rec rec; | ||
693 | |||
694 | ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); | ||
695 | if (ret) | ||
696 | return ret; | ||
697 | |||
698 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
699 | if (ret) { | ||
700 | mlog_errno(ret); | ||
701 | goto out; | ||
702 | } | ||
703 | |||
704 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
705 | |||
706 | /* | ||
707 | * Handle inline-data separately. | ||
708 | */ | ||
709 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
710 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | ||
711 | goto out_unlock; | ||
712 | } | ||
713 | |||
714 | cpos = map_start >> osb->s_clustersize_bits; | ||
715 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | ||
716 | map_start + map_len); | ||
717 | mapping_end -= cpos; | ||
718 | is_last = 0; | ||
719 | while (cpos < mapping_end && !is_last) { | ||
720 | u32 fe_flags; | ||
721 | |||
722 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, | ||
723 | &hole_size, &rec, &is_last); | ||
724 | if (ret) { | ||
725 | mlog_errno(ret); | ||
726 | goto out; | ||
727 | } | ||
728 | |||
729 | if (rec.e_blkno == 0ULL) { | ||
730 | cpos += hole_size; | ||
731 | continue; | ||
732 | } | ||
733 | |||
734 | fe_flags = 0; | ||
735 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | ||
736 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
737 | if (is_last) | ||
738 | fe_flags |= FIEMAP_EXTENT_LAST; | ||
739 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | ||
740 | phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; | ||
741 | virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; | ||
742 | |||
743 | ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, | ||
744 | len_bytes, fe_flags); | ||
745 | if (ret) | ||
746 | break; | ||
747 | |||
748 | cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); | ||
749 | } | ||
750 | |||
751 | if (ret > 0) | ||
752 | ret = 0; | ||
753 | |||
754 | out_unlock: | ||
755 | brelse(di_bh); | ||
756 | |||
757 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
758 | |||
759 | ocfs2_inode_unlock(inode, 0); | ||
760 | out: | ||
761 | |||
762 | return ret; | ||
763 | } | ||
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a22..1b97490e1ea8 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
@@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | |||
50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
51 | u64 *ret_count, unsigned int *extent_flags); | 51 | u64 *ret_count, unsigned int *extent_flags); |
52 | 52 | ||
53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
54 | u64 map_start, u64 map_len); | ||
55 | |||
53 | #endif /* _EXTENT_MAP_H */ | 56 | #endif /* _EXTENT_MAP_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3daa..ed38796052d2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = { | |||
2228 | .getattr = ocfs2_getattr, | 2228 | .getattr = ocfs2_getattr, |
2229 | .permission = ocfs2_permission, | 2229 | .permission = ocfs2_permission, |
2230 | .fallocate = ocfs2_fallocate, | 2230 | .fallocate = ocfs2_fallocate, |
2231 | .fiemap = ocfs2_fiemap, | ||
2231 | }; | 2232 | }; |
2232 | 2233 | ||
2233 | const struct inode_operations ocfs2_special_file_iops = { | 2234 | const struct inode_operations ocfs2_special_file_iops = { |
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 80171ee89a22..8120fa1bc235 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h | |||
@@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *); | |||
837 | extern void ext3_set_inode_flags(struct inode *); | 837 | extern void ext3_set_inode_flags(struct inode *); |
838 | extern void ext3_get_inode_flags(struct ext3_inode_info *); | 838 | extern void ext3_get_inode_flags(struct ext3_inode_info *); |
839 | extern void ext3_set_aops(struct inode *inode); | 839 | extern void ext3_set_aops(struct inode *inode); |
840 | extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
841 | u64 start, u64 len); | ||
840 | 842 | ||
841 | /* ioctl.c */ | 843 | /* ioctl.c */ |
842 | extern int ext3_ioctl (struct inode *, struct file *, unsigned int, | 844 | extern int ext3_ioctl (struct inode *, struct file *, unsigned int, |
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 000000000000..671decbd2aeb --- /dev/null +++ b/include/linux/fiemap.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * FS_IOC_FIEMAP ioctl infrastructure. | ||
3 | * | ||
4 | * Some portions copyright (C) 2007 Cluster File Systems, Inc | ||
5 | * | ||
6 | * Authors: Mark Fasheh <mfasheh@suse.com> | ||
7 | * Kalpak Shah <kalpak.shah@sun.com> | ||
8 | * Andreas Dilger <adilger@sun.com> | ||
9 | */ | ||
10 | |||
11 | #ifndef _LINUX_FIEMAP_H | ||
12 | #define _LINUX_FIEMAP_H | ||
13 | |||
14 | struct fiemap_extent { | ||
15 | __u64 fe_logical; /* logical offset in bytes for the start of | ||
16 | * the extent from the beginning of the file */ | ||
17 | __u64 fe_physical; /* physical offset in bytes for the start | ||
18 | * of the extent from the beginning of the disk */ | ||
19 | __u64 fe_length; /* length in bytes for this extent */ | ||
20 | __u64 fe_reserved64[2]; | ||
21 | __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ | ||
22 | __u32 fe_reserved[3]; | ||
23 | }; | ||
24 | |||
25 | struct fiemap { | ||
26 | __u64 fm_start; /* logical offset (inclusive) at | ||
27 | * which to start mapping (in) */ | ||
28 | __u64 fm_length; /* logical length of mapping which | ||
29 | * userspace wants (in) */ | ||
30 | __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ | ||
31 | __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ | ||
32 | __u32 fm_extent_count; /* size of fm_extents array (in) */ | ||
33 | __u32 fm_reserved; | ||
34 | struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ | ||
35 | }; | ||
36 | |||
37 | #define FIEMAP_MAX_OFFSET (~0ULL) | ||
38 | |||
39 | #define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ | ||
40 | #define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ | ||
41 | |||
42 | #define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) | ||
43 | |||
44 | #define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ | ||
45 | #define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ | ||
46 | #define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. | ||
47 | * Sets EXTENT_UNKNOWN. */ | ||
48 | #define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read | ||
49 | * while fs is unmounted */ | ||
50 | #define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. | ||
51 | * Sets EXTENT_NO_BYPASS. */ | ||
52 | #define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be | ||
53 | * block aligned. */ | ||
54 | #define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. | ||
55 | * Sets EXTENT_NOT_ALIGNED.*/ | ||
56 | #define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. | ||
57 | * Sets EXTENT_NOT_ALIGNED.*/ | ||
58 | #define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but | ||
59 | * no data (i.e. zero). */ | ||
60 | #define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively | ||
61 | * support extents. Result | ||
62 | * merged for efficiency. */ | ||
63 | |||
64 | #endif /* _LINUX_FIEMAP_H */ | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 32477e8872d5..44e3cb2f1966 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -234,6 +234,7 @@ extern int dir_notify_enable; | |||
234 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) | 234 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) |
235 | #define FS_IOC_GETVERSION _IOR('v', 1, long) | 235 | #define FS_IOC_GETVERSION _IOR('v', 1, long) |
236 | #define FS_IOC_SETVERSION _IOW('v', 2, long) | 236 | #define FS_IOC_SETVERSION _IOW('v', 2, long) |
237 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) | ||
237 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) | 238 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) |
238 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) | 239 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) |
239 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) | 240 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) |
@@ -294,6 +295,7 @@ extern int dir_notify_enable; | |||
294 | #include <linux/mutex.h> | 295 | #include <linux/mutex.h> |
295 | #include <linux/capability.h> | 296 | #include <linux/capability.h> |
296 | #include <linux/semaphore.h> | 297 | #include <linux/semaphore.h> |
298 | #include <linux/fiemap.h> | ||
297 | 299 | ||
298 | #include <asm/atomic.h> | 300 | #include <asm/atomic.h> |
299 | #include <asm/byteorder.h> | 301 | #include <asm/byteorder.h> |
@@ -1182,6 +1184,20 @@ extern void dentry_unhash(struct dentry *dentry); | |||
1182 | extern int file_permission(struct file *, int); | 1184 | extern int file_permission(struct file *, int); |
1183 | 1185 | ||
1184 | /* | 1186 | /* |
1187 | * VFS FS_IOC_FIEMAP helper definitions. | ||
1188 | */ | ||
1189 | struct fiemap_extent_info { | ||
1190 | unsigned int fi_flags; /* Flags as passed from user */ | ||
1191 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | ||
1192 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | ||
1193 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent | ||
1194 | * array */ | ||
1195 | }; | ||
1196 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | ||
1197 | u64 phys, u64 len, u32 flags); | ||
1198 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | ||
1199 | |||
1200 | /* | ||
1185 | * File types | 1201 | * File types |
1186 | * | 1202 | * |
1187 | * NOTE! These match bits 12..15 of stat.st_mode | 1203 | * NOTE! These match bits 12..15 of stat.st_mode |
@@ -1290,6 +1306,8 @@ struct inode_operations { | |||
1290 | void (*truncate_range)(struct inode *, loff_t, loff_t); | 1306 | void (*truncate_range)(struct inode *, loff_t, loff_t); |
1291 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, | 1307 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, |
1292 | loff_t len); | 1308 | loff_t len); |
1309 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | ||
1310 | u64 len); | ||
1293 | }; | 1311 | }; |
1294 | 1312 | ||
1295 | struct seq_file; | 1313 | struct seq_file; |
@@ -1987,6 +2005,9 @@ extern int vfs_fstat(unsigned int, struct kstat *); | |||
1987 | 2005 | ||
1988 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | 2006 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, |
1989 | unsigned long arg); | 2007 | unsigned long arg); |
2008 | extern int generic_block_fiemap(struct inode *inode, | ||
2009 | struct fiemap_extent_info *fieinfo, u64 start, | ||
2010 | u64 len, get_block_t *get_block); | ||
1990 | 2011 | ||
1991 | extern void get_filesystem(struct file_system_type *fs); | 2012 | extern void get_filesystem(struct file_system_type *fs); |
1992 | extern void put_filesystem(struct file_system_type *fs); | 2013 | extern void put_filesystem(struct file_system_type *fs); |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3dd209007098..66c3499478b5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -850,7 +850,8 @@ struct journal_s | |||
850 | */ | 850 | */ |
851 | struct block_device *j_dev; | 851 | struct block_device *j_dev; |
852 | int j_blocksize; | 852 | int j_blocksize; |
853 | unsigned long long j_blk_offset; | 853 | unsigned long long j_blk_offset; |
854 | char j_devname[BDEVNAME_SIZE+24]; | ||
854 | 855 | ||
855 | /* | 856 | /* |
856 | * Device which holds the client fs. For internal journal this will be | 857 | * Device which holds the client fs. For internal journal this will be |
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 208388835357..9007ccdfc112 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h | |||
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); | |||
35 | void percpu_counter_destroy(struct percpu_counter *fbc); | 35 | void percpu_counter_destroy(struct percpu_counter *fbc); |
36 | void percpu_counter_set(struct percpu_counter *fbc, s64 amount); | 36 | void percpu_counter_set(struct percpu_counter *fbc, s64 amount); |
37 | void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); | 37 | void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); |
38 | s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); | 38 | s64 __percpu_counter_sum(struct percpu_counter *fbc); |
39 | 39 | ||
40 | static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) | 40 | static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) |
41 | { | 41 | { |
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) | |||
44 | 44 | ||
45 | static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) | 45 | static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) |
46 | { | 46 | { |
47 | s64 ret = __percpu_counter_sum(fbc, 0); | 47 | s64 ret = __percpu_counter_sum(fbc); |
48 | return ret < 0 ? 0 : ret; | 48 | return ret < 0 ? 0 : ret; |
49 | } | 49 | } |
50 | 50 | ||
51 | static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) | ||
52 | { | ||
53 | return __percpu_counter_sum(fbc, 1); | ||
54 | } | ||
55 | |||
56 | |||
57 | static inline s64 percpu_counter_sum(struct percpu_counter *fbc) | 51 | static inline s64 percpu_counter_sum(struct percpu_counter *fbc) |
58 | { | 52 | { |
59 | return __percpu_counter_sum(fbc, 0); | 53 | return __percpu_counter_sum(fbc); |
60 | } | 54 | } |
61 | 55 | ||
62 | static inline s64 percpu_counter_read(struct percpu_counter *fbc) | 56 | static inline s64 percpu_counter_read(struct percpu_counter *fbc) |
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 4a8ba4bf5f6f..a8663890a88c 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c | |||
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); | |||
52 | * Add up all the per-cpu counts, return the result. This is a more accurate | 52 | * Add up all the per-cpu counts, return the result. This is a more accurate |
53 | * but much slower version of percpu_counter_read_positive() | 53 | * but much slower version of percpu_counter_read_positive() |
54 | */ | 54 | */ |
55 | s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) | 55 | s64 __percpu_counter_sum(struct percpu_counter *fbc) |
56 | { | 56 | { |
57 | s64 ret; | 57 | s64 ret; |
58 | int cpu; | 58 | int cpu; |
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) | |||
62 | for_each_online_cpu(cpu) { | 62 | for_each_online_cpu(cpu) { |
63 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); | 63 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); |
64 | ret += *pcount; | 64 | ret += *pcount; |
65 | if (set) | 65 | *pcount = 0; |
66 | *pcount = 0; | ||
67 | } | 66 | } |
68 | if (set) | 67 | fbc->count = ret; |
69 | fbc->count = ret; | ||
70 | 68 | ||
71 | spin_unlock(&fbc->lock); | 69 | spin_unlock(&fbc->lock); |
72 | return ret; | 70 | return ret; |